[PATCH v3 3/7] parser: Add series parsing
Andy Doan
andy.doan at linaro.org
Fri Sep 23 06:02:21 AEST 2016
On 09/12/2016 04:53 PM, Stephen Finucane wrote:
> It is now possible to parse and store series, so do just that.
> The parsing at the moment is based on both RFC822 headers and
> subject lines.
>
> Signed-off-by: Stephen Finucane <stephenfinucane at hotmail.com>
thanks for the tests!
Reviewed-by: Andy Doan <andy.doan at linaro.org>
> ---
> v3:
> - Rework how nested series are handled once again
> - Don't search for references when creating a cover letter
> v2:
> - Rebase onto master, moving changes into 'parser'
> - Add unit tests
> - Merge "Handle 'xxx (v2)' style messages" patch
> - Merge "Handle series sent 'in-reply-to'" patch
> - Handle capitalized version prefixes like [V2]
> - Trivial cleanup of some parser functions
> ---
> patchwork/models.py | 7 ++-
> patchwork/parser.py | 127 +++++++++++++++++++++++++++++++++++++----
> patchwork/tests/test_parser.py | 111 +++++++++++++++++++++++++++++------
> patchwork/tests/utils.py | 32 +++++++++++
> 4 files changed, 247 insertions(+), 30 deletions(-)
>
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 2875369..52325d2 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -230,8 +230,11 @@ class SeriesRevision(models.Model):
> try:
> return self.cover_letter.name
> except CoverLetter.DoesNotExist:
> - return '[Series #%d, revision #%d]' % (self.group.id,
> - self.version)
> + if self.group:
> + return '[Series #%d, revision #%d]' % (self.group.id,
> + self.version)
> + else:
> + return '[Untitled series]'
>
> @property
> def actual_total(self):
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 1805df8..d20ac19 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -21,8 +21,10 @@
>
> import codecs
> import datetime
> -from email.header import Header, decode_header
> -from email.utils import parsedate_tz, mktime_tz
> +from email.header import Header
> +from email.header import decode_header
> +from email.utils import parsedate_tz
> +from email.utils import mktime_tz
> from fnmatch import fnmatch
> from functools import reduce
> import logging
> @@ -33,9 +35,17 @@ from django.contrib.auth.models import User
> from django.utils import six
> from django.utils.six.moves import map
>
> -from patchwork.models import (Patch, Project, Person, Comment, State,
> - DelegationRule, Submission, CoverLetter,
> - get_default_initial_patch_state)
> +from patchwork.models import Comment
> +from patchwork.models import CoverLetter
> +from patchwork.models import DelegationRule
> +from patchwork.models import get_default_initial_patch_state
> +from patchwork.models import Patch
> +from patchwork.models import Person
> +from patchwork.models import Project
> +from patchwork.models import SeriesRevision
> +from patchwork.models import SeriesReference
> +from patchwork.models import State
> +from patchwork.models import Submission
>
>
> _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
> @@ -100,6 +110,34 @@ def find_project_by_header(mail):
> return project
>
>
> +def find_series(mail):
> + """Find a patch's `SeriesRevision`.
> +
> + Traverse RFC822 headers, starting with most recent first, to find
> + ancestors and the related series. Headers are traversed in reverse
> + to handle series sent in reply to previous series, like so:
> +
> + [PATCH 0/3] A cover letter
> + [PATCH 1/3] The first patch
> + ...
> + [PATCH v2 0/3] A cover letter
> + [PATCH v2 1/3] The first patch
> + ...
> +
> + Args:
> + mail (email.message.Message): The mail to extract series from
> +
> + Returns:
> + The matching `SeriesRevision` instance, if any
> + """
> + for ref in find_references(mail, True):
> + # try parsing by RFC5322 fields first
> + try:
> + return SeriesReference.objects.get(msgid=ref).series
> + except SeriesReference.DoesNotExist:
> + pass
> +
> +
> def find_author(mail):
> from_header = clean_header(mail.get('From'))
> name, email = (None, None)
> @@ -161,10 +199,13 @@ def find_headers(mail):
> for (k, v) in list(mail.items())])
>
>
> -def find_references(mail):
> +def find_references(mail, include_msgid=False):
> """Construct a list of possible reply message ids."""
> refs = []
>
> + if include_msgid:
> + refs.append(mail.get('Message-ID'))
> +
> if 'In-Reply-To' in mail:
> refs.append(mail.get('In-Reply-To'))
>
> @@ -178,6 +219,13 @@ def find_references(mail):
> return refs
>
>
> +def _parse_prefixes(subject_prefixes, regex):
> + for prefix in subject_prefixes:
> + m = regex.match(prefix)
> + if m:
> + return m
> +
> +
> def parse_series_marker(subject_prefixes):
> """Extract series markers from subject.
>
> @@ -193,14 +241,36 @@ def parse_series_marker(subject_prefixes):
> """
>
> regex = re.compile('^([0-9]+)/([0-9]+)$')
> - for prefix in subject_prefixes:
> - m = regex.match(prefix)
> - if not m:
> - continue
> + m = _parse_prefixes(subject_prefixes, regex)
> + if m:
> return (int(m.group(1)), int(m.group(2)))
> +
> return (None, None)
>
>
> +def parse_version(subject, subject_prefixes):
> + """Extract patch version.
> +
> + Args:
> + subject: Main body of subject line
> + subject_prefixes: List of subject prefixes to extract version
> + from
> +
> + Returns:
> + version if found, else 1
> + """
> + regex = re.compile('^[vV](\d+)$')
> + m = _parse_prefixes(subject_prefixes, regex)
> + if m:
> + return int(m.group(1))
> +
> + m = re.search(r'\([vV](\d+)\)', subject)
> + if m:
> + return int(m.group(1))
> +
> + return 1
> +
> +
> def find_content(project, mail):
> """Extract a comment and potential diff from a mail."""
> patchbuf = None
> @@ -622,6 +692,7 @@ def parse_mail(mail, list_id=None):
> author = find_author(mail)
> name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
> x, n = parse_series_marker(prefixes)
> + version = parse_version(name, prefixes)
> refs = find_references(mail)
> date = find_date(mail)
> headers = find_headers(mail)
> @@ -638,9 +709,22 @@ def parse_mail(mail, list_id=None):
> filenames = find_filenames(diff)
> delegate = auto_delegate(project, filenames)
>
> + series = find_series(mail)
> + if not series and n: # the series markers indicates a series
> + series = SeriesRevision(date=date,
> + submitter=author,
> + version=version,
> + total=n)
> + series.save()
> +
> + for ref in refs + [msgid]: # save references for series
> + # we don't want duplicates
> + SeriesReference.objects.get_or_create(series=series, msgid=ref)
> +
> patch = Patch(
> msgid=msgid,
> project=project,
> + series=series,
> name=name,
> date=date,
> headers=headers,
> @@ -674,9 +758,32 @@ def parse_mail(mail, list_id=None):
> if is_cover_letter:
> author.save()
>
> + # we don't use 'find_series' here as a cover letter will
> + # always be the first item in a thread, thus the references
> + # could only point to a different series or unrelated
> + # message
> + try:
> + series = SeriesReference.objects.get(msgid=msgid).series
> + except SeriesReference.DoesNotExist:
> + series = None
> +
> + if not series:
> + series = SeriesRevision(date=date,
> + submitter=author,
> + version=version,
> + total=n)
> + series.save()
> +
> + # we don't save the in-reply-to or references fields
> + # for a cover letter, as they can't refer to the same
> + # series
> + SeriesReference.objects.get_or_create(series=series,
> + msgid=msgid)
> +
> cover_letter = CoverLetter(
> msgid=msgid,
> project=project,
> + series=series,
> name=name,
> date=date,
> headers=headers,
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index 7b5c71b..4783e0f 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -34,11 +34,14 @@ from patchwork.parser import clean_subject
> from patchwork.parser import find_author
> from patchwork.parser import find_content
> from patchwork.parser import find_project_by_header
> +from patchwork.parser import find_series
> from patchwork.parser import parse_mail as _parse_mail
> from patchwork.parser import parse_pull_request
> from patchwork.parser import parse_series_marker
> +from patchwork.parser import parse_version
> from patchwork.parser import split_prefixes
> from patchwork.tests.utils import create_project
> +from patchwork.tests.utils import create_series_reference
> from patchwork.tests.utils import create_state
> from patchwork.tests.utils import create_user
> from patchwork.tests.utils import read_patch
> @@ -323,6 +326,72 @@ class SenderCorrelationTest(TestCase):
> self.assertEqual(person_b.id, person_a.id)
>
>
> +class SeriesCorrelationTest(TestCase):
> + """Validate correct behavior of find_series."""
> +
> + @staticmethod
> + def _create_email(msgid, references=None):
> + """Create a sample mail.
> +
> + Arguments:
> + msgid (str): The message's msgid
> + references (list): A list of preceding messages' msgids,
> + oldest first
> + """
> + mail = 'Message-Id: %s\n' % msgid + \
> + 'From: example user <user at example.com>\n' + \
> + 'Subject: Tests\n'
> +
> + if references:
> + mail += 'In-Reply-To: %s\n' % references[-1]
> + mail += 'References: %s\n' % '\n\t'.join(references)
> +
> + mail += 'test\n\n' + SAMPLE_DIFF
> + return message_from_string(mail)
> +
> + def test_new_series(self):
> + msgid = make_msgid()
> + email = self._create_email(msgid)
> +
> + self.assertIsNone(find_series(email))
> +
> + def test_first_reply(self):
> + msgid_a = make_msgid()
> + msgid_b = make_msgid()
> + email = self._create_email(msgid_b, [msgid_a])
> +
> + # assume msgid_a was already handled
> + ref = create_series_reference(msgid=msgid_a)
> +
> + series = find_series(email)
> + self.assertEqual(series, ref.series)
> +
> + def test_nested_series(self):
> + """Handle a series sent in-reply-to an existing series."""
> + # create an old series with a "cover letter"
> + msgids = [make_msgid()]
> + ref_v1 = create_series_reference(msgid=msgids[0])
> +
> + # ...and three patches
> + for i in range(3):
> + msgids.append(make_msgid())
> + create_series_reference(msgid=msgids[-1],
> + series=ref_v1.series)
> +
> + # now create a new series with "cover letter"
> + msgids.append(make_msgid())
> + ref_v2 = create_series_reference(msgid=msgids[-1])
> +
> + # ...and the "first patch" of this new series
> + msgid = make_msgid()
> + email = self._create_email(msgid, msgids)
> + series = find_series(email)
> +
> + # this should link to the second series - not the first
> + self.assertEqual(len(msgids), 4 + 1) # old series + new cover
> + self.assertEqual(series, ref_v2.series)
> +
> +
> class SubjectEncodingTest(TestCase):
> """Validate correct handling of encoded subjects."""
>
> @@ -658,24 +727,6 @@ class ParseInitialTagsTest(PatchTest):
> tag__name='Tested-by').count, 1)
>
>
> -class PrefixTest(TestCase):
> -
> - def test_split_prefixes(self):
> - self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
> - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> - self.assertEqual(split_prefixes(''), [])
> - self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
> - self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
> - self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> - self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
> -
> - def test_series_markers(self):
> - self.assertEqual(parse_series_marker([]), (None, None))
> - self.assertEqual(parse_series_marker(['bar']), (None, None))
> - self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
> - self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
> -
> -
> class SubjectTest(TestCase):
>
> def test_clean_subject(self):
> @@ -706,3 +757,27 @@ class SubjectTest(TestCase):
> ('[bar] meep', ['bar']))
> self.assertEqual(clean_subject('[FOO] [bar] meep', ['foo']),
> ('[bar] meep', ['bar']))
> + def test_split_prefixes(self):
> + self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
> + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> + self.assertEqual(split_prefixes(''), [])
> + self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
> + self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
> + self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> + self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
> +
> + def test_series_markers(self):
> + self.assertEqual(parse_series_marker([]), (None, None))
> + self.assertEqual(parse_series_marker(['bar']), (None, None))
> + self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
> + self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
> +
> + def test_version(self):
> + self.assertEqual(parse_version('', []), 1)
> + self.assertEqual(parse_version('Hello, world', []), 1)
> + self.assertEqual(parse_version('Hello, world', ['version']), 1)
> + self.assertEqual(parse_version('Hello, world', ['v2']), 2)
> + self.assertEqual(parse_version('Hello, world', ['V6']), 6)
> + self.assertEqual(parse_version('Hello, world', ['v10']), 10)
> + self.assertEqual(parse_version('Hello, world (v2)', []), 2)
> + self.assertEqual(parse_version('Hello, world (V6)', []), 6)
> diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py
> index 29455d2..cf30dac 100644
> --- a/patchwork/tests/utils.py
> +++ b/patchwork/tests/utils.py
> @@ -31,6 +31,9 @@ from patchwork.models import CoverLetter
> from patchwork.models import Patch
> from patchwork.models import Person
> from patchwork.models import Project
> +from patchwork.models import Series
> +from patchwork.models import SeriesReference
> +from patchwork.models import SeriesRevision
> from patchwork.models import State
>
> SAMPLE_DIFF = """--- /dev/null 2011-01-01 00:00:00.000000000 +0800
> @@ -217,6 +220,35 @@ def create_check(**kwargs):
> return Check.objects.create(**values)
>
>
> +def create_series(**kwargs):
> + """Create 'Series' object."""
> + return Series.objects.create(**kwargs)
> +
> +
> +def create_series_revision(**kwargs):
> + """Create 'SeriesRevision' object."""
> + values = {
> + 'group': create_series() if 'group' not in kwargs else None,
> + 'date': dt.now(),
> + 'submitter': create_person() if 'submitter' not in kwargs else None,
> + 'total': 1,
> + }
> + values.update(**kwargs)
> +
> + return SeriesRevision.objects.create(**values)
> +
> +
> +def create_series_reference(**kwargs):
> + """Create 'SeriesReference' object."""
> + values = {
> + 'series': create_series_revision() if 'series' not in kwargs else None,
> + 'msgid': make_msgid(),
> + }
> + values.update(**kwargs)
> +
> + return SeriesReference.objects.create(**values)
> +
> +
> def _create_submissions(create_func, count=1, **kwargs):
> """Create 'count' Submission-based objects.
>
>
More information about the Patchwork
mailing list