[PATCH v3 3/7] parser: Add series parsing

Andy Doan andy.doan at linaro.org
Fri Sep 23 06:02:21 AEST 2016


On 09/12/2016 04:53 PM, Stephen Finucane wrote:
> It is now possible to parse and store series, so do just that.
> The parsing at the moment is based on both RFC822 headers and
> subject lines.
> 
> Signed-off-by: Stephen Finucane <stephenfinucane at hotmail.com>

thanks for the tests!

Reviewed-by: Andy Doan <andy.doan at linaro.org>

> ---
> v3:
> - Rework how nested series are handled once again
> - Don't search for references when creating a cover letter
> v2:
> - Rebase onto master, moving changes into 'parser'
> - Add unit tests
> - Merge "Handle 'xxx (v2)' style messages" patch
> - Merge "Handle series sent 'in-reply-to'" patch
> - Handle capitalized version prefixes like [V2]
> - Trivial cleanup of some parser functions
> ---
>  patchwork/models.py            |   7 ++-
>  patchwork/parser.py            | 127 +++++++++++++++++++++++++++++++++++++----
>  patchwork/tests/test_parser.py | 111 +++++++++++++++++++++++++++++------
>  patchwork/tests/utils.py       |  32 +++++++++++
>  4 files changed, 247 insertions(+), 30 deletions(-)
> 
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 2875369..52325d2 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -230,8 +230,11 @@ class SeriesRevision(models.Model):
>          try:
>              return self.cover_letter.name
>          except CoverLetter.DoesNotExist:
> -            return '[Series #%d, revision #%d]' % (self.group.id,
> -                                                   self.version)
> +            if self.group:
> +                return '[Series #%d, revision #%d]' % (self.group.id,
> +                                                       self.version)
> +            else:
> +                return '[Untitled series]'
>  
>      @property
>      def actual_total(self):
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 1805df8..d20ac19 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -21,8 +21,10 @@
>  
>  import codecs
>  import datetime
> -from email.header import Header, decode_header
> -from email.utils import parsedate_tz, mktime_tz
> +from email.header import Header
> +from email.header import decode_header
> +from email.utils import parsedate_tz
> +from email.utils import mktime_tz
>  from fnmatch import fnmatch
>  from functools import reduce
>  import logging
> @@ -33,9 +35,17 @@ from django.contrib.auth.models import User
>  from django.utils import six
>  from django.utils.six.moves import map
>  
> -from patchwork.models import (Patch, Project, Person, Comment, State,
> -                              DelegationRule, Submission, CoverLetter,
> -                              get_default_initial_patch_state)
> +from patchwork.models import Comment
> +from patchwork.models import CoverLetter
> +from patchwork.models import DelegationRule
> +from patchwork.models import get_default_initial_patch_state
> +from patchwork.models import Patch
> +from patchwork.models import Person
> +from patchwork.models import Project
> +from patchwork.models import SeriesRevision
> +from patchwork.models import SeriesReference
> +from patchwork.models import State
> +from patchwork.models import Submission
>  
>  
>  _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
> @@ -100,6 +110,34 @@ def find_project_by_header(mail):
>      return project
>  
>  
> +def find_series(mail):
> +    """Find a patch's `SeriesRevision`.
> +
> +    Traverse RFC822 headers, starting with most recent first, to find
> +    ancestors and the related series. Headers are traversed in reverse
> +    to handle series sent in reply to previous series, like so:
> +
> +        [PATCH 0/3] A cover letter
> +          [PATCH 1/3] The first patch
> +          ...
> +          [PATCH v2 0/3] A cover letter
> +            [PATCH v2 1/3] The first patch
> +            ...
> +
> +    Args:
> +        mail (email.message.Message): The mail to extract series from
> +
> +    Returns:
> +        The matching `SeriesRevision` instance, if any
> +    """
> +    for ref in find_references(mail, True):
> +        # try parsing by RFC5322 fields first
> +        try:
> +            return SeriesReference.objects.get(msgid=ref).series
> +        except SeriesReference.DoesNotExist:
> +            pass
> +
> +
>  def find_author(mail):
>      from_header = clean_header(mail.get('From'))
>      name, email = (None, None)
> @@ -161,10 +199,13 @@ def find_headers(mail):
>                     for (k, v) in list(mail.items())])
>  
>  
> -def find_references(mail):
> +def find_references(mail, include_msgid=False):
>      """Construct a list of possible reply message ids."""
>      refs = []
>  
> +    if include_msgid:
> +        refs.append(mail.get('Message-ID'))
> +
>      if 'In-Reply-To' in mail:
>          refs.append(mail.get('In-Reply-To'))
>  
> @@ -178,6 +219,13 @@ def find_references(mail):
>      return refs
>  
>  
> +def _parse_prefixes(subject_prefixes, regex):
> +    for prefix in subject_prefixes:
> +        m = regex.match(prefix)
> +        if m:
> +            return m
> +
> +
>  def parse_series_marker(subject_prefixes):
>      """Extract series markers from subject.
>  
> @@ -193,14 +241,36 @@ def parse_series_marker(subject_prefixes):
>      """
>  
>      regex = re.compile('^([0-9]+)/([0-9]+)$')
> -    for prefix in subject_prefixes:
> -        m = regex.match(prefix)
> -        if not m:
> -            continue
> +    m = _parse_prefixes(subject_prefixes, regex)
> +    if m:
>          return (int(m.group(1)), int(m.group(2)))
> +
>      return (None, None)
>  
>  
> +def parse_version(subject, subject_prefixes):
> +    """Extract patch version.
> +
> +    Args:
> +        subject: Main body of subject line
> +        subject_prefixes: List of subject prefixes to extract version
> +          from
> +
> +    Returns:
> +        version if found, else 1
> +    """
> +    regex = re.compile('^[vV](\d+)$')
> +    m = _parse_prefixes(subject_prefixes, regex)
> +    if m:
> +        return int(m.group(1))
> +
> +    m = re.search(r'\([vV](\d+)\)', subject)
> +    if m:
> +        return int(m.group(1))
> +
> +    return 1
> +
> +
>  def find_content(project, mail):
>      """Extract a comment and potential diff from a mail."""
>      patchbuf = None
> @@ -622,6 +692,7 @@ def parse_mail(mail, list_id=None):
>      author = find_author(mail)
>      name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
>      x, n = parse_series_marker(prefixes)
> +    version = parse_version(name, prefixes)
>      refs = find_references(mail)
>      date = find_date(mail)
>      headers = find_headers(mail)
> @@ -638,9 +709,22 @@ def parse_mail(mail, list_id=None):
>              filenames = find_filenames(diff)
>              delegate = auto_delegate(project, filenames)
>  
> +        series = find_series(mail)
> +        if not series and n:  # the series markers indicates a series
> +            series = SeriesRevision(date=date,
> +                                    submitter=author,
> +                                    version=version,
> +                                    total=n)
> +            series.save()
> +
> +            for ref in refs + [msgid]:  # save references for series
> +                # we don't want duplicates
> +                SeriesReference.objects.get_or_create(series=series, msgid=ref)
> +
>          patch = Patch(
>              msgid=msgid,
>              project=project,
> +            series=series,
>              name=name,
>              date=date,
>              headers=headers,
> @@ -674,9 +758,32 @@ def parse_mail(mail, list_id=None):
>          if is_cover_letter:
>              author.save()
>  
> +            # we don't use 'find_series' here as a cover letter will
> +            # always be the first item in a thread, thus the references
> +            # could only point to a different series or unrelated
> +            # message
> +            try:
> +                series = SeriesReference.objects.get(msgid=msgid).series
> +            except SeriesReference.DoesNotExist:
> +                series = None
> +
> +            if not series:
> +                series = SeriesRevision(date=date,
> +                                        submitter=author,
> +                                        version=version,
> +                                        total=n)
> +                series.save()
> +
> +                # we don't save the in-reply-to or references fields
> +                # for a cover letter, as they can't refer to the same
> +                # series
> +                SeriesReference.objects.get_or_create(series=series,
> +                                                      msgid=msgid)
> +
>              cover_letter = CoverLetter(
>                  msgid=msgid,
>                  project=project,
> +                series=series,
>                  name=name,
>                  date=date,
>                  headers=headers,
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index 7b5c71b..4783e0f 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -34,11 +34,14 @@ from patchwork.parser import clean_subject
>  from patchwork.parser import find_author
>  from patchwork.parser import find_content
>  from patchwork.parser import find_project_by_header
> +from patchwork.parser import find_series
>  from patchwork.parser import parse_mail as _parse_mail
>  from patchwork.parser import parse_pull_request
>  from patchwork.parser import parse_series_marker
> +from patchwork.parser import parse_version
>  from patchwork.parser import split_prefixes
>  from patchwork.tests.utils import create_project
> +from patchwork.tests.utils import create_series_reference
>  from patchwork.tests.utils import create_state
>  from patchwork.tests.utils import create_user
>  from patchwork.tests.utils import read_patch
> @@ -323,6 +326,72 @@ class SenderCorrelationTest(TestCase):
>          self.assertEqual(person_b.id, person_a.id)
>  
>  
> +class SeriesCorrelationTest(TestCase):
> +    """Validate correct behavior of find_series."""
> +
> +    @staticmethod
> +    def _create_email(msgid, references=None):
> +        """Create a sample mail.
> +
> +        Arguments:
> +            msgid (str): The message's msgid
> +            references (list): A list of preceding messages' msgids,
> +                oldest first
> +        """
> +        mail = 'Message-Id: %s\n' % msgid + \
> +               'From: example user <user at example.com>\n' + \
> +               'Subject: Tests\n'
> +
> +        if references:
> +            mail += 'In-Reply-To: %s\n' % references[-1]
> +            mail += 'References: %s\n' % '\n\t'.join(references)
> +
> +        mail += 'test\n\n' + SAMPLE_DIFF
> +        return message_from_string(mail)
> +
> +    def test_new_series(self):
> +        msgid = make_msgid()
> +        email = self._create_email(msgid)
> +
> +        self.assertIsNone(find_series(email))
> +
> +    def test_first_reply(self):
> +        msgid_a = make_msgid()
> +        msgid_b = make_msgid()
> +        email = self._create_email(msgid_b, [msgid_a])
> +
> +        # assume msgid_a was already handled
> +        ref = create_series_reference(msgid=msgid_a)
> +
> +        series = find_series(email)
> +        self.assertEqual(series, ref.series)
> +
> +    def test_nested_series(self):
> +        """Handle a series sent in-reply-to an existing series."""
> +        # create an old series with a "cover letter"
> +        msgids = [make_msgid()]
> +        ref_v1 = create_series_reference(msgid=msgids[0])
> +
> +        # ...and three patches
> +        for i in range(3):
> +            msgids.append(make_msgid())
> +            create_series_reference(msgid=msgids[-1],
> +                                    series=ref_v1.series)
> +
> +        # now create a new series with "cover letter"
> +        msgids.append(make_msgid())
> +        ref_v2 = create_series_reference(msgid=msgids[-1])
> +
> +        # ...and the "first patch" of this new series
> +        msgid = make_msgid()
> +        email = self._create_email(msgid, msgids)
> +        series = find_series(email)
> +
> +        # this should link to the second series - not the first
> +        self.assertEqual(len(msgids), 4 + 1)  # old series + new cover
> +        self.assertEqual(series, ref_v2.series)
> +
> +
>  class SubjectEncodingTest(TestCase):
>      """Validate correct handling of encoded subjects."""
>  
> @@ -658,24 +727,6 @@ class ParseInitialTagsTest(PatchTest):
>              tag__name='Tested-by').count, 1)
>  
>  
> -class PrefixTest(TestCase):
> -
> -    def test_split_prefixes(self):
> -        self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
> -        self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> -        self.assertEqual(split_prefixes(''), [])
> -        self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
> -        self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
> -        self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> -        self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
> -
> -    def test_series_markers(self):
> -        self.assertEqual(parse_series_marker([]), (None, None))
> -        self.assertEqual(parse_series_marker(['bar']), (None, None))
> -        self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
> -        self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
> -
> -
>  class SubjectTest(TestCase):
>  
>      def test_clean_subject(self):
> @@ -706,3 +757,27 @@ class SubjectTest(TestCase):
>                           ('[bar] meep', ['bar']))
>          self.assertEqual(clean_subject('[FOO] [bar] meep', ['foo']),
>                           ('[bar] meep', ['bar']))
> +    def test_split_prefixes(self):
> +        self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
> +        self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> +        self.assertEqual(split_prefixes(''), [])
> +        self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
> +        self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
> +        self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
> +        self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
> +
> +    def test_series_markers(self):
> +        self.assertEqual(parse_series_marker([]), (None, None))
> +        self.assertEqual(parse_series_marker(['bar']), (None, None))
> +        self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
> +        self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
> +
> +    def test_version(self):
> +        self.assertEqual(parse_version('', []), 1)
> +        self.assertEqual(parse_version('Hello, world', []), 1)
> +        self.assertEqual(parse_version('Hello, world', ['version']), 1)
> +        self.assertEqual(parse_version('Hello, world', ['v2']), 2)
> +        self.assertEqual(parse_version('Hello, world', ['V6']), 6)
> +        self.assertEqual(parse_version('Hello, world', ['v10']), 10)
> +        self.assertEqual(parse_version('Hello, world (v2)', []), 2)
> +        self.assertEqual(parse_version('Hello, world (V6)', []), 6)
> diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py
> index 29455d2..cf30dac 100644
> --- a/patchwork/tests/utils.py
> +++ b/patchwork/tests/utils.py
> @@ -31,6 +31,9 @@ from patchwork.models import CoverLetter
>  from patchwork.models import Patch
>  from patchwork.models import Person
>  from patchwork.models import Project
> +from patchwork.models import Series
> +from patchwork.models import SeriesReference
> +from patchwork.models import SeriesRevision
>  from patchwork.models import State
>  
>  SAMPLE_DIFF = """--- /dev/null	2011-01-01 00:00:00.000000000 +0800
> @@ -217,6 +220,35 @@ def create_check(**kwargs):
>      return Check.objects.create(**values)
>  
>  
> +def create_series(**kwargs):
> +    """Create 'Series' object."""
> +    return Series.objects.create(**kwargs)
> +
> +
> +def create_series_revision(**kwargs):
> +    """Create 'SeriesRevision' object."""
> +    values = {
> +        'group': create_series() if 'group' not in kwargs else None,
> +        'date': dt.now(),
> +        'submitter': create_person() if 'submitter' not in kwargs else None,
> +        'total': 1,
> +    }
> +    values.update(**kwargs)
> +
> +    return SeriesRevision.objects.create(**values)
> +
> +
> +def create_series_reference(**kwargs):
> +    """Create 'SeriesReference' object."""
> +    values = {
> +        'series': create_series_revision() if 'series' not in kwargs else None,
> +        'msgid': make_msgid(),
> +    }
> +    values.update(**kwargs)
> +
> +    return SeriesReference.objects.create(**values)
> +
> +
>  def _create_submissions(create_func, count=1, **kwargs):
>      """Create 'count' Submission-based objects.
>  
> 



More information about the Patchwork mailing list