[PATCH v3 3/7] parser: Add series parsing
Stephen Finucane
stephenfinucane at hotmail.com
Tue Sep 13 07:53:29 AEST 2016
It is now possible to parse and store series, so do just that.
The parsing at the moment is based on both RFC822 headers and
subject lines.
Signed-off-by: Stephen Finucane <stephenfinucane at hotmail.com>
---
v3:
- Rework how nested series are handled once again
- Don't search for references when creating a cover letter
v2:
- Rebase onto master, moving changes into 'parser'
- Add unit tests
- Merge "Handle 'xxx (v2)' style messages" patch
- Merge "Handle series sent 'in-reply-to'" patch
- Handle capitalized version prefixes like [V2]
- Trivial cleanup of some parser functions
---
patchwork/models.py | 7 ++-
patchwork/parser.py | 127 +++++++++++++++++++++++++++++++++++++----
patchwork/tests/test_parser.py | 111 +++++++++++++++++++++++++++++------
patchwork/tests/utils.py | 32 +++++++++++
4 files changed, 247 insertions(+), 30 deletions(-)
diff --git a/patchwork/models.py b/patchwork/models.py
index 2875369..52325d2 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -230,8 +230,11 @@ class SeriesRevision(models.Model):
try:
return self.cover_letter.name
except CoverLetter.DoesNotExist:
- return '[Series #%d, revision #%d]' % (self.group.id,
- self.version)
+ if self.group:
+ return '[Series #%d, revision #%d]' % (self.group.id,
+ self.version)
+ else:
+ return '[Untitled series]'
@property
def actual_total(self):
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 1805df8..d20ac19 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -21,8 +21,10 @@
import codecs
import datetime
-from email.header import Header, decode_header
-from email.utils import parsedate_tz, mktime_tz
+from email.header import Header
+from email.header import decode_header
+from email.utils import parsedate_tz
+from email.utils import mktime_tz
from fnmatch import fnmatch
from functools import reduce
import logging
@@ -33,9 +35,17 @@ from django.contrib.auth.models import User
from django.utils import six
from django.utils.six.moves import map
-from patchwork.models import (Patch, Project, Person, Comment, State,
- DelegationRule, Submission, CoverLetter,
- get_default_initial_patch_state)
+from patchwork.models import Comment
+from patchwork.models import CoverLetter
+from patchwork.models import DelegationRule
+from patchwork.models import get_default_initial_patch_state
+from patchwork.models import Patch
+from patchwork.models import Person
+from patchwork.models import Project
+from patchwork.models import SeriesRevision
+from patchwork.models import SeriesReference
+from patchwork.models import State
+from patchwork.models import Submission
_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
@@ -100,6 +110,34 @@ def find_project_by_header(mail):
return project
+def find_series(mail):
+ """Find a patch's `SeriesRevision`.
+
+ Traverse RFC822 headers, starting with most recent first, to find
+ ancestors and the related series. Headers are traversed in reverse
+ to handle series sent in reply to previous series, like so:
+
+ [PATCH 0/3] A cover letter
+ [PATCH 1/3] The first patch
+ ...
+ [PATCH v2 0/3] A cover letter
+ [PATCH v2 1/3] The first patch
+ ...
+
+ Args:
+ mail (email.message.Message): The mail to extract series from
+
+ Returns:
+ The matching `SeriesRevision` instance, if any
+ """
+ for ref in find_references(mail, True):
+ # try parsing by RFC5322 fields first
+ try:
+ return SeriesReference.objects.get(msgid=ref).series
+ except SeriesReference.DoesNotExist:
+ pass
+
+
def find_author(mail):
from_header = clean_header(mail.get('From'))
name, email = (None, None)
@@ -161,10 +199,13 @@ def find_headers(mail):
for (k, v) in list(mail.items())])
-def find_references(mail):
+def find_references(mail, include_msgid=False):
"""Construct a list of possible reply message ids."""
refs = []
+ if include_msgid:
+ refs.append(mail.get('Message-ID'))
+
if 'In-Reply-To' in mail:
refs.append(mail.get('In-Reply-To'))
@@ -178,6 +219,13 @@ def find_references(mail):
return refs
+def _parse_prefixes(subject_prefixes, regex):
+ for prefix in subject_prefixes:
+ m = regex.match(prefix)
+ if m:
+ return m
+
+
def parse_series_marker(subject_prefixes):
"""Extract series markers from subject.
@@ -193,14 +241,36 @@ def parse_series_marker(subject_prefixes):
"""
regex = re.compile('^([0-9]+)/([0-9]+)$')
- for prefix in subject_prefixes:
- m = regex.match(prefix)
- if not m:
- continue
+ m = _parse_prefixes(subject_prefixes, regex)
+ if m:
return (int(m.group(1)), int(m.group(2)))
+
return (None, None)
+def parse_version(subject, subject_prefixes):
+ """Extract patch version.
+
+ Args:
+ subject: Main body of subject line
+ subject_prefixes: List of subject prefixes to extract version
+ from
+
+ Returns:
+ version if found, else 1
+ """
+ regex = re.compile('^[vV](\d+)$')
+ m = _parse_prefixes(subject_prefixes, regex)
+ if m:
+ return int(m.group(1))
+
+ m = re.search(r'\([vV](\d+)\)', subject)
+ if m:
+ return int(m.group(1))
+
+ return 1
+
+
def find_content(project, mail):
"""Extract a comment and potential diff from a mail."""
patchbuf = None
@@ -622,6 +692,7 @@ def parse_mail(mail, list_id=None):
author = find_author(mail)
name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
x, n = parse_series_marker(prefixes)
+ version = parse_version(name, prefixes)
refs = find_references(mail)
date = find_date(mail)
headers = find_headers(mail)
@@ -638,9 +709,22 @@ def parse_mail(mail, list_id=None):
filenames = find_filenames(diff)
delegate = auto_delegate(project, filenames)
+ series = find_series(mail)
+ if not series and n: # the series markers indicates a series
+ series = SeriesRevision(date=date,
+ submitter=author,
+ version=version,
+ total=n)
+ series.save()
+
+ for ref in refs + [msgid]: # save references for series
+ # we don't want duplicates
+ SeriesReference.objects.get_or_create(series=series, msgid=ref)
+
patch = Patch(
msgid=msgid,
project=project,
+ series=series,
name=name,
date=date,
headers=headers,
@@ -674,9 +758,32 @@ def parse_mail(mail, list_id=None):
if is_cover_letter:
author.save()
+ # we don't use 'find_series' here as a cover letter will
+ # always be the first item in a thread, thus the references
+ # could only point to a different series or unrelated
+ # message
+ try:
+ series = SeriesReference.objects.get(msgid=msgid).series
+ except SeriesReference.DoesNotExist:
+ series = None
+
+ if not series:
+ series = SeriesRevision(date=date,
+ submitter=author,
+ version=version,
+ total=n)
+ series.save()
+
+ # we don't save the in-reply-to or references fields
+ # for a cover letter, as they can't refer to the same
+ # series
+ SeriesReference.objects.get_or_create(series=series,
+ msgid=msgid)
+
cover_letter = CoverLetter(
msgid=msgid,
project=project,
+ series=series,
name=name,
date=date,
headers=headers,
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index 7b5c71b..4783e0f 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -34,11 +34,14 @@ from patchwork.parser import clean_subject
from patchwork.parser import find_author
from patchwork.parser import find_content
from patchwork.parser import find_project_by_header
+from patchwork.parser import find_series
from patchwork.parser import parse_mail as _parse_mail
from patchwork.parser import parse_pull_request
from patchwork.parser import parse_series_marker
+from patchwork.parser import parse_version
from patchwork.parser import split_prefixes
from patchwork.tests.utils import create_project
+from patchwork.tests.utils import create_series_reference
from patchwork.tests.utils import create_state
from patchwork.tests.utils import create_user
from patchwork.tests.utils import read_patch
@@ -323,6 +326,72 @@ class SenderCorrelationTest(TestCase):
self.assertEqual(person_b.id, person_a.id)
+class SeriesCorrelationTest(TestCase):
+ """Validate correct behavior of find_series."""
+
+ @staticmethod
+ def _create_email(msgid, references=None):
+ """Create a sample mail.
+
+ Arguments:
+ msgid (str): The message's msgid
+ references (list): A list of preceding messages' msgids,
+ oldest first
+ """
+ mail = 'Message-Id: %s\n' % msgid + \
+ 'From: example user <user at example.com>\n' + \
+ 'Subject: Tests\n'
+
+ if references:
+ mail += 'In-Reply-To: %s\n' % references[-1]
+ mail += 'References: %s\n' % '\n\t'.join(references)
+
+ mail += 'test\n\n' + SAMPLE_DIFF
+ return message_from_string(mail)
+
+ def test_new_series(self):
+ msgid = make_msgid()
+ email = self._create_email(msgid)
+
+ self.assertIsNone(find_series(email))
+
+ def test_first_reply(self):
+ msgid_a = make_msgid()
+ msgid_b = make_msgid()
+ email = self._create_email(msgid_b, [msgid_a])
+
+ # assume msgid_a was already handled
+ ref = create_series_reference(msgid=msgid_a)
+
+ series = find_series(email)
+ self.assertEqual(series, ref.series)
+
+ def test_nested_series(self):
+ """Handle a series sent in-reply-to an existing series."""
+ # create an old series with a "cover letter"
+ msgids = [make_msgid()]
+ ref_v1 = create_series_reference(msgid=msgids[0])
+
+ # ...and three patches
+ for i in range(3):
+ msgids.append(make_msgid())
+ create_series_reference(msgid=msgids[-1],
+ series=ref_v1.series)
+
+ # now create a new series with "cover letter"
+ msgids.append(make_msgid())
+ ref_v2 = create_series_reference(msgid=msgids[-1])
+
+ # ...and the "first patch" of this new series
+ msgid = make_msgid()
+ email = self._create_email(msgid, msgids)
+ series = find_series(email)
+
+ # this should link to the second series - not the first
+ self.assertEqual(len(msgids), 4 + 1) # old series + new cover
+ self.assertEqual(series, ref_v2.series)
+
+
class SubjectEncodingTest(TestCase):
"""Validate correct handling of encoded subjects."""
@@ -658,24 +727,6 @@ class ParseInitialTagsTest(PatchTest):
tag__name='Tested-by').count, 1)
-class PrefixTest(TestCase):
-
- def test_split_prefixes(self):
- self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
- self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
- self.assertEqual(split_prefixes(''), [])
- self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
- self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
- self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
- self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
-
- def test_series_markers(self):
- self.assertEqual(parse_series_marker([]), (None, None))
- self.assertEqual(parse_series_marker(['bar']), (None, None))
- self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
- self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
-
-
class SubjectTest(TestCase):
def test_clean_subject(self):
@@ -706,3 +757,27 @@ class SubjectTest(TestCase):
('[bar] meep', ['bar']))
self.assertEqual(clean_subject('[FOO] [bar] meep', ['foo']),
('[bar] meep', ['bar']))
+ def test_split_prefixes(self):
+ self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
+ self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
+ self.assertEqual(split_prefixes(''), [])
+ self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
+ self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
+ self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
+ self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
+
+ def test_series_markers(self):
+ self.assertEqual(parse_series_marker([]), (None, None))
+ self.assertEqual(parse_series_marker(['bar']), (None, None))
+ self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
+ self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
+
+ def test_version(self):
+ self.assertEqual(parse_version('', []), 1)
+ self.assertEqual(parse_version('Hello, world', []), 1)
+ self.assertEqual(parse_version('Hello, world', ['version']), 1)
+ self.assertEqual(parse_version('Hello, world', ['v2']), 2)
+ self.assertEqual(parse_version('Hello, world', ['V6']), 6)
+ self.assertEqual(parse_version('Hello, world', ['v10']), 10)
+ self.assertEqual(parse_version('Hello, world (v2)', []), 2)
+ self.assertEqual(parse_version('Hello, world (V6)', []), 6)
diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py
index 29455d2..cf30dac 100644
--- a/patchwork/tests/utils.py
+++ b/patchwork/tests/utils.py
@@ -31,6 +31,9 @@ from patchwork.models import CoverLetter
from patchwork.models import Patch
from patchwork.models import Person
from patchwork.models import Project
+from patchwork.models import Series
+from patchwork.models import SeriesReference
+from patchwork.models import SeriesRevision
from patchwork.models import State
SAMPLE_DIFF = """--- /dev/null 2011-01-01 00:00:00.000000000 +0800
@@ -217,6 +220,35 @@ def create_check(**kwargs):
return Check.objects.create(**values)
+def create_series(**kwargs):
+ """Create 'Series' object."""
+ return Series.objects.create(**kwargs)
+
+
+def create_series_revision(**kwargs):
+ """Create 'SeriesRevision' object."""
+ values = {
+ 'group': create_series() if 'group' not in kwargs else None,
+ 'date': dt.now(),
+ 'submitter': create_person() if 'submitter' not in kwargs else None,
+ 'total': 1,
+ }
+ values.update(**kwargs)
+
+ return SeriesRevision.objects.create(**values)
+
+
+def create_series_reference(**kwargs):
+ """Create 'SeriesReference' object."""
+ values = {
+ 'series': create_series_revision() if 'series' not in kwargs else None,
+ 'msgid': make_msgid(),
+ }
+ values.update(**kwargs)
+
+ return SeriesReference.objects.create(**values)
+
+
def _create_submissions(create_func, count=1, **kwargs):
"""Create 'count' Submission-based objects.
--
2.7.4
More information about the Patchwork
mailing list