[PATCH v7 3/8] parser: Add series parsing
Stephen Finucane
stephen at that.guru
Sun Oct 30 00:13:35 AEDT 2016
It is now possible to parse and store series, so do just that.
The parsing at the moment is based on both RFC822 headers and
subject lines.
Signed-off-by: Stephen Finucane <stephen at that.guru>
Reviewed-by: Andy Doan <andy.doan at linaro.org>
Reviewed-by: Daniel Axtens <dja at axtens.net>
Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
Tested-by: Russell Currey <ruscur at russell.cc>
---
v7:
- Update to reference 'Series', not 'SeriesRevision'
- Clarify why we need to save SeriesReference objects
- Rename some functions
v4:
- Update per new 'SeriesRevision'-'Patch' relationship
v3:
- Rework how nested series are handled once again
- Don't search for references when creating a cover letter
v2:
- Rebase onto master, moving changes into 'parser'
- Add unit tests
- Merge "Handle 'xxx (v2)' style messages" patch
- Merge "Handle series sent 'in-reply-to'" patch
- Handle capitalized version prefixes like [V2]
- Trivial cleanup of some parser functions
---
patchwork/parser.py | 138 ++++++++++++++++++++++++++++++++++++++---
patchwork/tests/test_parser.py | 112 +++++++++++++++++++++++++++------
patchwork/tests/utils.py | 25 ++++++++
3 files changed, 248 insertions(+), 27 deletions(-)
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 15476b1..a4036ff 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -21,8 +21,10 @@
import codecs
import datetime
-from email.header import decode_header, make_header
-from email.utils import parsedate_tz, mktime_tz
+from email.header import decode_header
+from email.header import make_header
+from email.utils import mktime_tz
+from email.utils import parsedate_tz
from fnmatch import fnmatch
import logging
import re
@@ -30,9 +32,17 @@ import re
from django.contrib.auth.models import User
from django.utils import six
-from patchwork.models import (Patch, Project, Person, Comment, State,
- DelegationRule, Submission, CoverLetter,
- get_default_initial_patch_state)
+from patchwork.models import Comment
+from patchwork.models import CoverLetter
+from patchwork.models import DelegationRule
+from patchwork.models import get_default_initial_patch_state
+from patchwork.models import Patch
+from patchwork.models import Person
+from patchwork.models import Project
+from patchwork.models import Series
+from patchwork.models import SeriesReference
+from patchwork.models import State
+from patchwork.models import Submission
_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
@@ -162,6 +172,42 @@ def find_project_by_header(mail):
return project
+def find_series(mail):
+ """Find a patch's series.
+
+ Traverse RFC822 headers, starting with most recent first, to find
+ ancestors and the related series. Headers are traversed in reverse
+ to handle series sent in reply to previous series, like so:
+
+ [PATCH 0/3] A cover letter
+ [PATCH 1/3] The first patch
+ ...
+ [PATCH v2 0/3] A cover letter
+ [PATCH v2 1/3] The first patch
+ ...
+
+ This means we evaluate headers like so:
+
+ - first, check for a Series that directly matches this message's
+ Message-ID
+ - then, check for a series that matches the In-Reply-To
+ - then, check for a series that matches the References, from most
+ recent (the patch's closest ancestor) to least recent
+
+ Args:
+ mail (email.message.Message): The mail to extract series from
+
+ Returns:
+ The matching ``Series`` instance, if any
+ """
+ for ref in [mail.get('Message-Id')] + find_references(mail):
+ # try parsing by RFC5322 fields first
+ try:
+ return SeriesReference.objects.get(msgid=ref).series
+ except SeriesReference.DoesNotExist:
+ pass
+
+
def find_author(mail):
from_header = clean_header(mail.get('From'))
name, email = (None, None)
@@ -243,6 +289,13 @@ def find_references(mail):
return refs
+def _find_matching_prefix(subject_prefixes, regex):
+ for prefix in subject_prefixes:
+ m = regex.match(prefix)
+ if m:
+ return m
+
+
def parse_series_marker(subject_prefixes):
"""Extract series markers from subject.
@@ -258,14 +311,36 @@ def parse_series_marker(subject_prefixes):
"""
regex = re.compile('^([0-9]+)/([0-9]+)$')
- for prefix in subject_prefixes:
- m = regex.match(prefix)
- if not m:
- continue
+ m = _find_matching_prefix(subject_prefixes, regex)
+ if m:
return (int(m.group(1)), int(m.group(2)))
+
return (None, None)
+def parse_version(subject, subject_prefixes):
+ """Extract patch version.
+
+ Args:
+ subject: Main body of subject line
+ subject_prefixes: List of subject prefixes to extract version
+ from
+
+ Returns:
+ version if found, else 1
+ """
+ regex = re.compile('^[vV](\d+)$')
+ m = _find_matching_prefix(subject_prefixes, regex)
+ if m:
+ return int(m.group(1))
+
+ m = re.search(r'\([vV](\d+)\)', subject)
+ if m:
+ return int(m.group(1))
+
+ return 1
+
+
def find_content(project, mail):
"""Extract a comment and potential diff from a mail."""
patchbuf = None
@@ -696,6 +771,7 @@ def parse_mail(mail, list_id=None):
name, prefixes = clean_subject(subject, [project.linkname])
is_comment = subject_check(subject)
x, n = parse_series_marker(prefixes)
+ version = parse_version(name, prefixes)
refs = find_references(mail)
date = find_date(mail)
headers = find_headers(mail)
@@ -712,6 +788,23 @@ def parse_mail(mail, list_id=None):
filenames = find_filenames(diff)
delegate = auto_delegate(project, filenames)
+ series = find_series(mail)
+ if not series and n: # the series markers indicates a series
+ series = Series(date=date,
+ submitter=author,
+ version=version,
+ total=n)
+ series.save()
+
+ # NOTE(stephenfin) We must save references for series. We
+ # do this to handle the case where a later patch is
+ # received first. Without storing references, it would not
+ # be possible to identify the relationship between patches
+ # as the earlier patch does not reference the later one.
+ for ref in refs + [msgid]:
+ # we don't want duplicates
+ SeriesReference.objects.get_or_create(series=series, msgid=ref)
+
patch = Patch(
msgid=msgid,
project=project,
@@ -727,6 +820,9 @@ def parse_mail(mail, list_id=None):
patch.save()
logger.debug('Patch saved')
+ if series:
+ series.add_patch(patch, x)
+
return patch
elif x == 0: # (potential) cover letters
# if refs are empty, it's implicitly a cover letter. If not,
@@ -749,6 +845,28 @@ def parse_mail(mail, list_id=None):
if is_cover_letter:
author.save()
+ # we don't use 'find_series' here as a cover letter will
+ # always be the first item in a thread, thus the references
+ # could only point to a different series or unrelated
+ # message
+ try:
+ series = SeriesReference.objects.get(msgid=msgid).series
+ except SeriesReference.DoesNotExist:
+ series = None
+
+ if not series:
+ series = Series(date=date,
+ submitter=author,
+ version=version,
+ total=n)
+ series.save()
+
+ # we don't save the in-reply-to or references fields
+ # for a cover letter, as they can't refer to the same
+ # series
+ SeriesReference.objects.get_or_create(series=series,
+ msgid=msgid)
+
cover_letter = CoverLetter(
msgid=msgid,
project=project,
@@ -760,6 +878,8 @@ def parse_mail(mail, list_id=None):
cover_letter.save()
logger.debug('Cover letter saved')
+ series.add_cover_letter(cover_letter)
+
return cover_letter
# comments
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index e16ffbc..96166ad 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -35,13 +35,16 @@ from patchwork.parser import clean_subject
from patchwork.parser import find_author
from patchwork.parser import find_content
from patchwork.parser import find_project_by_header
+from patchwork.parser import find_series
from patchwork.parser import parse_mail as _parse_mail
from patchwork.parser import parse_pull_request
from patchwork.parser import parse_series_marker
+from patchwork.parser import parse_version
from patchwork.parser import split_prefixes
from patchwork.parser import subject_check
from patchwork.tests import TEST_MAIL_DIR
from patchwork.tests.utils import create_project
+from patchwork.tests.utils import create_series_reference
from patchwork.tests.utils import create_state
from patchwork.tests.utils import create_user
from patchwork.tests.utils import read_patch
@@ -328,6 +331,72 @@ class SenderCorrelationTest(TestCase):
self.assertEqual(person_b.id, person_a.id)
+class SeriesCorrelationTest(TestCase):
+ """Validate correct behavior of find_series."""
+
+ @staticmethod
+ def _create_email(msgid, references=None):
+ """Create a sample mail.
+
+ Arguments:
+ msgid (str): The message's msgid
+ references (list): A list of preceding messages' msgids,
+ oldest first
+ """
+ mail = 'Message-Id: %s\n' % msgid + \
+ 'From: example user <user at example.com>\n' + \
+ 'Subject: Tests\n'
+
+ if references:
+ mail += 'In-Reply-To: %s\n' % references[-1]
+ mail += 'References: %s\n' % '\n\t'.join(references)
+
+ mail += 'test\n\n' + SAMPLE_DIFF
+ return message_from_string(mail)
+
+ def test_new_series(self):
+ msgid = make_msgid()
+ email = self._create_email(msgid)
+
+ self.assertIsNone(find_series(email))
+
+ def test_first_reply(self):
+ msgid_a = make_msgid()
+ msgid_b = make_msgid()
+ email = self._create_email(msgid_b, [msgid_a])
+
+ # assume msgid_a was already handled
+ ref = create_series_reference(msgid=msgid_a)
+
+ series = find_series(email)
+ self.assertEqual(series, ref.series)
+
+ def test_nested_series(self):
+ """Handle a series sent in-reply-to an existing series."""
+ # create an old series with a "cover letter"
+ msgids = [make_msgid()]
+ ref_v1 = create_series_reference(msgid=msgids[0])
+
+ # ...and three patches
+ for i in range(3):
+ msgids.append(make_msgid())
+ create_series_reference(msgid=msgids[-1],
+ series=ref_v1.series)
+
+ # now create a new series with "cover letter"
+ msgids.append(make_msgid())
+ ref_v2 = create_series_reference(msgid=msgids[-1])
+
+ # ...and the "first patch" of this new series
+ msgid = make_msgid()
+ email = self._create_email(msgid, msgids)
+ series = find_series(email)
+
+ # this should link to the second series - not the first
+ self.assertEqual(len(msgids), 4 + 1) # old series + new cover
+ self.assertEqual(series, ref_v2.series)
+
+
class SubjectEncodingTest(TestCase):
"""Validate correct handling of encoded subjects."""
@@ -692,24 +761,6 @@ class ParseInitialTagsTest(PatchTest):
tag__name='Tested-by').count, 1)
-class PrefixTest(TestCase):
-
- def test_split_prefixes(self):
- self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
- self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
- self.assertEqual(split_prefixes(''), [])
- self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
- self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
- self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
- self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
-
- def test_series_markers(self):
- self.assertEqual(parse_series_marker([]), (None, None))
- self.assertEqual(parse_series_marker(['bar']), (None, None))
- self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
- self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
-
-
class SubjectTest(TestCase):
def test_clean_subject(self):
@@ -748,3 +799,28 @@ class SubjectTest(TestCase):
self.assertIsNotNone(subject_check('RE meep'))
self.assertIsNotNone(subject_check('Re meep'))
self.assertIsNotNone(subject_check('re meep'))
+
+ def test_split_prefixes(self):
+ self.assertEqual(split_prefixes('PATCH'), ['PATCH'])
+ self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
+ self.assertEqual(split_prefixes(''), [])
+ self.assertEqual(split_prefixes('PATCH,'), ['PATCH'])
+ self.assertEqual(split_prefixes('PATCH '), ['PATCH'])
+ self.assertEqual(split_prefixes('PATCH,RFC'), ['PATCH', 'RFC'])
+ self.assertEqual(split_prefixes('PATCH 1/2'), ['PATCH', '1/2'])
+
+ def test_series_markers(self):
+ self.assertEqual(parse_series_marker([]), (None, None))
+ self.assertEqual(parse_series_marker(['bar']), (None, None))
+ self.assertEqual(parse_series_marker(['bar', '1/2']), (1, 2))
+ self.assertEqual(parse_series_marker(['bar', '0/12']), (0, 12))
+
+ def test_version(self):
+ self.assertEqual(parse_version('', []), 1)
+ self.assertEqual(parse_version('Hello, world', []), 1)
+ self.assertEqual(parse_version('Hello, world', ['version']), 1)
+ self.assertEqual(parse_version('Hello, world', ['v2']), 2)
+ self.assertEqual(parse_version('Hello, world', ['V6']), 6)
+ self.assertEqual(parse_version('Hello, world', ['v10']), 10)
+ self.assertEqual(parse_version('Hello, world (v2)', []), 2)
+ self.assertEqual(parse_version('Hello, world (V6)', []), 6)
diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py
index 23b0c87..52cd0f9 100644
--- a/patchwork/tests/utils.py
+++ b/patchwork/tests/utils.py
@@ -31,6 +31,8 @@ from patchwork.models import CoverLetter
from patchwork.models import Patch
from patchwork.models import Person
from patchwork.models import Project
+from patchwork.models import Series
+from patchwork.models import SeriesReference
from patchwork.models import State
from patchwork.tests import TEST_PATCH_DIR
@@ -217,6 +219,29 @@ def create_check(**kwargs):
return Check.objects.create(**values)
+def create_series(**kwargs):
+ """Create 'Series' object."""
+ values = {
+ 'date': dt.now(),
+ 'submitter': create_person() if 'submitter' not in kwargs else None,
+ 'total': 1,
+ }
+ values.update(**kwargs)
+
+ return Series.objects.create(**values)
+
+
+def create_series_reference(**kwargs):
+ """Create 'SeriesReference' object."""
+ values = {
+ 'series': create_series() if 'series' not in kwargs else None,
+ 'msgid': make_msgid(),
+ }
+ values.update(**kwargs)
+
+ return SeriesReference.objects.create(**values)
+
+
def _create_submissions(create_func, count=1, **kwargs):
"""Create 'count' Submission-based objects.
--
2.7.4
More information about the Patchwork
mailing list