[PATCH 03/10] parsemail: Add series parsing

Stephen Finucane stephen.finucane at intel.com
Mon Jun 13 20:41:35 AEST 2016


It is now possible to parse and store series, so do just that.
Parsing is currently based on both RFC 5322 headers (the message
references and Message-ID) and subject-line prefixes (the series
marker and version).

Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
---
 patchwork/bin/parsemail.py |  112 ++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index 8648d29..1c7ce0f 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -25,8 +25,10 @@ import argparse
 import codecs
 import datetime
 from email import message_from_file
-from email.header import Header, decode_header
-from email.utils import parsedate_tz, mktime_tz
+from email.header import Header
+from email.header import decode_header
+from email.utils import parsedate_tz
+from email.utils import mktime_tz
 from fnmatch import fnmatch
 from functools import reduce
 import logging
@@ -41,10 +43,20 @@ from django.utils.log import AdminEmailHandler
 from django.utils import six
 from django.utils.six.moves import map
 
-from patchwork.models import (Patch, Project, Person, Comment, State,
-                              DelegationRule, Submission, CoverLetter,
-                              get_default_initial_patch_state)
-from patchwork.parser import parse_patch, patch_get_filenames
+from patchwork.models import Comment
+from patchwork.models import CoverLetter
+from patchwork.models import DelegationRule
+from patchwork.models import get_default_initial_patch_state
+from patchwork.models import Patch
+from patchwork.models import Person
+from patchwork.models import Project
+from patchwork.models import SeriesRevision
+from patchwork.models import SeriesReference
+from patchwork.models import State
+from patchwork.models import Submission
+
+from patchwork.parser import parse_patch
+from patchwork.parser import patch_get_filenames
 
 LOGGER = logging.getLogger(__name__)
 
@@ -114,6 +126,31 @@ def find_project_by_header(mail):
     return project
 
 
+def find_series(mail):
+    """Find a patch's `SeriesRevision`.
+
+    Args:
+        mail (email.message.Message): The mail to extract series from
+
+    Returns:
+        The matching `SeriesRevision` instance, if any
+    """
+    series = None
+
+    for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
+        # try parsing by RFC5322 fields first
+        try:
+            series_ref = SeriesReference.objects.get(msgid=ref)
+            series = series_ref.series
+        except SeriesReference.DoesNotExist:
+            pass
+
+        if series:
+            break
+
+    return series
+
+
 def find_author(mail):
 
     from_header = clean_header(mail.get('From'))
@@ -202,6 +239,13 @@ def find_references(mail):
     return refs
 
 
+def _parse_prefixes(subject_prefixes, regex):
+    for prefix in subject_prefixes:
+        m = regex.match(prefix)
+        if m:
+            return m
+
+
 def parse_series_marker(subject_prefixes):
     """Extract series markers from subject.
 
@@ -217,14 +261,31 @@ def parse_series_marker(subject_prefixes):
     """
 
     regex = re.compile('^([0-9]+)/([0-9]+)$')
-    for prefix in subject_prefixes:
-        m = regex.match(prefix)
-        if not m:
-            continue
+    m = _parse_prefixes(subject_prefixes, regex)
+    if m:
         return (int(m.group(1)), int(m.group(2)))
+
     return (None, None)
 
 
+def parse_version(subject_prefixes):
+    """Extract patch version.
+
+    Args:
+        subject_prefixes: List of subject prefixes to extract version
+          from
+
+    Returns:
+        version if found, else 1
+    """
+    regex = re.compile('^v([0-9]+)$')
+    m = _parse_prefixes(subject_prefixes, regex)
+    if m:
+        return int(m.group(1))
+
+    return 1
+
+
 def find_content(project, mail):
     patchbuf = None
     commentbuf = ''
@@ -481,9 +542,11 @@ def parse_mail(mail, list_id=None):
     author = find_author(mail)
     name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
     x, n = parse_series_marker(prefixes)
+    version = parse_version(prefixes)
     refs = find_references(mail)
     date = find_date(mail)
     headers = find_headers(mail)
+    # TODO(stephenfin) This should have a 'parse_' prefix
     pull_url = find_pull_request(message)
 
     # build objects
@@ -497,9 +560,24 @@ def parse_mail(mail, list_id=None):
             filenames = patch_get_filenames(diff)
             delegate = auto_delegate(project, filenames)
 
+        # TODO(stephenfin) Eventually this should be moved to a function
+        series = find_series(mail)
+        if not series and n:  # the series marker indicates a series
+            series = SeriesRevision(date=date,
+                                    submitter=author,
+                                    version=version,
+                                    total=n)
+            series.save()
+
+            for ref in refs + [msgid]:  # save references for series
+                series_ref = SeriesReference(series=series,
+                                             msgid=ref)
+                series_ref.save()
+
         patch = Patch(
             msgid=msgid,
             project=project,
+            series=series,
             name=name,
             date=date,
             headers=headers,
@@ -529,9 +607,23 @@ def parse_mail(mail, list_id=None):
         if is_cover_letter:
             author.save()
 
+            series = find_series(mail)
+            if not series:
+                series = SeriesRevision(date=date,
+                                        submitter=author,
+                                        version=version,
+                                        total=n)
+                series.save()
+
+                for ref in refs + [msgid]:  # save references for series
+                    series_ref = SeriesReference(series=series,
+                                                 msgid=ref)
+                    series_ref.save()
+
             cover_letter = CoverLetter(
                 msgid=msgid,
                 project=project,
+                series=series,
                 name=name,
                 date=date,
                 headers=headers,
-- 
1.7.4.1



More information about the Patchwork mailing list