[PATCH 03/10] parsemail: Add series parsing
Stephen Finucane
stephen.finucane at intel.com
Mon Jun 13 20:41:35 AEST 2016
It is now possible to parse and store series, so do just that.
At the moment, parsing is based on both RFC 5322 (formerly RFC 822)
headers and subject lines.
Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
---
patchwork/bin/parsemail.py | 112 ++++++++++++++++++++++++++++++++++++++++----
1 files changed, 102 insertions(+), 10 deletions(-)
diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index 8648d29..1c7ce0f 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -25,8 +25,10 @@ import argparse
import codecs
import datetime
from email import message_from_file
-from email.header import Header, decode_header
-from email.utils import parsedate_tz, mktime_tz
+from email.header import Header
+from email.header import decode_header
+from email.utils import parsedate_tz
+from email.utils import mktime_tz
from fnmatch import fnmatch
from functools import reduce
import logging
@@ -41,10 +43,20 @@ from django.utils.log import AdminEmailHandler
from django.utils import six
from django.utils.six.moves import map
-from patchwork.models import (Patch, Project, Person, Comment, State,
- DelegationRule, Submission, CoverLetter,
- get_default_initial_patch_state)
-from patchwork.parser import parse_patch, patch_get_filenames
+from patchwork.models import Comment
+from patchwork.models import CoverLetter
+from patchwork.models import DelegationRule
+from patchwork.models import get_default_initial_patch_state
+from patchwork.models import Patch
+from patchwork.models import Person
+from patchwork.models import Project
+from patchwork.models import SeriesRevision
+from patchwork.models import SeriesReference
+from patchwork.models import State
+from patchwork.models import Submission
+
+from patchwork.parser import parse_patch
+from patchwork.parser import patch_get_filenames
LOGGER = logging.getLogger(__name__)
@@ -114,6 +126,31 @@ def find_project_by_header(mail):
return project
+def find_series(mail):
+ """Find a patch's `SeriesRevision`.
+
+ Args:
+ mail (email.message.Message): The mail to extract series from
+
+ Returns:
+ The matching `SeriesRevision` instance, if any
+ """
+ series = None
+
+ for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
+ # try parsing by RFC5322 fields first
+ try:
+ series_ref = SeriesReference.objects.get(msgid=ref)
+ series = series_ref.series
+ except SeriesReference.DoesNotExist:
+ pass
+
+ if series:
+ break
+
+ return series
+
+
def find_author(mail):
from_header = clean_header(mail.get('From'))
@@ -202,6 +239,13 @@ def find_references(mail):
return refs
+def _parse_prefixes(subject_prefixes, regex):
+ for prefix in subject_prefixes:
+ m = regex.match(prefix)
+ if m:
+ return m
+
+
def parse_series_marker(subject_prefixes):
"""Extract series markers from subject.
@@ -217,14 +261,31 @@ def parse_series_marker(subject_prefixes):
"""
regex = re.compile('^([0-9]+)/([0-9]+)$')
- for prefix in subject_prefixes:
- m = regex.match(prefix)
- if not m:
- continue
+ m = _parse_prefixes(subject_prefixes, regex)
+ if m:
return (int(m.group(1)), int(m.group(2)))
+
return (None, None)
+def parse_version(subject_prefixes):
+ """Extract patch version.
+
+ Args:
+ subject_prefixes: List of subject prefixes to extract version
+ from
+
+ Returns:
+ version if found, else 1
+ """
+ regex = re.compile('^v([0-9]+)$')
+ m = _parse_prefixes(subject_prefixes, regex)
+ if m:
+ return int(m.group(1))
+
+ return 1
+
+
def find_content(project, mail):
patchbuf = None
commentbuf = ''
@@ -481,9 +542,11 @@ def parse_mail(mail, list_id=None):
author = find_author(mail)
name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
x, n = parse_series_marker(prefixes)
+ version = parse_version(prefixes)
refs = find_references(mail)
date = find_date(mail)
headers = find_headers(mail)
+ # TODO(stephenfin) This should have a 'parse_' prefix
pull_url = find_pull_request(message)
# build objects
@@ -497,9 +560,24 @@ def parse_mail(mail, list_id=None):
filenames = patch_get_filenames(diff)
delegate = auto_delegate(project, filenames)
+ # TODO(stephenfin) Eventually this should be moved to a function
+ series = find_series(mail)
+ if not series and n: # the series markers indicate a series
+ series = SeriesRevision(date=date,
+ submitter=author,
+ version=version,
+ total=n)
+ series.save()
+
+ for ref in refs + [msgid]: # save references for series
+ series_ref = SeriesReference(series=series,
+ msgid=ref)
+ series_ref.save()
+
patch = Patch(
msgid=msgid,
project=project,
+ series=series,
name=name,
date=date,
headers=headers,
@@ -529,9 +607,23 @@ def parse_mail(mail, list_id=None):
if is_cover_letter:
author.save()
+ series = find_series(mail)
+ if not series:
+ series = SeriesRevision(date=date,
+ submitter=author,
+ version=version,
+ total=n)
+ series.save()
+
+ for ref in refs + [msgid]: # save references for series
+ series_ref = SeriesReference(series=series,
+ msgid=ref)
+ series_ref.save()
+
cover_letter = CoverLetter(
msgid=msgid,
project=project,
+ series=series,
name=name,
date=date,
headers=headers,
--
1.7.4.1
More information about the Patchwork
mailing list