[PATCH 03/10] parsemail: Add series parsing
Andy Doan
andy.doan at linaro.org
Fri Jun 24 04:23:02 AEST 2016
On 06/13/2016 05:41 AM, Stephen Finucane wrote:
> It is now possible to parse and store series, so do just that.
> The parsing at the moment is based on both RFC822 headers and
> subject lines.
>
> Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
> ---
> patchwork/bin/parsemail.py | 112 ++++++++++++++++++++++++++++++++++++++++----
> 1 files changed, 102 insertions(+), 10 deletions(-)
>
> diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
> index 8648d29..1c7ce0f 100755
> --- a/patchwork/bin/parsemail.py
> +++ b/patchwork/bin/parsemail.py
> @@ -25,8 +25,10 @@ import argparse
> import codecs
> import datetime
> from email import message_from_file
> -from email.header import Header, decode_header
> -from email.utils import parsedate_tz, mktime_tz
> +from email.header import Header
> +from email.header import decode_header
> +from email.utils import parsedate_tz
> +from email.utils import mktime_tz
> from fnmatch import fnmatch
> from functools import reduce
> import logging
> @@ -41,10 +43,20 @@ from django.utils.log import AdminEmailHandler
> from django.utils import six
> from django.utils.six.moves import map
>
> -from patchwork.models import (Patch, Project, Person, Comment, State,
> - DelegationRule, Submission, CoverLetter,
> - get_default_initial_patch_state)
> -from patchwork.parser import parse_patch, patch_get_filenames
> +from patchwork.models import Comment
> +from patchwork.models import CoverLetter
> +from patchwork.models import DelegationRule
> +from patchwork.models import get_default_initial_patch_state
> +from patchwork.models import Patch
> +from patchwork.models import Person
> +from patchwork.models import Project
> +from patchwork.models import SeriesRevision
> +from patchwork.models import SeriesReference
> +from patchwork.models import State
> +from patchwork.models import Submission
> +
> +from patchwork.parser import parse_patch
> +from patchwork.parser import patch_get_filenames
Kind of an odd way to import. Why not the more common form:
from patchwork.models import (
Comment,
CoverLetter,
...,
)
> LOGGER = logging.getLogger(__name__)
>
> @@ -114,6 +126,31 @@ def find_project_by_header(mail):
> return project
>
>
> +def find_series(mail):
> + """Find a patch's `SeriesRevision`.
> +
> + Args:
> + mail (email.message.Message): The mail to extract series from
> +
> + Returns:
> + The matching `SeriesRevision` instance, if any
> + """
> + series = None
> +
> + for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
> + # try parsing by RFC5322 fields first
> + try:
> + series_ref = SeriesReference.objects.get(msgid=ref)
> + series = series_ref.series
> + except SeriesReference.DoesNotExist:
> + pass
> +
> + if series:
> + break
> +
> + return series
> +
> +
> def find_author(mail):
>
> from_header = clean_header(mail.get('From'))
> @@ -202,6 +239,13 @@ def find_references(mail):
> return refs
>
>
> +def _parse_prefixes(subject_prefixes, regex):
> + for prefix in subject_prefixes:
> + m = regex.match(prefix)
> + if m:
> + return m
> +
> +
> def parse_series_marker(subject_prefixes):
> """Extract series markers from subject.
>
> @@ -217,14 +261,31 @@ def parse_series_marker(subject_prefixes):
> """
>
> regex = re.compile('^([0-9]+)/([0-9]+)$')
> - for prefix in subject_prefixes:
> - m = regex.match(prefix)
> - if not m:
> - continue
> + m = _parse_prefixes(subject_prefixes, regex)
> + if m:
> return (int(m.group(1)), int(m.group(2)))
> +
> return (None, None)
>
>
> +def parse_version(subject_prefixes):
> + """Extract patch version.
> +
> + Args:
> + subject_prefixes: List of subject prefixes to extract version
> + from
> +
> + Returns:
> + version if found, else 1
> + """
> + regex = re.compile('^v([0-9]+)$')
> + m = _parse_prefixes(subject_prefixes, regex)
> + if m:
> + return int(m.group(1))
> +
> + return 1
> +
> +
> def find_content(project, mail):
> patchbuf = None
> commentbuf = ''
> @@ -481,9 +542,11 @@ def parse_mail(mail, list_id=None):
> author = find_author(mail)
> name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
> x, n = parse_series_marker(prefixes)
> + version = parse_version(prefixes)
> refs = find_references(mail)
> date = find_date(mail)
> headers = find_headers(mail)
> + # TODO(stephenfin) This should have a 'parse_' prefix
> pull_url = find_pull_request(message)
>
> # build objects
> @@ -497,9 +560,24 @@ def parse_mail(mail, list_id=None):
> filenames = patch_get_filenames(diff)
> delegate = auto_delegate(project, filenames)
>
> + # TODO(stephenfin) Eventually this should be moved to a function
> + series = find_series(mail)
> + if not series and n: # the series markers indicates a series
> + series = SeriesRevision(date=date,
> + submitter=author,
> + version=version,
> + total=n)
> + series.save()
> +
> + for ref in refs + [msgid]: # save references for series
> + series_ref = SeriesReference(series=series,
> + msgid=ref)
> + series_ref.save()
A bit of a nitpick, but you can create and save each reference in a single call with:
SeriesReference.objects.create(series=series, msgid=ref)
> patch = Patch(
> msgid=msgid,
> project=project,
> + series=series,
> name=name,
> date=date,
> headers=headers,
> @@ -529,9 +607,23 @@ def parse_mail(mail, list_id=None):
> if is_cover_letter:
> author.save()
>
> + series = find_series(mail)
> + if not series:
> + series = SeriesRevision(date=date,
> + submitter=author,
> + version=version,
> + total=n)
> + series.save()
> +
> + for ref in refs + [msgid]: # save references for series
> + series_ref = SeriesReference(series=series,
> + msgid=ref)
> + series_ref.save()
> +
> cover_letter = CoverLetter(
> msgid=msgid,
> project=project,
> + series=series,
> name=name,
> date=date,
> headers=headers,
I still get nervous about not having unit tests for this while we
develop it. With tests in place, we could demonstrate which kinds of
patch-series emails this supports.
More information about the Patchwork
mailing list