[PATCH 03/10] parsemail: Add series parsing

Andy Doan andy.doan at linaro.org
Fri Jun 24 04:23:02 AEST 2016


On 06/13/2016 05:41 AM, Stephen Finucane wrote:
> It is now possible to parse and store series, so do just that.
> The parsing at the moment is based on both RFC822 headers and
> subject lines.
>
> Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
> ---
>   patchwork/bin/parsemail.py |  112 ++++++++++++++++++++++++++++++++++++++++----
>   1 files changed, 102 insertions(+), 10 deletions(-)
>
> diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
> index 8648d29..1c7ce0f 100755
> --- a/patchwork/bin/parsemail.py
> +++ b/patchwork/bin/parsemail.py
> @@ -25,8 +25,10 @@ import argparse
>   import codecs
>   import datetime
>   from email import message_from_file
> -from email.header import Header, decode_header
> -from email.utils import parsedate_tz, mktime_tz
> +from email.header import Header
> +from email.header import decode_header
> +from email.utils import parsedate_tz
> +from email.utils import mktime_tz
>   from fnmatch import fnmatch
>   from functools import reduce
>   import logging
> @@ -41,10 +43,20 @@ from django.utils.log import AdminEmailHandler
>   from django.utils import six
>   from django.utils.six.moves import map
>
> -from patchwork.models import (Patch, Project, Person, Comment, State,
> -                              DelegationRule, Submission, CoverLetter,
> -                              get_default_initial_patch_state)
> -from patchwork.parser import parse_patch, patch_get_filenames
> +from patchwork.models import Comment
> +from patchwork.models import CoverLetter
> +from patchwork.models import DelegationRule
> +from patchwork.models import get_default_initial_patch_state
> +from patchwork.models import Patch
> +from patchwork.models import Person
> +from patchwork.models import Project
> +from patchwork.models import SeriesRevision
> +from patchwork.models import SeriesReference
> +from patchwork.models import State
> +from patchwork.models import Submission
> +
> +from patchwork.parser import parse_patch
> +from patchwork.parser import patch_get_filenames

Kind of an odd way to import. Why not the more common form:

from patchwork.models import (
     Comment,
     CoverLetter,
     ...,
)

>   LOGGER = logging.getLogger(__name__)
>
> @@ -114,6 +126,31 @@ def find_project_by_header(mail):
>       return project
>
>
> +def find_series(mail):
> +    """Find a patch's `SeriesRevision`.
> +
> +    Args:
> +        mail (email.message.Message): The mail to extract series from
> +
> +    Returns:
> +        The matching `SeriesRevision` instance, if any
> +    """
> +    series = None
> +
> +    for ref in find_references(mail) + [mail.get('Message-ID').strip()]:
> +        # try parsing by RFC5322 fields first
> +        try:
> +            series_ref = SeriesReference.objects.get(msgid=ref)
> +            series = series_ref.series
> +        except SeriesReference.DoesNotExist:
> +            pass
> +
> +        if series:
> +            break
> +
> +    return series
> +
> +
>   def find_author(mail):
>
>       from_header = clean_header(mail.get('From'))
> @@ -202,6 +239,13 @@ def find_references(mail):
>       return refs
>
>
> +def _parse_prefixes(subject_prefixes, regex):
> +    for prefix in subject_prefixes:
> +        m = regex.match(prefix)
> +        if m:
> +            return m
> +
> +
>   def parse_series_marker(subject_prefixes):
>       """Extract series markers from subject.
>
> @@ -217,14 +261,31 @@ def parse_series_marker(subject_prefixes):
>       """
>
>       regex = re.compile('^([0-9]+)/([0-9]+)$')
> -    for prefix in subject_prefixes:
> -        m = regex.match(prefix)
> -        if not m:
> -            continue
> +    m = _parse_prefixes(subject_prefixes, regex)
> +    if m:
>           return (int(m.group(1)), int(m.group(2)))
> +
>       return (None, None)
>
>
> +def parse_version(subject_prefixes):
> +    """Extract patch version.
> +
> +    Args:
> +        subject_prefixes: List of subject prefixes to extract version
> +          from
> +
> +    Returns:
> +        version if found, else 1
> +    """
> +    regex = re.compile('^v([0-9]+)$')
> +    m = _parse_prefixes(subject_prefixes, regex)
> +    if m:
> +        return int(m.group(1))
> +
> +    return 1
> +
> +
>   def find_content(project, mail):
>       patchbuf = None
>       commentbuf = ''
> @@ -481,9 +542,11 @@ def parse_mail(mail, list_id=None):
>       author = find_author(mail)
>       name, prefixes = clean_subject(mail.get('Subject'), [project.linkname])
>       x, n = parse_series_marker(prefixes)
> +    version = parse_version(prefixes)
>       refs = find_references(mail)
>       date = find_date(mail)
>       headers = find_headers(mail)
> +    # TODO(stephenfin) This should have a 'parse_' prefix
>       pull_url = find_pull_request(message)
>
>       # build objects
> @@ -497,9 +560,24 @@ def parse_mail(mail, list_id=None):
>               filenames = patch_get_filenames(diff)
>               delegate = auto_delegate(project, filenames)
>
> +        # TODO(stephenfin) Eventually this should be moved to a function
> +        series = find_series(mail)
> +        if not series and n:  # the series markers indicates a series
> +            series = SeriesRevision(date=date,
> +                                    submitter=author,
> +                                    version=version,
> +                                    total=n)
> +            series.save()
> +
> +            for ref in refs + [msgid]:  # save references for series
> +                series_ref = SeriesReference(series=series,
> +                                             msgid=ref)
> +                series_ref.save()

A bit of nitpicking, but you can do this in one line with:
     SeriesReference.objects.create(series=series, msgid=ref)

>           patch = Patch(
>               msgid=msgid,
>               project=project,
> +            series=series,
>               name=name,
>               date=date,
>               headers=headers,
> @@ -529,9 +607,23 @@ def parse_mail(mail, list_id=None):
>           if is_cover_letter:
>               author.save()
>
> +            series = find_series(mail)
> +            if not series:
> +                series = SeriesRevision(date=date,
> +                                        submitter=author,
> +                                        version=version,
> +                                        total=n)
> +                series.save()
> +
> +                for ref in refs + [msgid]:  # save references for series
> +                    series_ref = SeriesReference(series=series,
> +                                                 msgid=ref)
> +                    series_ref.save()
> +
>               cover_letter = CoverLetter(
>                   msgid=msgid,
>                   project=project,
> +                series=series,
>                   name=name,
>                   date=date,
>                   headers=headers,

I still get nervous about not having unit tests for this while we develop
it. With tests in place, we could demonstrate which kinds of patch-series
emails this supports.




More information about the Patchwork mailing list