[PATCH v2 06/11] parsemail: Restructure 'parse_content'

Stephen Finucane stephen.finucane at intel.com
Sat Apr 2 01:57:42 AEDT 2016


The 'parse_content' function is designed, as the title suggests, to
parse the content of an email. It should not be used to build objects
or similar. Move this functionality out of the function, updating the
unit tests as necessary.

Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
---
 patchwork/bin/parsemail.py          | 137 ++++++++++++++++----------------
 patchwork/tests/test_patchparser.py | 154 ++++++++++++++++--------------------
 2 files changed, 136 insertions(+), 155 deletions(-)

diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index 36fd4cb..de9778f 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -143,7 +143,7 @@ def find_author(mail):
             break
 
     if email is None:
-        raise Exception("Could not parse From: header")
+        raise ValueError("Invalid 'From' header")
 
     email = email.strip()
     if name is not None:
@@ -228,7 +228,6 @@ def parse_series_marker(subject_prefixes):
 def find_content(project, mail):
     patchbuf = None
     commentbuf = ''
-    pullurl = None
 
     for part in mail.walk():
         if part.get_content_maintype() != 'text':
@@ -264,48 +263,22 @@ def find_content(project, mail):
 
             # Could not find a valid decoded payload.  Fail.
             if payload is None:
-                return (None, None, None)
+                return None, None
 
         if subtype in ['x-patch', 'x-diff']:
             patchbuf = payload
-
         elif subtype == 'plain':
             c = payload
 
             if not patchbuf:
-                (patchbuf, c) = parse_patch(payload)
-
-            if not pullurl:
-                pullurl = find_pull_request(payload)
+                patchbuf, c = parse_patch(payload)
 
             if c is not None:
                 commentbuf += c.strip() + '\n'
 
-    patch = None
-    comment = None
-    filenames = None
-
-    if patchbuf:
-        filenames = patch_get_filenames(patchbuf)
-
-    if pullurl or patchbuf:
-        name, prefixes = clean_subject(mail.get('Subject'),
-                                       [project.linkname])
-        patch = Patch(name=name, pull_url=pullurl, diff=patchbuf,
-                      content=clean_content(commentbuf), date=mail_date(mail),
-                      headers=mail_headers(mail))
+    commentbuf = clean_content(commentbuf)
 
-    if commentbuf and not patch:
-        refs = build_references_list(mail)
-        cpatch = find_patch_for_comment(project, refs)
-        if not cpatch:
-            return (None, None, None)
-        comment = Comment(submission=cpatch,
-                          date=mail_date(mail),
-                          content=clean_content(commentbuf),
-                          headers=mail_headers(mail))
-
-    return (patch, comment, filenames)
+    return patchbuf, commentbuf
 
 
 def find_patch_for_comment(project, refs):
@@ -457,21 +430,18 @@ def parse_mail(mail, list_id=None):
     """
     # some basic sanity checks
     if 'From' not in mail:
-        LOGGER.debug("Ignoring patch due to missing 'From'")
-        return 1
+        raise ValueError("Missing 'From' header")
 
     if 'Subject' not in mail:
-        LOGGER.debug("Ignoring patch due to missing 'Subject'")
-        return 1
+        raise ValueError("Missing 'Subject' header")
 
     if 'Message-Id' not in mail:
-        LOGGER.debug("Ignoring patch due to missing 'Message-Id'")
-        return 1
+        raise ValueError("Missing 'Message-Id' header")
 
     hint = mail.get('X-Patchwork-Hint', '').lower()
     if hint == 'ignore':
-        LOGGER.debug("Ignoring patch due to 'ignore' hint")
-        return 0
+        LOGGER.debug("Ignoring email due to 'ignore' hint")
+        return
 
     if list_id:
         project = find_project_by_id(list_id)
@@ -479,45 +449,74 @@ def parse_mail(mail, list_id=None):
         project = find_project_by_header(mail)
 
     if project is None:
-        LOGGER.error('Failed to find a project for patch')
-        return 1
+        LOGGER.error('Failed to find a project for email')
+        return
 
     msgid = mail.get('Message-Id').strip()
+    author, save_required = find_author(mail)
+    name, _ = clean_subject(mail.get('Subject'), [project.linkname])
+    refs = build_references_list(mail)
 
-    (author, save_required) = find_author(mail)
+    # parse content
 
-    (patch, comment, filenames) = find_content(project, mail)
+    diff, message = find_content(project, mail)
 
-    if patch:
-        delegate = get_delegate(mail.get('X-Patchwork-Delegate', '').strip())
-        if not delegate:
-            delegate = auto_delegate(project, filenames)
+    if not (diff or message):
+        return  # nothing to work with
+
+    pull_url = find_pull_request(message)
 
+    # build objects
+
+    if diff or pull_url:  # patches or pull requests
         # we delay the saving until we know we have a patch.
         if save_required:
             author.save()
-            save_required = False
-        patch.submitter = author
-        patch.msgid = msgid
-        patch.project = project
-        patch.state = get_state(mail.get('X-Patchwork-State', '').strip())
-        patch.delegate = get_delegate(
-            mail.get('X-Patchwork-Delegate', '').strip())
+
+        delegate = get_delegate(mail.get('X-Patchwork-Delegate', '').strip())
+        if not delegate and diff:
+            filenames = patch_get_filenames(diff)
+            delegate = auto_delegate(project, filenames)
+
+        patch = Patch(
+            msgid=msgid,
+            project=project,
+            name=name,
+            date=mail_date(mail),
+            headers=mail_headers(mail),
+            submitter=author,
+            content=message,
+            diff=diff,
+            pull_url=pull_url,
+            delegate=delegate,
+            state=get_state(mail.get('X-Patchwork-State', '').strip()))
         patch.save()
         LOGGER.debug('Patch saved')
 
-    if comment:
-        if save_required:
-            author.save()
-        # we defer this assignment until we know that we have a saved patch
-        if patch:
-            comment.patch = patch
-        comment.submitter = author
-        comment.msgid = msgid
-        comment.save()
-        LOGGER.debug('Comment saved')
+        return patch
 
-    return 0
+    # comments
+
+    # we only save comments if we have the parent email
+    patch = find_patch_for_comment(project, refs)
+    if not patch:
+        return
+
+    # ...and we only save the author if we're saving the comment
+    if save_required:
+        author.save()
+
+    comment = Comment(
+        submission=patch,
+        msgid=msgid,
+        date=mail_date(mail),
+        headers=mail_headers(mail),
+        submitter=author,
+        content=message)
+    comment.save()
+    LOGGER.debug('Comment saved')
+
+    return comment
 
 extra_error_message = '''
 == Mail
@@ -575,14 +574,16 @@ def main(args):
 
     mail = message_from_file(args['infile'])
     try:
-        return parse_mail(mail, args['list_id'])
+        result = parse_mail(mail, args['list_id'])
+        if result:
+            return 0
+        return 1
     except:
         if logger:
             logger.exception('Error when parsing incoming email', extra={
                 'mail': mail.as_string(),
             })
         raise
-    return parse_mail(mail, args['list_id'])
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv))
diff --git a/patchwork/tests/test_patchparser.py b/patchwork/tests/test_patchparser.py
index 2bc54cb..c935de6 100644
--- a/patchwork/tests/test_patchparser.py
+++ b/patchwork/tests/test_patchparser.py
@@ -25,8 +25,8 @@ from django.test import TestCase
 
 from patchwork.bin.parsemail import (find_content, find_author,
                                      find_project_by_header, parse_mail,
-                                     split_prefixes, clean_subject,
-                                     parse_series_marker)
+                                     find_pull_request, split_prefixes,
+                                     clean_subject, parse_series_marker)
 from patchwork.models import (Project, Person, Patch, Comment, State,
                               get_default_initial_patch_state)
 from patchwork.tests.utils import (read_patch, read_mail, create_email,
@@ -43,21 +43,17 @@ class PatchTest(TestCase):
 class InlinePatchTest(PatchTest):
     patch_filename = '0001-add-line.patch'
     test_content = 'Test for attached patch'
-    expected_filenames = ['meep.text']
 
     def setUp(self):
         self.orig_patch = read_patch(self.patch_filename)
         email = create_email(self.test_content + '\n' + self.orig_patch)
-        self.patch, _, self.filenames = find_content(self.project, email)
+        self.diff, self.message = find_content(self.project, email)
 
     def testPatchContent(self):
-        self.assertEqual(self.patch.diff, self.orig_patch)
+        self.assertEqual(self.diff, self.orig_patch)
 
     def testCommentContent(self):
-        self.assertEqual(self.patch.content, self.test_content)
-
-    def testFilenames(self):
-        self.assertEqual(self.filenames, self.expected_filenames)
+        self.assertEqual(self.message, self.test_content)
 
 
 class AttachmentPatchTest(InlinePatchTest):
@@ -70,7 +66,7 @@ class AttachmentPatchTest(InlinePatchTest):
         email = create_email(self.test_content, multipart=True)
         attachment = MIMEText(self.orig_patch, _subtype=self.content_subtype)
         email.attach(attachment)
-        self.patch, _, self.filenames = find_content(self.project, email)
+        self.diff, self.message = find_content(self.project, email)
 
 
 class AttachmentXDiffPatchTest(AttachmentPatchTest):
@@ -85,12 +81,11 @@ class UTF8InlinePatchTest(InlinePatchTest):
         self.orig_patch = read_patch(self.patch_filename, self.patch_encoding)
         email = create_email(self.test_content + '\n' + self.orig_patch,
                              content_encoding=self.patch_encoding)
-        self.patch, _, self.filenames = find_content(self.project, email)
+        self.diff, self.message = find_content(self.project, email)
 
 
 class NoCharsetInlinePatchTest(InlinePatchTest):
-
-    """ Test mails with no content-type or content-encoding header """
+    """Test mails with no content-type or content-encoding header."""
     patch_filename = '0001-add-line.patch'
 
     def setUp(self):
@@ -98,7 +93,7 @@ class NoCharsetInlinePatchTest(InlinePatchTest):
         email = create_email(self.test_content + '\n' + self.orig_patch)
         del email['Content-Type']
         del email['Content-Transfer-Encoding']
-        self.patch, _, self.filenames = find_content(self.project, email)
+        self.diff, self.message = find_content(self.project, email)
 
 
 class SignatureCommentTest(InlinePatchTest):
@@ -110,7 +105,7 @@ class SignatureCommentTest(InlinePatchTest):
         email = create_email(
             self.test_content + '\n' +
             '-- \nsig\n' + self.orig_patch)
-        self.patch, _, self.filenames = find_content(self.project, email)
+        self.diff, self.message = find_content(self.project, email)
 
 
 class ListFooterTest(InlinePatchTest):
@@ -124,7 +119,7 @@ class ListFooterTest(InlinePatchTest):
             '_______________________________________________\n' +
             'Linuxppc-dev mailing list\n' +
             self.orig_patch)
-        self.patch, _, self.filenames = find_content(self.project, email)
+        self.diff, self.message = find_content(self.project, email)
 
 
 class DiffWordInCommentTest(InlinePatchTest):
@@ -133,15 +128,13 @@ class DiffWordInCommentTest(InlinePatchTest):
 
 
 class UpdateCommentTest(InlinePatchTest):
-
-    """ Test for '---\nUpdate: v2' style comments to patches. """
+    """Test for '---\nUpdate: v2' style comments to patches."""
     patch_filename = '0001-add-line.patch'
     test_content = 'Test comment\nmore comment\n---\nUpdate: test update'
 
 
 class UpdateSigCommentTest(SignatureCommentTest):
-
-    """ Test for '---\nUpdate: v2' style comments to patches, with a sig """
+    """Test for '---\nUpdate: v2' style comments to patches, with a sig."""
     patch_filename = '0001-add-line.patch'
     test_content = 'Test comment\nmore comment\n---\nUpdate: test update'
 
@@ -157,7 +150,7 @@ class SenderEncodingTest(TestCase):
                'Subject: test\n\n' + \
                'test'
         self.email = message_from_string(mail)
-        (self.person, new) = find_author(self.email)
+        self.person, _ = find_author(self.email)
         self.person.save()
 
     def tearDown(self):
@@ -202,12 +195,11 @@ class SubjectEncodingTest(PatchTest):
                'From: %s\n' % self.sender + \
                'Subject: %s\n\n' % self.subject_header + \
                'test\n\n' + defaults.patch
-        self.projects = defaults.project
         self.email = message_from_string(mail)
 
     def testSubjectEncoding(self):
-        patch, _, _ = find_content(self.project, self.email)
-        self.assertEqual(patch.name, self.subject)
+        name, _ = clean_subject(self.email.get('Subject'))
+        self.assertEqual(name, self.subject)
 
 
 class SubjectUTF8QPEncodingTest(SubjectEncodingTest):
@@ -234,29 +226,29 @@ class SenderCorrelationTest(TestCase):
     def setUp(self):
         self.existing_sender_mail = self.mail(self.existing_sender)
         self.non_existing_sender_mail = self.mail(self.non_existing_sender)
-        (self.person, new) = find_author(self.existing_sender_mail)
+        self.person, _ = find_author(self.existing_sender_mail)
         self.person.save()
 
     def testExisingSender(self):
-        (person, new) = find_author(self.existing_sender_mail)
-        self.assertEqual(new, False)
+        person, save_required = find_author(self.existing_sender_mail)
+        self.assertEqual(save_required, False)
         self.assertEqual(person.id, self.person.id)
 
     def testNonExisingSender(self):
-        (person, new) = find_author(self.non_existing_sender_mail)
-        self.assertEqual(new, True)
+        person, save_required = find_author(self.non_existing_sender_mail)
+        self.assertEqual(save_required, True)
         self.assertEqual(person.id, None)
 
     def testExistingDifferentFormat(self):
         mail = self.mail('existing at example.com')
-        (person, new) = find_author(mail)
-        self.assertEqual(new, False)
+        person, save_required = find_author(mail)
+        self.assertEqual(save_required, False)
         self.assertEqual(person.id, self.person.id)
 
     def testExistingDifferentCase(self):
         mail = self.mail(self.existing_sender.upper())
-        (person, new) = find_author(mail)
-        self.assertEqual(new, False)
+        person, save_required = find_author(mail)
+        self.assertEqual(save_required, False)
         self.assertEqual(person.id, self.person.id)
 
     def tearDown(self):
@@ -264,10 +256,8 @@ class SenderCorrelationTest(TestCase):
 
 
 class MultipleProjectPatchTest(TestCase):
-
-    """ Test that patches sent to multiple patchwork projects are
-        handled correctly """
-
+    """Test that patches sent to multiple patchwork projects are
+       handled correctly."""
     fixtures = ['default_states']
     test_content = 'Test Comment'
     patch_filename = '0001-add-line.patch'
@@ -305,10 +295,8 @@ class MultipleProjectPatchTest(TestCase):
 
 
 class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
-
     """Test that followups to multiple-project patches end up on the
        correct patch."""
-
     comment_msgid = '<2 at example.com>'
     comment_content = 'test comment'
 
@@ -333,11 +321,10 @@ class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
 
 
 class ListIdHeaderTest(TestCase):
-
     """Test that we parse List-Id headers from mails correctly."""
-
     def setUp(self):
-        self.project = Project(linkname='test-project-1', name='Project 1',
+        self.project = Project(linkname='test-project-1',
+                               name='Project 1',
                                listid='1.example.com',
                                listemail='1 at example.com')
         self.project.save()
@@ -366,8 +353,8 @@ class ListIdHeaderTest(TestCase):
         self.assertEqual(project, None)
 
     def testShortListId(self):
-        """ Some mailing lists have List-Id headers in short formats, where it
-            is only the list ID itself (without enclosing angle-brackets). """
+        """Some mailing lists have List-Id headers in short formats, where it
+           is only the list ID itself (without enclosing angle-brackets). """
         email = MIMEText('')
         email['List-Id'] = self.project.listid
         project = find_project_by_header(email)
@@ -384,7 +371,6 @@ class ListIdHeaderTest(TestCase):
 
 
 class MBoxPatchTest(PatchTest):
-
     def setUp(self):
         self.mail = read_mail(self.mail_file, project=self.project)
 
@@ -393,11 +379,11 @@ class GitPullTest(MBoxPatchTest):
     mail_file = '0001-git-pull-request.mbox'
 
     def testGitPullRequest(self):
-        patch, comment, _ = find_content(self.project, self.mail)
-        self.assertTrue(patch is not None)
-        self.assertTrue(patch.pull_url is not None)
-        self.assertTrue(patch.diff is None)
-        self.assertTrue(patch.content is not None)
+        diff, message = find_content(self.project, self.mail)
+        pull_url = find_pull_request(message)
+        self.assertTrue(diff is None)
+        self.assertTrue(message is not None)
+        self.assertTrue(pull_url is not None)
 
 
 class GitPullWrappedTest(GitPullTest):
@@ -408,16 +394,15 @@ class GitPullWithDiffTest(MBoxPatchTest):
     mail_file = '0003-git-pull-request-with-diff.mbox'
 
     def testGitPullWithDiff(self):
-        patch, _, _ = find_content(self.project, self.mail)
-        self.assertTrue(patch is not None)
+        diff, message = find_content(self.project, self.mail)
+        pull_url = find_pull_request(message)
         self.assertEqual('git://git.kernel.org/pub/scm/linux/kernel/git/tip/'
                          'linux-2.6-tip.git x86-fixes-for-linus',
-                         patch.pull_url)
+                         pull_url)
         self.assertTrue(
-            patch.diff.startswith(
+            diff.startswith(
                 'diff --git a/arch/x86/include/asm/smp.h'),
-            patch.diff)
-        self.assertTrue(patch.content is not None)
+            diff)
 
 
 class GitPullGitSSHUrlTest(GitPullTest):
@@ -436,65 +421,60 @@ class GitRenameOnlyTest(MBoxPatchTest):
     mail_file = '0008-git-rename.mbox'
 
     def testGitRename(self):
-        patch, _, _ = find_content(self.project, self.mail)
-        self.assertTrue(patch is not None)
-        self.assertEqual(patch.diff.count("\nrename from "), 2)
-        self.assertEqual(patch.diff.count("\nrename to "), 2)
+        diff, _ = find_content(self.project, self.mail)
+        self.assertTrue(diff is not None)
+        self.assertEqual(diff.count("\nrename from "), 2)
+        self.assertEqual(diff.count("\nrename to "), 2)
 
 
 class GitRenameWithDiffTest(MBoxPatchTest):
     mail_file = '0009-git-rename-with-diff.mbox'
 
     def testGitRename(self):
-        patch, _, _ = find_content(self.project, self.mail)
-        self.assertTrue(patch is not None)
-        self.assertTrue(patch.content is not None)
-        self.assertEqual(patch.diff.count("\nrename from "), 2)
-        self.assertEqual(patch.diff.count("\nrename to "), 2)
-        self.assertEqual(patch.diff.count('\n-a\n+b'), 1)
+        diff, message = find_content(self.project, self.mail)
+        self.assertTrue(diff is not None)
+        self.assertTrue(message is not None)
+        self.assertEqual(diff.count("\nrename from "), 2)
+        self.assertEqual(diff.count("\nrename to "), 2)
+        self.assertEqual(diff.count('\n-a\n+b'), 1)
 
 
 class CVSFormatPatchTest(MBoxPatchTest):
     mail_file = '0007-cvs-format-diff.mbox'
 
     def testPatch(self):
-        patch, comment, _ = find_content(self.project, self.mail)
-        self.assertTrue(patch is not None)
-        self.assertTrue(comment is None)
-        self.assertTrue(patch.diff.startswith('Index'))
+        diff, message = find_content(self.project, self.mail)
+        self.assertTrue(diff.startswith('Index'))
 
 
 class CharsetFallbackPatchTest(MBoxPatchTest):
-
-    """ Test mail with and invalid charset name, and check that we can parse
-        with one of the fallback encodings"""
-
+    """Test mail with and invalid charset name, and check that we can parse
+       with one of the fallback encodings."""
     mail_file = '0010-invalid-charset.mbox'
 
     def testPatch(self):
-        patch, comment, _ = find_content(self.project, self.mail)
-        self.assertTrue(comment is None)
-        self.assertTrue(patch.content is not None)
-        self.assertTrue(patch.diff is not None)
+        diff, message = find_content(self.project, self.mail)
+        self.assertTrue(diff is not None)
+        self.assertTrue(message is not None)
 
 
 class NoNewlineAtEndOfFilePatchTest(MBoxPatchTest):
     mail_file = '0011-no-newline-at-end-of-file.mbox'
 
     def testPatch(self):
-        patch, comment, _ = find_content(self.project, self.mail)
-        self.assertTrue(patch is not None)
-        self.assertTrue(comment is None)
-        self.assertTrue(patch.diff.startswith(
+        diff, message = find_content(self.project, self.mail)
+        self.assertTrue(diff is not None)
+        self.assertTrue(message is not None)
+        self.assertTrue(diff.startswith(
             'diff --git a/tools/testing/selftests/powerpc/Makefile'))
         # Confirm the trailing no newline marker doesn't end up in the comment
-        self.assertFalse(
-            patch.content.rstrip().endswith('\ No newline at end of file'))
+        self.assertFalse(message.rstrip().endswith(
+            '\ No newline at end of file'))
         # Confirm it's instead at the bottom of the patch
-        self.assertTrue(
-            patch.diff.rstrip().endswith('\ No newline at end of file'))
+        self.assertTrue(diff.rstrip().endswith(
+            '\ No newline at end of file'))
         # Confirm we got both markers
-        self.assertEqual(2, patch.diff.count('\ No newline at end of file'))
+        self.assertEqual(2, diff.count('\ No newline at end of file'))
 
 
 class DelegateRequestTest(TestCase):
-- 
2.0.0



More information about the Patchwork mailing list