[PATCH V2] Prefer patch subject/author when they're specified in the body of the email

Guilherme Salgado guilherme.salgado at linaro.org
Fri May 13 17:39:45 EST 2011


To be consistent with git-am, use the subject (extracted from the first line,
when it begins with 'Subject:') and the author (extracted from the first or
second line, when it begins with 'From:') and use that as Patch.name and
Patch.author respectively.  For more details on the exact format that is
supported, check git-am's manpage.

Signed-off-by: Guilherme Salgado <guilherme.salgado at linaro.org>
---
 apps/patchwork/bin/parsemail.py        |   92 ++++++++++++++++++++++++------
 apps/patchwork/models.py               |    4 +
 apps/patchwork/tests/factory.py        |    8 ++-
 apps/patchwork/tests/patchparser.py    |   99 +++++++++++++++++++++++++++++---
 lib/sql/migration/009-patch-author.sql |    5 ++
 5 files changed, 178 insertions(+), 30 deletions(-)
 create mode 100644 lib/sql/migration/009-patch-author.sql

diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index bf05218..65ede37 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -108,11 +108,7 @@ def find_project(mail):
         project = find_project_by_list_address(mail)
     return project
 
-def find_author(mail):
-
-    from_header = clean_header(mail.get('From'))
-    (name, email) = (None, None)
-
+def parse_name_and_email(text):
     # tuple of (regex, fn)
     #  - where fn returns a (name, email) tuple from the match groups resulting
     #    from re.match().groups()
@@ -128,20 +124,26 @@ def find_author(mail):
     ]
 
     for regex, fn in from_res:
-        match = regex.match(from_header)
+        match = regex.match(text)
         if match:
-            (name, email) = fn(match.groups())
-            break
+            name, email = fn(match.groups())
+            if name is not None:
+                name = name.strip()
+            if email is not None:
+                email = email.strip()
+            return name, email
+    return None, None
+
+
+def find_submitter(mail):
+
+    from_header = clean_header(mail.get('From'))
+    name, email = parse_name_and_email(from_header)
 
     if email is None:
         raise Exception("Could not parse From: header")
 
-    email = email.strip()
-    if name is not None:
-        name = name.strip()
-
     new_person = False
-
     try:
         person = Person.objects.get(email__iexact = email)
     except Person.DoesNotExist:
@@ -228,6 +230,54 @@ def find_content(project, mail):
 
     return (patch, comment)
 
+def find_patch_name(comment):
+    if not comment:
+        return None
+    first_line = comment.content.split('\n', 1)[0]
+    if first_line.startswith('Subject:'):
+        return first_line.replace('Subject:', '').strip()
+    return None
+
+def find_author(comment):
+    """Return a Person entry for the author specified in the given comment.
+
+    The author of a patch is sometimes not the person who's submitting it, and
+    in these cases it is specified in the body of the email, either in the
+    first or the second line, using the following format:
+
+        From: Somebody <somebody at somewhere.tld>
+
+    This is the format understood by 'git-am', which is described in detail in
+    its manpage.
+    """
+    if not comment:
+        return None
+    lines = comment.content.split('\n')
+    if len(lines) == 0:
+        return None
+    # Append an extra line at the end so that our code can assume there will
+    # always be at least two lines, simplifying things significantly.
+    lines.append('')
+    first_line, second_line = lines[:2]
+    if first_line.startswith('From:'):
+        author_line = first_line
+    elif (first_line.startswith('Subject:') and
+          second_line.startswith('From:')):
+        author_line = second_line
+    else:
+        return None
+
+    name, email = parse_name_and_email(author_line.replace('From:', ''))
+    if email is None:
+        return None
+
+    try:
+        person = Person.objects.get(email__iexact=email)
+    except Person.DoesNotExist:
+        person = Person(name=name, email=email)
+        person.save()
+    return person
+
 def find_patch_for_comment(project, mail):
     # construct a list of possible reply message ids
     refs = []
@@ -408,16 +458,22 @@ def parse_mail(mail):
 
     msgid = mail.get('Message-Id').strip()
 
-    (author, save_required) = find_author(mail)
+    (submitter, save_required) = find_submitter(mail)
 
     (patch, comment) = find_content(project, mail)
 
     if patch:
         # we delay the saving until we know we have a patch.
         if save_required:
-            author.save()
+            submitter.save()
             save_required = False
-        patch.submitter = author
+        patch.submitter = submitter
+        author = find_author(comment)
+        if author is not None:
+            patch.author = author
+        name = find_patch_name(comment)
+        if name is not None:
+            patch.name = name
         patch.msgid = msgid
         patch.project = project
         patch.state = get_state(mail.get('X-Patchwork-State', '').strip())
@@ -430,12 +486,12 @@ def parse_mail(mail):
 
     if comment:
         if save_required:
-            author.save()
+            submitter.save()
         # looks like the original constructor for Comment takes the pk
         # when the Comment is created. reset it here.
         if patch:
             comment.patch = patch
-        comment.submitter = author
+        comment.submitter = submitter
         comment.msgid = msgid
         try:
             comment.save()
diff --git a/apps/patchwork/models.py b/apps/patchwork/models.py
index 2b5d429..cfead27 100644
--- a/apps/patchwork/models.py
+++ b/apps/patchwork/models.py
@@ -203,6 +203,7 @@ class Patch(models.Model):
     name = models.CharField(max_length=255)
     date = models.DateTimeField(default=datetime.datetime.now)
     submitter = models.ForeignKey(Person)
+    author = models.ForeignKey(Person, related_name='author_id')
     delegate = models.ForeignKey(User, blank = True, null = True)
     state = models.ForeignKey(State)
     archived = models.BooleanField(default = False)
@@ -224,6 +225,9 @@ class Patch(models.Model):
         except:
             self.state = State.objects.get(ordering =  0)
 
+        if self.author_id is None:
+            self.author = self.submitter
+
         if self.hash is None and self.content is not None:
             self.hash = hash_patch(self.content).hexdigest()
 
diff --git a/apps/patchwork/tests/factory.py b/apps/patchwork/tests/factory.py
index 1eed452..1ece6ad 100644
--- a/apps/patchwork/tests/factory.py
+++ b/apps/patchwork/tests/factory.py
@@ -93,14 +93,16 @@ class ObjectFactory(object):
         bundle.save()
         return bundle
 
-    def makePatch(self, project = None, submitter = None, date = None,
-                  content = None):
+    def makePatch(self, project = None, submitter = None, author = None,
+                  date = None, content = None):
         if date is None:
             date = datetime.now()
         if project is None:
             project = self.makeProject()
         if submitter is None:
             submitter = self.makePerson()
+        if author is None:
+            author = submitter
         if content is None:
             content = textwrap.dedent("""\
                 --- a/apps/patchwork/bin/parsemail.py
@@ -112,7 +114,7 @@ class ObjectFactory(object):
                 -        mail_headers(mail)
                 """)
         msgid = email.utils.make_msgid(idstring = self.getUniqueString())
-        patch = Patch(project = project, msgid = msgid,
+        patch = Patch(project = project, msgid = msgid, author = author,
                       name = self.getUniqueString(), submitter = submitter,
                       date = date, content = content)
         patch.save()
diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py
index d141412..59287b7 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -17,11 +17,12 @@
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import textwrap
 import unittest
-import os
-from email import message_from_string
+from email import message_from_string, utils
 import settings
 from patchwork.models import Project, Person, Patch, Comment
+from patchwork.tests.factory import factory
 from patchwork.tests.utils import read_patch, read_mail, create_email, defaults
 
 try:
@@ -36,7 +37,7 @@ class PatchTest(unittest.TestCase):
     project = defaults.project
 
 from patchwork.bin.parsemail import (
-    extract_email_addresses, find_content, find_author, find_project,
+    extract_email_addresses, find_content, find_submitter, find_project,
     parse_mail)
 
 class InlinePatchTest(PatchTest):
@@ -143,7 +144,7 @@ class SenderEncodingTest(unittest.TestCase):
                'Subject: test\n\n' + \
                'test'
         self.email = message_from_string(mail)
-        (self.person, new) = find_author(self.email)
+        (self.person, new) = find_submitter(self.email)
         self.person.save()
 
     def tearDown(self):
@@ -209,28 +210,28 @@ class SenderCorrelationTest(unittest.TestCase):
     def setUp(self):
         self.existing_sender_mail = self.mail(self.existing_sender)
         self.non_existing_sender_mail = self.mail(self.non_existing_sender)
-        (self.person, new) = find_author(self.existing_sender_mail)
+        (self.person, new) = find_submitter(self.existing_sender_mail)
         self.person.save()
 
     def testExisingSender(self):
-        (person, new) = find_author(self.existing_sender_mail)
+        (person, new) = find_submitter(self.existing_sender_mail)
         self.assertEqual(new, False)
         self.assertEqual(person.id, self.person.id)
 
     def testNonExisingSender(self):
-        (person, new) = find_author(self.non_existing_sender_mail)
+        (person, new) = find_submitter(self.non_existing_sender_mail)
         self.assertEqual(new, True)
         self.assertEqual(person.id, None)
 
     def testExistingDifferentFormat(self):
         mail = self.mail('existing at example.com')
-        (person, new) = find_author(mail)
+        (person, new) = find_submitter(mail)
         self.assertEqual(new, False)
         self.assertEqual(person.id, self.person.id)
 
     def testExistingDifferentCase(self):
         mail = self.mail(self.existing_sender.upper())
-        (person, new) = find_author(mail)
+        (person, new) = find_submitter(mail)
         self.assertEqual(new, False)
         self.assertEqual(person.id, self.person.id)
 
@@ -301,6 +302,86 @@ class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
             # and the one we parsed in setUp()
             self.assertEquals(Comment.objects.filter(patch = patch).count(), 2)
 
+
+class TestAuthorParsing(unittest.TestCase):
+
+    submitter_email = 'barfoorino at example.com'
+    submitter = 'Bar Foorino <%s>' % submitter_email
+    diff = textwrap.dedent("""
+        ---
+        diff --git a/omap_device.h b/omap_device.h
+        --- a/omap_device.h
+        +++ b/omap_device.h
+        @@ -50,1 +50,2 @@ extern struct device omap_device_parent;
+          * @hwmods: (one .. many per omap_device)
+        + * @set_rate: fn ptr to change the operating rate.
+        """)
+
+    def test_patch_author_parsing(self):
+        # If the first line in the body of an email starts with 'From:' and
+        # contains an email address, we'll use that address to lookup the
+        # patch author, creating a new Person to represent it if necessary.
+        content = textwrap.dedent("""
+            From: Foo Barino <author at example.com>
+
+            This patch does this and that.
+            """)
+        msgid = self.make_email_and_parse_it(content + self.diff)
+        patch = Patch.objects.get(msgid=msgid)
+        self.assertEqual(self.submitter_email, patch.submitter.email)
+        self.assertEqual('author at example.com', patch.author.email)
+        self.assertEqual('Foo Barino', patch.author.name)
+
+    def test_patch_author_and_name_parsing(self):
+        # If the first line in the body of an email starts with 'Subject:',
+        # we'll use that as the patch name and look for an author on the next
+        # line (identified by a 'From:' string at the start).
+        content = textwrap.dedent("""
+            Subject: A patch for this and that
+            From: Foo Barino <author at example.com>
+
+            This patch does this and that.
+            """)
+        msgid = self.make_email_and_parse_it(content + self.diff)
+        patch = Patch.objects.get(msgid=msgid)
+        self.assertEqual(self.submitter_email, patch.submitter.email)
+        self.assertEqual('A patch for this and that', patch.name)
+        self.assertEqual('author at example.com', patch.author.email)
+
+    def test_patch_author_parsing_from_email_without_patch(self):
+        content = textwrap.dedent("""
+            From: Foo Barino <foo at example.com>
+
+            This patch does this and that but I forgot to include it here.
+            """)
+        msgid = self.make_email_and_parse_it(content)
+        self.assertRaises(Patch.DoesNotExist, Patch.objects.get, msgid=msgid)
+
+    def test_patch_author_parsing_when_author_line_is_not_the_first(self):
+        # In this case the 'From:' line is not the first one in the email, so
+        # it's discarded and we use the patch submitter as its author.
+        content = textwrap.dedent("""
+            Hey, notice that I'm submitting this on behalf of Foo Barino!
+
+            From: Foo Barino <author at example.com>
+            """)
+        msgid = self.make_email_and_parse_it(content + self.diff)
+        patch = Patch.objects.get(msgid=msgid)
+        self.assertEqual(self.submitter_email, patch.submitter.email)
+        self.assertEqual(self.submitter_email, patch.author.email)
+
+    def make_email_and_parse_it(self, content):
+        project = factory.makeProject()
+        msgid = utils.make_msgid(idstring=factory.getUniqueString())
+        email = MIMEText(content)
+        email['From'] = self.submitter
+        email['Subject'] = 'Whatever'
+        email['Message-Id'] = msgid
+        email['List-ID'] = '<' + project.listid + '>'
+        parse_mail(email)
+        return msgid
+
+
 class EmailProjectGuessing(unittest.TestCase):
     """Projects are guessed based on List-Id headers or recipient addresses"""
     def setUp(self):
diff --git a/lib/sql/migration/009-patch-author.sql b/lib/sql/migration/009-patch-author.sql
new file mode 100644
index 0000000..ae67244
--- /dev/null
+++ b/lib/sql/migration/009-patch-author.sql
@@ -0,0 +1,5 @@
+BEGIN;
+ALTER TABLE patchwork_patch ADD column "author_id" integer REFERENCES "patchwork_person" ("id") DEFERRABLE INITIALLY DEFERRED;
+UPDATE patchwork_patch SET author_id = submitter_id;
+ALTER TABLE patchwork_patch ALTER COLUMN author_id SET NOT NULL;
+COMMIT;



More information about the Patchwork mailing list