[RFC] Store patch authors in the new Patch.author field

Guilherme Salgado guilherme.salgado at linaro.org
Fri Apr 29 06:36:49 EST 2011


When the first line of the email message that contains a patch starts with
'From:' and contain an email address, we use that address as the author;
otherwise we use the same as the submitter.

Currently this information is not shown anywhere but it could be easily
exposed in the UI and or over XML-RPC. I'd be happy to do that if there are no
objections to this patch.

Signed-off-by: Guilherme Salgado <guilherme.salgado at linaro.org>
---

 apps/patchwork/bin/parsemail.py     |   49 +++++++++++++++++--
 apps/patchwork/models.py            |    4 ++
 apps/patchwork/tests/factory.py     |    7 ++-
 apps/patchwork/tests/patchparser.py |   91 ++++++++++++++++++++++++++++++++---
 4 files changed, 134 insertions(+), 17 deletions(-)

diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index bf05218..b4b989c 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -108,7 +108,7 @@ def find_project(mail):
         project = find_project_by_list_address(mail)
     return project
 
-def find_author(mail):
+def find_submitter(mail):
 
     from_header = clean_header(mail.get('From'))
     (name, email) = (None, None)
@@ -228,6 +228,40 @@ def find_content(project, mail):
 
     return (patch, comment)
 
+def find_author(comment):
+    """Return a Person entry for the author specified in the given comment.
+
+    The author of a patch is sometimes not the person who's submitting it, and
+    in these cases the first line of the email will specify the author, using
+    the following format:
+
+        From: Somebody <somebody at somewhere.tld>
+
+    When the first line starts with 'From:' and contain an email address, we
+    return a Person entry representing that email address, creating a new one
+    if necessary.  If the first line doesn't start with 'From:' or it doesn't
+    contain an email address, we return None.
+    """
+    if not comment:
+        return None
+    first_line = comment.content.split('\n', 1)[0]
+    if first_line.startswith('From:'):
+        emails = extract_email_addresses(first_line)
+        if len(emails) == 1:
+            email = emails[0]
+            try:
+                person = Person.objects.get(email__iexact=email)
+            except Person.DoesNotExist:
+                name = email.split('@', 1)[0]
+                person = Person(name=name, email=email)
+                person.save()
+            return person
+        elif len(emails) == 0:
+            return None
+        else:
+            raise AssertionError(
+                'This patch has more than one author: %s.' % first_line)
+
 def find_patch_for_comment(project, mail):
     # construct a list of possible reply message ids
     refs = []
@@ -408,16 +442,19 @@ def parse_mail(mail):
 
     msgid = mail.get('Message-Id').strip()
 
-    (author, save_required) = find_author(mail)
+    (submitter, save_required) = find_submitter(mail)
 
     (patch, comment) = find_content(project, mail)
 
     if patch:
         # we delay the saving until we know we have a patch.
         if save_required:
-            author.save()
+            submitter.save()
             save_required = False
-        patch.submitter = author
+        patch.submitter = submitter
+        author = find_author(comment)
+        if author is not None:
+            patch.author = author
         patch.msgid = msgid
         patch.project = project
         patch.state = get_state(mail.get('X-Patchwork-State', '').strip())
@@ -430,12 +467,12 @@ def parse_mail(mail):
 
     if comment:
         if save_required:
-            author.save()
+            submitter.save()
         # looks like the original constructor for Comment takes the pk
         # when the Comment is created. reset it here.
         if patch:
             comment.patch = patch
-        comment.submitter = author
+        comment.submitter = submitter
         comment.msgid = msgid
         try:
             comment.save()
diff --git a/apps/patchwork/models.py b/apps/patchwork/models.py
index 2b5d429..cfead27 100644
--- a/apps/patchwork/models.py
+++ b/apps/patchwork/models.py
@@ -203,6 +203,7 @@ class Patch(models.Model):
     name = models.CharField(max_length=255)
     date = models.DateTimeField(default=datetime.datetime.now)
     submitter = models.ForeignKey(Person)
+    author = models.ForeignKey(Person, related_name='author_id')
     delegate = models.ForeignKey(User, blank = True, null = True)
     state = models.ForeignKey(State)
     archived = models.BooleanField(default = False)
@@ -224,6 +225,9 @@ class Patch(models.Model):
         except:
             self.state = State.objects.get(ordering =  0)
 
+        if self.author_id is None:
+            self.author = self.submitter
+
         if self.hash is None and self.content is not None:
             self.hash = hash_patch(self.content).hexdigest()
 
diff --git a/apps/patchwork/tests/factory.py b/apps/patchwork/tests/factory.py
index 7ca4c23..53184cc 100644
--- a/apps/patchwork/tests/factory.py
+++ b/apps/patchwork/tests/factory.py
@@ -95,13 +95,16 @@ class ObjectFactory(object):
         bundle.save()
         return bundle
 
-    def makePatch(self, project=None, submitter=None, date=None, content=None):
+    def makePatch(self, project=None, submitter=None, author=None, date=None,
+                  content=None):
         if date is None:
             date = datetime.now()
         if project is None:
             project = self.makeProject()
         if submitter is None:
             submitter = self.makePerson()
+        if author is None:
+            author = submitter
         if content is None:
             content = textwrap.dedent("""\
                 --- a/apps/patchwork/bin/parsemail.py
@@ -115,7 +118,7 @@ class ObjectFactory(object):
         msgid = email.utils.make_msgid(idstring=self.getUniqueString())
         patch = Patch(
             project=project, msgid=msgid, name=self.getUniqueString(),
-            submitter=submitter, date=date, content=content,
+            submitter=submitter, date=date, content=content, author=author,
             state=State.objects.get(name='New'))
         patch.save()
         return patch
diff --git a/apps/patchwork/tests/patchparser.py b/apps/patchwork/tests/patchparser.py
index d141412..69adcf6 100644
--- a/apps/patchwork/tests/patchparser.py
+++ b/apps/patchwork/tests/patchparser.py
@@ -17,11 +17,12 @@
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import textwrap
 import unittest
-import os
-from email import message_from_string
+from email import message_from_string, utils
 import settings
 from patchwork.models import Project, Person, Patch, Comment
+from patchwork.tests.factory import factory
 from patchwork.tests.utils import read_patch, read_mail, create_email, defaults
 
 try:
@@ -36,7 +37,7 @@ class PatchTest(unittest.TestCase):
     project = defaults.project
 
 from patchwork.bin.parsemail import (
-    extract_email_addresses, find_content, find_author, find_project,
+    extract_email_addresses, find_content, find_submitter, find_project,
     parse_mail)
 
 class InlinePatchTest(PatchTest):
@@ -143,7 +144,7 @@ class SenderEncodingTest(unittest.TestCase):
                'Subject: test\n\n' + \
                'test'
         self.email = message_from_string(mail)
-        (self.person, new) = find_author(self.email)
+        (self.person, new) = find_submitter(self.email)
         self.person.save()
 
     def tearDown(self):
@@ -209,28 +210,28 @@ class SenderCorrelationTest(unittest.TestCase):
     def setUp(self):
         self.existing_sender_mail = self.mail(self.existing_sender)
         self.non_existing_sender_mail = self.mail(self.non_existing_sender)
-        (self.person, new) = find_author(self.existing_sender_mail)
+        (self.person, new) = find_submitter(self.existing_sender_mail)
         self.person.save()
 
     def testExisingSender(self):
-        (person, new) = find_author(self.existing_sender_mail)
+        (person, new) = find_submitter(self.existing_sender_mail)
         self.assertEqual(new, False)
         self.assertEqual(person.id, self.person.id)
 
     def testNonExisingSender(self):
-        (person, new) = find_author(self.non_existing_sender_mail)
+        (person, new) = find_submitter(self.non_existing_sender_mail)
         self.assertEqual(new, True)
         self.assertEqual(person.id, None)
 
     def testExistingDifferentFormat(self):
         mail = self.mail('existing at example.com')
-        (person, new) = find_author(mail)
+        (person, new) = find_submitter(mail)
         self.assertEqual(new, False)
         self.assertEqual(person.id, self.person.id)
 
     def testExistingDifferentCase(self):
         mail = self.mail(self.existing_sender.upper())
-        (person, new) = find_author(mail)
+        (person, new) = find_submitter(mail)
         self.assertEqual(new, False)
         self.assertEqual(person.id, self.person.id)
 
@@ -301,6 +302,78 @@ class MultipleProjectPatchCommentTest(MultipleProjectPatchTest):
             # and the one we parsed in setUp()
             self.assertEquals(Comment.objects.filter(patch = patch).count(), 2)
 
+
+class TestAuthorParsing(unittest.TestCase):
+
+    submitter_email = 'barfoorino at example.com'
+    submitter = 'Bar Foorino <%s>' % submitter_email
+
+    def test_patch_author_parsing(self):
+        # If the first line in the body of an email starts with 'From:' and
+        # contains an email address, we'll use that address to lookup the
+        # patch author, creating a new Person to represent it if necessary.
+        content = textwrap.dedent("""
+            From: Foo Barino <author at example.com>
+
+            This patch does this and that.
+
+            ---
+            diff --git a/omap_device.h b/omap_device.h
+            --- a/omap_device.h
+            +++ b/omap_device.h
+            @@ -50,1 +50,2 @@ extern struct device omap_device_parent;
+              * @hwmods: (one .. many per omap_device)
+            + * @set_rate: fn ptr to change the operating rate.
+            """)
+        email, msgid = self.make_email(content)
+        parse_mail(email)
+        patch = Patch.objects.get(msgid=msgid)
+        self.assertEqual(self.submitter_email, patch.submitter.email)
+        self.assertEqual('author at example.com', patch.author.email)
+
+    def test_patch_author_parsing_from_email_without_patch(self):
+        content = textwrap.dedent("""
+            From: Foo Barino <foo at example.com>
+
+            This patch does this and that but I forgot to include it here.
+            """)
+        email, msgid = self.make_email(content)
+        parse_mail(email)
+        self.assertRaises(Patch.DoesNotExist, Patch.objects.get, msgid=msgid)
+
+    def test_patch_author_parsing_when_author_line_is_not_the_first(self):
+        # In this case the 'From:' line is not the first one in the email, so
+        # it's discarded and we use the patch submitter as its author.
+        content = textwrap.dedent("""
+            Hey, notice that I'm submitting this on behalf of Foo Barino!
+
+            From: Foo Barino <author at example.com>
+
+            ---
+            diff --git a/omap_device.h b/omap_device.h
+            --- a/omap_device.h
+            +++ b/omap_device.h
+            @@ -50,1 +50,2 @@ extern struct device omap_device_parent;
+              * @hwmods: (one .. many per omap_device)
+            + * @set_rate: fn ptr to change the operating rate.
+            """)
+        email, msgid = self.make_email(content)
+        parse_mail(email)
+        patch = Patch.objects.get(msgid=msgid)
+        self.assertEqual(self.submitter_email, patch.submitter.email)
+        self.assertEqual(self.submitter_email, patch.author.email)
+
+    def make_email(self, content):
+        project = factory.makeProject()
+        msgid = utils.make_msgid(idstring=factory.getUniqueString())
+        email = MIMEText(content)
+        email['From'] = self.submitter
+        email['Subject'] = 'Whatever'
+        email['Message-Id'] = msgid
+        email['List-ID'] = '<' + project.listid + '>'
+        return email, msgid
+
+
 class EmailProjectGuessing(unittest.TestCase):
     """Projects are guessed based on List-Id headers or recipient addresses"""
     def setUp(self):
diff --git a/lib/sql/migration/009-patch-author.sql b/lib/sql/migration/009-patch-author.sql
new file mode 100644
index 0000000..ae67244
--- /dev/null
+++ b/lib/sql/migration/009-patch-author.sql
@@ -0,0 +1,5 @@
+BEGIN;
+ALTER TABLE patchwork_patch ADD column "author_id" integer REFERENCES "patchwork_person" ("id") DEFERRABLE INITIALLY DEFERRED;
+UPDATE patchwork_patch SET author_id = submitter_id;
+ALTER TABLE patchwork_patch ALTER COLUMN author_id SET NOT NULL;
+COMMIT;



More information about the Patchwork mailing list