[PATCH v2 6/9] parser: close a TOCTTOU bug on Person creation

Daniel Axtens dja at axtens.net
Sun Feb 25 01:50:17 AEDT 2018


find_author looks up a person by email, and if they do not exist,
creates a Person model, which may be saved later if the message
contains something valuable.

Multiple simultaneous processes can race here: both can do the SELECT,
find there is no Person, and create the model. One will succeed in
saving, the other will get an IntegrityError.

Reduce the window by making find_author into get_or_create_author, and
plumb that through. (Remove a test that specifically required find_author
to *not* create).

More importantly, cover the case where we lose the race, by using
get_or_create which handles the race case, catching the IntegrityError
internally and fetching the winning Person model.

Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
Signed-off-by: Daniel Axtens <dja at axtens.net>

---

v2: Remove extraneous try/except IntegrityError, thanks Andrew
---
 patchwork/parser.py            | 36 ++++++++++++++++++++----------------
 patchwork/tests/test_parser.py | 37 +++++++++++++++----------------------
 2 files changed, 35 insertions(+), 38 deletions(-)

diff --git a/patchwork/parser.py b/patchwork/parser.py
index cbf88fe4e464..885c20b3df4f 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -29,6 +29,7 @@ import logging
 import re
 
 from django.contrib.auth.models import User
+from django.db import IntegrityError
 from django.utils import six
 
 from patchwork.models import Comment
@@ -241,7 +242,7 @@ def _find_series_by_references(project, mail):
             continue
 
 
-def _find_series_by_markers(project, mail):
+def _find_series_by_markers(project, mail, author):
     """Find a patch's series using series markers and sender.
 
     Identify suitable series for a patch using a combination of the
@@ -262,7 +263,6 @@ def _find_series_by_markers(project, mail):
     still won't help us if someone spams the mailing list with
     duplicate series but that's a tricky situation for anyone to parse.
     """
-    author = find_author(mail)
 
     subject = mail.get('Subject')
     name, prefixes = clean_subject(subject, [project.linkname])
@@ -282,7 +282,7 @@ def _find_series_by_markers(project, mail):
         return
 
 
-def find_series(project, mail):
+def find_series(project, mail, author):
     """Find a series, if any, for a given patch.
 
     Args:
@@ -297,10 +297,10 @@ def find_series(project, mail):
     if series:
         return series
 
-    return _find_series_by_markers(project, mail)
+    return _find_series_by_markers(project, mail, author)
 
 
-def find_author(mail):
+def get_or_create_author(mail):
     from_header = clean_header(mail.get('From'))
 
     if not from_header:
@@ -341,12 +341,17 @@ def find_author(mail):
     if name is not None:
         name = name.strip()[:255]
 
-    try:
-        person = Person.objects.get(email__iexact=email)
-        if name:  # use the latest provided name
-            person.name = name
-    except Person.DoesNotExist:
-        person = Person(name=name, email=email)
+    # this correctly handles the case where we lose the race to create
+    # the person and another process beats us to it. (If the record
+    # does not exist, g_o_c invokes _create_object_from_params which
+    # catches the IntegrityError and repeats the SELECT.)
+    person = Person.objects.get_or_create(email__iexact=email,
+                                          defaults={'name': name,
+                                                    'email': email})[0]
+
+    if name:  # use the latest provided name
+        person.name = name
+        person.save()
 
     return person
 
@@ -947,7 +952,6 @@ def parse_mail(mail, list_id=None):
         raise ValueError("Broken 'Message-Id' header")
     msgid = msgid[:255]
 
-    author = find_author(mail)
     subject = mail.get('Subject')
     name, prefixes = clean_subject(subject, [project.linkname])
     is_comment = subject_check(subject)
@@ -973,7 +977,7 @@ def parse_mail(mail, list_id=None):
 
     if not is_comment and (diff or pull_url):  # patches or pull requests
         # we delay the saving until we know we have a patch.
-        author.save()
+        author = get_or_create_author(mail)
 
         delegate = find_delegate_by_header(mail)
         if not delegate and diff:
@@ -984,7 +988,7 @@ def parse_mail(mail, list_id=None):
         # series to match against.
         series = None
         if n:
-            series = find_series(project, mail)
+            series = find_series(project, mail, author)
         else:
             x = n = 1
 
@@ -1061,7 +1065,7 @@ def parse_mail(mail, list_id=None):
                 is_cover_letter = True
 
         if is_cover_letter:
-            author.save()
+            author = get_or_create_author(mail)
 
             # we don't use 'find_series' here as a cover letter will
             # always be the first item in a thread, thus the references
@@ -1109,7 +1113,7 @@ def parse_mail(mail, list_id=None):
     if not submission:
         return
 
-    author.save()
+    author = get_or_create_author(mail)
 
     comment = Comment(
         submission=submission,
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index 68bcb937b273..6cfe7a484443 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -34,7 +34,7 @@ from patchwork.models import Patch
 from patchwork.models import Person
 from patchwork.models import State
 from patchwork.parser import clean_subject
-from patchwork.parser import find_author
+from patchwork.parser import get_or_create_author
 from patchwork.parser import find_patch_content as find_content
 from patchwork.parser import find_project_by_header
 from patchwork.parser import find_series
@@ -225,7 +225,7 @@ class SenderEncodingTest(TestCase):
 
     def _test_encoding(self, from_header, sender_name, sender_email):
         email = self._create_email(from_header)
-        person = find_author(email)
+        person = get_or_create_author(email)
         person.save()
 
         # ensure it was parsed correctly
@@ -241,7 +241,7 @@ class SenderEncodingTest(TestCase):
     def test_empty(self):
         email = self._create_email('')
         with self.assertRaises(ValueError):
-            find_author(email)
+            get_or_create_author(email)
 
     def test_ascii_encoding(self):
         from_header = 'example user <user at example.com>'
@@ -269,7 +269,7 @@ class SenderEncodingTest(TestCase):
 
 
 class SenderCorrelationTest(TestCase):
-    """Validate correct behavior of the find_author case.
+    """Validate correct behavior of the get_or_create_author case.
 
     Relies of checking the internal state of a Django model object.
 
@@ -284,25 +284,16 @@ class SenderCorrelationTest(TestCase):
                'test\n'
         return message_from_string(mail)
 
-    def test_non_existing_sender(self):
-        sender = 'Non-existing Sender <nonexisting at example.com>'
-        mail = self._create_email(sender)
-
-        # don't create the person - attempt to find immediately
-        person = find_author(mail)
-        self.assertEqual(person._state.adding, True)
-        self.assertEqual(person.id, None)
-
     def test_existing_sender(self):
         sender = 'Existing Sender <existing at example.com>'
         mail = self._create_email(sender)
 
         # create the person first
-        person_a = find_author(mail)
+        person_a = get_or_create_author(mail)
         person_a.save()
 
         # then attempt to parse email with the same 'From' line
-        person_b = find_author(mail)
+        person_b = get_or_create_author(mail)
         self.assertEqual(person_b._state.adding, False)
         self.assertEqual(person_b.id, person_a.id)
 
@@ -311,12 +302,12 @@ class SenderCorrelationTest(TestCase):
         mail = self._create_email(sender)
 
         # create the person first
-        person_a = find_author(mail)
+        person_a = get_or_create_author(mail)
         person_a.save()
 
         # then attempt to parse email with a new 'From' line
         mail = self._create_email('existing at example.com')
-        person_b = find_author(mail)
+        person_b = get_or_create_author(mail)
         self.assertEqual(person_b._state.adding, False)
         self.assertEqual(person_b.id, person_a.id)
 
@@ -324,11 +315,11 @@ class SenderCorrelationTest(TestCase):
         sender = 'Existing Sender <existing at example.com>'
         mail = self._create_email(sender)
 
-        person_a = find_author(mail)
+        person_a = get_or_create_author(mail)
         person_a.save()
 
         mail = self._create_email(sender.upper())
-        person_b = find_author(mail)
+        person_b = get_or_create_author(mail)
         self.assertEqual(person_b._state.adding, False)
         self.assertEqual(person_b.id, person_a.id)
 
@@ -361,7 +352,8 @@ class SeriesCorrelationTest(TestCase):
         email = self._create_email(msgid)
         project = create_project()
 
-        self.assertIsNone(find_series(project, email))
+        self.assertIsNone(find_series(project, email,
+                                      get_or_create_author(email)))
 
     def test_first_reply(self):
         msgid_a = make_msgid()
@@ -371,7 +363,8 @@ class SeriesCorrelationTest(TestCase):
         # assume msgid_a was already handled
         ref = create_series_reference(msgid=msgid_a)
 
-        series = find_series(ref.series.project, email)
+        series = find_series(ref.series.project, email,
+                             get_or_create_author(email))
         self.assertEqual(series, ref.series)
 
     def test_nested_series(self):
@@ -395,7 +388,7 @@ class SeriesCorrelationTest(TestCase):
         # ...and the "first patch" of this new series
         msgid = make_msgid()
         email = self._create_email(msgid, msgids)
-        series = find_series(project, email)
+        series = find_series(project, email, get_or_create_author(email))
 
         # this should link to the second series - not the first
         self.assertEqual(len(msgids), 4 + 1)  # old series + new cover
-- 
2.14.1



More information about the Patchwork mailing list