[PATCH v2 6/9] parser: close a TOCTTOU bug on Person creation
Daniel Axtens
dja at axtens.net
Sun Feb 25 01:50:17 AEDT 2018
find_author looks up a person by email, and if they do not exist,
creates a Person model, which may be saved later if the message
contains something valuable.
Multiple simultaneous processes can race here: both can do the SELECT,
find there is no Person, and create the model. One will succeed in
saving, the other will get an IntegrityError.
Reduce the window by making find_author into get_or_create_author, and
plumb that through. (Remove a test that specifically required find_author
to *not* create).
More importantly, cover the case where we lose the race, by using
get_or_create which handles the race case, catching the IntegrityError
internally and fetching the winning Person model.
Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
Signed-off-by: Daniel Axtens <dja at axtens.net>
---
v2: Remove extraneous try/except IntegrityError, thanks Andrew
---
patchwork/parser.py | 36 ++++++++++++++++++++----------------
patchwork/tests/test_parser.py | 37 +++++++++++++++----------------------
2 files changed, 35 insertions(+), 38 deletions(-)
diff --git a/patchwork/parser.py b/patchwork/parser.py
index cbf88fe4e464..885c20b3df4f 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -29,6 +29,7 @@ import logging
import re
from django.contrib.auth.models import User
+from django.db import IntegrityError
from django.utils import six
from patchwork.models import Comment
@@ -241,7 +242,7 @@ def _find_series_by_references(project, mail):
continue
-def _find_series_by_markers(project, mail):
+def _find_series_by_markers(project, mail, author):
"""Find a patch's series using series markers and sender.
Identify suitable series for a patch using a combination of the
@@ -262,7 +263,6 @@ def _find_series_by_markers(project, mail):
still won't help us if someone spams the mailing list with
duplicate series but that's a tricky situation for anyone to parse.
"""
- author = find_author(mail)
subject = mail.get('Subject')
name, prefixes = clean_subject(subject, [project.linkname])
@@ -282,7 +282,7 @@ def _find_series_by_markers(project, mail):
return
-def find_series(project, mail):
+def find_series(project, mail, author):
"""Find a series, if any, for a given patch.
Args:
@@ -297,10 +297,10 @@ def find_series(project, mail):
if series:
return series
- return _find_series_by_markers(project, mail)
+ return _find_series_by_markers(project, mail, author)
-def find_author(mail):
+def get_or_create_author(mail):
from_header = clean_header(mail.get('From'))
if not from_header:
@@ -341,12 +341,17 @@ def find_author(mail):
if name is not None:
name = name.strip()[:255]
- try:
- person = Person.objects.get(email__iexact=email)
- if name: # use the latest provided name
- person.name = name
- except Person.DoesNotExist:
- person = Person(name=name, email=email)
+ # this correctly handles the case where we lose the race to create
+ # the person and another process beats us to it. (If the record
+ # does not exist, g_o_c invokes _create_object_from_params which
+ # catches the IntegrityError and repeats the SELECT.)
+ person = Person.objects.get_or_create(email__iexact=email,
+ defaults={'name': name,
+ 'email': email})[0]
+
+ if name: # use the latest provided name
+ person.name = name
+ person.save()
return person
@@ -947,7 +952,6 @@ def parse_mail(mail, list_id=None):
raise ValueError("Broken 'Message-Id' header")
msgid = msgid[:255]
- author = find_author(mail)
subject = mail.get('Subject')
name, prefixes = clean_subject(subject, [project.linkname])
is_comment = subject_check(subject)
@@ -973,7 +977,7 @@ def parse_mail(mail, list_id=None):
if not is_comment and (diff or pull_url): # patches or pull requests
# we delay the saving until we know we have a patch.
- author.save()
+ author = get_or_create_author(mail)
delegate = find_delegate_by_header(mail)
if not delegate and diff:
@@ -984,7 +988,7 @@ def parse_mail(mail, list_id=None):
# series to match against.
series = None
if n:
- series = find_series(project, mail)
+ series = find_series(project, mail, author)
else:
x = n = 1
@@ -1061,7 +1065,7 @@ def parse_mail(mail, list_id=None):
is_cover_letter = True
if is_cover_letter:
- author.save()
+ author = get_or_create_author(mail)
# we don't use 'find_series' here as a cover letter will
# always be the first item in a thread, thus the references
@@ -1109,7 +1113,7 @@ def parse_mail(mail, list_id=None):
if not submission:
return
- author.save()
+ author = get_or_create_author(mail)
comment = Comment(
submission=submission,
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index 68bcb937b273..6cfe7a484443 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -34,7 +34,7 @@ from patchwork.models import Patch
from patchwork.models import Person
from patchwork.models import State
from patchwork.parser import clean_subject
-from patchwork.parser import find_author
+from patchwork.parser import get_or_create_author
from patchwork.parser import find_patch_content as find_content
from patchwork.parser import find_project_by_header
from patchwork.parser import find_series
@@ -225,7 +225,7 @@ class SenderEncodingTest(TestCase):
def _test_encoding(self, from_header, sender_name, sender_email):
email = self._create_email(from_header)
- person = find_author(email)
+ person = get_or_create_author(email)
person.save()
# ensure it was parsed correctly
@@ -241,7 +241,7 @@ class SenderEncodingTest(TestCase):
def test_empty(self):
email = self._create_email('')
with self.assertRaises(ValueError):
- find_author(email)
+ get_or_create_author(email)
def test_ascii_encoding(self):
from_header = 'example user <user at example.com>'
@@ -269,7 +269,7 @@ class SenderEncodingTest(TestCase):
class SenderCorrelationTest(TestCase):
- """Validate correct behavior of the find_author case.
+ """Validate correct behavior of the get_or_create_author case.
Relies of checking the internal state of a Django model object.
@@ -284,25 +284,16 @@ class SenderCorrelationTest(TestCase):
'test\n'
return message_from_string(mail)
- def test_non_existing_sender(self):
- sender = 'Non-existing Sender <nonexisting at example.com>'
- mail = self._create_email(sender)
-
- # don't create the person - attempt to find immediately
- person = find_author(mail)
- self.assertEqual(person._state.adding, True)
- self.assertEqual(person.id, None)
-
def test_existing_sender(self):
sender = 'Existing Sender <existing at example.com>'
mail = self._create_email(sender)
# create the person first
- person_a = find_author(mail)
+ person_a = get_or_create_author(mail)
person_a.save()
# then attempt to parse email with the same 'From' line
- person_b = find_author(mail)
+ person_b = get_or_create_author(mail)
self.assertEqual(person_b._state.adding, False)
self.assertEqual(person_b.id, person_a.id)
@@ -311,12 +302,12 @@ class SenderCorrelationTest(TestCase):
mail = self._create_email(sender)
# create the person first
- person_a = find_author(mail)
+ person_a = get_or_create_author(mail)
person_a.save()
# then attempt to parse email with a new 'From' line
mail = self._create_email('existing at example.com')
- person_b = find_author(mail)
+ person_b = get_or_create_author(mail)
self.assertEqual(person_b._state.adding, False)
self.assertEqual(person_b.id, person_a.id)
@@ -324,11 +315,11 @@ class SenderCorrelationTest(TestCase):
sender = 'Existing Sender <existing at example.com>'
mail = self._create_email(sender)
- person_a = find_author(mail)
+ person_a = get_or_create_author(mail)
person_a.save()
mail = self._create_email(sender.upper())
- person_b = find_author(mail)
+ person_b = get_or_create_author(mail)
self.assertEqual(person_b._state.adding, False)
self.assertEqual(person_b.id, person_a.id)
@@ -361,7 +352,8 @@ class SeriesCorrelationTest(TestCase):
email = self._create_email(msgid)
project = create_project()
- self.assertIsNone(find_series(project, email))
+ self.assertIsNone(find_series(project, email,
+ get_or_create_author(email)))
def test_first_reply(self):
msgid_a = make_msgid()
@@ -371,7 +363,8 @@ class SeriesCorrelationTest(TestCase):
# assume msgid_a was already handled
ref = create_series_reference(msgid=msgid_a)
- series = find_series(ref.series.project, email)
+ series = find_series(ref.series.project, email,
+ get_or_create_author(email))
self.assertEqual(series, ref.series)
def test_nested_series(self):
@@ -395,7 +388,7 @@ class SeriesCorrelationTest(TestCase):
# ...and the "first patch" of this new series
msgid = make_msgid()
email = self._create_email(msgid, msgids)
- series = find_series(project, email)
+ series = find_series(project, email, get_or_create_author(email))
# this should link to the second series - not the first
self.assertEqual(len(msgids), 4 + 1) # old series + new cover
--
2.14.1
More information about the Patchwork
mailing list