[PATCH 6/9] parser: close a TOCTTOU bug on Person creation
Andrew Donnellan
andrew.donnellan at au1.ibm.com
Thu Feb 22 15:44:16 AEDT 2018
On 22/02/18 01:17, Daniel Axtens wrote:
> find_author looks up a person by email, and if they do not exist,
> creates a Person model, which may be saved later if the message
> contains something valuable.
>
> Multiple simultaneous processes can race here: both can do the SELECT,
> find there is no Person, and create the model. One will succeed in
> saving, the other will get an IntegrityError.
>
> Reduce the window by making find_author into get_or_create_author, and
> plumb that through. (Remove a test that specifically required find_author
> to *not* create).
>
> More importantly, cover the case where we lose the race, by catching the
> IntegrityError and fetching the winning Person model.
>
> Signed-off-by: Daniel Axtens <dja at axtens.net>
LGTM apart from one comment below
Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>
> ---
> patchwork/parser.py | 32 ++++++++++++++++++--------------
> patchwork/tests/test_parser.py | 37 +++++++++++++++----------------------
> 2 files changed, 33 insertions(+), 36 deletions(-)
>
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index cbf88fe4e464..1f3b3dd8901f 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -29,6 +29,7 @@ import logging
> import re
>
> from django.contrib.auth.models import User
> +from django.db import IntegrityError
> from django.utils import six
>
> from patchwork.models import Comment
> @@ -241,7 +242,7 @@ def _find_series_by_references(project, mail):
> continue
>
>
> -def _find_series_by_markers(project, mail):
> +def _find_series_by_markers(project, mail, author):
> """Find a patch's series using series markers and sender.
>
> Identify suitable series for a patch using a combination of the
> @@ -262,7 +263,6 @@ def _find_series_by_markers(project, mail):
> still won't help us if someone spams the mailing list with
> duplicate series but that's a tricky situation for anyone to parse.
> """
> - author = find_author(mail)
>
> subject = mail.get('Subject')
> name, prefixes = clean_subject(subject, [project.linkname])
> @@ -282,7 +282,7 @@ def _find_series_by_markers(project, mail):
> return
>
>
> -def find_series(project, mail):
> +def find_series(project, mail, author):
> """Find a series, if any, for a given patch.
>
> Args:
> @@ -297,10 +297,10 @@ def find_series(project, mail):
> if series:
> return series
>
> - return _find_series_by_markers(project, mail)
> + return _find_series_by_markers(project, mail, author)
>
>
> -def find_author(mail):
> +def get_or_create_author(mail):
> from_header = clean_header(mail.get('From'))
>
> if not from_header:
> @@ -342,11 +342,16 @@ def find_author(mail):
> name = name.strip()[:255]
>
> try:
> + person = Person.objects.get_or_create(email__iexact=email,
> + defaults={'name': name,
> + 'email': email})[0]
> + except IntegrityError:
> + # we lost the race to create the person
> person = Person.objects.get(email__iexact=email)
get_or_create() should handle the IntegrityError in there for you.
> - if name: # use the latest provided name
> - person.name = name
> - except Person.DoesNotExist:
> - person = Person(name=name, email=email)
> +
> + if name: # use the latest provided name
> + person.name = name
> + person.save()
>
> return person
>
> @@ -947,7 +952,6 @@ def parse_mail(mail, list_id=None):
> raise ValueError("Broken 'Message-Id' header")
> msgid = msgid[:255]
>
> - author = find_author(mail)
> subject = mail.get('Subject')
> name, prefixes = clean_subject(subject, [project.linkname])
> is_comment = subject_check(subject)
> @@ -973,7 +977,7 @@ def parse_mail(mail, list_id=None):
>
> if not is_comment and (diff or pull_url): # patches or pull requests
> # we delay the saving until we know we have a patch.
> - author.save()
> + author = get_or_create_author(mail)
>
> delegate = find_delegate_by_header(mail)
> if not delegate and diff:
> @@ -984,7 +988,7 @@ def parse_mail(mail, list_id=None):
> # series to match against.
> series = None
> if n:
> - series = find_series(project, mail)
> + series = find_series(project, mail, author)
> else:
> x = n = 1
>
> @@ -1061,7 +1065,7 @@ def parse_mail(mail, list_id=None):
> is_cover_letter = True
>
> if is_cover_letter:
> - author.save()
> + author = get_or_create_author(mail)
>
> # we don't use 'find_series' here as a cover letter will
> # always be the first item in a thread, thus the references
> @@ -1109,7 +1113,7 @@ def parse_mail(mail, list_id=None):
> if not submission:
> return
>
> - author.save()
> + author = get_or_create_author(mail)
>
> comment = Comment(
> submission=submission,
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index 68bcb937b273..6cfe7a484443 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -34,7 +34,7 @@ from patchwork.models import Patch
> from patchwork.models import Person
> from patchwork.models import State
> from patchwork.parser import clean_subject
> -from patchwork.parser import find_author
> +from patchwork.parser import get_or_create_author
> from patchwork.parser import find_patch_content as find_content
> from patchwork.parser import find_project_by_header
> from patchwork.parser import find_series
> @@ -225,7 +225,7 @@ class SenderEncodingTest(TestCase):
>
> def _test_encoding(self, from_header, sender_name, sender_email):
> email = self._create_email(from_header)
> - person = find_author(email)
> + person = get_or_create_author(email)
> person.save()
>
> # ensure it was parsed correctly
> @@ -241,7 +241,7 @@ class SenderEncodingTest(TestCase):
> def test_empty(self):
> email = self._create_email('')
> with self.assertRaises(ValueError):
> - find_author(email)
> + get_or_create_author(email)
>
> def test_ascii_encoding(self):
> from_header = 'example user <user at example.com>'
> @@ -269,7 +269,7 @@ class SenderEncodingTest(TestCase):
>
>
> class SenderCorrelationTest(TestCase):
> - """Validate correct behavior of the find_author case.
> + """Validate correct behavior of the get_or_create_author case.
>
> Relies of checking the internal state of a Django model object.
>
> @@ -284,25 +284,16 @@ class SenderCorrelationTest(TestCase):
> 'test\n'
> return message_from_string(mail)
>
> - def test_non_existing_sender(self):
> - sender = 'Non-existing Sender <nonexisting at example.com>'
> - mail = self._create_email(sender)
> -
> - # don't create the person - attempt to find immediately
> - person = find_author(mail)
> - self.assertEqual(person._state.adding, True)
> - self.assertEqual(person.id, None)
> -
> def test_existing_sender(self):
> sender = 'Existing Sender <existing at example.com>'
> mail = self._create_email(sender)
>
> # create the person first
> - person_a = find_author(mail)
> + person_a = get_or_create_author(mail)
> person_a.save()
>
> # then attempt to parse email with the same 'From' line
> - person_b = find_author(mail)
> + person_b = get_or_create_author(mail)
> self.assertEqual(person_b._state.adding, False)
> self.assertEqual(person_b.id, person_a.id)
>
> @@ -311,12 +302,12 @@ class SenderCorrelationTest(TestCase):
> mail = self._create_email(sender)
>
> # create the person first
> - person_a = find_author(mail)
> + person_a = get_or_create_author(mail)
> person_a.save()
>
> # then attempt to parse email with a new 'From' line
> mail = self._create_email('existing at example.com')
> - person_b = find_author(mail)
> + person_b = get_or_create_author(mail)
> self.assertEqual(person_b._state.adding, False)
> self.assertEqual(person_b.id, person_a.id)
>
> @@ -324,11 +315,11 @@ class SenderCorrelationTest(TestCase):
> sender = 'Existing Sender <existing at example.com>'
> mail = self._create_email(sender)
>
> - person_a = find_author(mail)
> + person_a = get_or_create_author(mail)
> person_a.save()
>
> mail = self._create_email(sender.upper())
> - person_b = find_author(mail)
> + person_b = get_or_create_author(mail)
> self.assertEqual(person_b._state.adding, False)
> self.assertEqual(person_b.id, person_a.id)
>
> @@ -361,7 +352,8 @@ class SeriesCorrelationTest(TestCase):
> email = self._create_email(msgid)
> project = create_project()
>
> - self.assertIsNone(find_series(project, email))
> + self.assertIsNone(find_series(project, email,
> + get_or_create_author(email)))
>
> def test_first_reply(self):
> msgid_a = make_msgid()
> @@ -371,7 +363,8 @@ class SeriesCorrelationTest(TestCase):
> # assume msgid_a was already handled
> ref = create_series_reference(msgid=msgid_a)
>
> - series = find_series(ref.series.project, email)
> + series = find_series(ref.series.project, email,
> + get_or_create_author(email))
> self.assertEqual(series, ref.series)
>
> def test_nested_series(self):
> @@ -395,7 +388,7 @@ class SeriesCorrelationTest(TestCase):
> # ...and the "first patch" of this new series
> msgid = make_msgid()
> email = self._create_email(msgid, msgids)
> - series = find_series(project, email)
> + series = find_series(project, email, get_or_create_author(email))
>
> # this should link to the second series - not the first
> self.assertEqual(len(msgids), 4 + 1) # old series + new cover
>
--
Andrew Donnellan OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com IBM Australia Limited
More information about the Patchwork
mailing list