[PATCH 6/9] parser: close a TOCTTOU bug on Person creation

Andrew Donnellan andrew.donnellan at au1.ibm.com
Thu Feb 22 15:44:16 AEDT 2018


On 22/02/18 01:17, Daniel Axtens wrote:
> find_author looks up a person by email, and if they do not exist,
> creates a Person model, which may be saved later if the message
> contains something valuable.
> 
> Multiple simultaneous processes can race here: both can do the SELECT,
> find there is no Person, and create the model. One will succeed in
> saving, the other will get an IntegrityError.
> 
> Reduce the window by making find_author into get_or_create_author, and
> plumb that through. (Remove a test that specifically required find_author
> to *not* create).
> 
> More importantly, cover the case where we lose the race, by catching the
> IntegrityError and fetching the winning Person model.
> 
> Signed-off-by: Daniel Axtens <dja at axtens.net>

LGTM apart from one comment below

Reviewed-by: Andrew Donnellan <andrew.donnellan at au1.ibm.com>


> ---
>   patchwork/parser.py            | 32 ++++++++++++++++++--------------
>   patchwork/tests/test_parser.py | 37 +++++++++++++++----------------------
>   2 files changed, 33 insertions(+), 36 deletions(-)
> 
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index cbf88fe4e464..1f3b3dd8901f 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -29,6 +29,7 @@ import logging
>   import re
> 
>   from django.contrib.auth.models import User
> +from django.db import IntegrityError
>   from django.utils import six
> 
>   from patchwork.models import Comment
> @@ -241,7 +242,7 @@ def _find_series_by_references(project, mail):
>               continue
> 
> 
> -def _find_series_by_markers(project, mail):
> +def _find_series_by_markers(project, mail, author):
>       """Find a patch's series using series markers and sender.
> 
>       Identify suitable series for a patch using a combination of the
> @@ -262,7 +263,6 @@ def _find_series_by_markers(project, mail):
>       still won't help us if someone spams the mailing list with
>       duplicate series but that's a tricky situation for anyone to parse.
>       """
> -    author = find_author(mail)
> 
>       subject = mail.get('Subject')
>       name, prefixes = clean_subject(subject, [project.linkname])
> @@ -282,7 +282,7 @@ def _find_series_by_markers(project, mail):
>           return
> 
> 
> -def find_series(project, mail):
> +def find_series(project, mail, author):
>       """Find a series, if any, for a given patch.
> 
>       Args:
> @@ -297,10 +297,10 @@ def find_series(project, mail):
>       if series:
>           return series
> 
> -    return _find_series_by_markers(project, mail)
> +    return _find_series_by_markers(project, mail, author)
> 
> 
> -def find_author(mail):
> +def get_or_create_author(mail):
>       from_header = clean_header(mail.get('From'))
> 
>       if not from_header:
> @@ -342,11 +342,16 @@ def find_author(mail):
>           name = name.strip()[:255]
> 
>       try:
> +        person = Person.objects.get_or_create(email__iexact=email,
> +                                              defaults={'name': name,
> +                                                        'email': email})[0]
> +    except IntegrityError:
> +        # we lost the race to create the person
>           person = Person.objects.get(email__iexact=email)

get_or_create() should handle the IntegrityError in there for you.

> -        if name:  # use the latest provided name
> -            person.name = name
> -    except Person.DoesNotExist:
> -        person = Person(name=name, email=email)
> +
> +    if name:  # use the latest provided name
> +        person.name = name
> +        person.save()
> 
>       return person
> 
> @@ -947,7 +952,6 @@ def parse_mail(mail, list_id=None):
>           raise ValueError("Broken 'Message-Id' header")
>       msgid = msgid[:255]
> 
> -    author = find_author(mail)
>       subject = mail.get('Subject')
>       name, prefixes = clean_subject(subject, [project.linkname])
>       is_comment = subject_check(subject)
> @@ -973,7 +977,7 @@ def parse_mail(mail, list_id=None):
> 
>       if not is_comment and (diff or pull_url):  # patches or pull requests
>           # we delay the saving until we know we have a patch.
> -        author.save()
> +        author = get_or_create_author(mail)
> 
>           delegate = find_delegate_by_header(mail)
>           if not delegate and diff:
> @@ -984,7 +988,7 @@ def parse_mail(mail, list_id=None):
>           # series to match against.
>           series = None
>           if n:
> -            series = find_series(project, mail)
> +            series = find_series(project, mail, author)
>           else:
>               x = n = 1
> 
> @@ -1061,7 +1065,7 @@ def parse_mail(mail, list_id=None):
>                   is_cover_letter = True
> 
>           if is_cover_letter:
> -            author.save()
> +            author = get_or_create_author(mail)
> 
>               # we don't use 'find_series' here as a cover letter will
>               # always be the first item in a thread, thus the references
> @@ -1109,7 +1113,7 @@ def parse_mail(mail, list_id=None):
>       if not submission:
>           return
> 
> -    author.save()
> +    author = get_or_create_author(mail)
> 
>       comment = Comment(
>           submission=submission,
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index 68bcb937b273..6cfe7a484443 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -34,7 +34,7 @@ from patchwork.models import Patch
>   from patchwork.models import Person
>   from patchwork.models import State
>   from patchwork.parser import clean_subject
> -from patchwork.parser import find_author
> +from patchwork.parser import get_or_create_author
>   from patchwork.parser import find_patch_content as find_content
>   from patchwork.parser import find_project_by_header
>   from patchwork.parser import find_series
> @@ -225,7 +225,7 @@ class SenderEncodingTest(TestCase):
> 
>       def _test_encoding(self, from_header, sender_name, sender_email):
>           email = self._create_email(from_header)
> -        person = find_author(email)
> +        person = get_or_create_author(email)
>           person.save()
> 
>           # ensure it was parsed correctly
> @@ -241,7 +241,7 @@ class SenderEncodingTest(TestCase):
>       def test_empty(self):
>           email = self._create_email('')
>           with self.assertRaises(ValueError):
> -            find_author(email)
> +            get_or_create_author(email)
> 
>       def test_ascii_encoding(self):
>           from_header = 'example user <user at example.com>'
> @@ -269,7 +269,7 @@ class SenderEncodingTest(TestCase):
> 
> 
>   class SenderCorrelationTest(TestCase):
> -    """Validate correct behavior of the find_author case.
> +    """Validate correct behavior of the get_or_create_author case.
> 
>       Relies of checking the internal state of a Django model object.
> 
> @@ -284,25 +284,16 @@ class SenderCorrelationTest(TestCase):
>                  'test\n'
>           return message_from_string(mail)
> 
> -    def test_non_existing_sender(self):
> -        sender = 'Non-existing Sender <nonexisting at example.com>'
> -        mail = self._create_email(sender)
> -
> -        # don't create the person - attempt to find immediately
> -        person = find_author(mail)
> -        self.assertEqual(person._state.adding, True)
> -        self.assertEqual(person.id, None)
> -
>       def test_existing_sender(self):
>           sender = 'Existing Sender <existing at example.com>'
>           mail = self._create_email(sender)
> 
>           # create the person first
> -        person_a = find_author(mail)
> +        person_a = get_or_create_author(mail)
>           person_a.save()
> 
>           # then attempt to parse email with the same 'From' line
> -        person_b = find_author(mail)
> +        person_b = get_or_create_author(mail)
>           self.assertEqual(person_b._state.adding, False)
>           self.assertEqual(person_b.id, person_a.id)
> 
> @@ -311,12 +302,12 @@ class SenderCorrelationTest(TestCase):
>           mail = self._create_email(sender)
> 
>           # create the person first
> -        person_a = find_author(mail)
> +        person_a = get_or_create_author(mail)
>           person_a.save()
> 
>           # then attempt to parse email with a new 'From' line
>           mail = self._create_email('existing at example.com')
> -        person_b = find_author(mail)
> +        person_b = get_or_create_author(mail)
>           self.assertEqual(person_b._state.adding, False)
>           self.assertEqual(person_b.id, person_a.id)
> 
> @@ -324,11 +315,11 @@ class SenderCorrelationTest(TestCase):
>           sender = 'Existing Sender <existing at example.com>'
>           mail = self._create_email(sender)
> 
> -        person_a = find_author(mail)
> +        person_a = get_or_create_author(mail)
>           person_a.save()
> 
>           mail = self._create_email(sender.upper())
> -        person_b = find_author(mail)
> +        person_b = get_or_create_author(mail)
>           self.assertEqual(person_b._state.adding, False)
>           self.assertEqual(person_b.id, person_a.id)
> 
> @@ -361,7 +352,8 @@ class SeriesCorrelationTest(TestCase):
>           email = self._create_email(msgid)
>           project = create_project()
> 
> -        self.assertIsNone(find_series(project, email))
> +        self.assertIsNone(find_series(project, email,
> +                                      get_or_create_author(email)))
> 
>       def test_first_reply(self):
>           msgid_a = make_msgid()
> @@ -371,7 +363,8 @@ class SeriesCorrelationTest(TestCase):
>           # assume msgid_a was already handled
>           ref = create_series_reference(msgid=msgid_a)
> 
> -        series = find_series(ref.series.project, email)
> +        series = find_series(ref.series.project, email,
> +                             get_or_create_author(email))
>           self.assertEqual(series, ref.series)
> 
>       def test_nested_series(self):
> @@ -395,7 +388,7 @@ class SeriesCorrelationTest(TestCase):
>           # ...and the "first patch" of this new series
>           msgid = make_msgid()
>           email = self._create_email(msgid, msgids)
> -        series = find_series(project, email)
> +        series = find_series(project, email, get_or_create_author(email))
> 
>           # this should link to the second series - not the first
>           self.assertEqual(len(msgids), 4 + 1)  # old series + new cover
> 

-- 
Andrew Donnellan              OzLabs, ADL Canberra
andrew.donnellan at au1.ibm.com  IBM Australia Limited



More information about the Patchwork mailing list