[PATCH] parser: Unmangle From: headers that have been mangled for DMARC purposes
Jonathan Nieder
jrnieder at gmail.com
Fri Oct 11 06:41:32 AEDT 2019
Hi,
Andrew Donnellan wrote:
> To avoid triggering spam filters due to failed signature validation, many
> mailing lists mangle the From header to change the From address to be the
> address of the list, typically where the sender's domain has a strict DMARC
> policy enabled.
>
> In this case, we should try to unmangle the From header.
>
> Add support for using the X-Original-Sender or Reply-To headers, as used by
> Google Groups and Mailman respectively, to unmangle the From header when
> necessary.
>
> Closes: #64 ("Incorrect submitter when using googlegroups")
> Reported-by: Alexandre Belloni <alexandre.belloni at bootlin.com>
> Reported-by: Stephen Rothwell <sfr at canb.auug.org.au>
> Signed-off-by: Andrew Donnellan <ajd at linux.ibm.com>
> ---
> patchwork/parser.py | 75 ++++++++++++++++++++++++++++++----
> patchwork/tests/test_parser.py | 68 ++++++++++++++++++++++++++++--
> 2 files changed, 130 insertions(+), 13 deletions(-)
Interesting! I'm cc-ing the Git mailing list in case "git am" might
wnat to learn the same support.
Thanks,
Jonathan
(patch left unsnipped for reference)
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 7dc66bc05a5b..79beac5617b1 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -321,12 +321,7 @@ def find_series(project, mail, author):
> return _find_series_by_markers(project, mail, author)
>
>
> -def get_or_create_author(mail):
> - from_header = clean_header(mail.get('From'))
> -
> - if not from_header:
> - raise ValueError("Invalid 'From' header")
> -
> +def split_from_header(from_header):
> name, email = (None, None)
>
> # tuple of (regex, fn)
> @@ -355,6 +350,65 @@ def get_or_create_author(mail):
> (name, email) = fn(match.groups())
> break
>
> + return (name, email)
> +
> +
> +# Unmangle From addresses that have been mangled for DMARC purposes.
> +#
> +# To avoid triggering spam filters due to failed signature validation, many
> +# mailing lists mangle the From header to change the From address to be the
> +# address of the list, typically where the sender's domain has a strict
> +# DMARC policy enabled.
> +#
> +# Unfortunately, there's no standardised way of preserving the original
> +# From address.
> +#
> +# Google Groups adds an X-Original-Sender header. If present, we use that.
> +#
> +# Mailman preserves the original address by adding a Reply-To, except in the
> +# case where the list is set to either reply to list, or reply to a specific
> +# address, in which case the original From is added to Cc instead. These corner
> +# cases are dumb, but we try and handle things as sensibly as possible by
> +# looking for a name in Reply-To/Cc that matches From. It's inexact but should
> +# be good enough for our purposes.
> +def get_original_sender(mail, name, email):
> + if name and ' via ' in name:
> + # Mailman uses the format "<name> via <list>"
> + # Google Groups uses "'<name>' via <list>"
> + stripped_name = name[:name.rfind(' via ')].strip().strip("'")
> +
> + original_sender = clean_header(mail.get('X-Original-Sender', ''))
> + if original_sender:
> + new_email = split_from_header(original_sender)[1].strip()[:255]
> + return (stripped_name, new_email)
> +
> + addrs = []
> + reply_to_headers = mail.get_all('Reply-To') or []
> + cc_headers = mail.get_all('Cc') or []
> + for header in reply_to_headers + cc_headers:
> + header = clean_header(header)
> + addrs = header.split(",")
> + for addr in addrs:
> + new_name, new_email = split_from_header(addr)
> + if new_name:
> + new_name = new_name.strip()[:255]
> + if new_email:
> + new_email = new_email.strip()[:255]
> + if new_name == stripped_name:
> + return (stripped_name, new_email)
> +
> + # If we can't figure out the original sender, just keep it as is
> + return (name, email)
> +
> +
> +def get_or_create_author(mail, project=None):
> + from_header = clean_header(mail.get('From'))
> +
> + if not from_header:
> + raise ValueError("Invalid 'From' header")
> +
> + name, email = split_from_header(from_header)
> +
> if not email:
> raise ValueError("Invalid 'From' header")
>
> @@ -362,6 +416,9 @@ def get_or_create_author(mail):
> if name is not None:
> name = name.strip()[:255]
>
> + if project and email.lower() == project.listemail.lower():
> + name, email = get_original_sender(mail, name, email)
> +
> # this correctly handles the case where we lose the race to create
> # the person and another process beats us to it. (If the record
> # does not exist, g_o_c invokes _create_object_from_params which
> @@ -1004,7 +1061,7 @@ def parse_mail(mail, list_id=None):
>
> if not is_comment and (diff or pull_url): # patches or pull requests
> # we delay the saving until we know we have a patch.
> - author = get_or_create_author(mail)
> + author = get_or_create_author(mail, project)
>
> delegate = find_delegate_by_header(mail)
> if not delegate and diff:
> @@ -1099,7 +1156,7 @@ def parse_mail(mail, list_id=None):
> is_cover_letter = True
>
> if is_cover_letter:
> - author = get_or_create_author(mail)
> + author = get_or_create_author(mail, project)
>
> # we don't use 'find_series' here as a cover letter will
> # always be the first item in a thread, thus the references
> @@ -1159,7 +1216,7 @@ def parse_mail(mail, list_id=None):
> if not submission:
> return
>
> - author = get_or_create_author(mail)
> + author = get_or_create_author(mail, project)
>
> try:
> comment = Comment.objects.create(
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index e5a2fca3044e..85c6e7550f93 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -265,11 +265,23 @@ class SenderCorrelationTest(TestCase):
> """
>
> @staticmethod
> - def _create_email(from_header):
> + def _create_email(from_header, reply_tos=None, ccs=None,
> + x_original_sender=None):
> mail = 'Message-Id: %s\n' % make_msgid() + \
> - 'From: %s\n' % from_header + \
> - 'Subject: Tests\n\n'\
> - 'test\n'
> + 'From: %s\n' % from_header
> +
> + if reply_tos:
> + mail += 'Reply-To: %s\n' % ', '.join(reply_tos)
> +
> + if ccs:
> + mail += 'Cc: %s\n' % ', '.join(ccs)
> +
> + if x_original_sender:
> + mail += 'X-Original-Sender: %s\n' % x_original_sender
> +
> + mail += 'Subject: Tests\n\n'\
> + 'test\n'
> +
> return message_from_string(mail)
>
> def test_existing_sender(self):
> @@ -311,6 +323,54 @@ class SenderCorrelationTest(TestCase):
> self.assertEqual(person_b._state.adding, False)
> self.assertEqual(person_b.id, person_a.id)
>
> + def test_mailman_dmarc_munging(self):
> + project = create_project()
> + real_sender = 'Existing Sender <existing at example.com>'
> + munged_sender = 'Existing Sender via List <{}>'.format(
> + project.listemail)
> + other_email = 'Other Person <other at example.com>'
> +
> + # Unmunged author
> + mail = self._create_email(real_sender)
> + person_a = get_or_create_author(mail, project)
> + person_a.save()
> +
> + # Single Reply-To
> + mail = self._create_email(munged_sender, [real_sender])
> + person_b = get_or_create_author(mail, project)
> + self.assertEqual(person_b._state.adding, False)
> + self.assertEqual(person_b.id, person_a.id)
> +
> + # Single Cc
> + mail = self._create_email(munged_sender, [], [real_sender])
> + person_b = get_or_create_author(mail, project)
> + self.assertEqual(person_b._state.adding, False)
> + self.assertEqual(person_b.id, person_a.id)
> +
> + # Multiple Reply-Tos and Ccs
> + mail = self._create_email(munged_sender, [other_email, real_sender],
> + [other_email, other_email])
> + person_b = get_or_create_author(mail, project)
> + self.assertEqual(person_b._state.adding, False)
> + self.assertEqual(person_b.id, person_a.id)
> +
> + def test_google_dmarc_munging(self):
> + project = create_project()
> + real_sender = 'Existing Sender <existing at example.com>'
> + munged_sender = "'Existing Sender' via List <{}>".format(
> + project.listemail)
> +
> + # Unmunged author
> + mail = self._create_email(real_sender)
> + person_a = get_or_create_author(mail, project)
> + person_a.save()
> +
> + # X-Original-Sender header
> + mail = self._create_email(munged_sender, None, None, real_sender)
> + person_b = get_or_create_author(mail, project)
> + self.assertEqual(person_b._state.adding, False)
> + self.assertEqual(person_b.id, person_a.id)
> +
>
> class SeriesCorrelationTest(TestCase):
> """Validate correct behavior of find_series."""
More information about the Patchwork
mailing list