[PATCH 2/n] parser: Ignore CFWS in Message-ID header
Stephen Finucane
stephen at that.guru
Sat May 7 03:53:18 AEST 2022
On Fri, 2022-05-06 at 18:49 +0100, Stephen Finucane wrote:
> We recently started stripping comments and folding white space from the
> In-Reply-To and References headers. Do so also for the Message-ID
> header.
>
> Signed-off-by: Stephen Finucane <stephen at that.guru>
> Related: #399
> ---
> patchwork/parser.py | 43 ++++++++++++++++++++++++++--------
> patchwork/tests/test_parser.py | 32 +++++++++++++++++++++++++
> 2 files changed, 65 insertions(+), 10 deletions(-)
>
> diff --git patchwork/parser.py patchwork/parser.py
> index 17cc2325..f219f466 100644
> --- patchwork/parser.py
> +++ patchwork/parser.py
> @@ -236,15 +236,14 @@ def _find_series_by_references(project, mail):
> name, prefixes = clean_subject(subject, [project.linkname])
> version = parse_version(name, prefixes)
>
> - refs = find_references(mail)
> - h = clean_header(mail.get('Message-Id'))
> - if h:
> - refs = [h] + refs
> + msg_id = find_message_id(mail)
> + refs = [msg_id] + find_references(mail)
>
> for ref in refs:
> try:
> series = SeriesReference.objects.get(
> - msgid=ref[:255], project=project).series
> + msgid=ref[:255], project=project,
> + ).series
>
> if series.version != version:
> # if the versions don't match, at least make sure these were
> @@ -473,6 +472,34 @@ def find_headers(mail):
> return '\n'.join(strings)
>
>
> +def find_message_id(mail):
> + """Extract the 'message-id' headers from a given mail and validate it.
> +
> + The validation here is simply checking that the Message-ID is correctly
> + formatted per RFC-2822. However, even if it's not we'll attempt to use what
> + we're given because a patch tracked in Patchwork with janky threading is
> + better than no patch whatsoever.
> + """
> + header = clean_header(mail.get('Message-Id'))
> + if not header:
> + raise ValueError("Broken 'Message-Id' header")
> +
> + msgid = _msgid_re.search(header)
> + if msgid:
> + msgid = msgid.group(0)
> + else:
> + # This is only info level since the admin likely can't do anything
> + # about this
> + logger.info(
> + "Malformed 'Message-Id' header. The 'msg-id' component should be "
> + "surrounded by angle brackets. Saving raw header. This may "
> + "include comments and extra comments."
> + )
I wonder if I should do this also for the 'In-Reply-To' field? I can't do it
easily for the 'References' field since there's zero way to delineate things if
I do, but that shouldn't matter since as long as we don't drop patches and
things appear roughly in order, we don't really need the 'References' field:
'In-Reply-To' is enough to find *a* parent.
Stephen
> + msgid = header
> +
> + return msgid[:255]
> +
> +
> def find_references(mail):
> """Construct a list of possible reply message ids.
>
> @@ -1062,11 +1089,7 @@ def parse_mail(mail, list_id=None):
>
> # parse metadata
>
> - msgid = clean_header(mail.get('Message-Id'))
> - if not msgid:
> - raise ValueError("Broken 'Message-Id' header")
> - msgid = msgid[:255]
> -
> + msgid = find_message_id(mail)
> subject = mail.get('Subject')
> name, prefixes = clean_subject(subject, [project.linkname])
> is_comment = subject_check(subject)
> diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
> index f65ad4b1..980a8afb 100644
> --- patchwork/tests/test_parser.py
> +++ patchwork/tests/test_parser.py
> @@ -1265,6 +1265,38 @@ class DuplicateMailTest(TestCase):
> self.assertEqual(Cover.objects.count(), 1)
>
>
> +class TestFindMessageID(TestCase):
> +
> + def test_find_message_id__missing_header(self):
> + email = create_email('test')
> + del email['Message-Id']
> + email['Message-Id'] = ''
> +
> + with self.assertRaises(ValueError) as cm:
> + parser.find_message_id(email)
> + self.assertIn("Broken 'Message-Id' header", str(cm.exeception))
> +
> + def test_find_message_id__header_with_comments(self):
> + """Test that we strip comments from the Message-ID field."""
> + message_id = '<xnzgy1de8d.fsf at rhel8.vm> (message ID with a comment)'
> + email = create_email('test', msgid=message_id)
> +
> + expected = '<xnzgy1de8d.fsf at rhel8.vm>'
> + actual = parser.find_message_id(email)
> +
> + self.assertEqual(expected, actual)
> +
> + def test_find_message_id__invalid_header_fallback(self):
> + """Test that we accept badly formatted Message-ID fields."""
> + message_id = '5899d592-8c87-47d9-92b6-d34260ce1aa4 at radware.com>'
> + email = create_email('test', msgid=message_id)
> +
> + expected = '5899d592-8c87-47d9-92b6-d34260ce1aa4 at radware.com>'
> + actual = parser.find_message_id(email)
> +
> + self.assertEqual(expected, actual)
> +
> +
> class TestFindReferences(TestCase):
>
> def test_find_references__header_with_comments(self):
More information about the Patchwork
mailing list