[PATCH 2/n] parser: Ignore CFWS in Message-ID header
Stephen Finucane
stephen at that.guru
Sat May 7 03:49:26 AEST 2022
We recently started stripping comments and folding white space from the
In-Reply-To and References headers. Do so also for the Message-ID
header.
Signed-off-by: Stephen Finucane <stephen at that.guru>
Related: #399
---
patchwork/parser.py | 43 ++++++++++++++++++++++++++--------
patchwork/tests/test_parser.py | 32 +++++++++++++++++++++++++
2 files changed, 65 insertions(+), 10 deletions(-)
diff --git patchwork/parser.py patchwork/parser.py
index 17cc2325..f219f466 100644
--- patchwork/parser.py
+++ patchwork/parser.py
@@ -236,15 +236,14 @@ def _find_series_by_references(project, mail):
name, prefixes = clean_subject(subject, [project.linkname])
version = parse_version(name, prefixes)
- refs = find_references(mail)
- h = clean_header(mail.get('Message-Id'))
- if h:
- refs = [h] + refs
+ msg_id = find_message_id(mail)
+ refs = [msg_id] + find_references(mail)
for ref in refs:
try:
series = SeriesReference.objects.get(
- msgid=ref[:255], project=project).series
+ msgid=ref[:255], project=project,
+ ).series
if series.version != version:
# if the versions don't match, at least make sure these were
@@ -473,6 +472,34 @@ def find_headers(mail):
return '\n'.join(strings)
+def find_message_id(mail):
+ """Extract the 'message-id' headers from a given mail and validate it.
+
+ The validation here is simply checking that the Message-ID is correctly
+ formatted per RFC-2822. However, even if it's not we'll attempt to use what
+ we're given because a patch tracked in Patchwork with janky threading is
+ better than no patch whatsoever.
+ """
+ header = clean_header(mail.get('Message-Id'))
+ if not header:
+ raise ValueError("Broken 'Message-Id' header")
+
+ msgid = _msgid_re.search(header)
+ if msgid:
+ msgid = msgid.group(0)
+ else:
+ # This is only info level since the admin likely can't do anything
+ # about this
+ logger.info(
+ "Malformed 'Message-Id' header. The 'msg-id' component should be "
+ "surrounded by angle brackets. Saving raw header. This may "
+ "include comments and extra comments."
+ )
+ msgid = header
+
+ return msgid[:255]
+
+
def find_references(mail):
"""Construct a list of possible reply message ids.
@@ -1062,11 +1089,7 @@ def parse_mail(mail, list_id=None):
# parse metadata
- msgid = clean_header(mail.get('Message-Id'))
- if not msgid:
- raise ValueError("Broken 'Message-Id' header")
- msgid = msgid[:255]
-
+ msgid = find_message_id(mail)
subject = mail.get('Subject')
name, prefixes = clean_subject(subject, [project.linkname])
is_comment = subject_check(subject)
diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
index f65ad4b1..980a8afb 100644
--- patchwork/tests/test_parser.py
+++ patchwork/tests/test_parser.py
@@ -1265,6 +1265,38 @@ class DuplicateMailTest(TestCase):
self.assertEqual(Cover.objects.count(), 1)
+class TestFindMessageID(TestCase):
+
+ def test_find_message_id__missing_header(self):
+ email = create_email('test')
+ del email['Message-Id']
+ email['Message-Id'] = ''
+
+ with self.assertRaises(ValueError) as cm:
+ parser.find_message_id(email)
+ self.assertIn("Broken 'Message-Id' header", str(cm.exeception))
+
+ def test_find_message_id__header_with_comments(self):
+ """Test that we strip comments from the Message-ID field."""
+ message_id = '<xnzgy1de8d.fsf at rhel8.vm> (message ID with a comment)'
+ email = create_email('test', msgid=message_id)
+
+ expected = '<xnzgy1de8d.fsf at rhel8.vm>'
+ actual = parser.find_message_id(email)
+
+ self.assertEqual(expected, actual)
+
+ def test_find_message_id__invalid_header_fallback(self):
+ """Test that we accept badly formatted Message-ID fields."""
+ message_id = '5899d592-8c87-47d9-92b6-d34260ce1aa4 at radware.com>'
+ email = create_email('test', msgid=message_id)
+
+ expected = '5899d592-8c87-47d9-92b6-d34260ce1aa4 at radware.com>'
+ actual = parser.find_message_id(email)
+
+ self.assertEqual(expected, actual)
+
+
class TestFindReferences(TestCase):
def test_find_references__header_with_comments(self):
--
2.35.1
More information about the Patchwork
mailing list