[PATCH] parser: Ignore CFWS in In-Reply-To, References headers
Stephen Finucane
stephen at that.guru
Sat May 7 03:10:27 AEST 2022
RFC2822 states that [1] a comment or folding white space is permitted to
be inserted before or after a msg-id in in the Message-ID, In-Reply-To
or References fields. Allow for this.
[1] https://tools.ietf.org/html/rfc2822#section-3.6.4
Signed-off-by: Stephen Finucane <stephen at that.guru>
Closes: #399
---
patchwork/parser.py | 13 ++-
patchwork/tests/test_parser.py | 79 +++++++++++++++++--
.../notes/issue-399-09d6f17aa54b14b2.yaml | 11 +++
3 files changed, 89 insertions(+), 14 deletions(-)
create mode 100644 releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml
diff --git patchwork/parser.py patchwork/parser.py
index e6e1a7fb..17cc2325 100644
--- patchwork/parser.py
+++ patchwork/parser.py
@@ -31,6 +31,7 @@ from patchwork.models import SeriesReference
from patchwork.models import State
+_msgid_re = re.compile(r'<[^>]+>')
_hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
_filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
@@ -483,19 +484,15 @@ def find_references(mail):
if 'In-Reply-To' in mail:
for in_reply_to in mail.get_all('In-Reply-To'):
- r = clean_header(in_reply_to)
- if r:
- refs.append(r)
+ ref = _msgid_re.search(clean_header(in_reply_to))
+ if ref:
+ refs.append(ref.group(0))
if 'References' in mail:
for references_header in mail.get_all('References'):
- h = clean_header(references_header)
- if not h:
- continue
- references = h.split()
+ references = _msgid_re.findall(clean_header(references_header))
references.reverse()
for ref in references:
- ref = ref.strip()
if ref not in refs:
refs.append(ref)
diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py
index 12f984bc..f65ad4b1 100644
--- patchwork/tests/test_parser.py
+++ patchwork/tests/test_parser.py
@@ -74,6 +74,7 @@ def _create_email(
sender=None,
listid=None,
in_reply_to=None,
+ references=None,
headers=None,
):
msg['Message-Id'] = msgid or make_msgid()
@@ -84,6 +85,9 @@ def _create_email(
if in_reply_to:
msg['In-Reply-To'] = in_reply_to
+ if references:
+ msg['References'] = references
+
for header in headers or {}:
msg[header] = headers[header]
@@ -97,12 +101,21 @@ def create_email(
sender=None,
listid=None,
in_reply_to=None,
+ references=None,
headers=None,
):
msg = MIMEText(content, _charset='us-ascii')
return _create_email(
- msg, msgid, subject, sender, listid, in_reply_to, headers)
+ msg,
+ msgid,
+ subject,
+ sender,
+ listid,
+ in_reply_to,
+ references,
+ headers,
+ )
def parse_mail(*args, **kwargs):
@@ -1216,7 +1229,7 @@ class DuplicateMailTest(TestCase):
def test_duplicate_patch(self):
diff = read_patch('0001-add-line.patch')
- m = create_email(diff, listid=self.listid, msgid='1 at example.com')
+ m = create_email(diff, listid=self.listid, msgid='<1 at example.com>')
self._test_duplicate_mail(m)
@@ -1224,18 +1237,26 @@ class DuplicateMailTest(TestCase):
def test_duplicate_comment(self):
diff = read_patch('0001-add-line.patch')
- m1 = create_email(diff, listid=self.listid, msgid='1 at example.com')
+ m1 = create_email(
+ diff,
+ listid=self.listid,
+ msgid='<1 at example.com>',
+ )
_parse_mail(m1)
- m2 = create_email('test', listid=self.listid, msgid='2 at example.com',
- in_reply_to='1 at example.com')
+ m2 = create_email(
+ 'test',
+ listid=self.listid,
+ msgid='<2 at example.com>',
+ in_reply_to='<1 at example.com>',
+ )
self._test_duplicate_mail(m2)
self.assertEqual(Patch.objects.count(), 1)
self.assertEqual(PatchComment.objects.count(), 1)
def test_duplicate_coverletter(self):
- m = create_email('test', listid=self.listid, msgid='1 at example.com')
+ m = create_email('test', listid=self.listid, msgid='<1 at example.com>')
del m['Subject']
m['Subject'] = '[PATCH 0/1] test cover letter'
@@ -1244,6 +1265,52 @@ class DuplicateMailTest(TestCase):
self.assertEqual(Cover.objects.count(), 1)
+class TestFindReferences(TestCase):
+
+ def test_find_references__header_with_comments(self):
+ """Test that we strip comments from References, In-Reply-To fields."""
+ in_reply_to = (
+ '<4574b99b-edac-d8dc-9141-79c3109d2fcc at huawei.com> (message from\n'
+ ' liqingqing on Thu, 1 Apr 2021 16:51:45 +0800)'
+ )
+ email = create_email('test', in_reply_to=in_reply_to)
+
+ expected = ['<4574b99b-edac-d8dc-9141-79c3109d2fcc at huawei.com>']
+ actual = parser.find_references(email)
+
+ self.assertEqual(expected, actual)
+
+ def test_find_references__duplicate_references(self):
+ """Test that we ignore duplicate message IDs in 'References'."""
+ message_id = '<20130510114450.7104c5d2 at nehalam.linuxnetplumber.net>'
+ in_reply_to = \
+ '<525534677.5312512.1368202896189.JavaMail.root at vmware.com>'
+ references = (
+ '<AFCFCEB8EB0E24448E4EB95988BA1E531FA2EA0D at xmb-aln-x05.cisco.com>\n' # noqa: E501
+ ' <CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q at mail.gmail.com>\n' # noqa: E501
+ ' <1676591087.5291867.1368201908283.JavaMail.root at vmware.com>\n'
+ ' <20130510091549.3c064df6 at nehalam.linuxnetplumber.net>\n'
+ ' <525534677.5312512.1368202896189.JavaMail.root at vmware.com>'
+ )
+ email = create_email(
+ 'test',
+ msgid=message_id,
+ in_reply_to=in_reply_to,
+ references=references,
+ )
+
+ expected = [
+ '<525534677.5312512.1368202896189.JavaMail.root at vmware.com>',
+ '<20130510091549.3c064df6 at nehalam.linuxnetplumber.net>',
+ '<1676591087.5291867.1368201908283.JavaMail.root at vmware.com>',
+ '<CAE68AUOr7B5a2QvduJhH0kEHPi+sR9X3qfrtumgLxT1BK4VS+Q at mail.gmail.com>', # noqa: E501
+ '<AFCFCEB8EB0E24448E4EB95988BA1E531FA2EA0D at xmb-aln-x05.cisco.com>',
+ ]
+ actual = parser.find_references(email)
+
+ self.assertEqual(expected, actual)
+
+
class TestCommentCorrelation(TestCase):
def test_find_patch_for_comment__no_reply(self):
diff --git releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml
new file mode 100644
index 00000000..e925a606
--- /dev/null
+++ releasenotes/notes/issue-399-09d6f17aa54b14b2.yaml
@@ -0,0 +1,11 @@
+---
+fixes:
+ - |
+ Comments and whitespace are now correctly stripped from the ``Message-ID``,
+ ``In-Reply-To``, and ``References`` headers. One side effect of this change
+ is that the parser is now stricter with regards to the format of the
+ ``msg-id`` component of these headers: all identifiers must now be
+ surrounded by angle brackets, e.g. ``<abcdef at example.com>``. This is
+ mandated in the spec and a review of mailing lists archives suggest it is
+ broadly adhered to. Without these markers, there is no way to delimit
+ ``msg-id`` from any surrounding comments and whitespace.
--
2.35.1
More information about the Patchwork
mailing list