[PATCH 08/10] parse(mail|archive): handle early fail within email module

Daniel Axtens dja at axtens.net
Wed Jun 28 17:48:50 AEST 2017


Certain really messed up email messages can cause a failure within
the email module (at least on py3). Catch this.

Signed-off-by: Daniel Axtens <dja at axtens.net>
---
 patchwork/management/commands/parsearchive.py |  9 ++++++++
 patchwork/management/commands/parsemail.py    | 31 ++++++++++++++++-----------
 2 files changed, 27 insertions(+), 13 deletions(-)

diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
index a3c8360186c8..3aab58a45bcd 100644
--- a/patchwork/management/commands/parsearchive.py
+++ b/patchwork/management/commands/parsearchive.py
@@ -77,6 +77,15 @@ class Command(BaseCommand):
 
         count = len(mbox)
 
+        # detect broken mails in the mbox
+        # see earlyfail fuzz test on py3
+        try:
+            for m in mbox:
+                pass
+        except AttributeError:
+            logger.warning('Broken mbox/Maildir, aborting')
+            return
+
         logger.info('Parsing %d mails', count)
         for i, msg in enumerate(mbox):
             try:
diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py
index 9adfb25b09e3..52ec8bc56899 100644
--- a/patchwork/management/commands/parsemail.py
+++ b/patchwork/management/commands/parsemail.py
@@ -58,20 +58,25 @@ class Command(base.BaseCommand):
     def handle(self, *args, **options):
         infile = args[0] if args else options['infile']
 
-        if infile:
-            logger.info('Parsing mail loaded by filename')
-            if six.PY3:
-                with open(infile, 'rb') as file_:
-                    mail = email.message_from_binary_file(file_)
-            else:
-                with open(infile) as file_:
-                    mail = email.message_from_file(file_)
-        else:
-            logger.info('Parsing mail loaded from stdin')
-            if six.PY3:
-                mail = email.message_from_binary_file(sys.stdin.buffer)
+        try:
+            if infile:
+                logger.info('Parsing mail loaded by filename')
+                if six.PY3:
+                    with open(infile, 'rb') as file_:
+                        mail = email.message_from_binary_file(file_)
+                else:
+                    with open(infile) as file_:
+                        mail = email.message_from_file(file_)
             else:
-                mail = email.message_from_file(sys.stdin)
+                logger.info('Parsing mail loaded from stdin')
+                if six.PY3:
+                    mail = email.message_from_binary_file(sys.stdin.buffer)
+                else:
+                    mail = email.message_from_file(sys.stdin)
+        except AttributeError:
+            logger.warning("Broken email ignored")
+            return
+
         try:
             result = parse_mail(mail, options['list_id'])
             if result:
-- 
2.11.0



More information about the Patchwork mailing list