[PATCH 1/3] parse(mail|archive): handle early fail within email module

Daniel Axtens dja at axtens.net
Sat Jul 1 14:28:42 AEST 2017


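Certain badly malformed email messages can cause a failure inside
Python's email module itself (at least on py3), before our own parsing
code runs. Catch the resulting AttributeError, log a warning, and bail
out instead of crashing.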

Signed-off-by: Daniel Axtens <dja at axtens.net>
---
 patchwork/management/commands/parsearchive.py | 15 +++++++++++++
 patchwork/management/commands/parsemail.py    | 31 ++++++++++++++++-----------
 2 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
index 4e102a988e76..a9abbbff0d0d 100644
--- a/patchwork/management/commands/parsearchive.py
+++ b/patchwork/management/commands/parsearchive.py
@@ -77,6 +77,21 @@ class Command(BaseCommand):
 
         count = len(mbox)
 
+        # Iterate through the mbox. This will pick up exceptions that
+        # are only thrown when a broken email is found part way
+        # through. Without this block, we'd get the exception thrown
+        # in enumerate(mbox) below, which is harder to catch.
+        #
+        # The alternative is converting the mbox to a list of
+        # messages, but that requires holding the entire thing in
+        # memory, which is wasteful.
+        try:
+            for m in mbox:
+                pass
+        except AttributeError:
+            logger.warning('Broken mbox/Maildir, aborting')
+            return
+
         logger.info('Parsing %d mails', count)
         for i, msg in enumerate(mbox):
             try:
diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py
index 9adfb25b09e3..52ec8bc56899 100644
--- a/patchwork/management/commands/parsemail.py
+++ b/patchwork/management/commands/parsemail.py
@@ -58,20 +58,25 @@ class Command(base.BaseCommand):
     def handle(self, *args, **options):
         infile = args[0] if args else options['infile']
 
-        if infile:
-            logger.info('Parsing mail loaded by filename')
-            if six.PY3:
-                with open(infile, 'rb') as file_:
-                    mail = email.message_from_binary_file(file_)
-            else:
-                with open(infile) as file_:
-                    mail = email.message_from_file(file_)
-        else:
-            logger.info('Parsing mail loaded from stdin')
-            if six.PY3:
-                mail = email.message_from_binary_file(sys.stdin.buffer)
+        try:
+            if infile:
+                logger.info('Parsing mail loaded by filename')
+                if six.PY3:
+                    with open(infile, 'rb') as file_:
+                        mail = email.message_from_binary_file(file_)
+                else:
+                    with open(infile) as file_:
+                        mail = email.message_from_file(file_)
             else:
-                mail = email.message_from_file(sys.stdin)
+                logger.info('Parsing mail loaded from stdin')
+                if six.PY3:
+                    mail = email.message_from_binary_file(sys.stdin.buffer)
+                else:
+                    mail = email.message_from_file(sys.stdin)
+        except AttributeError:
+            logger.warning("Broken email ignored")
+            return
+
         try:
             result = parse_mail(mail, options['list_id'])
             if result:
-- 
2.11.0



More information about the Patchwork mailing list