[PATCH 1/3] parse(mail|archive): handle early fail within email module
Daniel Axtens
dja at axtens.net
Sat Jul 1 14:28:42 AEST 2017
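Certain badly malformed email messages can trigger an exception inside
Python's email module itself (at least on Python 3), before patchwork's
own parsing runs. Catch the resulting AttributeError in both parsemail
and parsearchive, log a warning, and return instead of crashing.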
Signed-off-by: Daniel Axtens <dja at axtens.net>
---
patchwork/management/commands/parsearchive.py | 15 +++++++++++++
patchwork/management/commands/parsemail.py | 31 ++++++++++++++++-----------
2 files changed, 33 insertions(+), 13 deletions(-)
diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
index 4e102a988e76..a9abbbff0d0d 100644
--- a/patchwork/management/commands/parsearchive.py
+++ b/patchwork/management/commands/parsearchive.py
@@ -77,6 +77,21 @@ class Command(BaseCommand):
count = len(mbox)
+ # Iterate through the mbox. This will pick up exceptions that
+ # are only thrown when a broken email is found part way
+ # through. Without this block, we'd get the exception thrown
+ # in enumerate(mbox) below, which is harder to catch.
+ #
+ # The alternative is converting the mbox to a list of
+ # messages, but that requires holding the entire thing in
+ # memory, which is wasteful.
+ try:
+ for m in mbox:
+ pass
+ except AttributeError:
+ logger.warning('Broken mbox/Maildir, aborting')
+ return
+
logger.info('Parsing %d mails', count)
for i, msg in enumerate(mbox):
try:
diff --git a/patchwork/management/commands/parsemail.py b/patchwork/management/commands/parsemail.py
index 9adfb25b09e3..52ec8bc56899 100644
--- a/patchwork/management/commands/parsemail.py
+++ b/patchwork/management/commands/parsemail.py
@@ -58,20 +58,25 @@ class Command(base.BaseCommand):
def handle(self, *args, **options):
infile = args[0] if args else options['infile']
- if infile:
- logger.info('Parsing mail loaded by filename')
- if six.PY3:
- with open(infile, 'rb') as file_:
- mail = email.message_from_binary_file(file_)
- else:
- with open(infile) as file_:
- mail = email.message_from_file(file_)
- else:
- logger.info('Parsing mail loaded from stdin')
- if six.PY3:
- mail = email.message_from_binary_file(sys.stdin.buffer)
+ try:
+ if infile:
+ logger.info('Parsing mail loaded by filename')
+ if six.PY3:
+ with open(infile, 'rb') as file_:
+ mail = email.message_from_binary_file(file_)
+ else:
+ with open(infile) as file_:
+ mail = email.message_from_file(file_)
else:
- mail = email.message_from_file(sys.stdin)
+ logger.info('Parsing mail loaded from stdin')
+ if six.PY3:
+ mail = email.message_from_binary_file(sys.stdin.buffer)
+ else:
+ mail = email.message_from_file(sys.stdin)
+ except AttributeError:
+ logger.warning("Broken email ignored")
+ return
+
try:
result = parse_mail(mail, options['list_id'])
if result:
--
2.11.0
More information about the Patchwork mailing list