[PATCH] Make parsemail-batch a managment command

Daniel Axtens dja at axtens.net
Fri Feb 24 15:28:35 AEDT 2017


This brings it into line with the other import commands.

It's also an order of magnitude or so faster.

Signed-off-by: Daniel Axtens <dja at axtens.net>

---

This isn't that useful per se, but I'm using it to benchmark
the performance impact of changes look for opportunities to
speed up.
---
 patchwork/bin/parsemail-batch.sh                 | 25 +++-----
 patchwork/management/commands/parsemail-batch.py | 77 ++++++++++++++++++++++++
 2 files changed, 85 insertions(+), 17 deletions(-)
 create mode 100644 patchwork/management/commands/parsemail-batch.py

diff --git a/patchwork/bin/parsemail-batch.sh b/patchwork/bin/parsemail-batch.sh
index d42712ed0995..f35808e9d11b 100755
--- a/patchwork/bin/parsemail-batch.sh
+++ b/patchwork/bin/parsemail-batch.sh
@@ -20,25 +20,16 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 BIN_DIR=$(dirname "$0")
+PATCHWORK_BASE=$(readlink -e "$BIN_DIR/../..")
 
-if [ $# -lt 1 ]; then
-    echo "usage: $0 <dir> [options]" >&2
-    exit 1
+if [ -z "$PW_PYTHON" ]; then
+    PW_PYTHON=python2
 fi
 
-mail_dir="$1"
-
-echo "dir: $mail_dir"
-
-if [ ! -d "$mail_dir" ]; then
-    echo "$mail_dir should be a directory"? >&2
-    exit 1
+if [ -z "$DJANGO_SETTINGS_MODULE" ]; then
+    DJANGO_SETTINGS_MODULE=patchwork.settings.production
 fi
 
-shift
-
-find "$mail_dir" -maxdepth 1 |
-while read -r line; do
-    echo "$line"
-    "$BIN_DIR/parsemail.sh" "$@" < "$line"
-done
+PYTHONPATH="${PATCHWORK_BASE}:${PATCHWORK_BASE}/lib/python:$PYTHONPATH" \
+    DJANGO_SETTINGS_MODULE="$DJANGO_SETTINGS_MODULE" \
+    "$PW_PYTHON" "$PATCHWORK_BASE/manage.py" parsemail-batch "$@"
diff --git a/patchwork/management/commands/parsemail-batch.py b/patchwork/management/commands/parsemail-batch.py
new file mode 100644
index 000000000000..ad4dce35f595
--- /dev/null
+++ b/patchwork/management/commands/parsemail-batch.py
@@ -0,0 +1,77 @@
+# Patchwork - automated patch tracking system
+# Copyright (C) 2017, Daniel Axtens, IBM Corporation.
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+
+import email
+import logging
+from optparse import make_option
+import sys
+import os
+
+import django
+from django.core.management import base
+from django.utils import six
+
+from patchwork.parser import parse_mail
+
+logger = logging.getLogger(__name__)
+
+
+class Command(base.BaseCommand):
+    help = 'Parse a directory of mbox files and store any patches/comments found.'
+
+    if django.VERSION < (1, 8):
+        args = '<indir>'
+        option_list = base.BaseCommand.option_list + (
+            make_option(
+                '--list-id',
+                help='mailing list ID. If not supplied, this will be '
+                'extracted from the mail headers.'),
+        )
+    else:
+        def add_arguments(self, parser):
+            parser.add_argument(
+                'indir',
+                type=str,
+                default=None,
+                help='input mbox directory')
+            parser.add_argument(
+                '--list-id',
+                help='mailing list ID. If not supplied, this will be '
+                'extracted from the mail headers.')
+
+    def handle(self, *args, **options):
+        indir = args[0] if args else options['indir']
+
+        if not os.path.isdir(indir):
+            logger.error('%s is not a directory' % indir)
+            sys.exit(1)
+
+        for infile in sorted(os.listdir(indir)):
+            print(infile)
+            if six.PY3:
+                with open(os.path.join(indir, infile), 'rb') as file_:
+                    mail = email.message_from_binary_file(file_)
+            else:
+                with open(os.path.join(indir, infile)) as file_:
+                    mail = email.message_from_file(file_)
+
+            try:
+                result = parse_mail(mail, options['list_id'])
+                if not result:
+                    logger.warning('Failed to parse mail')
+            except Exception as e:
+                logger.warning('Error when parsing incoming email ' + \
+                               str(e),
+                               extra={'mail': mail.as_string()})
-- 
2.9.3



More information about the Patchwork mailing list