[PATCH v2 10/10] parsearchive: Convert to a management command

Stephen Finucane stephenfinucane at hotmail.com
Mon Aug 22 04:09:10 AEST 2016


From: Stephen Finucane <stephen.finucane at intel.com>

As with parsemail, parsearchive makes more sense as a management
command. Make it so.

As with the conversion of the 'parsemail' tool, this removes the
customisable logging (the '--verbosity' log level option) as it's not
necessary.

Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
Closes-bug: #17
---
 docs/development.md                           |   8 +-
 patchwork/bin/parsearchive.py                 | 106 --------------------------
 patchwork/management/commands/parsearchive.py |  87 +++++++++++++++++++++
 3 files changed, 91 insertions(+), 110 deletions(-)
 delete mode 100755 patchwork/bin/parsearchive.py
 create mode 100644 patchwork/management/commands/parsearchive.py

diff --git a/docs/development.md b/docs/development.md
index 593eddf..3e4e3df 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -192,8 +192,8 @@ using the aptly-named `createsuperuser` command:
 
 Once this is done, it's beneficial to load some real emails into the system.
 This can be done manually, however it's generally much easier to download
-an archive from a Mailman instance and load these using the `parsearchive.py`
-tool. You can do this like so:
+an archive from a Mailman instance and load these using the `parsearchive`
+command. You can do this like so:
 
     (.venv)$ mm_user=myusername
     (.venv)$ mm_pass=mypassword
@@ -212,8 +212,8 @@ find more informations about this [here][ref-mman-bulk].
 Load these archives into Patchwork. Depending on the size of the downloaded
 archives this may take some time:
 
-    (.venv)$ PYTHONPATH=. ./patchwork/bin/parsearchive.py \
-      --list-id=patchwork.ozlabs.org patchwork.mbox
+    (.venv)$ ./manage.py parsearchive --list-id=patchwork.ozlabs.org \
+      patchwork.mbox
 
 Finally, run the server and browse to the IP address of your board using your
 browser of choice:
diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py
deleted file mode 100755
index 8986b22..0000000
--- a/patchwork/bin/parsearchive.py
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-#
-# Patchwork - automated patch tracking system
-# Copyright (C) 2015 Intel Corporation
-#
-# This file is part of the Patchwork package.
-#
-# Patchwork is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Patchwork is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Patchwork; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Utility to parse an mbox archive file."""
-
-from __future__ import absolute_import
-
-import argparse
-import logging
-import mailbox
-
-import django
-
-from patchwork.parser import parse_mail
-from patchwork import models
-
-LOGGER = logging.getLogger(__name__)
-
-VERBOSITY_LEVELS = {
-    'debug': logging.DEBUG,
-    'info': logging.INFO,
-    'warning': logging.WARNING,
-    'error': logging.ERROR,
-    'critical': logging.CRITICAL
-}
-
-
-def parse_mbox(path, list_id):
-    results = {
-        models.Patch: 0,
-        models.CoverLetter: 0,
-        models.Comment: 0,
-    }
-    duplicates = 0
-    dropped = 0
-
-    mbox = mailbox.mbox(path)
-    for msg in mbox:
-        try:
-            obj = parse_mail(msg, list_id)
-            if obj:
-                results[type(obj)] += 1
-            else:
-                dropped += 1
-        except django.db.utils.IntegrityError:
-            duplicates += 1
-    print('Processed %(total)d messages -->\n'
-          '  %(covers)4d cover letters\n'
-          '  %(patches)4d patches\n'
-          '  %(comments)4d comments\n'
-          '  %(duplicates)4d duplicates\n'
-          '  %(dropped)4d dropped\n'
-          'Total: %(new)s new entries' % {
-              'total': len(mbox),
-              'covers': results[models.CoverLetter],
-              'patches': results[models.Patch],
-              'comments': results[models.Comment],
-              'duplicates': duplicates,
-              'dropped': dropped,
-              'new': len(mbox) - duplicates - dropped,
-          })
-
-
-def main():
-    django.setup()
-    parser = argparse.ArgumentParser(description=__doc__)
-
-    def list_logging_levels():
-        """Give a summary of all available logging levels."""
-        return sorted(VERBOSITY_LEVELS.keys(),
-                      key=lambda x: VERBOSITY_LEVELS[x])
-
-    parser.add_argument('inpath', help='input mbox filename')
-
-    group = parser.add_argument_group('Mail parsing configuration')
-    group.add_argument('--list-id', help='mailing list ID. If not supplied '
-                       'this will be extracted from the mail headers.')
-    group.add_argument('--verbosity', choices=list_logging_levels(),
-                       help='debug level', default='info')
-
-    args = vars(parser.parse_args())
-
-    logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']])
-
-    parse_mbox(args['inpath'], args['list_id'])
-
-if __name__ == '__main__':
-    main()
diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
new file mode 100644
index 0000000..093129f
--- /dev/null
+++ b/patchwork/management/commands/parsearchive.py
@@ -0,0 +1,87 @@
+# Patchwork - automated patch tracking system
+# Copyright (C) 2016 Intel Corporation
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import logging
+import mailbox
+from optparse import make_option
+
+import django
+from django.core.management.base import BaseCommand
+
+from patchwork import models
+from patchwork.parser import parse_mail
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = 'Parse an mbox archive file and store any patches/comments found'
+    args = '<infile>'  # Django < 1.8 compatibility
+    option_list = BaseCommand.option_list + (
+        make_option(
+            '--list-id',
+            help='mailing list ID. If not supplied, this will be extracted '
+            'from the mail headers.'
+        ),
+    )
+
+    def handle(self, *args, **options):
+        results = {
+            models.Patch: 0,
+            models.CoverLetter: 0,
+            models.Comment: 0,
+        }
+        duplicates = 0
+        dropped = 0
+
+        # TODO(stephenfin): Support passing via stdin?
+        mbox = mailbox.mbox(args[0])
+        count = len(mbox)
+
+        logger.info('Parsing %d mails', count)
+        for i, msg in enumerate(mbox):
+            try:
+                obj = parse_mail(msg, options['list_id'])
+                if obj:
+                    results[type(obj)] += 1
+                else:
+                    dropped += 1
+            except django.db.utils.IntegrityError:
+                duplicates += 1
+
+            if (i % 10) == 0:
+                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
+                self.stdout.flush()
+
+        self.stdout.write(
+            'Processed %(total)d messages -->\n'
+            '  %(covers)4d cover letters\n'
+            '  %(patches)4d patches\n'
+            '  %(comments)4d comments\n'
+            '  %(duplicates)4d duplicates\n'
+            '  %(dropped)4d dropped\n'
+            'Total: %(new)s new entries' % {
+                'total': count,
+                'covers': results[models.CoverLetter],
+                'patches': results[models.Patch],
+                'comments': results[models.Comment],
+                'duplicates': duplicates,
+                'dropped': dropped,
+                'new': count - duplicates - dropped,
+            })
-- 
2.7.4



More information about the Patchwork mailing list