[PATCH v6 2/2] parsearchive: Convert to a management command

Stephen Finucane stephenfinucane at hotmail.com
Mon Sep 12 04:55:25 AEST 2016


As with parsemail, parsearchive makes more sense as a management
command. Make it so.

As with the conversion of the 'parsemail' tool, this removes
customisable logging as it's not necessary.

Signed-off-by: Stephen Finucane <stephenfinucane at hotmail.com>
Closes-bug: #17
---
v6:
- Add additional unit tests
v4:
- Add unit tests
- Add support for Django 1.10
---
 docs/development.md                           |   8 +-
 patchwork/bin/parsearchive.py                 | 106 --------------------------
 patchwork/management/commands/parsearchive.py | 106 ++++++++++++++++++++++++++
 patchwork/tests/test_management.py            |  32 ++++++++
 4 files changed, 142 insertions(+), 110 deletions(-)
 delete mode 100755 patchwork/bin/parsearchive.py
 create mode 100644 patchwork/management/commands/parsearchive.py

diff --git a/docs/development.md b/docs/development.md
index e51f7b1..36d2fdf 100644
--- a/docs/development.md
+++ b/docs/development.md
@@ -268,8 +268,8 @@ using the aptly-named `createsuperuser` command:
 
 Once this is done, it's beneficial to load some real emails into the system.
 This can be done manually, however it's generally much easier to download
-an archive from a Mailman instance and load these using the `parsearchive.py`
-tool. You can do this like so:
+an archive from a Mailman instance and load these using the `parsearchive`
+command. You can do this like so:
 
     (.venv)$ mm_user=myusername
     (.venv)$ mm_pass=mypassword
@@ -288,8 +288,8 @@ find more informations about this [here][ref-mman-bulk].
 Load these archives into Patchwork. Depending on the size of the downloaded
 archives this may take some time:
 
-    (.venv)$ PYTHONPATH=. ./patchwork/bin/parsearchive.py \
-      --list-id=patchwork.ozlabs.org patchwork.mbox
+    (.venv)$ ./manage.py parsearchive --list-id=patchwork.ozlabs.org \
+      patchwork.mbox
 
 Finally, run the server and browse to the IP address of your board using your
 browser of choice:
diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py
deleted file mode 100755
index 8986b22..0000000
--- a/patchwork/bin/parsearchive.py
+++ /dev/null
@@ -1,106 +0,0 @@
-#!/usr/bin/env python
-#
-# Patchwork - automated patch tracking system
-# Copyright (C) 2015 Intel Corporation
-#
-# This file is part of the Patchwork package.
-#
-# Patchwork is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# Patchwork is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with Patchwork; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-
-"""Utility to parse an mbox archive file."""
-
-from __future__ import absolute_import
-
-import argparse
-import logging
-import mailbox
-
-import django
-
-from patchwork.parser import parse_mail
-from patchwork import models
-
-LOGGER = logging.getLogger(__name__)
-
-VERBOSITY_LEVELS = {
-    'debug': logging.DEBUG,
-    'info': logging.INFO,
-    'warning': logging.WARNING,
-    'error': logging.ERROR,
-    'critical': logging.CRITICAL
-}
-
-
-def parse_mbox(path, list_id):
-    results = {
-        models.Patch: 0,
-        models.CoverLetter: 0,
-        models.Comment: 0,
-    }
-    duplicates = 0
-    dropped = 0
-
-    mbox = mailbox.mbox(path)
-    for msg in mbox:
-        try:
-            obj = parse_mail(msg, list_id)
-            if obj:
-                results[type(obj)] += 1
-            else:
-                dropped += 1
-        except django.db.utils.IntegrityError:
-            duplicates += 1
-    print('Processed %(total)d messages -->\n'
-          '  %(covers)4d cover letters\n'
-          '  %(patches)4d patches\n'
-          '  %(comments)4d comments\n'
-          '  %(duplicates)4d duplicates\n'
-          '  %(dropped)4d dropped\n'
-          'Total: %(new)s new entries' % {
-              'total': len(mbox),
-              'covers': results[models.CoverLetter],
-              'patches': results[models.Patch],
-              'comments': results[models.Comment],
-              'duplicates': duplicates,
-              'dropped': dropped,
-              'new': len(mbox) - duplicates - dropped,
-          })
-
-
-def main():
-    django.setup()
-    parser = argparse.ArgumentParser(description=__doc__)
-
-    def list_logging_levels():
-        """Give a summary of all available logging levels."""
-        return sorted(VERBOSITY_LEVELS.keys(),
-                      key=lambda x: VERBOSITY_LEVELS[x])
-
-    parser.add_argument('inpath', help='input mbox filename')
-
-    group = parser.add_argument_group('Mail parsing configuration')
-    group.add_argument('--list-id', help='mailing list ID. If not supplied '
-                       'this will be extracted from the mail headers.')
-    group.add_argument('--verbosity', choices=list_logging_levels(),
-                       help='debug level', default='info')
-
-    args = vars(parser.parse_args())
-
-    logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']])
-
-    parse_mbox(args['inpath'], args['list_id'])
-
-if __name__ == '__main__':
-    main()
diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
new file mode 100644
index 0000000..310c6cf
--- /dev/null
+++ b/patchwork/management/commands/parsearchive.py
@@ -0,0 +1,106 @@
+# Patchwork - automated patch tracking system
+# Copyright (C) 2016 Stephen Finucane <stephenfinucane at hotmail.com>
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import logging
+import mailbox
+from optparse import make_option
+import os
+import sys
+
+import django
+from django.core.management.base import BaseCommand
+
+from patchwork import models
+from patchwork.parser import parse_mail
+
+logger = logging.getLogger(__name__)
+
+
+class Command(BaseCommand):
+    help = 'Parse an mbox archive file and store any patches/comments found.'
+
+    if django.VERSION < (1, 8):
+        args = '<infile>'
+        option_list = BaseCommand.option_list + (
+            make_option(
+                '--list-id',
+                help='mailing list ID. If not supplied, this will be '
+                'extracted from the mail headers.'),
+        )
+    else:
+        def add_arguments(self, parser):
+            parser.add_argument(
+                'infile',
+                help='input mbox filename')
+            parser.add_argument(
+                '--list-id',
+                help='mailing list ID. If not supplied, this will be '
+                'extracted from the mail headers.')
+
+    def handle(self, *args, **options):
+        results = {
+            models.Patch: 0,
+            models.CoverLetter: 0,
+            models.Comment: 0,
+        }
+        duplicates = 0
+        dropped = 0
+
+        # TODO(stephenfin): Support passing via stdin
+        infile = args[0] if args else options['infile']
+        # the mailbox.mbox function will create a new mbox if the
+        # file does not exist. We must manually validate this.
+        if not os.path.exists(infile):
+            self.stdout.write('Invalid path: %s' % infile)
+            sys.exit(1)
+
+        mbox = mailbox.mbox(infile)
+        count = len(mbox)
+
+        logger.info('Parsing %d mails', count)
+        for i, msg in enumerate(mbox):
+            try:
+                obj = parse_mail(msg, options['list_id'])
+                if obj:
+                    results[type(obj)] += 1
+                else:
+                    dropped += 1
+            except django.db.utils.IntegrityError:
+                duplicates += 1
+
+            if (i % 10) == 0:
+                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
+                self.stdout.flush()
+
+        self.stdout.write(
+            'Processed %(total)d messages -->\n'
+            '  %(covers)4d cover letters\n'
+            '  %(patches)4d patches\n'
+            '  %(comments)4d comments\n'
+            '  %(duplicates)4d duplicates\n'
+            '  %(dropped)4d dropped\n'
+            'Total: %(new)s new entries' % {
+                'total': count,
+                'covers': results[models.CoverLetter],
+                'patches': results[models.Patch],
+                'comments': results[models.Comment],
+                'duplicates': duplicates,
+                'dropped': dropped,
+                'new': count - duplicates - dropped,
+            })
diff --git a/patchwork/tests/test_management.py b/patchwork/tests/test_management.py
index 6cd21e2..f2b968f 100644
--- a/patchwork/tests/test_management.py
+++ b/patchwork/tests/test_management.py
@@ -21,6 +21,7 @@ import os
 
 from django.core.management import call_command
 from django.test import TestCase
+from django.utils.six import StringIO
 
 from patchwork import models
 from patchwork.tests import TEST_MAIL_DIR
@@ -78,3 +79,34 @@ class ParsemailTest(TestCase):
 
         count = models.Patch.objects.filter(project=project.id).count()
         self.assertEqual(count, 1)
+
+
+class ParsearchiveTest(TestCase):
+
+    def test_invalid_path(self):
+        out = StringIO()
+        with self.assertRaises(SystemExit) as exc:
+            call_command('parsearchive', 'xyz123random', stdout=out)
+        self.assertEqual(exc.exception.code, 1)
+
+    def test_missing_project_path(self):
+        out = StringIO()
+        path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
+        call_command('parsearchive', path, stdout=out)
+
+        self.assertIn('Processed 1 messages -->', out.getvalue())
+        self.assertIn('1 dropped', out.getvalue())
+
+    def test_valid_path(self):
+        project = utils.create_project()
+        utils.create_state()
+
+        out = StringIO()
+        path = os.path.join(TEST_MAIL_DIR, '0001-git-pull-request.mbox')
+        call_command('parsearchive', path, list_id=project.listid, stdout=out)
+
+        self.assertIn('Processed 1 messages -->', out.getvalue())
+        self.assertIn('1 patches', out.getvalue())
+
+        count = models.Patch.objects.filter(project=project.id).count()
+        self.assertEqual(count, 1)
-- 
2.7.4



More information about the Patchwork mailing list