[PATCH 10/11] parsearchive: Convert to a management command

Andy Doan andy.doan at linaro.org
Wed Jul 20 07:33:03 AEST 2016


On 07/13/2016 04:40 AM, Stephen Finucane wrote:
> As with parsemail, parsearchive makes more sense as a management
> command. Make it so.
>
> As with the conversion of the 'parsemail' tool, this removes
> customisable logging as it's not necessary.
>
> Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>

Reviewed-by: Andy Doan <andy.doan at linaro.org>

> Closes-bug: #17
> ---
>  docs/development.md                           |    8 +-
>  patchwork/bin/parsearchive.py                 |  106 -------------------------
>  patchwork/management/commands/parsearchive.py |   87 ++++++++++++++++++++
>  3 files changed, 91 insertions(+), 110 deletions(-)
>  delete mode 100755 patchwork/bin/parsearchive.py
>  create mode 100644 patchwork/management/commands/parsearchive.py
>
> diff --git a/docs/development.md b/docs/development.md
> index de5dac5..4a6b994 100644
> --- a/docs/development.md
> +++ b/docs/development.md
> @@ -192,8 +192,8 @@ using the aptly-named `createsuperuser` command:
>
>  Once this is done, it's beneficial to load some real emails into the system.
>  This can be done manually, however it's generally much easier to download
> -an archive from a Mailman instance and load these using the `parsearchive.py`
> -tool. You can do this like so:
> +an archive from a Mailman instance and load these using the `parsearchive`
> +command. You can do this like so:
>
>      (.venv)$ mm_user=myusername
>      (.venv)$ mm_pass=mypassword
> @@ -212,8 +212,8 @@ find more informations about this [here][ref-mman-bulk].
>  Load these archives into Patchwork. Depending on the size of the downloaded
>  archives this may take some time:
>
> -    (.venv)$ PYTHONPATH=. ./patchwork/bin/parsearchive.py \
> -      --list-id=patchwork.ozlabs.org patchwork.mbox
> +    (.venv)$ ./manage.py parsearchive --list-id=patchwork.ozlabs.org \
> +      patchwork.mbox
>
>  Finally, run the server and browse to the IP address of your board using your
>  browser of choice:
> diff --git a/patchwork/bin/parsearchive.py b/patchwork/bin/parsearchive.py
> deleted file mode 100755
> index 8986b22..0000000
> --- a/patchwork/bin/parsearchive.py
> +++ /dev/null
> @@ -1,106 +0,0 @@
> -#!/usr/bin/env python
> -#
> -# Patchwork - automated patch tracking system
> -# Copyright (C) 2015 Intel Corporation
> -#
> -# This file is part of the Patchwork package.
> -#
> -# Patchwork is free software; you can redistribute it and/or modify
> -# it under the terms of the GNU General Public License as published by
> -# the Free Software Foundation; either version 2 of the License, or
> -# (at your option) any later version.
> -#
> -# Patchwork is distributed in the hope that it will be useful,
> -# but WITHOUT ANY WARRANTY; without even the implied warranty of
> -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> -# GNU General Public License for more details.
> -#
> -# You should have received a copy of the GNU General Public License
> -# along with Patchwork; if not, write to the Free Software
> -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> -
> -"""Utility to parse an mbox archive file."""
> -
> -from __future__ import absolute_import
> -
> -import argparse
> -import logging
> -import mailbox
> -
> -import django
> -
> -from patchwork.parser import parse_mail
> -from patchwork import models
> -
> -LOGGER = logging.getLogger(__name__)
> -
> -VERBOSITY_LEVELS = {
> -    'debug': logging.DEBUG,
> -    'info': logging.INFO,
> -    'warning': logging.WARNING,
> -    'error': logging.ERROR,
> -    'critical': logging.CRITICAL
> -}
> -
> -
> -def parse_mbox(path, list_id):
> -    results = {
> -        models.Patch: 0,
> -        models.CoverLetter: 0,
> -        models.Comment: 0,
> -    }
> -    duplicates = 0
> -    dropped = 0
> -
> -    mbox = mailbox.mbox(path)
> -    for msg in mbox:
> -        try:
> -            obj = parse_mail(msg, list_id)
> -            if obj:
> -                results[type(obj)] += 1
> -            else:
> -                dropped += 1
> -        except django.db.utils.IntegrityError:
> -            duplicates += 1
> -    print('Processed %(total)d messages -->\n'
> -          '  %(covers)4d cover letters\n'
> -          '  %(patches)4d patches\n'
> -          '  %(comments)4d comments\n'
> -          '  %(duplicates)4d duplicates\n'
> -          '  %(dropped)4d dropped\n'
> -          'Total: %(new)s new entries' % {
> -              'total': len(mbox),
> -              'covers': results[models.CoverLetter],
> -              'patches': results[models.Patch],
> -              'comments': results[models.Comment],
> -              'duplicates': duplicates,
> -              'dropped': dropped,
> -              'new': len(mbox) - duplicates - dropped,
> -          })
> -
> -
> -def main():
> -    django.setup()
> -    parser = argparse.ArgumentParser(description=__doc__)
> -
> -    def list_logging_levels():
> -        """Give a summary of all available logging levels."""
> -        return sorted(VERBOSITY_LEVELS.keys(),
> -                      key=lambda x: VERBOSITY_LEVELS[x])
> -
> -    parser.add_argument('inpath', help='input mbox filename')
> -
> -    group = parser.add_argument_group('Mail parsing configuration')
> -    group.add_argument('--list-id', help='mailing list ID. If not supplied '
> -                       'this will be extracted from the mail headers.')
> -    group.add_argument('--verbosity', choices=list_logging_levels(),
> -                       help='debug level', default='info')
> -
> -    args = vars(parser.parse_args())
> -
> -    logging.basicConfig(level=VERBOSITY_LEVELS[args['verbosity']])
> -
> -    parse_mbox(args['inpath'], args['list_id'])
> -
> -if __name__ == '__main__':
> -    main()
> diff --git a/patchwork/management/commands/parsearchive.py b/patchwork/management/commands/parsearchive.py
> new file mode 100644
> index 0000000..093129f
> --- /dev/null
> +++ b/patchwork/management/commands/parsearchive.py
> @@ -0,0 +1,87 @@
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2016 Intel Corporation
> +#
> +# This file is part of the Patchwork package.
> +#
> +# Patchwork is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# Patchwork is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with Patchwork; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> +
> +import logging
> +import mailbox
> +from optparse import make_option
> +
> +import django
> +from django.core.management.base import BaseCommand
> +
> +from patchwork import models
> +from patchwork.parser import parse_mail
> +
> +logger = logging.getLogger(__name__)
> +
> +
> +class Command(BaseCommand):
> +    help = 'Parse an mbox archive file and store any patches/comments found'
> +    args = '<infile>'  # Django < 1.8 compatibility
> +    option_list = BaseCommand.option_list + (
> +        make_option(
> +            '--list-id',
> +            help='mailing list ID. If not supplied, this will be extracted '
> +            'from the mail headers.'
> +        ),
> +    )
> +
> +    def handle(self, *args, **options):
> +        results = {
> +            models.Patch: 0,
> +            models.CoverLetter: 0,
> +            models.Comment: 0,
> +        }
> +        duplicates = 0
> +        dropped = 0
> +
> +        # TODO(stephenfin): Support passing via stdin?
> +        mbox = mailbox.mbox(args[0])
> +        count = len(mbox)
> +
> +        logger.info('Parsing %d mails', count)
> +        for i, msg in enumerate(mbox):
> +            try:
> +                obj = parse_mail(msg, options['list_id'])
> +                if obj:
> +                    results[type(obj)] += 1
> +                else:
> +                    dropped += 1
> +            except django.db.utils.IntegrityError:
> +                duplicates += 1
> +
> +            if (i % 10) == 0:
> +                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
> +                self.stdout.flush()
> +
> +        self.stdout.write(
> +            'Processed %(total)d messages -->\n'
> +            '  %(covers)4d cover letters\n'
> +            '  %(patches)4d patches\n'
> +            '  %(comments)4d comments\n'
> +            '  %(duplicates)4d duplicates\n'
> +            '  %(dropped)4d dropped\n'
> +            'Total: %(new)s new entries' % {
> +                'total': count,
> +                'covers': results[models.CoverLetter],
> +                'patches': results[models.Patch],
> +                'comments': results[models.Comment],
> +                'duplicates': duplicates,
> +                'dropped': dropped,
> +                'new': count - duplicates - dropped,
> +            })
>



More information about the Patchwork mailing list