[PATCH] Make parsemail-batch a managment command

Stephen Finucane stephen at that.guru
Sat Feb 25 23:08:58 AEDT 2017


On Fri, 2017-02-24 at 15:28 +1100, Daniel Axtens wrote:
> This brings it into line with the other import commands.
> 
> It's also an order of magnitude or so faster.
> 
> Signed-off-by: Daniel Axtens <dja at axtens.net>

I never got the difference between this and 'parsearchive'. Care to
enlighten me? :)

Stephen

> 
> ---
> 
> This isn't that useful per se, but I'm using it to benchmark
> the performance impact of changes look for opportunities to
> speed up.
> ---
>  patchwork/bin/parsemail-batch.sh                 | 25 +++-----
>  patchwork/management/commands/parsemail-batch.py | 77
> ++++++++++++++++++++++++
>  2 files changed, 85 insertions(+), 17 deletions(-)
>  create mode 100644 patchwork/management/commands/parsemail-batch.py
> 
> diff --git a/patchwork/bin/parsemail-batch.sh
> b/patchwork/bin/parsemail-batch.sh
> index d42712ed0995..f35808e9d11b 100755
> --- a/patchwork/bin/parsemail-batch.sh
> +++ b/patchwork/bin/parsemail-batch.sh
> @@ -20,25 +20,16 @@
>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
> 1307  USA
>  
>  BIN_DIR=$(dirname "$0")
> +PATCHWORK_BASE=$(readlink -e "$BIN_DIR/../..")
>  
> -if [ $# -lt 1 ]; then
> -    echo "usage: $0 <dir> [options]" >&2
> -    exit 1
> +if [ -z "$PW_PYTHON" ]; then
> +    PW_PYTHON=python2
>  fi
>  
> -mail_dir="$1"
> -
> -echo "dir: $mail_dir"
> -
> -if [ ! -d "$mail_dir" ]; then
> -    echo "$mail_dir should be a directory"? >&2
> -    exit 1
> +if [ -z "$DJANGO_SETTINGS_MODULE" ]; then
> +    DJANGO_SETTINGS_MODULE=patchwork.settings.production
>  fi
>  
> -shift
> -
> -find "$mail_dir" -maxdepth 1 |
> -while read -r line; do
> -    echo "$line"
> -    "$BIN_DIR/parsemail.sh" "$@" < "$line"
> -done
> +PYTHONPATH="${PATCHWORK_BASE}:${PATCHWORK_BASE}/lib/python:$PYTHONPA
> TH" \
> +    DJANGO_SETTINGS_MODULE="$DJANGO_SETTINGS_MODULE" \
> +    "$PW_PYTHON" "$PATCHWORK_BASE/manage.py" parsemail-batch "$@"
> diff --git a/patchwork/management/commands/parsemail-batch.py
> b/patchwork/management/commands/parsemail-batch.py
> new file mode 100644
> index 000000000000..ad4dce35f595
> --- /dev/null
> +++ b/patchwork/management/commands/parsemail-batch.py
> @@ -0,0 +1,77 @@
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2017, Daniel Axtens, IBM Corporation.
> +#
> +# This file is part of the Patchwork package.
> +#
> +# Patchwork is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published
> by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# Patchwork is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +
> +import email
> +import logging
> +from optparse import make_option
> +import sys
> +import os
> +
> +import django
> +from django.core.management import base
> +from django.utils import six
> +
> +from patchwork.parser import parse_mail
> +
> +logger = logging.getLogger(__name__)
> +
> +
> +class Command(base.BaseCommand):
> +    help = 'Parse a directory of mbox files and store any
> patches/comments found.'
> +
> +    if django.VERSION < (1, 8):
> +        args = '<indir>'
> +        option_list = base.BaseCommand.option_list + (
> +            make_option(
> +                '--list-id',
> +                help='mailing list ID. If not supplied, this will be
> '
> +                'extracted from the mail headers.'),
> +        )
> +    else:
> +        def add_arguments(self, parser):
> +            parser.add_argument(
> +                'indir',
> +                type=str,
> +                default=None,
> +                help='input mbox directory')
> +            parser.add_argument(
> +                '--list-id',
> +                help='mailing list ID. If not supplied, this will be
> '
> +                'extracted from the mail headers.')
> +
> +    def handle(self, *args, **options):
> +        indir = args[0] if args else options['indir']
> +
> +        if not os.path.isdir(indir):
> +            logger.error('%s is not a directory' % indir)
> +            sys.exit(1)
> +
> +        for infile in sorted(os.listdir(indir)):
> +            print(infile)
> +            if six.PY3:
> +                with open(os.path.join(indir, infile), 'rb') as
> file_:
> +                    mail = email.message_from_binary_file(file_)
> +            else:
> +                with open(os.path.join(indir, infile)) as file_:
> +                    mail = email.message_from_file(file_)
> +
> +            try:
> +                result = parse_mail(mail, options['list_id'])
> +                if not result:
> +                    logger.warning('Failed to parse mail')
> +            except Exception as e:
> +                logger.warning('Error when parsing incoming email '
> + \
> +                               str(e),
> +                               extra={'mail': mail.as_string()})



More information about the Patchwork mailing list