[PATCH 2/4] bin/parsemail: Support user-provided list ID

Stephen Finucane stephen.finucane at intel.com
Fri Nov 13 14:48:05 AEDT 2015


Sometimes mails won't contain the headers necessary to extract a
mailing list's ID (for example, mails downloaded from Mailman
archives). However, if the user already knows the correct mailing
list ID, this extraction is not necessary.

Allow the user to provide a mailing list ID via the command line.

Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
---
 patchwork/bin/parsemail.py | 55 ++++++++++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 12 deletions(-)

diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index e05f036..0562a45 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -19,6 +19,7 @@
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+import argparse
 import codecs
 import datetime
 from email import message_from_file
@@ -59,7 +60,17 @@ def clean_header(header):
     return normalise_space(u' '.join(fragments))
 
 
-def find_project(mail):
+def find_project_by_id(list_id):
+    """Find a `project` object with given `list_id`."""
+    project = None
+    try:
+        project = Project.objects.get(listid=list_id)
+    except Project.DoesNotExist:
+        pass
+    return project
+
+
+def find_project_by_header(mail):
     project = None
     listid_res = [re.compile(r'.*<([^>]+)>.*', re.S),
                   re.compile(r'^([\S]+)$', re.S)]
@@ -77,15 +88,12 @@ def find_project(mail):
 
             listid = match.group(1)
 
-            try:
-                project = Project.objects.get(listid=listid)
+            project = find_project_by_id(listid)
+            if project:
                 break
-            except Project.DoesNotExist:
-                pass
 
     return project
 
-
 def find_author(mail):
 
     from_header = clean_header(mail.get('From'))
@@ -365,8 +373,16 @@ def get_delegate(delegate_email):
     return None
 
 
-def parse_mail(mail):
-    """Parse a mail and add to the database."""
+def parse_mail(mail, list_id=None):
+    """Parse a mail and add to the database.
+
+    Args:
+        mail (`mbox.Mail`): Mail to parse and add.
+        list_id (str): Mailing list ID
+
+    Returns:
+        None
+    """
     # some basic sanity checks
     if 'From' not in mail:
         return 0
@@ -381,7 +397,10 @@ def parse_mail(mail):
     if hint == 'ignore':
         return 0
 
-    project = find_project(mail)
+    if list_id:
+        project = find_project_by_id(list_id)
+    else:
+        project = find_project_by_header(mail)
 
     if project is None:
         print("no project found")
@@ -451,16 +470,28 @@ def setup_error_handler():
 def main(args):
     django.setup()
     logger = setup_error_handler()
-    mail = message_from_file(sys.stdin)
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
+                        default=sys.stdin, help='input mbox file (a filename '
+                        'or stdin)')
+
+    group = parser.add_argument_group('Mail parsing configuration')
+    group.add_argument('--list-id', help='mailing list ID. If not supplied '
+                       'this will be extracted from the mail headers.')
+
+    args = vars(parser.parse_args())
+
+    mail = message_from_file(args['infile'])
     try:
-        return parse_mail(mail)
+        return parse_mail(mail, args['list_id'])
     except:
         if logger:
             logger.exception('Error when parsing incoming email', extra={
                 'mail': mail.as_string(),
             })
         raise
-
+    return parse_mail(mail, args['list_id'])
 
 if __name__ == '__main__':
     sys.exit(main(sys.argv))
-- 
2.0.0



More information about the Patchwork mailing list