[PATCH v2 1/5] Move email address parsing functions to a separate module

Mon Nov 18 17:00:38 EST 2013

A future patch would like to be able to parse out an email address in
a file other than parsemail.py.  Create a common emailutils module to
handle this.

Signed-off-by: Doug Anderson <dianders at chromium.org>
---
 apps/patchwork/bin/parsemail.py | 54 ++---------------------
 apps/patchwork/emailutils.py    | 94 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 98 insertions(+), 50 deletions(-)
 create mode 100644 apps/patchwork/emailutils.py

diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b6eb97a..92d6bb3 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -26,13 +26,14 @@ import time
 import operator
 from email import message_from_file
 try:
-    from email.header import Header, decode_header
+    from email.header import Header
     from email.utils import parsedate_tz, mktime_tz
 except ImportError:
     # Python 2.4 compatibility
-    from email.Header import Header, decode_header
+    from email.Header import Header
     from email.Utils import parsedate_tz, mktime_tz
 
+from patchwork.emailutils import clean_header, normalise_space, parse_from
 from patchwork.parser import parse_patch
 from patchwork.models import Patch, Project, Person, Comment, State, \
         get_default_initial_patch_state
@@ -40,23 +41,6 @@ from django.contrib.auth.models import User
 
 list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
 
-whitespace_re = re.compile('\s+')
-def normalise_space(str):
-    return whitespace_re.sub(' ', str).strip()
-
-def clean_header(header):
-    """ Decode (possibly non-ascii) headers """
-
-    def decode(fragment):
-        (frag_str, frag_encoding) = fragment
-        if frag_encoding:
-            return frag_str.decode(frag_encoding)
-        return frag_str.decode()
-
-    fragments = map(decode, decode_header(header))
-
-    return normalise_space(u' '.join(fragments))
-
 def find_project(mail):
     project = None
     listid_res = [re.compile('.*<([^>]+)>.*', re.S),
@@ -84,37 +68,7 @@ def find_project(mail):
     return project
 
 def find_author(mail):
-
-    from_header = clean_header(mail.get('From'))
-    (name, email) = (None, None)
-
-    # tuple of (regex, fn)
-    #  - where fn returns a (name, email) tuple from the match groups resulting
-    #    from re.match().groups()
-    from_res = [
-        # for "Firstname Lastname" <example at example.com> style addresses
-       (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
-
-       # for example at example.com (Firstname Lastname) style addresses
-       (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))),
-
-       # everything else
-       (re.compile('(.*)'), (lambda g: (None, g[0]))),
-    ]
-
-    for regex, fn in from_res:
-        match = regex.match(from_header)
-        if match:
-            (name, email) = fn(match.groups())
-            break
-
-    if email is None:
-        raise Exception("Could not parse From: header")
-
-    email = email.strip()
-    if name is not None:
-        name = name.strip()
-
+    name, email = parse_from(mail.get('From'))
     new_person = False
 
     try:
diff --git a/apps/patchwork/emailutils.py b/apps/patchwork/emailutils.py
new file mode 100644
index 0000000..2c906a9
--- /dev/null
+++ b/apps/patchwork/emailutils.py
@@ -0,0 +1,94 @@
+# Patchwork - automated patch tracking system
+# Copyright (C) 2008 Jeremy Kerr <jk at ozlabs.org>
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+
+import re
+
+try:
+    from email.header import decode_header
+except ImportError:
+    # Python 2.4 compatibility
+    from email.Header import decode_header
+
+whitespace_re = re.compile('\s+')
+def normalise_space(str):
+    return whitespace_re.sub(' ', str).strip()
+
+def clean_header(header):
+    """ Decode (possibly non-ascii) headers """
+
+    def decode(fragment):
+        (frag_str, frag_encoding) = fragment
+        if frag_encoding:
+            return frag_str.decode(frag_encoding)
+        return frag_str.decode()
+
+    fragments = map(decode, decode_header(header))
+
+    return normalise_space(u' '.join(fragments))
+
+def parse_from(from_header):
+    """Parse a "From" header into a (unicode) name and email address.
+
+    >>> parse_from("=?utf-8?b?RG/DvGc=?= Anderson <dianders at chromium.org>")
+    (u'Do\\xfcg Anderson', u'dianders at chromium.org')
+    >>> parse_from("Doug =?utf-8?b?QW5kw6lyc29u?= <dianders at chromium.org>")
+    (u'Doug And\\xe9rson', u'dianders at chromium.org')
+    >>> parse_from("=?utf-8?b?RG/DvGcgQW5kw6lyc29u?= <dianders at chromium.org>")
+    (u'Do\\xfcg And\\xe9rson', u'dianders at chromium.org')
+    >>> parse_from("Doug Anderson <dianders at chromium.org>")
+    (u'Doug Anderson', u'dianders at chromium.org')
+
+    @from_header: An ASCII string containing the "From" header, possibly
+                  containing RFC 2047 encoded-words.
+    @return: A tuple (name, email) where name is a unicode version of the name
+             and email is the email address with no name.
+    """
+    from_header = clean_header(from_header)
+    (name, email) = (None, None)
+
+    # tuple of (regex, fn)
+    #  - where fn returns a (name, email) tuple from the match groups resulting
+    #    from re.match().groups()
+    from_res = [
+        # for "Firstname Lastname" <example at example.com> style addresses
+       (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
+
+       # for example at example.com (Firstname Lastname) style addresses
+       (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))),
+
+       # everything else
+       (re.compile('(.*)'), (lambda g: (None, g[0]))),
+    ]
+
+    for regex, fn in from_res:
+        match = regex.match(from_header)
+        if match:
+            (name, email) = fn(match.groups())
+            break
+
+    if email is None:
+        raise Exception("Could not parse From: header")
+
+    email = email.strip()
+    if name is not None:
+        name = name.strip()
+
+    return name, email
+
-- 
1.8.4.1