[PATCH v2 1/5] Move email address parsing functions to a separate module
Doug Anderson
dianders at chromium.org
Mon Nov 18 17:00:38 EST 2013
A future patch would like to be able to parse out an email address in
a file other than parsemail.py. Create a common emailutils module to
handle this.
Signed-off-by: Doug Anderson <dianders at chromium.org>
---
apps/patchwork/bin/parsemail.py | 54 ++---------------------
apps/patchwork/emailutils.py | 94 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 98 insertions(+), 50 deletions(-)
create mode 100644 apps/patchwork/emailutils.py
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b6eb97a..92d6bb3 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -26,13 +26,14 @@ import time
import operator
from email import message_from_file
try:
- from email.header import Header, decode_header
+ from email.header import Header
from email.utils import parsedate_tz, mktime_tz
except ImportError:
# Python 2.4 compatibility
- from email.Header import Header, decode_header
+ from email.Header import Header
from email.Utils import parsedate_tz, mktime_tz
+from patchwork.emailutils import clean_header, normalise_space, parse_from
from patchwork.parser import parse_patch
from patchwork.models import Patch, Project, Person, Comment, State, \
get_default_initial_patch_state
@@ -40,23 +41,6 @@ from django.contrib.auth.models import User
list_id_headers = ['List-ID', 'X-Mailing-List', 'X-list']
-whitespace_re = re.compile('\s+')
-def normalise_space(str):
- return whitespace_re.sub(' ', str).strip()
-
-def clean_header(header):
- """ Decode (possibly non-ascii) headers """
-
- def decode(fragment):
- (frag_str, frag_encoding) = fragment
- if frag_encoding:
- return frag_str.decode(frag_encoding)
- return frag_str.decode()
-
- fragments = map(decode, decode_header(header))
-
- return normalise_space(u' '.join(fragments))
-
def find_project(mail):
project = None
listid_res = [re.compile('.*<([^>]+)>.*', re.S),
@@ -84,37 +68,7 @@ def find_project(mail):
return project
def find_author(mail):
-
- from_header = clean_header(mail.get('From'))
- (name, email) = (None, None)
-
- # tuple of (regex, fn)
- # - where fn returns a (name, email) tuple from the match groups resulting
- # from re.match().groups()
- from_res = [
- # for "Firstname Lastname" <example at example.com> style addresses
- (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
-
- # for example at example.com (Firstname Lastname) style addresses
- (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))),
-
- # everything else
- (re.compile('(.*)'), (lambda g: (None, g[0]))),
- ]
-
- for regex, fn in from_res:
- match = regex.match(from_header)
- if match:
- (name, email) = fn(match.groups())
- break
-
- if email is None:
- raise Exception("Could not parse From: header")
-
- email = email.strip()
- if name is not None:
- name = name.strip()
-
+ name, email = parse_from(mail.get('From'))
new_person = False
try:
diff --git a/apps/patchwork/emailutils.py b/apps/patchwork/emailutils.py
new file mode 100644
index 0000000..2c906a9
--- /dev/null
+++ b/apps/patchwork/emailutils.py
@@ -0,0 +1,94 @@
+# Patchwork - automated patch tracking system
+# Copyright (C) 2008 Jeremy Kerr <jk at ozlabs.org>
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+
+
+import re
+
+try:
+ from email.header import decode_header
+except ImportError:
+ # Python 2.4 compatibility
+ from email.Header import decode_header
+
+whitespace_re = re.compile('\s+')
+def normalise_space(str):
+ return whitespace_re.sub(' ', str).strip()
+
+def clean_header(header):
+ """ Decode (possibly non-ascii) headers """
+
+ def decode(fragment):
+ (frag_str, frag_encoding) = fragment
+ if frag_encoding:
+ return frag_str.decode(frag_encoding)
+ return frag_str.decode()
+
+ fragments = map(decode, decode_header(header))
+
+ return normalise_space(u' '.join(fragments))
+
+def parse_from(from_header):
+ """Parse a "From" header into a (unicode) name and email address.
+
+ >>> parse_from("=?utf-8?b?RG/DvGc=?= Anderson <dianders at chromium.org>")
+ (u'Do\\xfcg Anderson', u'dianders at chromium.org')
+ >>> parse_from("Doug =?utf-8?b?QW5kw6lyc29u?= <dianders at chromium.org>")
+ (u'Doug And\\xe9rson', u'dianders at chromium.org')
+ >>> parse_from("=?utf-8?b?RG/DvGcgQW5kw6lyc29u?= <dianders at chromium.org>")
+ (u'Do\\xfcg And\\xe9rson', u'dianders at chromium.org')
+ >>> parse_from("Doug Anderson <dianders at chromium.org>")
+ (u'Doug Anderson', u'dianders at chromium.org')
+
+ @from_header: An ASCII string containing the "From" header, possibly
+ encoded using RFC 2047 encoded-words.
+ @return: A tuple (name, email) where name is a unicode version of the name
+ and email is the email address with no name.
+ """
+ from_header = clean_header(from_header)
+ (name, email) = (None, None)
+
+ # tuple of (regex, fn)
+ # - where fn returns a (name, email) tuple from the match groups resulting
+ # from re.match().groups()
+ from_res = [
+ # for "Firstname Lastname" <example at example.com> style addresses
+ (re.compile('"?(.*?)"?\s*<([^>]+)>'), (lambda g: (g[0], g[1]))),
+
+ # for example at example.com (Firstname Lastname) style addresses
+ (re.compile('"?(.*?)"?\s*\(([^\)]+)\)'), (lambda g: (g[1], g[0]))),
+
+ # everything else
+ (re.compile('(.*)'), (lambda g: (None, g[0]))),
+ ]
+
+ for regex, fn in from_res:
+ match = regex.match(from_header)
+ if match:
+ (name, email) = fn(match.groups())
+ break
+
+ if email is None:
+ raise Exception("Could not parse From: header")
+
+ email = email.strip()
+ if name is not None:
+ name = name.strip()
+
+ return name, email
+
--
1.8.4.1
More information about the Patchwork
mailing list