[PATCH] parsemail: Fallback to common charsets when charset is None or x-unknown
Jeremy Kerr
jk at ozlabs.org
Mon Jul 14 12:21:32 EST 2014
From: Siddhesh Poyarekar <siddhesh at redhat.com>
We recently encountered a case in our glibc patchwork instance on
sourceware, where a patch was dropped because it had x-unknown
charset.
This change falls back to a set of common encodings (instead of just
utf-8) when the charset is not specified or is not recognised (e.g. x-unknown).
Minor changes and testcase by Jeremy Kerr <jk at ozlabs.org>
Signed-off-by: Siddhesh Poyarekar <siddhesh at redhat.com>
Signed-off-by: Jeremy Kerr <jk at ozlabs.org>
---
apps/patchwork/bin/parsemail.py | 40 ++++-
apps/patchwork/tests/mail/0010-invalid-charset.mbox | 91 ++++++++++++
apps/patchwork/tests/test_patchparser.py | 11 +
3 files changed, 136 insertions(+), 6 deletions(-)
diff --git a/apps/patchwork/bin/parsemail.py b/apps/patchwork/bin/parsemail.py
index b6eb97a..2a4866f 100755
--- a/apps/patchwork/bin/parsemail.py
+++ b/apps/patchwork/bin/parsemail.py
@@ -24,6 +24,7 @@ import re
import datetime
import time
import operator
+import codecs
from email import message_from_file
try:
from email.header import Header, decode_header
@@ -147,6 +148,13 @@ def find_pull_request(content):
return match.group(1)
return None
+def try_decode(payload, charset):
+ try:
+ payload = unicode(payload, charset)
+ except UnicodeDecodeError:
+ return None
+ return payload
+
def find_content(project, mail):
patchbuf = None
commentbuf = ''
@@ -157,15 +165,35 @@ def find_content(project, mail):
continue
payload = part.get_payload(decode=True)
- charset = part.get_content_charset()
subtype = part.get_content_subtype()
- # if we don't have a charset, assume utf-8
- if charset is None:
- charset = 'utf-8'
-
if not isinstance(payload, unicode):
- payload = unicode(payload, charset)
+ charset = part.get_content_charset()
+
+ # Check that we have a charset that we understand. Otherwise,
+ # ignore it and fall back to our standard set.
+ if charset is not None:
+ try:
+ codec = codecs.lookup(charset)
+ except LookupError:
+ charset = None
+
+ # If there is no charset or if it is unknown, then try some common
+ # charsets before we fail.
+ if charset is None:
+ try_charsets = ['utf-8', 'windows-1252', 'iso-8859-1']
+ else:
+ try_charsets = [charset]
+
+ for cset in try_charsets:
+ decoded_payload = try_decode(payload, cset)
+ if decoded_payload is not None:
+ break
+ payload = decoded_payload
+
+ # Could not find a valid decoded payload. Fail.
+ if payload is None:
+ return (None, None)
if subtype in ['x-patch', 'x-diff']:
patchbuf = payload
diff --git a/apps/patchwork/tests/mail/0010-invalid-charset.mbox b/apps/patchwork/tests/mail/0010-invalid-charset.mbox
new file mode 100644
index 0000000..a8614ef
--- /dev/null
+++ b/apps/patchwork/tests/mail/0010-invalid-charset.mbox
@@ -0,0 +1,91 @@
+From libc-alpha-return-50517-siddhesh=redhat.com at sourceware.org Thu Jun 5 10:36:33 2014
+Received: (qmail 11948 invoked by alias); 4 Jun 2014 17:51:01 -0000
+Mailing-List: contact libc-alpha-help at sourceware.org; run by ezmlm
+List-Id: <libc-alpha.sourceware.org>
+Sender: libc-alpha-owner at sourceware.org
+Date: Wed, 4 Jun 2014 17:50:46 +0000
+From: "Joseph S. Myers" <joseph at codesourcery.com>
+To: <libc-alpha at sourceware.org>
+Subject: Fix pow overflow in non-default rounding modes (bug 16315)
+Message-ID: <Pine.LNX.4.64.1406041749420.3719 at digraph.polyomino.org.uk>
+MIME-Version: 1.0
+Content-Type: multipart/mixed;
+ boundary="-1152306461-1522705971-1401904246=:3719"
+Content-Length: 24171
+
+---1152306461-1522705971-1401904246=:3719
+Content-Type: text/plain; charset="none"
+Content-Transfer-Encoding: QUOTED-PRINTABLE
+
+This patch, relative to a tree with
+<https://sourceware.org/ml/libc-alpha/2014-06/msg00076.html> applied,
+fixes bug 16315, bad pow handling of overflow/underflow in non-default
+rounding modes. Tests of pow are duly converted to ALL_RM_TEST to run
+all tests in all rounding modes.
+
+There are two main issues here. First, various implementations
+compute a negative result by negating a positive result, but this
+yields inappropriate overflow / underflow values for directed
+rounding, so either overflow / underflow results need recomputing in
+the correct sign, or the relevant overflowing / underflowing operation
+needs to be made to have a result of the correct sign. Second, the
+dbl-64 implementation sets FE_TONEAREST internally; in the overflow /
+underflow case, the result needs recomputing in the original rounding
+mode.
+
+Tested x86_64 and x86 and ulps updated accordingly.
+
+(auto-libm-test-out diffs omitted below.)
+
+2014-06-04 Joseph Myers <joseph at codesourcery.com>
+
+=09[BZ #16315]
+=09* sysdeps/i386/fpu/e_pow.S (__ieee754_pow): Ensure possibly
+=09overflowing or underflowing operations take place with sign of
+=09result.
+=09* sysdeps/i386/fpu/e_powf.S (__ieee754_powf): Likewise.
+=09* sysdeps/i386/fpu/e_powl.S (__ieee754_powl): Likewise.
+=09* sysdeps/ieee754/dbl-64/e_pow.c: Include <math.h>.
+=09(__ieee754_pow): Recompute overflowing and underflowing results in
+=09original rounding mode.
+=09* sysdeps/x86/fpu/powl_helper.c: Include <stdbool.h>.
+=09(__powl_helper): Allow negative argument X and scale negated value
+=09as needed. Avoid passing value outside [-1, 1] to f2xm1.
+=09* sysdeps/x86_64/fpu/e_powl.S (__ieee754_powl): Ensure possibly
+=09overflowing or underflowing operations take place with sign of
+=09result.
+=09* sysdeps/x86_64/fpu/multiarch/e_pow.c [HAVE_FMA4_SUPPORT]:
+=09Include <math.h>.
+=09* math/auto-libm-test-in: Add more tests of pow.
+=09* math/auto-libm-test-out: Regenerated.
+=09* math/libm-test.inc (pow_test): Use ALL_RM_TEST.
+=09(pow_tonearest_test_data): Remove.
+=09(pow_test_tonearest): Likewise.
+=09(pow_towardzero_test_data): Likewise.
+=09(pow_test_towardzero): Likewise.
+=09(pow_downward_test_data): Likewise.
+=09(pow_test_downward): Likewise.
+=09(pow_upward_test_data): Likewise.
+=09(pow_test_upward): Likewise.
+=09(main): Don't call removed functions.
+=09* sysdeps/i386/fpu/libm-test-ulps: Update.
+=09* sysdeps/x86_64/fpu/libm-test-ulps: Likewise.
+
+diff --git a/sysdeps/x86_64/fpu/multiarch/e_pow.c b/sysdeps/x86_64/fpu/mult=
+iarch/e_pow.c
+index a740b6c..433cce0 100644
+--- a/sysdeps/x86_64/fpu/multiarch/e_pow.c
++++ b/sysdeps/x86_64/fpu/multiarch/e_pow.c
+@@ -1,5 +1,6 @@
+ #ifdef HAVE_FMA4_SUPPORT
+ # include <init-arch.h>
++# include <math.h>
+ # include <math_private.h>
+=20
+ extern double __ieee754_pow_sse2 (double, double);
+
+--=20
+Joseph S. Myers
+joseph at codesourcery.com
+---1152306461-1522705971-1401904246=:3719--
+
diff --git a/apps/patchwork/tests/test_patchparser.py b/apps/patchwork/tests/test_patchparser.py
index 0496a69..d9a24c1 100644
--- a/apps/patchwork/tests/test_patchparser.py
+++ b/apps/patchwork/tests/test_patchparser.py
@@ -422,6 +422,17 @@ class CVSFormatPatchTest(MBoxPatchTest):
self.assertTrue(comment is not None)
self.assertTrue(patch.content.startswith('Index'))
+class CharsetFallbackPatchTest(MBoxPatchTest):
+ """ Test mail with an invalid charset name, and check that we can parse
+ with one of the fallback encodings"""
+
+ mail_file = '0010-invalid-charset.mbox'
+
+ def testPatch(self):
+ (patch, comment) = find_content(self.project, self.mail)
+ self.assertTrue(patch is not None)
+ self.assertTrue(comment is not None)
+
class DelegateRequestTest(TestCase):
patch_filename = '0001-add-line.patch'
msgid = '<1 at example.com>'
More information about the Patchwork
mailing list