[PATCH 10/10] [RFC] Fuzzing harness

Daniel Axtens dja at axtens.net
Wed Jun 28 17:48:52 AEST 2017


 - install python-afl in Docker (py2 doesn't seem to work)

 - change parser to raise BrokenEmailException instead of ValueError
   for malformed mail. This lets the harness tell deliberate rejections
   apart from other, unexpected ValueErrors.

 - add a 'fuzz' management command to be used with py-afl-fuzz

Signed-off-by: Daniel Axtens <dja at axtens.net>
---
 patchwork/management/commands/fuzz.py | 88 +++++++++++++++++++++++++++++++++++
 patchwork/parser.py                   | 18 ++++---
 patchwork/tests/test_parser.py        |  5 +-
 tools/docker/Dockerfile               |  2 +
 tools/fuzzer_dict                     | 52 +++++++++++++++++++++
 5 files changed, 156 insertions(+), 9 deletions(-)
 create mode 100644 patchwork/management/commands/fuzz.py
 create mode 100644 tools/fuzzer_dict

diff --git a/patchwork/management/commands/fuzz.py b/patchwork/management/commands/fuzz.py
new file mode 100644
index 000000000000..c2c08bcfbec2
--- /dev/null
+++ b/patchwork/management/commands/fuzz.py
@@ -0,0 +1,88 @@
+# Patchwork - automated patch tracking system
+# Copyright (C) 2016 Stephen Finucane <stephen at that.guru>
+#
+# This file is part of the Patchwork package.
+#
+# Patchwork is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# Patchwork is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with Patchwork; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+
+import email
+import logging
+
+from django.core.management import base
+from django.utils import six
+
+from patchwork.models import Person
+from patchwork.models import Patch
+from patchwork.models import Series
+from patchwork.models import CoverLetter
+from patchwork.models import Comment
+from patchwork.models import SeriesReference
+from patchwork.parser import parse_mail
+from patchwork.parser import BrokenEmailException
+
+import afl
+afl.init()
+
+logger = logging.getLogger(__name__)
+
+
+class Command(base.BaseCommand):
+    help = 'Parse an mbox file and store any patch/comment found.'
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            'infile',
+            nargs=1,
+            type=str,
+            help='input mbox file')
+        parser.add_argument(
+            '--list-id',
+            help='mailing list ID. If not supplied, this will be '
+            'extracted from the mail headers.')
+
+    def cleanup(self):
+        Series.objects.all().delete()
+        SeriesReference.objects.all().delete()
+        Patch.objects.all().delete()
+        Comment.objects.all().delete()
+        CoverLetter.objects.all().delete()
+        Person.objects.all().delete()
+
+    def handle(self, *args, **options):
+        infile = options['infile'][0]
+
+        logger.info('Parsing mail loaded by filename')
+        try:
+            if six.PY3:
+                with open(infile, 'rb') as file_:
+                    mail = email.message_from_binary_file(file_)
+            else:
+                with open(infile) as file_:
+                    mail = email.message_from_file(file_)
+        except AttributeError:
+            logger.warning("Broken email ignored")
+            return
+
+        try:
+            parse_mail(mail, options['list_id'])
+            self.cleanup()
+        except BrokenEmailException:
+            logger.warning("Broken email ignored")
+            self.cleanup()
+        except Exception as E:
+            logger.exception('Error when parsing incoming email',
+                             extra={'mail': mail.as_string()})
+            self.cleanup()
+            raise E
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 46e6ca161574..0000eaeafa6f 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -54,6 +54,10 @@ SERIES_DELAY_INTERVAL = 10
 logger = logging.getLogger(__name__)
 
 
+class BrokenEmailException(Exception):
+    pass
+
+
 def normalise_space(value):
     whitespace_re = re.compile(r'\s+')
     return whitespace_re.sub(' ', value).strip()
@@ -293,7 +297,7 @@ def find_author(mail):
     from_header = clean_header(mail.get('From'))
 
     if not from_header:
-        raise ValueError("Invalid 'From' header")
+        raise BrokenEmailException("Invalid 'From' header")
 
     name, email = (None, None)
 
@@ -324,7 +328,7 @@ def find_author(mail):
             break
 
     if not email:
-        raise ValueError("Invalid 'From' header")
+        raise BrokenEmailException("Invalid 'From' header")
 
     email = email.strip()
     if name is not None:
@@ -627,7 +631,7 @@ def clean_subject(subject, drop_prefixes=None):
     subject = clean_header(subject)
 
     if not subject:
-        raise ValueError("Invalid 'Subject' header")
+        raise BrokenEmailException("Invalid 'Subject' header")
 
     if drop_prefixes is None:
         drop_prefixes = []
@@ -908,13 +912,13 @@ def parse_mail(mail, list_id=None):
     """
     # some basic sanity checks
     if 'From' not in mail:
-        raise ValueError("Missing 'From' header")
+        raise BrokenEmailException("Missing 'From' header")
 
     if 'Subject' not in mail:
-        raise ValueError("Missing 'Subject' header")
+        raise BrokenEmailException("Missing 'Subject' header")
 
     if 'Message-Id' not in mail:
-        raise ValueError("Missing 'Message-Id' header")
+        raise BrokenEmailException("Missing 'Message-Id' header")
 
     hint = clean_header(mail.get('X-Patchwork-Hint', ''))
     if hint and hint.lower() == 'ignore':
@@ -934,7 +938,7 @@ def parse_mail(mail, list_id=None):
 
     msgid = clean_header(mail.get('Message-Id'))
     if not msgid:
-        raise ValueError("Broken 'Message-Id' header")
+        raise BrokenEmailException("Broken 'Message-Id' header")
     msgid = msgid.strip()[:255]
 
     author = find_author(mail)
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index 80a559ec871b..19a9f856c0a8 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -43,6 +43,7 @@ from patchwork.parser import parse_series_marker
 from patchwork.parser import parse_version
 from patchwork.parser import split_prefixes
 from patchwork.parser import subject_check
+from patchwork.parser import BrokenEmailException
 from patchwork.tests import TEST_MAIL_DIR
 from patchwork.tests import TEST_FUZZ_DIR
 from patchwork.tests.utils import create_project
@@ -236,7 +237,7 @@ class SenderEncodingTest(TestCase):
 
     def test_empty(self):
         email = self._create_email('')
-        with self.assertRaises(ValueError):
+        with self.assertRaises(BrokenEmailException):
             find_author(email)
 
     def test_ascii_encoding(self):
@@ -838,7 +839,7 @@ class FuzzTest(TestCase):
         m = load_mail(file_path)
         try:
             parse_mail(m, list_id="patchwork.ozlabs.org")
-        except ValueError:
+        except BrokenEmailException:
             pass
 
     @skipIf(six.PY2, "breaks only on python3")
diff --git a/tools/docker/Dockerfile b/tools/docker/Dockerfile
index ff05707a6049..266603e3bdcf 100644
--- a/tools/docker/Dockerfile
+++ b/tools/docker/Dockerfile
@@ -48,6 +48,8 @@ RUN cat /tmp/bashrc >> /home/patchwork/.bashrc
 
 COPY tools/docker/entrypoint.sh /usr/local/bin/entrypoint.sh
 
+RUN apt-get install -y cython cython3; pip3 install python-afl; pip install python-afl
+
 ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
 USER patchwork
 WORKDIR /home/patchwork/patchwork
diff --git a/tools/fuzzer_dict b/tools/fuzzer_dict
new file mode 100644
index 000000000000..3aef26dc6154
--- /dev/null
+++ b/tools/fuzzer_dict
@@ -0,0 +1,52 @@
+mid1="Message-ID:"
+mid2="Message-Id:"
+from1="From:"
+from2="From "
+to="To:"
+cc="CC:"
+cc2="Cc:"
+subject="Subject:"
+date="Date:"
+ct="Content-Type:"
+mime="This is a multi-part message in MIME format."
+tp="text/plain;"
+csiso="charset=ISO-8859-1; "
+csutf="charset=\"utf-8\"; "
+utf8="Rafa\x25\x82 Mi\xc5\x82ecki <zajec5 at gmail.com>"
+sob="Signed-off-by: "
+gitb="--- "
+cvsb="==="
+rb="Reviewed-by: "
+ab="Acked-by: "
+utf8enc="=?utf-8?b?UmFmYcWCIE1pxYJlY2tp?="
+utf8brokenenc="=?UTF-8?q?Rafa=FF=FF=20Mi=FF=FFecki?="
+diff="diff "
+index1="index "
+index2="Index: "
+list1="List-ID"
+list2="X-Mailing-List"
+list3="X-list"
+irt="In-Reply-To:"
+ref="References:"
+of=" of "
+xpatch="x-patch"
+xdiff="x-diff"
+text="text"
+plain="plain"
+patch="PATCH"
+re="Re:"
+fwd="Fwd:"
+sig="-- "
+foot="_____"
+atat="@@"
+rename1="rename from "
+rename2="rename to "
+plus="+++ "
+nnl="\\ No newline at end of file"
+pr1="The following changes since commit"
+pr2="are available in the git repository at:\x0a"
+xps="X-Patchwork-State:"
+xpd="X-Patchwork-Delegate:"
+xph="X-Patchwork-Hint:"
+ignore="ignore"
+devnull="/dev/null"
\ No newline at end of file
-- 
2.11.0



More information about the Patchwork mailing list