[PATCH v3 01/10] trivial: Cleanup of 'parser'
Stephen Finucane
stephenfinucane at hotmail.com
Tue Aug 30 08:50:21 AEST 2016
From: Stephen Finucane <stephen.finucane at intel.com>
Do some cleanup of the file by removing excess whitespace, adding some
documentation, removing shadowing of built-in names and renaming some
functions to more accurately reflect their purpose.
Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
Reviewed-by: Andy Doan <andy.doan at linaro.org>
---
patchwork/bin/parsemail.py | 4 +--
patchwork/models.py | 4 +--
patchwork/parser.py | 74 +++++++++++++++++++++-------------------------
3 files changed, 38 insertions(+), 44 deletions(-)
diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
index 48f809f..8913076 100755
--- a/patchwork/bin/parsemail.py
+++ b/patchwork/bin/parsemail.py
@@ -44,7 +44,7 @@ from django.utils.six.moves import map
from patchwork.models import (Patch, Project, Person, Comment, State,
DelegationRule, Submission, CoverLetter,
get_default_initial_patch_state)
-from patchwork.parser import parse_patch, patch_get_filenames
+from patchwork.parser import parse_patch, find_filenames
LOGGER = logging.getLogger(__name__)
@@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None):
delegate = find_delegate(mail)
if not delegate and diff:
- filenames = patch_get_filenames(diff)
+ filenames = find_filenames(diff)
delegate = auto_delegate(project, filenames)
patch = Patch(
diff --git a/patchwork/models.py b/patchwork/models.py
index 521b20c..ee2ee63 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -35,7 +35,7 @@ from django.utils.functional import cached_property
from django.utils.six.moves import filter
from patchwork.fields import HashField
-from patchwork.parser import extract_tags, hash_patch
+from patchwork.parser import extract_tags, hash_diff
@python_2_unicode_compatible
@@ -364,7 +364,7 @@ class Patch(Submission):
self.state = get_default_initial_patch_state()
if self.hash is None and self.diff is not None:
- self.hash = hash_patch(self.diff).hexdigest()
+ self.hash = hash_diff(self.diff).hexdigest()
super(Patch, self).save(**kwargs)
diff --git a/patchwork/parser.py b/patchwork/parser.py
index 8bf9b21..f173431 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
_filename_re = re.compile('^(---|\+\+\+) (\S+)')
-def parse_patch(text):
+def parse_patch(content):
+ """Split a mail's contents into a diff and comment.
+
+ This is a state machine that takes a patch, generally in UNIX mbox
+ format, and splits it into the component comments and diff.
+
+ Args:
+ content: The mail contents to be split
+
+ Returns:
+ A tuple containing the diff and comment. Either one or both of
+ these can be empty.
+
+ Raises:
+ Exception: The state machine transitioned to an invalid state.
+ """
patchbuf = ''
commentbuf = ''
buf = ''
@@ -68,7 +83,7 @@ def parse_patch(text):
lc = (0, 0)
hunk = 0
- for line in text.split('\n'):
+ for line in content.split('\n'):
line += '\n'
if state == 0:
@@ -76,14 +91,11 @@ def parse_patch(text):
or line.startswith('Index: '):
state = 1
buf += line
-
elif line.startswith('--- '):
state = 2
buf += line
-
else:
commentbuf += line
-
elif state == 1:
buf += line
if line.startswith('--- '):
@@ -91,25 +103,20 @@ def parse_patch(text):
if line.startswith(('rename from ', 'rename to ')):
state = 6
-
elif state == 2:
if line.startswith('+++ '):
state = 3
buf += line
-
elif hunk:
state = 1
buf += line
-
else:
state = 0
commentbuf += buf + line
buf = ''
-
elif state == 3:
match = _hunk_re.match(line)
if match:
-
def fn(x):
if not x:
return 1
@@ -120,26 +127,21 @@ def parse_patch(text):
state = 4
patchbuf += buf + line
buf = ''
-
elif line.startswith('--- '):
patchbuf += buf + line
buf = ''
state = 2
-
elif hunk and line.startswith('\ No newline at end of file'):
# If we had a hunk and now we see this, it's part of the patch,
# and we're still expecting another @@ line.
patchbuf += line
-
elif hunk:
state = 1
buf += line
-
else:
state = 0
commentbuf += buf + line
buf = ''
-
elif state == 4 or state == 5:
if line.startswith('-'):
lc[0] -= 1
@@ -159,21 +161,17 @@ def parse_patch(text):
hunk += 1
else:
state = 5
-
elif state == 6:
if line.startswith(('rename to ', 'rename from ')):
patchbuf += buf + line
buf = ''
-
elif line.startswith('--- '):
patchbuf += buf + line
buf = ''
state = 2
-
else:
buf += line
state = 1
-
else:
raise Exception("Unknown state %d! (line '%s')" % (state, line))
@@ -185,19 +183,19 @@ def parse_patch(text):
if commentbuf == '':
commentbuf = None
- return (patchbuf, commentbuf)
+ return patchbuf, commentbuf
-def hash_patch(str):
+def hash_diff(diff):
+ """Generate a hash from a diff."""
# normalise spaces
- str = str.replace('\r', '')
- str = str.strip() + '\n'
+ diff = diff.replace('\r', '')
+ diff = diff.strip() + '\n'
prefixes = ['-', '+', ' ']
hash = hashlib.sha1()
- for line in str.split('\n'):
-
+ for line in diff.split('\n'):
if len(line) <= 0:
continue
@@ -213,7 +211,6 @@ def hash_patch(str):
filename += '/'.join(filename_match.group(2).split('/')[1:])
line = filename_match.group(1) + ' ' + filename
-
elif hunk_match:
# remove line numbers, but leave line counts
def fn(x):
@@ -222,11 +219,9 @@ def hash_patch(str):
return int(x)
line_nos = list(map(fn, hunk_match.groups()))
line = '@@ -%d +%d @@' % tuple(line_nos)
-
elif line[0] in prefixes:
# if we have a +, - or context line, leave as-is
pass
-
else:
# other lines are ignored
continue
@@ -246,15 +241,15 @@ def extract_tags(content, tags):
return counts
-def patch_get_filenames(str):
+def find_filenames(diff):
+ """Find files changed in a given diff."""
# normalise spaces
- str = str.replace('\r', '')
- str = str.strip() + '\n'
+ diff = diff.replace('\r', '')
+ diff = diff.strip() + '\n'
filenames = {}
- for line in str.split('\n'):
-
+ for line in diff.split('\n'):
if len(line) <= 0:
continue
@@ -291,21 +286,20 @@ def main(args):
# decode from (assumed) UTF-8
content = sys.stdin.read().decode('utf-8')
-
- (patch, comment) = parse_patch(content)
+ patch, comment = parse_patch(content)
if options.print_hash and patch:
- print(hash_patch(patch).hexdigest())
+ print(hash_diff(patch).hexdigest())
if options.print_patch and patch:
- print("Patch: ------\n" + patch)
+ print('Patch: ------\n' + patch)
if options.print_comment and comment:
- print("Comment: ----\n" + comment)
+ print('Comment: ----\n' + comment)
if options.print_filenames:
- filenames = patch_get_filenames(content)
- print("File names: ----\n" + '\n'.join(filenames))
+ filenames = find_filenames(content)
+ print('File names: ----\n' + '\n'.join(filenames))
if __name__ == '__main__':
import sys
--
2.7.4
More information about the Patchwork
mailing list