[PATCH 01/11] trivial: Cleanup of 'parser'
Andy Doan
andy.doan at linaro.org
Wed Jul 20 07:32:50 AEST 2016
On 07/13/2016 04:40 AM, Stephen Finucane wrote:
> Do some cleanup of the file by removing excess whitespace, adding some
> documentation, removing shadowing of keywords and renaming some
> functions to more accurately reflect their purpose.
>
> Signed-off-by: Stephen Finucane <stephen.finucane at intel.com>
Reviewed-by: Andy Doan <andy.doan at linaro.org>
> ---
> patchwork/bin/parsemail.py | 4 +-
> patchwork/models.py | 4 +-
> patchwork/parser.py | 74 ++++++++++++++++++++-----------------------
> 3 files changed, 38 insertions(+), 44 deletions(-)
>
> diff --git a/patchwork/bin/parsemail.py b/patchwork/bin/parsemail.py
> index 8648d29..56cd126 100755
> --- a/patchwork/bin/parsemail.py
> +++ b/patchwork/bin/parsemail.py
> @@ -44,7 +44,7 @@ from django.utils.six.moves import map
> from patchwork.models import (Patch, Project, Person, Comment, State,
> DelegationRule, Submission, CoverLetter,
> get_default_initial_patch_state)
> -from patchwork.parser import parse_patch, patch_get_filenames
> +from patchwork.parser import parse_patch, find_filenames
>
> LOGGER = logging.getLogger(__name__)
>
> @@ -494,7 +494,7 @@ def parse_mail(mail, list_id=None):
>
> delegate = find_delegate(mail)
> if not delegate and diff:
> - filenames = patch_get_filenames(diff)
> + filenames = find_filenames(diff)
> delegate = auto_delegate(project, filenames)
>
> patch = Patch(
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 521b20c..ee2ee63 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -35,7 +35,7 @@ from django.utils.functional import cached_property
> from django.utils.six.moves import filter
>
> from patchwork.fields import HashField
> -from patchwork.parser import extract_tags, hash_patch
> +from patchwork.parser import extract_tags, hash_diff
>
>
> @python_2_unicode_compatible
> @@ -364,7 +364,7 @@ class Patch(Submission):
> self.state = get_default_initial_patch_state()
>
> if self.hash is None and self.diff is not None:
> - self.hash = hash_patch(self.diff).hexdigest()
> + self.hash = hash_diff(self.diff).hexdigest()
>
> super(Patch, self).save(**kwargs)
>
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 8bf9b21..f173431 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -33,7 +33,22 @@ _hunk_re = re.compile('^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
> _filename_re = re.compile('^(---|\+\+\+) (\S+)')
>
>
> -def parse_patch(text):
> +def parse_patch(content):
> + """Split a mail's contents into a diff and comment.
> +
> + This is a state machine that takes a patch, generally in UNIX mbox
> + format, and splits it into the component comments and diff.
> +
> + Args:
> + patch: The patch to be split
> +
> + Returns:
> + A tuple containing the diff and comment. Either one or both of
> + these can be empty.
> +
> + Raises:
> + Exception: The state machine transitioned to an invalid state.
> + """
> patchbuf = ''
> commentbuf = ''
> buf = ''
> @@ -68,7 +83,7 @@ def parse_patch(text):
> lc = (0, 0)
> hunk = 0
>
> - for line in text.split('\n'):
> + for line in content.split('\n'):
> line += '\n'
>
> if state == 0:
> @@ -76,14 +91,11 @@ def parse_patch(text):
> or line.startswith('Index: '):
> state = 1
> buf += line
> -
> elif line.startswith('--- '):
> state = 2
> buf += line
> -
> else:
> commentbuf += line
> -
> elif state == 1:
> buf += line
> if line.startswith('--- '):
> @@ -91,25 +103,20 @@ def parse_patch(text):
>
> if line.startswith(('rename from ', 'rename to ')):
> state = 6
> -
> elif state == 2:
> if line.startswith('+++ '):
> state = 3
> buf += line
> -
> elif hunk:
> state = 1
> buf += line
> -
> else:
> state = 0
> commentbuf += buf + line
> buf = ''
> -
> elif state == 3:
> match = _hunk_re.match(line)
> if match:
> -
> def fn(x):
> if not x:
> return 1
> @@ -120,26 +127,21 @@ def parse_patch(text):
> state = 4
> patchbuf += buf + line
> buf = ''
> -
> elif line.startswith('--- '):
> patchbuf += buf + line
> buf = ''
> state = 2
> -
> elif hunk and line.startswith('\ No newline at end of file'):
> # If we had a hunk and now we see this, it's part of the patch,
> # and we're still expecting another @@ line.
> patchbuf += line
> -
> elif hunk:
> state = 1
> buf += line
> -
> else:
> state = 0
> commentbuf += buf + line
> buf = ''
> -
> elif state == 4 or state == 5:
> if line.startswith('-'):
> lc[0] -= 1
> @@ -159,21 +161,17 @@ def parse_patch(text):
> hunk += 1
> else:
> state = 5
> -
> elif state == 6:
> if line.startswith(('rename to ', 'rename from ')):
> patchbuf += buf + line
> buf = ''
> -
> elif line.startswith('--- '):
> patchbuf += buf + line
> buf = ''
> state = 2
> -
> else:
> buf += line
> state = 1
> -
> else:
> raise Exception("Unknown state %d! (line '%s')" % (state, line))
>
> @@ -185,19 +183,19 @@ def parse_patch(text):
> if commentbuf == '':
> commentbuf = None
>
> - return (patchbuf, commentbuf)
> + return patchbuf, commentbuf
>
>
> -def hash_patch(str):
> +def hash_diff(diff):
> + """Generate a hash from a diff."""
> # normalise spaces
> - str = str.replace('\r', '')
> - str = str.strip() + '\n'
> + diff = diff.replace('\r', '')
> + diff = diff.strip() + '\n'
>
> prefixes = ['-', '+', ' ']
> hash = hashlib.sha1()
>
> - for line in str.split('\n'):
> -
> + for line in diff.split('\n'):
> if len(line) <= 0:
> continue
>
> @@ -213,7 +211,6 @@ def hash_patch(str):
> filename += '/'.join(filename_match.group(2).split('/')[1:])
>
> line = filename_match.group(1) + ' ' + filename
> -
> elif hunk_match:
> # remove line numbers, but leave line counts
> def fn(x):
> @@ -222,11 +219,9 @@ def hash_patch(str):
> return int(x)
> line_nos = list(map(fn, hunk_match.groups()))
> line = '@@ -%d +%d @@' % tuple(line_nos)
> -
> elif line[0] in prefixes:
> # if we have a +, - or context line, leave as-is
> pass
> -
> else:
> # other lines are ignored
> continue
> @@ -246,15 +241,15 @@ def extract_tags(content, tags):
> return counts
>
>
> -def patch_get_filenames(str):
> +def find_filenames(diff):
> + """Find files changes in a given diff."""
> # normalise spaces
> - str = str.replace('\r', '')
> - str = str.strip() + '\n'
> + diff = diff.replace('\r', '')
> + diff = diff.strip() + '\n'
>
> filenames = {}
>
> - for line in str.split('\n'):
> -
> + for line in diff.split('\n'):
> if len(line) <= 0:
> continue
>
> @@ -291,21 +286,20 @@ def main(args):
>
> # decode from (assumed) UTF-8
> content = sys.stdin.read().decode('utf-8')
> -
> - (patch, comment) = parse_patch(content)
> + patch, comment = parse_patch(content)
>
> if options.print_hash and patch:
> - print(hash_patch(patch).hexdigest())
> + print(hash_diff(patch).hexdigest())
>
> if options.print_patch and patch:
> - print("Patch: ------\n" + patch)
> + print('Patch: ------\n' + patch)
>
> if options.print_comment and comment:
> - print("Comment: ----\n" + comment)
> + print('Comment: ----\n' + comment)
>
> if options.print_filenames:
> - filenames = patch_get_filenames(content)
> - print("File names: ----\n" + '\n'.join(filenames))
> + filenames = find_filenames(content)
> + print('File names: ----\n' + '\n'.join(filenames))
>
> if __name__ == '__main__':
> import sys
>
More information about the Patchwork
mailing list