[PATCH v3 3/5] hasher: Create hasher module

Daniel Axtens dja at axtens.net
Fri Dec 2 15:49:35 AEDT 2016


Hi Stephen,

As far as I can tell, this is mostly a code move that does not change the
functionality of the hasher. On that basis, and having seen how you use it
later in the series:

Reviewed-by: Daniel Axtens <dja at axtens.net>

Regards,
Daniel

Stephen Finucane <stephen at that.guru> writes:

> This exposes the hashing functionality of Patchwork without requiring
> Django or similar dependencies.
>
> Signed-off-by: Stephen Finucane <stephen at that.guru>
> Cc: Paul Jakma <paul at jakma.org>
> Cc: Tom Rini <trini at konsulko.com>
> ---
>  patchwork/hasher.py | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
>  patchwork/models.py | 52 ++------------------------------
>  patchwork/parser.py |  2 --
>  3 files changed, 89 insertions(+), 52 deletions(-)
>  create mode 100644 patchwork/hasher.py
>
> diff --git a/patchwork/hasher.py b/patchwork/hasher.py
> new file mode 100644
> index 0000000..d9bb6c2
> --- /dev/null
> +++ b/patchwork/hasher.py
> @@ -0,0 +1,87 @@
> +#!/usr/bin/env python
> +#
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2008 Jeremy Kerr <jk at ozlabs.org>
> +#
> +# This file is part of the Patchwork package.
> +#
> +# Patchwork is free software; you can redistribute it and/or modify
> +# it under the terms of the GNU General Public License as published by
> +# the Free Software Foundation; either version 2 of the License, or
> +# (at your option) any later version.
> +#
> +# Patchwork is distributed in the hope that it will be useful,
> +# but WITHOUT ANY WARRANTY; without even the implied warranty of
> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> +# GNU General Public License for more details.
> +#
> +# You should have received a copy of the GNU General Public License
> +# along with Patchwork; if not, write to the Free Software
> +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> +
> +"""Hash generation for diffs."""
> +
> +import hashlib
> +import re
> +import sys
> +
> +HUNK_RE = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
> +FILENAME_RE = re.compile(r'^(---|\+\+\+) (\S+)')
> +
> +
> +def hash_diff(diff):
> +    """Generate a hash from a diff."""
> +
> +    # normalise spaces
> +    diff = diff.replace('\r', '')
> +    diff = diff.strip() + '\n'
> +
> +    prefixes = ['-', '+', ' ']
> +    hashed = hashlib.sha1()
> +
> +    for line in diff.split('\n'):
> +        if len(line) <= 0:
> +            continue
> +
> +        hunk_match = HUNK_RE.match(line)
> +        filename_match = FILENAME_RE.match(line)
> +
> +        if filename_match:
> +            # normalise -p1 top-directories
> +            if filename_match.group(1) == '---':
> +                filename = 'a/'
> +            else:
> +                filename = 'b/'
> +            filename += '/'.join(filename_match.group(2).split('/')[1:])
> +
> +            line = filename_match.group(1) + ' ' + filename
> +        elif hunk_match:
> +            # remove line numbers, but leave line counts
> +            def fn(x):
> +                if not x:
> +                    return 1
> +                return int(x)
> +            line_nos = list(map(fn, hunk_match.groups()))
> +            line = '@@ -%d +%d @@' % tuple(line_nos)
> +        elif line[0] in prefixes:
> +            # if we have a +, - or context line, leave as-is
> +            pass
> +        else:
> +            # other lines are ignored
> +            continue
> +
> +        hashed.update((line + '\n').encode('utf-8'))
> +
> +    return hashed.hexdigest()
> +
> +
> +def main(args):
> +    """Hash a diff provided by stdin.
> +
> +    This is required by scripts found in /tools
> +    """
> +    print(hash_diff('\n'.join(sys.stdin.readlines())))
> +
> +
> +if __name__ == '__main__':
> +    sys.exit(main(sys.argv))
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 15a2936..cff9587 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -22,7 +22,6 @@ from __future__ import absolute_import
>  
>  from collections import Counter, OrderedDict
>  import datetime
> -import hashlib
>  import random
>  import re
>  
> @@ -35,6 +34,7 @@ from django.utils.encoding import python_2_unicode_compatible
>  from django.utils.functional import cached_property
>  
>  from patchwork.fields import HashField
> +from patchwork.hasher import hash_diff
>  
>  
>  @python_2_unicode_compatible
> @@ -366,54 +366,6 @@ class Patch(SeriesMixin, Submission):
>  
>          return counts
>  
> -    @staticmethod
> -    def hash_diff(diff):
> -        """Generate a hash from a diff."""
> -        hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
> -        filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
> -
> -        # normalise spaces
> -        diff = diff.replace('\r', '')
> -        diff = diff.strip() + '\n'
> -
> -        prefixes = ['-', '+', ' ']
> -        hash = hashlib.sha1()
> -
> -        for line in diff.split('\n'):
> -            if len(line) <= 0:
> -                continue
> -
> -            hunk_match = hunk_re.match(line)
> -            filename_match = filename_re.match(line)
> -
> -            if filename_match:
> -                # normalise -p1 top-directories
> -                if filename_match.group(1) == '---':
> -                    filename = 'a/'
> -                else:
> -                    filename = 'b/'
> -                filename += '/'.join(filename_match.group(2).split('/')[1:])
> -
> -                line = filename_match.group(1) + ' ' + filename
> -            elif hunk_match:
> -                # remove line numbers, but leave line counts
> -                def fn(x):
> -                    if not x:
> -                        return 1
> -                    return int(x)
> -                line_nos = list(map(fn, hunk_match.groups()))
> -                line = '@@ -%d +%d @@' % tuple(line_nos)
> -            elif line[0] in prefixes:
> -                # if we have a +, - or context line, leave as-is
> -                pass
> -            else:
> -                # other lines are ignored
> -                continue
> -
> -            hash.update((line + '\n').encode('utf-8'))
> -
> -        return hash
> -
>      def _set_tag(self, tag, count):
>          if count == 0:
>              self.patchtag_set.filter(tag=tag).delete()
> @@ -441,7 +393,7 @@ class Patch(SeriesMixin, Submission):
>              self.state = get_default_initial_patch_state()
>  
>          if self.hash is None and self.diff is not None:
> -            self.hash = self.hash_diff(self.diff).hexdigest()
> +            self.hash = hash_diff(self.diff)
>  
>          super(Patch, self).save(**kwargs)
>  
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 9d1b79e..16cc53c 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -1,5 +1,3 @@
> -#!/usr/bin/env python
> -#
>  # Patchwork - automated patch tracking system
>  # Copyright (C) 2008 Jeremy Kerr <jk at ozlabs.org>
>  #
> -- 
> 2.9.3
>
> _______________________________________________
> Patchwork mailing list
> Patchwork at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/patchwork


More information about the Patchwork mailing list