[RFC 1/2] Rework tagging infrastructure

vkabatov at redhat.com vkabatov at redhat.com
Sat Mar 17 01:38:31 AEDT 2018


From: Veronika Kabatova <vkabatov at redhat.com>

Solve #113 and #57 GitHub issues, allow tags on comments, fix up
returning tags in the API.

Signed-off-by: Veronika Kabatova <vkabatov at redhat.com>
---
 docs/deployment/management.rst         |  16 +--
 patchwork/api/cover.py                 |  32 ++++-
 patchwork/api/patch.py                 |  41 ++++++-
 patchwork/management/commands/retag.py |  14 ++-
 patchwork/models.py                    | 210 ++++++++++++++++-----------------
 patchwork/templatetags/patch.py        |   3 +-
 patchwork/views/__init__.py            |   3 -
 patchwork/views/utils.py               |   8 +-
 8 files changed, 194 insertions(+), 133 deletions(-)

diff --git a/docs/deployment/management.rst b/docs/deployment/management.rst
index c50b7b6..c2ee2b6 100644
--- a/docs/deployment/management.rst
+++ b/docs/deployment/management.rst
@@ -119,17 +119,17 @@ retag
 
 .. program:: manage.py retag
 
-Update the tag (Ack/Review/Test) counts on existing patches.
+Update the tag (Ack/Review/Test) counts on existing submissions.
 
 .. code-block:: shell
 
-  ./manage.py retag [<patch_id>...]
+  ./manage.py retag [<submission_id>...]
 
-Patchwork extracts :ref:`tags <overview-tags>` from each patch it receives. By
-default, three tags are extracted, but it's possible to change this on a
-per-instance basis. Should you add additional tags, you may wish to scan older
-patches for these new tags.
+Patchwork extracts :ref:`tags <overview-tags>` from each submission it
+receives. By default, three tags are extracted, but it's possible to change
+this on a per-instance basis. Should you add additional tags, you may wish to
+scan older submissions for these new tags.
 
-.. option:: patch_id
+.. option:: submission_id
 
-   a patch ID number. If not supplied, all patches will be updated.
+   a submission ID number. If not supplied, all submissions will be updated.
diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py
index 1064504..2563c50 100644
--- a/patchwork/api/cover.py
+++ b/patchwork/api/cover.py
@@ -17,6 +17,8 @@
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+from django.db.models import Q
+
 import email.parser
 
 from rest_framework.generics import ListAPIView
@@ -28,7 +30,9 @@ from patchwork.api.filters import CoverLetterFilter
 from patchwork.api.embedded import PersonSerializer
 from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
+from patchwork.models import Comment
 from patchwork.models import CoverLetter
+from patchwork.models import RelatedTag
 
 
 class CoverLetterListSerializer(HyperlinkedModelSerializer):
@@ -37,15 +41,41 @@ class CoverLetterListSerializer(HyperlinkedModelSerializer):
     submitter = PersonSerializer(read_only=True)
     mbox = SerializerMethodField()
     series = SeriesSerializer(many=True, read_only=True)
+    tags = SerializerMethodField()
 
     def get_mbox(self, instance):
         request = self.context.get('request')
         return request.build_absolute_uri(instance.get_mbox_url())
 
+    def get_tags(self, instance):
+        tags = instance.project.tags
+        if not tags:
+            return {}
+
+        all_tags = {tag.name: [] for tag in tags}
+
+        related_tags = RelatedTag.objects.filter(
+            Q(submission__id=instance.id)
+            | Q(comment__id__in=[comment.id for comment in
+                                 instance.comments.all()])
+        )
+
+        for related_tag in related_tags:
+            all_tags[related_tag.tag.name].extend([value.value for value in
+                                                   related_tag.values.all()])
+
+        # Sanitize the values -- remove possible duplicates and unused tags
+        for tag in tags:
+            if all_tags[tag.name]:
+                all_tags[tag.name] = set(all_tags[tag.name])
+            else:
+                del(all_tags[tag.name])
+        return all_tags
+
     class Meta:
         model = CoverLetter
         fields = ('id', 'url', 'project', 'msgid', 'date', 'name', 'submitter',
-                  'mbox', 'series')
+                  'mbox', 'series', 'tags')
         read_only_fields = fields
         extra_kwargs = {
             'url': {'view_name': 'api-cover-detail'},
diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
index 115feff..79329ae 100644
--- a/patchwork/api/patch.py
+++ b/patchwork/api/patch.py
@@ -17,6 +17,8 @@
 # along with Patchwork; if not, write to the Free Software
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
+from django.db.models import Q
+
 import email.parser
 
 from django.utils.translation import ugettext_lazy as _
@@ -34,6 +36,8 @@ from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
 from patchwork.api.embedded import UserSerializer
 from patchwork.models import Patch
+from patchwork.models import RelatedTag
+from patchwork.models import SeriesPatch
 from patchwork.models import State
 from patchwork.parser import clean_subject
 
@@ -92,9 +96,40 @@ class PatchListSerializer(HyperlinkedModelSerializer):
         return request.build_absolute_uri(instance.get_mbox_url())
 
     def get_tags(self, instance):
-        # TODO(stephenfin): Make tags performant, possibly by reworking the
-        # model
-        return {}
+        tags = instance.project.tags
+        if not tags:
+            return {}
+
+        all_tags = {tag.name: [] for tag in tags}
+
+        patch_tags = RelatedTag.objects.filter(
+            Q(submission__id=instance.id)
+            | Q(comment__id__in=[comment.id for comment in
+                                 instance.comments.all()])
+        )
+        cover = SeriesPatch.objects.get(
+            patch_id=instance.id).series.cover_letter
+        if cover:
+            cover_tags = RelatedTag.objects.filter(
+                Q(submission__id=cover.submission_ptr_id)
+                | Q(comment__id__in=[comment.id for comment in
+                                     cover.comments.all()])
+            )
+        else:
+            cover_tags = RelatedTag.objects.none()
+
+        for related_tag in (patch_tags | cover_tags):
+            all_tags[related_tag.tag.name].extend([value.value for value in
+                                                   related_tag.values.all()])
+
+        # Sanitize the values -- remove possible duplicates and unused tags
+        for tag in tags:
+            if all_tags[tag.name]:
+                all_tags[tag.name] = set(all_tags[tag.name])
+            else:
+                del(all_tags[tag.name])
+
+        return all_tags
 
     def get_check(self, instance):
         return instance.combined_check_state
diff --git a/patchwork/management/commands/retag.py b/patchwork/management/commands/retag.py
index 8617ff4..f10f88e 100644
--- a/patchwork/management/commands/retag.py
+++ b/patchwork/management/commands/retag.py
@@ -19,15 +19,15 @@
 
 from django.core.management.base import BaseCommand
 
-from patchwork.models import Patch
+from patchwork.models import Submission
 
 
 class Command(BaseCommand):
-    help = 'Update the tag (Ack/Review/Test) counts on existing patches'
-    args = '[<patch_id>...]'
+    help = 'Update tags on existing submissions and related comments'
+    args = '[<submission_id>...]'
 
     def handle(self, *args, **options):
-        query = Patch.objects
+        query = Submission.objects
 
         if args:
             query = query.filter(id__in=args)
@@ -36,8 +36,10 @@ class Command(BaseCommand):
 
         count = query.count()
 
-        for i, patch in enumerate(query.iterator()):
-            patch.refresh_tag_counts()
+        for i, submission in enumerate(query.iterator()):
+            submission.refresh_tags()
+            for comment in submission.comments.all():
+                comment.refresh_tags()
             if (i % 10) == 0:
                 self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                 self.stdout.flush()
diff --git a/patchwork/models.py b/patchwork/models.py
index b249175..ed4cdd5 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -20,8 +20,6 @@
 
 from __future__ import absolute_import
 
-from collections import Counter
-from collections import OrderedDict
 import datetime
 import random
 import re
@@ -29,6 +27,10 @@ import re
 import django
 from django.conf import settings
 from django.contrib.auth.models import User
+from django.contrib.contenttypes.fields import GenericForeignKey
+from django.contrib.contenttypes.fields import GenericRelation
+from django.contrib.contenttypes.models import ContentType
+from django.db.models import Q
 from django.db import models
 from django.utils.encoding import python_2_unicode_compatible
 from django.utils.functional import cached_property
@@ -242,10 +244,6 @@ class Tag(models.Model):
                                       ' tag\'s count in the patch list view',
                                       default=True)
 
-    @property
-    def attr_name(self):
-        return 'tag_%d_count' % self.id
-
     def __str__(self):
         return self.name
 
@@ -253,62 +251,28 @@ class Tag(models.Model):
         ordering = ['abbrev']
 
 
-class PatchTag(models.Model):
-    patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
-    tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
-    count = models.IntegerField(default=1)
-
-    class Meta:
-        unique_together = [('patch', 'tag')]
-
-
-def get_default_initial_patch_state():
-    return State.objects.get(ordering=0)
-
-
-class PatchQuerySet(models.query.QuerySet):
+class TagValue(models.Model):
+    value = models.CharField(max_length=255, unique=True)
 
-    def with_tag_counts(self, project=None):
-        if project and not project.use_tags:
-            return self
-
-        # We need the project's use_tags field loaded for Project.tags().
-        # Using prefetch_related means we'll share the one instance of
-        # Project, and share the project.tags cache between all patch.project
-        # references.
-        qs = self.prefetch_related('project')
-        select = OrderedDict()
-        select_params = []
-
-        # All projects have the same tags, so we're good to go here
-        if project:
-            tags = project.tags
-        else:
-            tags = Tag.objects.all()
+    def __str__(self):
+        return self.value
 
-        for tag in tags:
-            select[tag.attr_name] = (
-                "coalesce("
-                "(SELECT count FROM patchwork_patchtag"
-                " WHERE patchwork_patchtag.patch_id="
-                "patchwork_patch.submission_ptr_id"
-                " AND patchwork_patchtag.tag_id=%s), 0)")
-            select_params.append(tag.id)
 
-        return qs.extra(select=select, select_params=select_params)
+class RelatedTag(models.Model):
+    # Allow association with both submissions and comments
+    content_type = models.ForeignKey(ContentType, on_delete=models.CASCADE)
+    object_id = models.PositiveIntegerField()
+    related_to = GenericForeignKey('content_type', 'object_id')
 
+    tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
+    values = models.ManyToManyField(TagValue)
 
-class PatchManager(models.Manager):
-    use_for_related_fields = True
-    # NOTE(stephenfin): This is necessary to silence a warning with Django >=
-    # 1.10. Remove when 1.10 is the minimum supported version.
-    silence_use_for_related_fields_deprecation = True
+    class Meta:
+        unique_together = [('content_type', 'object_id', 'tag')]
 
-    def get_queryset(self):
-        return PatchQuerySet(self.model, using=self.db)
 
-    def with_tag_counts(self, project):
-        return self.get_queryset().with_tag_counts(project)
+def get_default_initial_patch_state():
+    return State.objects.get(ordering=0)
 
 
 class EmailMixin(models.Model):
@@ -324,17 +288,54 @@ class EmailMixin(models.Model):
     submitter = models.ForeignKey(Person, on_delete=models.CASCADE)
     content = models.TextField(null=True, blank=True)
 
-    response_re = re.compile(
-        r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$',
-        re.M | re.I)
+    @staticmethod
+    def extract_tags(content, tags):
+        found_tags = {}
+        for tag in tags:
+            regex = re.compile(tag.pattern + '\s(.*)',
+                               re.MULTILINE | re.IGNORECASE)
+            found_tags[tag.name] = regex.findall(content)
 
-    @property
-    def patch_responses(self):
-        if not self.content:
-            return ''
+        return found_tags
+
+    def _set_tag_values(self, tag, value_list):
+        if not value_list:
+            self.related_tags.filter(tag=tag).delete()
+            return
+
+        obj_type = ContentType.objects.get_for_model(self)
+        relatedtag, _ = RelatedTag.objects.get_or_create(content_type=obj_type,
+                                                         object_id=self.id,
+                                                         tag=tag)
+        old_values = set([tag_value.value for tag_value
+                          in relatedtag.values.all()])
+
+        # Counting more acks by the same person multiple times doesn't make
+        # sense so let's use sets to get unique values.
+        for to_remove in relatedtag.values.filter(
+                value__in=old_values - set(value_list)):
+            relatedtag.values.remove(to_remove)
+
+        for new_value in set(value_list) - old_values:
+            # Maybe the value already exists but isn't associated with this
+            # content (eg person who acked another patch). Reuse it.
+            new, _ = TagValue.objects.get_or_create(value=new_value)
+            relatedtag.values.add(new)
+        relatedtag.save()
+
+    def refresh_tags(self):
+        if hasattr(self, 'project'):
+            tags = self.project.tags
+        else:
+            # Tagged comment
+            tags = self.submission.project.tags
+        if not tags:
+            return
 
-        return ''.join([match.group(0) + '\n' for match in
-                        self.response_re.finditer(self.content)])
+        if self.content:
+            related_tags = self.extract_tags(self.content, tags)
+            for tag in tags:
+                self._set_tag_values(tag, related_tags[tag.name])
 
     def save(self, *args, **kwargs):
         # Modifying a submission via admin interface changes '\n' newlines in
@@ -367,6 +368,7 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
     # submission metadata
 
     name = models.CharField(max_length=255)
+    related_tags = GenericRelation(RelatedTag, related_query_name='submission')
 
     # patchwork metadata
 
@@ -376,6 +378,10 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
     def __str__(self):
         return self.name
 
+    def save(self, *args, **kwargs):
+        super(Submission, self).save(*args, **kwargs)
+        self.refresh_tags()
+
     class Meta:
         ordering = ['date']
         unique_together = [('msgid', 'project')]
@@ -413,7 +419,6 @@ class Patch(SeriesMixin, Submission):
     diff = models.TextField(null=True, blank=True)
     commit_ref = models.CharField(max_length=255, null=True, blank=True)
     pull_url = models.CharField(max_length=255, null=True, blank=True)
-    tags = models.ManyToManyField(Tag, through=PatchTag)
 
     # patchwork metadata
 
@@ -423,40 +428,6 @@ class Patch(SeriesMixin, Submission):
     archived = models.BooleanField(default=False)
     hash = HashField(null=True, blank=True)
 
-    objects = PatchManager()
-
-    @staticmethod
-    def extract_tags(content, tags):
-        counts = Counter()
-
-        for tag in tags:
-            regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
-            counts[tag] = len(regex.findall(content))
-
-        return counts
-
-    def _set_tag(self, tag, count):
-        if count == 0:
-            self.patchtag_set.filter(tag=tag).delete()
-            return
-        patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
-        if patchtag.count != count:
-            patchtag.count = count
-            patchtag.save()
-
-    def refresh_tag_counts(self):
-        tags = self.project.tags
-        counter = Counter()
-
-        if self.content:
-            counter += self.extract_tags(self.content, tags)
-
-        for comment in self.comments.all():
-            counter = counter + self.extract_tags(comment.content, tags)
-
-        for tag in tags:
-            self._set_tag(tag, counter[tag])
-
     def save(self, *args, **kwargs):
         if not hasattr(self, 'state') or not self.state:
             self.state = get_default_initial_patch_state()
@@ -466,8 +437,6 @@ class Patch(SeriesMixin, Submission):
 
         super(Patch, self).save(**kwargs)
 
-        self.refresh_tag_counts()
-
     def is_editable(self, user):
         if not is_authenticated(user):
             return False
@@ -478,6 +447,36 @@ class Patch(SeriesMixin, Submission):
         return self.project.is_editable(user)
 
     @property
+    def dict_of_all_related_tags(self):
+        all_related_tags = {tag: [] for tag in self.project.tags}
+
+        patch_tags = RelatedTag.objects.filter(
+            Q(submission__id=self.id)
+            | Q(comment__id__in=[comment.id for comment in
+                                 self.comments.all()])
+        )
+        cover = SeriesPatch.objects.get(
+             patch_id=self.id).series.cover_letter
+        if cover:
+            cover_tags = RelatedTag.objects.filter(
+                Q(submission__id=cover.submission_ptr_id)
+                | Q(comment__id__in=[comment.id for comment in
+                                     cover.comments.all()])
+            )
+        else:
+            cover_tags = RelatedTag.objects.none()
+
+        for related_tag in (patch_tags | cover_tags):
+            all_related_tags[related_tag.tag].extend([value.value for value in
+                                                      related_tag.values.all()]
+                                                     )
+        # Remove possible duplicates
+        for key in all_related_tags:
+            all_related_tags[key] = set(all_related_tags[key])
+
+        return all_related_tags
+
+    @property
     def combined_check_state(self):
         """Return the combined state for all checks.
 
@@ -588,16 +587,11 @@ class Comment(EmailMixin, models.Model):
     submission = models.ForeignKey(Submission, related_name='comments',
                                    related_query_name='comment',
                                    on_delete=models.CASCADE)
+    related_tags = GenericRelation(RelatedTag, related_query_name='comment')
 
     def save(self, *args, **kwargs):
         super(Comment, self).save(*args, **kwargs)
-        if hasattr(self.submission, 'patch'):
-            self.submission.patch.refresh_tag_counts()
-
-    def delete(self, *args, **kwargs):
-        super(Comment, self).delete(*args, **kwargs)
-        if hasattr(self.submission, 'patch'):
-            self.submission.patch.refresh_tag_counts()
+        self.refresh_tags()
 
     class Meta:
         ordering = ['date']
diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py
index 4350e09..d4bacf9 100644
--- a/patchwork/templatetags/patch.py
+++ b/patchwork/templatetags/patch.py
@@ -34,8 +34,9 @@ register = template.Library()
 def patch_tags(patch):
     counts = []
     titles = []
+    all_tags = patch.dict_of_all_related_tags
     for tag in [t for t in patch.project.tags if t.show_column]:
-        count = getattr(patch, tag.attr_name)
+        count = len(all_tags[tag])
         titles.append('%d %s' % (count, tag.name))
         if count == 0:
             counts.append("-")
diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
index 3baf299..ea3b88b 100644
--- a/patchwork/views/__init__.py
+++ b/patchwork/views/__init__.py
@@ -272,9 +272,6 @@ def generic_list(request, project, view, view_args=None, filter_settings=None,
     if patches is None:
         patches = Patch.objects.filter(project=project)
 
-    # annotate with tag counts
-    patches = patches.with_tag_counts(project)
-
     patches = context['filters'].apply(patches)
     if not editable_order:
         patches = order.apply(patches)
diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py
index 84682b8..bdba458 100644
--- a/patchwork/views/utils.py
+++ b/patchwork/views/utils.py
@@ -32,6 +32,7 @@ from django.utils import six
 
 from patchwork.models import Comment
 from patchwork.models import Patch
+from patchwork.models import RelatedTag
 from patchwork.models import Series
 
 if settings.ENABLE_REST_API:
@@ -75,9 +76,10 @@ def _submission_to_mbox(submission):
     else:
         postscript = ''
 
-    # TODO(stephenfin): Make this use the tags infrastructure
-    for comment in Comment.objects.filter(submission=submission):
-        body += comment.patch_responses
+    for comment in submission.comments.all():
+        for related_tag in comment.related_tags.all():
+            for value in related_tag.values.all():
+                body += '%s: %s\n' % (related_tag.name, value.value)
 
     if postscript:
         body += '---\n' + postscript + '\n'
-- 
2.13.6



More information about the Patchwork mailing list