[RFC v2 1/1] Rework tagging infrastructure

vkabatov at redhat.com vkabatov at redhat.com
Fri Apr 20 02:36:56 AEST 2018


From: Veronika Kabatova <vkabatov at redhat.com>

Solve #113 and #57 GitHub issues, fix up returning tags in the API,
keep track of tag origin to later be able to add tags to comments in
the API.

Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of
tags for each patch in series.

Signed-off-by: Veronika Kabatova <vkabatov at redhat.com>
---
 docs/usage/overview.rst                     |   9 +-
 patchwork/api/cover.py                      |  24 +++-
 patchwork/api/patch.py                      |  35 +++++-
 patchwork/management/commands/retag.py      |  15 ++-
 patchwork/migrations/0026_rework_tagging.py | 123 +++++++++++++++++++
 patchwork/models.py                         | 176 +++++++++++-----------------
 patchwork/templatetags/patch.py             |   3 +-
 patchwork/views/__init__.py                 |   3 -
 patchwork/views/utils.py                    |  16 ++-
 9 files changed, 277 insertions(+), 127 deletions(-)
 create mode 100644 patchwork/migrations/0026_rework_tagging.py

diff --git a/docs/usage/overview.rst b/docs/usage/overview.rst
index cc193f3..d7db840 100644
--- a/docs/usage/overview.rst
+++ b/docs/usage/overview.rst
@@ -110,10 +110,11 @@ one delegate can be assigned to a patch.
 Tags
 ~~~~
 
-Tags are specially formatted metadata appended to the foot the body of a patch
-or a comment on a patch. Patchwork extracts these tags at parse time and
-associates them with the patch. You add extra tags to an email by replying to
-the email. The following tags are available on a standard Patchwork install:
+Tags are specially formatted metadata appended to the foot the body of a patch,
+cover letter or a comment related to them. Patchwork extracts these tags at
+parse time and associates them with patches. You add extra tags to an email by
+replying to the email. The following tags are available on a standard Patchwork
+install:
 
 Acked-by:
 
diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py
index fc7ae97..4d82277 100644
--- a/patchwork/api/cover.py
+++ b/patchwork/api/cover.py
@@ -29,6 +29,7 @@ from patchwork.api.embedded import PersonSerializer
 from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
 from patchwork.models import CoverLetter
+from patchwork.models import SubmissionTag
 
 
 class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
@@ -37,18 +38,36 @@ class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
     submitter = PersonSerializer(read_only=True)
     mbox = SerializerMethodField()
     series = SeriesSerializer(many=True, read_only=True)
+    tags = SerializerMethodField()
 
     def get_mbox(self, instance):
         request = self.context.get('request')
         return request.build_absolute_uri(instance.get_mbox_url())
 
+    def get_tags(self, instance):
+        tags = instance.project.tags
+        if not tags:
+            return {}
+
+        all_tags = {}
+        related_tags = SubmissionTag.objects.filter(
+            submission=instance).values_list('tag__name', 'value').distinct()
+        for tag in tags:
+            all_tags[tag.name] = [related_tag[1] for related_tag in
+                                  related_tags if related_tag[0] == tag.name]
+            # Don't show tags that are not present
+            if not all_tags[tag.name]:
+                del(all_tags[tag.name])
+
+        return all_tags
+
     class Meta:
         model = CoverLetter
         fields = ('id', 'url', 'project', 'msgid', 'date', 'name', 'submitter',
-                  'mbox', 'series')
+                  'mbox', 'series', 'tags')
         read_only_fields = fields
         versioned_fields = {
-            '1.1': ('mbox', ),
+            '1.1': ('mbox', 'tags'),
         }
         extra_kwargs = {
             'url': {'view_name': 'api-cover-detail'},
@@ -68,6 +87,7 @@ class CoverLetterDetailSerializer(CoverLetterListSerializer):
         fields = CoverLetterListSerializer.Meta.fields + ('headers', 'content')
         read_only_fields = fields
         extra_kwargs = CoverLetterListSerializer.Meta.extra_kwargs
+        versioned_fields = CoverLetterListSerializer.Meta.versioned_fields
 
 
 class CoverLetterList(ListAPIView):
diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
index 115feff..295b046 100644
--- a/patchwork/api/patch.py
+++ b/patchwork/api/patch.py
@@ -24,9 +24,9 @@ from rest_framework.generics import ListAPIView
 from rest_framework.generics import RetrieveUpdateAPIView
 from rest_framework.relations import RelatedField
 from rest_framework.reverse import reverse
-from rest_framework.serializers import HyperlinkedModelSerializer
 from rest_framework.serializers import SerializerMethodField
 
+from patchwork.api.base import BaseHyperlinkedModelSerializer
 from patchwork.api.base import PatchworkPermission
 from patchwork.api.filters import PatchFilter
 from patchwork.api.embedded import PersonSerializer
@@ -34,7 +34,9 @@ from patchwork.api.embedded import ProjectSerializer
 from patchwork.api.embedded import SeriesSerializer
 from patchwork.api.embedded import UserSerializer
 from patchwork.models import Patch
+from patchwork.models import SeriesPatch
 from patchwork.models import State
+from patchwork.models import SubmissionTag
 from patchwork.parser import clean_subject
 
 
@@ -75,7 +77,7 @@ class StateField(RelatedField):
         return State.objects.all()
 
 
-class PatchListSerializer(HyperlinkedModelSerializer):
+class PatchListSerializer(BaseHyperlinkedModelSerializer):
 
     project = ProjectSerializer(read_only=True)
     state = StateField()
@@ -92,9 +94,28 @@ class PatchListSerializer(HyperlinkedModelSerializer):
         return request.build_absolute_uri(instance.get_mbox_url())
 
     def get_tags(self, instance):
-        # TODO(stephenfin): Make tags performant, possibly by reworking the
-        # model
-        return {}
+        tags = instance.project.tags
+        if not tags:
+            return {}
+
+        sub_ids = [instance.id]
+        cover = SeriesPatch.objects.get(
+            patch_id=instance.id).series.cover_letter
+        if cover:
+            sub_ids.append(cover.id)
+
+        all_tags = {}
+        related_tags = SubmissionTag.objects.filter(
+            submission__id__in=sub_ids).values_list('tag__name',
+                                                    'value').distinct()
+        for tag in tags:
+            all_tags[tag.name] = [related_tag[1] for related_tag in
+                                  related_tags if related_tag[0] == tag.name]
+            # Don't show tags that are not present
+            if not all_tags[tag.name]:
+                del(all_tags[tag.name])
+
+        return all_tags
 
     def get_check(self, instance):
         return instance.combined_check_state
@@ -115,6 +136,9 @@ class PatchListSerializer(HyperlinkedModelSerializer):
         extra_kwargs = {
             'url': {'view_name': 'api-patch-detail'},
         }
+        versioned_fields = {
+            '1.1': ('tags', ),
+        }
 
 
 class PatchDetailSerializer(PatchListSerializer):
@@ -136,6 +160,7 @@ class PatchDetailSerializer(PatchListSerializer):
         read_only_fields = PatchListSerializer.Meta.read_only_fields + (
             'headers', 'content', 'diff', 'prefixes')
         extra_kwargs = PatchListSerializer.Meta.extra_kwargs
+        versioned_fields = PatchListSerializer.Meta.versioned_fields
 
 
 class PatchList(ListAPIView):
diff --git a/patchwork/management/commands/retag.py b/patchwork/management/commands/retag.py
index 8617ff4..db40256 100644
--- a/patchwork/management/commands/retag.py
+++ b/patchwork/management/commands/retag.py
@@ -19,11 +19,12 @@
 
 from django.core.management.base import BaseCommand
 
+from patchwork.models import Cover
 from patchwork.models import Patch
 
 
 class Command(BaseCommand):
-    help = 'Update the tag (Ack/Review/Test) counts on existing patches'
+    help = 'Update tags on existing patches'
     args = '[<patch_id>...]'
 
     def handle(self, *args, **options):
@@ -37,7 +38,17 @@ class Command(BaseCommand):
         count = query.count()
 
         for i, patch in enumerate(query.iterator()):
-            patch.refresh_tag_counts()
+            patch.refresh_tags()
+            for comment in patch.comments.all():
+                comment.refresh_tags()
+
+            cover = SeriesPatch.objects.get(
+                patch_id=patch.id).series.cover_letter
+            if cover:
+                cover.refresh_tags()
+                for comment in cover.comments.all():
+                    comment.refresh_tags()
+
             if (i % 10) == 0:
                 self.stdout.write('%06d/%06d\r' % (i, count), ending='')
                 self.stdout.flush()
diff --git a/patchwork/migrations/0026_rework_tagging.py b/patchwork/migrations/0026_rework_tagging.py
new file mode 100644
index 0000000..88ddcac
--- /dev/null
+++ b/patchwork/migrations/0026_rework_tagging.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+import re
+
+
+# Django migrations don't allow us to call models' methods because the
+# migration will break if the methods change. Therefore we need to use an
+# altered copy of all the code needed.
+def extract_tags(extract_from, tags):
+    found_tags = {}
+
+    if not extract_from.content:
+        return found_tags
+
+    for tag in tags:
+        regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
+        found_tags[tag] = regex.findall(extract_from.content)
+
+    return found_tags
+
+
+def save_tag_values(apps, submission, tag, values, comment_origin=None):
+    if not values:
+        # We don't need to delete tags since none exist yet and we can't
+        # delete comments etc. during the migration
+        return
+
+    SubmissionTag = apps.get_model('patchwork', 'SubmissionTag')
+    SubmissionTag.objects.bulk_create([SubmissionTag(
+        submission=submission,
+        tag=tag,
+        value=value,
+        from_comment=comment_origin
+    ) for value in values])
+
+
+def create_all(apps, schema_editor):
+    Tag = apps.get_model('patchwork', 'Tag')
+    tags = Tag.objects.all()
+
+    Submission = apps.get_model('patchwork', 'Submission')
+    for submission in Submission.objects.all():
+        extracted = extract_tags(submission, tags)
+        for tag in extracted:
+            save_tag_values(apps, submission, tag, extracted[tag])
+
+    Comment = apps.get_model('patchwork', 'Comment')
+    for comment in Comment.objects.all():
+        extracted = extract_tags(comment, tags)
+        for tag in extracted:
+            save_tag_values(apps,
+                            comment.submission,
+                            tag,
+                            extracted[tag],
+                            comment_origin=comment.id)
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('patchwork', '0025_add_regex_validators'),
+    ]
+
+    operations = [
+        migrations.CreateModel(
+            name='SubmissionTag',
+            fields=[
+                ('id', models.AutoField(auto_created=True,
+                                        primary_key=True,
+                                        serialize=False,
+                                        verbose_name='ID')),
+                ('value', models.CharField(max_length=255)),
+                ('from_comment', models.IntegerField(null=True)),
+                ('submission', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    to='patchwork.Submission'
+                )),
+                ('tag', models.ForeignKey(
+                    on_delete=django.db.models.deletion.CASCADE,
+                    to='patchwork.Tag'
+                )),
+            ],
+        ),
+        migrations.AlterUniqueTogether(
+            name='patchtag',
+            unique_together=set([]),
+        ),
+        migrations.RemoveField(
+            model_name='patchtag',
+            name='patch',
+        ),
+        migrations.RemoveField(
+            model_name='patchtag',
+            name='tag',
+        ),
+        migrations.RemoveField(
+            model_name='patch',
+            name='tags',
+        ),
+        migrations.DeleteModel(
+            name='PatchTag',
+        ),
+        migrations.AddField(
+            model_name='submission',
+            name='related_tags',
+            field=models.ManyToManyField(
+                through='patchwork.SubmissionTag',
+                to='patchwork.Tag'
+            ),
+        ),
+        migrations.AlterUniqueTogether(
+            name='submissiontag',
+            unique_together=set([('submission',
+                                  'tag',
+                                  'value',
+                                  'from_comment')]),
+        ),
+        migrations.RunPython(create_all, atomic=False),
+    ]
diff --git a/patchwork/models.py b/patchwork/models.py
index f91b994..84447cc 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -20,8 +20,6 @@
 
 from __future__ import absolute_import
 
-from collections import Counter
-from collections import OrderedDict
 import datetime
 import random
 import re
@@ -252,10 +250,6 @@ class Tag(models.Model):
                                       ' tag\'s count in the patch list view',
                                       default=True)
 
-    @property
-    def attr_name(self):
-        return 'tag_%d_count' % self.id
-
     def __str__(self):
         return self.name
 
@@ -263,64 +257,20 @@ class Tag(models.Model):
         ordering = ['abbrev']
 
 
-class PatchTag(models.Model):
-    patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
-    tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
-    count = models.IntegerField(default=1)
+class SubmissionTag(models.Model):
+    submission = models.ForeignKey('Submission')
+    tag = models.ForeignKey('Tag')
+    value = models.CharField(max_length=255)
+    from_comment = models.IntegerField(null=True)
 
     class Meta:
-        unique_together = [('patch', 'tag')]
+        unique_together = [('submission', 'tag', 'value', 'from_comment')]
 
 
 def get_default_initial_patch_state():
     return State.objects.get(ordering=0)
 
 
-class PatchQuerySet(models.query.QuerySet):
-
-    def with_tag_counts(self, project=None):
-        if project and not project.use_tags:
-            return self
-
-        # We need the project's use_tags field loaded for Project.tags().
-        # Using prefetch_related means we'll share the one instance of
-        # Project, and share the project.tags cache between all patch.project
-        # references.
-        qs = self.prefetch_related('project')
-        select = OrderedDict()
-        select_params = []
-
-        # All projects have the same tags, so we're good to go here
-        if project:
-            tags = project.tags
-        else:
-            tags = Tag.objects.all()
-
-        for tag in tags:
-            select[tag.attr_name] = (
-                "coalesce("
-                "(SELECT count FROM patchwork_patchtag"
-                " WHERE patchwork_patchtag.patch_id="
-                "patchwork_patch.submission_ptr_id"
-                " AND patchwork_patchtag.tag_id=%s), 0)")
-            select_params.append(tag.id)
-
-        return qs.extra(select=select, select_params=select_params)
-
-
-class PatchManager(models.Manager):
-    use_for_related_fields = True
-    # NOTE(stephenfin): This is necessary to silence a warning with Django >=
-    # 1.10. Remove when 1.10 is the minimum supported version.
-    silence_use_for_related_fields_deprecation = True
-
-    def get_queryset(self):
-        return PatchQuerySet(self.model, using=self.db)
-
-    def with_tag_counts(self, project):
-        return self.get_queryset().with_tag_counts(project)
-
-
 class EmailMixin(models.Model):
     """Mixin for models with an email-origin."""
     # email metadata
@@ -334,17 +284,16 @@ class EmailMixin(models.Model):
     submitter = models.ForeignKey(Person, on_delete=models.CASCADE)
     content = models.TextField(null=True, blank=True)
 
-    response_re = re.compile(
-        r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$',
-        re.M | re.I)
+    def _extract_tags(self, tags):
+        found_tags = {}
 
-    @property
-    def patch_responses(self):
         if not self.content:
-            return ''
+            return found_tags
 
-        return ''.join([match.group(0) + '\n' for match in
-                        self.response_re.finditer(self.content)])
+        for tag in tags:
+            regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
+            found_tags[tag] = regex.findall(self.content)
+        return found_tags
 
     def save(self, *args, **kwargs):
         # Modifying a submission via admin interface changes '\n' newlines in
@@ -353,6 +302,7 @@ class EmailMixin(models.Model):
         # on PY2
         self.content = self.content.replace('\r\n', '\n')
         super(EmailMixin, self).save(*args, **kwargs)
+        self.refresh_tags()
 
     class Meta:
         abstract = True
@@ -377,6 +327,34 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
     # submission metadata
 
     name = models.CharField(max_length=255)
+    related_tags = models.ManyToManyField(Tag, through=SubmissionTag)
+
+    def save_tag_values(self, tag, values, comment_origin=None):
+        current_objs = SubmissionTag.objects.filter(
+            submission=self,
+            from_comment=comment_origin,
+            tag=tag
+        )
+
+        if not values:
+            current_objs.delete()
+            return
+
+        # In case the origin is modified, delete tags that were removed
+        current_objs.exclude(value__in=values).delete()
+
+        values_to_add = set(values) - set(current_objs.values_list('value'))
+        SubmissionTag.objects.bulk_create([SubmissionTag(
+            submission=self,
+            tag=tag,
+            value=value,
+            from_comment=comment_origin
+        ) for value in values_to_add])
+
+    def refresh_tags(self):
+        submission_tags = self._extract_tags(Tag.objects.all())
+        for tag in submission_tags:
+            self.save_tag_values(tag, submission_tags[tag])
 
     # patchwork metadata
 
@@ -386,6 +364,10 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
     def __str__(self):
         return self.name
 
+    def delete(self, *args, **kwargs):
+        SubmissionTag.objects.filter(submission=self).delete()
+        super(Submission, self).delete(*args, **kwargs)
+
     class Meta:
         ordering = ['date']
         unique_together = [('msgid', 'project')]
@@ -423,7 +405,6 @@ class Patch(SeriesMixin, Submission):
     diff = models.TextField(null=True, blank=True)
     commit_ref = models.CharField(max_length=255, null=True, blank=True)
     pull_url = models.CharField(max_length=255, null=True, blank=True)
-    tags = models.ManyToManyField(Tag, through=PatchTag)
 
     # patchwork metadata
 
@@ -437,40 +418,6 @@ class Patch(SeriesMixin, Submission):
     # patches in a project without needing to do a JOIN.
     patch_project = models.ForeignKey(Project, on_delete=models.CASCADE)
 
-    objects = PatchManager()
-
-    @staticmethod
-    def extract_tags(content, tags):
-        counts = Counter()
-
-        for tag in tags:
-            regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
-            counts[tag] = len(regex.findall(content))
-
-        return counts
-
-    def _set_tag(self, tag, count):
-        if count == 0:
-            self.patchtag_set.filter(tag=tag).delete()
-            return
-        patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
-        if patchtag.count != count:
-            patchtag.count = count
-            patchtag.save()
-
-    def refresh_tag_counts(self):
-        tags = self.project.tags
-        counter = Counter()
-
-        if self.content:
-            counter += self.extract_tags(self.content, tags)
-
-        for comment in self.comments.all():
-            counter = counter + self.extract_tags(comment.content, tags)
-
-        for tag in tags:
-            self._set_tag(tag, counter[tag])
-
     def save(self, *args, **kwargs):
         if not hasattr(self, 'state') or not self.state:
             self.state = get_default_initial_patch_state()
@@ -480,8 +427,6 @@ class Patch(SeriesMixin, Submission):
 
         super(Patch, self).save(**kwargs)
 
-        self.refresh_tag_counts()
-
     def is_editable(self, user):
         if not is_authenticated(user):
             return False
@@ -492,6 +437,22 @@ class Patch(SeriesMixin, Submission):
         return self.project.is_editable(user)
 
     @property
+    def all_tags(self):
+        sub_ids = [self.id]
+        cover = SeriesPatch.objects.get(patch_id=self.id).series.cover_letter
+        if cover:
+            sub_ids.append(cover.id)
+        related_tags = SubmissionTag.objects.filter(
+            submission__id__in=sub_ids).values_list('tag__name',
+                                                    'value').distinct()
+
+        patch_tags = {}
+        for tag in self.project.tags:
+            patch_tags[tag] = [related_tag[1] for related_tag in related_tags
+                               if related_tag[0] == tag.name]
+        return patch_tags
+
+    @property
     def combined_check_state(self):
         """Return the combined state for all checks.
 
@@ -603,15 +564,16 @@ class Comment(EmailMixin, models.Model):
                                    related_query_name='comment',
                                    on_delete=models.CASCADE)
 
-    def save(self, *args, **kwargs):
-        super(Comment, self).save(*args, **kwargs)
-        if hasattr(self.submission, 'patch'):
-            self.submission.patch.refresh_tag_counts()
+    def refresh_tags(self):
+        comment_tags = self._extract_tags(Tag.objects.all())
+        for tag in comment_tags:
+            self.submission.save_tag_values(tag,
+                                            comment_tags[tag],
+                                            comment_origin=self.id)
 
     def delete(self, *args, **kwargs):
+        SubmissionTag.objects.filter(from_comment=self.id).delete()
         super(Comment, self).delete(*args, **kwargs)
-        if hasattr(self.submission, 'patch'):
-            self.submission.patch.refresh_tag_counts()
 
     class Meta:
         ordering = ['date']
diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py
index 4350e09..f226dc0 100644
--- a/patchwork/templatetags/patch.py
+++ b/patchwork/templatetags/patch.py
@@ -34,8 +34,9 @@ register = template.Library()
 def patch_tags(patch):
     counts = []
     titles = []
+    all_tags = patch.all_tags
     for tag in [t for t in patch.project.tags if t.show_column]:
-        count = getattr(patch, tag.attr_name)
+        count = len(all_tags[tag])
         titles.append('%d %s' % (count, tag.name))
         if count == 0:
             counts.append("-")
diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
index f8d23a3..8c41df6 100644
--- a/patchwork/views/__init__.py
+++ b/patchwork/views/__init__.py
@@ -272,9 +272,6 @@ def generic_list(request, project, view, view_args=None, filter_settings=None,
     if patches is None:
         patches = Patch.objects.filter(patch_project=project)
 
-    # annotate with tag counts
-    patches = patches.with_tag_counts(project)
-
     patches = context['filters'].apply(patches)
     if not editable_order:
         patches = order.apply(patches)
diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py
index f5ff43c..8a0ce23 100644
--- a/patchwork/views/utils.py
+++ b/patchwork/views/utils.py
@@ -32,6 +32,8 @@ from django.utils import six
 
 from patchwork.models import Comment
 from patchwork.models import Patch
+from patchwork.models import SeriesPatch
+from patchwork.models import SubmissionTag
 from patchwork.models import Series
 
 if settings.ENABLE_REST_API:
@@ -75,9 +77,17 @@ def _submission_to_mbox(submission):
     else:
         postscript = ''
 
-    # TODO(stephenfin): Make this use the tags infrastructure
-    for comment in Comment.objects.filter(submission=submission):
-        body += comment.patch_responses
+    sub_ids = [submission.id]
+    if is_patch:
+        cover = SeriesPatch.objects.get(
+            patch_id=submission.id).series.cover_letter
+        if cover:
+            sub_ids.append(cover.id)
+
+    for (tagname, value) in SubmissionTag.objects.filter(
+            submission__id__in=sub_ids).values_list(
+                'tag__name', 'value').distinct():
+        body += '%s: %s\n' % (tagname, value)
 
     if postscript:
         body += '---\n' + postscript + '\n'
-- 
2.13.6



More information about the Patchwork mailing list