[RFC v2 1/1] Rework tagging infrastructure
vkabatov at redhat.com
vkabatov at redhat.com
Fri Apr 20 02:36:56 AEST 2018
From: Veronika Kabatova <vkabatov at redhat.com>
Solve #113 and #57 GitHub issues, fix up returning tags in the API,
keep track of tag origin to later be able to add tags to comments in
the API.
Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of
tags for each patch in series.
Signed-off-by: Veronika Kabatova <vkabatov at redhat.com>
---
docs/usage/overview.rst | 9 +-
patchwork/api/cover.py | 24 +++-
patchwork/api/patch.py | 35 +++++-
patchwork/management/commands/retag.py | 15 ++-
patchwork/migrations/0026_rework_tagging.py | 123 +++++++++++++++++++
patchwork/models.py | 176 +++++++++++-----------------
patchwork/templatetags/patch.py | 3 +-
patchwork/views/__init__.py | 3 -
patchwork/views/utils.py | 16 ++-
9 files changed, 277 insertions(+), 127 deletions(-)
create mode 100644 patchwork/migrations/0026_rework_tagging.py
diff --git a/docs/usage/overview.rst b/docs/usage/overview.rst
index cc193f3..d7db840 100644
--- a/docs/usage/overview.rst
+++ b/docs/usage/overview.rst
@@ -110,10 +110,11 @@ one delegate can be assigned to a patch.
Tags
~~~~
-Tags are specially formatted metadata appended to the foot the body of a patch
-or a comment on a patch. Patchwork extracts these tags at parse time and
-associates them with the patch. You add extra tags to an email by replying to
-the email. The following tags are available on a standard Patchwork install:
+Tags are specially formatted metadata appended to the foot the body of a patch,
+cover letter or a comment related to them. Patchwork extracts these tags at
+parse time and associates them with patches. You add extra tags to an email by
+replying to the email. The following tags are available on a standard Patchwork
+install:
Acked-by:
diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py
index fc7ae97..4d82277 100644
--- a/patchwork/api/cover.py
+++ b/patchwork/api/cover.py
@@ -29,6 +29,7 @@ from patchwork.api.embedded import PersonSerializer
from patchwork.api.embedded import ProjectSerializer
from patchwork.api.embedded import SeriesSerializer
from patchwork.models import CoverLetter
+from patchwork.models import SubmissionTag
class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
@@ -37,18 +38,36 @@ class CoverLetterListSerializer(BaseHyperlinkedModelSerializer):
submitter = PersonSerializer(read_only=True)
mbox = SerializerMethodField()
series = SeriesSerializer(many=True, read_only=True)
+ tags = SerializerMethodField()
def get_mbox(self, instance):
request = self.context.get('request')
return request.build_absolute_uri(instance.get_mbox_url())
+ def get_tags(self, instance):
+ tags = instance.project.tags
+ if not tags:
+ return {}
+
+ all_tags = {}
+ related_tags = SubmissionTag.objects.filter(
+ submission=instance).values_list('tag__name', 'value').distinct()
+ for tag in tags:
+ all_tags[tag.name] = [related_tag[1] for related_tag in
+ related_tags if related_tag[0] == tag.name]
+ # Don't show tags that are not present
+ if not all_tags[tag.name]:
+ del(all_tags[tag.name])
+
+ return all_tags
+
class Meta:
model = CoverLetter
fields = ('id', 'url', 'project', 'msgid', 'date', 'name', 'submitter',
- 'mbox', 'series')
+ 'mbox', 'series', 'tags')
read_only_fields = fields
versioned_fields = {
- '1.1': ('mbox', ),
+ '1.1': ('mbox', 'tags'),
}
extra_kwargs = {
'url': {'view_name': 'api-cover-detail'},
@@ -68,6 +87,7 @@ class CoverLetterDetailSerializer(CoverLetterListSerializer):
fields = CoverLetterListSerializer.Meta.fields + ('headers', 'content')
read_only_fields = fields
extra_kwargs = CoverLetterListSerializer.Meta.extra_kwargs
+ versioned_fields = CoverLetterListSerializer.Meta.versioned_fields
class CoverLetterList(ListAPIView):
diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
index 115feff..295b046 100644
--- a/patchwork/api/patch.py
+++ b/patchwork/api/patch.py
@@ -24,9 +24,9 @@ from rest_framework.generics import ListAPIView
from rest_framework.generics import RetrieveUpdateAPIView
from rest_framework.relations import RelatedField
from rest_framework.reverse import reverse
-from rest_framework.serializers import HyperlinkedModelSerializer
from rest_framework.serializers import SerializerMethodField
+from patchwork.api.base import BaseHyperlinkedModelSerializer
from patchwork.api.base import PatchworkPermission
from patchwork.api.filters import PatchFilter
from patchwork.api.embedded import PersonSerializer
@@ -34,7 +34,9 @@ from patchwork.api.embedded import ProjectSerializer
from patchwork.api.embedded import SeriesSerializer
from patchwork.api.embedded import UserSerializer
from patchwork.models import Patch
+from patchwork.models import SeriesPatch
from patchwork.models import State
+from patchwork.models import SubmissionTag
from patchwork.parser import clean_subject
@@ -75,7 +77,7 @@ class StateField(RelatedField):
return State.objects.all()
-class PatchListSerializer(HyperlinkedModelSerializer):
+class PatchListSerializer(BaseHyperlinkedModelSerializer):
project = ProjectSerializer(read_only=True)
state = StateField()
@@ -92,9 +94,28 @@ class PatchListSerializer(HyperlinkedModelSerializer):
return request.build_absolute_uri(instance.get_mbox_url())
def get_tags(self, instance):
- # TODO(stephenfin): Make tags performant, possibly by reworking the
- # model
- return {}
+ tags = instance.project.tags
+ if not tags:
+ return {}
+
+ sub_ids = [instance.id]
+ cover = SeriesPatch.objects.get(
+ patch_id=instance.id).series.cover_letter
+ if cover:
+ sub_ids.append(cover.id)
+
+ all_tags = {}
+ related_tags = SubmissionTag.objects.filter(
+ submission__id__in=sub_ids).values_list('tag__name',
+ 'value').distinct()
+ for tag in tags:
+ all_tags[tag.name] = [related_tag[1] for related_tag in
+ related_tags if related_tag[0] == tag.name]
+ # Don't show tags that are not present
+ if not all_tags[tag.name]:
+ del(all_tags[tag.name])
+
+ return all_tags
def get_check(self, instance):
return instance.combined_check_state
@@ -115,6 +136,9 @@ class PatchListSerializer(HyperlinkedModelSerializer):
extra_kwargs = {
'url': {'view_name': 'api-patch-detail'},
}
+ versioned_fields = {
+ '1.1': ('tags', ),
+ }
class PatchDetailSerializer(PatchListSerializer):
@@ -136,6 +160,7 @@ class PatchDetailSerializer(PatchListSerializer):
read_only_fields = PatchListSerializer.Meta.read_only_fields + (
'headers', 'content', 'diff', 'prefixes')
extra_kwargs = PatchListSerializer.Meta.extra_kwargs
+ versioned_fields = PatchListSerializer.Meta.versioned_fields
class PatchList(ListAPIView):
diff --git a/patchwork/management/commands/retag.py b/patchwork/management/commands/retag.py
index 8617ff4..db40256 100644
--- a/patchwork/management/commands/retag.py
+++ b/patchwork/management/commands/retag.py
@@ -19,11 +19,12 @@
from django.core.management.base import BaseCommand
+from patchwork.models import Cover
from patchwork.models import Patch
class Command(BaseCommand):
- help = 'Update the tag (Ack/Review/Test) counts on existing patches'
+ help = 'Update tags on existing patches'
args = '[<patch_id>...]'
def handle(self, *args, **options):
@@ -37,7 +38,17 @@ class Command(BaseCommand):
count = query.count()
for i, patch in enumerate(query.iterator()):
- patch.refresh_tag_counts()
+ patch.refresh_tags()
+ for comment in patch.comments.all():
+ comment.refresh_tags()
+
+ cover = SeriesPatch.objects.get(
+ patch_id=patch.id).series.cover_letter
+ if cover:
+ cover.refresh_tags()
+ for comment in cover.comments.all():
+ comment.refresh_tags()
+
if (i % 10) == 0:
self.stdout.write('%06d/%06d\r' % (i, count), ending='')
self.stdout.flush()
diff --git a/patchwork/migrations/0026_rework_tagging.py b/patchwork/migrations/0026_rework_tagging.py
new file mode 100644
index 0000000..88ddcac
--- /dev/null
+++ b/patchwork/migrations/0026_rework_tagging.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+import re
+
+
+# Django migrations don't allow us to call models' methods because the
+# migration will break if the methods change. Therefore we need to use an
+# altered copy of all the code needed.
+def extract_tags(extract_from, tags):
+ found_tags = {}
+
+ if not extract_from.content:
+ return found_tags
+
+ for tag in tags:
+ regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
+ found_tags[tag] = regex.findall(extract_from.content)
+
+ return found_tags
+
+
+def save_tag_values(apps, submission, tag, values, comment_origin=None):
+ if not values:
+ # We don't need to delete tags since none exist yet and we can't
+ # delete comments etc. during the migration
+ return
+
+ SubmissionTag = apps.get_model('patchwork', 'SubmissionTag')
+ SubmissionTag.objects.bulk_create([SubmissionTag(
+ submission=submission,
+ tag=tag,
+ value=value,
+ from_comment=comment_origin
+ ) for value in values])
+
+
+def create_all(apps, schema_editor):
+ Tag = apps.get_model('patchwork', 'Tag')
+ tags = Tag.objects.all()
+
+ Submission = apps.get_model('patchwork', 'Submission')
+ for submission in Submission.objects.all():
+ extracted = extract_tags(submission, tags)
+ for tag in extracted:
+ save_tag_values(apps, submission, tag, extracted[tag])
+
+ Comment = apps.get_model('patchwork', 'Comment')
+ for comment in Comment.objects.all():
+ extracted = extract_tags(comment, tags)
+ for tag in extracted:
+ save_tag_values(apps,
+ comment.submission,
+ tag,
+ extracted[tag],
+ comment_origin=comment.id)
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('patchwork', '0025_add_regex_validators'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='SubmissionTag',
+ fields=[
+ ('id', models.AutoField(auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name='ID')),
+ ('value', models.CharField(max_length=255)),
+ ('from_comment', models.IntegerField(null=True)),
+ ('submission', models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ to='patchwork.Submission'
+ )),
+ ('tag', models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ to='patchwork.Tag'
+ )),
+ ],
+ ),
+ migrations.AlterUniqueTogether(
+ name='patchtag',
+ unique_together=set([]),
+ ),
+ migrations.RemoveField(
+ model_name='patchtag',
+ name='patch',
+ ),
+ migrations.RemoveField(
+ model_name='patchtag',
+ name='tag',
+ ),
+ migrations.RemoveField(
+ model_name='patch',
+ name='tags',
+ ),
+ migrations.DeleteModel(
+ name='PatchTag',
+ ),
+ migrations.AddField(
+ model_name='submission',
+ name='related_tags',
+ field=models.ManyToManyField(
+ through='patchwork.SubmissionTag',
+ to='patchwork.Tag'
+ ),
+ ),
+ migrations.AlterUniqueTogether(
+ name='submissiontag',
+ unique_together=set([('submission',
+ 'tag',
+ 'value',
+ 'from_comment')]),
+ ),
+ migrations.RunPython(create_all, atomic=False),
+ ]
diff --git a/patchwork/models.py b/patchwork/models.py
index f91b994..84447cc 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -20,8 +20,6 @@
from __future__ import absolute_import
-from collections import Counter
-from collections import OrderedDict
import datetime
import random
import re
@@ -252,10 +250,6 @@ class Tag(models.Model):
' tag\'s count in the patch list view',
default=True)
- @property
- def attr_name(self):
- return 'tag_%d_count' % self.id
-
def __str__(self):
return self.name
@@ -263,64 +257,20 @@ class Tag(models.Model):
ordering = ['abbrev']
-class PatchTag(models.Model):
- patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
- tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
- count = models.IntegerField(default=1)
+class SubmissionTag(models.Model):
+ submission = models.ForeignKey('Submission')
+ tag = models.ForeignKey('Tag')
+ value = models.CharField(max_length=255)
+ from_comment = models.IntegerField(null=True)
class Meta:
- unique_together = [('patch', 'tag')]
+ unique_together = [('submission', 'tag', 'value', 'from_comment')]
def get_default_initial_patch_state():
return State.objects.get(ordering=0)
-class PatchQuerySet(models.query.QuerySet):
-
- def with_tag_counts(self, project=None):
- if project and not project.use_tags:
- return self
-
- # We need the project's use_tags field loaded for Project.tags().
- # Using prefetch_related means we'll share the one instance of
- # Project, and share the project.tags cache between all patch.project
- # references.
- qs = self.prefetch_related('project')
- select = OrderedDict()
- select_params = []
-
- # All projects have the same tags, so we're good to go here
- if project:
- tags = project.tags
- else:
- tags = Tag.objects.all()
-
- for tag in tags:
- select[tag.attr_name] = (
- "coalesce("
- "(SELECT count FROM patchwork_patchtag"
- " WHERE patchwork_patchtag.patch_id="
- "patchwork_patch.submission_ptr_id"
- " AND patchwork_patchtag.tag_id=%s), 0)")
- select_params.append(tag.id)
-
- return qs.extra(select=select, select_params=select_params)
-
-
-class PatchManager(models.Manager):
- use_for_related_fields = True
- # NOTE(stephenfin): This is necessary to silence a warning with Django >=
- # 1.10. Remove when 1.10 is the minimum supported version.
- silence_use_for_related_fields_deprecation = True
-
- def get_queryset(self):
- return PatchQuerySet(self.model, using=self.db)
-
- def with_tag_counts(self, project):
- return self.get_queryset().with_tag_counts(project)
-
-
class EmailMixin(models.Model):
"""Mixin for models with an email-origin."""
# email metadata
@@ -334,17 +284,16 @@ class EmailMixin(models.Model):
submitter = models.ForeignKey(Person, on_delete=models.CASCADE)
content = models.TextField(null=True, blank=True)
- response_re = re.compile(
- r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$',
- re.M | re.I)
+ def _extract_tags(self, tags):
+ found_tags = {}
- @property
- def patch_responses(self):
if not self.content:
- return ''
+ return found_tags
- return ''.join([match.group(0) + '\n' for match in
- self.response_re.finditer(self.content)])
+ for tag in tags:
+ regex = re.compile(tag.pattern + r'\s(.*)', re.M | re.I)
+ found_tags[tag] = regex.findall(self.content)
+ return found_tags
def save(self, *args, **kwargs):
# Modifying a submission via admin interface changes '\n' newlines in
@@ -353,6 +302,7 @@ class EmailMixin(models.Model):
# on PY2
self.content = self.content.replace('\r\n', '\n')
super(EmailMixin, self).save(*args, **kwargs)
+ self.refresh_tags()
class Meta:
abstract = True
@@ -377,6 +327,34 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
# submission metadata
name = models.CharField(max_length=255)
+ related_tags = models.ManyToManyField(Tag, through=SubmissionTag)
+
+ def save_tag_values(self, tag, values, comment_origin=None):
+ current_objs = SubmissionTag.objects.filter(
+ submission=self,
+ from_comment=comment_origin,
+ tag=tag
+ )
+
+ if not values:
+ current_objs.delete()
+ return
+
+ # In case the origin is modified, delete tags that were removed
+ current_objs.exclude(value__in=values).delete()
+
+ values_to_add = set(values) - set(current_objs.values_list('value'))
+ SubmissionTag.objects.bulk_create([SubmissionTag(
+ submission=self,
+ tag=tag,
+ value=value,
+ from_comment=comment_origin
+ ) for value in values_to_add])
+
+ def refresh_tags(self):
+ submission_tags = self._extract_tags(Tag.objects.all())
+ for tag in submission_tags:
+ self.save_tag_values(tag, submission_tags[tag])
# patchwork metadata
@@ -386,6 +364,10 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
def __str__(self):
return self.name
+ def delete(self, *args, **kwargs):
+ SubmissionTag.objects.filter(submission=self).delete()
+ super(Submission, self).delete(*args, **kwargs)
+
class Meta:
ordering = ['date']
unique_together = [('msgid', 'project')]
@@ -423,7 +405,6 @@ class Patch(SeriesMixin, Submission):
diff = models.TextField(null=True, blank=True)
commit_ref = models.CharField(max_length=255, null=True, blank=True)
pull_url = models.CharField(max_length=255, null=True, blank=True)
- tags = models.ManyToManyField(Tag, through=PatchTag)
# patchwork metadata
@@ -437,40 +418,6 @@ class Patch(SeriesMixin, Submission):
# patches in a project without needing to do a JOIN.
patch_project = models.ForeignKey(Project, on_delete=models.CASCADE)
- objects = PatchManager()
-
- @staticmethod
- def extract_tags(content, tags):
- counts = Counter()
-
- for tag in tags:
- regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
- counts[tag] = len(regex.findall(content))
-
- return counts
-
- def _set_tag(self, tag, count):
- if count == 0:
- self.patchtag_set.filter(tag=tag).delete()
- return
- patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
- if patchtag.count != count:
- patchtag.count = count
- patchtag.save()
-
- def refresh_tag_counts(self):
- tags = self.project.tags
- counter = Counter()
-
- if self.content:
- counter += self.extract_tags(self.content, tags)
-
- for comment in self.comments.all():
- counter = counter + self.extract_tags(comment.content, tags)
-
- for tag in tags:
- self._set_tag(tag, counter[tag])
-
def save(self, *args, **kwargs):
if not hasattr(self, 'state') or not self.state:
self.state = get_default_initial_patch_state()
@@ -480,8 +427,6 @@ class Patch(SeriesMixin, Submission):
super(Patch, self).save(**kwargs)
- self.refresh_tag_counts()
-
def is_editable(self, user):
if not is_authenticated(user):
return False
@@ -492,6 +437,22 @@ class Patch(SeriesMixin, Submission):
return self.project.is_editable(user)
@property
+ def all_tags(self):
+ sub_ids = [self.id]
+ cover = SeriesPatch.objects.get(patch_id=self.id).series.cover_letter
+ if cover:
+ sub_ids.append(cover.id)
+ related_tags = SubmissionTag.objects.filter(
+ submission__id__in=sub_ids).values_list('tag__name',
+ 'value').distinct()
+
+ patch_tags = {}
+ for tag in self.project.tags:
+ patch_tags[tag] = [related_tag[1] for related_tag in related_tags
+ if related_tag[0] == tag.name]
+ return patch_tags
+
+ @property
def combined_check_state(self):
"""Return the combined state for all checks.
@@ -603,15 +564,16 @@ class Comment(EmailMixin, models.Model):
related_query_name='comment',
on_delete=models.CASCADE)
- def save(self, *args, **kwargs):
- super(Comment, self).save(*args, **kwargs)
- if hasattr(self.submission, 'patch'):
- self.submission.patch.refresh_tag_counts()
+ def refresh_tags(self):
+ comment_tags = self._extract_tags(Tag.objects.all())
+ for tag in comment_tags:
+ self.submission.save_tag_values(tag,
+ comment_tags[tag],
+ comment_origin=self.id)
def delete(self, *args, **kwargs):
+ SubmissionTag.objects.filter(from_comment=self.id).delete()
super(Comment, self).delete(*args, **kwargs)
- if hasattr(self.submission, 'patch'):
- self.submission.patch.refresh_tag_counts()
class Meta:
ordering = ['date']
diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py
index 4350e09..f226dc0 100644
--- a/patchwork/templatetags/patch.py
+++ b/patchwork/templatetags/patch.py
@@ -34,8 +34,9 @@ register = template.Library()
def patch_tags(patch):
counts = []
titles = []
+ all_tags = patch.all_tags
for tag in [t for t in patch.project.tags if t.show_column]:
- count = getattr(patch, tag.attr_name)
+ count = len(all_tags[tag])
titles.append('%d %s' % (count, tag.name))
if count == 0:
counts.append("-")
diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
index f8d23a3..8c41df6 100644
--- a/patchwork/views/__init__.py
+++ b/patchwork/views/__init__.py
@@ -272,9 +272,6 @@ def generic_list(request, project, view, view_args=None, filter_settings=None,
if patches is None:
patches = Patch.objects.filter(patch_project=project)
- # annotate with tag counts
- patches = patches.with_tag_counts(project)
-
patches = context['filters'].apply(patches)
if not editable_order:
patches = order.apply(patches)
diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py
index f5ff43c..8a0ce23 100644
--- a/patchwork/views/utils.py
+++ b/patchwork/views/utils.py
@@ -32,6 +32,8 @@ from django.utils import six
from patchwork.models import Comment
from patchwork.models import Patch
+from patchwork.models import SeriesPatch
+from patchwork.models import SubmissionTag
from patchwork.models import Series
if settings.ENABLE_REST_API:
@@ -75,9 +77,17 @@ def _submission_to_mbox(submission):
else:
postscript = ''
- # TODO(stephenfin): Make this use the tags infrastructure
- for comment in Comment.objects.filter(submission=submission):
- body += comment.patch_responses
+ sub_ids = [submission.id]
+ if is_patch:
+ cover = SeriesPatch.objects.get(
+ patch_id=submission.id).series.cover_letter
+ if cover:
+ sub_ids.append(cover.id)
+
+ for (tagname, value) in SubmissionTag.objects.filter(
+ submission__id__in=sub_ids).values_list(
+ 'tag__name', 'value').distinct():
+ body += '%s: %s\n' % (tagname, value)
if postscript:
body += '---\n' + postscript + '\n'
--
2.13.6
More information about the Patchwork
mailing list