[PATCH v2 1/4] Rework tagging infrastructure
vkabatov at redhat.com
vkabatov at redhat.com
Tue Sep 18 03:01:53 AEST 2018
From: Veronika Kabatova <vkabatov at redhat.com>
Solve #113 and #57 GitHub issues, keep track of tag origin to be able
to add tags to comments in the API later.
Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of
tags for each patch in series, and use `series` attribute of
SubmissionTag as a notion of a tag which is related to each patch in the
series (because it comes from cover letter or it's comments)
Signed-off-by: Veronika Kabatova <vkabatov at redhat.com>
---
Rebased on top of 'Convert Series-Patch relationship to 1:N' series.
Stephen, I split up the patch to separate out API, mbox and documentation
changes as you suggested; and implemented your comments (simplified the
migration in favor of running the retag comment, moved the tag retrieval from
the API into a property in models.py, added comment and tag prefetching,
increased the API version where needed, added wildcard to API filter and
simplified it and some other minor things). The series-patch cleanup definitely
helped with some cleanup, but let me know if there are other optimizations that
would help with regards to DB performance.
---
patchwork/management/commands/retag.py | 15 +-
patchwork/migrations/0034_rework_tagging.py | 66 +++++++
patchwork/models.py | 175 ++++++++----------
patchwork/templatetags/patch.py | 3 +-
patchwork/tests/test_parser.py | 18 +-
patchwork/tests/test_tags.py | 64 +++----
patchwork/views/__init__.py | 3 -
.../tagging-rework-9907e9dc3f835566.yaml | 15 ++
8 files changed, 202 insertions(+), 157 deletions(-)
create mode 100644 patchwork/migrations/0034_rework_tagging.py
create mode 100644 releasenotes/notes/tagging-rework-9907e9dc3f835566.yaml
diff --git a/patchwork/management/commands/retag.py b/patchwork/management/commands/retag.py
index 8617ff41..95b2cc1f 100644
--- a/patchwork/management/commands/retag.py
+++ b/patchwork/management/commands/retag.py
@@ -19,15 +19,15 @@
from django.core.management.base import BaseCommand
-from patchwork.models import Patch
+from patchwork.models import Submission
class Command(BaseCommand):
- help = 'Update the tag (Ack/Review/Test) counts on existing patches'
- args = '[<patch_id>...]'
+ help = 'Update tags on existing submissions and associated comments'
+ args = '[<submission_id>...]'
def handle(self, *args, **options):
- query = Patch.objects
+ query = Submission.objects.prefetch_related('comments')
if args:
query = query.filter(id__in=args)
@@ -36,8 +36,11 @@ class Command(BaseCommand):
count = query.count()
- for i, patch in enumerate(query.iterator()):
- patch.refresh_tag_counts()
+ for i, submission in enumerate(query.iterator()):
+ submission.refresh_tags()
+ for comment in submission.comments.all():
+ comment.refresh_tags()
+
if (i % 10) == 0:
self.stdout.write('%06d/%06d\r' % (i, count), ending='')
self.stdout.flush()
diff --git a/patchwork/migrations/0034_rework_tagging.py b/patchwork/migrations/0034_rework_tagging.py
new file mode 100644
index 00000000..580a4fd0
--- /dev/null
+++ b/patchwork/migrations/0034_rework_tagging.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+from __future__ import unicode_literals
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+ dependencies = [
+ ('patchwork', '0033_remove_patch_series_model'),
+ ]
+
+ operations = [
+ migrations.CreateModel(
+ name='SubmissionTag',
+ fields=[
+ ('id', models.AutoField(auto_created=True,
+ primary_key=True,
+ serialize=False,
+ verbose_name='ID')),
+ ('value', models.CharField(max_length=255)),
+ ('comment', models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ to='patchwork.Comment',
+ null=True
+ )),
+ ('submission', models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ to='patchwork.Submission'
+ )),
+ ('tag', models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ to='patchwork.Tag'
+ )),
+ ('series', models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ to='patchwork.Series',
+ null=True
+ )),
+ ],
+ ),
+ migrations.AlterUniqueTogether(
+ name='patchtag',
+ unique_together=set([]),
+ ),
+ migrations.RemoveField(model_name='patchtag', name='patch',),
+ migrations.RemoveField(model_name='patchtag', name='tag',),
+ migrations.RemoveField(model_name='patch', name='tags',),
+ migrations.DeleteModel(name='PatchTag',),
+ migrations.AddField(
+ model_name='submission',
+ name='tags',
+ field=models.ManyToManyField(
+ through='patchwork.SubmissionTag',
+ to='patchwork.Tag'
+ ),
+ ),
+ migrations.AlterUniqueTogether(
+ name='submissiontag',
+ unique_together=set([('submission',
+ 'tag',
+ 'value',
+ 'comment')]),
+ ),
+ ]
diff --git a/patchwork/models.py b/patchwork/models.py
index 14eb74aa..5caf7641 100644
--- a/patchwork/models.py
+++ b/patchwork/models.py
@@ -20,8 +20,6 @@
from __future__ import absolute_import
-from collections import Counter
-from collections import OrderedDict
import datetime
import random
import re
@@ -30,6 +28,7 @@ from django.conf import settings
from django.contrib.auth.models import User
from django.core.exceptions import ValidationError
from django.db import models
+from django.db.models import Q
from django.urls import reverse
from django.utils.encoding import python_2_unicode_compatible
from django.utils.functional import cached_property
@@ -250,10 +249,6 @@ class Tag(models.Model):
' tag\'s count in the patch list view',
default=True)
- @property
- def attr_name(self):
- return 'tag_%d_count' % self.id
-
def __str__(self):
return self.name
@@ -261,60 +256,21 @@ class Tag(models.Model):
ordering = ['abbrev']
-class PatchTag(models.Model):
- patch = models.ForeignKey('Patch', on_delete=models.CASCADE)
+class SubmissionTag(models.Model):
+ submission = models.ForeignKey('Submission', on_delete=models.CASCADE)
tag = models.ForeignKey('Tag', on_delete=models.CASCADE)
- count = models.IntegerField(default=1)
+ value = models.CharField(max_length=255)
+ comment = models.ForeignKey('Comment', null=True, on_delete=models.CASCADE)
+ series = models.ForeignKey('Series', null=True, on_delete=models.CASCADE)
class Meta:
- unique_together = [('patch', 'tag')]
+ unique_together = [('submission', 'tag', 'value', 'comment')]
def get_default_initial_patch_state():
return State.objects.get(ordering=0)
-class PatchQuerySet(models.query.QuerySet):
-
- def with_tag_counts(self, project=None):
- if project and not project.use_tags:
- return self
-
- # We need the project's use_tags field loaded for Project.tags().
- # Using prefetch_related means we'll share the one instance of
- # Project, and share the project.tags cache between all patch.project
- # references.
- qs = self.prefetch_related('project')
- select = OrderedDict()
- select_params = []
-
- # All projects have the same tags, so we're good to go here
- if project:
- tags = project.tags
- else:
- tags = Tag.objects.all()
-
- for tag in tags:
- select[tag.attr_name] = (
- "coalesce("
- "(SELECT count FROM patchwork_patchtag"
- " WHERE patchwork_patchtag.patch_id="
- "patchwork_patch.submission_ptr_id"
- " AND patchwork_patchtag.tag_id=%s), 0)")
- select_params.append(tag.id)
-
- return qs.extra(select=select, select_params=select_params)
-
-
-class PatchManager(models.Manager):
-
- def get_queryset(self):
- return PatchQuerySet(self.model, using=self.db)
-
- def with_tag_counts(self, project):
- return self.get_queryset().with_tag_counts(project)
-
-
class EmailMixin(models.Model):
"""Mixin for models with an email-origin."""
# email metadata
@@ -340,6 +296,16 @@ class EmailMixin(models.Model):
return ''.join([match.group(0) + '\n' for match in
self.response_re.finditer(self.content)])
+ def _extract_tags(self, tags):
+ found_tags = {}
+ if not self.content:
+ return found_tags
+
+ for tag in tags:
+ regex = re.compile(tag.pattern + r'\s*(.*)', re.M | re.I | re.U)
+ found_tags[tag] = regex.findall(self.content)
+ return found_tags
+
def save(self, *args, **kwargs):
# Modifying a submission via admin interface changes '\n' newlines in
# message content to '\r\n'. We need to fix them to avoid problems,
@@ -371,6 +337,53 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
# submission metadata
name = models.CharField(max_length=255)
+ tags = models.ManyToManyField(Tag, through=SubmissionTag)
+
+ def add_tags(self, tag, values, comment=None):
+ if hasattr(self, 'patch'):
+ series = None
+ else:
+ series = self.coverletter.series
+ current_objs = SubmissionTag.objects.filter(submission=self,
+ comment=comment,
+ tag=tag,
+ series=series)
+ if not values:
+ current_objs.delete()
+ return
+ # In case the origin is modified, delete tags that were removed
+ current_objs.exclude(value__in=values).delete()
+ values_to_add = set(values) - set(current_objs.values_list('value',
+ flat=True))
+ SubmissionTag.objects.bulk_create([SubmissionTag(
+ submission=self,
+ tag=tag,
+ value=value,
+ comment=comment,
+ series=series
+ ) for value in values_to_add])
+
+ def refresh_tags(self):
+ submission_tags = self._extract_tags(Tag.objects.all())
+ for tag in submission_tags:
+ self.add_tags(tag, submission_tags[tag])
+
+ @property
+ def all_tags(self):
+ related_tags = {}
+
+ for tag in self.project.tags:
+ if hasattr(self, 'patch'):
+ related_tags[tag] = SubmissionTag.objects.filter((
+ Q(submission=self) | Q(series=self.series)
+ ) & Q(tag__name=tag.name)).values_list('value',
+ flat=True).distinct()
+ else:
+ related_tags[tag] = SubmissionTag.objects.filter(
+ Q(submission=self) & Q(tag__name=tag.name)
+ ).values_list('value', flat=True).distinct()
+
+ return related_tags
# patchwork metadata
@@ -409,7 +422,6 @@ class Patch(Submission):
diff = models.TextField(null=True, blank=True)
commit_ref = models.CharField(max_length=255, null=True, blank=True)
pull_url = models.CharField(max_length=255, null=True, blank=True)
- tags = models.ManyToManyField(Tag, through=PatchTag)
# patchwork metadata
@@ -432,40 +444,6 @@ class Patch(Submission):
default=None, null=True,
help_text='The number assigned to this patch in the series')
- objects = PatchManager()
-
- @staticmethod
- def extract_tags(content, tags):
- counts = Counter()
-
- for tag in tags:
- regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE)
- counts[tag] = len(regex.findall(content))
-
- return counts
-
- def _set_tag(self, tag, count):
- if count == 0:
- self.patchtag_set.filter(tag=tag).delete()
- return
- patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag)
- if patchtag.count != count:
- patchtag.count = count
- patchtag.save()
-
- def refresh_tag_counts(self):
- tags = self.project.tags
- counter = Counter()
-
- if self.content:
- counter += self.extract_tags(self.content, tags)
-
- for comment in self.comments.all():
- counter = counter + self.extract_tags(comment.content, tags)
-
- for tag in tags:
- self._set_tag(tag, counter[tag])
-
def save(self, *args, **kwargs):
if not hasattr(self, 'state') or not self.state:
self.state = get_default_initial_patch_state()
@@ -475,7 +453,7 @@ class Patch(Submission):
super(Patch, self).save(**kwargs)
- self.refresh_tag_counts()
+ self.refresh_tags()
def is_editable(self, user):
if not user.is_authenticated:
@@ -610,13 +588,23 @@ class Comment(EmailMixin, models.Model):
def save(self, *args, **kwargs):
super(Comment, self).save(*args, **kwargs)
- if hasattr(self.submission, 'patch'):
- self.submission.patch.refresh_tag_counts()
+ self.refresh_tags()
+
+ def refresh_tags(self):
+ comment_tags = self._extract_tags(Tag.objects.all())
+ for tag in comment_tags:
+ self.submission.add_tags(tag, comment_tags[tag], comment=self)
+
+ @property
+ def all_tags(self):
+ related_tags = {}
+
+ for tag in self.submission.project.tags:
+ related_tags[tag] = SubmissionTag.objects.filter(
+ comment=self, tag__name=tag.name
+ ).values_list('value', flat=True).distinct()
- def delete(self, *args, **kwargs):
- super(Comment, self).delete(*args, **kwargs)
- if hasattr(self.submission, 'patch'):
- self.submission.patch.refresh_tag_counts()
+ return related_tags
def is_editable(self, user):
return False
@@ -715,6 +703,7 @@ class Series(FilenameMixin, models.Model):
self.name = self._format_name(cover)
self.save()
+ cover.refresh_tags()
def add_patch(self, patch, number):
"""Add a patch to the series."""
diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py
index 30ccc8e2..5be6b908 100644
--- a/patchwork/templatetags/patch.py
+++ b/patchwork/templatetags/patch.py
@@ -34,8 +34,9 @@ register = template.Library()
def patch_tags(patch):
counts = []
titles = []
+ all_tags = patch.all_tags
for tag in [t for t in patch.project.tags if t.show_column]:
- count = getattr(patch, tag.attr_name)
+ count = len(all_tags[tag])
titles.append('%d %s' % (count, tag.name))
if count == 0:
counts.append("-")
diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
index e99cf214..7fdceab3 100644
--- a/patchwork/tests/test_parser.py
+++ b/patchwork/tests/test_parser.py
@@ -802,12 +802,9 @@ class ParseInitialTagsTest(PatchTest):
def test_tags(self):
self.assertEqual(Patch.objects.count(), 1)
patch = Patch.objects.all()[0]
- self.assertEqual(patch.patchtag_set.filter(
- tag__name='Acked-by').count(), 0)
- self.assertEqual(patch.patchtag_set.get(
- tag__name='Reviewed-by').count, 1)
- self.assertEqual(patch.patchtag_set.get(
- tag__name='Tested-by').count, 1)
+ self.assertEqual(patch.tags.filter(name='Acked-by').count(), 0)
+ self.assertEqual(patch.tags.filter(name='Reviewed-by').count(), 1)
+ self.assertEqual(patch.tags.filter(name='Tested-by').count(), 1)
class ParseCommentTagsTest(PatchTest):
@@ -830,12 +827,9 @@ class ParseCommentTagsTest(PatchTest):
def test_tags(self):
self.assertEqual(Patch.objects.count(), 1)
patch = Patch.objects.all()[0]
- self.assertEqual(patch.patchtag_set.filter(
- tag__name='Acked-by').count(), 0)
- self.assertEqual(patch.patchtag_set.get(
- tag__name='Reviewed-by').count, 1)
- self.assertEqual(patch.patchtag_set.get(
- tag__name='Tested-by').count, 1)
+ self.assertEqual(patch.tags.filter(name='Acked-by').count(), 0)
+ self.assertEqual(patch.tags.filter(name='Reviewed-by').count(), 1)
+ self.assertEqual(patch.tags.filter(name='Tested-by').count(), 1)
class SubjectTest(TestCase):
diff --git a/patchwork/tests/test_tags.py b/patchwork/tests/test_tags.py
index 4fd1bf23..f7a35f92 100644
--- a/patchwork/tests/test_tags.py
+++ b/patchwork/tests/test_tags.py
@@ -21,7 +21,7 @@ from django.test import TestCase
from django.test import TransactionTestCase
from patchwork.models import Patch
-from patchwork.models import PatchTag
+from patchwork.models import SubmissionTag
from patchwork.models import Tag
from patchwork.tests.utils import create_comment
from patchwork.tests.utils import create_patch
@@ -34,11 +34,14 @@ class ExtractTagsTest(TestCase):
name_email = 'test name <' + email + '>'
def assertTagsEqual(self, str, acks, reviews, tests): # noqa
- counts = Patch.extract_tags(str, Tag.objects.all())
- self.assertEqual((acks, reviews, tests),
- (counts[Tag.objects.get(name='Acked-by')],
- counts[Tag.objects.get(name='Reviewed-by')],
- counts[Tag.objects.get(name='Tested-by')]))
+ patch = create_patch(content=str)
+ extracted = patch._extract_tags(Tag.objects.all())
+ self.assertEqual(
+ (acks, reviews, tests),
+ (len(extracted.get(Tag.objects.get(name='Acked-by'), [])),
+ len(extracted.get(Tag.objects.get(name='Reviewed-by'), [])),
+ len(extracted.get(Tag.objects.get(name='Tested-by'), [])))
+ )
def test_empty(self):
self.assertTagsEqual('', 0, 0, 0)
@@ -80,7 +83,7 @@ class ExtractTagsTest(TestCase):
self.assertTagsEqual('> Acked-by: %s\n' % self.name_email, 0, 0, 0)
-class PatchTagsTest(TransactionTestCase):
+class SubmissionTagsTest(TransactionTestCase):
fixtures = ['default_tags']
ACK = 1
@@ -95,16 +98,14 @@ class PatchTagsTest(TransactionTestCase):
def assertTagsEqual(self, patch, acks, reviews, tests): # noqa
patch = Patch.objects.get(pk=patch.pk)
- def count(name):
- try:
- return patch.patchtag_set.get(tag__name=name).count
- except PatchTag.DoesNotExist:
- return 0
+ def count(submission, name):
+ return SubmissionTag.objects.filter(submission=submission,
+ tag__name=name).count()
counts = (
- count(name='Acked-by'),
- count(name='Reviewed-by'),
- count(name='Tested-by'),
+ count(patch, name='Acked-by'),
+ count(patch, name='Reviewed-by'),
+ count(patch, name='Tested-by'),
)
self.assertEqual(counts, (acks, reviews, tests))
@@ -118,7 +119,12 @@ class PatchTagsTest(TransactionTestCase):
if tagtype not in tags:
return ''
- return '%s-by: Test Tagger <tagger at example.com>\n' % tags[tagtype]
+ index = SubmissionTag.objects.filter(
+ tag__name=tags[tagtype] + '-by'
+ ).count()
+ return '%s-by: Test Taggeri%d <tagger at example.com>\n' % (
+ tags[tagtype], index + 1
+ )
def create_tag_comment(self, patch, tagtype=None):
comment = create_comment(
@@ -179,29 +185,3 @@ class PatchTagsTest(TransactionTestCase):
c1.content += self.create_tag(self.REVIEW)
c1.save()
self.assertTagsEqual(self.patch, 1, 1, 0)
-
-
-class PatchTagManagerTest(PatchTagsTest):
-
- def assertTagsEqual(self, patch, acks, reviews, tests): # noqa
- tagattrs = {}
- for tag in Tag.objects.all():
- tagattrs[tag.name] = tag.attr_name
-
- # force project.tags to be queried outside of the assertNumQueries
- patch.project.tags
-
- # we should be able to do this with two queries: one for
- # the patch table lookup, and the prefetch_related for the
- # projects table.
- with self.assertNumQueries(2):
- patch = Patch.objects.with_tag_counts(project=patch.project) \
- .get(pk=patch.pk)
-
- counts = (
- getattr(patch, tagattrs['Acked-by']),
- getattr(patch, tagattrs['Reviewed-by']),
- getattr(patch, tagattrs['Tested-by']),
- )
-
- self.assertEqual(counts, (acks, reviews, tests))
diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py
index 5942ded8..3ff4345c 100644
--- a/patchwork/views/__init__.py
+++ b/patchwork/views/__init__.py
@@ -274,9 +274,6 @@ def generic_list(request, project, view, view_args=None, filter_settings=None,
if patches is None:
patches = Patch.objects.filter(patch_project=project)
- # annotate with tag counts
- patches = patches.with_tag_counts(project)
-
patches = context['filters'].apply(patches)
if not editable_order:
patches = order.apply(patches)
diff --git a/releasenotes/notes/tagging-rework-9907e9dc3f835566.yaml b/releasenotes/notes/tagging-rework-9907e9dc3f835566.yaml
new file mode 100644
index 00000000..8a525532
--- /dev/null
+++ b/releasenotes/notes/tagging-rework-9907e9dc3f835566.yaml
@@ -0,0 +1,15 @@
+---
+features:
+ - |
+ Tagging is completely reworked. Instead of counts, real values are
+ extracted. This fixes wrong counts when for example the same person
+ accidentally sent the Acked-by email twice, since only a single same pair
+ tagname-value can be assigned to a patch. Tags from cover letters are now
+ counted towards each patch in the series.
+upgrade:
+ - |
+ The ``retag`` command (``python manage.py retag``) needs to be ran after
+ the upgrade. The migration only takes care of the database structure, while
+ the actual tag data will be created by the command, to make the migration
+ itself faster. Please note that this will take a lot of time and based on
+ the size of the data in question, might be useful to run in batches.
--
2.17.1
More information about the Patchwork
mailing list