[PATCH v4 1/3] models: Use database constraints to prevent split Series
Daniel Axtens
dja at axtens.net
Wed Dec 4 17:05:22 AEDT 2019
I only see patch 1 of apparently 3 patches?
I'm not super keen on the whole retry logic, and I think I have a way to
do this that doesn't require that - give me a few days to try and
polish it up enough to send out.
Regards,
Daniel
Stephen Finucane <stephen at that.guru> writes:
> Currently, the 'SeriesReference' object has a unique constraint on the
> two fields it has, 'series', which is a foreign key to 'Series', and
> 'msgid'. This is the wrong constraint. What we actually want to enforce
> is that a patch, cover letter or comment is referenced by a single
> series, or rather a single series per project the submission appears on.
> As such, we should be enforcing uniqueness on the msgid and the project
> that the patch, cover letter or comment belongs to.
>
> This requires adding a new field to the object, 'project', since it's
> not possible to do something like the following:
>
> unique_together = [('msgid', 'series__project')]
>
> This is detailed here [1]. In addition, the migration needs a precursor
> migration step to merge any broken series.
>
> [1] https://stackoverflow.com/a/4440189/613428
>
> Signed-off-by: Stephen Finucane <stephen at that.guru>
> Closes: #241
> Cc: Daniel Axtens <dja at axtens.net>
> Cc: Petr Vorel <petr.vorel at gmail.com>
> ---
> v4:
> - Further tweaks to logging
> v3:
> - Improve logging
> ---
> .../0038_unique_series_references.py | 121 +++++++++++++++++
> patchwork/models.py | 6 +-
> patchwork/parser.py | 123 +++++++++---------
> patchwork/tests/test_parser.py | 9 +-
> patchwork/tests/utils.py | 6 +-
> 5 files changed, 199 insertions(+), 66 deletions(-)
> create mode 100644 patchwork/migrations/0038_unique_series_references.py
>
> diff --git a/patchwork/migrations/0038_unique_series_references.py b/patchwork/migrations/0038_unique_series_references.py
> new file mode 100644
> index 00000000..91799020
> --- /dev/null
> +++ b/patchwork/migrations/0038_unique_series_references.py
> @@ -0,0 +1,121 @@
> +from django.db import connection, migrations, models
> +from django.db.models import Count
> +import django.db.models.deletion
> +
> +
> +def merge_duplicate_series(apps, schema_editor):
> + SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> + Patch = apps.get_model('patchwork', 'Patch')
> +
> + msgid_seriesrefs = {}
> +
> + # find all SeriesReference that share a msgid but point to different series
> + # and decide which of the series is going to be the authoritative one
> + msgid_counts = (
> + SeriesReference.objects.values('msgid')
> + .annotate(count=Count('msgid'))
> + .filter(count__gt=1)
> + )
> + for msgid_count in msgid_counts:
> + msgid = msgid_count['msgid']
> + chosen_ref = None
> + for series_ref in SeriesReference.objects.filter(msgid=msgid):
> + if series_ref.series.cover_letter:
> + if chosen_ref:
> + # I don't think this can happen, but explode if it does
> + raise Exception(
> + "Looks like you've got two or more series that share "
> + "some patches but do not share a cover letter. Unable "
> + "to auto-resolve."
> + )
> +
> + # if a series has a cover letter, that's the one we'll group
> + # everything under
> + chosen_ref = series_ref
> +
> + if not chosen_ref:
> + # if none of the series have cover letters, simply use the last
> + # one (hint: this relies on Python's weird scoping for for loops
> + # where 'series_ref' is still accessible outside the loop)
> + chosen_ref = series_ref
> +
> + msgid_seriesrefs[msgid] = chosen_ref
> +
> + # reassign any patches referring to non-authoritative series to point to
> + # the authoritative one, and delete the other series; we do this separately
> + # to allow us a chance to raise the exception above if necessary
> + for msgid, chosen_ref in msgid_seriesrefs.items():
> + for series_ref in SeriesReference.objects.filter(msgid=msgid):
> + if series_ref == chosen_ref:
> + continue
> +
> + # update the patches to point to our chosen series instead, on the
> + # assumption that all other metadata is correct
> + for patch in Patch.objects.filter(series=series_ref.series):
> + patch.series = chosen_ref.series
> + patch.save()
> +
> + # delete the other series (which will delete the series ref)
> + series_ref.series.delete()
> +
> +
> +def copy_project_field(apps, schema_editor):
> + if connection.vendor == 'postgresql':
> + schema_editor.execute(
> + """
> + UPDATE patchwork_seriesreference
> + SET project_id = patchwork_series.project_id
> + FROM patchwork_series
> + WHERE patchwork_seriesreference.series_id = patchwork_series.id
> + """
> + )
> + elif connection.vendor == 'mysql':
> + schema_editor.execute(
> + """
> + UPDATE patchwork_seriesreference, patchwork_series
> + SET patchwork_seriesreference.project_id = patchwork_series.project_id
> + WHERE patchwork_seriesreference.series_id = patchwork_series.id
> + """
> + ) # noqa
> + else:
> + SeriesReference = apps.get_model('patchwork', 'SeriesReference')
> +
> + for series_ref in SeriesReference.objects.all().select_related(
> + 'series'
> + ):
> + series_ref.project = series_ref.series.project
> + series_ref.save()
> +
> +
> +class Migration(migrations.Migration):
> +
> + dependencies = [('patchwork', '0037_event_actor')]
> +
> + operations = [
> + migrations.RunPython(
> + merge_duplicate_series, migrations.RunPython.noop, atomic=False
> + ),
> + migrations.AddField(
> + model_name='seriesreference',
> + name='project',
> + field=models.ForeignKey(
> + null=True,
> + on_delete=django.db.models.deletion.CASCADE,
> + to='patchwork.Project',
> + ),
> + ),
> + migrations.AlterUniqueTogether(
> + name='seriesreference', unique_together={('project', 'msgid')}
> + ),
> + migrations.RunPython(
> + copy_project_field, migrations.RunPython.noop, atomic=False
> + ),
> + migrations.AlterField(
> + model_name='seriesreference',
> + name='project',
> + field=models.ForeignKey(
> + on_delete=django.db.models.deletion.CASCADE,
> + to='patchwork.Project',
> + ),
> + ),
> + ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 7f0efd48..dbaff024 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -79,7 +79,8 @@ class Project(models.Model):
> webscm_url = models.CharField(max_length=2000, blank=True)
> list_archive_url = models.CharField(max_length=2000, blank=True)
> list_archive_url_format = models.CharField(
> - max_length=2000, blank=True,
> + max_length=2000,
> + blank=True,
> help_text="URL format for the list archive's Message-ID redirector. "
> "{} will be replaced by the Message-ID.")
> commit_url_format = models.CharField(
> @@ -787,6 +788,7 @@ class SeriesReference(models.Model):
> required to handle the case whereby one or more patches are
> received before the cover letter.
> """
> + project = models.ForeignKey(Project, on_delete=models.CASCADE)
> series = models.ForeignKey(Series, related_name='references',
> related_query_name='reference',
> on_delete=models.CASCADE)
> @@ -796,7 +798,7 @@ class SeriesReference(models.Model):
> return self.msgid
>
> class Meta:
> - unique_together = [('series', 'msgid')]
> + unique_together = [('project', 'msgid')]
>
>
> class Bundle(models.Model):
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index c424729b..c0084cc0 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -16,6 +16,7 @@ import re
>
> from django.contrib.auth.models import User
> from django.db.utils import IntegrityError
> +from django.db import transaction
> from django.utils import six
>
> from patchwork.models import Comment
> @@ -256,16 +257,9 @@ def _find_series_by_references(project, mail):
> for ref in refs:
> try:
> return SeriesReference.objects.get(
> - msgid=ref[:255], series__project=project).series
> + msgid=ref[:255], project=project).series
> except SeriesReference.DoesNotExist:
> continue
> - except SeriesReference.MultipleObjectsReturned:
> - # FIXME: Open bug: this can happen when we're processing
> - # messages in parallel. Pick the first and log.
> - logger.error("Multiple SeriesReferences for %s in project %s!" %
> - (ref[:255], project.name))
> - return SeriesReference.objects.filter(
> - msgid=ref[:255], series__project=project).first().series
>
>
> def _find_series_by_markers(project, mail, author):
> @@ -1092,47 +1086,65 @@ def parse_mail(mail, list_id=None):
> except IntegrityError:
> raise DuplicateMailError(msgid=msgid)
>
> - # if we don't have a series marker, we will never have an existing
> - # series to match against.
> - series = None
> - if n:
> - series = find_series(project, mail, author)
> + for attempt in range(1, 11): # arbitrary retry count
> + try:
> + with transaction.atomic():
> + # if we don't have a series marker, we will never have an
> + # existing series to match against.
> + series = None
> + if n:
> + series = find_series(project, mail, author)
> + else:
> + x = n = 1
> +
> + # We will create a new series if:
> + # - there is no existing series to assign this patch to, or
> + # - there is an existing series, but it already has a patch
> + # with this number in it
> + if not series or Patch.objects.filter(
> + series=series, number=x).count():
> + series = Series.objects.create(
> + project=project,
> + date=date,
> + submitter=author,
> + version=version,
> + total=n)
> +
> + # NOTE(stephenfin) We must save references for series.
> + # We do this to handle the case where a later patch is
> + # received first. Without storing references, it would
> + # not be possible to identify the relationship between
> + # patches as the earlier patch does not reference the
> + # later one.
> + for ref in refs + [msgid]:
> + ref = ref[:255]
> + # we don't want duplicates
> + try:
> + # we could have a ref to a previous series.
> + # (For example, a series sent in reply to
> + # another series.) That should not create a
> + # series ref for this series, so check for the
> + # msg-id only, not the msg-id/series pair.
> + SeriesReference.objects.get(
> + msgid=ref, project=project)
> + except SeriesReference.DoesNotExist:
> + SeriesReference.objects.create(
> + msgid=ref, project=project, series=series)
> + break
> + except IntegrityError:
> + # we lost the race so go again
> + logger.warning('Conflict while saving series. This is '
> + 'probably because multiple patches belonging '
> + 'to the same series have been received at '
> + 'once. Trying again (attempt %02d/10)',
> + attempt)
> else:
> - x = n = 1
> -
> - # We will create a new series if:
> - # - there is no existing series to assign this patch to, or
> - # - there is an existing series, but it already has a patch with this
> - # number in it
> - if not series or Patch.objects.filter(series=series, number=x).count():
> - series = Series.objects.create(
> - project=project,
> - date=date,
> - submitter=author,
> - version=version,
> - total=n)
> -
> - # NOTE(stephenfin) We must save references for series. We
> - # do this to handle the case where a later patch is
> - # received first. Without storing references, it would not
> - # be possible to identify the relationship between patches
> - # as the earlier patch does not reference the later one.
> - for ref in refs + [msgid]:
> - ref = ref[:255]
> - # we don't want duplicates
> - try:
> - # we could have a ref to a previous series. (For
> - # example, a series sent in reply to another
> - # series.) That should not create a series ref
> - # for this series, so check for the msg-id only,
> - # not the msg-id/series pair.
> - SeriesReference.objects.get(msgid=ref,
> - series__project=project)
> - except SeriesReference.DoesNotExist:
> - SeriesReference.objects.create(series=series, msgid=ref)
> - except SeriesReference.MultipleObjectsReturned:
> - logger.error("Multiple SeriesReferences for %s"
> - " in project %s!" % (ref, project.name))
> + # we failed to save the series so return the series-less patch
> + logger.warning('Failed to save series. Your patch with message ID '
> + '%s has been saved but this should not happen. '
> + 'Please report this as a bug and include logs',
> + msgid)
> + return patch
>
> # add to a series if we have found one, and we have a numbered
> # patch. Don't add unnumbered patches (for example diffs sent
> @@ -1170,14 +1182,9 @@ def parse_mail(mail, list_id=None):
> # message
> try:
> series = SeriesReference.objects.get(
> - msgid=msgid, series__project=project).series
> + msgid=msgid, project=project).series
> except SeriesReference.DoesNotExist:
> series = None
> - except SeriesReference.MultipleObjectsReturned:
> - logger.error("Multiple SeriesReferences for %s"
> - " in project %s!" % (msgid, project.name))
> - series = SeriesReference.objects.filter(
> - msgid=msgid, series__project=project).first().series
>
> if not series:
> series = Series.objects.create(
> @@ -1190,12 +1197,8 @@ def parse_mail(mail, list_id=None):
> # we don't save the in-reply-to or references fields
> # for a cover letter, as they can't refer to the same
> # series
> - try:
> - SeriesReference.objects.get_or_create(series=series,
> - msgid=msgid)
> - except SeriesReference.MultipleObjectsReturned:
> - logger.error("Multiple SeriesReferences for %s"
> - " in project %s!" % (msgid, project.name))
> + SeriesReference.objects.create(
> + msgid=msgid, project=project, series=series)
>
> try:
> cover_letter = CoverLetter.objects.create(
> diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py
> index 0bf71585..0edbd87a 100644
> --- a/patchwork/tests/test_parser.py
> +++ b/patchwork/tests/test_parser.py
> @@ -421,17 +421,20 @@ class SeriesCorrelationTest(TestCase):
> msgids = [make_msgid()]
> project = create_project()
> series_v1 = create_series(project=project)
> - create_series_reference(msgid=msgids[0], series=series_v1)
> + create_series_reference(msgid=msgids[0], series=series_v1,
> + project=project)
>
> # ...and three patches
> for i in range(3):
> msgids.append(make_msgid())
> - create_series_reference(msgid=msgids[-1], series=series_v1)
> + create_series_reference(msgid=msgids[-1], series=series_v1,
> + project=project)
>
> # now create a new series with "cover letter"
> msgids.append(make_msgid())
> series_v2 = create_series(project=project)
> - ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2)
> + ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2,
> + project=project)
>
> # ...and the "first patch" of this new series
> msgid = make_msgid()
> diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py
> index 577183d0..4ead1781 100644
> --- a/patchwork/tests/utils.py
> +++ b/patchwork/tests/utils.py
> @@ -282,8 +282,12 @@ def create_series(**kwargs):
>
> def create_series_reference(**kwargs):
> """Create 'SeriesReference' object."""
> + project = kwargs.pop('project', create_project())
> + series = kwargs.pop('series', create_series(project=project))
> +
> values = {
> - 'series': create_series() if 'series' not in kwargs else None,
> + 'series': series,
> + 'project': project,
> 'msgid': make_msgid(),
> }
> values.update(**kwargs)
> --
> 2.23.0
More information about the Patchwork
mailing list