[PATCH 4/6] Begin live migration of diff and pull_url to Submission

Daniel Axtens dja at axtens.net
Thu Sep 26 23:19:07 AEST 2019


Daniel Axtens <dja at axtens.net> writes:

> This patch contains 2 migrations:
>
> 0037_prepare_old_diff_pull_url:
>  - renames Patch's 'diff' to 'old_diff', while not affecting the db
>  - likewise 'pull_url' to 'old_pull_url'
> This does no SQL changes, which you can verify with
> python manage.py sqlmigrate patchwork 0037
>
> 0038_submission_new_diff_pull_url:
>  - creates a diff column in Submission, accessible via new_diff
>  - likewise pull_url as new_pull_url
>
> livemigrate-v22-v23: (the version numbers are a bit arbitrary)
>  - copies data from old_{pull_url,diff} to new_{pull_url,diff}
>    as required. Can be run on a _running instance_ of patchwork.
>
> The rest of the code provides a bunch of pretty ugly glue code.
> It assumes both migrations have been run, and then allows pw to:
>  - read old data from old_{pull_url,diff}
>  - store new data in both the old and new fields

It turns out this is way, way, overcomplicating things. I think I had
twisted myself in knots by not properly splitting out schema migration
and data migration in my mental model of things. Anyway, we can do this
much more simply:

 - don't rename the field in Patch
 - add a new field in Submission called new_blah
 - make save() copy the data into Submission
 - then the next patch drops the fields in Patch and renames the fields
   in Submission.

I figured this out trying to extend my approach to migrating series and
number. Once I get that sorted out, I'll send new patches out.

Kind regards,
Daniel

>
> The idea is that you should be able to:
>  - bring down pw
>  - apply this
>  - do two fast migrations (one with no SQL, one that adds two columns)
>  - restart patchwork, restart ingesting mail
>  - do the live migration over time
>  - proceed to the next patch
>
> By storing the data in duplicate, the hope is that you can
> back out the new code at any point and still have all of your data.
>
> Signed-off-by: Daniel Axtens <dja at axtens.net>
> ---
>  patchwork/api/patch.py                        | 10 +-
>  .../commands/livemigrate-v22-v23.py           | 35 +++++++
>  .../0037_prepare_old_diff_pull_url.py         | 35 +++++++
>  .../0038_submission_new_diff_pull_url.py      | 25 +++++
>  patchwork/models.py                           | 93 ++++++++++++++++++-
>  patchwork/views/xmlrpc.py                     |  5 +-
>  6 files changed, 197 insertions(+), 6 deletions(-)
>  create mode 100644 patchwork/management/commands/livemigrate-v22-v23.py
>  create mode 100644 patchwork/migrations/0037_prepare_old_diff_pull_url.py
>  create mode 100644 patchwork/migrations/0038_submission_new_diff_pull_url.py
>
> diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
> index c9360308a56a..3866f92fe3dc 100644
> --- a/patchwork/api/patch.py
> +++ b/patchwork/api/patch.py
> @@ -181,11 +181,15 @@ class PatchList(ListAPIView):
>      ordering = 'id'
>  
>      def get_queryset(self):
> -        return Patch.objects.all()\
> +        qs = Patch.objects.all()\
>              .prefetch_related('check_set')\
>              .select_related('project', 'state', 'submitter', 'delegate',
> -                            'series')\
> -            .defer('content', 'diff', 'headers')
> +                            'series')
> +        if hasattr(Patch, 'old_diff'):
> +            qs.defer('content', 'old_diff', 'headers')
> +        else:
> +            qs.defer('content', 'submission_ptr__new_diff', 'headers')
> +        return qs
>  
>  
>  class PatchDetail(RetrieveUpdateAPIView):
> diff --git a/patchwork/management/commands/livemigrate-v22-v23.py b/patchwork/management/commands/livemigrate-v22-v23.py
> new file mode 100644
> index 000000000000..1079fb2f3997
> --- /dev/null
> +++ b/patchwork/management/commands/livemigrate-v22-v23.py
> @@ -0,0 +1,35 @@
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2019 IBM Corporation
> +#  Author: Daniel Axtens <dja at axtens.net>
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +from django.core.management.base import BaseCommand
> +from django.db.models import Q
> +
> +from patchwork.models import Patch
> +from patchwork.models import Submission
> +
> +
> +class Command(BaseCommand):
> +    help = 'Do the live migration for flattening patches'
> +
> +    def handle(self, *args, **options):
> +        if not hasattr(Submission, 'new_diff'):
> +            print("Submission model is missing 'new_diff'. Have you applied the migration to add it?")
> +            return
> +
> +        diffs = Q(old_diff__isnull=False, submission_ptr__new_diff__isnull=True)
> +        pull_urls = Q(old_pull_url__isnull=False, submission_ptr__new_pull_url__isnull=True)
> +        query = Patch.objects.filter(diffs | pull_urls)
> +
> +        count = query.count()
> +
> +        for i, patch in enumerate(query.iterator()):
> +            patch.new_diff = patch.old_diff
> +            patch.new_pull_url = patch.old_pull_url
> +            patch.save()
> +            if (i % 10) == 0:
> +                self.stdout.write('%06d/%06d\r' % (i, count), ending='')
> +                self.stdout.flush()
> +        self.stdout.write('\ndone')
> diff --git a/patchwork/migrations/0037_prepare_old_diff_pull_url.py b/patchwork/migrations/0037_prepare_old_diff_pull_url.py
> new file mode 100644
> index 000000000000..83fd1beaac9c
> --- /dev/null
> +++ b/patchwork/migrations/0037_prepare_old_diff_pull_url.py
> @@ -0,0 +1,35 @@
> +# -*- coding: utf-8 -*-
> +# Generated by Django 1.11.24 on 2019-09-18 01:49
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0036_project_commit_url_format'),
> +    ]
> +
> +    operations = [
> +        migrations.AlterField(
> +            model_name='patch',
> +            name='diff',
> +            field=models.TextField(blank=True, db_column='diff', null=True),
> +        ),
> +        migrations.RenameField(
> +            model_name='patch',
> +            old_name='diff',
> +            new_name='old_diff',
> +        ),
> +        migrations.AlterField(
> +            model_name='patch',
> +            name='pull_url',
> +            field=models.CharField(blank=True, db_column='pull_url', max_length=255, null=True),
> +        ),
> +        migrations.RenameField(
> +            model_name='patch',
> +            old_name='pull_url',
> +            new_name='old_pull_url',
> +        ),
> +    ]
> diff --git a/patchwork/migrations/0038_submission_new_diff_pull_url.py b/patchwork/migrations/0038_submission_new_diff_pull_url.py
> new file mode 100644
> index 000000000000..094854d863cb
> --- /dev/null
> +++ b/patchwork/migrations/0038_submission_new_diff_pull_url.py
> @@ -0,0 +1,25 @@
> +# -*- coding: utf-8 -*-
> +# Generated by Django 1.11.24 on 2019-09-18 01:54
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0037_prepare_old_diff_pull_url'),
> +    ]
> +
> +    operations = [
> +        migrations.AddField(
> +            model_name='submission',
> +            name='new_diff',
> +            field=models.TextField(blank=True, db_column='diff', null=True),
> +        ),
> +        migrations.AddField(
> +            model_name='submission',
> +            name='new_pull_url',
> +            field=models.CharField(blank=True, db_column='pull_url', max_length=255, null=True),
> +        ),
> +    ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 32d1b3c2adc5..31f459e1a126 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -367,6 +367,11 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
>  
>      name = models.CharField(max_length=255)
>  
> +    # patch
> +    new_diff = models.TextField(null=True, blank=True, db_column='diff')
> +    new_pull_url = models.CharField(max_length=255, null=True, blank=True, db_column='pull_url')
> +
> +
>      @property
>      def list_archive_url(self):
>          if not self.project.list_archive_url_format:
> @@ -410,9 +415,9 @@ class CoverLetter(Submission):
>  class Patch(Submission):
>      # patch metadata
>  
> -    diff = models.TextField(null=True, blank=True)
> +    old_diff = models.TextField(null=True, blank=True, db_column='diff')
>      commit_ref = models.CharField(max_length=255, null=True, blank=True)
> -    pull_url = models.CharField(max_length=255, null=True, blank=True)
> +    old_pull_url = models.CharField(max_length=255, null=True, blank=True, db_column='pull_url')
>      tags = models.ManyToManyField(Tag, through=PatchTag)
>  
>      # patchwork metadata
> @@ -479,6 +484,20 @@ class Patch(Submission):
>  
>          super(Patch, self).save(**kwargs)
>  
> +        # we don't have to worry about duplicating to the
> +        # old fields: the setters do this for us.
> +        if hasattr(self, '_diff'):
> +            if hasattr(self.submission_ptr, 'new_diff'):
> +                self.new_diff = self._diff
> +                self.submission_ptr.save()
> +            del self._diff
> +
> +        if hasattr(self, '_pull_url'):
> +            if hasattr(self.submission_ptr, 'new_pull_url'):
> +                self.new_pull_url = self._diff
> +                self.submission_ptr.save()
> +            del self._pull_url
> +
>          self.refresh_tag_counts()
>  
>      def is_editable(self, user):
> @@ -602,6 +621,76 @@ class Patch(Submission):
>          ]
>  
>  
> +    # transition goop
> +    @property
> +    def diff(self):
> +        # three options:
> +        # it's sitting around in our cached storage
> +        # new field is not null
> +        # old has field
> +        # no point in null checking old
> +        if hasattr(self, '_diff'):
> +            return self._diff
> +        try:
> +            if self.submission_ptr.new_diff is not None:
> +                return self.submission_ptr.new_diff
> +        except:
> +            pass
> +        if hasattr(self, 'old_diff'):
> +            return self.old_diff
> +        return None
> +
> +    @diff.setter
> +    def diff(self, value):
> +        try:
> +            self.submission_ptr.diff = value
> +        except:
> +            self._diff = value
> +        if hasattr(self, 'old_diff'):
> +            self.old_diff = value
> +
> +    @property
> +    def pull_url(self):
> +        if hasattr(self, '_pull_url'):
> +            return self._pull_url
> +        try:
> +            if self.submission_ptr.new_pull_url is not None:
> +                return self.submission_ptr.new_pull_url
> +        except:
> +            pass
> +        if hasattr(self, 'old_pull_url'):
> +            return self.old_pull_url
> +        return None
> +
> +    @pull_url.setter
> +    def pull_url(self, value):
> +        try:
> +            self.submission_ptr.pull_url = value
> +        except:
> +            self._diff = value
> +        if hasattr(self, 'old_pull_url'):
> +            self.old_pull_url = value
> +
> +
> +    def __init__(self, *args, **kwargs):
> +        diff = None
> +        if 'diff' in kwargs:
> +            diff = kwargs['diff']
> +            del kwargs['diff']
> +
> +        pull_url = None
> +        if 'pull_url' in kwargs:
> +            pull_url = kwargs['pull_url']
> +            del kwargs['pull_url']
> +
> +        super(Patch, self).__init__(*args, **kwargs)
> +
> +        if diff:
> +            self.diff = diff
> +        if pull_url:
> +            self.pull_url = pull_url
> +
> +
>  class Comment(EmailMixin, models.Model):
>      # parent
>  
> diff --git a/patchwork/views/xmlrpc.py b/patchwork/views/xmlrpc.py
> index f60725044ebe..a2e3d8985dd7 100644
> --- a/patchwork/views/xmlrpc.py
> +++ b/patchwork/views/xmlrpc.py
> @@ -569,7 +569,10 @@ def patch_list(filt=None):
>      # Only extract the relevant fields. This saves a big db load as we
>      # no longer fetch content/headers/etc for potentially every patch
>      # in a project.
> -    patches = patches.defer('content', 'headers', 'diff')
> +    if hasattr(Patch, 'old_diff'):
> +        patches = patches.defer('content', 'headers', 'old_diff')
> +    else:
> +        patches = patches.defer('content', 'headers', 'submission_ptr__new_diff')
>  
>      return _get_objects(patch_to_dict, patches, max_count)
>  
> -- 
> 2.20.1


More information about the Patchwork mailing list