[PATCH 4/6] Begin live migration of diff and pull_url to Submission
Daniel Axtens
dja at axtens.net
Thu Sep 26 23:19:07 AEST 2019
Daniel Axtens <dja at axtens.net> writes:
> This patch contains 2 migrations:
>
> 0037_prepare_old_diff_pull_url:
> - renames Patch's 'diff' to 'old_diff', while not affecting the db
> - likewise 'pull_url' to 'old_pull_url'
> This does no SQL changes, which you can verify with
> python manage.py sqlmigrate patchwork 0037
>
> 0038_submission_new_diff_pull_url:
> - creates a diff column in Submission, accessible via new_diff
> - likewise pull_url as new_pull_url
>
> livemigrate-v22-v23: (the version numbers are a bit arbitrary)
> - copies data from old_{pull_url,diff} to new_{pull_url,diff}
> as required. Can be run on a _running instance_ of patchwork.
>
> The rest of the code provides a bunch of pretty ugly glue code.
> It assumes both migrations have been run, and then allows pw to:
> - read old data from old_{pull_url,diff}
> - store new data in both the old and new fields
It turns out this is way, way, overcomplicating things. I think I had
twisted myself in knots by not properly splitting out schema migration
and data migration in my mental model of things. Anyway, we can do this
much more simply:
- don't rename the field in Patch
- add a new field in Submission called new_blah
- make save() copy the data into Submission
- then the next patch drops the fields in Patch and renames the fields
in Submission.
I figured this out trying to extend my approach to migrating series and
number. Once I get that sorted out, I'll send new patches out.
Kind regards,
Daniel
>
> The idea is that you should be able to:
> - bring down pw
> - apply this
> - do two fast migrations (one with no SQL, one that adds two columns)
> - restart patchwork, restart ingesting mail
> - do the live migration over time
> - proceed to the next patch
>
> By storing the data in duplicate, the hope is that you can
> back out the new code at any point and still have all of your data.
>
> Signed-off-by: Daniel Axtens <dja at axtens.net>
> ---
> patchwork/api/patch.py | 10 +-
> .../commands/livemigrate-v22-v23.py | 35 +++++++
> .../0037_prepare_old_diff_pull_url.py | 35 +++++++
> .../0038_submission_new_diff_pull_url.py | 25 +++++
> patchwork/models.py | 93 ++++++++++++++++++-
> patchwork/views/xmlrpc.py | 5 +-
> 6 files changed, 197 insertions(+), 6 deletions(-)
> create mode 100644 patchwork/management/commands/livemigrate-v22-v23.py
> create mode 100644 patchwork/migrations/0037_prepare_old_diff_pull_url.py
> create mode 100644 patchwork/migrations/0038_submission_new_diff_pull_url.py
>
> diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py
> index c9360308a56a..3866f92fe3dc 100644
> --- a/patchwork/api/patch.py
> +++ b/patchwork/api/patch.py
> @@ -181,11 +181,15 @@ class PatchList(ListAPIView):
> ordering = 'id'
>
> def get_queryset(self):
> - return Patch.objects.all()\
> + qs = Patch.objects.all()\
> .prefetch_related('check_set')\
> .select_related('project', 'state', 'submitter', 'delegate',
> - 'series')\
> - .defer('content', 'diff', 'headers')
> + 'series')
> + if hasattr(Patch, 'old_diff'):
> + qs.defer('content', 'old_diff', 'headers')
> + else:
> + qs.defer('content', 'submission_ptr__new_diff', 'headers')
> + return qs
>
>
> class PatchDetail(RetrieveUpdateAPIView):
> diff --git a/patchwork/management/commands/livemigrate-v22-v23.py b/patchwork/management/commands/livemigrate-v22-v23.py
> new file mode 100644
> index 000000000000..1079fb2f3997
> --- /dev/null
> +++ b/patchwork/management/commands/livemigrate-v22-v23.py
> @@ -0,0 +1,35 @@
> +# Patchwork - automated patch tracking system
> +# Copyright (C) 2019 IBM Corporation
> +# Author: Daniel Axtens <dja at axtens.net>
> +#
> +# SPDX-License-Identifier: GPL-2.0-or-later
> +
> +from django.core.management.base import BaseCommand
> +from django.db.models import Q
> +
> +from patchwork.models import Patch
> +from patchwork.models import Submission
> +
> +
> +class Command(BaseCommand):
> + help = 'Do the live migration for flattening patches'
> +
> + def handle(self, *args, **options):
> + if not hasattr(Submission, 'new_diff'):
> + print("Submission model is missing 'new_diff'. Have you applied the migration to add it?")
> + return
> +
> + diffs = Q(old_diff__isnull=False, submission_ptr__new_diff__isnull=True)
> + pull_urls = Q(old_pull_url__isnull=False, submission_ptr__new_pull_url__isnull=True)
> + query = Patch.objects.filter(diffs | pull_urls)
> +
> + count = query.count()
> +
> + for i, patch in enumerate(query.iterator()):
> + patch.new_diff = patch.old_diff
> + patch.new_pull_url = patch.old_pull_url
> + patch.save()
> + if (i % 10) == 0:
> + self.stdout.write('%06d/%06d\r' % (i, count), ending='')
> + self.stdout.flush()
> + self.stdout.write('\ndone')
> diff --git a/patchwork/migrations/0037_prepare_old_diff_pull_url.py b/patchwork/migrations/0037_prepare_old_diff_pull_url.py
> new file mode 100644
> index 000000000000..83fd1beaac9c
> --- /dev/null
> +++ b/patchwork/migrations/0037_prepare_old_diff_pull_url.py
> @@ -0,0 +1,35 @@
> +# -*- coding: utf-8 -*-
> +# Generated by Django 1.11.24 on 2019-09-18 01:49
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +
> +class Migration(migrations.Migration):
> +
> + dependencies = [
> + ('patchwork', '0036_project_commit_url_format'),
> + ]
> +
> + operations = [
> + migrations.AlterField(
> + model_name='patch',
> + name='diff',
> + field=models.TextField(blank=True, db_column='diff', null=True),
> + ),
> + migrations.RenameField(
> + model_name='patch',
> + old_name='diff',
> + new_name='old_diff',
> + ),
> + migrations.AlterField(
> + model_name='patch',
> + name='pull_url',
> + field=models.CharField(blank=True, db_column='pull_url', max_length=255, null=True),
> + ),
> + migrations.RenameField(
> + model_name='patch',
> + old_name='pull_url',
> + new_name='old_pull_url',
> + ),
> + ]
> diff --git a/patchwork/migrations/0038_submission_new_diff_pull_url.py b/patchwork/migrations/0038_submission_new_diff_pull_url.py
> new file mode 100644
> index 000000000000..094854d863cb
> --- /dev/null
> +++ b/patchwork/migrations/0038_submission_new_diff_pull_url.py
> @@ -0,0 +1,25 @@
> +# -*- coding: utf-8 -*-
> +# Generated by Django 1.11.24 on 2019-09-18 01:54
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +
> +class Migration(migrations.Migration):
> +
> + dependencies = [
> + ('patchwork', '0037_prepare_old_diff_pull_url'),
> + ]
> +
> + operations = [
> + migrations.AddField(
> + model_name='submission',
> + name='new_diff',
> + field=models.TextField(blank=True, db_column='diff', null=True),
> + ),
> + migrations.AddField(
> + model_name='submission',
> + name='new_pull_url',
> + field=models.CharField(blank=True, db_column='pull_url', max_length=255, null=True),
> + ),
> + ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index 32d1b3c2adc5..31f459e1a126 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -367,6 +367,11 @@ class Submission(FilenameMixin, EmailMixin, models.Model):
>
> name = models.CharField(max_length=255)
>
> + # patch
> + new_diff = models.TextField(null=True, blank=True, db_column='diff')
> + new_pull_url = models.CharField(max_length=255, null=True, blank=True, db_column='pull_url')
> +
> +
> @property
> def list_archive_url(self):
> if not self.project.list_archive_url_format:
> @@ -410,9 +415,9 @@ class CoverLetter(Submission):
> class Patch(Submission):
> # patch metadata
>
> - diff = models.TextField(null=True, blank=True)
> + old_diff = models.TextField(null=True, blank=True, db_column='diff')
> commit_ref = models.CharField(max_length=255, null=True, blank=True)
> - pull_url = models.CharField(max_length=255, null=True, blank=True)
> + old_pull_url = models.CharField(max_length=255, null=True, blank=True, db_column='pull_url')
> tags = models.ManyToManyField(Tag, through=PatchTag)
>
> # patchwork metadata
> @@ -479,6 +484,20 @@ class Patch(Submission):
>
> super(Patch, self).save(**kwargs)
>
> + # we don't have to worry about duplicating to the
> + # old fields: the setters do this for us.
> + if hasattr(self, '_diff'):
> + if hasattr(self.submission_ptr, 'new_diff'):
> + self.new_diff = self._diff
> + self.submission_ptr.save()
> + del self._diff
> +
> + if hasattr(self, '_pull_url'):
> + if hasattr(self.submission_ptr, 'new_pull_url'):
> + self.new_pull_url = self._diff
> + self.submission_ptr.save()
> + del self._pull_url
> +
> self.refresh_tag_counts()
>
> def is_editable(self, user):
> @@ -602,6 +621,76 @@ class Patch(Submission):
> ]
>
>
> + # transition goop
> + @property
> + def diff(self):
> + # three options:
> + # it's sitting around in our cached storage
> + # new field is not null
> + # old has field
> + # no point in null checking old
> + if hasattr(self, '_diff'):
> + return self._diff
> + try:
> + if self.submission_ptr.new_diff is not None:
> + return self.submission_ptr.new_diff
> + except:
> + pass
> + if hasattr(self, 'old_diff'):
> + return self.old_diff
> + return None
> +
> + @diff.setter
> + def diff(self, value):
> + try:
> + self.submission_ptr.diff = value
> + except:
> + self._diff = value
> + if hasattr(self, 'old_diff'):
> + self.old_diff = value
> +
> + @property
> + def pull_url(self):
> + if hasattr(self, '_pull_url'):
> + return self._pull_url
> + try:
> + if self.submission_ptr.new_pull_url is not None:
> + return self.submission_ptr.new_pull_url
> + except:
> + pass
> + if hasattr(self, 'old_pull_url'):
> + return self.old_pull_url
> + return None
> +
> + @pull_url.setter
> + def pull_url(self, value):
> + try:
> + self.submission_ptr.pull_url = value
> + except:
> + self._diff = value
> + if hasattr(self, 'old_pull_url'):
> + self.old_pull_url = value
> +
> +
> + def __init__(self, *args, **kwargs):
> + diff = None
> + if 'diff' in kwargs:
> + diff = kwargs['diff']
> + del kwargs['diff']
> +
> + pull_url = None
> + if 'pull_url' in kwargs:
> + pull_url = kwargs['pull_url']
> + del kwargs['pull_url']
> +
> + super(Patch, self).__init__(*args, **kwargs)
> +
> + if diff:
> + self.diff = diff
> + if pull_url:
> + self.pull_url = pull_url
> +
> +
> class Comment(EmailMixin, models.Model):
> # parent
>
> diff --git a/patchwork/views/xmlrpc.py b/patchwork/views/xmlrpc.py
> index f60725044ebe..a2e3d8985dd7 100644
> --- a/patchwork/views/xmlrpc.py
> +++ b/patchwork/views/xmlrpc.py
> @@ -569,7 +569,10 @@ def patch_list(filt=None):
> # Only extract the relevant fields. This saves a big db load as we
> # no longer fetch content/headers/etc for potentially every patch
> # in a project.
> - patches = patches.defer('content', 'headers', 'diff')
> + if hasattr(Patch, 'old_diff'):
> + patches = patches.defer('content', 'headers', 'old_diff')
> + else:
> + patches = patches.defer('content', 'headers', 'submission_ptr__new_diff')
>
> return _get_objects(patch_to_dict, patches, max_count)
>
> --
> 2.20.1
More information about the Patchwork
mailing list