[PATCH 3/4] migrations: 0043: split the data migration into chunks
Daniel Axtens
dja at axtens.net
Sat Jul 17 03:19:39 AEST 2021
Migrate 10000 rows at a time. This:
- provides a view on progress
- means replication happens in manageable chunks
- hopefully prevents db lockups
Signed-off-by: Daniel Axtens <dja at axtens.net>
---
.../migrations/0043_merge_patch_submission.py | 146 ++++++++++--------
1 file changed, 82 insertions(+), 64 deletions(-)
diff --git a/patchwork/migrations/0043_merge_patch_submission.py b/patchwork/migrations/0043_merge_patch_submission.py
index 465e527812ba..1d072ca18230 100644
--- a/patchwork/migrations/0043_merge_patch_submission.py
+++ b/patchwork/migrations/0043_merge_patch_submission.py
@@ -1,76 +1,94 @@
from django.conf import settings
from django.db import connection, migrations, models
import django.db.models.deletion
+from django.db.models import Max
import patchwork.fields
+CHUNK_SIZE = 10000
def migrate_data(apps, schema_editor):
- if connection.vendor == 'postgresql':
- schema_editor.execute(
- """
- UPDATE patchwork_submission
- SET archived = patchwork_patch.archived,
- commit_ref = patchwork_patch.commit_ref,
- delegate_id = patchwork_patch.delegate_id,
- diff = patchwork_patch.diff,
- hash = patchwork_patch.hash,
- number = patchwork_patch.number,
- pull_url = patchwork_patch.pull_url,
- related_id = patchwork_patch.related_id,
- series_id = patchwork_patch.series_id,
- state_id = patchwork_patch.state_id
- FROM patchwork_patch
- WHERE patchwork_submission.id = patchwork_patch.submission_ptr_id
- """
- )
- elif connection.vendor == 'mysql':
- schema_editor.execute(
- """
- UPDATE patchwork_submission, patchwork_patch
- SET patchwork_submission.archived = patchwork_patch.archived,
- patchwork_submission.commit_ref = patchwork_patch.commit_ref,
- patchwork_submission.delegate_id = patchwork_patch.delegate_id,
- patchwork_submission.diff = patchwork_patch.diff,
- patchwork_submission.hash = patchwork_patch.hash,
- patchwork_submission.number = patchwork_patch.number,
- patchwork_submission.pull_url = patchwork_patch.pull_url,
- patchwork_submission.related_id = patchwork_patch.related_id,
- patchwork_submission.series_id = patchwork_patch.series_id,
- patchwork_submission.state_id = patchwork_patch.state_id
- WHERE patchwork_submission.id = patchwork_patch.submission_ptr_id
- """ # noqa
- )
- else:
- schema_editor.execute(
- """
- UPDATE patchwork_submission
- SET (
- archived, commit_ref, delegate_id, diff, hash, number,
- pull_url, related_id, series_id, state_id
- ) = (
- SELECT
- patchwork_patch.archived,
- patchwork_patch.commit_ref,
- patchwork_patch.delegate_id,
- patchwork_patch.diff,
- patchwork_patch.hash,
- patchwork_patch.number,
- patchwork_patch.pull_url,
- patchwork_patch.related_id,
- patchwork_patch.series_id,
- patchwork_patch.state_id
+ Patch = apps.get_model('patchwork', 'patch')
+ max_id = Patch.objects.all().aggregate(Max('submission_ptr_id'))['submission_ptr_id__max']
+ old_max = 0
+ # process CHUNK_SIZE at a time. +CHUNK_SIZE+1 is to ensure that if we get
+ # max_id = 50000, we do a migration of ids >= 50000, < 60000 - otherwise
+ # we would miss id=50000
+ print("\nMigrating data - %d rows" % max_id)
+ for new_max in range(CHUNK_SIZE, max_id+CHUNK_SIZE+1, CHUNK_SIZE):
+ print("Migrating rows >= %d, < %d (max %d)" % (old_max, new_max, max_id))
+ if connection.vendor == 'postgresql':
+ schema_editor.execute(
+ """
+ UPDATE patchwork_submission
+ SET archived = patchwork_patch.archived,
+ commit_ref = patchwork_patch.commit_ref,
+ delegate_id = patchwork_patch.delegate_id,
+ diff = patchwork_patch.diff,
+ hash = patchwork_patch.hash,
+ number = patchwork_patch.number,
+ pull_url = patchwork_patch.pull_url,
+ related_id = patchwork_patch.related_id,
+ series_id = patchwork_patch.series_id,
+ state_id = patchwork_patch.state_id
FROM patchwork_patch
- WHERE patchwork_patch.submission_ptr_id = patchwork_submission.id
- )
- WHERE
- EXISTS (
- SELECT *
- FROM patchwork_patch
- WHERE patchwork_patch.submission_ptr_id = patchwork_submission.id
- )
- """ # noqa
- )
+ WHERE patchwork_patch.submission_ptr_id >= %d
+ AND patchwork_patch.submission_ptr_id < %d
+ AND patchwork_submission.id = patchwork_patch.submission_ptr_id
+ """ % (old_max, new_max)
+ )
+ elif connection.vendor == 'mysql':
+ schema_editor.execute(
+ """
+ UPDATE patchwork_submission, patchwork_patch
+ SET patchwork_submission.archived = patchwork_patch.archived,
+ patchwork_submission.commit_ref = patchwork_patch.commit_ref,
+ patchwork_submission.delegate_id = patchwork_patch.delegate_id,
+ patchwork_submission.diff = patchwork_patch.diff,
+ patchwork_submission.hash = patchwork_patch.hash,
+ patchwork_submission.number = patchwork_patch.number,
+ patchwork_submission.pull_url = patchwork_patch.pull_url,
+ patchwork_submission.related_id = patchwork_patch.related_id,
+ patchwork_submission.series_id = patchwork_patch.series_id,
+ patchwork_submission.state_id = patchwork_patch.state_id
+ WHERE patchwork_patch.submission_ptr_id >= %d
+ AND patchwork_patch.submission_ptr_id < %d
+ AND patchwork_submission.id = patchwork_patch.submission_ptr_id
+ """ % (old_max, new_max) # noqa
+ )
+ else:
+ schema_editor.execute(
+ """
+ UPDATE patchwork_submission
+ SET (
+ archived, commit_ref, delegate_id, diff, hash, number,
+ pull_url, related_id, series_id, state_id
+ ) = (
+ SELECT
+ patchwork_patch.archived,
+ patchwork_patch.commit_ref,
+ patchwork_patch.delegate_id,
+ patchwork_patch.diff,
+ patchwork_patch.hash,
+ patchwork_patch.number,
+ patchwork_patch.pull_url,
+ patchwork_patch.related_id,
+ patchwork_patch.series_id,
+ patchwork_patch.state_id
+ FROM patchwork_patch
+ WHERE patchwork_patch.submission_ptr_id = patchwork_submission.id
+ )
+ WHERE
+ EXISTS (
+ SELECT *
+ FROM patchwork_patch
+ WHERE patchwork_patch.submission_ptr_id >= %d
+ AND patchwork_patch.submission_ptr_id < %d
+ AND patchwork_patch.submission_ptr_id = patchwork_submission.id
+ )
+ """ % (old_max, new_max) # noqa
+ )
+ old_max = new_max
class Migration(migrations.Migration):
--
2.30.2
More information about the Patchwork
mailing list