[PATCH v2] migrations: Optimize 0007 patch comment for psql

Andy Doan andy.doan at linaro.org
Fri Mar 25 05:20:43 AEDT 2016


By handling comment copying/deletion via SQL we can make migration take
<3 minutes rather than >30minutes for big instances. The SQL is vendor
specific, but covers the 2 DBs supported by Patchwork.

Signed-off-by: Andy Doan <andy.doan at linaro.org>
Reviewed-by: Stephen Finucane <stephen.finucane at intel.com>
---
v2: Add mysql support and use single function for migration steps
---
 .../0007_move_comment_content_to_patch_content.py  | 94 ++++++++++++++--------
 patchwork/migrations/0009_add_submission_model.py  | 13 +++
 .../0010_migrate_data_from_submission_to_patch.py  | 53 ++++--------
 3 files changed, 90 insertions(+), 70 deletions(-)

diff --git a/patchwork/migrations/0007_move_comment_content_to_patch_content.py b/patchwork/migrations/0007_move_comment_content_to_patch_content.py
index 63d57ba..43c135e 100644
--- a/patchwork/migrations/0007_move_comment_content_to_patch_content.py
+++ b/patchwork/migrations/0007_move_comment_content_to_patch_content.py
@@ -1,47 +1,75 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals

-from django.db import migrations
+from django.db import connection, migrations


 def copy_comment_field(apps, schema_editor):
-    Comment = apps.get_model('patchwork', 'Comment')
-    Patch = apps.get_model('patchwork', 'Patch')
+    if connection.vendor == 'postgresql':
+        schema_editor.execute('''
+            UPDATE patchwork_patch
+              SET content = patchwork_comment.content
+            FROM patchwork_comment
+              WHERE patchwork_patch.id=patchwork_comment.patch_id
+                    AND patchwork_patch.msgid=patchwork_comment.msgid
+        ''')
+    elif connection.vendor == 'mysql':
+        schema_editor.execute('''
+            UPDATE patchwork_patch, patchwork_comment
+              SET patchwork_patch.content = patchwork_comment.content
+            WHERE patchwork_patch.id=patchwork_comment.patch_id
+              AND patchwork_patch.msgid=patchwork_comment.msgid
+        ''')
+    else:
+        Comment = apps.get_model('patchwork', 'Comment')
+        Patch = apps.get_model('patchwork', 'Patch')
+
+        for patch in Patch.objects.all():
+            try:
+                # when available, this can only return one entry due to the
+                # unique_together constraint
+                comment = Comment.objects.get(patch=patch, msgid=patch.msgid)
+            except Comment.DoesNotExist:
+                # though there's no requirement to actually have a comment
+                continue
+
+            patch.content = comment.content
+            patch.save()

-    for patch in Patch.objects.all():
-        try:
-            # when available, this can only return one entry due to the
-            # unique_together constraint
-            comment = Comment.objects.get(patch=patch, msgid=patch.msgid)
-        except Comment.DoesNotExist:
-            # though there's no requirement to actually have a comment
-            continue

-        patch.content = comment.content
-        patch.save()
+def remove_duplicate_comments(apps, schema_editor):
+    if connection.vendor == 'postgresql':
+        schema_editor.execute('''
+            DELETE FROM patchwork_comment
+              USING patchwork_patch
+              WHERE patchwork_patch.id=patchwork_comment.patch_id
+                    AND patchwork_patch.msgid=patchwork_comment.msgid
+        ''')
+    elif connection.vendor == 'mysql':
+        schema_editor.execute('''
+            DELETE FROM patchwork_comment
+              USING patchwork_patch, patchwork_comment
+              WHERE patchwork_patch.id=patchwork_comment.patch_id
+                    AND patchwork_patch.msgid=patchwork_comment.msgid
+        ''')
+    else:
+        Comment = apps.get_model('patchwork', 'Comment')
+        Patch = apps.get_model('patchwork', 'Patch')
+
+        for patch in Patch.objects.all():
+            try:
+                # when available, this can only return one entry due to the
+                # unique_together constraint
+                comment = Comment.objects.get(patch=patch, msgid=patch.msgid)
+                comment.delete()
+            except Comment.DoesNotExist:
+                # though there's no requirement to actually have a comment
+                continue


 def uncopy_comment_field(apps, schema_editor):
-    Patch = apps.get_model('patchwork', 'Patch')
-
-    for patch in Patch.objects.all():
-        patch.content = None
-        patch.save()
-
-
-def remove_duplicate_comments(apps, schema_editor):
-    Comment = apps.get_model('patchwork', 'Comment')
-    Patch = apps.get_model('patchwork', 'Patch')
-
-    for patch in Patch.objects.all():
-        try:
-            # when available, this can only return one entry due to the
-            # unique_together constraint
-            comment = Comment.objects.get(patch=patch, msgid=patch.msgid)
-            comment.delete()
-        except Comment.DoesNotExist:
-            # though there's no requirement to actually have a comment
-            continue
+    # This is no-op because the column is being deleted
+    pass


 def recreate_comments(apps, schema_editor):
diff --git a/patchwork/migrations/0009_add_submission_model.py b/patchwork/migrations/0009_add_submission_model.py
index 6bb68fb..e39bb20 100644
--- a/patchwork/migrations/0009_add_submission_model.py
+++ b/patchwork/migrations/0009_add_submission_model.py
@@ -7,6 +7,19 @@ from django.db import migrations, models
 import patchwork.models


+''' migrations.RunSQL(
+            ['CREATE TABLE patchwork_submission AS select id, project_id, msgid, name, date, submitter_id, headers, content FROM patchwork_patch'],
+            ['DROP TABLE patchwork_submission'],
+            state_operations=migrations.CreateModel(name='Submission'),
+        ),
+        migrations.RenameField(
+            model_name='patch',
+            old_name='id',
+            new_name='submission_ptr_id'
+        ),
+'''
+
+
 class Migration(migrations.Migration):

     dependencies = [
diff --git a/patchwork/migrations/0010_migrate_data_from_submission_to_patch.py b/patchwork/migrations/0010_migrate_data_from_submission_to_patch.py
index 0da4567..1d4b6e1 100644
--- a/patchwork/migrations/0010_migrate_data_from_submission_to_patch.py
+++ b/patchwork/migrations/0010_migrate_data_from_submission_to_patch.py
@@ -1,40 +1,7 @@
 # -*- coding: utf-8 -*-
 from __future__ import unicode_literals

-from django.db import migrations, models
-
-
-def create_patch_instances(apps, schema_editor):
-    Submission = apps.get_model('patchwork', 'Submission')
-    Patch = apps.get_model('patchwork', 'Patch')
-
-    for submission in Submission.objects.all():
-        # NOTE(sfinucan) We copy every field _except_ tags, which is
-        # autogenerated anyway
-        patch = Patch(submission_ptr=submission,
-                      diff2=submission.diff,
-                      commit_ref2=submission.commit_ref,
-                      pull_url2=submission.pull_url,
-                      delegate2=submission.delegate,
-                      state2=submission.state,
-                      archived2=submission.archived,
-                      hash2=submission.hash)
-        patch.__dict__.update(submission.__dict__)
-        patch.save()
-
-
-def remove_patch_instances(apps, schema_editor):
-    Patch = apps.get_model('patchwork', 'Patch')
-
-    for patch in Patch.objects.all():
-        patch.diff = patch.diff2
-        patch.commit_ref = patch.commit_ref2
-        patch.pull_url = patch.pull_url2
-        patch.delegate = patch.delegate2
-        patch.state = patch.state2
-        patch.archived = patch.archived2
-        patch.hash = patch.hash2
-        patch.save()
+from django.db import migrations


 class Migration(migrations.Migration):
@@ -44,8 +11,20 @@ class Migration(migrations.Migration):
     ]

     operations = [
-        migrations.RunPython(
-            code=create_patch_instances,
-            reverse_code=remove_patch_instances,
+        migrations.RunSQL(
+            ['''INSERT INTO patchwork_patch
+                  (submission_ptr_id, diff2, commit_ref2, pull_url2,
+                   delegate2_id, state2_id, archived2, hash2)
+                SELECT id, diff, commit_ref, pull_url, delegate_id, state_id,
+                       archived, hash
+                FROM patchwork_submission
+                '''],
+            ['''UPDATE patchwork_submission SET
+                  diff=diff2, commit_ref=commit_ref2, pull_url=pull_url2,
+                  delegate_id=delegate2_id, state_id=state2_id,
+                  archived=archived2, hash=hash2
+                FROM patchwork_patch WHERE
+                  patchwork_submission.id = patchwork_patch.submission_ptr_id
+                ''']
         ),
     ]
--
2.7.3



More information about the Patchwork mailing list