[PATCH v2 4/6] models: Migrate event fields to JSON field

Daniel Axtens dja at axtens.net
Wed Apr 11 02:23:39 AEST 2018


> diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> new file mode 100644
> index 00000000..5dca85c5
> --- /dev/null
> +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> @@ -0,0 +1,61 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations, models
> +
> +from patchwork.api.event import PayloadSerializer
> +from patchwork.models import Event
> +
> +
> +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> +class Payload(object):
> +
> +    def __init__(self, **kwargs):
> +        for kwarg in kwargs:
> +            setattr(self, kwarg, kwargs[kwarg])
> +
> +
> +def generate_payload(apps, schema_editor):
> +    # NOTE(stephenfin): We could convert this to native SQL in the future
> +    # by using e.g. 'JSON_OBJECT' for MySQL
> +    EventBase = apps.get_model('patchwork', 'Event')
> +
> +    for event in EventBase.objects.all():
> +        category = event.category
> +        # TODO(stephenfin): This logic should be moved into the EventSerializer
> +        # class
> +        if category == Event.CATEGORY_COVER_CREATED:
> +            payload_obj = Payload(cover=event.cover)
> +        elif category == Event.CATEGORY_PATCH_CREATED:
> +            payload_obj = Payload(patch=event.patch)
> +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
> +            payload_obj = Payload(patch=event.patch,
> +                                  previous_state=event.previous_state,
> +                                  current_state=event.previous_state)
> +        elif category == Event.CATEGORY_PATCH_DELEGATED:
> +            payload_obj = Payload(patch=event.patch,
> +                                  previous_delegate=event.previous_delegate,
> +                                  current_delegate=event.current_delegate)
> +        elif category == Event.CATEGORY_PATCH_COMPLETED:
> +            payload_obj = Payload(patch=event.patch)
> +        elif category == Event.CATEGORY_CHECK_CREATED:
> +            payload_obj = Payload(patch=event.patch, check=event.created_check)
> +        elif category == Event.CATEGORY_SERIES_CREATED:
> +            payload_obj = Payload(series=event.series)
> +        elif category == Event.CATEGORY_SERIES_COMPLETED:
> +            payload_obj = Payload(series=event.series)
> +
> +        payload = PayloadSerializer(payload_obj).data
> +        event.payload = payload
> +        event.save()
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0026_add_event_payload_field'),
> +    ]
> +
> +    operations = [
> +        migrations.RunPython(generate_payload, atomic=True),
> +    ]


I tried to run this migration on my laptop (16GB ram + swap) on a db
with 102k patches. (The postgres sql dump is 781 MB).

It chewed up over 16GB of ram before being OOM killed.

This somewhat derailed my attempts to see if this offers any performance
gain to justify the additional complexity.

BTW - I asked some questions last time around about versioning and
dealing with any future changes. Did you have any thoughts on that? I
think it's also a big issue.

Regards,
Daniel


> diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
> new file mode 100644
> index 00000000..2f7ba7bf
> --- /dev/null
> +++ b/patchwork/migrations/0028_remove_old_event_fields.py
> @@ -0,0 +1,34 @@
> +# -*- coding: utf-8 -*-
> +from __future__ import unicode_literals
> +
> +from django.db import migrations
> +
> +
> +class Migration(migrations.Migration):
> +
> +    dependencies = [
> +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
> +    ]
> +
> +    operations = [
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='created_check',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='current_delegate',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='current_state',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='previous_delegate',
> +        ),
> +        migrations.RemoveField(
> +            model_name='event',
> +            name='previous_state',
> +        ),
> +    ]
> diff --git a/patchwork/models.py b/patchwork/models.py
> index f91b994c..a5992d68 100644
> --- a/patchwork/models.py
> +++ b/patchwork/models.py
> @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
>  from patchwork.compat import is_authenticated
>  from patchwork.compat import reverse
>  from patchwork.fields import HashField
> +from patchwork.fields import JSONField
>  from patchwork.hasher import hash_diff
>  
>  if settings.ENABLE_REST_API:
> @@ -943,32 +944,9 @@ class Event(models.Model):
>          on_delete=models.CASCADE,
>          help_text='The cover letter that this event was created for.')
>  
> -    # fields for 'patch-state-changed' events
> +    # additional data
>  
> -    previous_state = models.ForeignKey(
> -        State, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -    current_state = models.ForeignKey(
> -        State, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # fields for 'patch-delegate-changed' events
> -
> -    previous_delegate = models.ForeignKey(
> -        User, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -    current_delegate = models.ForeignKey(
> -        User, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # fields or 'patch-check-created' events
> -
> -    created_check = models.ForeignKey(
> -        Check, related_name='+', null=True, blank=True,
> -        on_delete=models.CASCADE)
> -
> -    # TODO(stephenfin): Validate that the correct fields are being set by way
> -    # of a 'clean' method
> +    payload = JSONField(null=True, blank=True)
>  
>      def __repr__(self):
>          return "<Event id='%d' category='%s'" % (self.id, self.category)
> diff --git a/patchwork/signals.py b/patchwork/signals.py
> index b083b51a..c0f4c08d 100644
> --- a/patchwork/signals.py
> +++ b/patchwork/signals.py
> @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
>  from django.db.models.signals import pre_save
>  from django.dispatch import receiver
>  
> +from patchwork.api.event import PayloadSerializer
>  from patchwork.models import Check
>  from patchwork.models import CoverLetter
>  from patchwork.models import Event
> @@ -33,6 +34,14 @@ from patchwork.models import Series
>  from patchwork.models import SeriesPatch
>  
>  
> +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> +class Payload(object):
> +
> +    def __init__(self, **kwargs):
> +        for kwarg in kwargs:
> +            setattr(self, kwarg, kwargs[kwarg])
> +
> +
>  @receiver(pre_save, sender=Patch)
>  def patch_change_callback(sender, instance, raw, **kwargs):
>      # we only want notification of modified patches
> @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
>  def create_cover_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(cover):
> +        payload = PayloadSerializer(Payload(
> +            cover=cover))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_COVER_CREATED,
>              project=cover.project,
> +            payload=payload.data,
>              cover=cover)
>  
>      # don't trigger for items loaded from fixtures or new items
> @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
>  def create_patch_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(patch):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_CREATED,
>              project=patch.project,
> +            payload=payload.data,
>              patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
> @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
>  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>  
>      def create_event(patch, before, after):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            previous_state=before,
> +            current_state=after))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_STATE_CHANGED,
>              project=patch.project,
> -            patch=patch,
> -            previous_state=before,
> -            current_state=after)
> +            payload=payload.data,
> +            patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
>      if raw or not instance.pk:
> @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
>  def create_patch_delegated_event(sender, instance, raw, **kwargs):
>  
>      def create_event(patch, before, after):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            previous_delegate=before,
> +            current_delegate=after))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_DELEGATED,
>              project=patch.project,
> -            patch=patch,
> -            previous_delegate=before,
> -            current_delegate=after)
> +            payload=payload.data,
> +            patch=patch)
>  
>      # don't trigger for items loaded from fixtures or new items
>      if raw or not instance.pk:
> @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>      """Create patch completed event for patches with series."""
>  
>      def create_event(patch, series):
> +        payload = PayloadSerializer(Payload(
> +            patch=patch,
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_PATCH_COMPLETED,
>              project=patch.project,
> +            payload=payload.data,
>              patch=patch,
>              series=series)
>  
> @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
>  def create_check_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(check):
> +        payload = PayloadSerializer(Payload(
> +            patch=check.patch,
> +            check=check))
> +
>          # TODO(stephenfin): It might make sense to add a 'project' field to
>          # 'check' to prevent lookups here and in the REST API
>          return Event.objects.create(
>              category=Event.CATEGORY_CHECK_CREATED,
>              project=check.patch.project,
> -            patch=check.patch,
> -            created_check=check)
> +            payload=payload.data,
> +            patch=check.patch)
>  
>      # don't trigger for items loaded from fixtures or existing items
>      if raw or not created:
> @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
>  def create_series_created_event(sender, instance, created, raw, **kwargs):
>  
>      def create_event(series):
> +        payload = PayloadSerializer(Payload(
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_SERIES_CREATED,
>              project=series.project,
> +            payload=payload.data,
>              series=series)
>  
>      # don't trigger for items loaded from fixtures or existing items
> @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
>      # in that case.
>  
>      def create_event(series):
> +        payload = PayloadSerializer(Payload(
> +            series=series))
> +
>          return Event.objects.create(
>              category=Event.CATEGORY_SERIES_COMPLETED,
>              project=series.project,
> +            payload=payload.data,
>              series=series)
>  
>      # don't trigger for items loaded from fixtures or existing items
> diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
> index 70d563de..39dff6f1 100644
> --- a/patchwork/tests/test_events.py
> +++ b/patchwork/tests/test_events.py
> @@ -17,32 +17,20 @@
>  # along with Patchwork; if not, write to the Free Software
>  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
>  
> +from collections import OrderedDict
> +
>  from django.test import TestCase
>  
>  from patchwork.models import Event
>  from patchwork.tests import utils
>  
> -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
> -               'current_delegate']
> -
>  
>  def _get_events(**filters):
>      # These are sorted by reverse normally, so reverse it once again
>      return Event.objects.filter(**filters).order_by('date')
>  
>  
> -class _BaseTestCase(TestCase):
> -
> -    def assertEventFields(self, event, parent_type='patch', **fields):
> -        for field_name in [x for x in BASE_FIELDS]:
> -            field = getattr(event, field_name)
> -            if field_name in fields:
> -                self.assertEqual(field, fields[field_name])
> -            else:
> -                self.assertIsNone(field)
> -
> -
> -class PatchCreateTest(_BaseTestCase):
> +class PatchCreateTest(TestCase):
>  
>      def test_patch_created(self):
>          """No series, so patch dependencies implicitly exist."""
> @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
>          # This should raise both the CATEGORY_PATCH_CREATED and
>          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>          self.assertEqual(events[0].project, patch.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIsInstance(payload, OrderedDict)
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], patch.id)
>  
>      def test_patch_dependencies_present_series(self):
>          """Patch dependencies already exist."""
> @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
>          # This should raise both the CATEGORY_PATCH_CREATED and
>          # CATEGORY_PATCH_COMPLETED events
>          events = _get_events(patch=series_patch.patch)
> +
>          self.assertEqual(events.count(), 2)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>          self.assertEqual(events[0].project, series_patch.patch.project)
>          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
>          self.assertEqual(events[1].project, series_patch.patch.project)
> -        self.assertEventFields(events[0])
> -        self.assertEventFields(events[1])
> +
> +        payload = events[0].payload
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> +
> +        payload = events[1].payload
> +        self.assertIn('patch', payload)
> +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> +        self.assertIn('series', payload)
> +        self.assertEqual(payload['series']['id'], series_patch.series.id)
>  
>      def test_patch_dependencies_out_of_order(self):
>          series = utils.create_series()
> @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
>              events = _get_events(patch=series_patch.patch)
>              self.assertEqual(events.count(), 1)
>              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> -            self.assertEventFields(events[0])
>  
>          series_patch_1 = utils.create_series_patch(series=series, number=1)
>  
> @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
>              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
>              self.assertEqual(events[1].category,
>                               Event.CATEGORY_PATCH_COMPLETED)
> -            self.assertEventFields(events[0])
> -            self.assertEventFields(events[1])
>  
>      def test_patch_dependencies_missing(self):
>          series_patch = utils.create_series_patch(number=2)
> @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
>          events = _get_events(patch=series_patch.patch)
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> -        self.assertEventFields(events[0])
>  
>  
> -class PatchChangedTest(_BaseTestCase):
> +class PatchChangedTest(TestCase):
>  
>      def test_patch_state_changed(self):
>          patch = utils.create_patch()
> @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 2)
>          # we don't care about the CATEGORY_PATCH_CREATED event here
>          self.assertEqual(events[1].category,
>                           Event.CATEGORY_PATCH_STATE_CHANGED)
>          self.assertEqual(events[1].project, patch.project)
> -        self.assertEventFields(events[1], previous_state=old_state,
> -                               current_state=new_state)
> +
> +        payload = events[1].payload
> +        self.assertIn('previous_state', payload)
> +        self.assertEqual(payload['previous_state'], old_state.slug)
> +        self.assertIn('current_state', payload)
> +        self.assertEqual(payload['current_state'], new_state.slug)
>  
>      def test_patch_delegated(self):
>          patch = utils.create_patch()
> @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 2)
>          # we don't care about the CATEGORY_PATCH_CREATED event here
>          self.assertEqual(events[1].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
>          self.assertEqual(events[1].project, patch.project)
> -        self.assertEventFields(events[1], current_delegate=delegate_a)
> +
> +        payload = events[1].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertIsNone(payload['previous_delegate'])
> +        self.assertIn('current_delegate', payload)
> +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
>  
>          delegate_b = utils.create_user()
>  
> @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 3)
>          self.assertEqual(events[2].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
> -        self.assertEventFields(events[2], previous_delegate=delegate_a,
> -                               current_delegate=delegate_b)
> +
> +        payload = events[2].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
> +        self.assertIn('current_delegate', payload)
> +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
>  
>          # Delegate B -> None
>  
> @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
>          patch.save()
>  
>          events = _get_events(patch=patch)
> +
>          self.assertEqual(events.count(), 4)
>          self.assertEqual(events[3].category,
>                           Event.CATEGORY_PATCH_DELEGATED)
> -        self.assertEventFields(events[3], previous_delegate=delegate_b)
>  
> +        payload = events[3].payload
> +        self.assertIn('previous_delegate', payload)
> +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
> +        self.assertIn('current_delegate', payload)
> +        self.assertIsNone(payload['current_delegate'])
>  
> -class CheckCreateTest(_BaseTestCase):
> +
> +class CheckCreateTest(TestCase):
>  
>      def test_check_created(self):
>          check = utils.create_check()
> -        events = _get_events(created_check=check)
> -        self.assertEqual(events.count(), 1)
> -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
> -        self.assertEqual(events[0].project, check.patch.project)
> -        self.assertEventFields(events[0])
> +        # we don't care about the CATEGORY_PATCH_CREATED event here
> +        events = _get_events(patch=check.patch)
>  
> +        self.assertEqual(events.count(), 2)
> +        # we don't care about the CATEGORY_PATCH_CREATED event here
> +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
> +        self.assertEqual(events[1].project, check.patch.project)
> +
> +        payload = events[1].payload
> +        self.assertIn('check', payload)
> +        self.assertEqual(payload['check']['id'], check.id)
>  
> -class CoverCreateTest(_BaseTestCase):
> +
> +class CoverCreateTest(TestCase):
>  
>      def test_cover_created(self):
>          cover = utils.create_cover()
> @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
>          self.assertEqual(events[0].project, cover.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIn('cover', payload)
> +        self.assertEqual(payload['cover']['id'], cover.id)
>  
>  
> -class SeriesCreateTest(_BaseTestCase):
> +class SeriesCreateTest(TestCase):
>  
>      def test_series_created(self):
>          series = utils.create_series()
> @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
>          self.assertEqual(events.count(), 1)
>          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
>          self.assertEqual(events[0].project, series.project)
> -        self.assertEventFields(events[0])
> +
> +        payload = events[0].payload
> +        self.assertIn('series', payload)
> +        self.assertEqual(payload['series']['id'], series.id)
> -- 
> 2.14.3
>
> _______________________________________________
> Patchwork mailing list
> Patchwork at lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/patchwork


More information about the Patchwork mailing list