[PATCH v2 4/6] models: Migrate event fields to JSON field

Stephen Finucane stephen at that.guru
Wed Apr 11 04:54:37 AEST 2018


On Wed, 2018-04-11 at 02:23 +1000, Daniel Axtens wrote:
> > diff --git a/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> > new file mode 100644
> > index 00000000..5dca85c5
> > --- /dev/null
> > +++ b/patchwork/migrations/0027_migrate_data_from_event_fields_to_payload.py
> > @@ -0,0 +1,61 @@
> > +# -*- coding: utf-8 -*-
> > +from __future__ import unicode_literals
> > +
> > +from django.db import migrations, models
> > +
> > +from patchwork.api.event import PayloadSerializer
> > +from patchwork.models import Event
> > +
> > +
> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> > +class Payload(object):
> > +
> > +    def __init__(self, **kwargs):
> > +        for kwarg in kwargs:
> > +            setattr(self, kwarg, kwargs[kwarg])
> > +
> > +
> > +def generate_payload(apps, schema_editor):
> > +    # NOTE(stephenfin): We could convert this to native SQL in the future
> > +    # by using e.g. 'JSON_OBJECT' for MySQL
> > +    EventBase = apps.get_model('patchwork', 'Event')
> > +
> > +    for event in EventBase.objects.all():
> > +        category = event.category
> > +        # TODO(stephenfin): This logic should be moved into the EventSerializer
> > +        # class
> > +        if category == Event.CATEGORY_COVER_CREATED:
> > +            payload_obj = Payload(cover=event.cover)
> > +        elif category == Event.CATEGORY_PATCH_CREATED:
> > +            payload_obj = Payload(patch=event.patch)
> > +        elif category == Event.CATEGORY_PATCH_STATE_CHANGED:
> > +            payload_obj = Payload(patch=event.patch,
> > +                                  previous_state=event.previous_state,
> > +                                  current_state=event.previous_state)
> > +        elif category == Event.CATEGORY_PATCH_DELEGATED:
> > +            payload_obj = Payload(patch=event.patch,
> > +                                  previous_delegate=event.previous_delegate,
> > +                                  current_delegate=event.current_delegate)
> > +        elif category == Event.CATEGORY_PATCH_COMPLETED:
> > +            payload_obj = Payload(patch=event.patch)
> > +        elif category == Event.CATEGORY_CHECK_CREATED:
> > +            payload_obj = Payload(patch=event.patch, check=event.created_check)
> > +        elif category == Event.CATEGORY_SERIES_CREATED:
> > +            payload_obj = Payload(series=event.series)
> > +        elif category == Event.CATEGORY_SERIES_COMPLETED:
> > +            payload_obj = Payload(series=event.series)
> > +
> > +        payload = PayloadSerializer(payload_obj).data
> > +        event.payload = payload
> > +        event.save()
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [
> > +        ('patchwork', '0026_add_event_payload_field'),
> > +    ]
> > +
> > +    operations = [
> > +        migrations.RunPython(generate_payload, atomic=True),
> > +    ]
> 
> 
> I tried to run this migration on my laptop (16GB ram + swap) on a db
> with 102k patches. (The postgres sql dump is 781 MB).
> 
> It chewed up over 16GB of ram before being OOM killed.
> 
> This somewhat derailed my attempts to see if this offers any performance
> gain to justify the additional complexity.

Darn, I'd tested this with the Patchwork archives and, while slow, it
did work. Guess it must be loading a lot of stuff into memory. I'll try
this again later in the week with a larger archive and see if I can
optimize anything.

> BTW - I asked some questions last time around about versioning and
> dealing with any future changes. Did you have any thoughts on that? I
> think it's also a big issue.

Right, sorry. I'll address those now.

Stephen

> Regards,
> Daniel
> 
> 
> > diff --git a/patchwork/migrations/0028_remove_old_event_fields.py b/patchwork/migrations/0028_remove_old_event_fields.py
> > new file mode 100644
> > index 00000000..2f7ba7bf
> > --- /dev/null
> > +++ b/patchwork/migrations/0028_remove_old_event_fields.py
> > @@ -0,0 +1,34 @@
> > +# -*- coding: utf-8 -*-
> > +from __future__ import unicode_literals
> > +
> > +from django.db import migrations
> > +
> > +
> > +class Migration(migrations.Migration):
> > +
> > +    dependencies = [
> > +        ('patchwork', '0027_migrate_data_from_event_fields_to_payload'),
> > +    ]
> > +
> > +    operations = [
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='created_check',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='current_delegate',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='current_state',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='previous_delegate',
> > +        ),
> > +        migrations.RemoveField(
> > +            model_name='event',
> > +            name='previous_state',
> > +        ),
> > +    ]
> > diff --git a/patchwork/models.py b/patchwork/models.py
> > index f91b994c..a5992d68 100644
> > --- a/patchwork/models.py
> > +++ b/patchwork/models.py
> > @@ -37,6 +37,7 @@ from django.utils.functional import cached_property
> >  from patchwork.compat import is_authenticated
> >  from patchwork.compat import reverse
> >  from patchwork.fields import HashField
> > +from patchwork.fields import JSONField
> >  from patchwork.hasher import hash_diff
> >  
> >  if settings.ENABLE_REST_API:
> > @@ -943,32 +944,9 @@ class Event(models.Model):
> >          on_delete=models.CASCADE,
> >          help_text='The cover letter that this event was created for.')
> >  
> > -    # fields for 'patch-state-changed' events
> > +    # additional data
> >  
> > -    previous_state = models.ForeignKey(
> > -        State, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -    current_state = models.ForeignKey(
> > -        State, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -
> > -    # fields for 'patch-delegate-changed' events
> > -
> > -    previous_delegate = models.ForeignKey(
> > -        User, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -    current_delegate = models.ForeignKey(
> > -        User, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -
> > -    # fields or 'patch-check-created' events
> > -
> > -    created_check = models.ForeignKey(
> > -        Check, related_name='+', null=True, blank=True,
> > -        on_delete=models.CASCADE)
> > -
> > -    # TODO(stephenfin): Validate that the correct fields are being set by way
> > -    # of a 'clean' method
> > +    payload = JSONField(null=True, blank=True)
> >  
> >      def __repr__(self):
> >          return "<Event id='%d' category='%s'" % (self.id, self.category)
> > diff --git a/patchwork/signals.py b/patchwork/signals.py
> > index b083b51a..c0f4c08d 100644
> > --- a/patchwork/signals.py
> > +++ b/patchwork/signals.py
> > @@ -24,6 +24,7 @@ from django.db.models.signals import post_save
> >  from django.db.models.signals import pre_save
> >  from django.dispatch import receiver
> >  
> > +from patchwork.api.event import PayloadSerializer
> >  from patchwork.models import Check
> >  from patchwork.models import CoverLetter
> >  from patchwork.models import Event
> > @@ -33,6 +34,14 @@ from patchwork.models import Series
> >  from patchwork.models import SeriesPatch
> >  
> >  
> > +# TODO(stephenfin): Move this to 'api/event' and into Serializer
> > +class Payload(object):
> > +
> > +    def __init__(self, **kwargs):
> > +        for kwarg in kwargs:
> > +            setattr(self, kwarg, kwargs[kwarg])
> > +
> > +
> >  @receiver(pre_save, sender=Patch)
> >  def patch_change_callback(sender, instance, raw, **kwargs):
> >      # we only want notification of modified patches
> > @@ -73,9 +82,13 @@ def patch_change_callback(sender, instance, raw, **kwargs):
> >  def create_cover_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(cover):
> > +        payload = PayloadSerializer(Payload(
> > +            cover=cover))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_COVER_CREATED,
> >              project=cover.project,
> > +            payload=payload.data,
> >              cover=cover)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> > @@ -88,9 +101,13 @@ def create_cover_created_event(sender, instance, created, raw, **kwargs):
> >  def create_patch_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(patch):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_CREATED,
> >              project=patch.project,
> > +            payload=payload.data,
> >              patch=patch)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> > @@ -103,12 +120,16 @@ def create_patch_created_event(sender, instance, created, raw, **kwargs):
> >  def create_patch_state_changed_event(sender, instance, raw, **kwargs):
> >  
> >      def create_event(patch, before, after):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch,
> > +            previous_state=before,
> > +            current_state=after))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_STATE_CHANGED,
> >              project=patch.project,
> > -            patch=patch,
> > -            previous_state=before,
> > -            current_state=after)
> > +            payload=payload.data,
> > +            patch=patch)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> >      if raw or not instance.pk:
> > @@ -125,12 +146,16 @@ def create_patch_state_changed_event(sender, instance, raw, **kwargs):
> >  def create_patch_delegated_event(sender, instance, raw, **kwargs):
> >  
> >      def create_event(patch, before, after):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch,
> > +            previous_delegate=before,
> > +            current_delegate=after))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_DELEGATED,
> >              project=patch.project,
> > -            patch=patch,
> > -            previous_delegate=before,
> > -            current_delegate=after)
> > +            payload=payload.data,
> > +            patch=patch)
> >  
> >      # don't trigger for items loaded from fixtures or new items
> >      if raw or not instance.pk:
> > @@ -148,9 +173,14 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
> >      """Create patch completed event for patches with series."""
> >  
> >      def create_event(patch, series):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=patch,
> > +            series=series))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_PATCH_COMPLETED,
> >              project=patch.project,
> > +            payload=payload.data,
> >              patch=patch,
> >              series=series)
> >  
> > @@ -182,13 +212,17 @@ def create_patch_completed_event(sender, instance, created, raw, **kwargs):
> >  def create_check_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(check):
> > +        payload = PayloadSerializer(Payload(
> > +            patch=check.patch,
> > +            check=check))
> > +
> >          # TODO(stephenfin): It might make sense to add a 'project' field to
> >          # 'check' to prevent lookups here and in the REST API
> >          return Event.objects.create(
> >              category=Event.CATEGORY_CHECK_CREATED,
> >              project=check.patch.project,
> > -            patch=check.patch,
> > -            created_check=check)
> > +            payload=payload.data,
> > +            patch=check.patch)
> >  
> >      # don't trigger for items loaded from fixtures or existing items
> >      if raw or not created:
> > @@ -200,9 +234,13 @@ def create_check_created_event(sender, instance, created, raw, **kwargs):
> >  def create_series_created_event(sender, instance, created, raw, **kwargs):
> >  
> >      def create_event(series):
> > +        payload = PayloadSerializer(Payload(
> > +            series=series))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_SERIES_CREATED,
> >              project=series.project,
> > +            payload=payload.data,
> >              series=series)
> >  
> >      # don't trigger for items loaded from fixtures or existing items
> > @@ -227,9 +265,13 @@ def create_series_completed_event(sender, instance, created, raw, **kwargs):
> >      # in that case.
> >  
> >      def create_event(series):
> > +        payload = PayloadSerializer(Payload(
> > +            series=series))
> > +
> >          return Event.objects.create(
> >              category=Event.CATEGORY_SERIES_COMPLETED,
> >              project=series.project,
> > +            payload=payload.data,
> >              series=series)
> >  
> >      # don't trigger for items loaded from fixtures or existing items
> > diff --git a/patchwork/tests/test_events.py b/patchwork/tests/test_events.py
> > index 70d563de..39dff6f1 100644
> > --- a/patchwork/tests/test_events.py
> > +++ b/patchwork/tests/test_events.py
> > @@ -17,32 +17,20 @@
> >  # along with Patchwork; if not, write to the Free Software
> >  # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> >  
> > +from collections import OrderedDict
> > +
> >  from django.test import TestCase
> >  
> >  from patchwork.models import Event
> >  from patchwork.tests import utils
> >  
> > -BASE_FIELDS = ['previous_state', 'current_state', 'previous_delegate',
> > -               'current_delegate']
> > -
> >  
> >  def _get_events(**filters):
> >      # These are sorted by reverse normally, so reverse it once again
> >      return Event.objects.filter(**filters).order_by('date')
> >  
> >  
> > -class _BaseTestCase(TestCase):
> > -
> > -    def assertEventFields(self, event, parent_type='patch', **fields):
> > -        for field_name in [x for x in BASE_FIELDS]:
> > -            field = getattr(event, field_name)
> > -            if field_name in fields:
> > -                self.assertEqual(field, fields[field_name])
> > -            else:
> > -                self.assertIsNone(field)
> > -
> > -
> > -class PatchCreateTest(_BaseTestCase):
> > +class PatchCreateTest(TestCase):
> >  
> >      def test_patch_created(self):
> >          """No series, so patch dependencies implicitly exist."""
> > @@ -51,10 +39,15 @@ class PatchCreateTest(_BaseTestCase):
> >          # This should raise both the CATEGORY_PATCH_CREATED and
> >          # CATEGORY_PATCH_COMPLETED events as there are no specific dependencies
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> >          self.assertEqual(events[0].project, patch.project)
> > -        self.assertEventFields(events[0])
> > +
> > +        payload = events[0].payload
> > +        self.assertIsInstance(payload, OrderedDict)
> > +        self.assertIn('patch', payload)
> > +        self.assertEqual(payload['patch']['id'], patch.id)
> >  
> >      def test_patch_dependencies_present_series(self):
> >          """Patch dependencies already exist."""
> > @@ -63,13 +56,22 @@ class PatchCreateTest(_BaseTestCase):
> >          # This should raise both the CATEGORY_PATCH_CREATED and
> >          # CATEGORY_PATCH_COMPLETED events
> >          events = _get_events(patch=series_patch.patch)
> > +
> >          self.assertEqual(events.count(), 2)
> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> >          self.assertEqual(events[0].project, series_patch.patch.project)
> >          self.assertEqual(events[1].category, Event.CATEGORY_PATCH_COMPLETED)
> >          self.assertEqual(events[1].project, series_patch.patch.project)
> > -        self.assertEventFields(events[0])
> > -        self.assertEventFields(events[1])
> > +
> > +        payload = events[0].payload
> > +        self.assertIn('patch', payload)
> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('patch', payload)
> > +        self.assertEqual(payload['patch']['id'], series_patch.patch.id)
> > +        self.assertIn('series', payload)
> > +        self.assertEqual(payload['series']['id'], series_patch.series.id)
> >  
> >      def test_patch_dependencies_out_of_order(self):
> >          series = utils.create_series()
> > @@ -82,7 +84,6 @@ class PatchCreateTest(_BaseTestCase):
> >              events = _get_events(patch=series_patch.patch)
> >              self.assertEqual(events.count(), 1)
> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> > -            self.assertEventFields(events[0])
> >  
> >          series_patch_1 = utils.create_series_patch(series=series, number=1)
> >  
> > @@ -94,8 +95,6 @@ class PatchCreateTest(_BaseTestCase):
> >              self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> >              self.assertEqual(events[1].category,
> >                               Event.CATEGORY_PATCH_COMPLETED)
> > -            self.assertEventFields(events[0])
> > -            self.assertEventFields(events[1])
> >  
> >      def test_patch_dependencies_missing(self):
> >          series_patch = utils.create_series_patch(number=2)
> > @@ -105,10 +104,9 @@ class PatchCreateTest(_BaseTestCase):
> >          events = _get_events(patch=series_patch.patch)
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_PATCH_CREATED)
> > -        self.assertEventFields(events[0])
> >  
> >  
> > -class PatchChangedTest(_BaseTestCase):
> > +class PatchChangedTest(TestCase):
> >  
> >      def test_patch_state_changed(self):
> >          patch = utils.create_patch()
> > @@ -119,13 +117,18 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 2)
> >          # we don't care about the CATEGORY_PATCH_CREATED event here
> >          self.assertEqual(events[1].category,
> >                           Event.CATEGORY_PATCH_STATE_CHANGED)
> >          self.assertEqual(events[1].project, patch.project)
> > -        self.assertEventFields(events[1], previous_state=old_state,
> > -                               current_state=new_state)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('previous_state', payload)
> > +        self.assertEqual(payload['previous_state'], old_state.slug)
> > +        self.assertIn('current_state', payload)
> > +        self.assertEqual(payload['current_state'], new_state.slug)
> >  
> >      def test_patch_delegated(self):
> >          patch = utils.create_patch()
> > @@ -137,12 +140,18 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 2)
> >          # we don't care about the CATEGORY_PATCH_CREATED event here
> >          self.assertEqual(events[1].category,
> >                           Event.CATEGORY_PATCH_DELEGATED)
> >          self.assertEqual(events[1].project, patch.project)
> > -        self.assertEventFields(events[1], current_delegate=delegate_a)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('previous_delegate', payload)
> > +        self.assertIsNone(payload['previous_delegate'])
> > +        self.assertIn('current_delegate', payload)
> > +        self.assertEqual(payload['current_delegate']['id'], delegate_a.id)
> >  
> >          delegate_b = utils.create_user()
> >  
> > @@ -152,11 +161,16 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 3)
> >          self.assertEqual(events[2].category,
> >                           Event.CATEGORY_PATCH_DELEGATED)
> > -        self.assertEventFields(events[2], previous_delegate=delegate_a,
> > -                               current_delegate=delegate_b)
> > +
> > +        payload = events[2].payload
> > +        self.assertIn('previous_delegate', payload)
> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_a.id)
> > +        self.assertIn('current_delegate', payload)
> > +        self.assertEqual(payload['current_delegate']['id'], delegate_b.id)
> >  
> >          # Delegate B -> None
> >  
> > @@ -164,24 +178,36 @@ class PatchChangedTest(_BaseTestCase):
> >          patch.save()
> >  
> >          events = _get_events(patch=patch)
> > +
> >          self.assertEqual(events.count(), 4)
> >          self.assertEqual(events[3].category,
> >                           Event.CATEGORY_PATCH_DELEGATED)
> > -        self.assertEventFields(events[3], previous_delegate=delegate_b)
> >  
> > +        payload = events[3].payload
> > +        self.assertIn('previous_delegate', payload)
> > +        self.assertEqual(payload['previous_delegate']['id'], delegate_b.id)
> > +        self.assertIn('current_delegate', payload)
> > +        self.assertIsNone(payload['current_delegate'])
> >  
> > -class CheckCreateTest(_BaseTestCase):
> > +
> > +class CheckCreateTest(TestCase):
> >  
> >      def test_check_created(self):
> >          check = utils.create_check()
> > -        events = _get_events(created_check=check)
> > -        self.assertEqual(events.count(), 1)
> > -        self.assertEqual(events[0].category, Event.CATEGORY_CHECK_CREATED)
> > -        self.assertEqual(events[0].project, check.patch.project)
> > -        self.assertEventFields(events[0])
> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
> > +        events = _get_events(patch=check.patch)
> >  
> > +        self.assertEqual(events.count(), 2)
> > +        # we don't care about the CATEGORY_PATCH_CREATED event here
> > +        self.assertEqual(events[1].category, Event.CATEGORY_CHECK_CREATED)
> > +        self.assertEqual(events[1].project, check.patch.project)
> > +
> > +        payload = events[1].payload
> > +        self.assertIn('check', payload)
> > +        self.assertEqual(payload['check']['id'], check.id)
> >  
> > -class CoverCreateTest(_BaseTestCase):
> > +
> > +class CoverCreateTest(TestCase):
> >  
> >      def test_cover_created(self):
> >          cover = utils.create_cover()
> > @@ -189,10 +215,13 @@ class CoverCreateTest(_BaseTestCase):
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_COVER_CREATED)
> >          self.assertEqual(events[0].project, cover.project)
> > -        self.assertEventFields(events[0])
> > +
> > +        payload = events[0].payload
> > +        self.assertIn('cover', payload)
> > +        self.assertEqual(payload['cover']['id'], cover.id)
> >  
> >  
> > -class SeriesCreateTest(_BaseTestCase):
> > +class SeriesCreateTest(TestCase):
> >  
> >      def test_series_created(self):
> >          series = utils.create_series()
> > @@ -200,4 +229,7 @@ class SeriesCreateTest(_BaseTestCase):
> >          self.assertEqual(events.count(), 1)
> >          self.assertEqual(events[0].category, Event.CATEGORY_SERIES_CREATED)
> >          self.assertEqual(events[0].project, series.project)
> > -        self.assertEventFields(events[0])
> > +
> > +        payload = events[0].payload
> > +        self.assertIn('series', payload)
> > +        self.assertEqual(payload['series']['id'], series.id)
> > -- 
> > 2.14.3
> > 
> > _______________________________________________
> > Patchwork mailing list
> > Patchwork at lists.ozlabs.org
> > https://lists.ozlabs.org/listinfo/patchwork



More information about the Patchwork mailing list