[PATCH] RFC: Monkey-patch Django performance bug

Sun Sep 3 23:36:31 AEST 2017

OzLabs noticed a performance regression, which was traced to the
loading of the 'headers', 'content' and 'diff' fields, despite
the 'defer()' call in views/__init__.py:generic_list()

This is a django bug, reported upstream and fixed at:
https://code.djangoproject.com/ticket/28549#ticket
https://github.com/django/django/pull/8994

I will be preparing a patch for the Debian-based system OzLabs uses,
but I am wondering if we need a more generic approach for all our
potential users, rather than asking people to patch their systems.
This is one way to do it:

Monkey-patch the broken method, and add a test to verify correct
behaviour.

It's also a somewhat terrifying approach, so ther suggestions are
very welcome.

Signed-off-by: Daniel Axtens <dja at axtens.net>
---
 patchwork/__init__.py        |   1 +
 patchwork/django_bugs.py     | 124 +++++++++++++++++++++++++++++++++++++++++++
 patchwork/tests/test_list.py |  22 ++++++++
 3 files changed, 147 insertions(+)
 create mode 100644 patchwork/django_bugs.py

diff --git a/patchwork/__init__.py b/patchwork/__init__.py
index 98e9b21d066f..8e704e4b30a2 100644
--- a/patchwork/__init__.py
+++ b/patchwork/__init__.py
@@ -18,6 +18,7 @@
 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 
 from patchwork.version import get_latest_version
+from patchwork import django_bugs  # noqa
 
 VERSION = (2, 1, 0, 'alpha', 0)
 
diff --git a/patchwork/django_bugs.py b/patchwork/django_bugs.py
new file mode 100644
index 000000000000..d0dfbfc5561f
--- /dev/null
+++ b/patchwork/django_bugs.py
@@ -0,0 +1,124 @@
+"""Horrifying workarounds for bugs.
+
+Please justify your answers and show all reasoning.
+
+Provide test cases!
+
+Imported through patchwork's __init__.py
+
+"""
+
+# https://code.djangoproject.com/ticket/28549
+# Can't defer() fields from super- and sub-class at the same time
+#
+# In views/__init__.py in in generic_list() we attempt to defer the
+# headers, diff and content of a patch. This broke with pw2, as the
+# headers and content belonged to the super-class (Submission) and the
+# diff belongs to the sub-class (Patch). As such an attempt to defer
+# both ended up deferring neither, and performance was affected very
+# noticeably on OzLabs.
+#
+# jk successfully fixed the bug in collaboration with upstream, but
+# this leaves deployments in an unfortunate position, as the fix
+# hasn't made it into any released versions yet. People would have to
+# patch their system or installation django. That would come with an
+# enormous maintainability cost. So monkey-patch the fixed version in
+# so long as we're below v1.12.
+#
+# Drop this after we only support Django v1.12+
+
+import django
+from django.db.models.constants import LOOKUP_SEP
+from django.db.models.sql.query import is_reverse_o2o
+from django.db.models.sql.query import add_to_dict
+
+
+def backported_deferred_to_data(self, target, callback):
+    """
+    Convert the self.deferred_loading data structure to an alternate data
+    structure, describing the field that *will* be loaded. This is used to
+    compute the columns to select from the database and also by the
+    QuerySet class to work out which fields are being initialized on each
+    model. Models that have all their fields included aren't mentioned in
+    the result, only those that have field restrictions in place.
+    The "target" parameter is the instance that is populated (in place).
+    The "callback" is a function that is called whenever a (model, field)
+    pair need to be added to "target". It accepts three parameters:
+    "target", and the model and list of fields being added for that model.
+    """
+    field_names, defer = self.deferred_loading
+    if not field_names:
+        return
+    orig_opts = self.get_meta()
+    seen = {}
+    must_include = {orig_opts.concrete_model: {orig_opts.pk}}
+    for field_name in field_names:
+        parts = field_name.split(LOOKUP_SEP)
+        cur_model = self.model._meta.concrete_model
+        opts = orig_opts
+        for name in parts[:-1]:
+            old_model = cur_model
+            source = opts.get_field(name)
+            if is_reverse_o2o(source):
+                cur_model = source.related_model
+            else:
+                cur_model = source.remote_field.model
+            opts = cur_model._meta
+            # Even if we're "just passing through" this model, we must add
+            # both the current model's pk and the related reference field
+            # (if it's not a reverse relation) to the things we select.
+            if not is_reverse_o2o(source):
+                must_include[old_model].add(source)
+            add_to_dict(must_include, cur_model, opts.pk)
+        field = opts.get_field(parts[-1])
+        is_reverse_object = field.auto_created and not field.concrete
+        model = field.related_model if is_reverse_object else field.model
+        model = model._meta.concrete_model
+        if model == opts.model:
+            model = cur_model
+        if not is_reverse_o2o(field):
+            add_to_dict(seen, model, field)
+
+    if defer:
+        # We need to load all fields for each model, except those that
+        # appear in "seen" (for all models that appear in "seen"). The only
+        # slight complexity here is handling fields that exist on parent
+        # models.
+        workset = {}
+        for model, values in seen.items():
+            for field in model._meta.local_fields:
+                if field in values:
+                    continue
+                m = field.model._meta.concrete_model
+                add_to_dict(workset, m, field)
+        for model, values in must_include.items():
+            # If we haven't included a model in workset, we don't add the
+            # corresponding must_include fields for that model, since an
+            # empty set means "include all fields". That's why there's no
+            # "else" branch here.
+            if model in workset:
+                workset[model].update(values)
+        for model, values in workset.items():
+            callback(target, model, values)
+    else:
+        for model, values in must_include.items():
+            if model in seen:
+                seen[model].update(values)
+            else:
+                # As we've passed through this model, but not explicitly
+                # included any fields, we have to make sure it's mentioned
+                # so that only the "must include" fields are pulled in.
+                seen[model] = values
+        # Now ensure that every model in the inheritance chain is mentioned
+        # in the parent list. Again, it must be mentioned to ensure that
+        # only "must include" fields are pulled in.
+        for model in orig_opts.get_parent_list():
+            if model not in seen:
+                seen[model] = set()
+        for model, values in seen.items():
+            callback(target, model, values)
+
+
+if django.VERSION < (1, 12):
+    django.db.models.sql.query.Query.deferred_to_data = \
+        backported_deferred_to_data
diff --git a/patchwork/tests/test_list.py b/patchwork/tests/test_list.py
index 11b9da9c11d3..c891a4cb4191 100644
--- a/patchwork/tests/test_list.py
+++ b/patchwork/tests/test_list.py
@@ -22,6 +22,8 @@ from __future__ import absolute_import
 from datetime import datetime as dt
 import re
 
+import django
+from django.db.models.query_utils import DeferredAttribute
 from django.test import TestCase
 from django.utils.six.moves import zip
 
@@ -32,6 +34,19 @@ from patchwork.tests.utils import create_person
 from patchwork.tests.utils import create_project
 
 
+if django.VERSION < (1, 10):
+    def count_deferred(obj):
+        count = 0
+        for field in obj._meta.fields:
+            if isinstance(obj.__class__.__dict__.get(field.attname),
+                          DeferredAttribute):
+                count += 1
+        return count
+else:
+    def count_deferred(obj):
+        return len(obj.get_deferred_fields())
+
+
 class EmptyPatchListTest(TestCase):
 
     def test_empty_patch_list(self):
@@ -132,3 +147,10 @@ class PatchOrderTest(TestCase):
                                     p2.submitter.name.lower())
 
         self._test_sequence(response, test_fn)
+
+    def test_defers(self):
+        # see django_bugs.py
+        patches = Patch.objects.all()
+        patches = patches.defer('diff', 'content', 'headers')
+        patch = patches.first()
+        self.assertEqual(count_deferred(patch), 3)
-- 
2.11.0