[PATCH] RFC: Monkey-patch Django performance bug
Daniel Axtens
dja at axtens.net
Sun Sep 3 23:36:31 AEST 2017
OzLabs noticed a performance regression, which was traced to the
loading of the 'headers', 'content' and 'diff' fields, despite
the 'defer()' call in views/__init__.py:generic_list()
This is a django bug, reported upstream and fixed at:
https://code.djangoproject.com/ticket/28549#ticket
https://github.com/django/django/pull/8994
I will be preparing a patch for the Debian-based system OzLabs uses,
but I am wondering if we need a more generic approach for all our
potential users, rather than asking people to patch their systems.
This is one way to do it:
Monkey-patch the broken method, and add a test to verify correct
behaviour.
It's also a somewhat terrifying approach, so ther suggestions are
very welcome.
Signed-off-by: Daniel Axtens <dja at axtens.net>
---
patchwork/__init__.py | 1 +
patchwork/django_bugs.py | 124 +++++++++++++++++++++++++++++++++++++++++++
patchwork/tests/test_list.py | 22 ++++++++
3 files changed, 147 insertions(+)
create mode 100644 patchwork/django_bugs.py
diff --git a/patchwork/__init__.py b/patchwork/__init__.py
index 98e9b21d066f..8e704e4b30a2 100644
--- a/patchwork/__init__.py
+++ b/patchwork/__init__.py
@@ -18,6 +18,7 @@
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
from patchwork.version import get_latest_version
+from patchwork import django_bugs # noqa
VERSION = (2, 1, 0, 'alpha', 0)
diff --git a/patchwork/django_bugs.py b/patchwork/django_bugs.py
new file mode 100644
index 000000000000..d0dfbfc5561f
--- /dev/null
+++ b/patchwork/django_bugs.py
@@ -0,0 +1,124 @@
+"""Horrifying workarounds for bugs.
+
+Please justify your answers and show all reasoning.
+
+Provide test cases!
+
+Imported through patchwork's __init__.py
+
+"""
+
+# https://code.djangoproject.com/ticket/28549
+# Can't defer() fields from super- and sub-class at the same time
+#
+# In views/__init__.py in in generic_list() we attempt to defer the
+# headers, diff and content of a patch. This broke with pw2, as the
+# headers and content belonged to the super-class (Submission) and the
+# diff belongs to the sub-class (Patch). As such an attempt to defer
+# both ended up deferring neither, and performance was affected very
+# noticeably on OzLabs.
+#
+# jk successfully fixed the bug in collaboration with upstream, but
+# this leaves deployments in an unfortunate position, as the fix
+# hasn't made it into any released versions yet. People would have to
+# patch their system or installation django. That would come with an
+# enormous maintainability cost. So monkey-patch the fixed version in
+# so long as we're below v1.12.
+#
+# Drop this after we only support Django v1.12+
+
+import django
+from django.db.models.constants import LOOKUP_SEP
+from django.db.models.sql.query import is_reverse_o2o
+from django.db.models.sql.query import add_to_dict
+
+
+def backported_deferred_to_data(self, target, callback):
+ """
+ Convert the self.deferred_loading data structure to an alternate data
+ structure, describing the field that *will* be loaded. This is used to
+ compute the columns to select from the database and also by the
+ QuerySet class to work out which fields are being initialized on each
+ model. Models that have all their fields included aren't mentioned in
+ the result, only those that have field restrictions in place.
+ The "target" parameter is the instance that is populated (in place).
+ The "callback" is a function that is called whenever a (model, field)
+ pair need to be added to "target". It accepts three parameters:
+ "target", and the model and list of fields being added for that model.
+ """
+ field_names, defer = self.deferred_loading
+ if not field_names:
+ return
+ orig_opts = self.get_meta()
+ seen = {}
+ must_include = {orig_opts.concrete_model: {orig_opts.pk}}
+ for field_name in field_names:
+ parts = field_name.split(LOOKUP_SEP)
+ cur_model = self.model._meta.concrete_model
+ opts = orig_opts
+ for name in parts[:-1]:
+ old_model = cur_model
+ source = opts.get_field(name)
+ if is_reverse_o2o(source):
+ cur_model = source.related_model
+ else:
+ cur_model = source.remote_field.model
+ opts = cur_model._meta
+ # Even if we're "just passing through" this model, we must add
+ # both the current model's pk and the related reference field
+ # (if it's not a reverse relation) to the things we select.
+ if not is_reverse_o2o(source):
+ must_include[old_model].add(source)
+ add_to_dict(must_include, cur_model, opts.pk)
+ field = opts.get_field(parts[-1])
+ is_reverse_object = field.auto_created and not field.concrete
+ model = field.related_model if is_reverse_object else field.model
+ model = model._meta.concrete_model
+ if model == opts.model:
+ model = cur_model
+ if not is_reverse_o2o(field):
+ add_to_dict(seen, model, field)
+
+ if defer:
+ # We need to load all fields for each model, except those that
+ # appear in "seen" (for all models that appear in "seen"). The only
+ # slight complexity here is handling fields that exist on parent
+ # models.
+ workset = {}
+ for model, values in seen.items():
+ for field in model._meta.local_fields:
+ if field in values:
+ continue
+ m = field.model._meta.concrete_model
+ add_to_dict(workset, m, field)
+ for model, values in must_include.items():
+ # If we haven't included a model in workset, we don't add the
+ # corresponding must_include fields for that model, since an
+ # empty set means "include all fields". That's why there's no
+ # "else" branch here.
+ if model in workset:
+ workset[model].update(values)
+ for model, values in workset.items():
+ callback(target, model, values)
+ else:
+ for model, values in must_include.items():
+ if model in seen:
+ seen[model].update(values)
+ else:
+ # As we've passed through this model, but not explicitly
+ # included any fields, we have to make sure it's mentioned
+ # so that only the "must include" fields are pulled in.
+ seen[model] = values
+ # Now ensure that every model in the inheritance chain is mentioned
+ # in the parent list. Again, it must be mentioned to ensure that
+ # only "must include" fields are pulled in.
+ for model in orig_opts.get_parent_list():
+ if model not in seen:
+ seen[model] = set()
+ for model, values in seen.items():
+ callback(target, model, values)
+
+
+if django.VERSION < (1, 12):
+ django.db.models.sql.query.Query.deferred_to_data = \
+ backported_deferred_to_data
diff --git a/patchwork/tests/test_list.py b/patchwork/tests/test_list.py
index 11b9da9c11d3..c891a4cb4191 100644
--- a/patchwork/tests/test_list.py
+++ b/patchwork/tests/test_list.py
@@ -22,6 +22,8 @@ from __future__ import absolute_import
from datetime import datetime as dt
import re
+import django
+from django.db.models.query_utils import DeferredAttribute
from django.test import TestCase
from django.utils.six.moves import zip
@@ -32,6 +34,19 @@ from patchwork.tests.utils import create_person
from patchwork.tests.utils import create_project
+if django.VERSION < (1, 10):
+ def count_deferred(obj):
+ count = 0
+ for field in obj._meta.fields:
+ if isinstance(obj.__class__.__dict__.get(field.attname),
+ DeferredAttribute):
+ count += 1
+ return count
+else:
+ def count_deferred(obj):
+ return len(obj.get_deferred_fields())
+
+
class EmptyPatchListTest(TestCase):
def test_empty_patch_list(self):
@@ -132,3 +147,10 @@ class PatchOrderTest(TestCase):
p2.submitter.name.lower())
self._test_sequence(response, test_fn)
+
+ def test_defers(self):
+ # see django_bugs.py
+ patches = Patch.objects.all()
+ patches = patches.defer('diff', 'content', 'headers')
+ patch = patches.first()
+ self.assertEqual(count_deferred(patch), 3)
--
2.11.0
More information about the Patchwork
mailing list