[PATCH v1 3/9] parser: Parse "Depends-on" tags in emails

Tue Jun 18 08:18:49 AEST 2024

Add a new function to parse "Depends-on" tags to the parser. The value
may either be a "reference" to a patch or series object or the web URL
to the object. For example, a reference may look like "patch-1234" or
"series-5678". When this tag is found, the parser will add the series
(or the series the patch belongs to) as a dependency to the patch series
it is creating.

Signed-off-by: Adam Hassick <ahassick at iol.unh.edu>
---
 patchwork/parser.py | 109 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 108 insertions(+), 1 deletion(-)

diff --git a/patchwork/parser.py b/patchwork/parser.py
index 09a53a0..90ec63b 100644
--- a/patchwork/parser.py
+++ b/patchwork/parser.py
@@ -14,11 +14,14 @@ from email.errors import HeaderParseError
 from fnmatch import fnmatch
 import logging
 import re
+from urllib.parse import urlparse
 
 from django.contrib.auth.models import User
 from django.db.utils import IntegrityError
 from django.db import transaction
 from django.utils import timezone as tz_utils
+from django.urls import resolve, Resolver404
+from django.conf import settings
 
 from patchwork.models import Cover
 from patchwork.models import CoverComment
@@ -32,7 +35,6 @@ from patchwork.models import Series
 from patchwork.models import SeriesReference
 from patchwork.models import State
 
-
 _msgid_re = re.compile(r'<[^>]+>')
 _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
 _filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
@@ -1054,6 +1056,102 @@ def parse_pull_request(content):
     return None
 
 
+def find_series_from_url(url):
+    """
+    Get series or patch from URL.
+    """
+
+    parse_result = urlparse(url)
+
+    # Resolve the URL path to see if this is a patch or series detail URL.
+    try:
+        result = resolve(parse_result.path)
+    except Resolver404:
+        return None
+
+    if result.view_name == 'patch-list' and parse_result.query:
+        # Parse the query string.
+        # This can be replaced with something much friendlier once the
+        # series detail view is implemented.
+        series_query_param = next(
+            filter(
+                lambda x: len(x) == 2 and x[0] == 'series',
+                map(lambda x: x.split('='), parse_result.query.split('&')),
+            )
+        )
+
+        if series_query_param:
+            series_id = series_query_param[1]
+
+            try:
+                series_id_num = int(series_id)
+            except ValueError:
+                return None
+
+            return Series.objects.filter(id=series_id_num).first()
+    elif result.view_name == 'patch-detail':
+        msgid = Patch.decode_msgid(result.kwargs['msgid'])
+        patch = Patch.objects.filter(msgid=msgid).first()
+
+        if patch:
+            return patch.series
+
+
+def find_series_from_ref(match):
+    (obj_type, obj_id_str) = match
+
+    try:
+        object_id = int(obj_id_str)
+    except ValueError:
+        return None
+
+    if obj_type == 'series':
+        series = Series.objects.filter(id=object_id).first()
+    elif obj_type == 'patch':
+        patch = Patch.objects.filter(id=object_id).first()
+
+        if not patch:
+            return None
+
+        series = patch.series
+
+    return series
+
+
+def parse_depends_on(content):
+    """Parses any dependency hints in the comments."""
+    depends_patterns = [
+        (
+            re.compile(
+                r'^Depends-on: ((?:patch)?(?:series)?)-([\d]+)(?: \("[^\n\r"]+"\))?\s*$',
+                re.MULTILINE | re.IGNORECASE,
+            ),
+            find_series_from_ref,
+        ),
+        (
+            re.compile(
+                r'^Depends-on: (http[s]?:\/\/[\w\d\-.\/=&@:%?_\+()]+)\s*$',
+                re.MULTILINE | re.IGNORECASE,
+            ),
+            find_series_from_url,
+        ),
+    ]
+
+    dependencies = list()
+
+    for pat, mapper in depends_patterns:
+        matches = pat.findall(content)
+
+        # Produce a list of tuples containing type and ID of each dependency.
+        # Eliminate elements where we could not parse the ID as an integer.
+        dependencies.extend(
+            filter(lambda series: series is not None, map(mapper, matches))
+        )
+
+    # Return list of series objects to depend on.
+    return dependencies
+
+
 def find_state(mail):
     """Return the state with the given name or the default."""
     state_name = clean_header(mail.get('X-Patchwork-State', ''))
@@ -1171,6 +1269,12 @@ def parse_mail(mail, list_id=None):
 
     pull_url = parse_pull_request(message)
 
+    # Only look for "Depends-on" tags if the setting is enabled.
+    if settings.ENABLE_DEPENDS_ON_PARSING:
+        dependencies = parse_depends_on(message)
+    else:
+        dependencies = []
+
     # build objects
 
     if not is_comment and (diff or pull_url):  # patches or pull requests
@@ -1308,6 +1412,8 @@ def parse_mail(mail, list_id=None):
             # always have a series
             series.add_patch(patch, x)
 
+        series.add_dependencies(dependencies)
+
         return patch
     elif x == 0:  # (potential) cover letters
         # if refs are empty, it's implicitly a cover letter. If not,
@@ -1374,6 +1480,7 @@ def parse_mail(mail, list_id=None):
             logger.debug('Cover letter saved')
 
             series.add_cover_letter(cover_letter)
+            series.add_dependencies(dependencies)
 
             return cover_letter
 
-- 
2.45.2