[PATCH v1 3/9] parser: Parse "Depends-on" tags in emails

Stephen Finucane stephen at that.guru
Sat Jul 13 02:01:54 AEST 2024


On Mon, 2024-06-17 at 18:18 -0400, Adam Hassick wrote:
> Add a new function to parse "Depends-on" tags to the parser. The value
> may either be a "reference" to a patch or series object or the web URL
> to the object. For example, a reference may look like "patch-1234" or
> "series-5678". When this tag is found, the parser will add the series
> (or the series the patch belongs to) as a dependency to the patch series
> it is creating.

I know the DPDK folks are using this integer ID-based format for dependencies
(per [1]), but I'm not a huge fan of it. My concerns are two-fold: firstly,
we've been trying to abstract/hide the integer IDs in favour of message IDs over
recent releases and we no longer expose in most of the web UI (they're still
exposed via the REST API but that's for historical reasons). This would be a
step backwards on this path. Secondly, something like 'series-5678' gives the
casual observer very little information about the location of that dependency.
You'd need to know that Patchwork was being used as well as the specific
location of the Patchwork instance in question. I wonder if, rather than
supporting this syntax, we could support Message IDs (and URLs) instead? That's
a unique identifier that is searchable both online and off (assuming you're
saving mail locally). DPDK could of course choose to add patches on top to
support their existing syntax, though they could also choose to migrate to the
new pattern. wdyt?

Some other comments on this patch as-is below.

[1] https://github.com/getpatchwork/git-pw/issues/71

> 
> Signed-off-by: Adam Hassick <ahassick at iol.unh.edu>
> ---
>  patchwork/parser.py | 109 +++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 108 insertions(+), 1 deletion(-)
> 
> diff --git a/patchwork/parser.py b/patchwork/parser.py
> index 09a53a0..90ec63b 100644
> --- a/patchwork/parser.py
> +++ b/patchwork/parser.py
> @@ -14,11 +14,14 @@ from email.errors import HeaderParseError
>  from fnmatch import fnmatch
>  import logging
>  import re
> +from urllib.parse import urlparse
>  
>  from django.contrib.auth.models import User
>  from django.db.utils import IntegrityError
>  from django.db import transaction
>  from django.utils import timezone as tz_utils
> +from django.urls import resolve, Resolver404
> +from django.conf import settings
>  
>  from patchwork.models import Cover
>  from patchwork.models import CoverComment
> @@ -32,7 +35,6 @@ from patchwork.models import Series
>  from patchwork.models import SeriesReference
>  from patchwork.models import State
>  
> -
>  _msgid_re = re.compile(r'<[^>]+>')
>  _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@')
>  _filename_re = re.compile(r'^(---|\+\+\+) (\S+)')
> @@ -1054,6 +1056,102 @@ def parse_pull_request(content):
>      return None
>  
>  
> +def find_series_from_url(url):
> +    """
> +    Get series or patch from URL.
> +    """
> +
> +    parse_result = urlparse(url)
> +
> +    # Resolve the URL path to see if this is a patch or series detail URL.
> +    try:
> +        result = resolve(parse_result.path)
> +    except Resolver404:
> +        return None
> +
> +    if result.view_name == 'patch-list' and parse_result.query:
> +        # Parse the query string.
> +        # This can be replaced with something much friendlier once the
> +        # series detail view is implemented.
> +        series_query_param = next(
> +            filter(
> +                lambda x: len(x) == 2 and x[0] == 'series',
> +                map(lambda x: x.split('='), parse_result.query.split('&')),
> +            )
> +        )
> +
> +        if series_query_param:

nit:

    if not series_query_param:
        return None

    series_id = series_query_param[1]

    ...

> +            series_id = series_query_param[1]
> +
> +            try:
> +                series_id_num = int(series_id)
> +            except ValueError:
> +                return None
> +
> +            return Series.objects.filter(id=series_id_num).first()
> +    elif result.view_name == 'patch-detail':
> +        msgid = Patch.decode_msgid(result.kwargs['msgid'])
> +        patch = Patch.objects.filter(msgid=msgid).first()
> +
> +        if patch:
> +            return patch.series
> +
> +
> +def find_series_from_ref(match):
> +    (obj_type, obj_id_str) = match

nit: don't need those extra brackets

> +
> +    try:
> +        object_id = int(obj_id_str)
> +    except ValueError:
> +        return None
> +
> +    if obj_type == 'series':
> +        series = Series.objects.filter(id=object_id).first()

nit:

    return Series.objects.filter(...)

> +    elif obj_type == 'patch':
> +        patch = Patch.objects.filter(id=object_id).first()
> +
> +        if not patch:
> +            return None
> +
> +        series = patch.series

nit:

    return patch.series

> +
> +    return series
> +
> +
> +def parse_depends_on(content):
> +    """Parses any dependency hints in the comments."""
> +    depends_patterns = [
> +        (
> +            re.compile(
> +                r'^Depends-on: ((?:patch)?(?:series)?)-([\d]+)(?: \("[^\n\r"]+"\))?\s*$',
> +                re.MULTILINE | re.IGNORECASE,
> +            ),
> +            find_series_from_ref,
> +        ),
> +        (
> +            re.compile(
> +                r'^Depends-on: (http[s]?:\/\/[\w\d\-.\/=&@:%?_\+()]+)\s*$',
> +                re.MULTILINE | re.IGNORECASE,
> +            ),
> +            find_series_from_url,
> +        ),
> +    ]
> +
> +    dependencies = list()

nit: Can you use a literal?

    dependencies = []

> +
> +    for pat, mapper in depends_patterns:
> +        matches = pat.findall(content)
> +
> +        # Produce a list of tuples containing type and ID of each dependency.
> +        # Eliminate elements where we could not parse the ID as an integer.
> +        dependencies.extend(
> +            filter(lambda series: series is not None, map(mapper, matches))
> +        )

This is a little too clever for my liking :)

     for match in re.findall(
         r'^Depends-on: ((?:patch)?(?:series)?)-([\d]+)(?:
\("[^\n\r"]+"\))?\s*$',
         re.MULTILINE | re.IGNORECASE,
     ):
         if series := find_series_from_ref(match):
             dependencies.append(series)

     ...

> +
> +    # Return list of series objects to depend on.
> +    return dependencies
> +
> +
>  def find_state(mail):
>      """Return the state with the given name or the default."""
>      state_name = clean_header(mail.get('X-Patchwork-State', ''))
> @@ -1171,6 +1269,12 @@ def parse_mail(mail, list_id=None):
>  
>      pull_url = parse_pull_request(message)
>  
> +    # Only look for "Depends-on" tags if the setting is enabled.
> +    if settings.ENABLE_DEPENDS_ON_PARSING:
> +        dependencies = parse_depends_on(message)
> +    else:
> +        dependencies = []
> +

The patch that adds this should come earlier in the series (before this one). We
can discuss whether it's needed there. However, if we keep it can you move the
check for it into 'parse_depends_on' and move the call to 'parse_depends_on'
below so that we can avoid parsing dependencies for non-cover letter/patch
emails?

>      # build objects
>  
>      if not is_comment and (diff or pull_url):  # patches or pull requests
> @@ -1308,6 +1412,8 @@ def parse_mail(mail, list_id=None):
>              # always have a series
>              series.add_patch(patch, x)
>  
> +        series.add_dependencies(dependencies)

    dependencies = parse_depends_on(message)
    series.add_dependencies(dependencies)

> +
>          return patch
>      elif x == 0:  # (potential) cover letters
>          # if refs are empty, it's implicitly a cover letter. If not,
> @@ -1374,6 +1480,7 @@ def parse_mail(mail, list_id=None):
>              logger.debug('Cover letter saved')
>  
>              series.add_cover_letter(cover_letter)
> +            series.add_dependencies(dependencies)

    dependencies = parse_depends_on(message)
    series.add_dependencies(dependencies)

>              return cover_letter
>  



More information about the Patchwork mailing list