Improvements to wmltools3 (#7561)

* wmltools3: Make the CrossRef class iterable Removes generator() method. * wmltools3: Check for warn level on lack of wmlscope: set export * wmltools3: Support passing a list of file paths to initialize CrossRef * wmltools3: Track lineno_end for Reference * wmltools3: Track default values for macro optional parameters optional_args is no longer a list, but a dict. * wmltools3: Track macro bodies as Reference.body * wmltools3: Register quoted macro arguments verbatim * wmltools3: Improve parse_macroref - Support parsing nested macro calls. - Remove excessive need of parens around macro calls/arguments. - Support escaping quotes. - Other compliance improvements.
2023-05-01 10:08:30 -05:00 · 2023-05-01 10:08:30 -05:00 · 2413e2f366
commit 2413e2f366
parent 1819febe92
1 changed files with 160 additions and 61 deletions
--- a/data/tools/wesnoth/wmltools3.py
+++ b/data/tools/wesnoth/wmltools3.py
@ -20,6 +20,13 @@ l10n_directories = ("l10n",)
 resource_extensions = map_extensions + image_extensions + sound_extensions
 image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.\-\[\]~\*,]*\.(png|jpe?g|webp)(?=(~.*)?)"
 EQUALS = '='
 QUOTE = '"'
 OPEN_BRACE = '{'
 CLOSE_BRACE = '}'
 OPEN_PARENS = '('
 CLOSE_PARENS = ')'
 class Substitution(object):
    __slots__ = ["sub", "start", "end"]
    def __init__(self, sub, start, end):
@ -251,7 +258,8 @@ class Forest:
        for tree in self.forest:
            allfiles += tree
        return allfiles
-    def generator(self):
+
    def __iter__(self):
        "Return a generator that walks through all files."
        for (directory, tree) in zip(self.dirpath, self.forest):
            for filename in tree:
@ -286,66 +294,138 @@ def isresource(filename):
    return ext and ext[1:] in resource_extensions
 def parse_macroref(start, line):
-    def handle_argument():
+    def handle_argument(buffer):
        nonlocal opt_arg
        nonlocal arg
        nonlocal optional_args
        nonlocal args
        nonlocal optional_args
        opt_arg = ""
        arg = "".join(buffer)
        # arg may be empty, so arg[0] may be OOB.
        if arg[0:1].isspace():
            arg = arg[1:]
        arg = arg.strip()
        # is this an optional argument?
        # argument names are usually made of uppercase letters, numbers and underscores
        # if they're optional, they're followed by an equal sign
        # stop matching on the first one, because the argument value might contain one too
        if re.match(r"^([A-Z0-9_]+?)=", arg):
            opt_arg, arg = arg.split("=", 1)
        if arg.startswith('"') and arg.endswith('"'):
            arg = arg[1:-1].strip()
        if opt_arg:
            optional_args[opt_arg] = arg
            opt_arg = ""
        else:
            args.append(arg)
-        arg = ""
+        buffer.clear()
        return True
-    brackdepth = parendepth = 0
+    buffer = []
    instring = False
    optional_args = {}
    args = []
-    opt_arg = ""
+    optional_args = {}
-    arg = ""
+
    depth = {
        EQUALS: 0,
        OPEN_BRACE: 0,
        OPEN_PARENS: 0,
        QUOTE: 0,
    }
    wrapper_stack = []
    prev_added_arg = False
    # close_token - Closes all active scopes, until the matching scope is found.
    # This is useful, for example, in {MACRO OPT_NAME=VAL}
    # In this example, close_token("}") will implicitly close the
    # optional argument scope.
    def close_token(token):
        while len(wrapper_stack) > 0:
            last_token = wrapper_stack.pop()
            depth[last_token] -= 1
            if last_token == token:
                break
        else:
            return False
        return True
    # close_if_token - Closes the current scope, if it matches the given token.
    # Atomic version of close_token, which provides expressivity.
    def close_if_token(token):
        if wrapper_stack[-1] == token:
            wrapper_stack.pop()
            depth[token] -= 1
            return True
        return False
    def open_token(token):
        wrapper_stack.append(token)
        depth[token] += 1
    for i in range(start, len(line)):
-        if instring:
+        added_arg = False
-            if line[i] == '"':
+        if depth[QUOTE] > 0:
-                instring = False
+            # If EOL, line[i+1] may be OOB, but slice is valid.
-            arg += line[i]
+            if line[i] == QUOTE and line[i+1:i+2] != QUOTE:
-        elif line[i] == '"':
+                close_token(QUOTE)
-            instring = not instring
+            buffer.append(line[i])
-            arg += line[i]
+        elif line[i] == QUOTE:
-        elif line[i] == "{":
+            open_token(QUOTE)
-            if brackdepth > 0:
+            buffer.append(line[i])
-                arg += line[i]
+        elif line[i] == OPEN_BRACE:
-            brackdepth += 1
+            open_token(OPEN_BRACE)
-        elif line[i] == "}":
+            buffer.append(line[i])
-            brackdepth -= 1
+        elif line[i] == CLOSE_BRACE:
-            if brackdepth == 0:
+            if wrapper_stack[-1] != OPEN_PARENS:
-                if not line[i-1].isspace():
+                close_token(OPEN_BRACE)
-                    handle_argument()
+            if depth[OPEN_BRACE] == 0:
                # Flush at end
                if not prev_added_arg and len(buffer) > 0:
                    added_arg = handle_argument(buffer)
                break
            else:
-                arg += line[i]
+                buffer.append(line[i])
-        elif line[i] == "(":
+        elif line[i] == OPEN_PARENS:
-            parendepth += 1
+            if wrapper_stack[-1] == OPEN_PARENS or wrapper_stack[-2:] == [OPEN_PARENS, EQUALS]:
-        elif line[i] == ")":
+                # Char in an argument
-            parendepth -= 1
+                buffer.append(line[i])
-        elif not line[i-1].isspace() and \
+            else:
-             line[i].isspace() and \
+                if wrapper_stack[-1] == EQUALS or (wrapper_stack[-1] == OPEN_BRACE and \
-             brackdepth == 1 and \
+                    not line[i-1].isspace()):
-             parendepth == 0:
+                    close_if_token(EQUALS)
-            handle_argument()
+                    if depth[OPEN_BRACE] == 1 and not prev_added_arg:
-        elif not line[i].isspace() or parendepth > 0:
+                        added_arg = handle_argument(buffer)
-            arg += line[i]
+                open_token(OPEN_PARENS)
-    return (args, optional_args, brackdepth, parendepth)
+                if depth[OPEN_BRACE] != 1:
                    buffer.append(line[i])
        elif line[i] == CLOSE_PARENS:
            # Source has too many closing parens.
            quit = not close_token(OPEN_PARENS)
            if depth[OPEN_BRACE] != 1:
                buffer.append(line[i])
            elif not prev_added_arg or line[i-1] == OPEN_PARENS:
                # {MACRO arg1()} has two arguments
                added_arg = handle_argument(buffer)
            if quit:
                added_arg = handle_argument(buffer)
                break
        elif line[i] == EQUALS and re.match(r'^([A-Z0-9_]+?)', line[i-1]):
            open_token(EQUALS)
            buffer.append(line[i])
        elif line[i].isspace():
            if line[i-1].isspace():
                # Ignore consecutive spaces
                continue
            if not prev_added_arg and \
             depth[OPEN_BRACE] == 1 and \
             depth[OPEN_PARENS] == 0:
                close_if_token(EQUALS)
                added_arg = handle_argument(buffer)
            buffer.append(line[i])
        else:
            buffer.append(line[i])
        prev_added_arg = added_arg
    args.pop(0)
    return (args, optional_args, depth[OPEN_BRACE] > 0)
 def formaltype(f):
    # Deduce the expected type of the formal
@ -488,14 +568,17 @@ def argmatch(formals, optional_formals, actuals, optional_actuals):
@total_ordering
 class Reference:
    "Describes a location by file and line."
-    def __init__(self, namespace, filename, lineno=None, docstring=None, args=None,
+    def __init__(self, namespace, filename, lineno=None, lineno_end=None, docstring=None, args=None,
                 optional_args=None, deprecated=False, deprecation_level=0, removal_version=None):
        self.namespace = namespace
        self.filename = filename
        self.lineno = lineno
        self.lineno_end = lineno_end
        self.docstring = docstring
        self.args = args
-        self.optional_args = optional_args
+        self._raw_optional_args = optional_args
        self.optional_args = {}
        self.body = []
        self.deprecated = deprecated
        self.deprecation_level = deprecation_level
        self.removal_version = removal_version
@ -523,7 +606,7 @@ class Reference:
            return self.filename > other.filename
    def mismatches(self):
-        copy = Reference(self.namespace, self.filename, self.lineno, self.docstring, self.args, self.optional_args)
+        copy = Reference(self.namespace, self.filename, self.lineno, self.lineno_end, self.docstring, self.args, self._raw_optional_args)
        copy.undef = self.undef
        for filename in self.references:
            mis = [(ln,a,oa) for (ln,a,oa) in self.references[filename] if a is not None and not argmatch(self.args, self.optional_args, a, oa)]
@ -575,7 +658,7 @@ class CrossRef:
        if self.exports(defn.namespace):
            # Macros and resources in subtrees with export=yes are global
            return True
-        elif not self.filelist.neighbors(defn.filename, fn):
+        elif defn.filename != fn and not self.filelist.neighbors(defn.filename, fn):
            # Otherwise, must be in the same subtree.
            return False
        else:
@ -655,7 +738,7 @@ class CrossRef:
                                  % (filename, n+1), file=sys.stderr)
                        else:
                            name = tokens[1]
-                            here = Reference(namespace, filename, n+1, line, args=tokens[2:], optional_args=[])
+                            here = Reference(namespace, filename, n+1, line, None, args=tokens[2:], optional_args=[])
                            here.hash = hashlib.md5()
                            here.docstring = line.lstrip()[8:] # Strip off #define_
                            current_docstring = None
@ -689,6 +772,8 @@ class CrossRef:
                            current_docstring = None
                            state = States.OUTSIDE
                    elif state != States.OUTSIDE and line.strip().endswith("#enddef"):
                        end_def_index = line.index("#enddef")
                        here.body.append(line[0:end_def_index])
                        here.hash.update(line.encode("utf8"))
                        here.hash = here.hash.digest()
                        if name in self.xref:
@ -705,15 +790,22 @@ class CrossRef:
                                            % (here, name, defn), file=sys.stderr)
                        if name not in self.xref:
                            self.xref[name] = []
                        here.lineno_end = n+1
                        self.xref[name].append(here)
                        state = States.OUTSIDE
                    elif state == States.MACRO_HEADER and line.strip():
                        if line.strip().startswith("#arg"):
                            state = States.MACRO_OPTIONAL_ARGUMENT
-                            here.optional_args.append(line.strip().split()[1])
+                            here._raw_optional_args.append([line.strip().split()[1],""])
                        elif line.strip()[0] != "#":
                            state = States.MACRO_BODY
-                    elif state == States.MACRO_OPTIONAL_ARGUMENT and "#endarg" in line:
+                    elif state == States.MACRO_OPTIONAL_ARGUMENT and not "#endarg" in line:
                        here._raw_optional_args[-1][1] += line
                    elif state == States.MACRO_OPTIONAL_ARGUMENT:
                        end_arg_index = line.index("#endarg")
                        here._raw_optional_args[-1][1] += line[0:end_arg_index]
                        here.optional_args = dict(here._raw_optional_args)
                        state = States.MACRO_HEADER
                        continue
                    if state == States.MACRO_HEADER:
@ -738,6 +830,8 @@ class CrossRef:
                                print("Deprecation line not matched found in {}, line {}".format(filename, n+1), file=sys.stderr)
                        else:
                            here.docstring += line.lstrip()[1:]
                    if state == States.MACRO_BODY:
                        here.body.append(line)
                    if state in (States.MACRO_HEADER, States.MACRO_OPTIONAL_ARGUMENT, States.MACRO_BODY):
                        here.hash.update(line.encode("utf8"))
                    elif line.strip().startswith("#undef"):
@ -781,10 +875,16 @@ class CrossRef:
        except UnicodeDecodeError as e:
            print('wmlscope: "{}" is not a valid UTF-8 file'.format(filename), file=sys.stderr)
-    def __init__(self, dirpath=[], exclude="", warnlevel=0, progress=False):
+    def __init__(self, dirpath=[], filelist=None, exclude="", warnlevel=0, progress=False):
        "Build cross-reference object from the specified filelist."
-        self.filelist = Forest(dirpath, exclude)
+        if filelist is None:
-        self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
+            self.filelist = Forest(dirpath, exclude)
            self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
        else:
            # All specified files share the same namespace
            self.filelist = [("src", filename) for filename in filelist]
            self.dirpath = ["src"]
        self.warnlevel = warnlevel
        self.xref = {}
        self.fileref = {}
@ -794,7 +894,7 @@ class CrossRef:
        all_in = []
        if self.warnlevel >=2 or progress:
            print("*** Beginning definition-gathering pass...")
-        for (namespace, filename) in self.filelist.generator():
+        for (namespace, filename) in self.filelist:
            all_in.append((namespace, filename))
            if self.warnlevel > 1:
                print(filename + ":")
@ -878,12 +978,10 @@ class CrossRef:
                                    # Count the number of actual arguments.
                                    # Set args to None if the call doesn't
                                    # close on this line
-                                    (args, optional_args, brackdepth, parendepth) = parse_macroref(match.start(0), line)
+                                    (args, optional_args, is_unfinished) = parse_macroref(match.start(0), line)
-                                    if brackdepth > 0 or parendepth > 0:
+                                    if is_unfinished:
                                        args = None
                                        optional_args = None
                                    else:
                                        args.pop(0)
                                    #if args:
                                    #    print('"%s", line %d: args of %s is %s' \
                                    #          % (fn, n+1, name, args))
@ -968,9 +1066,10 @@ class CrossRef:
                except UnicodeDecodeError as e:
                    pass # to not have the invalid UTF-8 file warning printed twice
        # Check whether each namespace has a defined export property
-        for namespace in self.dirpath:
+        if self.warnlevel >= 1:
-            if namespace not in self.properties or "export" not in self.properties[namespace]:
+            for namespace in self.dirpath:
-                print("warning: %s has no export property" % namespace)
+                if namespace not in self.properties or "export" not in self.properties[namespace]:
                    print("warning: %s has no export property" % namespace)
    def exports(self, namespace):
        return namespace in self.properties and self.properties[namespace].get("export") == "yes"
    def subtract(self, filelist):