Improvements to wmltools3 (#7561)

* wmltools3: Make the CrossRef class iterable

Removes generator() method.

* wmltools3: Check for warn level on lack of wmlscope: set export

* wmltools3: Support passing a list of file paths to initialize CrossRef

* wmltools3: Track lineno_end for Reference

* wmltools3: Track default values for macro optional parameters

optional_args is no longer a list, but a dict.

* wmltools3: Track macro bodies as Reference.body

* wmltools3: Register quoted macro arguments verbatim

* wmltools3: Improve parse_macroref

- Support parsing nested macro calls.
- Remove excessive need of parens around macro calls/arguments.
- Support escaping quotes.
- Other compliance improvements.
This commit is contained in:
Slayer95 2023-05-01 10:08:30 -05:00 committed by GitHub
parent 1819febe92
commit 2413e2f366
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -20,6 +20,13 @@ l10n_directories = ("l10n",)
resource_extensions = map_extensions + image_extensions + sound_extensions resource_extensions = map_extensions + image_extensions + sound_extensions
image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.\-\[\]~\*,]*\.(png|jpe?g|webp)(?=(~.*)?)" image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.\-\[\]~\*,]*\.(png|jpe?g|webp)(?=(~.*)?)"
EQUALS = '='
QUOTE = '"'
OPEN_BRACE = '{'
CLOSE_BRACE = '}'
OPEN_PARENS = '('
CLOSE_PARENS = ')'
class Substitution(object): class Substitution(object):
__slots__ = ["sub", "start", "end"] __slots__ = ["sub", "start", "end"]
def __init__(self, sub, start, end): def __init__(self, sub, start, end):
@ -251,7 +258,8 @@ class Forest:
for tree in self.forest: for tree in self.forest:
allfiles += tree allfiles += tree
return allfiles return allfiles
def generator(self):
def __iter__(self):
"Return a generator that walks through all files." "Return a generator that walks through all files."
for (directory, tree) in zip(self.dirpath, self.forest): for (directory, tree) in zip(self.dirpath, self.forest):
for filename in tree: for filename in tree:
@ -286,66 +294,138 @@ def isresource(filename):
return ext and ext[1:] in resource_extensions return ext and ext[1:] in resource_extensions
def parse_macroref(start, line): def parse_macroref(start, line):
def handle_argument(): def handle_argument(buffer):
nonlocal opt_arg
nonlocal arg
nonlocal optional_args
nonlocal args nonlocal args
nonlocal optional_args
opt_arg = ""
arg = "".join(buffer)
# arg may be empty, so arg[0] may be OOB.
if arg[0:1].isspace():
arg = arg[1:]
arg = arg.strip()
# is this an optional argument? # is this an optional argument?
# argument names are usually made of uppercase letters, numbers and underscores # argument names are usually made of uppercase letters, numbers and underscores
# if they're optional, they're followed by an equal sign # if they're optional, they're followed by an equal sign
# stop matching on the first one, because the argument value might contain one too # stop matching on the first one, because the argument value might contain one too
if re.match(r"^([A-Z0-9_]+?)=", arg): if re.match(r"^([A-Z0-9_]+?)=", arg):
opt_arg, arg = arg.split("=", 1) opt_arg, arg = arg.split("=", 1)
if arg.startswith('"') and arg.endswith('"'):
arg = arg[1:-1].strip()
if opt_arg: if opt_arg:
optional_args[opt_arg] = arg optional_args[opt_arg] = arg
opt_arg = "" opt_arg = ""
else: else:
args.append(arg) args.append(arg)
arg = "" buffer.clear()
return True
brackdepth = parendepth = 0 buffer = []
instring = False
optional_args = {}
args = [] args = []
opt_arg = "" optional_args = {}
arg = ""
depth = {
EQUALS: 0,
OPEN_BRACE: 0,
OPEN_PARENS: 0,
QUOTE: 0,
}
wrapper_stack = []
prev_added_arg = False
# close_token - Closes all active scopes, until the matching scope is found.
# This is useful, for example, in {MACRO OPT_NAME=VAL}
# In this example, close_token("}") will implicitly close the
# optional argument scope.
def close_token(token):
while len(wrapper_stack) > 0:
last_token = wrapper_stack.pop()
depth[last_token] -= 1
if last_token == token:
break
else:
return False
return True
# close_if_token - Closes the current scope, if it matches the given token.
# Atomic version of close_token, which provides expressivity.
def close_if_token(token):
if wrapper_stack[-1] == token:
wrapper_stack.pop()
depth[token] -= 1
return True
return False
def open_token(token):
wrapper_stack.append(token)
depth[token] += 1
for i in range(start, len(line)): for i in range(start, len(line)):
if instring: added_arg = False
if line[i] == '"': if depth[QUOTE] > 0:
instring = False # If EOL, line[i+1] may be OOB, but slice is valid.
arg += line[i] if line[i] == QUOTE and line[i+1:i+2] != QUOTE:
elif line[i] == '"': close_token(QUOTE)
instring = not instring buffer.append(line[i])
arg += line[i] elif line[i] == QUOTE:
elif line[i] == "{": open_token(QUOTE)
if brackdepth > 0: buffer.append(line[i])
arg += line[i] elif line[i] == OPEN_BRACE:
brackdepth += 1 open_token(OPEN_BRACE)
elif line[i] == "}": buffer.append(line[i])
brackdepth -= 1 elif line[i] == CLOSE_BRACE:
if brackdepth == 0: if wrapper_stack[-1] != OPEN_PARENS:
if not line[i-1].isspace(): close_token(OPEN_BRACE)
handle_argument() if depth[OPEN_BRACE] == 0:
# Flush at end
if not prev_added_arg and len(buffer) > 0:
added_arg = handle_argument(buffer)
break break
else: else:
arg += line[i] buffer.append(line[i])
elif line[i] == "(": elif line[i] == OPEN_PARENS:
parendepth += 1 if wrapper_stack[-1] == OPEN_PARENS or wrapper_stack[-2:] == [OPEN_PARENS, EQUALS]:
elif line[i] == ")": # Char in an argument
parendepth -= 1 buffer.append(line[i])
elif not line[i-1].isspace() and \ else:
line[i].isspace() and \ if wrapper_stack[-1] == EQUALS or (wrapper_stack[-1] == OPEN_BRACE and \
brackdepth == 1 and \ not line[i-1].isspace()):
parendepth == 0: close_if_token(EQUALS)
handle_argument() if depth[OPEN_BRACE] == 1 and not prev_added_arg:
elif not line[i].isspace() or parendepth > 0: added_arg = handle_argument(buffer)
arg += line[i] open_token(OPEN_PARENS)
return (args, optional_args, brackdepth, parendepth) if depth[OPEN_BRACE] != 1:
buffer.append(line[i])
elif line[i] == CLOSE_PARENS:
# Source has too many closing parens.
quit = not close_token(OPEN_PARENS)
if depth[OPEN_BRACE] != 1:
buffer.append(line[i])
elif not prev_added_arg or line[i-1] == OPEN_PARENS:
# {MACRO arg1()} has two arguments
added_arg = handle_argument(buffer)
if quit:
added_arg = handle_argument(buffer)
break
elif line[i] == EQUALS and re.match(r'^([A-Z0-9_]+?)', line[i-1]):
open_token(EQUALS)
buffer.append(line[i])
elif line[i].isspace():
if line[i-1].isspace():
# Ignore consecutive spaces
continue
if not prev_added_arg and \
depth[OPEN_BRACE] == 1 and \
depth[OPEN_PARENS] == 0:
close_if_token(EQUALS)
added_arg = handle_argument(buffer)
buffer.append(line[i])
else:
buffer.append(line[i])
prev_added_arg = added_arg
args.pop(0)
return (args, optional_args, depth[OPEN_BRACE] > 0)
def formaltype(f): def formaltype(f):
# Deduce the expected type of the formal # Deduce the expected type of the formal
@ -488,14 +568,17 @@ def argmatch(formals, optional_formals, actuals, optional_actuals):
@total_ordering @total_ordering
class Reference: class Reference:
"Describes a location by file and line." "Describes a location by file and line."
def __init__(self, namespace, filename, lineno=None, docstring=None, args=None, def __init__(self, namespace, filename, lineno=None, lineno_end=None, docstring=None, args=None,
optional_args=None, deprecated=False, deprecation_level=0, removal_version=None): optional_args=None, deprecated=False, deprecation_level=0, removal_version=None):
self.namespace = namespace self.namespace = namespace
self.filename = filename self.filename = filename
self.lineno = lineno self.lineno = lineno
self.lineno_end = lineno_end
self.docstring = docstring self.docstring = docstring
self.args = args self.args = args
self.optional_args = optional_args self._raw_optional_args = optional_args
self.optional_args = {}
self.body = []
self.deprecated = deprecated self.deprecated = deprecated
self.deprecation_level = deprecation_level self.deprecation_level = deprecation_level
self.removal_version = removal_version self.removal_version = removal_version
@ -523,7 +606,7 @@ class Reference:
return self.filename > other.filename return self.filename > other.filename
def mismatches(self): def mismatches(self):
copy = Reference(self.namespace, self.filename, self.lineno, self.docstring, self.args, self.optional_args) copy = Reference(self.namespace, self.filename, self.lineno, self.lineno_end, self.docstring, self.args, self._raw_optional_args)
copy.undef = self.undef copy.undef = self.undef
for filename in self.references: for filename in self.references:
mis = [(ln,a,oa) for (ln,a,oa) in self.references[filename] if a is not None and not argmatch(self.args, self.optional_args, a, oa)] mis = [(ln,a,oa) for (ln,a,oa) in self.references[filename] if a is not None and not argmatch(self.args, self.optional_args, a, oa)]
@ -575,7 +658,7 @@ class CrossRef:
if self.exports(defn.namespace): if self.exports(defn.namespace):
# Macros and resources in subtrees with export=yes are global # Macros and resources in subtrees with export=yes are global
return True return True
elif not self.filelist.neighbors(defn.filename, fn): elif defn.filename != fn and not self.filelist.neighbors(defn.filename, fn):
# Otherwise, must be in the same subtree. # Otherwise, must be in the same subtree.
return False return False
else: else:
@ -655,7 +738,7 @@ class CrossRef:
% (filename, n+1), file=sys.stderr) % (filename, n+1), file=sys.stderr)
else: else:
name = tokens[1] name = tokens[1]
here = Reference(namespace, filename, n+1, line, args=tokens[2:], optional_args=[]) here = Reference(namespace, filename, n+1, line, None, args=tokens[2:], optional_args=[])
here.hash = hashlib.md5() here.hash = hashlib.md5()
here.docstring = line.lstrip()[8:] # Strip off #define_ here.docstring = line.lstrip()[8:] # Strip off #define_
current_docstring = None current_docstring = None
@ -689,6 +772,8 @@ class CrossRef:
current_docstring = None current_docstring = None
state = States.OUTSIDE state = States.OUTSIDE
elif state != States.OUTSIDE and line.strip().endswith("#enddef"): elif state != States.OUTSIDE and line.strip().endswith("#enddef"):
end_def_index = line.index("#enddef")
here.body.append(line[0:end_def_index])
here.hash.update(line.encode("utf8")) here.hash.update(line.encode("utf8"))
here.hash = here.hash.digest() here.hash = here.hash.digest()
if name in self.xref: if name in self.xref:
@ -705,15 +790,22 @@ class CrossRef:
% (here, name, defn), file=sys.stderr) % (here, name, defn), file=sys.stderr)
if name not in self.xref: if name not in self.xref:
self.xref[name] = [] self.xref[name] = []
here.lineno_end = n+1
self.xref[name].append(here) self.xref[name].append(here)
state = States.OUTSIDE state = States.OUTSIDE
elif state == States.MACRO_HEADER and line.strip(): elif state == States.MACRO_HEADER and line.strip():
if line.strip().startswith("#arg"): if line.strip().startswith("#arg"):
state = States.MACRO_OPTIONAL_ARGUMENT state = States.MACRO_OPTIONAL_ARGUMENT
here.optional_args.append(line.strip().split()[1]) here._raw_optional_args.append([line.strip().split()[1],""])
elif line.strip()[0] != "#": elif line.strip()[0] != "#":
state = States.MACRO_BODY state = States.MACRO_BODY
elif state == States.MACRO_OPTIONAL_ARGUMENT and "#endarg" in line: elif state == States.MACRO_OPTIONAL_ARGUMENT and not "#endarg" in line:
here._raw_optional_args[-1][1] += line
elif state == States.MACRO_OPTIONAL_ARGUMENT:
end_arg_index = line.index("#endarg")
here._raw_optional_args[-1][1] += line[0:end_arg_index]
here.optional_args = dict(here._raw_optional_args)
state = States.MACRO_HEADER state = States.MACRO_HEADER
continue continue
if state == States.MACRO_HEADER: if state == States.MACRO_HEADER:
@ -738,6 +830,8 @@ class CrossRef:
print("Deprecation line not matched found in {}, line {}".format(filename, n+1), file=sys.stderr) print("Deprecation line not matched found in {}, line {}".format(filename, n+1), file=sys.stderr)
else: else:
here.docstring += line.lstrip()[1:] here.docstring += line.lstrip()[1:]
if state == States.MACRO_BODY:
here.body.append(line)
if state in (States.MACRO_HEADER, States.MACRO_OPTIONAL_ARGUMENT, States.MACRO_BODY): if state in (States.MACRO_HEADER, States.MACRO_OPTIONAL_ARGUMENT, States.MACRO_BODY):
here.hash.update(line.encode("utf8")) here.hash.update(line.encode("utf8"))
elif line.strip().startswith("#undef"): elif line.strip().startswith("#undef"):
@ -781,10 +875,16 @@ class CrossRef:
except UnicodeDecodeError as e: except UnicodeDecodeError as e:
print('wmlscope: "{}" is not a valid UTF-8 file'.format(filename), file=sys.stderr) print('wmlscope: "{}" is not a valid UTF-8 file'.format(filename), file=sys.stderr)
def __init__(self, dirpath=[], exclude="", warnlevel=0, progress=False): def __init__(self, dirpath=[], filelist=None, exclude="", warnlevel=0, progress=False):
"Build cross-reference object from the specified filelist." "Build cross-reference object from the specified filelist."
self.filelist = Forest(dirpath, exclude) if filelist is None:
self.dirpath = [x for x in dirpath if not re.search(exclude, x)] self.filelist = Forest(dirpath, exclude)
self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
else:
# All specified files share the same namespace
self.filelist = [("src", filename) for filename in filelist]
self.dirpath = ["src"]
self.warnlevel = warnlevel self.warnlevel = warnlevel
self.xref = {} self.xref = {}
self.fileref = {} self.fileref = {}
@ -794,7 +894,7 @@ class CrossRef:
all_in = [] all_in = []
if self.warnlevel >=2 or progress: if self.warnlevel >=2 or progress:
print("*** Beginning definition-gathering pass...") print("*** Beginning definition-gathering pass...")
for (namespace, filename) in self.filelist.generator(): for (namespace, filename) in self.filelist:
all_in.append((namespace, filename)) all_in.append((namespace, filename))
if self.warnlevel > 1: if self.warnlevel > 1:
print(filename + ":") print(filename + ":")
@ -878,12 +978,10 @@ class CrossRef:
# Count the number of actual arguments. # Count the number of actual arguments.
# Set args to None if the call doesn't # Set args to None if the call doesn't
# close on this line # close on this line
(args, optional_args, brackdepth, parendepth) = parse_macroref(match.start(0), line) (args, optional_args, is_unfinished) = parse_macroref(match.start(0), line)
if brackdepth > 0 or parendepth > 0: if is_unfinished:
args = None args = None
optional_args = None optional_args = None
else:
args.pop(0)
#if args: #if args:
# print('"%s", line %d: args of %s is %s' \ # print('"%s", line %d: args of %s is %s' \
# % (fn, n+1, name, args)) # % (fn, n+1, name, args))
@ -968,9 +1066,10 @@ class CrossRef:
except UnicodeDecodeError as e: except UnicodeDecodeError as e:
pass # to not have the invalid UTF-8 file warning printed twice pass # to not have the invalid UTF-8 file warning printed twice
# Check whether each namespace has a defined export property # Check whether each namespace has a defined export property
for namespace in self.dirpath: if self.warnlevel >= 1:
if namespace not in self.properties or "export" not in self.properties[namespace]: for namespace in self.dirpath:
print("warning: %s has no export property" % namespace) if namespace not in self.properties or "export" not in self.properties[namespace]:
print("warning: %s has no export property" % namespace)
def exports(self, namespace): def exports(self, namespace):
return namespace in self.properties and self.properties[namespace].get("export") == "yes" return namespace in self.properties and self.properties[namespace].get("export") == "yes"
def subtract(self, filelist): def subtract(self, filelist):