Improvements to wmltools3 (#7561)

* wmltools3: Make the CrossRef class iterable

Removes generator() method.

* wmltools3: Check for warn level on lack of wmlscope: set export

* wmltools3: Support passing a list of file paths to initialize CrossRef

* wmltools3: Track lineno_end for Reference

* wmltools3: Track default values for macro optional parameters

optional_args is no longer a list, but a dict.

* wmltools3: Track macro bodies as Reference.body

* wmltools3: Register quoted macro arguments verbatim

* wmltools3: Improve parse_macroref

- Support parsing nested macro calls.
- Remove excessive need of parens around macro calls/arguments.
- Support escaping quotes.
- Other compliance improvements.
This commit is contained in:
Slayer95 2023-05-01 10:08:30 -05:00 committed by GitHub
parent 1819febe92
commit 2413e2f366
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -20,6 +20,13 @@ l10n_directories = ("l10n",)
resource_extensions = map_extensions + image_extensions + sound_extensions
image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.\-\[\]~\*,]*\.(png|jpe?g|webp)(?=(~.*)?)"
EQUALS = '='
QUOTE = '"'
OPEN_BRACE = '{'
CLOSE_BRACE = '}'
OPEN_PARENS = '('
CLOSE_PARENS = ')'
class Substitution(object):
__slots__ = ["sub", "start", "end"]
def __init__(self, sub, start, end):
@ -251,7 +258,8 @@ class Forest:
for tree in self.forest:
allfiles += tree
return allfiles
def generator(self):
def __iter__(self):
"Return a generator that walks through all files."
for (directory, tree) in zip(self.dirpath, self.forest):
for filename in tree:
@ -286,66 +294,138 @@ def isresource(filename):
return ext and ext[1:] in resource_extensions
def parse_macroref(start, line):
def handle_argument():
nonlocal opt_arg
nonlocal arg
nonlocal optional_args
def handle_argument(buffer):
nonlocal args
nonlocal optional_args
opt_arg = ""
arg = "".join(buffer)
# arg may be empty, so arg[0] may be OOB.
if arg[0:1].isspace():
arg = arg[1:]
arg = arg.strip()
# is this an optional argument?
# argument names are usually made of uppercase letters, numbers and underscores
# if they're optional, they're followed by an equal sign
# stop matching on the first one, because the argument value might contain one too
if re.match(r"^([A-Z0-9_]+?)=", arg):
opt_arg, arg = arg.split("=", 1)
if arg.startswith('"') and arg.endswith('"'):
arg = arg[1:-1].strip()
if opt_arg:
optional_args[opt_arg] = arg
opt_arg = ""
else:
args.append(arg)
arg = ""
buffer.clear()
return True
brackdepth = parendepth = 0
instring = False
optional_args = {}
buffer = []
args = []
opt_arg = ""
arg = ""
optional_args = {}
depth = {
EQUALS: 0,
OPEN_BRACE: 0,
OPEN_PARENS: 0,
QUOTE: 0,
}
wrapper_stack = []
prev_added_arg = False
# close_token - Closes all active scopes, until the matching scope is found.
# This is useful, for example, in {MACRO OPT_NAME=VAL}
# In this example, close_token("}") will implicitly close the
# optional argument scope.
def close_token(token):
while len(wrapper_stack) > 0:
last_token = wrapper_stack.pop()
depth[last_token] -= 1
if last_token == token:
break
else:
return False
return True
# close_if_token - Closes the current scope, if it matches the given token.
# Atomic version of close_token, which provides expressivity.
def close_if_token(token):
if wrapper_stack[-1] == token:
wrapper_stack.pop()
depth[token] -= 1
return True
return False
def open_token(token):
wrapper_stack.append(token)
depth[token] += 1
for i in range(start, len(line)):
if instring:
if line[i] == '"':
instring = False
arg += line[i]
elif line[i] == '"':
instring = not instring
arg += line[i]
elif line[i] == "{":
if brackdepth > 0:
arg += line[i]
brackdepth += 1
elif line[i] == "}":
brackdepth -= 1
if brackdepth == 0:
if not line[i-1].isspace():
handle_argument()
added_arg = False
if depth[QUOTE] > 0:
# If EOL, line[i+1] may be OOB, but slice is valid.
if line[i] == QUOTE and line[i+1:i+2] != QUOTE:
close_token(QUOTE)
buffer.append(line[i])
elif line[i] == QUOTE:
open_token(QUOTE)
buffer.append(line[i])
elif line[i] == OPEN_BRACE:
open_token(OPEN_BRACE)
buffer.append(line[i])
elif line[i] == CLOSE_BRACE:
if wrapper_stack[-1] != OPEN_PARENS:
close_token(OPEN_BRACE)
if depth[OPEN_BRACE] == 0:
# Flush at end
if not prev_added_arg and len(buffer) > 0:
added_arg = handle_argument(buffer)
break
else:
arg += line[i]
elif line[i] == "(":
parendepth += 1
elif line[i] == ")":
parendepth -= 1
elif not line[i-1].isspace() and \
line[i].isspace() and \
brackdepth == 1 and \
parendepth == 0:
handle_argument()
elif not line[i].isspace() or parendepth > 0:
arg += line[i]
return (args, optional_args, brackdepth, parendepth)
buffer.append(line[i])
elif line[i] == OPEN_PARENS:
if wrapper_stack[-1] == OPEN_PARENS or wrapper_stack[-2:] == [OPEN_PARENS, EQUALS]:
# Char in an argument
buffer.append(line[i])
else:
if wrapper_stack[-1] == EQUALS or (wrapper_stack[-1] == OPEN_BRACE and \
not line[i-1].isspace()):
close_if_token(EQUALS)
if depth[OPEN_BRACE] == 1 and not prev_added_arg:
added_arg = handle_argument(buffer)
open_token(OPEN_PARENS)
if depth[OPEN_BRACE] != 1:
buffer.append(line[i])
elif line[i] == CLOSE_PARENS:
# Source has too many closing parens.
quit = not close_token(OPEN_PARENS)
if depth[OPEN_BRACE] != 1:
buffer.append(line[i])
elif not prev_added_arg or line[i-1] == OPEN_PARENS:
# {MACRO arg1()} has two arguments
added_arg = handle_argument(buffer)
if quit:
added_arg = handle_argument(buffer)
break
elif line[i] == EQUALS and re.match(r'^([A-Z0-9_]+?)', line[i-1]):
open_token(EQUALS)
buffer.append(line[i])
elif line[i].isspace():
if line[i-1].isspace():
# Ignore consecutive spaces
continue
if not prev_added_arg and \
depth[OPEN_BRACE] == 1 and \
depth[OPEN_PARENS] == 0:
close_if_token(EQUALS)
added_arg = handle_argument(buffer)
buffer.append(line[i])
else:
buffer.append(line[i])
prev_added_arg = added_arg
args.pop(0)
return (args, optional_args, depth[OPEN_BRACE] > 0)
def formaltype(f):
# Deduce the expected type of the formal
@ -488,14 +568,17 @@ def argmatch(formals, optional_formals, actuals, optional_actuals):
@total_ordering
class Reference:
"Describes a location by file and line."
def __init__(self, namespace, filename, lineno=None, docstring=None, args=None,
def __init__(self, namespace, filename, lineno=None, lineno_end=None, docstring=None, args=None,
optional_args=None, deprecated=False, deprecation_level=0, removal_version=None):
self.namespace = namespace
self.filename = filename
self.lineno = lineno
self.lineno_end = lineno_end
self.docstring = docstring
self.args = args
self.optional_args = optional_args
self._raw_optional_args = optional_args
self.optional_args = {}
self.body = []
self.deprecated = deprecated
self.deprecation_level = deprecation_level
self.removal_version = removal_version
@ -523,7 +606,7 @@ class Reference:
return self.filename > other.filename
def mismatches(self):
copy = Reference(self.namespace, self.filename, self.lineno, self.docstring, self.args, self.optional_args)
copy = Reference(self.namespace, self.filename, self.lineno, self.lineno_end, self.docstring, self.args, self._raw_optional_args)
copy.undef = self.undef
for filename in self.references:
mis = [(ln,a,oa) for (ln,a,oa) in self.references[filename] if a is not None and not argmatch(self.args, self.optional_args, a, oa)]
@ -575,7 +658,7 @@ class CrossRef:
if self.exports(defn.namespace):
# Macros and resources in subtrees with export=yes are global
return True
elif not self.filelist.neighbors(defn.filename, fn):
elif defn.filename != fn and not self.filelist.neighbors(defn.filename, fn):
# Otherwise, must be in the same subtree.
return False
else:
@ -655,7 +738,7 @@ class CrossRef:
% (filename, n+1), file=sys.stderr)
else:
name = tokens[1]
here = Reference(namespace, filename, n+1, line, args=tokens[2:], optional_args=[])
here = Reference(namespace, filename, n+1, line, None, args=tokens[2:], optional_args=[])
here.hash = hashlib.md5()
here.docstring = line.lstrip()[8:] # Strip off #define_
current_docstring = None
@ -689,6 +772,8 @@ class CrossRef:
current_docstring = None
state = States.OUTSIDE
elif state != States.OUTSIDE and line.strip().endswith("#enddef"):
end_def_index = line.index("#enddef")
here.body.append(line[0:end_def_index])
here.hash.update(line.encode("utf8"))
here.hash = here.hash.digest()
if name in self.xref:
@ -705,15 +790,22 @@ class CrossRef:
% (here, name, defn), file=sys.stderr)
if name not in self.xref:
self.xref[name] = []
here.lineno_end = n+1
self.xref[name].append(here)
state = States.OUTSIDE
elif state == States.MACRO_HEADER and line.strip():
if line.strip().startswith("#arg"):
state = States.MACRO_OPTIONAL_ARGUMENT
here.optional_args.append(line.strip().split()[1])
here._raw_optional_args.append([line.strip().split()[1],""])
elif line.strip()[0] != "#":
state = States.MACRO_BODY
elif state == States.MACRO_OPTIONAL_ARGUMENT and "#endarg" in line:
elif state == States.MACRO_OPTIONAL_ARGUMENT and not "#endarg" in line:
here._raw_optional_args[-1][1] += line
elif state == States.MACRO_OPTIONAL_ARGUMENT:
end_arg_index = line.index("#endarg")
here._raw_optional_args[-1][1] += line[0:end_arg_index]
here.optional_args = dict(here._raw_optional_args)
state = States.MACRO_HEADER
continue
if state == States.MACRO_HEADER:
@ -738,6 +830,8 @@ class CrossRef:
print("Deprecation line not matched found in {}, line {}".format(filename, n+1), file=sys.stderr)
else:
here.docstring += line.lstrip()[1:]
if state == States.MACRO_BODY:
here.body.append(line)
if state in (States.MACRO_HEADER, States.MACRO_OPTIONAL_ARGUMENT, States.MACRO_BODY):
here.hash.update(line.encode("utf8"))
elif line.strip().startswith("#undef"):
@ -781,10 +875,16 @@ class CrossRef:
except UnicodeDecodeError as e:
print('wmlscope: "{}" is not a valid UTF-8 file'.format(filename), file=sys.stderr)
def __init__(self, dirpath=[], exclude="", warnlevel=0, progress=False):
def __init__(self, dirpath=[], filelist=None, exclude="", warnlevel=0, progress=False):
"Build cross-reference object from the specified filelist."
self.filelist = Forest(dirpath, exclude)
self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
if filelist is None:
self.filelist = Forest(dirpath, exclude)
self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
else:
# All specified files share the same namespace
self.filelist = [("src", filename) for filename in filelist]
self.dirpath = ["src"]
self.warnlevel = warnlevel
self.xref = {}
self.fileref = {}
@ -794,7 +894,7 @@ class CrossRef:
all_in = []
if self.warnlevel >=2 or progress:
print("*** Beginning definition-gathering pass...")
for (namespace, filename) in self.filelist.generator():
for (namespace, filename) in self.filelist:
all_in.append((namespace, filename))
if self.warnlevel > 1:
print(filename + ":")
@ -878,12 +978,10 @@ class CrossRef:
# Count the number of actual arguments.
# Set args to None if the call doesn't
# close on this line
(args, optional_args, brackdepth, parendepth) = parse_macroref(match.start(0), line)
if brackdepth > 0 or parendepth > 0:
(args, optional_args, is_unfinished) = parse_macroref(match.start(0), line)
if is_unfinished:
args = None
optional_args = None
else:
args.pop(0)
#if args:
# print('"%s", line %d: args of %s is %s' \
# % (fn, n+1, name, args))
@ -968,9 +1066,10 @@ class CrossRef:
except UnicodeDecodeError as e:
pass # to not have the invalid UTF-8 file warning printed twice
# Check whether each namespace has a defined export property
for namespace in self.dirpath:
if namespace not in self.properties or "export" not in self.properties[namespace]:
print("warning: %s has no export property" % namespace)
if self.warnlevel >= 1:
for namespace in self.dirpath:
if namespace not in self.properties or "export" not in self.properties[namespace]:
print("warning: %s has no export property" % namespace)
def exports(self, namespace):
return namespace in self.properties and self.properties[namespace].get("export") == "yes"
def subtract(self, filelist):