Improvements to wmltools3 (#7561)
* wmltools3: Make the CrossRef class iterable
  Removes generator() method.
* wmltools3: Check for warn level on lack of wmlscope: set export
* wmltools3: Support passing a list of file paths to initialize CrossRef
* wmltools3: Track lineno_end for Reference
* wmltools3: Track default values for macro optional parameters
  optional_args is no longer a list, but a dict.
* wmltools3: Track macro bodies as Reference.body
* wmltools3: Register quoted macro arguments verbatim
* wmltools3: Improve parse_macroref
  - Support parsing nested macro calls.
  - Remove excessive need of parens around macro calls/arguments.
  - Support escaping quotes.
  - Other compliance improvements.
This commit is contained in:
parent
1819febe92
commit
2413e2f366
1 changed file with 160 additions and 61 deletions
|
@ -20,6 +20,13 @@ l10n_directories = ("l10n",)
|
||||||
resource_extensions = map_extensions + image_extensions + sound_extensions
|
resource_extensions = map_extensions + image_extensions + sound_extensions
|
||||||
image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.\-\[\]~\*,]*\.(png|jpe?g|webp)(?=(~.*)?)"
|
image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.\-\[\]~\*,]*\.(png|jpe?g|webp)(?=(~.*)?)"
|
||||||
|
|
||||||
|
EQUALS = '='
|
||||||
|
QUOTE = '"'
|
||||||
|
OPEN_BRACE = '{'
|
||||||
|
CLOSE_BRACE = '}'
|
||||||
|
OPEN_PARENS = '('
|
||||||
|
CLOSE_PARENS = ')'
|
||||||
|
|
||||||
class Substitution(object):
|
class Substitution(object):
|
||||||
__slots__ = ["sub", "start", "end"]
|
__slots__ = ["sub", "start", "end"]
|
||||||
def __init__(self, sub, start, end):
|
def __init__(self, sub, start, end):
|
||||||
|
@ -251,7 +258,8 @@ class Forest:
|
||||||
for tree in self.forest:
|
for tree in self.forest:
|
||||||
allfiles += tree
|
allfiles += tree
|
||||||
return allfiles
|
return allfiles
|
||||||
def generator(self):
|
|
||||||
|
def __iter__(self):
|
||||||
"Return a generator that walks through all files."
|
"Return a generator that walks through all files."
|
||||||
for (directory, tree) in zip(self.dirpath, self.forest):
|
for (directory, tree) in zip(self.dirpath, self.forest):
|
||||||
for filename in tree:
|
for filename in tree:
|
||||||
|
@ -286,66 +294,138 @@ def isresource(filename):
|
||||||
return ext and ext[1:] in resource_extensions
|
return ext and ext[1:] in resource_extensions
|
||||||
|
|
||||||
def parse_macroref(start, line):
|
def parse_macroref(start, line):
|
||||||
def handle_argument():
|
def handle_argument(buffer):
|
||||||
nonlocal opt_arg
|
|
||||||
nonlocal arg
|
|
||||||
nonlocal optional_args
|
|
||||||
nonlocal args
|
nonlocal args
|
||||||
|
nonlocal optional_args
|
||||||
|
|
||||||
|
opt_arg = ""
|
||||||
|
|
||||||
|
arg = "".join(buffer)
|
||||||
|
# arg may be empty, so arg[0] may be OOB.
|
||||||
|
if arg[0:1].isspace():
|
||||||
|
arg = arg[1:]
|
||||||
|
|
||||||
arg = arg.strip()
|
|
||||||
# is this an optional argument?
|
# is this an optional argument?
|
||||||
# argument names are usually made of uppercase letters, numbers and underscores
|
# argument names are usually made of uppercase letters, numbers and underscores
|
||||||
# if they're optional, they're followed by an equal sign
|
# if they're optional, they're followed by an equal sign
|
||||||
# stop matching on the first one, because the argument value might contain one too
|
# stop matching on the first one, because the argument value might contain one too
|
||||||
if re.match(r"^([A-Z0-9_]+?)=", arg):
|
if re.match(r"^([A-Z0-9_]+?)=", arg):
|
||||||
opt_arg, arg = arg.split("=", 1)
|
opt_arg, arg = arg.split("=", 1)
|
||||||
if arg.startswith('"') and arg.endswith('"'):
|
|
||||||
arg = arg[1:-1].strip()
|
|
||||||
if opt_arg:
|
if opt_arg:
|
||||||
optional_args[opt_arg] = arg
|
optional_args[opt_arg] = arg
|
||||||
opt_arg = ""
|
opt_arg = ""
|
||||||
else:
|
else:
|
||||||
args.append(arg)
|
args.append(arg)
|
||||||
arg = ""
|
buffer.clear()
|
||||||
|
return True
|
||||||
|
|
||||||
brackdepth = parendepth = 0
|
buffer = []
|
||||||
instring = False
|
|
||||||
optional_args = {}
|
|
||||||
args = []
|
args = []
|
||||||
opt_arg = ""
|
optional_args = {}
|
||||||
arg = ""
|
|
||||||
|
depth = {
|
||||||
|
EQUALS: 0,
|
||||||
|
OPEN_BRACE: 0,
|
||||||
|
OPEN_PARENS: 0,
|
||||||
|
QUOTE: 0,
|
||||||
|
}
|
||||||
|
wrapper_stack = []
|
||||||
|
prev_added_arg = False
|
||||||
|
|
||||||
|
# close_token - Closes all active scopes, until the matching scope is found.
|
||||||
|
# This is useful, for example, in {MACRO OPT_NAME=VAL}
|
||||||
|
# In this example, close_token("}") will implicitly close the
|
||||||
|
# optional argument scope.
|
||||||
|
def close_token(token):
|
||||||
|
while len(wrapper_stack) > 0:
|
||||||
|
last_token = wrapper_stack.pop()
|
||||||
|
depth[last_token] -= 1
|
||||||
|
if last_token == token:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
# close_if_token - Closes the current scope, if it matches the given token.
|
||||||
|
# Atomic version of close_token, which provides expressivity.
|
||||||
|
def close_if_token(token):
|
||||||
|
if wrapper_stack[-1] == token:
|
||||||
|
wrapper_stack.pop()
|
||||||
|
depth[token] -= 1
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
|
||||||
|
def open_token(token):
|
||||||
|
wrapper_stack.append(token)
|
||||||
|
depth[token] += 1
|
||||||
|
|
||||||
for i in range(start, len(line)):
|
for i in range(start, len(line)):
|
||||||
if instring:
|
added_arg = False
|
||||||
if line[i] == '"':
|
if depth[QUOTE] > 0:
|
||||||
instring = False
|
# If EOL, line[i+1] may be OOB, but slice is valid.
|
||||||
arg += line[i]
|
if line[i] == QUOTE and line[i+1:i+2] != QUOTE:
|
||||||
elif line[i] == '"':
|
close_token(QUOTE)
|
||||||
instring = not instring
|
buffer.append(line[i])
|
||||||
arg += line[i]
|
elif line[i] == QUOTE:
|
||||||
elif line[i] == "{":
|
open_token(QUOTE)
|
||||||
if brackdepth > 0:
|
buffer.append(line[i])
|
||||||
arg += line[i]
|
elif line[i] == OPEN_BRACE:
|
||||||
brackdepth += 1
|
open_token(OPEN_BRACE)
|
||||||
elif line[i] == "}":
|
buffer.append(line[i])
|
||||||
brackdepth -= 1
|
elif line[i] == CLOSE_BRACE:
|
||||||
if brackdepth == 0:
|
if wrapper_stack[-1] != OPEN_PARENS:
|
||||||
if not line[i-1].isspace():
|
close_token(OPEN_BRACE)
|
||||||
handle_argument()
|
if depth[OPEN_BRACE] == 0:
|
||||||
|
# Flush at end
|
||||||
|
if not prev_added_arg and len(buffer) > 0:
|
||||||
|
added_arg = handle_argument(buffer)
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
arg += line[i]
|
buffer.append(line[i])
|
||||||
elif line[i] == "(":
|
elif line[i] == OPEN_PARENS:
|
||||||
parendepth += 1
|
if wrapper_stack[-1] == OPEN_PARENS or wrapper_stack[-2:] == [OPEN_PARENS, EQUALS]:
|
||||||
elif line[i] == ")":
|
# Char in an argument
|
||||||
parendepth -= 1
|
buffer.append(line[i])
|
||||||
elif not line[i-1].isspace() and \
|
else:
|
||||||
line[i].isspace() and \
|
if wrapper_stack[-1] == EQUALS or (wrapper_stack[-1] == OPEN_BRACE and \
|
||||||
brackdepth == 1 and \
|
not line[i-1].isspace()):
|
||||||
parendepth == 0:
|
close_if_token(EQUALS)
|
||||||
handle_argument()
|
if depth[OPEN_BRACE] == 1 and not prev_added_arg:
|
||||||
elif not line[i].isspace() or parendepth > 0:
|
added_arg = handle_argument(buffer)
|
||||||
arg += line[i]
|
open_token(OPEN_PARENS)
|
||||||
return (args, optional_args, brackdepth, parendepth)
|
if depth[OPEN_BRACE] != 1:
|
||||||
|
buffer.append(line[i])
|
||||||
|
elif line[i] == CLOSE_PARENS:
|
||||||
|
# Source has too many closing parens.
|
||||||
|
quit = not close_token(OPEN_PARENS)
|
||||||
|
if depth[OPEN_BRACE] != 1:
|
||||||
|
buffer.append(line[i])
|
||||||
|
elif not prev_added_arg or line[i-1] == OPEN_PARENS:
|
||||||
|
# {MACRO arg1()} has two arguments
|
||||||
|
added_arg = handle_argument(buffer)
|
||||||
|
if quit:
|
||||||
|
added_arg = handle_argument(buffer)
|
||||||
|
break
|
||||||
|
elif line[i] == EQUALS and re.match(r'^([A-Z0-9_]+?)', line[i-1]):
|
||||||
|
open_token(EQUALS)
|
||||||
|
buffer.append(line[i])
|
||||||
|
elif line[i].isspace():
|
||||||
|
if line[i-1].isspace():
|
||||||
|
# Ignore consecutive spaces
|
||||||
|
continue
|
||||||
|
if not prev_added_arg and \
|
||||||
|
depth[OPEN_BRACE] == 1 and \
|
||||||
|
depth[OPEN_PARENS] == 0:
|
||||||
|
close_if_token(EQUALS)
|
||||||
|
added_arg = handle_argument(buffer)
|
||||||
|
buffer.append(line[i])
|
||||||
|
else:
|
||||||
|
buffer.append(line[i])
|
||||||
|
|
||||||
|
prev_added_arg = added_arg
|
||||||
|
|
||||||
|
args.pop(0)
|
||||||
|
return (args, optional_args, depth[OPEN_BRACE] > 0)
|
||||||
|
|
||||||
def formaltype(f):
|
def formaltype(f):
|
||||||
# Deduce the expected type of the formal
|
# Deduce the expected type of the formal
|
||||||
|
@ -488,14 +568,17 @@ def argmatch(formals, optional_formals, actuals, optional_actuals):
|
||||||
@total_ordering
|
@total_ordering
|
||||||
class Reference:
|
class Reference:
|
||||||
"Describes a location by file and line."
|
"Describes a location by file and line."
|
||||||
def __init__(self, namespace, filename, lineno=None, docstring=None, args=None,
|
def __init__(self, namespace, filename, lineno=None, lineno_end=None, docstring=None, args=None,
|
||||||
optional_args=None, deprecated=False, deprecation_level=0, removal_version=None):
|
optional_args=None, deprecated=False, deprecation_level=0, removal_version=None):
|
||||||
self.namespace = namespace
|
self.namespace = namespace
|
||||||
self.filename = filename
|
self.filename = filename
|
||||||
self.lineno = lineno
|
self.lineno = lineno
|
||||||
|
self.lineno_end = lineno_end
|
||||||
self.docstring = docstring
|
self.docstring = docstring
|
||||||
self.args = args
|
self.args = args
|
||||||
self.optional_args = optional_args
|
self._raw_optional_args = optional_args
|
||||||
|
self.optional_args = {}
|
||||||
|
self.body = []
|
||||||
self.deprecated = deprecated
|
self.deprecated = deprecated
|
||||||
self.deprecation_level = deprecation_level
|
self.deprecation_level = deprecation_level
|
||||||
self.removal_version = removal_version
|
self.removal_version = removal_version
|
||||||
|
@ -523,7 +606,7 @@ class Reference:
|
||||||
return self.filename > other.filename
|
return self.filename > other.filename
|
||||||
|
|
||||||
def mismatches(self):
|
def mismatches(self):
|
||||||
copy = Reference(self.namespace, self.filename, self.lineno, self.docstring, self.args, self.optional_args)
|
copy = Reference(self.namespace, self.filename, self.lineno, self.lineno_end, self.docstring, self.args, self._raw_optional_args)
|
||||||
copy.undef = self.undef
|
copy.undef = self.undef
|
||||||
for filename in self.references:
|
for filename in self.references:
|
||||||
mis = [(ln,a,oa) for (ln,a,oa) in self.references[filename] if a is not None and not argmatch(self.args, self.optional_args, a, oa)]
|
mis = [(ln,a,oa) for (ln,a,oa) in self.references[filename] if a is not None and not argmatch(self.args, self.optional_args, a, oa)]
|
||||||
|
@ -575,7 +658,7 @@ class CrossRef:
|
||||||
if self.exports(defn.namespace):
|
if self.exports(defn.namespace):
|
||||||
# Macros and resources in subtrees with export=yes are global
|
# Macros and resources in subtrees with export=yes are global
|
||||||
return True
|
return True
|
||||||
elif not self.filelist.neighbors(defn.filename, fn):
|
elif defn.filename != fn and not self.filelist.neighbors(defn.filename, fn):
|
||||||
# Otherwise, must be in the same subtree.
|
# Otherwise, must be in the same subtree.
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
|
@ -655,7 +738,7 @@ class CrossRef:
|
||||||
% (filename, n+1), file=sys.stderr)
|
% (filename, n+1), file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
name = tokens[1]
|
name = tokens[1]
|
||||||
here = Reference(namespace, filename, n+1, line, args=tokens[2:], optional_args=[])
|
here = Reference(namespace, filename, n+1, line, None, args=tokens[2:], optional_args=[])
|
||||||
here.hash = hashlib.md5()
|
here.hash = hashlib.md5()
|
||||||
here.docstring = line.lstrip()[8:] # Strip off #define_
|
here.docstring = line.lstrip()[8:] # Strip off #define_
|
||||||
current_docstring = None
|
current_docstring = None
|
||||||
|
@ -689,6 +772,8 @@ class CrossRef:
|
||||||
current_docstring = None
|
current_docstring = None
|
||||||
state = States.OUTSIDE
|
state = States.OUTSIDE
|
||||||
elif state != States.OUTSIDE and line.strip().endswith("#enddef"):
|
elif state != States.OUTSIDE and line.strip().endswith("#enddef"):
|
||||||
|
end_def_index = line.index("#enddef")
|
||||||
|
here.body.append(line[0:end_def_index])
|
||||||
here.hash.update(line.encode("utf8"))
|
here.hash.update(line.encode("utf8"))
|
||||||
here.hash = here.hash.digest()
|
here.hash = here.hash.digest()
|
||||||
if name in self.xref:
|
if name in self.xref:
|
||||||
|
@ -705,15 +790,22 @@ class CrossRef:
|
||||||
% (here, name, defn), file=sys.stderr)
|
% (here, name, defn), file=sys.stderr)
|
||||||
if name not in self.xref:
|
if name not in self.xref:
|
||||||
self.xref[name] = []
|
self.xref[name] = []
|
||||||
|
|
||||||
|
here.lineno_end = n+1
|
||||||
self.xref[name].append(here)
|
self.xref[name].append(here)
|
||||||
state = States.OUTSIDE
|
state = States.OUTSIDE
|
||||||
elif state == States.MACRO_HEADER and line.strip():
|
elif state == States.MACRO_HEADER and line.strip():
|
||||||
if line.strip().startswith("#arg"):
|
if line.strip().startswith("#arg"):
|
||||||
state = States.MACRO_OPTIONAL_ARGUMENT
|
state = States.MACRO_OPTIONAL_ARGUMENT
|
||||||
here.optional_args.append(line.strip().split()[1])
|
here._raw_optional_args.append([line.strip().split()[1],""])
|
||||||
elif line.strip()[0] != "#":
|
elif line.strip()[0] != "#":
|
||||||
state = States.MACRO_BODY
|
state = States.MACRO_BODY
|
||||||
elif state == States.MACRO_OPTIONAL_ARGUMENT and "#endarg" in line:
|
elif state == States.MACRO_OPTIONAL_ARGUMENT and not "#endarg" in line:
|
||||||
|
here._raw_optional_args[-1][1] += line
|
||||||
|
elif state == States.MACRO_OPTIONAL_ARGUMENT:
|
||||||
|
end_arg_index = line.index("#endarg")
|
||||||
|
here._raw_optional_args[-1][1] += line[0:end_arg_index]
|
||||||
|
here.optional_args = dict(here._raw_optional_args)
|
||||||
state = States.MACRO_HEADER
|
state = States.MACRO_HEADER
|
||||||
continue
|
continue
|
||||||
if state == States.MACRO_HEADER:
|
if state == States.MACRO_HEADER:
|
||||||
|
@ -738,6 +830,8 @@ class CrossRef:
|
||||||
print("Deprecation line not matched found in {}, line {}".format(filename, n+1), file=sys.stderr)
|
print("Deprecation line not matched found in {}, line {}".format(filename, n+1), file=sys.stderr)
|
||||||
else:
|
else:
|
||||||
here.docstring += line.lstrip()[1:]
|
here.docstring += line.lstrip()[1:]
|
||||||
|
if state == States.MACRO_BODY:
|
||||||
|
here.body.append(line)
|
||||||
if state in (States.MACRO_HEADER, States.MACRO_OPTIONAL_ARGUMENT, States.MACRO_BODY):
|
if state in (States.MACRO_HEADER, States.MACRO_OPTIONAL_ARGUMENT, States.MACRO_BODY):
|
||||||
here.hash.update(line.encode("utf8"))
|
here.hash.update(line.encode("utf8"))
|
||||||
elif line.strip().startswith("#undef"):
|
elif line.strip().startswith("#undef"):
|
||||||
|
@ -781,10 +875,16 @@ class CrossRef:
|
||||||
except UnicodeDecodeError as e:
|
except UnicodeDecodeError as e:
|
||||||
print('wmlscope: "{}" is not a valid UTF-8 file'.format(filename), file=sys.stderr)
|
print('wmlscope: "{}" is not a valid UTF-8 file'.format(filename), file=sys.stderr)
|
||||||
|
|
||||||
def __init__(self, dirpath=[], exclude="", warnlevel=0, progress=False):
|
def __init__(self, dirpath=[], filelist=None, exclude="", warnlevel=0, progress=False):
|
||||||
"Build cross-reference object from the specified filelist."
|
"Build cross-reference object from the specified filelist."
|
||||||
self.filelist = Forest(dirpath, exclude)
|
if filelist is None:
|
||||||
self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
|
self.filelist = Forest(dirpath, exclude)
|
||||||
|
self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
|
||||||
|
else:
|
||||||
|
# All specified files share the same namespace
|
||||||
|
self.filelist = [("src", filename) for filename in filelist]
|
||||||
|
self.dirpath = ["src"]
|
||||||
|
|
||||||
self.warnlevel = warnlevel
|
self.warnlevel = warnlevel
|
||||||
self.xref = {}
|
self.xref = {}
|
||||||
self.fileref = {}
|
self.fileref = {}
|
||||||
|
@ -794,7 +894,7 @@ class CrossRef:
|
||||||
all_in = []
|
all_in = []
|
||||||
if self.warnlevel >=2 or progress:
|
if self.warnlevel >=2 or progress:
|
||||||
print("*** Beginning definition-gathering pass...")
|
print("*** Beginning definition-gathering pass...")
|
||||||
for (namespace, filename) in self.filelist.generator():
|
for (namespace, filename) in self.filelist:
|
||||||
all_in.append((namespace, filename))
|
all_in.append((namespace, filename))
|
||||||
if self.warnlevel > 1:
|
if self.warnlevel > 1:
|
||||||
print(filename + ":")
|
print(filename + ":")
|
||||||
|
@ -878,12 +978,10 @@ class CrossRef:
|
||||||
# Count the number of actual arguments.
|
# Count the number of actual arguments.
|
||||||
# Set args to None if the call doesn't
|
# Set args to None if the call doesn't
|
||||||
# close on this line
|
# close on this line
|
||||||
(args, optional_args, brackdepth, parendepth) = parse_macroref(match.start(0), line)
|
(args, optional_args, is_unfinished) = parse_macroref(match.start(0), line)
|
||||||
if brackdepth > 0 or parendepth > 0:
|
if is_unfinished:
|
||||||
args = None
|
args = None
|
||||||
optional_args = None
|
optional_args = None
|
||||||
else:
|
|
||||||
args.pop(0)
|
|
||||||
#if args:
|
#if args:
|
||||||
# print('"%s", line %d: args of %s is %s' \
|
# print('"%s", line %d: args of %s is %s' \
|
||||||
# % (fn, n+1, name, args))
|
# % (fn, n+1, name, args))
|
||||||
|
@ -968,9 +1066,10 @@ class CrossRef:
|
||||||
except UnicodeDecodeError as e:
|
except UnicodeDecodeError as e:
|
||||||
pass # to not have the invalid UTF-8 file warning printed twice
|
pass # to not have the invalid UTF-8 file warning printed twice
|
||||||
# Check whether each namespace has a defined export property
|
# Check whether each namespace has a defined export property
|
||||||
for namespace in self.dirpath:
|
if self.warnlevel >= 1:
|
||||||
if namespace not in self.properties or "export" not in self.properties[namespace]:
|
for namespace in self.dirpath:
|
||||||
print("warning: %s has no export property" % namespace)
|
if namespace not in self.properties or "export" not in self.properties[namespace]:
|
||||||
|
print("warning: %s has no export property" % namespace)
|
||||||
def exports(self, namespace):
|
def exports(self, namespace):
|
||||||
return namespace in self.properties and self.properties[namespace].get("export") == "yes"
|
return namespace in self.properties and self.properties[namespace].get("export") == "yes"
|
||||||
def subtract(self, filelist):
|
def subtract(self, filelist):
|
||||||
|
|
Loading…
Add table
Reference in a new issue