wesnoth/data/tools/wmlscope
Eric Gallager 4915f3be86 wmlscope: fix format of magic comment in documentation
It's not just `prune`, but rather `wmlscope: prune`
2024-08-18 10:56:50 +02:00

571 lines
27 KiB
Python
Executable file

#!/usr/bin/env python3
# encoding: utf-8
#
# wmlscope -- generate reports on WML macro and resource usage
#
# By Eric S. Raymond, April 2007.
#
# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
# It also checks actual macro arguments against types implied by the formals.
#
# (Most of the work is done by a cross-referencer class that is also
# used in other tools.)
#
# It takes a list of directories as arguments; if none is given, it
# behaves as though the current directory had been specified as a
# single argument. Each directory is treated as a separate domain for
# macro and resource visibility purposes.
#
# There are two kinds of namespace, exporting and non-exporting.
# Exporting namespaces make all their resources and macro names
# globally visible. You can make a namespace exporting by embedding
# a comment like this in it:
#
# # wmlscope: export=yes
#
# Wesnoth core data is an exporting namespace. Campaigns are non-exporting;
# they should contain the declaration
#
# # wmlscope: export=no
#
# somewhere. wmlscope will complain when it sees a namespace with no export
# property, then treat it as non-exporting.
#
# You can tell wmlscope to ignore stretches of config files
# with the following magic comments:
#
# # wmlscope: start ignoring
# # wmlscope: stop ignoring
#
# Similarly, you can tell wmlscope to ignore multiple or duplicate macro
# definitions in a range of lines with the following magic comments:
#
# # wmlscope: start conditionals
# # wmlscope: stop conditionals
#
# The following magic comment:
#
# # wmlscope: prune FOOBAR
#
# will cause wmlscope to forget about all but one of the definitions of FOOBAR
# it has seen. This will be useful mainly for symbols that have different
# definitions enabled by an #ifdef.
#
# Due to a preprocessor limitation, inline macros cannot contain a documentation
# string. If you need to document these macros in the HTML macro reference, you
# can use the following directive:
#
# # wmlscope: docstring FOOBAR
#
# The docstring for the FOOBAR macro will be collected until a non-comment line,
# a #define or another # wmlscope: docstring is found. External docstrings
# *must* be defined before the macro they're referring to; defining two or more
# external docstrings keeps only the most recent one, but having both an
# external and an internal docstring is allowed (in this case, the internal one
# will be appended to the external one in the macro reference).
#
# This tool does catch one kind of implicit reference: if an attack name
# is specified but no icon is given, the attack icon will default to
# a name generated from the attack name. This behavior can be suppressed
# by adding a magic comment containing the string "no-icon" to the name= line.
#
# The checking done by this tool has a couple of flaws:
#
# (1) It doesn't actually evaluate file inclusions. Instead, any
# macro definition satisfies any macro call made under the same
# directory. Exception: when an #undef is detected, the macro is
# tagged local and not visible outside the span of lines where it's
# defined.
#
# (2) It doesn't read [binary_path] tags, as this would require
# implementing a WML parser. Instead, it assumes that a resource-file
# reference can be satisfied by any matching image file from anywhere
# in the same directory it came from. The resources under the *first*
# directory argument (only) are visible everywhere.
#
# (3) A reference with embedded {}s in a macro will have the macro's
# formal args substituted in at WML evaluation time. Instead, this
# tool treats each {} as a .* wildcard and considers the reference to
# match *every* resource filename that matches that pattern. Under
# appropriate circumstances this might report a resource filename
# statically matching the pattern as having been referenced even
# though none of the actual macro calls would actually generate it.
#
# Problems (1) and (2) imply that this tool might conceivably report
# that a reference has been satisfied when under actual
# WML-interpreter rules it has not.
#
# The reporting format is compatible with GNU Emacs compile mode.
#
# For debugging purposes, an in-line comment of the form
#
# # wmlscope: warnlevel NNN
#
# sets the warning level.
import sys, os, time, re, argparse, hashlib, glob, codecs
from wesnoth.wmltools3 import *
from wesnoth import version
def interpret(lines, css):
    """Interpret the ! convention for .cfg comments and return HTML.

    Each line is stripped of surrounding whitespace.  Lines starting with
    '!' are collected (without the '!') into a <pre class='listing'> block;
    other lines are emitted inside <p> paragraphs, with an empty line
    starting a new paragraph.  The first paragraph carries the CSS class
    given by `css`.

    lines -- iterable of comment lines (strings)
    css   -- class name used for the opening <p> element

    Returns the generated HTML fragment as a string.
    """
    inlisting = False
    parts = ['<p class="%s">' % css]
    for line in lines:
        line = line.strip()
        if inlisting:
            # A non-empty line without the '!' marker ends the listing;
            # empty lines stay inside it.
            if line and line[0] != '!':
                parts.append("</pre>\n<p>")
                inlisting = False
        else:
            if not line:
                parts.append("</p><p>")
                continue
            if line[0] == '!':
                parts.append("</p>\n<pre class='listing'>")
                inlisting = True
        # "&" must be escaped first: escaping it last would re-escape the
        # ampersands of the freshly generated "&lt;" and "&gt;" entities.
        line = line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        if inlisting:
            # Drop the leading '!' marker (a no-op on an empty line).
            parts.append(line[1:] + "\n")
        else:
            parts.append(line + "\n")
    if not inlisting:
        parts.append("</p>\n")
    else:
        parts.append("</pre>\n")
    outstr = "".join(parts)
    # Tidy up empty paragraphs and a blank line just before a listing's end.
    outstr = outstr.replace("<p></p>", "")
    outstr = outstr.replace("\n\n</pre>", "\n</pre>")
    return outstr
class CrossRefLister(CrossRef):
    """Cross-reference generator with reporting functions.

    The reports read the attributes xref (macro name -> list of
    definitions), fileref (resource name -> definition), filelist,
    unresolved, missing, deprecated and unit_ids, plus the exports()
    method; these are presumably provided by the CrossRef base class,
    which is not defined in this file.
    """

    # First 16 bytes of a well-formed PNG: the 8-byte signature
    # (89 50 4E 47 0D 0A 1A 0A), then the length (13) and type of the
    # IHDR chunk, which is always the first chunk.
    _PNG_IHDR_PREFIX = b"\x89PNG\r\n\x1a\n\x00\x00\x00\x0dIHDR"

    @staticmethod
    def _join_flat(*groups):
        "Join the strings of all given sequences with ', ' as one list."
        return ", ".join([item for group in groups for item in group])

    @classmethod
    def _formal_signature(cls, args, optional_args):
        "Describe formals as NAME=type, and NAME=optional type for optional ones."
        return cls._join_flat(
            ["{}={}".format(x, formaltype(x)) for x in args],
            ["{}=optional {}".format(x, formaltype(x)) for x in optional_args])

    @classmethod
    def _call_line(cls, filename, ln, name, formals, args, optional_args):
        "Format one macro call together with the types of its actual arguments."
        return '"%s", line %d: %s(%s) with signature (%s)' % (
            filename,
            ln,
            name,
            cls._join_flat(args, optional_args.values()),
            cls._join_flat(
                ["{}={}".format(f, actualtype(a)) for f, a in zip(formals, args)],
                ["{}=optional {}".format(f, actualtype(oa)) for f, oa in optional_args.items()]))

    @staticmethod
    def _required_image_size(filename):
        """Return the (width, height) expected for an image path, or None.

        These checks rely on add-ons placing files according to mainline
        conventions, which may not always be the case.
        """
        fn_list = filename.split(os.sep)
        if "images" not in fn_list:
            return None
        if "attacks" in fn_list or "icons" in fn_list:
            # images used in attack dialogs should be 60x60
            return (60, 60)
        if "flags" in fn_list:
            # flags should be 72x72, but their icons should be 24 x 16
            if "icon" in os.path.split(filename)[1]:
                return (24, 16)
            return (72, 72)
        if "items" in fn_list:
            # items should be 72x72
            return (72, 72)
        return None

    def xrefdump(self, pred=None):
        """Report resolved macro references.

        pred -- optional callable (name, definition) -> bool; entries for
                which it returns a false value are skipped.
        """
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                if pred and not pred(name, defn):
                    continue
                type_ = "local" if defn.undef else "global"
                nrefs = len(defn.references)
                if nrefs == 0:
                    print("%s: %s macro %s is unused" % (defn, type_, name))
                else:
                    print("%s: %s macro %s is used in %d files:" % (defn, type_, name, nrefs))
                    defn.dump_references()
        for name in sorted(self.fileref.keys()):
            defloc = self.fileref[name]
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("Resource %s is unused" % defloc)
            else:
                print("Resource %s is used in %d files:" % (defloc, nrefs))
                defloc.dump_references()

    def unresdump(self):
        "Report unresolved references and macro calls with mismatched argument types."
        # First the unresolved references
        if len(self.unresolved) == 0 and len(self.missing) == 0:
            print("# No unresolved references")
        else:
            print("# Unresolved references:")
            for (name, reference) in self.unresolved + self.missing:
                print("%s: Unresolved reference -> %s" % (reference, name))
        mismatched = []
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                m = defn.mismatches()
                if m.references:
                    mismatched.append((name, m))
        # Then the type mismatches
        if mismatched:
            print("# Mismatched references:")
            for (n, m) in mismatched:
                print("%s: macro %s(%s) has mismatches:" %
                      (m, n, self._formal_signature(m.args, m.optional_args)))
                for (file, refs) in m.references.items():
                    for (ln, args, optional_args) in refs:
                        print(self._call_line(file, ln, n, m.args, args, optional_args))

    def deprecateddump(self):
        "Report calls to deprecated macros"
        if not self.deprecated:
            print("# No calls to deprecated macros")
            return
        print("# Calls to deprecated macros:")
        for (name, reference) in self.deprecated:
            print("%s: Deprecated macro call -> %s" % (reference, name))

    def incorrectlysized(self):
        """Report incorrectly sized images that cannot be safely used for their intended purpose.

        Only .png files from self.filelist are examined.  Unreadable or
        malformed files are reported on stderr, size mismatches on stdout.
        """
        for (_namespace, filename) in self.filelist:
            if not filename.endswith(".png"):
                continue
            try:
                with open(filename, mode="rb") as image:
                    png_header = image.read(16)
                    # right after the common prefix come the image width and
                    # height, 4 bytes each; the remaining 5 bytes of the IHDR
                    # chunk are not needed
                    w = image.read(4)
                    h = image.read(4)
            except IOError:
                print("%s: unable to read file" % filename, file=sys.stderr)
                continue
            # a file cut short inside the size fields is as invalid as one
            # with a wrong signature
            if png_header != self._PNG_IHDR_PREFIX or len(w) != 4 or len(h) != 4:
                print("%s is not a valid PNG file" % filename, file=sys.stderr)
                continue
            # both values are stored in big-endian order
            # (most significant bytes come first)
            x = int.from_bytes(w, "big")
            y = int.from_bytes(h, "big")
            expected_size = self._required_image_size(filename)
            if expected_size and (x, y) != expected_size:
                print("%s: image is %d x %d, expected %d x %d" %
                      (filename, x, y, expected_size[0], expected_size[1]))

    def duplicates(self, exportonly):
        """Dump duplicate unit IDs.

        exportonly -- when true, an ID is reported only if at least one of
                      its occurrences lies in an exporting namespace.
        """
        duplicate_latch = False
        for (key, value) in self.unit_ids.items():
            if len(value) > 1:
                if exportonly and not any(self.exports(x.namespace) for x in value):
                    continue
                # print the section header once, before the first duplicate
                if not duplicate_latch:
                    print("# Duplicate IDs")
                    duplicate_latch = True
                print("%s: occurs %d times as unit ID" % (key, len(value)))
                for ref in value:
                    print("%s: exported=%s" % (ref, self.exports(ref.namespace)))

    def typelist(self, branch):
        """Dump actual and formal arguments for macros in specified file.

        branch -- filename suffix; calls made from files whose name ends
                  with it are listed.
        """
        already_seen = set()
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                for (filename, refs) in defn.references.items():
                    if filename.endswith(branch):
                        # the formal signature is printed once per macro name
                        if name not in already_seen:
                            already_seen.add(name)
                            print("%s: macro %s(%s):" %
                                  (defn, name, self._formal_signature(defn.args, defn.optional_args)))
                        for (ln, args, optional_args) in refs:
                            print(self._call_line(filename, ln, name, defn.args, args, optional_args))

    def deflist(self, pred=None):
        """List all resource definitions.

        pred -- optional callable (name, definition) -> bool used to filter
                macros and resources; unit IDs are always listed.
        """
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                if not pred or pred(name, defn):
                    print("macro",
                          name,
                          " ".join(["{}={}".format(x, formaltype(x)) for x in defn.args]),
                          " ".join(["{}=optional {}".format(x, formaltype(x)) for x in defn.optional_args]))
        for name in sorted(self.fileref.keys()):
            defloc = self.fileref[name]
            if not pred or pred(name, defloc):
                print("resource", name)
        for uid in sorted(self.unit_ids.keys()):
            print("unit", uid)

    def unchecked(self, fp):
        """List all macro definitions with untyped formals.

        fp -- file object the report is written to.

        Untyped formals are shown with a trailing '?'.
        """
        untyped = []
        defcount = 0
        callcount = 0
        unresolvedcount = 0
        for name in self.xref.keys():
            for defn in self.xref[name]:
                defcount += 1
                callcount += len(defn.references)
                types = [formaltype(d) for d in defn.args]
                if None in types:
                    # build the marked-up names separately instead of
                    # modifying the definition itself
                    shown = [d + "?" if t is None else d for (d, t) in zip(defn.args, types)]
                    untyped.append((name, defn, shown))
                    unresolvedcount += len(defn.references)
        if untyped:
            # untyped macros may never be called at all, so guard the division
            callpercent = (100 * unresolvedcount) / callcount if callcount else 0.0
            print("# %d of %d (%.02f%%) macro definitions and %d of %d calls (%.02f%%) have untyped required formals:"
                  % (len(untyped),
                     defcount,
                     (100 * len(untyped)) / defcount,
                     unresolvedcount,
                     callcount,
                     callpercent),
                  file=fp)
            # sort by checking the 2nd element in the tuple
            untyped.sort(key=lambda element: element[1])
            for (name, defn, shown) in untyped:
                print("%s: %s(%s)" % (defn, name, ", ".join(shown)), file=fp)

    def extracthelp(self, pref, fp):
        """Deliver all macro help comments in HTML form.

        pref -- path prefix removed from filenames before they are displayed
        fp   -- file object the HTML is written to

        The output starts with a list of links to the documented files,
        followed by one section per file with a <dl> of its macros.
        """
        # Bug: finds only the first definition of each macro in scope.
        # macros starting with INTERNAL: are only for internal use
        # and aren't supposed to be reported in the macro reference
        doclist = [x for x in self.xref.keys() if not x.startswith("INTERNAL:")]
        doclist.sort(key=lambda element: self.xref[element][0])
        out = []
        filename = None
        filenamelist = []
        for name in doclist:
            entry = self.xref[name][0]
            if entry.filename != filename:
                # close the definition list of the previous file, if any
                if filenamelist:
                    out.append("</dl>\n")
                filename = entry.filename
                if filename.startswith(pref):
                    displayname = filename[len(pref):]
                else:
                    displayname = filename
                out.append("<p class='toplink'>[ <a href='#content'>top</a> ]</p>\n")
                out.append("<h2 id='file:" + displayname + "' class='file_header'>From file: ")
                out.append("<code class='noframe'>" + displayname + "</code></h2>\n")
                filenamelist.append(displayname)
                # The leading comment lines of the file (other than
                # #textdomain and #define lines) form its explanation.
                hdr = []
                with open(filename, "r", encoding="utf8") as dfp:
                    for line in dfp:
                        line = line.lstrip()
                        if line and line.startswith("#textdomain"):
                            continue
                        if line and line[0] == '#' and (not line.startswith("#define")):
                            hdr.append(line[1:])
                        else:
                            break
                if hdr:
                    out.append(interpret(hdr, "file_explanation"))
                out.append("<dl>\n")
            if entry.docstring:
                lines = entry.docstring.split("\n")
                # the first docstring line holds the macro name and its formals
                header = lines.pop(0).split()
                if lines and not lines[-1]: # Ignore trailing blank lines
                    lines.pop()
                out.append("\n<dt id='" + header[0] + "'>\n<code class='noframe'>")
                out.append("<span class='macro-name'>" + header[0] + "</span>")
                if header[1:]:
                    out.append(" <var class='macro-formals'>" + " ".join(header[1:]) + "</var>")
                if entry.optional_args:
                    out.append(" Optional arguments: <var class='macro-formals'>" + " ".join(entry.optional_args) + "</var>")
                out.append("\n</code></dt>\n")
                out.append("<dd>\n")
                if entry.deprecated:
                    out.append("<p class='macro-deprecated'><strong>Deprecated macro.</strong>")
                    out.append(" <em>Deprecation level: " + entry.deprecation_level + ".")
                    if entry.removal_version:
                        out.append(" Scheduled for removal in " + entry.removal_version + ".")
                    out.append("</em></p>\n")
                if lines:
                    out.append(interpret(lines, "macro-explanation"))
                else:
                    # Warn in output about definitions without a docstring
                    out.append("<p class='macro-missing-docs'><em>No documentation available for this macro.</em></p>\n")
                out.append("</dd>\n")
        # only close a definition list if one was actually opened
        if filenamelist:
            out.append("</dl>\n")
        out.append("<p class='toplink'>[ <a href='#content'>top</a> ]</p>\n")
        links = ["<p class='macro-ref-toc'>Documented files:</p><div class='filelist'><ul>"]
        for displayname in filenamelist:
            links.append("<li><a href='#file:" + displayname + "'>")
            links.append("<code class='noframe'>" + displayname + "</code></a></li>")
        links.append("</ul></div>\n")
        fp.write("".join(links))
        fp.write("".join(out))
if __name__ == "__main__":
    # Command-line entry point: parse the options, build the cross-reference
    # over the requested directories, then print the selected reports.
    parser = argparse.ArgumentParser(prog="wmlscope")
    parser.add_argument("-c", "--crossreference", action="store_true",
                        help="Report resolved macro references (implies -w 1)")
    parser.add_argument("-C", "--collisions", action="store_true",
                        help="Report duplicate resource files")
    parser.add_argument("-d", "--definitions", action="store_true",
                        help="Make definition list")
    parser.add_argument("-e", "--exclude", action="append", default = [],
                        help="Ignore files matching the specified regular expression")
    parser.add_argument("-f", "--from", action="store", dest="from_", metavar="FROM", # from is a keyword
                        help="Report only on things defined in files matching regexp")
    parser.add_argument("-l", "--listfiles", action="store_true",
                        help="List files that will be processed")
    parser.add_argument("-r", "--refcount", action="store", type=int, # convert to int, defaults to None
                        help="Report only on macros w/references in ddd files")
    parser.add_argument("-t", "--typelist", action="store",
                        help="List actual & formal argtypes for calls in fname")
    parser.add_argument("-u", "--unresolved", action="store_true",
                        help="Report unresolved macro references")
    parser.add_argument("-w", "--warnlevel", action="store", type=int, default=0,
                        help="Set to 1 to warn of duplicate macro definitions")
    # this option was never listed before...
    parser.add_argument("-p", "--progress", action="store_true",
                        help="Show progress") # TODO: improve description
    # no short options for these
    parser.add_argument("--force-used", action="append", dest="forceused", default = [],
                        help="Ignore refcount 0 on names matching regexp")
    parser.add_argument("--extracthelp", action="store_true",
                        help="Extract help from macro definition comments.")
    parser.add_argument("--unchecked", action="store_true",
                        help="Report all macros with untyped formals.")
    parser.add_argument("directories", action="store", nargs="*",
                        help="Any number of directories to check. If no "
                             "directories are given, all files under the "
                             "current directory are checked.")
    parser.add_argument("--version", action="version",
                        version="%(prog)s " + version.as_string)
    namespace = parser.parse_args()

    try:
        # Process options
        crossreference = namespace.crossreference
        collisions = namespace.collisions
        definitions = namespace.definitions
        exclude = namespace.exclude
        from_restrict = namespace.from_
        extracthelp = namespace.extracthelp
        listfiles = namespace.listfiles
        refcount_restrict = namespace.refcount
        typelist = namespace.typelist
        unresolved = namespace.unresolved
        # -c implies warning level 1
        warnlevel = 1 if crossreference else namespace.warnlevel
        forceused = namespace.forceused
        unchecked = namespace.unchecked
        progress = namespace.progress
        arguments = namespace.directories # a remnant of getopt...

        # in certain situations, Windows' command prompt appends a double quote
        # to the command line parameters. This block takes care of this issue.
        for i, arg in enumerate(arguments):
            if arg.endswith('"'):
                arguments[i] = arg[:-1]

        # the --force-used patterns are combined into a single alternation
        forceused = "|".join(forceused)
        if len(arguments):
            # expand shell-style wildcards in the directory arguments
            dirpath = []
            for arg in arguments:
                dirpath.extend(glob.glob(arg))
        else:
            dirpath = ['.']
        # the HTML produced by --extracthelp must not carry report headers
        if not extracthelp:
            print("# Wmlscope reporting on %s" % time.ctime())
            print("# Invocation: %s" % " ".join(sys.argv))
            print("# Working directory: %s" % os.getcwd())
            starttime = time.time()
        xref = CrossRefLister(dirpath=dirpath, filelist=None, exclude="|".join(exclude), warnlevel=warnlevel, progress=progress)
        if not extracthelp:
            print("#Cross-reference time: %d seconds" % (time.time()-starttime))
        if extracthelp:
            xref.extracthelp(dirpath[0], sys.stdout)
        elif unchecked:
            xref.unchecked(sys.stdout)
        elif listfiles:
            for (_ns, filename) in xref.filelist:
                print(filename)
        if collisions:
            digests = []
            for (_ns, filename) in xref.filelist:
                with open(filename, "rb") as ifp: # this one may be an image or a sound, so don't assume UTF8 encoding
                    m = hashlib.md5()
                    # read 1 KiB each time to avoid using too much memory
                    for chunk in iter(lambda: ifp.read(1024), b""):
                        m.update(chunk)
                    digests.append(m.hexdigest()) # hexdigest can be easily printed, unlike digest
            # group the filenames by digest
            hashes = {}
            # hash in Py3 is a builtin function, hence the underscore after the variable name
            for (filename, hash_) in zip(xref.filelist.flatten(), digests):
                hashes.setdefault(hash_, []).append(filename)
            for (hash_, files) in hashes.items():
                if len(files) > 1:
                    print("%%\nPossible duplicated files with MD5 hash", hash_)
                    for fn in files:
                        print("->", fn)
            xref.duplicates(exportonly=False)
        elif typelist:
            xref.typelist(typelist)
        elif crossreference or definitions or listfiles or unresolved:
            def predicate(name, defloc):
                "Apply the --from, --refcount and --force-used restrictions to one definition."
                if from_restrict and not re.search(from_restrict, defloc.filename):
                    return False
                if (refcount_restrict is not None
                        and len(defloc.references) != refcount_restrict) \
                        or (refcount_restrict == 0 and forceused and re.search(forceused, name)):
                    return False
                return True
            if crossreference:
                if xref.noxref:
                    print("wmlscope: can't make cross-reference, input included a definitions file.", file=sys.stderr)
                else:
                    xref.xrefdump(predicate)
            if definitions:
                xref.deflist(predicate)
            if unresolved:
                xref.incorrectlysized()
                xref.deprecateddump()
                xref.unresdump()
                xref.duplicates(exportonly=True)
    except KeyboardInterrupt:
        print("Aborted by pressing ctrl-c", file=sys.stderr)
# wmlscope ends here