338 lines
15 KiB
Python
Executable file
338 lines
15 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# encoding: utf-8
|
|
"""\
|
|
wmlindent - re-indent WML in a uniform way.
|
|
|
|
By Eric S. Raymond, June 2007.
|
|
|
|
Call with no arguments to filter WML on stdin to reindented WML on
|
|
stdout. If arguments are specified, they are taken to be files to be
|
|
re-indented in place; a directory name causes reindenting on all WML
|
|
beneath it.
|
|
|
|
The indent unit is four spaces. Absence of an option to change this is
|
|
deliberate; the purpose of this tool is to *prevent* style wars, not encourage
|
|
them.
|
|
|
|
On non-empty lines, this code never modifies anything but leading and
|
|
trailing whitespace. Leading whitespace will be regularized to the
|
|
current indent; trailing whitespace will be stripped. After processing
|
|
all lines will end with a Unix-style \n end-of-line marker.
|
|
|
|
Runs of entirely blank lines will be reduced to one blank line, except
|
|
in two cases where they will be discarded: (a) before WML closing
|
|
tags, and (b) after WML opening tags.
|
|
|
|
It is possible to wrap a section of lines in special comments so that
|
|
wmlindent will ignore them. You may need to do this for unbalanced
|
|
macros (it's better, though, to get rid of those where possible).
|
|
Use 'wmlindent: {start,stop} ignoring' anywhere in a comment.
|
|
|
|
It is also possible to declare custom openers and closers, e.g. for macros
|
|
that are actually control constructs. To do this, use declarations
|
|
|
|
# wmlindent: opener "{EXCEPTIONAL_OPENER "
|
|
# wmlindent: closer "{EXCEPTIONAL_CLOSER "
|
|
|
|
The lines after an opener will be indented an extra level; a closer
|
|
and lines following will be indented one level less. Note that these
|
|
declare prefixes; any prefix match to the non-whitespace text of a line
|
|
will be recognized.
|
|
|
|
The public utility macros "{FOREACH" and "{NEXT" come as wired-in exceptions,
|
|
because it is not guaranteed that their indent declarations will be processed
|
|
before the macro library is reached.
|
|
|
|
Interrupting will be safe, as each reindenting will be done to a copy
|
|
that is atomically renamed when it's done. If the output file is identical
|
|
to the input, the output file will simply be deleted, so the timestamp
|
|
on the input file won't be touched.
|
|
|
|
The --dryrun option detects and reports files that would be changed
|
|
without changing them. The --verbose or -v option enables reporting
|
|
of files that are (or would be, under --dryrun) changed. With -v -v,
|
|
unchanged files are also reported. The --exclude option takes a regexp
|
|
and excludes files matching it.
|
|
|
|
Note: This does not include a parser. It will produce bad results on WML
|
|
that is syntactically unbalanced. Unbalanced double quotes that aren't part
|
|
of a multiline literal will also confuse it. You will receive warnings
|
|
if there's an indent open at end of file or if a closer occurs with
|
|
indent already zero; these two conditions strongly suggest unbalanced WML.
|
|
"""
|
|
|
|
import sys, os, argparse, filecmp, re, codecs
|
|
from wesnoth import wmltools3 as wmltools
|
|
|
|
# Line prefixes that change the indent level even though they are not WML
# tags.  These are the wired-in defaults; reindent() appends to both lists
# whenever it meets a "wmlindent: closer" or "wmlindent: opener" declaration.
closer_prefixes = ["{NEXT "]
opener_prefixes = ["{FOREACH "]
|
|
|
|
def is_directive(str):
    "Tell whether a line begins with a preprocessor directive (never indented)."
    directives = (
        "#ifdef", "#ifndef", "#ifhave", "#ifnhave", "#ifver", "#ifnver",
        "#else", "#endif", "#define", "#enddef", "#undef", "#arg", "#endarg",
    )
    # These are prefix matches, so e.g. "#ifdef FOO" counts as a directive.
    return any(str.startswith(word) for word in directives)
|
|
|
|
def closer(str):
    "Does this line close a scope (closing tag, ')' or a declared closer)?"
    # Comment and directive lines never close anything.
    if str.startswith("#"):
        return False
    # A closing tag, or the ")" that ends a scope opened by a trailing "(".
    if str.startswith(("[/", ")")):
        return True
    # Otherwise only the wired-in or user-declared closer prefixes count.
    return any(str.startswith(prefix) for prefix in closer_prefixes)
|
|
|
|
def opener(str):
    "Does this line open a scope (opening tag, trailing '(' or a declared opener)?"
    # Comment and directive lines never open anything.
    if str.startswith("#"):
        return False
    # A tag line opens a scope unless it is a closer.  Rejecting any line
    # that contains "[/" covers plain closing tags and also the one-line
    # idiom [allow_undo][/allow_undo], which must leave the indent alone.
    if str.startswith("[") and "[/" not in str and not closer(str):
        return True
    # A "(" at the very end of the line opens a scope that ")" will close;
    # lines containing a "#" are not considered for this rule.
    if str.endswith("(\n") and "#" not in str:
        return True
    # Otherwise only the wired-in or user-declared opener prefixes count.
    return any(str.startswith(prefix) for prefix in opener_prefixes)
|
|
|
|
class bailout(Exception):
    """Raised when the reindenter meets input it refuses to process.

    Attributes:
        filename -- name of the input being processed
        lineno   -- 1-based number of the offending line
        msg      -- short description of the problem
    """
    def __init__(self, filename, lineno, msg):
        # Pass the details on to Exception as well, so that args and the
        # text of an uncaught traceback are not left empty.
        super().__init__(filename, lineno, msg)
        self.filename = filename
        self.lineno = lineno
        self.msg = msg

    def __str__(self):
        return '"%s", %d: %s' % (self.filename, self.lineno, self.msg)
|
|
|
|
def _declared_prefix(line):
    "Return the double-quoted prefix named in a wmlindent declaration, or None."
    fields = line.split('"')
    # fields[1] is the text following the first double quote.  A missing
    # prefix cannot be used, and an empty one would match every line.
    if len(fields) < 2 or not fields[1]:
        return None
    return fields[1]


def reindent(name, infp, outfp):
    """Reindent WML, copying it line by line from infp to outfp.

    name  -- label for the input, used only in diagnostics
    infp  -- iterable yielding the input lines
    outfp -- object with a write() method receiving the result

    Raises bailout if a line matches the "multiple tags" pattern.  All
    warnings are printed to stderr.  Prefixes declared in the input are
    appended to the module-level opener_prefixes and closer_prefixes lists,
    and the module-level verbose and quiet flags (set in the command-line
    section of this file) decide whether such declarations are reported.
    """
    dostrip = True          # False while inside a multiline string or << >> block
    in_lua = False          # True while inside an unterminated << ... >> block
    seen_wml = False        # True once a line starting with [ or ] was seen outside a macro
    inmacro = False         # True between #define and #enddef
    ignoring = False        # True between "start ignoring" and "stop ignoring"
    instring = False        # True while a "+"-continued string is pending
    indent = ""
    # Indent to restore at #enddef.  Initialised here so that an #enddef
    # with no preceding #define cannot hit an unbound local.
    saved_indent = ""
    lasttag = ""
    countlines = 0
    countblanks = 0
    # NOTE(review): as written this needs the literal text "[a-z]]" on the
    # line, so it looks unlikely to ever fire on real WML.  Left unchanged
    # because fixing it would reject input that is accepted today -- confirm
    # the intended pattern.
    multitag = re.compile(r"\[a-z]].*\[[a-z]") # Avoid triggering on arrays
    continued_string = re.compile(r".+\+\s*(#.*)?$") # Check if the line is a string being continued
    comment_start = re.compile(r"\s#") # Whitespace followed by a comment marker
    for line in infp:
        countlines += 1
        # User may declare indentation exceptions
        if "wmlindent: opener" in line:
            tag = _declared_prefix(line)
            if tag is None:
                # E.g. a comment that merely mentions the declaration syntax.
                print('wmlindent: "%s", line %d: opener declaration without a quoted prefix ignored.' % (name, countlines), file=sys.stderr)
            else:
                if verbose and not quiet:
                    print("wmlindent: declaring indent exception for %s" % tag, file=sys.stderr)
                opener_prefixes.append(tag)
        elif "wmlindent: closer" in line:
            tag = _declared_prefix(line)
            if tag is None:
                print('wmlindent: "%s", line %d: closer declaration without a quoted prefix ignored.' % (name, countlines), file=sys.stderr)
            else:
                if verbose and not quiet:
                    print("wmlindent: declaring outdent exception for %s" % tag, file=sys.stderr)
                closer_prefixes.append(tag)
        # Implement passthrough mode
        if "wmlindent: start ignoring" in line:
            ignoring = True
            # Flush a pending run of blank lines as a single blank line
            if countblanks > 0:
                countblanks = 0
                outfp.write("\n")
            outfp.write(line)
            continue
        elif ignoring:
            outfp.write(line)
            if "wmlindent: stop ignoring" in line:
                ignoring = False
            continue
        # Detect things we can't handle
        if multitag.search(line):
            raise bailout(name, countlines, "multiple tags on the line")
        # Strip each line, unless we're in something like a multiline string.
        if dostrip:
            transformed = line.strip() + "\n"
        else:
            transformed = line
        # Check if we're in the middle of a string concatenation
        if instring:
            if opener(transformed) or closer(transformed):
                print('wmlindent: "%s", line %d: Expected string, received tag.' % (name, countlines), file=sys.stderr)
        # Track whether we've seen real WML rather than just macro definitions
        elif transformed.startswith("#define"):
            saved_indent = indent
            indent = wmltools.baseindent
            inmacro = True
        # Be sure to ignore the newlines and comments
        elif transformed.rstrip().endswith("#enddef") and transformed.find("#") == transformed.find("#enddef"):
            indent = saved_indent
            inmacro = False
        elif not inmacro and transformed[0] in ('[', ']'):
            seen_wml = True
        # In the close case, we must compute new indent *before* emitting
        # the new line so the close tag will be at the same level as the
        # one that started the block.
        if closer(transformed):
            if indent == "":
                print('wmlindent: "%s", line %d: close tag %s with indent already zero.' % (name, countlines, transformed.strip()), file=sys.stderr)
            else:
                indent = indent[:-len(wmltools.baseindent)]
        # Cope with blank lines outside of multiline literals
        if dostrip:
            if transformed == "\n":
                countblanks += 1
                continue
            elif countblanks > 0:
                countblanks = 0
                # All sequences of blank lines get mapped to one blank
                # line, except (a) before closing tags and (b) after
                # opening tags. In these cases they are ignored.
                if not closer(transformed) and not opener(lasttag):
                    outfp.write("\n")
        # Here's where we apply the current indent
        if dostrip and transformed and not is_directive(transformed):
            output = indent + transformed
        else:
            output = transformed
        # Nuke trailing space and canonicalize to Unix-style end-of-line
        if dostrip:
            output = output.rstrip() + "\n"
        # And ship the line
        outfp.write(output)
        # May need to indent based on the line we just saw.
        if opener(transformed):
            indent += wmltools.baseindent
        # A line ending in "+" starts (or continues) a concatenated string;
        # its continuation lines get one extra level until the chain ends.
        if continued_string.search(transformed):
            if not instring:
                indent += wmltools.baseindent
                instring = True
        elif instring and not (transformed.startswith("#")):
            indent = indent[:-len(wmltools.baseindent)]
            instring = False
        # Compute the dostrip state likewise.
        # We look for unbalanced string quotes.
        if dostrip:
            # Only the text before a trailing comment is examined
            eligible = comment_start.split(transformed)[0]
        else:
            eligible = transformed
        if dostrip and "<<" in eligible and not ">>" in eligible.split("<<", 1)[1]:
            dostrip = False
            in_lua = True
        elif in_lua:
            if ">>" in eligible and not "<<" in eligible.split(">>", 1)[1]:
                dostrip = True
                in_lua = False
        elif eligible.count('"') % 2:
            # An odd number of quotes enters or leaves a multiline literal
            dostrip = not dostrip
        # Are we going to be immediately following a tag?
        if opener(transformed) or closer(transformed):
            lasttag = transformed
        else:
            lasttag = ""
    # Pure macro files look like they have unbalanced indents.  That's OK
    if indent != "" and seen_wml:
        print('wmlindent: "%s". line %d: end of file with indent nonzero.' % (name, countlines), file=sys.stderr)
|
|
|
|
def allwmlfiles(directory):
    "Return the .cfg files found at or beneath a path; [] if there are none."
    # A plain file stands for itself, provided it has the WML extension.
    if os.path.isfile(directory):
        return [directory] if directory.endswith(".cfg") else []

    if os.path.isdir(directory):
        found = []
        for root, dirs, files in os.walk(directory):
            # Pruning dirs in place keeps os.walk out of that subdirectory.
            if wmltools.vcdir in dirs:
                dirs.remove(wmltools.vcdir)
            found.extend(os.path.join(root, name)
                         for name in files if name.endswith(".cfg"))
        return found

    if not os.path.exists(directory):
        print("wmlindent: path %s does not exist and will be skipped" % directory, file=sys.stderr)
    else:
        # os.path.isfile() and os.path.isdir() both follow symlinks, and
        # isdir() also covers mountpoints and drive letters, so this point
        # is reached only for special paths such as /dev/null, /dev/random
        # or /dev/zero on Linux.
        print("wmlindent: path %s is neither a file or a directory and will be skipped" % directory, file=sys.stderr)
    return []
|
|
|
|
def _discard(path):
    "Remove a scratch file, tolerating the case where it was never created."
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def convertor(linefilter, arglist, exclude):
    """Apply a filter to command-line arguments.

    linefilter -- callable taking (name, infp, outfp)
    arglist    -- paths to process; if empty, stdin is filtered to stdout
    exclude    -- regexp; files whose path matches it are skipped (an empty
                  string excludes nothing)

    Each file is filtered into a scratch copy named <file>.out, which then
    replaces the original only if its content differs.  Reads the
    module-level verbose, quiet and dryrun flags set in the command-line
    section of this file.  Exits with status 1 if no argument yields any
    WML file.
    """
    if not arglist:
        linefilter("standard input", sys.stdin, sys.stdout)
        return

    found_valid_path = False
    for arg in arglist:
        for filename in allwmlfiles(arg):
            found_valid_path = True
            if exclude and re.search(exclude, filename):
                if verbose:
                    print("wmlindent: %s excluded" % filename, file=sys.stderr)
                continue
            scratch = filename + ".out"
            try:
                with codecs.open(filename, "r", "utf8") as infp, codecs.open(scratch, "w", "utf8") as outfp:
                    linefilter(filename, infp, outfp)
            except bailout as e:
                print('wmlindent: "%s", %d: %s' % (e.filename, e.lineno, e.msg), file=sys.stderr)
                _discard(scratch)
            except KeyboardInterrupt:
                # The original file is untouched; processing goes on with
                # the next file, as it always did.
                _discard(scratch)
                print("wmlindent: %s interrupted" % filename, file=sys.stderr)
            except Exception:
                # E.g. undecodable input: don't leave a half-written copy
                # lying next to the original.
                _discard(scratch)
                raise
            else:
                # shallow=False forces a content comparison; the default
                # may declare two files equal from their stat data alone.
                if filecmp.cmp(filename, scratch, shallow=False):
                    if verbose >= 2:
                        print("wmlindent: %s unchanged" % filename, file=sys.stderr)
                    os.remove(scratch)
                else:
                    if not quiet:
                        print("wmlindent: %s changed" % filename, file=sys.stderr)
                    if dryrun:
                        os.remove(scratch)
                    else:
                        # os.replace() overwrites an existing target on
                        # every platform, so there is no moment at which
                        # the file is missing.
                        os.replace(scratch, filename)

    if not found_valid_path:
        print("wmlindent: no WML file found, exiting", file=sys.stderr)
        sys.exit(1)
|
|
|
|
if __name__ == '__main__':
    # Command-line entry point: parse the options, then reindent the named
    # files and directories (or stdin when none are given).
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    # -h and --help are provided by argparse; -? is kept as an extra alias.
    parser.add_argument("-?", action="help", help="show this help message and exit")
    parser.add_argument("-d", "--dryrun", action="store_true",
                        help="""detects and reports files that would be changed without
changing them.""")
    parser.add_argument("-v", "--verbose", action="count", default=0,
                        help="""-v enables reporting files that are changed.
-v -v unchanged files are also reported.""")
    parser.add_argument("-e", "--exclude", action="append", default=[],
                        help="takes a regexp and excludes files matching it.")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Do not generate output") # TODO: improve description?
    parser.add_argument("files", action="store", nargs="*",
                        help="""Any number of files or directories.
Call with no arguments to filter WML on stdin to
reindented WML on stdout.""")

    namespace = parser.parse_args()
    # verbose, quiet and dryrun must stay module-level names: reindent()
    # and convertor() read them as globals.
    verbose, quiet, dryrun = namespace.verbose, namespace.quiet, namespace.dryrun
    exclude = namespace.exclude
    arguments = namespace.files

    # A dry run always reports what it would change.
    if dryrun and verbose < 1:
        verbose = 1

    # In certain situations, Windows' command prompt appends a double quote
    # to the command line parameters; drop one such trailing quote.
    arguments = [arg[:-1] if arg.endswith('"') else arg for arg in arguments]

    # Several --exclude patterns are combined into a single alternation.
    convertor(reindent, arguments, "|".join(exclude))
|