wesnoth/data/tools/wmlindent

#!/usr/bin/env python3
# encoding: utf-8
"""\
wmlindent - re-indent WML in a uniform way.

By Eric S. Raymond, June 2007.

Call with no arguments to filter WML on stdin to reindented WML on
stdout.  If arguments are specified, they are taken to be files to be
re-indented in place; a directory name causes reindenting on all WML
beneath it.

The indent unit is four spaces.  Absence of an option to change this is
deliberate; the purpose of this tool is to *prevent* style wars, not encourage
them.

On non-empty lines, this code never modifies anything but leading and
trailing whitespace. Leading whitespace will be regularized to the
current indent; trailing whitespace will be stripped.  After processing
all lines will end with a Unix-style \\n end-of-line marker.

Runs of entirely blank lines will be reduced to one blank line, except
in two cases where they will be discarded: (a) before WML closing
tags, and (b) after WML opening tags.

It is possible to wrap a section of lines in special comments so that
wmlindent will ignore them.  You may need to do this for unbalanced
macros (it's better, though, to get rid of those where possible).
Use 'wmlindent: {start,stop} ignoring' anywhere in a comment.

It is also possible to declare custom openers and closers, e.g. for macros
that are actually control constructs.  To do this, use declarations

    # wmlindent: opener "{EXCEPTIONAL_OPENER "
    # wmlindent: closer "{EXCEPTIONAL_CLOSER "

The lines after an opener will be indented an extra level; a closer
and lines following will be indented one level less. Note that these
declare prefixes; any prefix match to the non-whitespace text of a line
will be recognized.

The public utility macros "{FOREACH" and "{NEXT" come as wired-in exceptions,
because it is not guaranteed that their indent declarations will be processed
before the macro library is reached.

Interrupting will be safe, as each reindenting will be done to a copy
that is atomically renamed when it's done.  If the output file is identical
to the input, the output file will simply be deleted, so the timestamp
on the input file won't be touched.

The --dryrun option detects and reports files that would be changed
without changing them. The --verbose or -v option enables reporting
of files that are (or would be, under --dryrun) changed.  With -v -v,
unchanged files are also reported.  The --exclude option takes a regexp
and excludes files matching it.

Note: This does not include a parser.  It will produce bad results on WML
that is syntactically unbalanced.  Unbalanced double quotes that aren't part
of a multiline literal will also confuse it.  You will receive warnings
if there's an indent open at end of file or if a closer occurs with
indent already zero; these two conditions strongly suggest unbalanced WML.
"""

import sys, os, argparse, filecmp, re, codecs
from wesnoth import wmltools3 as wmltools

# Line prefixes that are treated as block closers / openers in addition to
# WML tags and parentheses.  closer() and opener() test lines against these
# with str.startswith(); reindent() appends to them when the input contains
# a "wmlindent: closer" or "wmlindent: opener" declaration.
closer_prefixes = ["{NEXT "]
opener_prefixes = ["{FOREACH "]

def is_directive(str):
    "Tell whether the line begins with a preprocessor directive, which is never indented."
    directive_prefixes = (
        "#ifdef", "#ifndef",
        "#ifhave", "#ifnhave",
        "#ifver", "#ifnver",
        "#else", "#endif",
        "#define", "#enddef", "#undef",
        "#arg", "#endarg",
    )
    return str.startswith(directive_prefixes)

def closer(str):
    "Does this line close a block (closing tag, ')' or a declared closer prefix)?"
    # Comments and directives never close anything.
    if str.startswith("#"):
        return False
    # A closing tag or the ')' that ends a parenthesized scope.
    if str.startswith(("[/", ")")):
        return True
    # Otherwise only the wired-in or user-declared closer prefixes count.
    return str.startswith(tuple(closer_prefixes))

def opener(str):
    "Does this line open a block (opening tag, trailing '(' or a declared opener prefix)?"
    # Comments and directives never open anything.
    if str.startswith("#"):
        return False
    # An opening tag.  Rejecting any line containing "[/" excludes closing
    # tags and also the idiom [allow_undo][/allow_undo], which must be
    # treated as a no-op.
    if str.startswith("[") and "[/" not in str and not closer(str):
        return True
    # A trailing ( opens a scope that a later ) will close.
    if '#' not in str and str.endswith("(\n"):
        return True
    # Otherwise only the wired-in or user-declared opener prefixes count.
    return str.startswith(tuple(opener_prefixes))

class bailout(Exception):
    "Raised to abandon processing of one input; carries file name, line number and message."
    def __init__(self, filename, lineno, msg):
        # Kept as plain attributes so the handler can format its own report.
        self.filename, self.lineno, self.msg = filename, lineno, msg

def reindent(name, infp, outfp):
    """Reindent the WML lines read from infp, writing the result to outfp.

    name  -- label for the input, used only in diagnostics.
    infp  -- iterable yielding the input lines.
    outfp -- object whose write() method receives the output lines.

    Raises bailout if a line trips the multiple-tags check.  Warnings about
    suspicious input are printed to stderr.  As a side effect, any
    "wmlindent: opener" / "wmlindent: closer" declarations found in the
    input are appended to the module-level opener_prefixes and
    closer_prefixes lists; reporting of those declarations consults the
    module-level verbose and quiet flags.
    """
    dostrip = True
    in_lua = False
    seen_wml = False
    inmacro = False
    ignoring = False
    instring = False
    indent = ""
    # Pre-set so that an #enddef with no preceding #define resets the indent
    # instead of dying with UnboundLocalError.
    saved_indent = ""
    lasttag = ""
    countlines = 0
    countblanks = 0
    # NOTE(review): the leading "\[a-z]]" matches the literal text "[a-z]]",
    # not a letter followed by "]", so this check looks like it can hardly
    # ever fire; presumably "[a-z]\]" was intended.  Confirm before changing,
    # because a corrected pattern would reject lines that are accepted today.
    multitag = re.compile(r"\[a-z]].*\[[a-z]") # Avoid triggering on arrays
    continued_string = re.compile(r".+\+\s*(#.*)?$") # Check if the line is a string being continued
    # Whitespace followed by '#' starts a trailing comment.  Raw string: "\s"
    # is not a valid escape in an ordinary string literal.
    comment_start = re.compile(r"\s#")
    for line in infp:
        countlines += 1
        # User may declare indentation exceptions
        if "wmlindent: opener" in line:
            tag = line.split('"')[1]
            if verbose and not quiet:
                print("wmlindent: declaring indent exception for %s" % tag, file=sys.stderr)
            opener_prefixes.append(tag)
        elif "wmlindent: closer" in line:
            tag = line.split('"')[1]
            if verbose and not quiet:
                print("wmlindent: declaring outdent exception for %s" % tag, file=sys.stderr)
            closer_prefixes.append(tag)
        # Implement passthrough mode
        if "wmlindent: start ignoring" in line:
            ignoring = True
            # Flush a pending run of blank lines as a single blank line
            if countblanks > 0:
                countblanks = 0
                outfp.write("\n")
            outfp.write(line)
            continue
        elif ignoring:
            outfp.write(line)
            if "wmlindent: stop ignoring" in line:
                ignoring = False
            continue
        # Detect things we can't handle
        if multitag.search(line):
            raise bailout(name, countlines, "multiple tags on the line")
        # Strip each line, unless we're in something like a multiline string.
        if dostrip:
            transformed = line.strip() + "\n"
        else:
            transformed = line
        # Check if we're in the middle of a string concatenation
        if instring:
            if opener(transformed) or closer(transformed):
                print('wmlindent: "%s", line %d: Expected string, received tag.' % (name, countlines), file=sys.stderr)
        # Track whether we've seen real WML rather than just macro definitions
        elif transformed.startswith("#define"):
            saved_indent = indent
            indent = wmltools.baseindent
            inmacro = True
        # Be sure to ignore the newlines and comments
        elif transformed.rstrip().endswith("#enddef") and transformed.find("#") == transformed.find("#enddef"):
            indent = saved_indent
            inmacro = False
        elif not inmacro and transformed[0] in ('[', ']'):
            seen_wml = True
        # In the close case, we must compute new indent *before* emitting
        # the new line so the close tag will be at the same level as the
        # one that started the block.
        if closer(transformed):
            if indent == "":
                print('wmlindent: "%s", line %d: close tag %s with indent already zero.' % (name, countlines, transformed.strip()), file=sys.stderr)
            else:
                indent = indent[:-len(wmltools.baseindent)]
        # Cope with blank lines outside of multiline literals
        if dostrip:
            if transformed == "\n":
                countblanks += 1
                continue
            elif countblanks > 0:
                countblanks = 0
                # All sequences of blank lines get mapped to one blank
                # line, except (a) before closing tags and (b) after
                # opening tags.  In these cases they are ignored.
                if not closer(transformed) and not opener(lasttag):
                    outfp.write("\n")
        # Here's where we apply the current indent
        if dostrip and transformed and not is_directive(transformed):
            output = indent + transformed
        else:
            output = transformed
        # Nuke trailing space and canonicalize to Unix-style end-of-line
        if dostrip:
            output = output.rstrip() + "\n"
        # And ship the line
        outfp.write(output)
        # May need to indent based on the line we just saw.
        if opener(transformed):
            indent += wmltools.baseindent
        # Continuation lines of a concatenated string get one extra level,
        # which is dropped again at the first non-comment line that does
        # not itself end in '+'.
        if continued_string.search(transformed):
            if not instring:
                indent += wmltools.baseindent
                instring = True
        elif instring and not (transformed.startswith("#")):
            indent = indent[:-len(wmltools.baseindent)]
            instring = False
        # Compute the dostrip state likewise.
        # We look for unbalanced string quotes.
        if dostrip:
            # Ignore anything after a trailing comment marker
            eligible = comment_start.split(transformed)[0]
        else:
            eligible = transformed
        if dostrip and "<<" in eligible and not ">>" in eligible.split("<<", 1)[1]:
            dostrip = False
            in_lua = True
        elif in_lua:
            if ">>" in eligible and not "<<" in eligible.split(">>", 1)[1]:
                dostrip = True
                in_lua = False
        elif eligible.count('"') % 2:
            dostrip = not dostrip
        # Are we going to be immediately following a tag?
        if opener(transformed) or closer(transformed):
            lasttag = transformed
        else:
            lasttag = ""
    # Pure macro files look like they have unbalanced indents.  That's OK
    if indent != "" and seen_wml:
        # Same '"file", line N:' layout as the other diagnostics
        print('wmlindent: "%s", line %d: end of file with indent nonzero.' % (name, countlines), file=sys.stderr)

def allwmlfiles(directory):
    "Return the .cfg files under a directory, or the path itself if it is a .cfg file."
    # A single file: accepted only if it has the WML extension.
    if os.path.isfile(directory):
        return [directory] if directory.endswith(".cfg") else []

    # A directory: collect every .cfg below it, without descending into
    # the version-control directory.
    if os.path.isdir(directory):
        collected = []
        for root, dirs, files in os.walk(directory):
            if wmltools.vcdir in dirs:
                dirs.remove(wmltools.vcdir)
            collected.extend(os.path.join(root, name)
                             for name in files
                             if name.endswith(".cfg"))
        return collected

    # Anything else is reported and contributes no files.
    if not os.path.exists(directory):
        print("wmlindent: path %s does not exist and will be skipped" % directory, file=sys.stderr)
    else:
        # please note that both os.path.isfile() and os.path.isdir() follow symlinks
        # and os.path.isdir() even covers mountpoints and drive letters
        # however, this point can be reached by using special paths
        # like /dev/null, /dev/random or /dev/zero on Linux
        print("wmlindent: path %s is neither a file or a directory and will be skipped" % directory, file=sys.stderr)
    return []

def convertor(linefilter, arglist, exclude):
    """Apply a filter to command-line arguments.

    linefilter -- callable taking (name, input file, output file).
    arglist    -- file or directory names; if empty, standard input is
                  filtered to standard output.
    exclude    -- regexp; files whose names match it are skipped.  An
                  empty value disables exclusion.

    Each file is filtered into "<file>.out", which then replaces the
    original only if its content differs.  The module-level verbose, quiet
    and dryrun flags control reporting and whether files are really
    replaced.  Exits with status 1 if the arguments yield no WML file.
    """
    if not arglist:
        linefilter("standard input", sys.stdin, sys.stdout)
        return
    found_valid_path = False
    for arg in arglist:
        for filename in allwmlfiles(arg):
            found_valid_path = True
            if exclude and re.search(exclude, filename):
                if verbose:
                    print("wmlindent: %s excluded" % filename, file=sys.stderr)
                continue
            outname = filename + ".out"
            try:
                with codecs.open(filename,"r","utf8") as infp, codecs.open(outname, "w","utf8") as outfp:
                    linefilter(filename, infp, outfp)
            except bailout as e:
                print('wmlindent: "%s", %d: %s' % (e.filename, e.lineno, e.msg), file=sys.stderr)
                os.remove(outname)
            except KeyboardInterrupt:
                os.remove(outname)
                print("wmlindent: %s interrupted" % filename, file=sys.stderr)
            except Exception:
                # Don't leave a stale temporary behind on an unexpected
                # failure (e.g. undecodable input); then let it propagate.
                if os.path.exists(outname):
                    os.remove(outname)
                raise
            else:
                # shallow=False forces a content comparison; the default
                # may declare files equal from their stat signatures alone.
                if filecmp.cmp(filename, outname, shallow=False):
                    if verbose >= 2:
                        print("wmlindent: %s unchanged" % filename, file=sys.stderr)
                    os.remove(outname)
                else:
                    if not quiet:
                        print("wmlindent: %s changed" % filename, file=sys.stderr)
                    if dryrun:
                        os.remove(outname)
                    else:
                        # os.replace() overwrites the target in one step on
                        # both POSIX and Windows, so there is no moment at
                        # which the original is gone but the new file is
                        # not yet in place.
                        os.replace(outname, filename)
    if not found_valid_path:
        print("wmlindent: no WML file found, exiting", file=sys.stderr)
        sys.exit(1)

if __name__ == '__main__':
    # Build the command-line interface.  RawTextHelpFormatter keeps the
    # hand-placed line breaks in the help strings below.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
    # original --help, -h and -? printed __doc__
    parser.add_argument(
        "-?", action="help",
        help="show this help message and exit")
    parser.add_argument(
        "-d", "--dryrun", action="store_true",
        help="""detects and reports files that would be changed without
changing them.""")
    parser.add_argument(
        "-v", "--verbose", action="count", default=0,
        help="""-v enables reporting files that are changed.
-v -v unchanged files are also reported.""")
    parser.add_argument(
        "-e", "--exclude", action="append", default=[],
        help="takes a regexp and excludes files matching it.")
    # TODO: improve description?
    parser.add_argument(
        "-q", "--quiet", action="store_true",
        help="Do not generate output")
    parser.add_argument(
        "files", action="store", nargs="*",
        help="""Any number of files or directories.
Call with no arguments to filter WML on stdin to
reindented WML on stdout.""")

    namespace = parser.parse_args()

    # These three are read as module-level globals by reindent() and
    # convertor().
    quiet = namespace.quiet
    dryrun = namespace.dryrun
    # A dry run always reports at least the files that would change.
    verbose = max(1, namespace.verbose) if dryrun else namespace.verbose

    # in certain situations, Windows' command prompt appends a double quote
    # to the command line parameters. Drop that stray trailing quote.
    arguments = [arg[:-1] if arg.endswith('"') else arg
                 for arg in namespace.files]

    # Several --exclude patterns are combined into one alternation.
    exclude = "|".join(namespace.exclude)
    convertor(reindent, arguments, exclude)