Added Python 3 versions of wmltools and wmliterator

The Python 2 versions should be removed as soon as there are no more scripts using them.
Elvish_Hunter 2015-09-22 22:47:27 +02:00
parent f07e463882
commit 53b5d031e9
2 changed files with 1472 additions and 0 deletions


@@ -0,0 +1,498 @@
"""
wmliterator.py -- Python routines for navigating a Battle For Wesnoth WML tree
Author: Sapient (Patrick Parker), 2007
Purpose:
The WmlIterator class can be used to analyze and search the structure of WML
files non-invasively (i.e. preserving existing line structure), and its main
use is to determine when a transformation of deprecated content needs to take
place. (I wrote it because wmllint was trying to do a lot of things with
regular expressions which really required a more algorithmic approach. Also,
wmllint was often inconsistent with correct handling of comments and values
inside strings.)
Limitations:
The WmlIterator does not attempt to expand macros, only to recognize them as
another level of nesting. Also, the handling of multiple assignment syntax
is somewhat limited (for similar reasons). Adding seamless support for these
would be ideal, but it presents a design challenge since the iteration is
supposed to be non-invasive. Thus, the current behavior is considered good
enough for now.
"""
from functools import total_ordering
import sys, re, copy, codecs
keyPattern = re.compile(r'(\w+)(,\s?\w+)*\s*=')
keySplit = re.compile(r'[=,\s]')
tagPattern = re.compile(r'(^|(?<![\w|}]))(\[/?\+?[a-z _]+\])')
macroOpenPattern = re.compile(r'(\{[^\s\}\{]*)')
macroClosePattern = re.compile(r'\}')
closeMacroType = 'end of macro'
silenceErrors = {}
def wmlfind(element, wmlItor):
"""Find a simple element from traversing a WML iterator"""
for itor in wmlItor.copy():
if element == itor.element:
return itor
return None
def wmlfindin(element, scopeElement, wmlItor):
"""Find an element inside a particular type of scope element"""
for itor in wmlItor.copy():
if element == itor.element:
if itor.scopes:
if scopeElement == itor.scopes[-1].element:
return itor
elif not scopeElement:
# allow searching in the empty scope
return itor
return None
def isDirective(elem):
"Identify things that shouldn't be indented."
if isinstance(elem, WmlIterator):
elem = elem.element
return elem.startswith(("#ifdef", "#ifndef", "#ifhave", "#ifnhave", "#ifver", "#ifnver", "#else", "#endif", "#define", "#enddef", "#undef"))
def isCloser(elem):
"Are we looking at a closing tag?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("[/")
def isMacroCloser(elem):
"Are we looking at a macro closer?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem == closeMacroType
def isOpener(elem):
"Are we looking at an opening tag?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("[") and not isCloser(elem)
def isExtender(elem):
"Are we looking at an extender tag?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("[+")
def isMacroOpener(elem):
"Are we looking at a macro opener?"
if isinstance(elem, WmlIterator):
elem = elem.element
return type(elem) == type("") and elem.startswith("{")
def isAttribute(elem):
"Are we looking at an attribute (or attribute tuple)?"
if isinstance(elem, WmlIterator):
elem = elem.element
if type(elem) == type(()):
elem = elem[0]
return type(elem) == type("") and elem.endswith("=")
# the total_ordering decorator from functools lets us define only two comparison
# methods, and Python generates the remaining ones
# it comes with a speed penalty, but the alternative is defining six methods by hand...
@total_ordering
class WmlIterator(object):
"""Return an iterable WML navigation object.
Initialize with a list of lines or a file; if the line list is
empty and the filename is specified, lines will be read from the file.
Note: if changes are made to lines while iterating, this may produce
unexpected results. In such case, seek() to the line number of a
scope behind where changes were made.
Important Attributes:
lines - this is an internal list of all the physical lines
scopes - this is an internal list of all open scopes (as iterators)
note: when retrieving an iterator from this list, always
use a copy to perform seek() or next(), and not the original
element - the wml tag, key, or macro name for this logical line
(in complex cases, this may be a tuple of elements...
see parseElements for list of possible values)
text - the exact text of this logical line, as it appears in the
original source, and ending with a newline
note: the logical line also includes multi-line quoted strings
span - the number of physical lines in this logical line:
always 1, unless text contains a multi-line quoted string
lineno - a zero-based line index marking where this text begins
"""
def __init__(self, lines=None, filename=None, begin=-1):
"Initialize a new WmlIterator."
self.fname = filename
if lines is None:
lines = []
if filename:
try:
with codecs.open(self.fname, "r", "utf8") as ifp:
lines = ifp.readlines()
except Exception:
self.printError('error opening file')
self.lines = lines
self.reset()
self.seek(begin)
def parseQuotes(self, lines):
"""Return the line or multiline text if a quote spans multiple lines"""
text = lines[self.lineno]
span = 1
begincomment = text.find('#')
if begincomment < 0:
begincomment = None
beginquote = text[:begincomment].find('<<')
while beginquote >= 0:
endquote = -1
beginofend = beginquote+2
while endquote < 0:
endquote = text.find('>>', beginofend)
if endquote < 0:
if self.lineno + span >= len(lines):
self.printError('reached EOF due to unterminated string at line', self.lineno+1)
return text, span
beginofend = len(text)
text += lines[self.lineno + span]
span += 1
begincomment = text.find('#', endquote+2)
if begincomment < 0:
begincomment = None
beginquote = text[:begincomment].find('<<', endquote+2)
beginquote = text[:begincomment].find('"')
while beginquote >= 0:
endquote = -1
beginofend = beginquote+1
while endquote < 0:
endquote = text.find('"', beginofend)
if endquote < 0:
if self.lineno + span >= len(lines):
self.printError('reached EOF due to unterminated string at line', self.lineno+1)
return text, span
beginofend = len(text)
text += lines[self.lineno + span]
span += 1
begincomment = text.find('#', endquote+1)
if begincomment < 0:
begincomment = None
beginquote = text[:begincomment].find('"', endquote+1)
return text, span
def closeScope(self, scopes, closerElement):
"""Close the most recently opened scope. Return false if not enough scopes.
note: directives close all the way back to the last open directive
non-directives cannot close a directive and will no-op in that case."""
try:
if isDirective(closerElement):
while not isDirective(scopes.pop()):
pass
elif (closerElement==closeMacroType):
elem = ''
while not elem.startswith('{'):
closed = scopes.pop()
elem = closed
if isinstance(closed, WmlIterator):
elem = closed.element
if isDirective(elem):
self.printScopeError(closerElement)
scopes.append(closed) # to reduce additional errors (hopefully)
return True
elif not isDirective(scopes[-1]):
closed = scopes.pop()
elem = closed
if isinstance(closed, WmlIterator):
elem = closed.element
if (elem.startswith('{') and closerElement != closeMacroType):
scopes.append(closed)
elif (isOpener(elem) and closerElement != '[/'+elem[1:]
and '+'+closerElement != elem[1]+'[/'+elem[2:]):
self.printError('reached', closerElement, 'at line', self.lineno+1, 'before closing scope', elem)
scopes.append(closed) # to reduce additional errors (hopefully)
return True
except IndexError:
return False
def parseElements(self, text):
"""Remove any closed scopes, return a list of element names
and list of new unclosed scopes
Element Types:
tags: one of "[tag_name]" or "[/tag_name]"
[tag_name] - opens a scope
[/tag_name] - closes a scope
keys: either "key=" or ("key1=", "key2=") for multi-assignment
key= - does not affect the scope
key1,key2= - multi-assignment returns multiple elements
directives: one of "#ifdef", "#ifndef", "#ifhave", "#ifnhave", "#ifver", "#ifnver", "#else", "#endif", "#define", "#enddef"
#ifdef - opens a scope
#ifndef - opens a scope
#ifhave - opens a scope
#ifnhave - opens a scope
#ifver - opens a scope
#ifnver - opens a scope
#else - closes a scope, also opens a new scope
#endif - closes a scope
#define - opens a scope
#enddef - closes a scope
macro calls: "{MACRO_NAME}"
{MACRO_NAME - opens a scope
} - closes a scope
"""
elements = [] #(elementType, sortPos, scopeDelta)
# first remove any lua strings
beginquote = text.find('<<')
while beginquote >= 0:
endquote = text.find('>>', beginquote+2)
if endquote < 0:
text = text[:beginquote]
beginquote = -1 #terminate loop
else:
text = text[:beginquote] + text[endquote+2:]
beginquote = text.find('<<')
# remove any quoted strings
beginquote = text.find('"')
while beginquote >= 0:
endquote = text.find('"', beginquote+1)
if endquote < 0:
text = text[:beginquote]
beginquote = -1 #terminate loop
else:
text = text[:beginquote] + text[endquote+1:]
beginquote = text.find('"')
# next remove any comments
text = text.lstrip()
commentSearch = 1
if text.startswith('#ifdef'):
return (['#ifdef'],)*2
elif text.startswith('#ifndef'):
return (['#ifndef'],)*2
elif text.startswith('#ifhave'):
return (['#ifhave'],)*2
elif text.startswith('#ifnhave'):
return (['#ifnhave'],)*2
elif text.startswith('#ifver'):
return (['#ifver'],)*2
elif text.startswith('#ifnver'):
return (['#ifnver'],)*2
elif text.startswith('#else'):
if not self.closeScope(self.scopes, '#else'):
self.printScopeError('#else')
return (['#else'],)*2
elif text.startswith('#endif'):
if not self.closeScope(self.scopes, '#endif'):
self.printScopeError('#endif')
return ['#endif'], []
elif text.startswith('#define'):
return (['#define'],)*2
elif text.find('#enddef') >= 0:
elements.append(('#enddef', text.find('#enddef'), -1))
elif text.startswith('#po:') or text.startswith('# po:'):
elements.append(("#po", 0, 0))
else:
commentSearch = 0
begincomment = text.find('#', commentSearch)
if begincomment >= 0:
text = text[:begincomment]
#now find elements in a loop
for m in tagPattern.finditer(text):
delta = 1
if isCloser(m.group(2)):
delta = -1
elements.append((m.group(2), m.start(), delta))
for m in keyPattern.finditer(text):
for i, k in enumerate(keySplit.split(m.group(0))):
if k:
elements.append((k+'=', m.start()+i, 0))
for m in macroOpenPattern.finditer(text):
elements.append((m.group(1), m.start(), 1))
for m in macroClosePattern.finditer(text):
elements.append((closeMacroType, m.start(), -1))
#sort by start position
elements.sort(key=lambda x:x[1])
resultElements = []
openedScopes = []
for elem, sortPos, scopeDelta in elements:
while scopeDelta < 0:
if not(self.closeScope(openedScopes, elem)\
or self.closeScope(self.scopes, elem)):
self.printScopeError(elem)
scopeDelta += 1
while scopeDelta > 0:
openedScopes.append(elem)
scopeDelta -= 1
resultElements.append(elem)
return resultElements, openedScopes
def printScopeError(self, elementType):
"""Print out warning if a scope was unable to close"""
self.printError('attempt to close empty scope at', elementType, 'line', self.lineno+1)
def __iter__(self):
"""The magic iterator method"""
return self
def __eq__(self, other):
return (self.fname, self.lineno, self.element) == \
(other.fname, other.lineno, other.element)
def __gt__(self, other):
return (self.fname, self.lineno, self.element) > \
(other.fname, other.lineno, other.element)
def reset(self):
"""Reset any line tracking information to defaults"""
self.lineno = -1
self.scopes = []
self.nextScopes = []
self.text = ""
self.span = 1
self.element = ""
return self
def seek(self, lineno, clearEnd=True):
"""Move the iterator to a specific line number"""
if clearEnd:
self.endScope = None
if lineno < self.lineno:
for scope in reversed(self.scopes):
# if moving backwards, try to re-use a scope iterator
if scope.lineno <= lineno:
# copy the scope iterator's state to self
self.__dict__ = dict(scope.__dict__)
self.scopes = scope.scopes[:]
self.nextScopes = scope.nextScopes[:]
break
else:
# moving backwards past all scopes forces a reset
self.reset()
while self.lineno + self.span - 1 < lineno:
next(self)
return self
def ancestors(self):
"""Return a list of tags enclosing this location, outermost first."""
return tuple([x.element for x in self.scopes])
def hasNext(self):
"""Some loops may wish to check this method instead of calling next()
and handling StopIteration... note: inaccurate for ScopeIterators"""
return len(self.lines) > self.lineno + self.span
def copy(self):
"""Return a copy of this iterator"""
itor = copy.copy(self)
itor.scopes = self.scopes[:]
itor.nextScopes = self.nextScopes[:]
return itor
def __str__(self):
"""Return a pretty string representation"""
if self.lineno == -1:
return 'beginning of file'
loc = ' at line ' + str(self.lineno+1)
if self.element:
return str(self.element) + loc
if self.text.strip():
return 'text' + loc
return 'whitespace' + loc
def __repr__(self):
"""Return a very basic string representation"""
return 'WmlIterator<' + repr(self.element) +', line %d>'%(self.lineno+1)
def __next__(self):
"""Move the iterator to the next line number
note: May raise StopIteration"""
if not self.hasNext():
if self.scopes:
self.printError("reached EOF with open scopes", self.scopes)
raise StopIteration
self.lineno = self.lineno + self.span
self.text, self.span = self.parseQuotes(self.lines)
self.scopes.extend(self.nextScopes)
self.element, nextScopes = self.parseElements(self.text)
self.nextScopes = []
for elem in nextScopes:
# remember scopes by storing a copy of the iterator
copyItor = self.copy()
copyItor.element = elem
self.nextScopes.append(copyItor)
copyItor.nextScopes.append(copyItor)
if len(self.element) == 1:
# currently we only wish to handle simple single assignment syntax
self.element = self.element[0]
if self.endScope is not None and not self.scopes.count(self.endScope):
raise StopIteration
return self
def isOpener(self):
return isOpener(self)
def isCloser(self):
return isCloser(self)
def isExtender(self):
return isExtender(self)
def isMacroOpener(self):
return isMacroOpener(self)
def isMacroCloser(self):
return isMacroCloser(self)
def isAttribute(self):
return isAttribute(self)
def iterScope(self):
"""Return an iterator for the current scope"""
if not self.scopes:
return WmlIterator(self.lines, self.fname)
scopeItor = self.scopes[-1].copy()
scopeItor.endScope = self.scopes[-1]
return scopeItor
def printError(nav, *misc):
"""Print error associated with a given file; avoid printing duplicates"""
if nav.fname:
silenceValue = ' '.join(map(str, misc))
if nav.fname not in silenceErrors:
print(nav.fname, file=sys.stderr)
silenceErrors[nav.fname] = set()
elif silenceValue in silenceErrors[nav.fname]:
return # do not print a duplicate error for this file
silenceErrors[nav.fname].add(silenceValue)
print('wmliterator:', end=" ", file=sys.stderr)
for item in misc:
print(item, end=" ", file=sys.stderr)
print("", file=sys.stderr) #terminate line
if __name__ == '__main__':
"""Perform a test run on a file or directory"""
import os, glob
didSomething = False
flist = sys.argv[1:]
if not flist:
print('Current directory is', os.getcwd())
flist = glob.glob(os.path.join(os.getcwd(), input('Which file(s) would you like to test?\n')))
while flist:
fname = flist.pop()
if os.path.isdir(fname):
flist += glob.glob(fname + os.path.sep + '*')
continue
if not os.path.isfile(fname) or os.path.splitext(fname)[1] != '.cfg':
continue
print('Reading', fname+'...')
didSomething = True
with codecs.open(fname, "r", "utf8") as f:
itor = WmlIterator(f.readlines())
for i in itor:
pass
print(itor.lineno + itor.span, 'lines read.')
if not didSomething:
print('That is not a valid .cfg file')
if os.name == 'nt' and os.path.splitext(__file__)[0].endswith('wmliterator') and not sys.argv[1:]:
os.system('pause')
# wmliterator.py ends here


@@ -0,0 +1,974 @@
"""
wmltools.py -- Python routines for working with a Battle For Wesnoth WML tree
"""
from functools import total_ordering
import collections, codecs
import sys, os, re, sre_constants, hashlib, glob, gzip
import string
map_extensions = ("map", "mask")
image_extensions = ("png", "jpg", "jpeg")
sound_extensions = ("ogg", "wav")
vc_directories = (".git", ".svn")
l10n_directories = ("l10n",)
resource_extensions = map_extensions + image_extensions + sound_extensions
image_reference = r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.-]*\.(png|jpe?g)(?=(~.*)?)"
def is_root(dirname):
"Is the specified path the filesystem root?"
return dirname == os.sep or (os.sep == '\\' and dirname.endswith(':\\'))
def pop_to_top(whoami):
"Pop upward to the top-level directory."
upwards = os.getcwd().split(os.sep)
upwards.reverse()
for pathpart in upwards:
# Loose match because people have things like git trees.
if os.path.basename(pathpart).find("wesnoth") > -1:
break
else:
os.chdir("..")
else:
print(whoami + ": must be run from within a Battle "
"for Wesnoth source tree.", file=sys.stderr)
sys.exit(1)
def string_strip(value):
"String-strip the value"
if value.startswith('"'):
value = value[1:]
if value.endswith('"'):
value = value[:-1]
if value.startswith("'"):
value = value[1:]
if value.endswith("'"):
value = value[:-1]
return value
def attr_strip(value):
"Strip away an (optional) translation mark and string quotes."
value = value.strip()
if value.startswith('_'):
value = value[1:]
value = value.strip()
return string_strip(value)
def comma_split(csstring, list=None, strip="r"):
"Split a comma-separated string, and append the entries to a list if specified."
vallist = [x.lstrip() for x in csstring.split(",") if x.lstrip()]
# strip=: utils::split will remove trailing whitespace from items in comma-
# separated lists but the wml-tags.lua split function only removes leading
# whitespace. So two flags are offered to change default behavior: one to
# lstrip() only, the other to warn about trailing whitespace.
if 'w' in strip:
for item in vallist:
if re.search(r'\s$', item):
print('Trailing whitespace may be problematic: "%s" in "%s"' % (item, csstring))
if 'l' not in strip:
vallist = [x.rstrip() for x in vallist]
if list is not None:
list.extend(vallist)
else:
return vallist
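# A minimal illustration (the helper is hypothetical, not part of the module API):
# the effect of the strip flags documented above on a value with trailing whitespace.
def _comma_split_example():
    "Illustrative helper for the strip= flags of comma_split()."
    assert comma_split("melee, ranged ") == ["melee", "ranged"]             # default: rstrip
    assert comma_split("melee, ranged ", strip="l") == ["melee", "ranged "] # lstrip only
    # strip="w" also prints a warning about the trailing whitespace.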
def parse_attribute(line):
"Parse a WML key-value pair from a line."
if '=' not in line or (line.find("#") > -1 and line.find("#") < line.find("=")):
return None
where = line.find("=")
leader = line[:where]
after = line[where+1:]
after = after.lstrip()
if re.search(r"\s#", after):
where = len(re.split(r"\s+#", after)[0])
value = after[:where]
comment = after[where:]
else:
value = after.rstrip()
comment = ""
# Return four fields: stripped key, part of line before value,
# value, trailing whitespace and comment.
return (leader.strip(), leader+"=", string_strip(value), comment)
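# A minimal illustration (the helper is hypothetical, not part of the module API):
# the four fields parse_attribute() returns for a key/value line with a comment.
def _parse_attribute_example():
    "Illustrative helper showing the tuple returned by parse_attribute()."
    key, prefix, value, comment = parse_attribute('name = "Elvish Archer" # icon\n')
    # key == 'name', prefix == 'name =', value == 'Elvish Archer',
    # and comment keeps the trailing ' # icon' text (newline included).
    return key, prefix, value, comment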
class Forest:
"Return an iterable directory forest object."
def __init__(self, dirpath, exclude=None):
"Get the names of all files under dirpath, ignoring version-control directories."
self.forest = []
self.dirpath = dirpath
roots = ["campaigns", "add-ons"]
for directory in dirpath:
subtree = []
rooted = False
if os.path.isdir(directory): # So we skip .cfgs in a UMC mirror
oldmain = os.path.join(os.path.dirname(directory), os.path.basename(directory) + '.cfg')
if os.path.isfile(oldmain):
subtree.append(oldmain)
base = os.path.basename(os.path.dirname(os.path.abspath(directory)))
if base in roots or base == "core":
rooted = True
for root, dirs, files in os.walk(directory):
dirs.sort()
dirlist = [x for x in dirs]
# Split out individual campaigns/add-ons into their own subtrees
if not rooted:
if os.path.basename(root) == "core":
rooted = True
elif os.path.basename(root) in roots:
for subdir in dirlist:
if subdir + '.cfg' in files:
files.remove(subdir + '.cfg')
dirs.remove(subdir)
dirpath.append(os.path.join(root, subdir))
rooted = True
elif "_info.cfg" in files or "info.cfg" in files:
rooted = True
roots.append(os.path.basename(os.path.dirname(os.path.abspath(root))))
else:
stop = min(len(dirs), 5)
count = 0
for subdir in dirlist[:stop]:
if os.path.isfile(os.path.join(root, subdir, '_info.cfg')):
count += 1
elif os.path.isfile(os.path.join(root, subdir, 'info.cfg')):
if os.path.isfile(os.path.join(root, subdir, 'COPYING.txt')):
count += 1
if count >= (stop // 2):
roots.append(os.path.basename(root))
for subdir in dirlist:
if subdir + '.cfg' in files:
files.remove(subdir + '.cfg')
dirs.remove(subdir)
dirpath.append(os.path.join(root, subdir))
subtree.extend([os.path.normpath(os.path.join(root, x)) for x in files])
# Always look at _main.cfg first
maincfgs = [elem for elem in subtree if elem.endswith("_main.cfg")]
rest = [elem for elem in subtree if not elem.endswith("_main.cfg")]
subtree = sorted(maincfgs) + sorted(rest)
self.forest.append(subtree)
for i, tree in enumerate(self.forest):
# Ignore version-control subdirectories and Emacs tempfiles
for dirkind in vc_directories + l10n_directories:
tree = [x for x in tree if dirkind not in x]
tree = [x for x in tree if '.#' not in x]
tree = [x for x in tree if not os.path.isdir(x)]
if exclude:
tree = [x for x in tree if not re.search(exclude, x)]
tree = [x for x in tree if not x.endswith("-bak")]
# Store the filtered list back, otherwise the filtering has no effect
self.forest[i] = tree
# Compute cliques (will be used later for visibility checks)
self.clique = {}
counter = 0
for tree in self.forest:
for filename in tree:
self.clique[filename] = counter
counter += 1
def parent(self, filename):
"Return the directory root that caused this path to be included."
return self.dirpath[self.clique[filename]]
def neighbors(self, fn1, fn2):
"Are two files from the same tree?"
return self.clique[fn1] == self.clique[fn2]
def flatten(self):
"Return a flattened list of all files in the forest."
allfiles = []
for tree in self.forest:
allfiles += tree
return allfiles
def generator(self):
"Return a generator that walks through all files."
for (directory, tree) in zip(self.dirpath, self.forest):
for filename in tree:
yield (directory, filename)
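# A minimal usage sketch (the helper and the directory argument are hypothetical):
# Forest pairs every file with the top-level directory that pulled it in.
def _forest_example():
    "Illustrative helper: list all files found under a directory forest."
    forest = Forest(["data/campaigns"])
    for (directory, filename) in forest.generator():
        print(directory, filename)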
def iswml(filename):
"Is the specified filename WML?"
return filename.endswith(".cfg")
def issave(filename):
"Is the specified filename a WML save? (Detects compressed saves too.)"
if isresource(filename):
return False
if filename.endswith(".gz"):
with gzip.open(filename) as content:
firstline = content.readline()
else:
try:
with codecs.open(filename, "r", "utf8") as content:
firstline = content.readline()
except UnicodeDecodeError:
# our saves are in UTF-8, so this file shouldn't be one
return False
return firstline.startswith("label=")
def isresource(filename):
"Is the specified name a resource?"
(root, ext) = os.path.splitext(filename)
return ext and ext[1:] in resource_extensions
def parse_macroref(start, line):
brackdepth = parendepth = 0
instring = False
args = []
arg = ""
for i in range(start, len(line)):
if instring:
if line[i] == '"':
instring = False
arg += line[i]
elif line[i] == '"':
instring = not instring
arg += line[i]
elif line[i] == "{":
if brackdepth > 0:
arg += line[i]
brackdepth += 1
elif line[i] == "}":
brackdepth -= 1
if brackdepth == 0:
if not line[i-1].isspace():
arg = arg.strip()
if arg.startswith('"') and arg.endswith('"'):
arg = arg[1:-1].strip()
args.append(arg)
arg = ""
break
else:
arg += line[i]
elif line[i] == "(":
parendepth += 1
elif line[i] == ")":
parendepth -= 1
elif not line[i-1].isspace() and \
line[i].isspace() and \
brackdepth == 1 and \
parendepth == 0:
arg = arg.strip()
if arg.startswith('"') and arg.endswith('"'):
arg = arg[1:-1].strip()
args.append(arg)
arg = ""
elif not line[i].isspace() or parendepth > 0:
arg += line[i]
return (args, brackdepth, parendepth)
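# A minimal illustration (the helper and the macro name are hypothetical): how
# parse_macroref() splits a macro call into its arguments.
def _parse_macroref_example():
    "Illustrative helper showing the argument list returned by parse_macroref()."
    args, brackdepth, parendepth = parse_macroref(0, "{MY_MACRO 2 (some text)}")
    # args == ['MY_MACRO', '2', 'some text']; both depths are 0 because the
    # call closes on the same line.
    return args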
def formaltype(f):
# Deduce the expected type of the formal
if f.startswith("_"):
f = f[1:]
if f == "SIDE" or f.endswith("_SIDE") or re.match("SIDE[0-9]", f):
ftype = "side"
elif f in ("SIDE", "X", "Y", "RED", "GREEN", "BLUE", "TURN", "PROB", "LAYER", "TIME", "DURATION") or f.endswith("NUMBER") or f.endswith("AMOUNT") or f.endswith("COST") or f.endswith("RADIUS") or f.endswith("_X") or f.endswith("_Y") or f.endswith("_INCREMENT") or f.endswith("_FACTOR") or f.endswith("_TIME") or f.endswith("_SIZE"):
ftype = "numeric"
elif f.endswith("PERCENTAGE"):
ftype = "percentage"
elif f in ("POSITION",) or f.endswith("_POSITION") or f == "BASE":
ftype = "position"
elif f.endswith("_SPAN"):
ftype = "span"
elif f == "SIDES" or f.endswith("_SIDES"):
ftype = "alliance"
elif f in ("RANGE",):
ftype = "range"
elif f in ("ALIGN",):
ftype = "alignment"
elif f in ("TYPES"):
ftype = "types"
elif f.startswith("ADJACENT") or f.startswith("TERRAINLIST") or f == "RESTRICTING":
ftype = "terrain_pattern"
elif f.startswith("TERRAIN") or f.endswith("TERRAIN"):
ftype = "terrain_code"
elif f in ("NAME", "NAMESPACE", "VAR", "IMAGESTEM", "ID", "FLAG", "BUILDER") or f.endswith("_NAME") or f.endswith("_ID") or f.endswith("_VAR") or f.endswith("_OVERLAY"):
ftype = "name"
elif f in ("ID_STRING", "NAME_STRING", "DESCRIPTION", "IPF"):
ftype = "optional_string"
elif f in ("STRING", "TYPE", "TEXT") or f.endswith("_STRING") or f.endswith("_TYPE") or f.endswith("_TEXT"):
ftype = "string"
elif f.endswith("IMAGE") or f == "PROFILE":
ftype = "image"
elif f.endswith("MUSIC",) or f.endswith("SOUND"):
ftype = "sound"
elif f.endswith("FILTER",):
ftype = "filter"
elif f == "WML" or f.endswith("_WML"):
ftype = "wml"
elif f in ("AFFIX", "POSTFIX", "ROTATION") or f.endswith("AFFIX"):
ftype = "affix"
# The regexp case avoids complaints about some wacky terrain macros.
elif f.endswith("VALUE") or re.match("[ARS][0-9]", f):
ftype = "any"
else:
ftype = None
return ftype
def actualtype(a):
if a is None:
return None
# Deduce the type of the actual
if a.isdigit() or a.startswith("-") and a[1:].isdigit():
atype = "numeric"
elif re.match(r"0\.[0-9]+\Z", a):
atype = "percentage"
elif re.match(r"-?[0-9]+,-?[0-9]+\Z", a):
atype = "position"
elif re.match(r"([0-9]+\-[0-9]+,?|[0-9]+,?)+\Z", a):
atype = "span"
elif a in ("melee", "ranged"):
atype = "range"
elif a in ("lawful", "neutral", "chaotic", "liminal"):
atype = "alignment"
elif a.startswith("{") or a.endswith("}") or a.startswith("$"):
atype = None # Can't tell -- it's a macro expansion
elif re.match(image_reference, a) or a == "unit_image":
atype = "image"
elif re.match(r"(\*|[A-Z][a-z]+)\^([A-Z][a-z\\|/]+\Z)?", a):
atype = "terrain_code"
elif a.endswith(".wav") or a.endswith(".ogg"):
atype = "sound"
elif a.startswith('"') and a.endswith('"') or (a.startswith("_") and a[1] not in string.ascii_lowercase):
atype = "stringliteral"
elif "=" in a:
atype = "filter"
elif re.match(r"[A-Z][a-z][a-z]?\Z", a):
atype = "shortname"
elif a == "":
atype = "empty"
elif not ' ' in a:
atype = "name"
else:
atype = "string"
return atype
def argmatch(formals, actuals):
if len(formals) != len(actuals):
return False
for (f, a) in zip(formals, actuals):
# Here's the compatibility logic. First, we catch the situations
# in which a more restricted actual type matches a more general
# formal one. Then we have a fallback rule checking for type
# equality or wildcarding.
ftype = formaltype(f)
atype = actualtype(a)
if ftype == "any":
pass
elif (atype == "numeric" or a == "global") and ftype == "side":
pass
elif atype in ("filter", "empty") and ftype == "wml":
pass
elif atype in ("numeric", "position") and ftype == "span":
pass
elif atype in ("shortname", "name", "empty", "stringliteral") and ftype == "affix":
pass
elif atype in ("shortname", "name", "stringliteral") and ftype == "string":
pass
elif atype in ("shortname", "name", "string", "stringliteral", "empty") and ftype == "optional_string":
pass
elif atype in ("shortname",) and ftype == "terrain_code":
pass
elif atype in ("numeric", "position", "span", "empty") and ftype == "alliance":
pass
elif atype in ("terrain_code", "shortname", "name") and ftype == "terrain_pattern":
pass
elif atype in ("string", "shortname", "name") and ftype == "types":
pass
elif atype in ("numeric", "percentage") and ftype == "percentage":
pass
elif atype == "range" and ftype == "name":
pass
elif atype != ftype and ftype is not None and atype is not None:
return False
return True
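# A minimal illustration (the helper and the formal/actual names are hypothetical):
# how the type-deduction helpers above are meant to be combined when checking a
# macro call against its #define signature.
def _argmatch_example():
    "Illustrative helper for formaltype()/actualtype()/argmatch()."
    formals = ["SIDE", "X", "Y"]      # formaltype(): side, numeric, numeric
    actuals = ["2", "10", "12"]       # actualtype(): numeric for all three
    return argmatch(formals, actuals) # numeric actuals satisfy these formals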
# the total_ordering decorator from functools lets us define only two comparison
# methods, and Python generates the remaining ones
# it comes with a speed penalty, but the alternative is defining six methods by hand...
@total_ordering
class Reference:
"Describes a location by file and line."
def __init__(self, namespace, filename, lineno=None, docstring=None, args=None):
self.namespace = namespace
self.filename = filename
self.lineno = lineno
self.docstring = docstring
self.args = args
self.references = collections.defaultdict(list)
self.undef = None
def append(self, fn, n, a=None):
self.references[fn].append((n, a))
def dump_references(self):
"Dump all known references to this definition."
for (file, refs) in self.references.items():
print(" %s: %s" % (file, repr([x[0] for x in refs])[1:-1]))
def __eq__(self, other):
return self.filename == other.filename and self.lineno == other.lineno
def __gt__(self, other):
# Major sort by file, minor by line number. This presumes that the
# files correspond to coherent topics and gives us control of the
# sequence.
if self.filename == other.filename:
return self.lineno > other.lineno
else:
return self.filename > other.filename
def mismatches(self):
copy = Reference(self.namespace, self.filename, self.lineno, self.docstring, self.args)
copy.undef = self.undef
for filename in self.references:
mis = [(ln,a) for (ln,a) in self.references[filename] if a is not None and not argmatch(self.args, a)]
if mis:
copy.references[filename] = mis
return copy
def __str__(self):
if self.lineno:
return '"%s", line %d' % (self.filename, self.lineno)
else:
return self.filename
__repr__ = __str__
class CrossRef:
macro_reference = re.compile(r"\{([A-Z_][A-Za-z0-9_:]*)(?!\.)\b")
file_reference = re.compile(r"[A-Za-z0-9{}.][A-Za-z0-9_/+{}.@-]*\.(" + "|".join(resource_extensions) + ")(?=(~.*)?)")
tag_parse = re.compile(r"\s*([a-z_]+)\s*=(.*)")
def mark_matching_resources(self, pattern, fn, n):
"Mark all definitions matching a specified pattern with a reference."
pattern = pattern.replace("+", r"\+")
pattern = os.sep + pattern + "$"
if os.sep == "\\":
pattern = pattern.replace("\\", "\\\\")
try:
pattern = re.compile(pattern)
except sre_constants.error:
print("wmlscope: confused by %s" % pattern, file=sys.stderr)
return None
key = None
for trial in self.fileref:
if pattern.search(trial) and self.visible_from(trial, fn, n):
key = trial
self.fileref[key].append(fn, n)
return key
def visible_from(self, defn, fn, n):
"Is specified definition visible from the specified file and line?"
if isinstance(defn, str):
defn = self.fileref[defn]
if defn.undef is not None:
# Local macros are only visible in the file where they were defined
return defn.filename == fn and n >= defn.lineno and n <= defn.undef
if self.exports(defn.namespace):
# Macros and resources in subtrees with export=yes are global
return True
elif not self.filelist.neighbors(defn.filename, fn):
# Otherwise, must be in the same subtree.
return False
else:
# If the two files are in the same subtree, assume visibility.
# This doesn't match the actual preprocessor semantics.
# It means any macro without an undef is visible anywhere in the
# same argument directory.
#
# We can't do better than this without a lot of hairy graph-
# coloring logic to simulate include path interpretation.
# If that logic ever gets built, it will go here.
return True
def scan_for_definitions(self, namespace, filename):
ignoreflag = False
conditionalsflag = False
with codecs.open(filename, "r", "utf8") as dfp:
state = "outside"
latch_unit = in_base_unit = in_theme = False
for (n, line) in enumerate(dfp):
if self.warnlevel > 1:
print(repr(line)[1:-1])
if line.strip().startswith("#textdomain"):
continue
m = re.search("# *wmlscope: warnlevel ([0-9]*)", line)
if m:
self.warnlevel = int(m.group(1))
print('"%s", line %d: warnlevel set to %d (definition-gathering pass)' \
% (filename, n+1, self.warnlevel))
continue
m = re.search("# *wmlscope: set *([^=]*)=(.*)", line)
if m:
prop = m.group(1).strip()
value = m.group(2).strip()
if namespace not in self.properties:
self.properties[namespace] = {}
self.properties[namespace][prop] = value
m = re.search("# *wmlscope: prune (.*)", line)
if m:
name = m.group(1)
if self.warnlevel >= 2:
print('"%s", line %d: pruning definitions of %s' \
% (filename, n+1, name ))
if name not in self.xref:
print("wmlscope: can't prune undefined macro %s" % name, file=sys.stderr)
else:
self.xref[name] = self.xref[name][:1]
continue
if "# wmlscope: start conditionals" in line:
if self.warnlevel > 1:
print('"%s", line %d: starting conditionals' \
% (filename, n+1))
conditionalsflag = True
elif "# wmlscope: stop conditionals" in line:
if self.warnlevel > 1:
print('"%s", line %d: stopping conditionals' \
% (filename, n+1))
conditionalsflag = False
if "# wmlscope: start ignoring" in line:
if self.warnlevel > 1:
print('"%s", line %d: starting ignoring (definition pass)' \
% (filename, n+1))
ignoreflag = True
elif "# wmlscope: stop ignoring" in line:
if self.warnlevel > 1:
print('"%s", line %d: stopping ignoring (definition pass)' \
% (filename, n+1))
ignoreflag = False
elif ignoreflag:
continue
if line.strip().startswith("#define"):
tokens = line.split()
if len(tokens) < 2:
print('"%s", line %d: malformed #define' \
% (filename, n+1), file=sys.stderr)
else:
name = tokens[1]
here = Reference(namespace, filename, n+1, line, args=tokens[2:])
here.hash = hashlib.md5()
here.docstring = line.lstrip()[8:] # Strip off "#define "
state = "macro_header"
continue
elif state != 'outside' and line.strip().endswith("#enddef"):
here.hash.update(line.encode("utf8"))
here.hash = here.hash.digest()
if name in self.xref:
for defn in self.xref[name]:
if not self.visible_from(defn, filename, n+1):
continue
elif conditionalsflag:
continue
elif defn.hash != here.hash:
print("%s: overrides different %s definition at %s" \
% (here, name, defn), file=sys.stderr)
elif self.warnlevel > 0:
print("%s: duplicates %s definition at %s" \
% (here, name, defn), file=sys.stderr)
if name not in self.xref:
self.xref[name] = []
self.xref[name].append(here)
state = "outside"
elif state == "macro_header" and line.strip() and line.strip()[0] != "#":
state = "macro_body"
if state == "macro_header":
# Ignore macro header comments that are pragmas
if "wmlscope" not in line and "wmllint:" not in line:
here.docstring += line.lstrip()[1:]
if state in ("macro_header", "macro_body"):
here.hash.update(line.encode("utf8"))
elif line.strip().startswith("#undef"):
tokens = line.split()
name = tokens[1]
if name in self.xref and self.xref[name]:
self.xref[name][-1].undef = n+1
else:
print("%s: unbalanced #undef on %s" \
% (Reference(namespace, filename, n+1), name))
if state == 'outside':
if '[unit_type]' in line:
latch_unit = True
elif '[/unit_type]' in line:
latch_unit = False
elif '[base_unit]' in line:
in_base_unit = True
elif '[/base_unit]' in line:
in_base_unit = False
elif '[theme]' in line:
in_theme = True
elif '[/theme]' in line:
in_theme = False
elif latch_unit and not in_base_unit and not in_theme and "id" in line:
m = CrossRef.tag_parse.search(line)
if m and m.group(1) == "id":
uid = m.group(2)
if uid not in self.unit_ids:
self.unit_ids[uid] = []
self.unit_ids[uid].append(Reference(namespace, filename, n+1))
latch_unit = False
def __init__(self, dirpath=[], exclude="", warnlevel=0, progress=False):
"Build cross-reference object from the specified filelist."
self.filelist = Forest(dirpath, exclude)
self.dirpath = [x for x in dirpath if not re.search(exclude, x)]
self.warnlevel = warnlevel
self.xref = {}
self.fileref = {}
self.noxref = False
self.properties = {}
self.unit_ids = {}
all_in = []
if self.warnlevel >=2 or progress:
print("*** Beginning definition-gathering pass...")
for (namespace, filename) in self.filelist.generator():
all_in.append((namespace, filename))
if self.warnlevel > 1:
print(filename + ":")
if progress:
print(filename)
if isresource(filename):
self.fileref[filename] = Reference(namespace, filename)
elif iswml(filename):
# It's a WML file, scan for macro definitions
self.scan_for_definitions(namespace, filename)
elif filename.endswith(".def"):
# It's a list of names to be considered defined
self.noxref = True
with codecs.open(filename, "r", "utf8") as dfp:
for line in dfp:
self.xref[line.strip()] = True
# Next, decorate definitions with all references from the filelist.
self.unresolved = []
self.missing = []
formals = []
state = "outside"
if self.warnlevel >=2 or progress:
print("*** Beginning reference-gathering pass...")
for (ns, fn) in all_in:
if progress:
print(fn)
if iswml(fn):
with codecs.open(fn, "r", "utf8") as rfp:
attack_name = None
beneath = 0
ignoreflag = False
for (n, line) in enumerate(rfp):
if line.strip().startswith("#define"):
formals = line.strip().split()[2:]
elif line.startswith("#enddef"):
formals = []
comment = ""
if '#' in line:
if "# wmlscope: start ignoring" in line:
if self.warnlevel > 1:
print('"%s", line %d: starting ignoring (reference pass)' \
% (fn, n+1))
ignoreflag = True
elif "# wmlscope: stop ignoring" in line:
if self.warnlevel > 1:
print('"%s", line %d: stopping ignoring (reference pass)' \
% (fn, n+1))
ignoreflag = False
m = re.search("# *wmlscope: warnlevel ([0-9]*)", line)
if m:
self.warnlevel = int(m.group(1))
print('"%s", line %d: self.warnlevel set to %d (reference-gathering pass)' \
% (fn, n+1, self.warnlevel))
continue
fields = line.split('#')
line = fields[0]
if len(fields) > 1:
comment = fields[1]
if ignoreflag or not line:
continue
# Find references to macros
for match in re.finditer(CrossRef.macro_reference, line):
name = match.group(1)
candidates = []
if self.warnlevel >=2:
print('"%s", line %d: seeking definition of %s' \
% (fn, n+1, name))
if name in formals:
continue
elif name in self.xref:
# Count the number of actual arguments.
# Set args to None if the call doesn't
# close on this line
(args, brackdepth, parendepth) = parse_macroref(match.start(0), line)
if brackdepth > 0 or parendepth > 0:
args = None
else:
args.pop(0)
#if args:
# print('"%s", line %d: args of %s is %s' \
# % (fn, n+1, name, args))
# Figure out which macros might resolve this
for defn in self.xref[name]:
if self.visible_from(defn, fn, n+1):
defn.append(fn, n+1, args)
candidates.append(str(defn))
if len(candidates) > 1:
print("%s: more than one definition of %s is visible here (%s)." % (Reference(ns, fn, n), name, "; ".join(candidates)))
if len(candidates) == 0:
self.unresolved.append((name,Reference(ns,fn,n+1)))
# Don't be fooled by HTML image references in help strings.
if "<img>" in line:
continue
# Find references to resource files
for match in re.finditer(CrossRef.file_reference, line):
name = match.group(0)
# Catches maps that look like macro names.
if (name.endswith(".map") or name.endswith(".mask")) and name[0] == '{':
name = name[1:]
if os.sep == "\\":
name = name.replace("/", "\\")
key = None
# If name is already in our resource list, it's easy.
if name in self.fileref and self.visible_from(name, fn, n):
self.fileref[name].append(fn, n+1)
continue
# If the name contains substitutable parts, count
# it as a reference to everything the substitutions
# could potentially match.
elif '{' in name or '@' in name:
pattern = re.sub(r"(\{[^}]*\}|@R0|@V)", '.*', name)
key = self.mark_matching_resources(pattern, fn,n+1)
if key:
self.fileref[key].append(fn, n+1)
else:
candidates = []
for trial in self.fileref:
if trial.endswith(os.sep + name) and self.visible_from(trial, fn, n):
key = trial
self.fileref[trial].append(fn, n+1)
candidates.append(trial)
if len(candidates) > 1:
print("%s: more than one resource matching %s is visible here (%s)." % (Reference(ns,fn, n), name, ", ".join(candidates)))
if not key:
self.missing.append((name, Reference(ns,fn,n+1)))
# Notice implicit references through attacks
if state == "outside":
if "[attack]" in line:
beneath = 0
attack_name = default_icon = None
have_icon = False
elif "name=" in line and not "no-icon" in comment:
attack_name = line[line.find("name=")+5:].strip()
default_icon = os.path.join("attacks", attack_name + ".png")
elif "icon=" in line and beneath == 0:
have_icon = True
elif "[/attack]" in line:
if attack_name and not have_icon:
candidates = []
key = None
for trial in self.fileref:
if trial.endswith(os.sep + default_icon) and self.visible_from(trial, fn, n):
key = trial
self.fileref[trial].append(fn, n+1)
candidates.append(trial)
if len(candidates) > 1:
print("%s: more than one definition of %s is visible here (%s)." % (Reference(ns,fn, n), name, ", ".join(candidates)))
if not key:
self.missing.append((default_icon, Reference(ns,fn,n+1)))
elif line.strip().startswith("[/"):
beneath -= 1
elif line.strip().startswith("["):
beneath += 1
# Check whether each namespace has a defined export property
for namespace in self.dirpath:
if namespace not in self.properties or "export" not in self.properties[namespace]:
print("warning: %s has no export property" % namespace)
def exports(self, namespace):
return namespace in self.properties and self.properties[namespace].get("export") == "yes"
def subtract(self, filelist):
"Transplant file references in files from filelist to a new CrossRef."
smallref = CrossRef()
for filename in self.fileref:
for (referrer, referlines) in self.fileref[filename].references.items():
if referrer in filelist:
if filename not in smallref.fileref:
smallref.fileref[filename] = Reference(None, filename)
smallref.fileref[filename].references[referrer] = referlines
del self.fileref[filename].references[referrer]
return smallref
def refcount(self, name):
"Return a reference count for the specified resource."
try:
return len(self.fileref[name].references)
except KeyError:
return 0
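# A minimal usage sketch (the helper and the directory list are hypothetical):
# how the definition- and reference-gathering passes above are normally driven,
# and how the collected results are read back.
def _crossref_example():
    "Illustrative helper: report unresolved macros and missing resources."
    xref = CrossRef(["data/core"], warnlevel=0)
    for (name, reference) in xref.unresolved:
        print("unresolved macro %s referenced at %s" % (name, reference))
    for (name, reference) in xref.missing:
        print("missing resource %s referenced at %s" % (name, reference))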
#
# String translations from po files. The advantage of this code is that it
# does not require the gettext binary message catalogs to have been compiled.
# The disadvantage is that it eats lots of core!
#
class TranslationError(Exception):
def __init__(self, textdomain, isocode):
self.isocode = isocode
self.textdomain = textdomain
def __str__(self):
return "No translations found for %s/%s.\n" % (
self.textdomain, self.isocode)
class Translation(dict):
"Parses a po file to create a translation dictionary."
def __init__(self, textdomain, isocode, topdir=""):
self.textdomain = textdomain
self.isocode = isocode
self.gettext = {}
if self.isocode != "C":
isocode2 = isocode[:isocode.rfind("_")]
for code in [isocode, isocode2]:
fn = "po/%s/%s.po" % (textdomain, code)
if topdir: fn = os.path.join(topdir, fn)
try:
f = codecs.open(fn, "r", "utf8")
break
except IOError:
pass
else:
raise TranslationError(textdomain, self.isocode)
expect = False
fuzzy = "#, fuzzy\n"
gettext = f.read()
matches = re.compile(r"""(msgid|msgstr)((\s*".*?")+)""").finditer(gettext)
msgid = ""
for match in matches:
text = "".join(re.compile('"(.*?)"').findall(match.group(2)))
if match.group(1) == "msgid":
msgid = text.replace("\\n", "\n")
expect = gettext[match.start(1) - len(fuzzy):match.start(1)] != fuzzy
elif expect:
self.gettext[msgid] = text.replace("\\n", "\n")
def get(self, key, dflt):
if self.isocode == "C":
if key:
return key[key.find("^") + 1:]
return "?"
else:
t = self.gettext.get(key, dflt)
if not t:
if key:
return key[key.find("^") + 1:]
return "?"
return t
def __getitem__(self, key):
if self.isocode == "C":
return key
else:
return self.gettext[key]
def __contains__(self, key):
if self.isocode == "C":
return True
else:
return key in self.gettext
class Translations:
"Wraps around Translation to support multiple languages and domains."
def __init__(self, topdir = ""):
self.translations = {}
self.topdir = topdir
def get(self, textdomain, isocode, key, default):
t = (textdomain, isocode)
if not t in self.translations:
try:
self.translations[t] = Translation(textdomain, isocode, self.topdir)
except TranslationError as e:
print(str(e), file=sys.stderr)
self.translations[t] = Translation(textdomain, "C", self.topdir)
result = self.translations[t].get(key, default)
return result
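# A minimal usage sketch (the helper, textdomain, language and key are
# placeholders): the intended entry point for po lookups.
def _translation_example(topdir="."):
    "Illustrative helper: look up one translated string."
    translations = Translations(topdir)
    # Falls back to the untranslated "C" catalog if no matching po file is found.
    return translations.get("wesnoth-lib", "de", "Close", "Close")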
## Namespace management
#
# This is the only part of the code that actually knows about the
# shape of the data tree.
def scopelist():
"Return a list of (separate) package scopes, core first."
return ["data/core"] + glob.glob("data/campaigns/*")
def is_namespace(name):
"Is the name either a valid campaign name or core?"
return name in map(os.path.basename, scopelist())
def namespace_directory(name):
"Go from namespace to directory."
if name == "core":
return "data/core/"
else:
return "data/campaigns/" + name + "/"
def directory_namespace(path):
"Go from directory to namespace."
if path.startswith("data/core/"):
return "core"
elif path.startswith("data/campaigns/"):
return path.split("/")[2]
else:
return None
def namespace_member(path, namespace):
"Is a path in a specified namespace?"
ns = directory_namespace(path)
return ns is not None and ns == namespace
def resolve_unit_cfg(namespace, utype, resource=None):
"Get the location of a specified unit in a specified scope."
if resource:
resource = os.path.join(utype, resource)
else:
resource = utype
loc = namespace_directory(namespace) + "units/" + resource
if not loc.endswith(".cfg"):
loc += ".cfg"
return loc
def resolve_unit_image(namespace, subdir, resource):
"Construct a plausible location for given resource in specified namespace."
return os.path.join(namespace_directory(namespace), "images/units", subdir, resource)
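# A minimal illustration (the helper and the unit name are hypothetical): what
# the resolvers above produce for a unit in the core namespace.
def _namespace_example():
    "Illustrative helper for the namespace resolvers."
    assert namespace_directory("core") == "data/core/"
    assert resolve_unit_cfg("core", "Some_Unit") == "data/core/units/Some_Unit.cfg"
    # resolve_unit_image("core", "elves-wood", "some-unit.png") joins the same
    # namespace directory with "images/units", the subdir and the file name.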
# And this is for code that does syntax transformation
baseindent = " "
## Version-control hooks begin here.
#
# Not tested since the git transition
vcdir = ".git"
def vcmove(src, dst):
"Move a file under version control. Only applied to unmodified files."
(path, base) = os.path.split(src)
if os.path.exists(os.path.join(path, ".git")):
return "git mv %s %s" % (src, dst)
else:
return "echo 'cannot move %s to %s, .git is missing'" % (src, dst)
def vcunmove(src, dst):
"Revert the result of a previous move (before commit)."
(path, base) = os.path.split(src)
if os.path.exists(os.path.join(path, ".git")):
return "git checkout %s" % dst # Revert the add at the destination
return "git rm " + dst # Remove the moved copy
return "git checkout %s" % src # Revert the deletion
else:
return "echo 'cannot unmove %s from %s, .git is missing'" % (src, dst)
def vcdelete(src):
"Delete a file under version control."
(path, base) = os.path.split(src)
if os.path.exists(os.path.join(path, ".git")):
return "git rm %s" % src
else:
return "echo 'cannot undelete %s, .git is missing'" % src
def vcundelete(src):
"Revert the result of a previous delete (before commit)."
(path, base) = os.path.split(src)
if os.path.exists(os.path.join(path, ".git")):
return "git checkout %s" % src # Revert the deletion
else:
return "echo 'cannot undelete %s, .git is missing'" % src
#
## Version-control hooks end here
# wmltools.py ends here