wmlunits: Made the Python wml parser more robust...

...when dealing with invalid WML, it should now abort with a
meaningful error message for just about any bad WML you throw at it.
Elias Pschernig 2008-05-16 16:05:51 +00:00
parent fc65c10b60
commit 5cb6bf80eb
3 changed files with 175 additions and 97 deletions
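
In practice this means front ends such as wmlunits can treat a parse failure as an ordinary, catchable error. A minimal sketch of that pattern, assuming the wmlparser.Error class used elsewhere in this diff and a parse(text) wrapper like the ParserWithCoreMacros one in the first file below; the parse_or_report helper itself is illustrative and not part of the commit:

import sys
import wmlparser

def parse_or_report(parser, wml_text):
    # parser is assumed to expose a parse(text) method, as the
    # ParserWithCoreMacros wrapper in this commit does.
    try:
        return parser.parse(wml_text)
    except wmlparser.Error, e:
        # Invalid WML now raises wmlparser.Error with a useful message
        # instead of taking the whole tool down.
        sys.stderr.write("Parse failed: %s\n" % e)
        return None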

View file

@@ -22,7 +22,6 @@ class ParserWithCoreMacros:
# Create a new parser for the macros.
parser = wmlparser.Parser(datadir)
parser.do_preprocessor_logic = True
parser.gettext = self.gettext
# Parse core macros.
@@ -33,10 +32,11 @@ class ParserWithCoreMacros:
def parse(self, text_to_parse, ignore_macros = None):
# Create the real parser.
parser = wmlparser.Parser(self.datadir, self.userdir)
parser.do_preprocessor_logic = True
parser.gettext = self.gettext
parser.macros = copy.copy(self.core_macros)
#parser.verbose = True
# Suppress complaints about undefined terrain macros
parser.set_macro_not_found_callback(lambda wmlparser, name, params:
name.startswith("TERRAIN") or name == "DISABLE_TRANSITIONS")
@@ -136,6 +136,7 @@ class ImageCollector:
sys.stderr.write("Warning: Looked at the following locations:\n")
sys.stderr.write("\n".join(bases) + "\n")
blah = 1
class WesnothList:
"""
Lists various Wesnoth stuff like units, campaigns, terrains, factions...
@@ -192,6 +193,10 @@ class WesnothList:
def add_campaign(self, campaign):
name = campaign.get_text_val("id")
if not name:
global blah
name = "noid%d" % blah
blah += 1
self.campaign_lookup[name] = campaign
return name
@@ -229,18 +234,15 @@ class WesnothList:
#define RANDOM_SIDE\n#enddef
{~campaigns}
""")
except Exception, e:
except wmlparser.Error, e:
print e
return n
for campaign in WML.find_all("campaign"):
cid = self.add_campaign(campaign)
n += 1
image_collector.add_binary_pathes_from_WML(cid, WML)
for era in WML.find_all("era"):
eid = self.add_era(era)
image_collector.add_binary_pathes_from_WML(eid, WML)
n += 1
n = self.add_units(WML, "addons")

View file

@@ -75,11 +75,9 @@ class Parser:
# is expected to return a translation.
self.gettext = None
# If set, we actually do preprocessor logic with #ifdef and so on.
# Otherwise, they get included in the parse tree as nodes.
self.do_preprocessor_logic = False
# Internal flag while inside a non-parsed block
self.just_parse = False
# A list containing the stacked up #ifdefs.
self.preprocessor_nesting = []
self.stay_in_file = False
# If set, included files are only parsed when under the given directory.
self.only_expand_pathes = []
@@ -102,7 +100,7 @@ class Parser:
except UnicodeDecodeError:
u = text.decode("latin1")
text = u
text = text.replace("\r\n", "\n").replace("\t", " ")
text = text.replace("\r\n", "\n").replace("\t", " ").replace("\r", "\n")
if text[-1] != "\n": text += "\n"
return text
@@ -127,7 +125,7 @@ class Parser:
Set the parser to parse from a file object.
"""
text = stream.read()
text = text.replace("\r\n", "\n").replace("\t", " ")
text = text.replace("\r\n", "\n").replace("\t", " ").replace("\r", "\n")
self.push_text("inline", text)
def parse_text(self, text):
@@ -188,7 +186,7 @@ class Parser:
if c == "\n":
self.line += 1
if self.textpos == len(self.text):
if len(self.texts): self.pop_text()
if len(self.texts) and not self.stay_in_file: self.pop_text()
return c
def at_end(self):
@@ -196,12 +194,17 @@ class Parser:
Return True if the parser is at the very end of the input, that is the
last character of the topmost input text has been read.
"""
return len(self.texts) == 0 and self.textpos == len(self.text)
return (len(self.texts) == 0
or self.stay_in_file) and self.textpos == len(self.text)
def check_end(self):
if self.textpos == len(self.text):
if len(self.texts): self.pop_text()
def peek_next(self):
"""Like read_next, but does not consume."""
if self.textpos >= len(self.text):
if len(self.texts):
if len(self.texts) and not self.stay_in_file:
ts = self.texts[-1]
if ts.textpos >= len(ts.text): return ""
return ts.text[ts.textpos]
@@ -214,14 +217,14 @@ class Parser:
found = mob.group(0)
self.line += found.count("\n")
self.textpos = mob.end(0)
if self.textpos == len(self.text):
if self.textpos == len(self.text) and not self.stay_in_file:
if len(self.texts): self.pop_text()
return found
else:
found = self.text[self.textpos:]
self.line += found.count("\n")
self.textpos = len(self.text)
if len(self.texts):
if len(self.texts) and not self.stay_in_file:
self.pop_text()
found += self.read_until(sep)
return found
@@ -240,18 +243,63 @@ class Parser:
def skip_whitespace_and_newlines(self):
self.read_while(" \t\r\n")
preprocessor_commands = ["#define", "#undef", "#textdomain", "#ifdef",
"#ifndef", "#else", "#enddef", "#endif"]
def read_lines_until(self, string):
"""
Read lines until one contains the given string, but throw away any
comments.
"""
text = ""
in_string = False
while 1:
if self.at_end():
return None
line = self.read_until("\n")
line_start = 0
if in_string:
string_end = line.find('"')
if string_end < 0:
text += line
continue
in_string = False
line_start = string_end + 1
elif line.lstrip().startswith("#"):
possible_comment = line.lstrip()
for com in self.preprocessor_commands:
if possible_comment.startswith(com):
break
else:
continue
quotation = line.find('"', line_start)
while quotation >= 0:
in_string = True
string_end = line.find('"', quotation + 1)
if string_end < 0: break
line_start = string_end + 1
in_string = False
quotation = line.find('"', line_start)
if not in_string:
end = line.find(string, line_start)
if end >= 0:
text += line[:end]
break
text += line
return text
def skip_whitespace_inside_statement(self):
self.read_while(" \t\r\n")
if not self.at_end():
c = self.peek_next()
if c == "#":
if self.check_for("#define"): return
if self.check_for("#undef"): return
if self.check_for("#textdomain"): return
if self.check_for("#ifdef"): return
if self.check_for("#ifndef"): return
if self.check_for("#else"): return
if self.check_for("#end"): return
for command in self.preprocessor_commands:
if self.check_for(command): return
self.read_until("\n")
self.skip_whitespace_inside_statement()
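
The new read_lines_until helper above collects a #define body up to #enddef, throwing away comment lines while tracking double quotes, so a line that merely starts with "#" inside a multi-line quoted string is not mistaken for a comment. A rough standalone sketch of the quote-tracking idea, simplified to cutting a single line at the first unquoted "#" and without the real helper's special-casing of preprocessor commands (the function name is mine):

def strip_comment(line, in_string):
    # Walk the line character by character; toggle in_string on each
    # double quote and cut the line at the first '#' seen outside a string.
    kept = []
    for c in line:
        if c == '"':
            in_string = not in_string
        elif c == "#" and not in_string:
            break
        kept.append(c)
    return "".join(kept), in_string

Carrying the returned in_string flag into the next call is what lets a quoted string span several lines, which is the same bookkeeping read_lines_until does with its own in_string variable.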
@@ -262,18 +310,6 @@ class Parser:
"""Compare the following text with str."""
return self.text[self.textpos:self.textpos + len(str)] == str
def read_upto_string(self, str):
"""Read input up to and including the given string."""
pos = self.text.find(str, self.textpos)
if pos == -1:
return None
found = self.text[self.textpos:pos]
self.textpos = pos + len(str)
self.line += found.count("\n")
if self.textpos == len(self.text):
self.pop_text()
return found
def parse_macro(self):
"""No recursive macro processing is done here. If a macro is passed as
parameter to a macro, then whoever parses the macro replacement will do
@@ -295,10 +331,6 @@ class Parser:
raise Error(self, "Unclosed macro")
return
if self.just_parse:
# We do not execute any macros or file inclusions.
return None
preserve = macro
macro = macro[:-1] # Get rid of final }
@@ -448,8 +480,8 @@ class Parser:
rep = self.gettext(self.textdomain, rep[q + 1:qe])
rep = '"' + rep + '"'
if self.verbose:
s = "Replacing {%s} with %s" % (macro.params[i], rep)
print s.encode("utf8")
s = "Replacing {%s} with %s\n" % (macro.params[i], rep)
sys.stderr.write(s.encode("utf8"))
text = text.replace("{%s}" % macro.params[i], rep)
if text:
@@ -507,6 +539,7 @@ class Parser:
elif not got_assign:
if c == "=":
variables += [variable.rstrip()]
if variables[-1] == "mode": fuck = True
got_assign = True
translatable = False
self.skip_whitespace()
@@ -567,11 +600,15 @@ class Parser:
data = []
j = 0
for i in range(len(variables)):
data += [wmldata.DataText(variables[i], values[j])]
try:
data += [wmldata.DataText(variables[i], values[j])]
except IndexError:
raise Error(self, "Assignment does not match: %s = %s" % (
str(variables), str(values)))
j += 1
return data
def parse_top(self, data, state = None, dont_parse_else = False):
def parse_top(self, data, state = None):
while 1:
self.skip_whitespace_and_newlines()
if self.at_end():
@@ -588,20 +625,21 @@ class Parser:
if name: params += [name]
if sep == "\n": break
self.read_while(" ")
text = self.read_upto_string("#enddef")
text = self.read_lines_until("#enddef")
if text == None:
raise Error(self, "#define without #enddef")
return
if not self.just_parse:
self.macros[params[0]] = self.Macro(
params[0], params[1:], text, self.textdomain)
if self.verbose:
sys.stderr.write("New macro: %s.\n" % params[0])
self.macros[params[0]] = self.Macro(
params[0], params[1:], text, self.textdomain)
if self.verbose:
sys.stderr.write("New macro: %s.\n" % params[0])
elif self.check_for("undef "):
self.read_until(" ")
name = self.read_until(" \n")
self.macros[name] = None
name = name.rstrip()
if name in self.macros: del self.macros[name]
elif self.check_for("ifdef ") or self.check_for("ifndef"):
what = "#" + self.read_until(" ").rstrip()
@@ -610,58 +648,75 @@ class Parser:
if name[-1] == " ": self.read_while(" \n")
name = name[:-1]
subdata = wmldata.DataIfDef(name, [], "then")
dont_parse_else = False
if self.do_preprocessor_logic:
dont_parse_else = True
prev = self.just_parse
if what == "#ifdef":
if not name in self.macros:
self.just_parse = True
dont_parse_else = prev
elif what == "#ifndef":
if name in self.macros:
self.just_parse = True
dont_parse_else = prev
condition_failed = False
if what == "#ifdef":
if name in self.macros:
pass
else:
condition_failed = True
else: # what == "#ifndef"
if not name in self.macros:
pass
else:
condition_failed = True
self.parse_top(subdata, what, dont_parse_else)
self.preprocessor_nesting.append((what, condition_failed))
if self.do_preprocessor_logic:
self.just_parse = prev
if self.just_parse == False:
elses = subdata.get_ifdefs("else")
if dont_parse_else:
if elses:
subdata.remove(elses[0])
data.append(subdata)
else:
if elses:
data.append(elses[0])
else:
data.insert(subdata)
# If the condition is true, we simply continue parsing. At
# some point we will either hit an #else or #endif, and
# things continue there. If the condition failed, we skip
# over everything until we find the matching #else or
# endif.
if condition_failed:
self.stay_in_file = True
balance = 1
while balance > 0 and not self.at_end():
line = self.read_until("\n")
line = line.lstrip()
if line.startswith("#ifdef"): balance += 1
if line.startswith("#ifndef"): balance += 1
if line.startswith("#endif"): balance -= 1
if line.startswith("#else"):
if balance == 1:
balance = -1
break
self.stay_in_file = False
if balance == 0:
self.preprocessor_nesting.pop()
if balance > 0:
raise Error(self, "Missing #endif for %s" % what)
self.check_end()
elif self.check_for("else"):
if not self.preprocessor_nesting:
raise Error(self, "#else without #ifdef")
self.read_until("\n")
if state != "#ifdef" and state != "#ifndef":
raise Error(self, "#else without #ifdef")
subdata = wmldata.DataIfDef("else", [], "else")
if self.do_preprocessor_logic:
self.just_parse = dont_parse_else
self.parse_top(subdata, "#else")
# We have seen an #else - that means we are at the end of a
# conditional preprocessor block which has executed. So
# we should now ignore everything up to the #endif.
balance = 1
self.stay_in_file = True
while balance > 0 and not self.at_end():
line = self.read_until("\n")
line = line.lstrip()
if line.startswith("#ifdef"): balance += 1
if line.startswith("#ifndef"): balance += 1
if line.startswith("#endif"): balance -= 1
self.stay_in_file = False
if balance != 0:
raise Error(self, "Missing #endif for #else")
data.insert(subdata)
return
self.check_end()
elif self.check_for("endif"):
if not self.preprocessor_nesting:
raise Error(self, "#endif without #ifdef")
self.preprocessor_nesting.pop()
self.read_until("\n")
if state != "#ifdef" and state != "#else" and state !=\
"#ifndef":
self.read_until("\n")
raise Error(self, "#endif without #ifdef or #else")
return
elif self.check_for("textdomain"):
self.read_until(" ")
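
The #ifdef/#ifndef handling above now skips a failed branch by counting nested conditionals instead of re-parsing it in a "just parse" mode. A standalone sketch of that balance-counting idea over a list of lines; the function name and the line-based simplification are mine, not the parser's actual interface:

def skip_failed_branch(lines, start):
    # Returns the index of the first line to resume parsing at: either just
    # past the matching #endif, or just past a top-level #else whose body
    # should now be parsed instead.
    balance = 1
    i = start
    while balance > 0 and i < len(lines):
        line = lines[i].lstrip()
        i += 1
        if line.startswith("#ifdef") or line.startswith("#ifndef"):
            balance += 1
        elif line.startswith("#endif"):
            balance -= 1
        elif line.startswith("#else") and balance == 1:
            break
    return i

The stay_in_file flag seen throughout this file supports the same skip: it keeps read_until from popping the current input text off the stack while the loop is still scanning inside it.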

View file

@@ -112,7 +112,7 @@ class HTMLOutput:
au = self.wesnoth.unit_lookup[auid]
except KeyError:
sys.stderr.write(
"Warning:Unit %s not found as advancement of %s" %
"Warning: Unit %s not found as advancement of %s\n" %
(auid, uid))
continue
forest.add_node(helpers.UnitNode(au))
@@ -321,7 +321,9 @@ class HTMLOutput:
anames = []
already = {}
for abilities in u.get_all("abilities"):
for ability in abilities.children():
try: c = abilities.children()
except AttributeError: c = []
for ability in c:
id = ability.get_text_val("id")
if id in already: continue
already[id] = True
@@ -879,7 +881,7 @@ def output(isocode):
n = stuff.add_units(WML, "mainline")
print n, "mainline units found."
# Now we read each campaign in turn to get its units.
# Now we read each mainline campaign in turn to get its units.
cnames = stuff.campaign_lookup.keys()
for cname in cnames:
print "Parsing %s units ..." % cname,
@@ -894,9 +896,28 @@ def output(isocode):
image_collector.add_binary_pathes_from_WML(cname, WML)
print n, "units found."
sys.stderr.flush()
print "Parsing addons ...",
sys.stdout.flush()
n = stuff.add_addons(image_collector)
print "%d units found." % n
# Now we read each addon campaign in turn to get its units.
cnames = stuff.campaign_lookup.keys()
for cname in cnames:
if cname in stuff.is_mainline_campaign: continue
campaign = stuff.campaign_lookup[cname]
print "Parsing %s units ..." % cname,
sys.stdout.flush()
define = campaign.get_text_val("define")
WML = stuff.parser.parse("""
#define %s\n#enddef
{~campaigns}""" % define,
ignore_macros = lambda x: x.find("/scenarios") == -1)
n = stuff.add_units(WML, cname)
image_collector.add_binary_pathes_from_WML(cname, WML)
print n, "units found."
stuff.find_unit_factions()