wesnoth/data/tools/pywmlx/state/machine.py
2024-03-24 21:33:47 +01:00

465 lines
17 KiB
Python

import re
# import os
from pywmlx.wmlerr import wmlerr
from pywmlx.wmlerr import wmlwarn
from pywmlx.wmlerr import warnall
from pywmlx.postring import PoCommentedString
from pywmlx.postring import PoCommentedStringPL
from pywmlx.state.state import State
from pywmlx.state.lua_states import setup_luastates
from pywmlx.state.wml_states import setup_wmlstates
import pywmlx.nodemanip
import pdb
class UniversalSet:
    """Container-like object for which every membership test succeeds."""

    def __contains__(self, any):
        """Report *any* as a member, whatever it is."""
        return True


# Universe - shared UniversalSet instance: `x in Universe` is always True,
# so using it as a filter is equivalent to not filtering at all.
Universe = UniversalSet()
# --------------------------------------------------------------------
#  PART 1: machine.py global variables
# --------------------------------------------------------------------
# True if the --warnall option is used
_warnall = False
# True if the -D option is used
_debugmode = False
# debug output file
_fdebug = None
# dictionary of pot sentences
_dictionary = None
# dictionary containing the lua and WML states (filled through addstate)
_states = None
# initialdomain value (set with the --initialdomain command line option)
_initialdomain = None
# the current domain value while parsing a file
# (changed by a #textdomain directive)
_currentdomain = None
# the accepted domains (set with the --domain command line option).
# It defaults to Universe, which contains every value, so that by default
# no domain is filtered out.
_domains = Universe
# this boolean value will usually be:
#   True (when the file is a WML .cfg file)
#   False (when the file is a .lua file)
_waitwml = True
# this boolean value is very useful to avoid a possible bug
# that shows up in a special case
# (see WmlGoluaState in wml_states.py for more details)
_on_luatag = False
# ---------
# pending additional infos for translators, collected from "# po"
# or "# po-override" comments.
_pending_cinfo = {
    # pending additional infos for translators (# po: addedinfo)
    "po": None,
    # pending override wmlinfo for translators (# po-override: overrideinfo)
    "po-override": None,
}
# type of pending wmlinfo:
# it can be None or it can have an actual value.
# Possible actual values are: 'speaker', 'id', 'role', 'description',
#                             'condition', 'type', 'race' or 'gender'
_pending_winfotype = None
# ----------
# the last function name encountered in lua code (if any).
# If no lua function has been encountered yet, this var will be None
_pending_luafuncname = None
# ----------
# pending lua/wml strings (they will be evaluated and, if translatable,
# added to _dictionary)
_pending_luastring = None
_pending_wmlstring = None
# ----------
# line number counter for the file being parsed
_current_lineno = 0
# _linenosub helps to set the right orderid of the future PoCommentedString
_linenosub = 0
# --------------------------------------------------------------------
# PART 2: machine.py functions and classes
# --------------------------------------------------------------------
def clear_pending_infos(lineno, error=False):
    """Forget every pending "# po" / "# po-override" directive.

    The shared _pending_cinfo dictionary is modified in place: each entry
    is reset to None.

    lineno -- line number used in the error location (fileref:lineno).
    error  -- when true, every directive that is still pending is reported
              through wmlerr before it is discarded.
    """
    for directive, pending in _pending_cinfo.items():
        if pending is not None and error:
            location = pywmlx.nodemanip.fileref + ":" + str(lineno)
            wmlerr(location,
                   "#%s directive(s) not applied: %s" % (directive, pending))
        # only existing keys are reassigned, so the dictionary size does
        # not change while it is being iterated
        _pending_cinfo[directive] = None
def after_pending_info(lineno, error):
    """Discard the pending translator directives once a string was handled.

    Delegates to clear_pending_infos; *error* tells whether directives that
    are still pending must be reported before being dropped.
    """
    clear_pending_infos(lineno, error)
def checkdomain(lineno):
    """Tell whether the current textdomain is one of the accepted domains.

    Returns True when _currentdomain is contained in _domains.
    Otherwise the pending translator directives are cleared (reporting the
    ones that could not be applied) and False is returned.
    """
    if _currentdomain not in _domains:
        clear_pending_infos(lineno, error=True)
        return False
    return True
def switchdomain(lineno, domain):
    """Make *domain* the current textdomain.

    Nothing happens when *domain* already is the current one. On an actual
    change the pending translator directives are cleared first (reporting
    the ones that could not be applied), then _currentdomain is updated.
    """
    global _currentdomain
    if _currentdomain == domain:
        return
    clear_pending_infos(lineno, error=True)
    _currentdomain = domain
def checksentence(mystring, finfo, *, islua=False):
    """Check a translatable sentence and warn about suspicious content.

    mystring -- the sentence to verify.
    finfo    -- file reference passed to wmlwarn as the warning location.
    islua    -- True when the sentence comes from lua code; the WML macro
                check is skipped in that case.

    Returns:
        1 -- the sentence is empty or whitespace-only (a warning is issued)
        2 -- warnall() is on, the sentence is not a lua one and it contains
             a "}" character (a warning is issued)
        0 -- nothing to report
    """
    if re.match(r'\s*$', mystring):
        wmlwarn(finfo, "found an empty translatable message")
        return 1
    # the macro check is performed only for non-lua strings and only when
    # all warnings were requested
    if not warnall() or islua:
        return 0
    if "}" not in mystring:
        return 0
    wmsg = ("found a translatable string containing a WML macro. "
            " Translation for this string will NEVER work")
    wmlwarn(finfo, wmsg)
    return 2
class PendingPlural:
    """Collects the plural form of a lua translatable string.

    The text is accumulated line by line with addline() and finally turned
    into a PoCommentedStringPL by convert().
    """

    def __init__(self):
        # status values:
        #   'wait_string' --> right after "_ (", when we still need to know
        #                     which string type we will manage
        #   'wait_plural' --> after the first argument: search for the
        #                     plural or for the closing parenthesis
        #   'wait_close'  --> expect the closing parenthesis
        self.status = 'wait_string'
        self.string = ''
        # 0 until a string type is assigned; types 2 and 3 get a dedicated
        # treatment in addline() and convert()
        # (presumably they mirror the 'luastr2'/'luastr3' lua string types)
        self.pluraltype = 0
        self.numequals = 0
        self.ismultiline = False

    def addline(self, value, isfirstline=False):
        """Store *value* as the first line or append it as a new line.

        For type 3 strings every backslash is doubled and whitespace is
        kept; for all other types trailing whitespace is removed.
        """
        if self.pluraltype == 3:
            value = value.replace('\\', r'\\')
        else:
            value = re.sub(r'\s*$', '', value)
        self.string = value if isfirstline else self.string + '\n' + value

    def convert(self):
        """Escape and quote the collected text; return a PoCommentedStringPL.

        self.string is replaced by the quoted text as a side effect.
        """
        kind = self.pluraltype
        text = self.string
        if kind == 3:
            # type 3: every double quote is escaped
            text = text.replace('"', r'\"')
        else:
            if kind == 2:
                # type 2: \' becomes a plain '
                text = re.sub(r"\\\'", r"'", text)
            if kind != 0:
                # escape the double quotes that are not escaped yet
                text = re.sub(r'(?<!\\)"', r'\"', text)
        if self.ismultiline:
            # every line break closes the current quoted line with \n" and
            # opens a new one; the whole text is preceded by an empty ""
            lf = r'\\n"' + '\n"'
            text = re.sub(r'(\n\r|\r\n|[\n\r])', lf, text)
            text = '""\n"' + text + '"'
        else:
            text = '"' + text + '"'
        self.string = text
        return PoCommentedStringPL(self.string, ismultiline=self.ismultiline)
class PendingLuaString:
    """A lua string waiting to be evaluated and, if translatable, stored."""

    def __init__(self, lineno, luatype, luastring, ismultiline,
                 istranslatable, numequals=0, plural=None):
        """Record the string data.

        The initial text is stored through addline(), except for the
        'lua_plural' type, whose luastring starts empty.
        """
        self.lineno = lineno
        self.luatype = luatype
        self.ismultiline = ismultiline
        self.istranslatable = istranslatable
        self.numequals = numequals
        self.luastring = ''
        if luatype != 'lua_plural':
            self.addline(luastring, True)
        self.plural = plural

    def addline(self, value, isfirstline=False):
        """Store *value* as the first line or append it as a new line.

        For 'luastr3' strings every backslash is doubled and whitespace is
        kept; for all other types trailing whitespace is removed.
        """
        if self.luatype == 'luastr3':
            value = value.replace('\\', r'\\')
        else:
            value = re.sub(r'\s*$', '', value)
        self.luastring = (value if isfirstline
                          else self.luastring + '\n' + value)

    # this function is used by store, when translating the lua pending
    # plural into PoCommentedString.plural
    def storePlural(self):
        """Return the converted plural, or None when there is no plural."""
        return None if self.plural is None else self.plural.convert()

    def store(self):
        """Add the string to _dictionary when it is translatable.

        Nothing is done when the current domain is not an accepted one
        (checkdomain). Empty translatable strings are not stored. In every
        other case the pending translator directives are cleared at the
        end, reporting them as unapplied when the string is not
        translatable.
        """
        global _linenosub
        if not checkdomain(self.lineno):
            return
        if self.istranslatable:
            _linenosub += 1
            finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno)
            fileno = pywmlx.nodemanip.fileno
            # checksentence returns 1 when the translatable string is
            # empty: the string is added ONLY if it is NOT empty
            if checksentence(self.luastring, finfo, islua=True) != 1:
                if self.luatype == 'luastr3':
                    self.luastring = self.luastring.replace('"', r'\"')
                else:
                    if self.luatype == 'luastr2':
                        self.luastring = re.sub(r"\\\'", r"'",
                                                self.luastring)
                    self.luastring = re.sub(r'(?<!\\)"', r'\"',
                                            self.luastring)
                # a "# po-override" directive replaces the default
                # '[lua]: function' information
                override = _pending_cinfo["po-override"]
                if override is not None:
                    loc_wmlinfos = [override]
                elif _pending_luafuncname is not None:
                    loc_wmlinfos = ['[lua]: ' + _pending_luafuncname]
                else:
                    loc_wmlinfos = []
                pending_po = _pending_cinfo["po"]
                loc_addedinfos = [] if pending_po is None else pending_po
                domain_entries = _dictionary.setdefault(_currentdomain, {})
                sentence = PoCommentedString(
                    self.luastring,
                    _currentdomain,
                    orderid=(fileno, self.lineno, _linenosub),
                    ismultiline=self.ismultiline,
                    wmlinfos=loc_wmlinfos, finfos=[finfo],
                    addedinfos=loc_addedinfos,
                    plural=self.storePlural())
                known = domain_entries.get(self.luastring)
                if known is None:
                    domain_entries[self.luastring] = sentence
                else:
                    known.update_with_commented_string(sentence)
        # finally PendingLuaString.store() will clear the pending infos
        # in any case (even if the pending string is not translatable)
        after_pending_info(self.lineno, not self.istranslatable)
class PendingWmlString:
    """A WML string waiting to be evaluated and, if translatable, stored."""

    def __init__(self, lineno, wmlstring, ismultiline, istranslatable, israw):
        """Record the string data; backslashes in the text are doubled.

        The israw argument indicates a << >> delimited string.
        """
        self.lineno = lineno
        self.ismultiline = ismultiline
        self.istranslatable = istranslatable
        self.israw = israw
        self.wmlstring = wmlstring.replace('\\', r'\\')

    def addline(self, value):
        """Append *value* as a new line, doubling its backslashes."""
        self.wmlstring = '\n'.join(
            (self.wmlstring, value.replace('\\', r'\\')))

    def store(self):
        """Register the string as wmlinfo and/or as a translatable sentence.

        When a wmlinfo type is pending, a single-line non-translatable
        string is recorded as 'type=string' through nodemanip.addWmlInfo
        and the pending type is reset. Then, if the current domain is an
        accepted one, a non-empty translatable string is handed to
        nodemanip.addNodeSentence; finally the pending translator
        directives are cleared, reporting them as unapplied when the string
        is not translatable.
        """
        global _linenosub
        global _pending_winfotype
        if _pending_winfotype is not None:
            plain_value = (self.ismultiline is False
                           and self.istranslatable is False)
            if plain_value:
                pywmlx.nodemanip.addWmlInfo(
                    _pending_winfotype + '=' + self.wmlstring)
            _pending_winfotype = None
        if not checkdomain(self.lineno):
            return
        if self.istranslatable:
            finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno)
            # checksentence returns 1 when the translatable string is
            # empty: the string is added ONLY if it is NOT empty
            if checksentence(self.wmlstring, finfo, islua=False) != 1:
                _linenosub += 1
                # raw strings: every " is escaped;
                # quoted strings: the doubled "" becomes an escaped quote
                quote = '"' if self.israw else '""'
                self.wmlstring = re.sub(quote, r'\"', self.wmlstring)
                pywmlx.nodemanip.addNodeSentence(
                    self.wmlstring,
                    domain=_currentdomain,
                    ismultiline=self.ismultiline,
                    lineno=self.lineno,
                    lineno_sub=_linenosub,
                    override=_pending_cinfo["po-override"],
                    addition=_pending_cinfo["po"])
        after_pending_info(self.lineno, not self.istranslatable)
def addstate(name, value):
    """Register the state *value* under the lower-cased *name*.

    The _states dictionary is created on first use; a state registered
    again under the same (case-insensitive) name replaces the previous one.
    """
    global _states
    registry = _states
    if registry is None:
        registry = _states = {}
    registry[name.lower()] = value
def setup(dictionary, initialdomain, domains, wall, fdebug):
    """Initialize the module globals and register the lua and WML states.

    dictionary    -- dictionary of pot sentences to fill.
    initialdomain -- domain assumed at the beginning of every file.
    domains       -- iterable of accepted domains; when None the current
                     value of _domains is left untouched.
    wall          -- value stored in _warnall.
    fdebug        -- debug output file; debug mode is on when it is not
                     None.
    """
    global _dictionary, _initialdomain, _domains
    global _warnall, _debugmode, _fdebug
    _dictionary = dictionary
    _initialdomain = initialdomain
    if domains is not None:
        _domains = set(domains)
    _warnall = wall
    _fdebug = fdebug
    _debugmode = fdebug is not None
    setup_luastates()
    setup_wmlstates()
def run(*, filebuf, fileref, fileno, startstate, waitwml=True):
    """Run the state machine over every line of one source file.

    filebuf    -- iterable yielding the lines of the file.
    fileref    -- file reference, handed to pywmlx.nodemanip.newfile.
    fileno     -- file number, handed to pywmlx.nodemanip.newfile.
    startstate -- name of the state the machine starts from (key of
                  _states).
    waitwml    -- value stored in the _waitwml global.

    The per-file globals (line counters, current domain, pending lua
    function name, lua-tag flag) are reset first. Every line is then
    processed repeatedly by the current state until a state sets it to
    None. A UnicodeDecodeError raised while reading is reported through
    wmlerr, except for one unit-test file which is silently skipped.
    Finally pywmlx.nodemanip.closefile is called.
    """
    global _states
    global _current_lineno
    global _linenosub
    global _waitwml
    global _currentdomain
    global _dictionary
    global _pending_luafuncname
    global _on_luatag
    _pending_luafuncname = None
    _on_luatag = False
    # cs is "current state"; cs_debug is its name, used in the debug output
    cs = _states.get(startstate)
    cs_debug = startstate
    _current_lineno = 0
    _linenosub = 0
    _waitwml = waitwml
    _currentdomain = _initialdomain
    pywmlx.nodemanip.newfile(fileref, fileno)
    try:
        for xline in filebuf:
            xline = xline.strip('\n\r')
            _current_lineno += 1
            # on every new line, the debug file gets another marker
            if _debugmode:
                print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
                      file=_fdebug)
            # the same line is processed until a state sets it to None
            while xline is not None:
                # print debug infos (if debugmode is on)
                if _debugmode:
                    lno = '%05d' % _current_lineno
                    print('---------------------------------------------------',
                          file=_fdebug)
                    print('LINE', lno, '|', xline, file=_fdebug)
                # action number is used to know what function we should run
                action = 0
                v = None
                m = None
                if cs.regex is None:
                    # a state without regex always runs
                    # action = 1 --> execute state.run
                    action = 1
                    if _debugmode:
                        print('ALWAYS-RUN x', cs_debug, file=_fdebug)
                else:
                    # m is the match of the state regex against the line
                    m = re.match(cs.regex, xline)
                    if m:
                        # action = 1 --> execute state.run
                        action = 1
                        if _debugmode:
                            print('RUN state \\', cs_debug, file=_fdebug)
                    else:
                        # action = 2 --> change to the state pointed by
                        #                state.iffail
                        action = 2
                        if _debugmode:
                            print('FAIL state |', cs_debug, file=_fdebug)
                if action == 1:
                    # xline, ns: xline --> override xline with new value
                    #            ns --> name of the next state
                    xline, ns = cs.run(xline, _current_lineno, m)
                    cs_debug = ns
                    cs = _states.get(ns)
                else:
                    # the line is left untouched and retried by the
                    # fallback state
                    cs_debug = cs.iffail
                    cs = _states.get(cs.iffail)
                # end while xline
            # end for xline
    except UnicodeDecodeError as e:
        if "test_cve_2018_1999023_2.cfg" in pywmlx.nodemanip.fileref:
            # This unit test is allowed to contain invalid UTF-8. Ignore it.
            # (closefile is not called on this path)
            return
        errpos = int(e.start) # error position on file object with UTF-8 error
        errbval = hex(e.object[errpos]) # value of byte which causes UTF-8 error
        # NOTE(review): according to the original author, when the
        # exception occurs the _current_lineno value was not updated at all
        # due to the failure of the try block (it is = 0).
        # This means we need a workaround to find out in which line of the
        # file the problem happened.
        # In order to perform this task (and not only) we create a temporary
        # string which contains all the file text UNTIL the invalid byte.
        untilerr_buf = e.object[0:errpos] # buffer containing file text
        # every byte is mapped to the character with the same code point
        untilerr = "".join(map(chr, untilerr_buf))
        # splituntil will be an array of strings (each item is a line of
        # text). The last item will show the point where the invalid UTF-8
        # byte was found.
        splituntil = untilerr.split('\n')
        # the error line number is equal to the number of lines of text
        # until the error occurs (number of items in splituntil)
        errlineno = len(splituntil)
        # finally we can know the actual file info
        finfo = pywmlx.nodemanip.fileref + ":" + str(errlineno)
        errmsg = (
            "UTF-8 Format error.\nCan't decode byte " + str(errbval) + ' (' +
            e.reason + ').\n' +
            'Probably your file is not encoded with UTF-8 encoding: you ' +
            'should open the file with an advanced text editor, and re-save ' +
            'it with UTF-8 encoding.\n' +
            'To avoid this problem in the future, you might want to set ' +
            'the default encoding of your editor to UTF-8.\n\n' +
            'Text preceding the invalid byte (source file, line ' +
            str(errlineno) + '):\n' + splituntil[-1] + '\n'
        )
        wmlerr(finfo, errmsg)
    pywmlx.nodemanip.closefile(_dictionary, _current_lineno)