465 lines
17 KiB
Python
465 lines
17 KiB
Python
import re
|
|
# import os
|
|
|
|
from pywmlx.wmlerr import wmlerr
|
|
from pywmlx.wmlerr import wmlwarn
|
|
from pywmlx.wmlerr import warnall
|
|
from pywmlx.postring import PoCommentedString
|
|
from pywmlx.postring import PoCommentedStringPL
|
|
from pywmlx.state.state import State
|
|
from pywmlx.state.lua_states import setup_luastates
|
|
from pywmlx.state.wml_states import setup_wmlstates
|
|
|
|
import pywmlx.nodemanip
|
|
import pdb
|
|
|
|
# Universe - convenient singleton for which
|
|
# `x in Universe` is always True
|
|
# Passing it to a filter is equivalent to not filtering.
|
|
class UniversalSet:
    """Container-like object that claims to hold every possible value.

    Any ``x in instance`` test evaluates to True, so handing an instance
    to code that filters by membership has the same effect as not
    filtering at all.
    """

    def __contains__(self, any):
        # The candidate value is never inspected: everything is a member.
        return True


# Module-level instance shared by the rest of this file.
Universe = UniversalSet()
|
|
|
|
# --------------------------------------------------------------------
|
|
# PART 1: machine.py global variables
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
# Set to True when the --warnall option is used.
_warnall = False
# Set to True when the -D option is used.
_debugmode = False
# Output file for the debug trace.
_fdebug = None
# Dictionary of pot sentences.
_dictionary = None
# Dictionary containing the lua and WML states.
_states = None
# Value given with the --initialdomain command line option.
_initialdomain = None
# Domain in effect while a file is being parsed
# (a "#textdomain" directive changes it).
_currentdomain = None
# Domain value(s) given with the --domain command line option.
# The default accepts every domain.
_domains = Universe
# Usually True when the file is a WML .cfg file
# and False when the file is a .lua file.
_waitwml = True
# Helps to avoid a possible bug seen in a special case
# (see WmlGoluaState in wml_states.py for more details).
_on_luatag = False

# ---------

# Pending additional infos for translators, collected from "# po"
# or "# po-override" comments.
_pending_cinfo = {
    # pending additional infos for translators (# po: addedinfo)
    "po": None,
    # pending override wmlinfo for translators (# po-override: overrideinfo)
    "po-override": None,
}

# Type of the pending wmlinfo: either None or an actual value.
# Possible actual values are: 'speaker', 'id', 'role', 'description',
# 'condition', 'type', 'race' or 'gender'.
_pending_winfotype = None

# ----------

# Last function name encountered in lua code (if any).
# Stays None as long as no lua function has been encountered.
_pending_luafuncname = None

# ----------

# Pending lua/wml strings: they will be evaluated and, if translatable,
# added to _dictionary.
_pending_luastring = None
_pending_wmlstring = None

# ----------

# Line number counter.
_current_lineno = 0
# Helps to set the right orderid of the future PoCommentedString.
_linenosub = 0
|
|
|
|
|
|
|
|
# --------------------------------------------------------------------
|
|
# PART 2: machine.py functions and classes
|
|
# --------------------------------------------------------------------
|
|
|
|
|
|
def clear_pending_infos(lineno, error=False):
    """Reset every entry of ``_pending_cinfo`` to None.

    When ``error`` is true, each entry that still holds a value is first
    reported through ``wmlerr`` as a directive that was not applied, using
    the current file reference and ``lineno`` as location.
    """
    global _pending_cinfo
    for directive, pending in list(_pending_cinfo.items()):
        if pending is not None and error:
            location = pywmlx.nodemanip.fileref + ":" + str(lineno)
            wmlerr(location,
                   "#%s directive(s) not applied: %s" % (directive, pending))
        _pending_cinfo[directive] = None
|
|
|
|
|
|
|
|
def after_pending_info(lineno, error):
    """Clear the pending infos once a pending string has been handled.

    Simply forwards ``lineno`` and ``error`` to ``clear_pending_infos``.
    """
    clear_pending_infos(lineno, error)
|
|
|
|
|
|
|
|
def checkdomain(lineno):
    """Tell whether the current domain is one of the accepted domains.

    Returns True when ``_currentdomain`` is contained in ``_domains``.
    Otherwise the pending infos are cleared (reporting the ones still
    set as errors) and False is returned.
    """
    global _currentdomain
    global _domains
    if _currentdomain not in _domains:
        clear_pending_infos(lineno, error=True)
        return False
    return True
|
|
|
|
|
|
def switchdomain(lineno, domain):
    """Make ``domain`` the current domain.

    If this actually changes the current domain, the pending infos are
    cleared first, reporting the ones still set as errors.
    """
    global _currentdomain
    domain_changed = _currentdomain != domain
    if domain_changed:
        clear_pending_infos(lineno, error=True)
    _currentdomain = domain
|
|
|
|
|
|
def checksentence(mystring, finfo, *, islua=False):
    """Check a translatable string and emit warnings about it.

    Returns:
        1 if the string is empty or made only of whitespace
          (a warning is emitted);
        2 if warnall() is on, the string is not a lua string and it
          contains a "}" character (a warning is emitted);
        0 in every other case.
    """
    if re.match(r'\s*$', mystring):
        wmlwarn(finfo, "found an empty translatable message")
        return 1
    # warnall() is evaluated before islua, exactly as the condition reads.
    if warnall() and not islua and "}" in mystring:
        wmsg = ("found a translatable string containing a WML macro. "
                " Translation for this string will NEVER work")
        wmlwarn(finfo, wmsg)
        return 2
    return 0
|
|
|
|
|
|
|
|
class PendingPlural:
    """Plural form of a lua translatable string while it is being collected."""

    def __init__(self):
        self.string = ''
        # status values:
        #   'wait_string' --> right after _ ( when we need to know
        #                     which string type we will manage
        #   'wait_plural' --> after first argument. Search for plural or
        #                     close parenthesis
        #   'wait_close'  --> expect close parenthesis
        self.status = 'wait_string'
        self.pluraltype = 0
        self.numequals = 0
        self.ismultiline = False

    def addline(self, value, isfirstline=False):
        """Store ``value`` as the first line, or append it as a new line.

        For plural type 3 every backslash is doubled; for any other type
        the trailing whitespace is removed instead.
        """
        if self.pluraltype == 3:
            value = value.replace('\\', r'\\')
        else:
            value = re.sub(r'\s*$', '', value)
        self.string = value if isfirstline else self.string + '\n' + value

    def convert(self):
        """Escape and quote the collected text, then wrap it.

        ``self.string`` is overwritten with the converted text and a
        PoCommentedStringPL built from it is returned.
        """
        text = self.string
        if self.pluraltype == 2:
            # \' becomes a plain '
            text = re.sub(r"\\\'", r"'", text)
        if self.pluraltype == 3:
            text = text.replace('"', r'\"')
        elif self.pluraltype != 0:
            # escape only the double quotes not already escaped
            text = re.sub(r'(?<!\\)"', r'\"', text)
        if self.ismultiline:
            # every line break closes the current quoted line with \n"
            # and opens a new quoted line
            lf = r'\\n"' + '\n"'
            text = re.sub(r'(\n\r|\r\n|[\n\r])', lf, text)
            text = '""\n"' + text + '"'
        else:
            text = '"' + text + '"'
        self.string = text
        return PoCommentedStringPL(self.string, ismultiline=self.ismultiline)
|
|
|
|
|
|
|
|
class PendingLuaString:
    """A lua string collected by the parser and waiting to be stored."""

    def __init__(self, lineno, luatype, luastring, ismultiline,
                 istranslatable, numequals=0, plural=None):
        self.lineno = lineno
        self.luatype = luatype
        self.luastring = ''
        self.ismultiline = ismultiline
        self.istranslatable = istranslatable
        self.numequals = numequals
        # for 'lua_plural' the initial text is not stored
        if luatype != 'lua_plural':
            self.addline(luastring, True)
        self.plural = plural

    def addline(self, value, isfirstline=False):
        """Store ``value`` as the first line, or append it as a new line.

        For 'luastr3' every backslash is doubled; for any other lua type
        the trailing whitespace is removed instead.
        """
        if self.luatype == 'luastr3':
            value = value.replace('\\', r'\\')
        else:
            value = re.sub(r'\s*$', '', value)
        if isfirstline:
            self.luastring = value
        else:
            self.luastring = self.luastring + '\n' + value

    # used by store, when translating the lua pending plural into
    # PoCommentedString.plural
    def storePlural(self):
        """Return the converted plural, or None when there is no plural."""
        return None if self.plural is None else self.plural.convert()

    def store(self):
        """Add the string to ``_dictionary`` (if translatable and not empty).

        Nothing is done when the current domain is not accepted. The
        pending infos are cleared at the end in any case.
        """
        global _pending_cinfo
        global _linenosub
        if not checkdomain(self.lineno):
            return
        if self.istranslatable:
            _linenosub += 1
            finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno)
            fileno = pywmlx.nodemanip.fileno
            # checksentence returns 1 for an empty translatable string:
            # such a string must NOT be added
            if checksentence(self.luastring, finfo, islua=True) != 1:
                if self.luatype == 'luastr2':
                    self.luastring = re.sub(r"\\\'", r"'", self.luastring)
                if self.luatype == 'luastr3':
                    self.luastring = self.luastring.replace('"', r'\"')
                else:
                    self.luastring = re.sub(r'(?<!\\)"', r'\"',
                                            self.luastring)
                # an override info wins over the lua function name
                override = _pending_cinfo["po-override"]
                if override is not None:
                    wmlinfos = [override]
                elif _pending_luafuncname is not None:
                    wmlinfos = ['[lua]: ' + _pending_luafuncname]
                else:
                    wmlinfos = []
                addition = _pending_cinfo["po"]
                addedinfos = [] if addition is None else addition
                if _currentdomain not in _dictionary:
                    _dictionary[_currentdomain] = dict()
                known = _dictionary[_currentdomain].get(self.luastring)
                sentence = PoCommentedString(
                    self.luastring,
                    _currentdomain,
                    orderid=(fileno, self.lineno, _linenosub),
                    ismultiline=self.ismultiline,
                    wmlinfos=wmlinfos, finfos=[finfo],
                    addedinfos=addedinfos,
                    plural=self.storePlural())
                if known is None:
                    _dictionary[_currentdomain][self.luastring] = sentence
                else:
                    known.update_with_commented_string(sentence)
        # the pending infos are cleared in any case
        # (even if the pending string is not translatable)
        after_pending_info(self.lineno, not self.istranslatable)
|
|
|
|
|
|
|
|
class PendingWmlString:
    """A WML string collected by the parser and waiting to be stored."""

    def __init__(self, lineno, wmlstring, ismultiline, istranslatable, israw):
        """The israw argument indicates a << >> delimited string"""
        self.lineno = lineno
        self.ismultiline = ismultiline
        self.istranslatable = istranslatable
        self.israw = israw
        # every backslash of the text is doubled
        self.wmlstring = wmlstring.replace('\\', r'\\')

    def addline(self, value):
        """Append ``value`` (with its backslashes doubled) as a new line."""
        escaped = value.replace('\\', r'\\')
        self.wmlstring = self.wmlstring + '\n' + escaped

    def store(self):
        """Hand the string over to pywmlx.nodemanip.

        A pending wmlinfo type is consumed first: it is recorded together
        with the string when the string is neither multiline nor
        translatable. Then, if the current domain is accepted and the
        string is translatable and not empty, the string is added as a
        node sentence. The pending infos are cleared at the end unless
        the domain check failed.
        """
        global _linenosub
        global _pending_cinfo
        global _pending_winfotype
        if _pending_winfotype is not None:
            if self.ismultiline is False and self.istranslatable is False:
                pywmlx.nodemanip.addWmlInfo(
                    _pending_winfotype + '=' + self.wmlstring)
            _pending_winfotype = None
        if not checkdomain(self.lineno):
            return
        if self.istranslatable:
            finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno)
            # checksentence returns 1 for an empty translatable string:
            # such a string must NOT be added
            if checksentence(self.wmlstring, finfo, islua=False) != 1:
                _linenosub += 1
                if self.israw:
                    # raw string: every double quote gets escaped
                    self.wmlstring = self.wmlstring.replace('"', '\\"')
                else:
                    # a doubled double quote becomes one escaped quote
                    self.wmlstring = self.wmlstring.replace('""', '\\"')
                pywmlx.nodemanip.addNodeSentence(
                    self.wmlstring,
                    domain=_currentdomain,
                    ismultiline=self.ismultiline,
                    lineno=self.lineno,
                    lineno_sub=_linenosub,
                    override=_pending_cinfo["po-override"],
                    addition=_pending_cinfo["po"])
        after_pending_info(self.lineno, not self.istranslatable)
|
|
|
|
|
|
|
|
def addstate(name, value):
    """Register ``value`` in ``_states`` under the lower-cased ``name``.

    The ``_states`` dictionary is created on first use.
    """
    global _states
    registry = {} if _states is None else _states
    _states = registry
    registry[name.lower()] = value
|
|
|
|
|
|
|
|
def setup(dictionary, initialdomain, domains, wall, fdebug):
    """Initialise the module globals and register the lua and WML states.

    ``domains`` replaces the accepted domains only when it is not None
    (it is then converted to a set). Debug mode is on exactly when
    ``fdebug`` is not None.
    """
    global _dictionary
    global _initialdomain
    global _domains
    global _warnall
    global _debugmode
    global _fdebug
    _dictionary = dictionary
    _initialdomain = initialdomain
    if domains is not None:
        _domains = set(domains)
    _warnall = wall
    _fdebug = fdebug
    _debugmode = fdebug is not None
    setup_luastates()
    setup_wmlstates()
|
|
|
|
|
|
|
|
def run(*, filebuf, fileref, fileno, startstate, waitwml=True):
    """Run the state machine over every line of ``filebuf``.

    The per-file globals are reset, the file is announced with
    pywmlx.nodemanip.newfile and, for each line, the current state is
    applied until it hands back None as the remaining text. A state whose
    regex is None always runs; otherwise it runs only when its regex
    matches, and the machine moves to the state's ``iffail`` state when
    it does not. A UnicodeDecodeError raised meanwhile is reported through
    wmlerr. Finally pywmlx.nodemanip.closefile is called.
    """
    global _states
    global _current_lineno
    global _linenosub
    global _waitwml
    global _currentdomain
    global _dictionary
    global _pending_luafuncname
    global _on_luatag
    _pending_luafuncname = None
    _on_luatag = False
    # state: the current state object; state_name: its name (debug only)
    state = _states.get(startstate)
    state_name = startstate
    _current_lineno = 0
    _linenosub = 0
    _waitwml = waitwml
    _currentdomain = _initialdomain
    pywmlx.nodemanip.newfile(fileref, fileno)
    try:
        for xline in filebuf:
            xline = xline.strip('\n\r')
            _current_lineno += 1
            # on every new line the debug file gets another marker
            if _debugmode:
                print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
                      file=_fdebug)
            while xline is not None:
                if _debugmode:
                    print('---------------------------------------------------',
                          file=_fdebug)
                    print('LINE', '%05d' % _current_lineno, '|', xline,
                          file=_fdebug)
                # found stays None when the state has no regex
                found = None
                if state.regex is None:
                    should_run = True
                    debug_label = 'ALWAYS-RUN x'
                else:
                    found = re.match(state.regex, xline)
                    if found:
                        should_run = True
                        debug_label = 'RUN state \\'
                    else:
                        should_run = False
                        debug_label = 'FAIL state |'
                if _debugmode:
                    print(debug_label, state_name, file=_fdebug)
                if should_run:
                    # the state returns the text still to be parsed
                    # (None when the line is finished) and the name of
                    # the next state
                    xline, next_name = state.run(xline, _current_lineno,
                                                 found)
                    state_name = next_name
                    state = _states.get(next_name)
                else:
                    state_name = state.iffail
                    state = _states.get(state.iffail)
    except UnicodeDecodeError as e:
        if "test_cve_2018_1999023_2.cfg" in pywmlx.nodemanip.fileref:
            # This unit test is allowed to contain invalid UTF-8. Ignore it.
            return
        # position, inside the raw data, of the byte that could not be
        # decoded, and the value of that byte
        bad_offset = int(e.start)
        bad_byte = hex(e.object[bad_offset])
        # _current_lineno cannot be used to locate the problem (the
        # original note says it is still 0 at this point), so the line
        # is worked out from the data preceding the invalid byte
        preceding = "".join(chr(octet) for octet in e.object[0:bad_offset])
        lines_before = preceding.split('\n')
        # the invalid byte sits on the last of those lines
        errlineno = len(lines_before)
        finfo = pywmlx.nodemanip.fileref + ":" + str(errlineno)
        errmsg = "".join((
            "UTF-8 Format error.\nCan't decode byte ", str(bad_byte), ' (',
            e.reason, ').\n',
            'Probably your file is not encoded with UTF-8 encoding: you ',
            'should open the file with an advanced text editor, and re-save ',
            'it with UTF-8 encoding.\n',
            'To avoid this problem in the future, you might want to set ',
            'the default encoding of your editor to UTF-8.\n\n',
            'Text preceding the invalid byte (source file, line ',
            str(errlineno), '):\n', lines_before[-1], '\n',
        ))
        wmlerr(finfo, errmsg)
    pywmlx.nodemanip.closefile(_dictionary, _current_lineno)
|