import re
# import os
from pywmlx.wmlerr import wmlerr
from pywmlx.wmlerr import wmlwarn
from pywmlx.wmlerr import warnall
from pywmlx.postring import PoCommentedString
from pywmlx.postring import PoCommentedStringPL
from pywmlx.state.state import State
from pywmlx.state.lua_states import setup_luastates
from pywmlx.state.wml_states import setup_wmlstates
import pywmlx.nodemanip
import pdb


class UniversalSet:
    """Container for which every membership test succeeds.

    It is used through the ``Universe`` singleton below: ``x in Universe``
    is always True, so handing it to a filter is the same as not filtering.
    """

    def __contains__(self, any):
        return True


Universe = UniversalSet()


# --------------------------------------------------------------------
#  PART 1: machine.py global variables
# --------------------------------------------------------------------

# True when the --warnall option is used.
_warnall = False

# True when the -D option is used.
_debugmode = False

# Debug output file.
_fdebug = None

# Dictionary of pot sentences.
_dictionary = None

# Dictionary containing the lua and WML states.
_states = None

# Initial domain (set with the --initialdomain command line option).
_initialdomain = None

# Domain in effect while parsing a file (changed by "#textdomain text").
_currentdomain = None

# Accepted domains (set with the --domain command line option).
# The default, Universe, accepts every domain.
_domains = Universe

# Usually:
#   True  when the file is a WML .cfg file
#   False when the file is a .lua file
_waitwml = True

# Guards against a possible bug seen in one special case
# (see WmlGoluaState in wml_states.py for more details).
_on_luatag = False

# ---------

# Pending additional infos for translators, collected from "# po" or
# "# po-override" comments.
_pending_cinfo = {
    # pending additional infos for translators (# po: addedinfo)
    "po": None,
    # pending override wmlinfo for translators (# po-override: overrideinfo)
    "po-override": None,
}

# type of pending wmlinfo:
# it can be None or it can have an actual value.
# Possible actual values are: 'speaker', 'id', 'role', 'description',
# 'condition', 'type', 'race' or 'gender'
_pending_winfotype = None

# ----------

# Name of the last function met in lua code (if any).
# Stays None while no lua function has been met yet.
_pending_luafuncname = None

# ----------

# Pending lua/wml string: it will be evaluated and, if translatable,
# added to _dictionary.
_pending_luastring = None
_pending_wmlstring = None

# ----------

# Line counter.
_current_lineno = 0
# Helps to set the right orderid of the future PoCommentedString.
_linenosub = 0


# --------------------------------------------------------------------
#  PART 2: machine.py functions and classes
# --------------------------------------------------------------------

def clear_pending_infos(lineno, error=False):
    """Reset every entry of ``_pending_cinfo`` to None.

    When ``error`` is true, each entry that still holds a value is first
    reported through ``wmlerr`` as a directive that was not applied, using
    "<fileref>:<lineno>" as the location.
    """
    for directive, pending in _pending_cinfo.items():
        if error and pending is not None:
            location = pywmlx.nodemanip.fileref + ":" + str(lineno)
            message = "#%s directive(s) not applied: %s" % (directive, pending)
            wmlerr(location, message)
        # Only existing keys are reassigned, so the dict may safely be
        # updated while it is being iterated.
        _pending_cinfo[directive] = None


def after_pending_info(lineno, error):
    """Clear the pending translator infos; ``error`` is passed through to
    ``clear_pending_infos``."""
    clear_pending_infos(lineno, error)


def checkdomain(lineno):
    """Return True when the current domain belongs to ``_domains``.

    Otherwise the pending translator infos are cleared (reporting any that
    were still set) and False is returned.
    """
    if _currentdomain not in _domains:
        clear_pending_infos(lineno, error=True)
        return False
    return True


def switchdomain(lineno, domain):
    """Make ``domain`` the current domain.

    When the domain actually changes, the pending translator infos are
    cleared first, reporting any that were still set.
    """
    global _currentdomain
    if _currentdomain != domain:
        clear_pending_infos(lineno, error=True)
        _currentdomain = domain


def checksentence(mystring, finfo, *, islua=False):
    """Check a translatable string and return a status code.

    Returns:
        1 -- the string is empty or whitespace-only (a warning is emitted);
        2 -- ``warnall()`` is true, the string is not a lua one and it
             contains "}" (a warning is emitted);
        0 -- otherwise.
    """
    if re.match(r'\s*$', mystring):
        wmlwarn(finfo, "found an empty translatable message")
        return 1
    # The evaluation order matters: warnall() is consulted before islua
    # and before the string is inspected.
    if warnall() and not islua and "}" in mystring:
        wmsg = ("found a translatable string containing a WML macro. "
                " Translation for this string will NEVER work")
        wmlwarn(finfo, wmsg)
        return 2
    return 0
" " Translation for this string will NEVER work") wmlwarn(finfo, wmsg) return 2 else: return 0 else: return 0 class PendingPlural: def __init__(self): self.string = '' # status values: # 'wait_string' --> rightly after _ ( when we need to know # wich string type we will manage # 'wait_plural' --> after first argument. Search for plural or # close parenthesis # 'wait_close' --> expect close parenthesis self.status = 'wait_string' self.pluraltype = 0 self.numequals = 0 self.ismultiline = False def addline(self, value, isfirstline=False): if self.pluraltype != 3: value = re.sub(r'\s*$', '', value) else: value = value.replace('\\', r'\\') if isfirstline: self.string = value else: self.string = self.string + '\n' + value def convert(self): if self.pluraltype == 2: self.string = re.sub(r"\\\'", r"'", self.string) if self.pluraltype != 3 and self.pluraltype!=0: self.string = re.sub(r'(?> delimited string""" self.lineno = lineno self.wmlstring = wmlstring.replace('\\', r'\\') self.ismultiline = ismultiline self.istranslatable = istranslatable self.israw = israw def addline(self, value): self.wmlstring = self.wmlstring + '\n' + value.replace('\\', r'\\') def store(self): global _linenosub global _pending_cinfo global _pending_winfotype if _pending_winfotype is not None: if self.ismultiline is False and self.istranslatable is False: winf = _pending_winfotype + '=' + self.wmlstring pywmlx.nodemanip.addWmlInfo(winf) _pending_winfotype = None if not checkdomain(self.lineno): return if self.istranslatable: finfo = pywmlx.nodemanip.fileref + ":" + str(self.lineno) errcode = checksentence(self.wmlstring, finfo, islua=False) if errcode != 1: # when errcode is equal to 1, the translatable string is empty # so, using "if errcode != 1" # we will add the translatable string ONLY if it is NOT empty _linenosub += 1 if self.israw: self.wmlstring = re.sub('"', r'\"', self.wmlstring) else: self.wmlstring = re.sub('""', r'\"', self.wmlstring) pywmlx.nodemanip.addNodeSentence(self.wmlstring, 
# NOTE(review): this span opens with the tail of a call that begins on the
# preceding physical line, inside a `store` method whose enclosing class
# header is not visible from here. It is preserved verbatim below rather
# than reconstructed:
#     domain=_currentdomain, ismultiline=self.ismultiline,
#     lineno=self.lineno, lineno_sub=_linenosub,
#     override=_pending_cinfo["po-override"], addition=_pending_cinfo["po"])
#     after_pending_info(self.lineno, not self.istranslatable)


def addstate(name, value):
    """Register ``value`` in ``_states`` under the lower-cased ``name``.

    The ``_states`` dictionary is created on first use.
    """
    global _states
    if _states is None:
        _states = {}
    key = name.lower()
    _states[key] = value


def setup(dictionary, initialdomain, domains, wall, fdebug):
    """Initialise the module-level configuration and register the states.

    ``domains`` replaces the accepted-domain filter only when it is not
    None (it is then converted to a set). Debug mode is switched on exactly
    when a debug file ``fdebug`` is supplied. The lua and WML states are
    set up last.
    """
    global _dictionary, _initialdomain, _domains
    global _warnall, _debugmode, _fdebug
    _dictionary = dictionary
    _initialdomain = initialdomain
    if domains is not None:
        _domains = set(domains)
    _warnall = wall
    _fdebug = fdebug
    _debugmode = fdebug is not None
    setup_luastates()
    setup_wmlstates()


def run(*, filebuf, fileref, fileno, startstate, waitwml=True):
    """Feed every line of ``filebuf`` through the state machine.

    Keyword arguments:
        filebuf    -- iterable yielding the text lines of the file
        fileref    -- passed, with ``fileno``, to pywmlx.nodemanip.newfile
        fileno     -- see ``fileref``
        startstate -- key of the initial state in ``_states``
        waitwml    -- stored in the module-level ``_waitwml``

    For each line the current state is applied repeatedly until a state's
    ``run`` hands back None as the remaining text. A state whose ``regex``
    is None always runs; otherwise it runs only when its regex matches the
    text, and on a mismatch the machine moves to the state named by its
    ``iffail``. A UnicodeDecodeError raised while reading is turned into a
    ``wmlerr`` report. The file is finally closed through
    pywmlx.nodemanip.closefile.
    """
    global _current_lineno, _linenosub, _waitwml
    global _currentdomain, _pending_luafuncname, _on_luatag
    _pending_luafuncname = None
    _on_luatag = False
    # cs is the "current state"; cs_debug is its name, for the debug output.
    cs = _states.get(startstate)
    cs_debug = startstate
    _current_lineno = 0
    _linenosub = 0
    _waitwml = waitwml
    _currentdomain = _initialdomain
    pywmlx.nodemanip.newfile(fileref, fileno)
    try:
        for xline in filebuf:
            xline = xline.strip('\n\r')
            _current_lineno += 1
            # Every new line gets its own marker in the debug file.
            if _debugmode:
                print('@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@',
                      file=_fdebug)
            while xline is not None:
                if _debugmode:
                    lno = '%05d' % _current_lineno
                    print('---------------------------------------------------',
                          file=_fdebug)
                    print('LINE', lno, '|', xline, file=_fdebug)
                # Decide whether the current state runs on this text or
                # the machine falls back to the state's "iffail" target.
                m = None
                if cs.regex is None:
                    should_run = True
                    debug_tag = 'ALWAYS-RUN x'
                else:
                    m = re.match(cs.regex, xline)
                    should_run = bool(m)
                    debug_tag = 'RUN state \\' if should_run else 'FAIL state |'
                if _debugmode:
                    print(debug_tag, cs_debug, file=_fdebug)
                if should_run:
                    # The state returns the text still to be processed
                    # (None ends the line) and the name of the next state.
                    xline, next_name = cs.run(xline, _current_lineno, m)
                else:
                    next_name = cs.iffail
                cs_debug = next_name
                cs = _states.get(next_name)
    except UnicodeDecodeError as e:
        if "test_cve_2018_1999023_2.cfg" in pywmlx.nodemanip.fileref:
            # This unit test is allowed to contain invalid UTF-8. Ignore it.
            return
        bad_offset = int(e.start)  # position of the undecodable byte
        bad_byte = hex(e.object[bad_offset])  # value of that byte
        # _current_lineno is not used to locate the failure (the original
        # author notes it is still 0 when the exception is raised), so the
        # line is derived from the raw data that precedes the bad byte.
        preceding = e.object[0:bad_offset]
        preceding_text = "".join(chr(byte) for byte in preceding)
        # One item per line of text; the last item is the partial line that
        # ends right before the invalid byte.
        preceding_lines = preceding_text.split('\n')
        errlineno = len(preceding_lines)
        finfo = pywmlx.nodemanip.fileref + ":" + str(errlineno)
        errmsg = (
            "UTF-8 Format error.\nCan't decode byte " + str(bad_byte) + ' (' +
            e.reason + ').\n' +
            'Probably your file is not encoded with UTF-8 encoding: you ' +
            'should open the file with an advanced text editor, and re-save ' +
            'it with UTF-8 encoding.\n' +
            'To avoid this problem in the future, you might want to set ' +
            'the default encoding of your editor to UTF-8.\n\n' +
            'Text preceding the invalid byte (source file, line ' +
            str(errlineno) + '):\n' + preceding_lines[-1] + '\n'
        )
        wmlerr(finfo, errmsg)
    pywmlx.nodemanip.closefile(_dictionary, _current_lineno)