Build: Update scripts for .pot, .po files to support Python files

- Rewrites some of the commands to use Python.
- Also implements wmlxgettext --force-po, similar to xgettext.
This commit is contained in:
Ivo Julca 2023-05-12 21:51:24 -05:00 committed by Steve Cotton
parent 5cd12df0af
commit 4d2cb1ce7f
7 changed files with 220 additions and 106 deletions

View file

@ -84,6 +84,7 @@ find_package(ICU REQUIRED COMPONENTS data i18n uc)
# no, gettext executables are not required when NLS is deactivated
find_package(Gettext)
find_package(Python)
find_package(X11)
@ -557,7 +558,7 @@ endif()
add_subdirectory(doc)
if(GETTEXT_FOUND AND ENABLE_NLS)
if(GETTEXT_FOUND AND Python_FOUND AND ENABLE_NLS)
add_subdirectory(po)
endif()

View file

@ -1,7 +1,8 @@
# Update the source file dependencies of the pot file.
#
# This globs all cpp files in the src directory and looks for the text domain
# definition in that file and outputs these dependencies in POTFILES.in.
# definition in that file and outputs these dependencies in POTFILES_CPP.in.
# py, pyw are listed in POTFILES_PY.in
# Remove the old input file.
# A dummy target whose output file does not exist (and is never created) is always executed.
@ -9,71 +10,29 @@ add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in.dummy
# remove the old file.
COMMAND ${CMAKE_COMMAND}
-E remove ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
COMMENT "pot-update [${DOMAIN}]: Removed existing POTFILES.in."
-E remove ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES*.in
COMMENT "pot-update [${DOMAIN}]: Removing existing POTFILES*.in."
)
# Recreate the input file.
if(DOMAIN STREQUAL ${DEFAULT_DOMAIN})
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_CPP.in
# For the default text domain.
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
# Write list of matching files to POTFILES_CPP.in.
COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/po/FINDCPP ${DOMAIN} --initialdomain ${DEFAULT_DOMAIN} >|
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_CPP.in
# Find all cpp files which are not in a .git directory, check their
# textdomain, and write the list of matching files to POTFILES.in.
COMMAND find src -name .git -prune -o -name '*.[hc]pp' -print |
sort |
while read file\; do
# If the file doesn't contain a GETTEXT_DOMAIN
# definition it should be added to the default domain.
if ! grep '^\#define *GETTEXT_DOMAIN'
$$file > /dev/null 2>&1\; then
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in.dummy
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "pot-update [${DOMAIN}]: Creating POTFILES_CPP.in."
)
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_PY.in
echo $$file \;
# Write list of matching files to PY.in.
COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/po/FINDPY ${DOMAIN} >|
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_PY.in
# While files don't need a GETTEXT_DOMAIN to be included in
# the default domain, accept files that contain a
# GETTEXT_DOMAIN definition for the default domain too.
elif grep '^\#define *GETTEXT_DOMAIN *\"${DOMAIN}\"'
$$file > /dev/null 2>&1\; then
echo $$file \;
fi
done >|
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in.dummy
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT
"pot-update [${DOMAIN}]: Created POTFILES.in for default domain."
)
else(DOMAIN STREQUAL ${DEFAULT_DOMAIN})
# For the other text domains.
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
# Find all cpp files which are not in a .git directory, check their
# textdomain, and write the list of matching files to POTFILES.in.
COMMAND find src -name .git -prune -o -name '*cpp' -print |
sort |
while read file\; do
# If the file contains a GETTEXT_DOMAIN definition for
# the current domain add it to the domain.
if grep '^\#define *GETTEXT_DOMAIN *\"${DOMAIN}\"'
$$file > /dev/null 2>&1\; then
echo $$file \;
fi
done >|
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in.dummy
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "pot-update [${DOMAIN}]: Created POTFILES.in."
)
endif(DOMAIN STREQUAL ${DEFAULT_DOMAIN})
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in.dummy
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "pot-update [${DOMAIN}]: Creating POTFILES_PY.in."
)

View file

@ -85,6 +85,13 @@ def commandline(args):
'(Default: current directory). The (relative) path to '
'every WML/Lua file should start from this directory.')
)
parser.add_argument(
'--force-po',
action='store_true',
default=False,
dest='force_po',
help=('Write PO file even if empty.')
)
parser.add_argument(
'--initialdomain',
default='wesnoth',
@ -251,6 +258,11 @@ def main():
folder, filename = os.path.split(folder)
os.makedirs(folder, exist_ok=True)
if args.force_po and args.domain is not None:
for domain in args.domain:
if not domain in sentlist:
sentlist[domain] = dict()
for domain, d in sentlist.items():
if folder is not None:
try:

View file

@ -1,7 +1,7 @@
############ Settings. ###########
set_directory_properties(PROPERTIES CLEAN_NO_CUSTOM true)
# The normal domains use cpp and cfg files.
# The normal domains use cfg, cpp, lua, py files.
set(NORMAL_DOMAINS
wesnoth
wesnoth-anl
@ -56,40 +56,55 @@ if(ENABLE_POT_UPDATE_TARGET)
# Update the source file dependencies.
include(update_pot_source_dependencies)
# Generate pot file for c++ data.
# Generate pot file for C++ strings.
add_custom_command(
# misses bug address
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.cpp.pot
# create the pot file make sure we always get output.
COMMAND ${GETTEXT_XGETTEXT_EXECUTABLE} ${GETTEXT_XGETTEXT_OPTIONS}
--files-from=${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
--files-from=${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_CPP.in
--output=${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.cpp.pot
# replace the chartype
COMMAND sed -i
s/charset=CHARSET/charset=UTF-8/
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.cpp.pot
# Remove some header info - Need to test whether needed.
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES.in
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_CPP.in
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "pot-update [${DOMAIN}]: Generated source pot file."
COMMENT "pot-update [${DOMAIN}]: Generating source pot file."
)
# Generate pot file for wml data.
# Generate pot file for Python strings.
add_custom_command(
    OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.py.pot

    # The rule is re-run whenever the list of Python inputs is regenerated.
    DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_PY.in
    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}

    # Step 1: extract the translatable strings from every file named in
    # POTFILES_PY.in, parsing the inputs as Python.
    COMMAND ${GETTEXT_XGETTEXT_EXECUTABLE} ${GETTEXT_XGETTEXT_OPTIONS}
        --language=Python
        --files-from=${PROJECT_SOURCE_DIR}/po/${DOMAIN}/POTFILES_PY.in
        --output=${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.py.pot

    # Step 2: replace the CHARSET placeholder in the generated header with
    # the real encoding.
    COMMAND sed -i
        s/charset=CHARSET/charset=UTF-8/
        ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.py.pot

    COMMENT "pot-update [${DOMAIN}]: Generating python pot file."
)
# Generate pot file for WML/Lua strings.
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.wml.pot
COMMAND ${WMLXGETTEXT}
--directory=${PROJECT_SOURCE_DIR}
COMMAND ${Python_EXECUTABLE} ${WMLXGETTEXT}
--force-po
--domain=${DOMAIN}
`cd ${PROJECT_SOURCE_DIR} &&
sh ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/FINDCFG`
-o ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.wml.pot
DEPENDS ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/FINDCFG
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMENT "pot-update [${DOMAIN}]: Generated wml pot file."
COMMENT "pot-update [${DOMAIN}]: Generating wml pot file."
)
# Merge both pot files
# Merge pot files
add_custom_command(
# The old function checked for differences in the time in the header see
# what we need to do with it.
@ -98,18 +113,21 @@ if(ENABLE_POT_UPDATE_TARGET)
COMMAND ${GETTEXT_MSGCAT_EXECUTABLE}
--sort-by-file
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.cpp.pot
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.py.pot
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.wml.pot
--output ${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.pot
COMMAND ${CMAKE_COMMAND} -E remove
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.cpp.pot
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.py.pot
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.wml.pot
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
DEPENDS
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.cpp.pot
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.py.pot
${PROJECT_SOURCE_DIR}/po/${DOMAIN}/${DOMAIN}.wml.pot
COMMENT "pot-update [${DOMAIN}]: Generated pot file."
COMMENT "pot-update [${DOMAIN}]: Generating pot file."
)
# Update / generate the po files for all languages

44
po/FINDCPP Executable file
View file

@ -0,0 +1,44 @@
#!/usr/bin/env python3
#
# Lists localizable CPP files.
# Excludes files in .git folders.
#
# Syntax:
# FINDCPP DOMAIN --initialdomain INITIAL_DOMAIN
#
# --initialdomain: Files without an explicit domain default to this value.
import argparse
import glob
import re
from pathlib import Path
if __name__ == '__main__':
    # Command line: FINDCPP DOMAIN [--initialdomain INITIAL_DOMAIN]
    parser = argparse.ArgumentParser()
    parser.add_argument('domain')
    parser.add_argument('--initialdomain', dest='initdom')
    args = parser.parse_args()

    # Matches any explicit GETTEXT_DOMAIN definition at the start of a line.
    has_any_domain = re.compile("^#define *GETTEXT_DOMAIN", re.MULTILINE)
    # Matches a GETTEXT_DOMAIN definition that names exactly args.domain.
    has_this_domain = re.compile("^#define *GETTEXT_DOMAIN *\"" + re.escape(args.domain) + "\"", re.MULTILINE)

    # Files without any explicit definition are attributed to the initial
    # domain, so they are only listed when that is the requested domain.
    is_initial_domain = args.domain == args.initdom

    # Headers are scanned as well as sources because translatable strings
    # have ended up in header files before (6bf76d940).
    candidates = sorted(
        glob.glob("src/**/*.cpp", recursive=True)
        + glob.glob("src/**/*.hpp", recursive=True)
    )

    for name in candidates:
        # On Windows, glob yields paths with mixed separators (/\); going
        # through Path lets the output be normalized to / only.
        source = Path(name)

        # Skip anything located inside a .git directory.
        if ".git" in source.parts:
            continue

        text = source.read_text(encoding='utf8')
        undeclared = is_initial_domain and not has_any_domain.search(text)
        if undeclared or has_this_domain.search(text):
            print(source.as_posix())

52
po/FINDPY Executable file
View file

@ -0,0 +1,52 @@
#!/usr/bin/env python3
#
# Lists localizable Python scripts and modules.
# Excludes files in .git folders.
#
# Syntax:
# FINDPY DOMAIN
#
# In contrast to FINDCPP, there is no default domain.
import argparse
import glob
import re
from pathlib import Path
if __name__ == '__main__':
    # Imported locally: only needed for the stderr diagnostics below.
    import sys

    # Command line: FINDPY DOMAIN
    parser = argparse.ArgumentParser()
    parser.add_argument('domain')  # Single positional argument.
    args = parser.parse_args()

    # Whether the input string has defined args.domain as its text domain,
    # i.e. contains `_ = gettext.translation('<domain>'` (either quote style).
    domain_regexp = re.compile(" *_ *= *gettext\\.translation\\((['\"])" + re.escape(args.domain) + "\\1")

    py_files = glob.glob("data/tools/**/*.py", recursive=True)
    # Executable files are hardcoded rather than scanning for +x (which doesn't exist in Windows),
    # and matching shebang line.
    py_files.extend([
        "data/tools/GUI.pyw",
        "data/tools/about_cfg_to_wiki",
        "data/tools/check_mixed_indent",
        "data/tools/extractbindings",
        "data/tools/imgcheck",
        "data/tools/steam-changelog",
        "data/tools/TeamColorizer",
        "data/tools/tmx_trackplacer",
        "data/tools/wesnoth_addon_manager",
        "data/tools/wmlflip",
        "data/tools/wmlindent",
        "data/tools/wmllint",
        "data/tools/wmllint-1.4",
        "data/tools/wmlscope",
        "data/tools/wmlunits",
        "data/tools/wmlxgettext"
    ])
    py_files.sort()

    for p in py_files:
        # In Windows, glob search yields paths with mixed separators (/\).
        path = Path(p)

        # Exclude any .git subdirectories.
        if ".git" in path.parts:
            continue

        # The hardcoded list above can go stale when a tool is renamed or
        # removed. Skip such entries with a warning instead of aborting with
        # FileNotFoundError halfway through the listing. The warning goes to
        # stderr so that stdout still contains nothing but file paths.
        if not path.is_file():
            print("FINDPY: skipping missing file " + path.as_posix(), file=sys.stderr)
            continue

        if domain_regexp.search(path.read_text(encoding='utf8')):
            # Produce output with / only.
            print(path.as_posix())

View file

@ -4,7 +4,7 @@ from subprocess import Popen, PIPE
import os, shutil
import re
from fnmatch import fnmatch
from os.path import join
from os.path import join, relpath
Import("env")
def remove_pot_cdate(path):
@ -26,19 +26,46 @@ textdomains = glob("wesnoth*")
po4a_domains = Split("wesnoth-manpages wesnoth-manual")
textdomains = [domain for domain in textdomains if os.path.isdir(domain) and os.path.exists(domain+"/"+domain+".pot")]
linguas = Split(open("LINGUAS").read())
intermediate_exts = {
"cpp": "C++",
"py": "Python",
}
if "pot-update" in COMMAND_LINE_TARGETS:
from collections import defaultdict
domain_sources = defaultdict(list)
domain_sources = {ext: defaultdict(list) for ext in intermediate_exts}
for root, dirs, files in os.walk("../src"):
for file in files:
if fnmatch(file, "*.[hc]pp"):
match = re.search('^#define\\s+GETTEXT_DOMAIN\\s+"wesnoth(-.*)"', Dir(root).File(file).get_contents().decode("utf-8"), re.MULTILINE)
match = re.search(r'^#define\s+GETTEXT_DOMAIN\s+"(wesnoth(?:-.*)?)"', Dir(root).File(file).get_contents().decode("utf-8"), re.MULTILINE)
if match:
source_domain = match.group(1)
else:
source_domain = ""
domain_sources[source_domain].append(Dir(root).File(file).path)
# Default to wesnoth textdomain
source_domain = "wesnoth"
domain_sources["cpp"][source_domain].append(Dir(root).File(file).path)
# Collect the Python tools that declare a wesnoth text domain, grouped by
# domain, into domain_sources["py"].
for root, dirs, files in os.walk("../data/tools"):
    for file in files:
        # Anything named *.py* is treated as Python by its file name alone.
        is_python = fnmatch(file, "*.py*")
        file_content = None
        if not is_python and not os.path.isdir(os.path.join(root, file)):
            # Other files count as Python only when their first line is a
            # shebang mentioning python. A shebang starts with the two bytes
            # "#!"; the previous comparison against b'^#!' included a literal
            # caret and therefore never matched a real script.
            file_content = Dir(root).File(file).get_contents()
            first_line = file_content.split(b"\n")[0]
            is_python = first_line.startswith(b"#!") and b"python" in first_line
        if not is_python:
            continue
        if file_content is None:
            file_content = Dir(root).File(file).get_contents()
        try:
            match = re.search(r'^\s*_\s*=\s*gettext\.translation\(\s*([\'"])(wesnoth(?:-.*)?)\1', file_content.decode("utf-8"), re.MULTILINE)
        except UnicodeDecodeError:
            # Best effort: files that are not valid UTF-8 (for example
            # compiled *.pyc files matched by the pattern above) are ignored.
            continue
        if match:
            # In contrast to CPP, there is no default: files that do not
            # declare a text domain are not listed for any domain.
            source_domain = match.group(2)
            domain_sources["py"][source_domain].append(Dir(root).File(file).path)
for domain in textdomains:
pot = File(join(domain, domain + ".pot"))
@ -48,22 +75,28 @@ if "pot-update" in COMMAND_LINE_TARGETS:
if domain in po4a_domains:
continue
with open(join(domain, "POTFILES.in"), "w") as potfiles:
potfiles.writelines([line + "\n" for line in sorted(domain_sources[domain[7:]])])
sources = [Dir("#").File(x) for x in domain_sources[domain[7:]]]
if sources:
source_pot = env.Command(
join(domain, domain + ".cpp.pot"),
sources,
"""xgettext --force-po --default-domain=%s --directory=. --add-comments=TRANSLATORS: \
--from-code=UTF-8 --sort-by-file \
--keyword=_ --keyword=N_ --keyword=sgettext --keyword=vgettext --keyword=VGETTEXT \
--keyword=_n:1,2 --keyword=N_n:1,2 --keyword=sngettext:1,2 --keyword=vngettext:1,2 --keyword=VNGETTEXT:1,2 \
--files-from=%s --copyright-holder='Wesnoth development team' --msgid-bugs-address=https://bugs.wesnoth.org/ \
--output=$TARGET \
; sed -i s/charset=CHARSET/charset=UTF-8/ $TARGET \
""" % (domain, join("po", domain, "POTFILES.in"))
intermediate_files = []
for ext, language in intermediate_exts.items():
files_from = join("po", domain, "POTFILES_{}.in".format(ext.upper()))
with open(relpath(files_from, "po"), "w") as potfiles:
potfiles.writelines([line + "\n" for line in sorted(domain_sources[ext][domain])])
file_list = [Dir("#").File(x) for x in domain_sources[ext][domain]]
if file_list:
target_path = env.Command(
join(domain, "{}.{}.pot".format(domain, ext)),
file_list,
"""xgettext --force-po --default-domain={default_domain} --directory=. --add-comments=TRANSLATORS: \
--from-code=UTF-8 --language={language} --sort-by-file \
--keyword=_ --keyword=N_ --keyword=sgettext --keyword=vgettext --keyword=VGETTEXT \
--keyword=_n:1,2 --keyword=N_n:1,2 --keyword=sngettext:1,2 --keyword=vngettext:1,2 --keyword=VNGETTEXT:1,2 \
--files-from={files_from} --copyright-holder='Wesnoth development team' --msgid-bugs-address=https://bugs.wesnoth.org/ \
--output=$TARGET \
; sed -i s/charset=CHARSET/charset=UTF-8/ $TARGET \
""".format(default_domain=domain, language=language, files_from=files_from)
)
intermediate_files.append(target_path)
# wml/lua
cfgs = []
FINDCFG = join(domain, "FINDCFG")
if os.path.exists(FINDCFG):
@ -73,22 +106,17 @@ if "pot-update" in COMMAND_LINE_TARGETS:
wml_pot = env.Command(
join(domain, domain + ".wml.pot"),
cfgs,
"data/tools/wmlxgettext --directory=. --domain=%s $SOURCES -o $TARGET" % domain
"data/tools/wmlxgettext --force-po --directory=. --domain=%s $SOURCES -o $TARGET" % domain
)
intermediate_files.append(wml_pot)
new_pot = str(pot) + ".new"
if cfgs and sources:
env.Command(new_pot, [source_pot, wml_pot],
env.Command(new_pot, intermediate_files,
[
"msgcat --sort-by-file $SOURCES -o $TARGET",
Delete(wml_pot),
Delete(source_pot)
]
] + list(map(Delete, intermediate_files))
)
elif cfgs:
env.Command(new_pot, wml_pot, ["msgcat --sort-by-file $SOURCES -o $TARGET", Delete(wml_pot)])
else:
env.Command(new_pot, source_pot, ["msgcat --sort-by-file $SOURCES -o $TARGET", Delete(source_pot)])
env.Command(pot, new_pot, Action(update_pot))
env.Alias("pot-update", "../translations", "python3 utils/po_stat.py --update-cfg --textdomains=wesnoth,wesnoth-editor,wesnoth-help,wesnoth-lib,wesnoth-multiplayer,wesnoth-tutorial,wesnoth-units")