LibUnicode: Replace individual UCD file downloads with single UCD.zip
Instead of downloading nearly 20 files individually, we can download a single .zip file, similar to how we download a single CLDR .zip. This reduces the number of connections to, and downloads from, unicode.org.
This commit is contained in:
parent
8545e2dec0
commit
8a46794ff8
Notes:
sideshowbarker
2024-07-17 14:19:06 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/8a46794ff8 Pull-request: https://github.com/SerenityOS/serenity/pull/13545 Reviewed-by: https://github.com/bgianfo ✅
1 changed file with 67 additions and 63 deletions
|
@ -13,56 +13,59 @@ set(CLDR_PATH "${CMAKE_BINARY_DIR}/CLDR" CACHE PATH "Download location for CLDR
|
|||
set(UCD_VERSION_FILE "${UCD_PATH}/version.txt")
|
||||
set(CLDR_VERSION_FILE "${CLDR_PATH}/version.txt")
|
||||
|
||||
set(UNICODE_DATA_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/UnicodeData.txt")
|
||||
set(UNICODE_DATA_PATH "${UCD_PATH}/UnicodeData.txt")
|
||||
set(UCD_ZIP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/UCD.zip")
|
||||
set(UCD_ZIP_PATH "${UCD_PATH}/UCD.zip")
|
||||
|
||||
set(SPECIAL_CASING_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/SpecialCasing.txt")
|
||||
set(SPECIAL_CASING_PATH "${UCD_PATH}/SpecialCasing.txt")
|
||||
set(UNICODE_DATA_SOURCE "UnicodeData.txt")
|
||||
set(UNICODE_DATA_PATH "${UCD_PATH}/${UNICODE_DATA_SOURCE}")
|
||||
|
||||
set(DERIVED_GENERAL_CATEGORY_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/extracted/DerivedGeneralCategory.txt")
|
||||
set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/DerivedGeneralCategory.txt")
|
||||
set(SPECIAL_CASING_SOURCE "SpecialCasing.txt")
|
||||
set(SPECIAL_CASING_PATH "${UCD_PATH}/${SPECIAL_CASING_SOURCE}")
|
||||
|
||||
set(PROP_LIST_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropList.txt")
|
||||
set(PROP_LIST_PATH "${UCD_PATH}/PropList.txt")
|
||||
set(DERIVED_GENERAL_CATEGORY_SOURCE "extracted/DerivedGeneralCategory.txt")
|
||||
set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/${DERIVED_GENERAL_CATEGORY_SOURCE}")
|
||||
|
||||
set(DERIVED_CORE_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/DerivedCoreProperties.txt")
|
||||
set(DERIVED_CORE_PROP_PATH "${UCD_PATH}/DerivedCoreProperties.txt")
|
||||
set(PROP_LIST_SOURCE "PropList.txt")
|
||||
set(PROP_LIST_PATH "${UCD_PATH}/${PROP_LIST_SOURCE}")
|
||||
|
||||
set(DERIVED_BINARY_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/extracted/DerivedBinaryProperties.txt")
|
||||
set(DERIVED_BINARY_PROP_PATH "${UCD_PATH}/DerivedBinaryProperties.txt")
|
||||
set(DERIVED_CORE_PROP_SOURCE "DerivedCoreProperties.txt")
|
||||
set(DERIVED_CORE_PROP_PATH "${UCD_PATH}/${DERIVED_CORE_PROP_SOURCE}")
|
||||
|
||||
set(PROP_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropertyAliases.txt")
|
||||
set(PROP_ALIAS_PATH "${UCD_PATH}/PropertyAliases.txt")
|
||||
set(DERIVED_BINARY_PROP_SOURCE "extracted/DerivedBinaryProperties.txt")
|
||||
set(DERIVED_BINARY_PROP_PATH "${UCD_PATH}/${DERIVED_BINARY_PROP_SOURCE}")
|
||||
|
||||
set(PROP_VALUE_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/PropertyValueAliases.txt")
|
||||
set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/PropertyValueAliases.txt")
|
||||
set(PROP_ALIAS_SOURCE "PropertyAliases.txt")
|
||||
set(PROP_ALIAS_PATH "${UCD_PATH}/${PROP_ALIAS_SOURCE}")
|
||||
|
||||
set(NAME_ALIAS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/NameAliases.txt")
|
||||
set(NAME_ALIAS_PATH "${UCD_PATH}/NameAliases.txt")
|
||||
set(PROP_VALUE_ALIAS_SOURCE "PropertyValueAliases.txt")
|
||||
set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/${PROP_VALUE_ALIAS_SOURCE}")
|
||||
|
||||
set(SCRIPTS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/Scripts.txt")
|
||||
set(SCRIPTS_PATH "${UCD_PATH}/Scripts.txt")
|
||||
set(NAME_ALIAS_SOURCE "NameAliases.txt")
|
||||
set(NAME_ALIAS_PATH "${UCD_PATH}/${NAME_ALIAS_SOURCE}")
|
||||
|
||||
set(SCRIPT_EXTENSIONS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/ScriptExtensions.txt")
|
||||
set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/ScriptExtensions.txt")
|
||||
set(SCRIPTS_SOURCE "Scripts.txt")
|
||||
set(SCRIPTS_PATH "${UCD_PATH}/${SCRIPTS_SOURCE}")
|
||||
|
||||
set(BLOCKS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/Blocks.txt")
|
||||
set(BLOCKS_PATH "${UCD_PATH}/Blocks.txt")
|
||||
set(SCRIPT_EXTENSIONS_SOURCE "ScriptExtensions.txt")
|
||||
set(SCRIPT_EXTENSIONS_PATH "${UCD_PATH}/${SCRIPT_EXTENSIONS_SOURCE}")
|
||||
|
||||
set(EMOJI_DATA_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/emoji/emoji-data.txt")
|
||||
set(EMOJI_DATA_PATH "${UCD_PATH}/emoji-data.txt")
|
||||
set(BLOCKS_SOURCE "Blocks.txt")
|
||||
set(BLOCKS_PATH "${UCD_PATH}/${BLOCKS_SOURCE}")
|
||||
|
||||
set(NORM_PROPS_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/DerivedNormalizationProps.txt")
|
||||
set(NORM_PROPS_PATH "${UCD_PATH}/DerivedNormalizationProps.txt")
|
||||
set(EMOJI_DATA_SOURCE "emoji/emoji-data.txt")
|
||||
set(EMOJI_DATA_PATH "${UCD_PATH}/${EMOJI_DATA_SOURCE}")
|
||||
|
||||
set(GRAPHEME_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/GraphemeBreakProperty.txt")
|
||||
set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/GraphemeBreakProperty.txt")
|
||||
set(NORM_PROPS_SOURCE "DerivedNormalizationProps.txt")
|
||||
set(NORM_PROPS_PATH "${UCD_PATH}/${NORM_PROPS_SOURCE}")
|
||||
|
||||
set(WORD_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/WordBreakProperty.txt")
|
||||
set(WORD_BREAK_PROP_PATH "${UCD_PATH}/WordBreakProperty.txt")
|
||||
set(GRAPHEME_BREAK_PROP_SOURCE "auxiliary/GraphemeBreakProperty.txt")
|
||||
set(GRAPHEME_BREAK_PROP_PATH "${UCD_PATH}/${GRAPHEME_BREAK_PROP_SOURCE}")
|
||||
|
||||
set(SENTENCE_BREAK_PROP_URL "https://www.unicode.org/Public/${UCD_VERSION}/ucd/auxiliary/SentenceBreakProperty.txt")
|
||||
set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/SentenceBreakProperty.txt")
|
||||
set(WORD_BREAK_PROP_SOURCE "auxiliary/WordBreakProperty.txt")
|
||||
set(WORD_BREAK_PROP_PATH "${UCD_PATH}/${WORD_BREAK_PROP_SOURCE}")
|
||||
|
||||
set(SENTENCE_BREAK_PROP_SOURCE "auxiliary/SentenceBreakProperty.txt")
|
||||
set(SENTENCE_BREAK_PROP_PATH "${UCD_PATH}/${SENTENCE_BREAK_PROP_SOURCE}")
|
||||
|
||||
set(CLDR_ZIP_URL "https://github.com/unicode-org/cldr-json/releases/download/${CLDR_REAL_VERSION}/cldr-${CLDR_REAL_VERSION}-json-modern.zip")
|
||||
set(CLDR_ZIP_PATH "${CLDR_PATH}/cldr.zip")
|
||||
|
@ -88,12 +91,12 @@ set(CLDR_NUMBERS_PATH "${CLDR_PATH}/${CLDR_NUMBERS_SOURCE}")
|
|||
set(CLDR_UNITS_SOURCE cldr-units-modern)
|
||||
set(CLDR_UNITS_PATH "${CLDR_PATH}/${CLDR_UNITS_SOURCE}")
|
||||
|
||||
function(extract_cldr_file source path)
|
||||
if(EXISTS "${CLDR_ZIP_PATH}" AND NOT EXISTS "${path}")
|
||||
message(STATUS "Extracting CLDR ${source} from ${CLDR_ZIP_PATH}...")
|
||||
execute_process(COMMAND "${UNZIP_TOOL}" -q "${CLDR_ZIP_PATH}" "${source}/**" -d "${CLDR_PATH}" RESULT_VARIABLE unzip_result)
|
||||
function(extract_path dest_dir zip_path source_path dest_path)
|
||||
if (EXISTS "${zip_path}" AND NOT EXISTS "${dest_path}")
|
||||
message(STATUS "Extracting ${source_path} from ${zip_path}")
|
||||
execute_process(COMMAND "${UNZIP_TOOL}" -q "${zip_path}" "${source_path}" -d "${dest_dir}" RESULT_VARIABLE unzip_result)
|
||||
if (NOT unzip_result EQUAL 0)
|
||||
message(FATAL_ERROR "Failed to unzip ${source} from ${CLDR_ZIP_PATH} with status ${unzip_result}")
|
||||
message(FATAL_ERROR "Failed to unzip ${source_path} from ${zip_path} with status ${unzip_result}")
|
||||
endif()
|
||||
endif()
|
||||
endfunction()
|
||||
|
@ -102,32 +105,33 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
remove_path_if_version_changed("${UCD_VERSION}" "${UCD_VERSION_FILE}" "${UCD_PATH}")
|
||||
remove_path_if_version_changed("${CLDR_VERSION}" "${CLDR_VERSION_FILE}" "${CLDR_PATH}")
|
||||
|
||||
download_file("${UNICODE_DATA_URL}" "${UNICODE_DATA_PATH}")
|
||||
download_file("${SPECIAL_CASING_URL}" "${SPECIAL_CASING_PATH}")
|
||||
download_file("${DERIVED_GENERAL_CATEGORY_URL}" "${DERIVED_GENERAL_CATEGORY_PATH}")
|
||||
download_file("${PROP_LIST_URL}" "${PROP_LIST_PATH}")
|
||||
download_file("${DERIVED_CORE_PROP_URL}" "${DERIVED_CORE_PROP_PATH}")
|
||||
download_file("${DERIVED_BINARY_PROP_URL}" "${DERIVED_BINARY_PROP_PATH}")
|
||||
download_file("${PROP_ALIAS_URL}" "${PROP_ALIAS_PATH}")
|
||||
download_file("${PROP_VALUE_ALIAS_URL}" "${PROP_VALUE_ALIAS_PATH}")
|
||||
download_file("${NAME_ALIAS_URL}" "${NAME_ALIAS_PATH}")
|
||||
download_file("${SCRIPTS_URL}" "${SCRIPTS_PATH}")
|
||||
download_file("${SCRIPT_EXTENSIONS_URL}" "${SCRIPT_EXTENSIONS_PATH}")
|
||||
download_file("${BLOCKS_URL}" "${BLOCKS_PATH}")
|
||||
download_file("${EMOJI_DATA_URL}" "${EMOJI_DATA_PATH}")
|
||||
download_file("${NORM_PROPS_URL}" "${NORM_PROPS_PATH}")
|
||||
download_file("${GRAPHEME_BREAK_PROP_URL}" "${GRAPHEME_BREAK_PROP_PATH}")
|
||||
download_file("${WORD_BREAK_PROP_URL}" "${WORD_BREAK_PROP_PATH}")
|
||||
download_file("${SENTENCE_BREAK_PROP_URL}" "${SENTENCE_BREAK_PROP_PATH}")
|
||||
download_file("${UCD_ZIP_URL}" "${UCD_ZIP_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${UNICODE_DATA_SOURCE}" "${UNICODE_DATA_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SPECIAL_CASING_SOURCE}" "${SPECIAL_CASING_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_GENERAL_CATEGORY_SOURCE}" "${DERIVED_GENERAL_CATEGORY_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_LIST_SOURCE}" "${PROP_LIST_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_CORE_PROP_SOURCE}" "${DERIVED_CORE_PROP_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_BINARY_PROP_SOURCE}" "${DERIVED_BINARY_PROP_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_ALIAS_SOURCE}" "${PROP_ALIAS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_VALUE_ALIAS_SOURCE}" "${PROP_VALUE_ALIAS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NAME_ALIAS_SOURCE}" "${NAME_ALIAS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPTS_SOURCE}" "${SCRIPTS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${BLOCKS_SOURCE}" "${BLOCKS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${EMOJI_DATA_SOURCE}" "${EMOJI_DATA_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${NORM_PROPS_SOURCE}" "${NORM_PROPS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${GRAPHEME_BREAK_PROP_SOURCE}" "${GRAPHEME_BREAK_PROP_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${WORD_BREAK_PROP_SOURCE}" "${WORD_BREAK_PROP_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SENTENCE_BREAK_PROP_SOURCE}" "${SENTENCE_BREAK_PROP_PATH}")
|
||||
|
||||
download_file("${CLDR_ZIP_URL}" "${CLDR_ZIP_PATH}")
|
||||
extract_cldr_file("${CLDR_BCP47_SOURCE}" "${CLDR_BCP47_PATH}")
|
||||
extract_cldr_file("${CLDR_CORE_SOURCE}" "${CLDR_CORE_PATH}")
|
||||
extract_cldr_file("${CLDR_DATES_SOURCE}" "${CLDR_DATES_PATH}")
|
||||
extract_cldr_file("${CLDR_LOCALES_SOURCE}" "${CLDR_LOCALES_PATH}")
|
||||
extract_cldr_file("${CLDR_MISC_SOURCE}" "${CLDR_MISC_PATH}")
|
||||
extract_cldr_file("${CLDR_NUMBERS_SOURCE}" "${CLDR_NUMBERS_PATH}")
|
||||
extract_cldr_file("${CLDR_UNITS_SOURCE}" "${CLDR_UNITS_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_BCP47_SOURCE}/**" "${CLDR_BCP47_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_CORE_SOURCE}/**" "${CLDR_CORE_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_DATES_SOURCE}/**" "${CLDR_DATES_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_LOCALES_SOURCE}/**" "${CLDR_LOCALES_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_MISC_SOURCE}/**" "${CLDR_MISC_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_NUMBERS_SOURCE}/**" "${CLDR_NUMBERS_PATH}")
|
||||
extract_path("${CLDR_PATH}" "${CLDR_ZIP_PATH}" "${CLDR_UNITS_SOURCE}/**" "${CLDR_UNITS_PATH}")
|
||||
|
||||
set(UNICODE_DATA_HEADER LibUnicode/UnicodeData.h)
|
||||
set(UNICODE_DATA_IMPLEMENTATION LibUnicode/UnicodeData.cpp)
|
||||
|
|
Loading…
Add table
Reference in a new issue