فهرست منبع

LibUnicode: Parse UCD DerivedBinaryProperties.txt and generate property

Timothy Flynn 3 سال پیش
والد
کامیت
6f2640d031

+ 1 - 1
Userland/Libraries/LibUnicode/CharacterTypes.cpp

@@ -262,7 +262,7 @@ bool is_ecma262_property([[maybe_unused]] Property property)
     case Unicode::Property::Any:
     case Unicode::Property::Assigned:
     case Unicode::Property::Bidi_Control:
-    // case Unicode::Property::Bidi_Mirrored:
+    case Unicode::Property::Bidi_Mirrored:
     case Unicode::Property::Case_Ignorable:
     case Unicode::Property::Cased:
     case Unicode::Property::Changes_When_Casefolded:

+ 4 - 0
Userland/Libraries/LibUnicode/CodeGenerators/GenerateUnicodeData.cpp

@@ -823,6 +823,7 @@ int main(int argc, char** argv)
     char const* special_casing_path = nullptr;
     char const* prop_list_path = nullptr;
     char const* derived_core_prop_path = nullptr;
+    char const* derived_binary_prop_path = nullptr;
     char const* prop_alias_path = nullptr;
     char const* prop_value_alias_path = nullptr;
     char const* scripts_path = nullptr;
@@ -837,6 +838,7 @@ int main(int argc, char** argv)
     args_parser.add_option(special_casing_path, "Path to SpecialCasing.txt file", "special-casing-path", 's', "special-casing-path");
     args_parser.add_option(prop_list_path, "Path to PropList.txt file", "prop-list-path", 'p', "prop-list-path");
     args_parser.add_option(derived_core_prop_path, "Path to DerivedCoreProperties.txt file", "derived-core-prop-path", 'd', "derived-core-prop-path");
+    args_parser.add_option(derived_binary_prop_path, "Path to DerivedBinaryProperties.txt file", "derived-binary-prop-path", 'b', "derived-binary-prop-path");
     args_parser.add_option(prop_alias_path, "Path to PropertyAliases.txt file", "prop-alias-path", 'a', "prop-alias-path");
     args_parser.add_option(prop_value_alias_path, "Path to PropertyValueAliases.txt file", "prop-value-alias-path", 'v', "prop-value-alias-path");
     args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path");
@@ -867,6 +869,7 @@ int main(int argc, char** argv)
     auto special_casing_file = open_file(special_casing_path, "-s/--special-casing-path");
     auto prop_list_file = open_file(prop_list_path, "-p/--prop-list-path");
     auto derived_core_prop_file = open_file(derived_core_prop_path, "-d/--derived-core-prop-path");
+    auto derived_binary_prop_file = open_file(derived_binary_prop_path, "-b/--derived-binary-prop-path");
     auto prop_alias_file = open_file(prop_alias_path, "-a/--prop-alias-path");
     auto prop_value_alias_file = open_file(prop_value_alias_path, "-v/--prop-value-alias-path");
     auto scripts_file = open_file(scripts_path, "-r/--scripts-path");
@@ -878,6 +881,7 @@ int main(int argc, char** argv)
     parse_special_casing(special_casing_file, unicode_data);
     parse_prop_list(prop_list_file, unicode_data.prop_list);
     parse_prop_list(derived_core_prop_file, unicode_data.prop_list);
+    parse_prop_list(derived_binary_prop_file, unicode_data.prop_list);
     parse_prop_list(emoji_data_file, unicode_data.prop_list);
     parse_alias_list(prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases);
     parse_prop_list(scripts_file, unicode_data.script_list);

+ 9 - 2
Userland/Libraries/LibUnicode/unicode_data.cmake

@@ -12,6 +12,9 @@ set(PROP_LIST_PATH ${CMAKE_BINARY_DIR}/UCD/PropList.txt)
 set(DERIVED_CORE_PROP_URL https://www.unicode.org/Public/13.0.0/ucd/DerivedCoreProperties.txt)
 set(DERIVED_CORE_PROP_PATH ${CMAKE_BINARY_DIR}/UCD/DerivedCoreProperties.txt)
 
+set(DERIVED_BINARY_PROP_URL https://www.unicode.org/Public/13.0.0/ucd/extracted/DerivedBinaryProperties.txt)
+set(DERIVED_BINARY_PROP_PATH ${CMAKE_BINARY_DIR}/UCD/DerivedBinaryProperties.txt)
+
 set(PROP_ALIAS_URL https://www.unicode.org/Public/13.0.0/ucd/PropertyAliases.txt)
 set(PROP_ALIAS_PATH ${CMAKE_BINARY_DIR}/UCD/PropertyAliases.txt)
 
@@ -47,6 +50,10 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
         message(STATUS "Downloading UCD DerivedCoreProperties.txt from ${DERIVED_CORE_PROP_URL}...")
         file(DOWNLOAD ${DERIVED_CORE_PROP_URL} ${DERIVED_CORE_PROP_PATH} INACTIVITY_TIMEOUT 10)
     endif()
+    if (NOT EXISTS ${DERIVED_BINARY_PROP_PATH})
+        message(STATUS "Downloading UCD DerivedBinaryProperties.txt from ${DERIVED_BINARY_PROP_URL}...")
+        file(DOWNLOAD ${DERIVED_BINARY_PROP_URL} ${DERIVED_BINARY_PROP_PATH} INACTIVITY_TIMEOUT 10)
+    endif()
     if (NOT EXISTS ${PROP_ALIAS_PATH})
         message(STATUS "Downloading UCD PropertyAliases.txt from ${PROP_ALIAS_URL}...")
         file(DOWNLOAD ${PROP_ALIAS_URL} ${PROP_ALIAS_PATH} INACTIVITY_TIMEOUT 10)
@@ -82,9 +89,9 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
 
     add_custom_command(
         OUTPUT ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION}
-        COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH} -e ${EMOJI_DATA_PATH}
+        COMMAND $<TARGET_FILE:GenerateUnicodeData> -h ${UNICODE_DATA_HEADER} -c ${UNICODE_DATA_IMPLEMENTATION} -u ${UNICODE_DATA_PATH} -s ${SPECIAL_CASING_PATH} -p ${PROP_LIST_PATH} -d ${DERIVED_CORE_PROP_PATH} -b ${DERIVED_BINARY_PROP_PATH} -a ${PROP_ALIAS_PATH} -v ${PROP_VALUE_ALIAS_PATH} -r ${SCRIPTS_PATH} -x ${SCRIPT_EXTENSIONS_PATH} -w ${WORD_BREAK_PATH} -e ${EMOJI_DATA_PATH}
         VERBATIM
-        DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH} ${EMOJI_DATA_PATH}
+        DEPENDS GenerateUnicodeData ${UNICODE_DATA_PATH} ${SPECIAL_CASING_PATH} ${PROP_LIST_PATH} ${DERIVED_CORE_PROP_PATH} ${DERIVED_BINARY_PROP_PATH} ${PROP_ALIAS_PATH} ${PROP_VALUE_ALIAS_PATH} ${SCRIPTS_PATH} ${SCRIPT_EXTENSIONS_PATH} ${WORD_BREAK_PATH} ${EMOJI_DATA_PATH}
     )
 
     set(UNICODE_DATA_SOURCES ${UNICODE_DATA_HEADER} ${UNICODE_DATA_IMPLEMENTATION})