mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibUnicode: Replace code point general categories with ICU
This commit is contained in:
parent
c804bda5fd
commit
986ff984cc
Notes:
sideshowbarker
2024-07-17 16:23:06 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/986ff984cc Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/239
9 changed files with 90 additions and 68 deletions
|
@ -13,9 +13,6 @@ set(UCD_ZIP_PATH "${UCD_PATH}/UCD.zip")
|
|||
set(UNICODE_DATA_SOURCE "UnicodeData.txt")
|
||||
set(UNICODE_DATA_PATH "${UCD_PATH}/${UNICODE_DATA_SOURCE}")
|
||||
|
||||
set(DERIVED_GENERAL_CATEGORY_SOURCE "extracted/DerivedGeneralCategory.txt")
|
||||
set(DERIVED_GENERAL_CATEGORY_PATH "${UCD_PATH}/${DERIVED_GENERAL_CATEGORY_SOURCE}")
|
||||
|
||||
set(PROP_VALUE_ALIAS_SOURCE "PropertyValueAliases.txt")
|
||||
set(PROP_VALUE_ALIAS_PATH "${UCD_PATH}/${PROP_VALUE_ALIAS_SOURCE}")
|
||||
|
||||
|
@ -39,7 +36,6 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
if (ENABLE_NETWORK_DOWNLOADS)
|
||||
download_file("${UCD_ZIP_URL}" "${UCD_ZIP_PATH}" SHA256 "${UCD_SHA256}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${UNICODE_DATA_SOURCE}" "${UNICODE_DATA_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${DERIVED_GENERAL_CATEGORY_SOURCE}" "${DERIVED_GENERAL_CATEGORY_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${PROP_VALUE_ALIAS_SOURCE}" "${PROP_VALUE_ALIAS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPTS_SOURCE}" "${SCRIPTS_PATH}")
|
||||
extract_path("${UCD_PATH}" "${UCD_ZIP_PATH}" "${SCRIPT_EXTENSIONS_SOURCE}" "${SCRIPT_EXTENSIONS_PATH}")
|
||||
|
@ -67,7 +63,7 @@ if (ENABLE_UNICODE_DATABASE_DOWNLOAD)
|
|||
"${UCD_VERSION_FILE}"
|
||||
"${UNICODE_DATA_HEADER}"
|
||||
"${UNICODE_DATA_IMPLEMENTATION}"
|
||||
arguments -u "${UNICODE_DATA_PATH}" -g "${DERIVED_GENERAL_CATEGORY_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}"
|
||||
arguments -u "${UNICODE_DATA_PATH}" -v "${PROP_VALUE_ALIAS_PATH}" -r "${SCRIPTS_PATH}" -x "${SCRIPT_EXTENSIONS_PATH}"
|
||||
)
|
||||
invoke_generator(
|
||||
"EmojiData"
|
||||
|
|
|
@ -60,17 +60,12 @@ struct CodePointBidiClass {
|
|||
struct UnicodeData {
|
||||
Vector<CodePointData> code_point_data;
|
||||
|
||||
// https://www.unicode.org/reports/tr44/#General_Category_Values
|
||||
PropList general_categories;
|
||||
Vector<Alias> general_category_aliases;
|
||||
|
||||
PropList script_list {
|
||||
{ "Unknown"sv, {} },
|
||||
};
|
||||
Vector<Alias> script_aliases;
|
||||
PropList script_extensions;
|
||||
|
||||
CodePointTables<PropertyTable> general_category_tables;
|
||||
CodePointTables<PropertyTable> script_tables;
|
||||
CodePointTables<PropertyTable> script_extension_tables;
|
||||
|
||||
|
@ -290,7 +285,6 @@ enum class @name@ : @underlying@ {)~~~");
|
|||
namespace Unicode {
|
||||
)~~~");
|
||||
|
||||
generate_enum("GeneralCategory"sv, {}, unicode_data.general_categories.keys(), unicode_data.general_category_aliases);
|
||||
generate_enum("Script"sv, {}, unicode_data.script_list.keys(), unicode_data.script_aliases);
|
||||
generate_enum("BidirectionalClass"sv, {}, unicode_data.bidirectional_classes.values());
|
||||
|
||||
|
@ -405,7 +399,6 @@ static constexpr Array<@type@, @size@> @name@ { {
|
|||
return {};
|
||||
};
|
||||
|
||||
TRY(append_code_point_tables("s_general_categories"sv, unicode_data.general_category_tables, append_property_table));
|
||||
TRY(append_code_point_tables("s_scripts"sv, unicode_data.script_tables, append_property_table));
|
||||
TRY(append_code_point_tables("s_script_extensions"sv, unicode_data.script_extension_tables, append_property_table));
|
||||
|
||||
|
@ -489,9 +482,6 @@ bool code_point_has_@enum_snake@(u32 code_point, @enum_title@ @enum_snake@)
|
|||
return {};
|
||||
};
|
||||
|
||||
TRY(append_prop_search("GeneralCategory"sv, "general_category"sv, "s_general_categories"sv));
|
||||
TRY(append_from_string("GeneralCategory"sv, "general_category"sv, unicode_data.general_categories, unicode_data.general_category_aliases));
|
||||
|
||||
TRY(append_prop_search("Script"sv, "script"sv, "s_scripts"sv));
|
||||
TRY(append_prop_search("Script"sv, "script_extension"sv, "s_script_extensions"sv));
|
||||
TRY(append_from_string("Script"sv, "script"sv, unicode_data.script_list, unicode_data.script_aliases));
|
||||
|
@ -561,29 +551,6 @@ static void sort_and_merge_code_point_ranges(Vector<Unicode::CodePointRange>& co
|
|||
code_points = form_code_point_ranges(all_code_points);
|
||||
}
|
||||
|
||||
static void populate_general_category_unions(PropList& general_categories)
{
    // Some General Category values appear in no UCD file because they are purely the union of
    // other values, so their code point ranges are synthesized here from the parsed categories.
    // https://www.unicode.org/reports/tr44/#GC_Values_Table
    auto define_union = [&general_categories](auto union_name, auto member_names) {
        auto& merged_ranges = general_categories.ensure(union_name);

        // Every member is expected to be present already; the lookup result is used unchecked.
        for (auto const& member_name : member_names) {
            auto member = general_categories.find(member_name);
            merged_ranges.extend(member->value);
        }

        sort_and_merge_code_point_ranges(merged_ranges);
    };

    define_union("LC"sv, Array { "Ll"sv, "Lu"sv, "Lt"sv });
    define_union("L"sv, Array { "Lu"sv, "Ll"sv, "Lt"sv, "Lm"sv, "Lo"sv });
    define_union("M"sv, Array { "Mn"sv, "Mc"sv, "Me"sv });
    define_union("N"sv, Array { "Nd"sv, "Nl"sv, "No"sv });
    define_union("P"sv, Array { "Pc"sv, "Pd"sv, "Ps"sv, "Pe"sv, "Pi"sv, "Pf"sv, "Po"sv });
    define_union("S"sv, Array { "Sm"sv, "Sc"sv, "Sk"sv, "So"sv });
    define_union("Z"sv, Array { "Zs"sv, "Zl"sv, "Zp"sv });
    define_union("C"sv, Array { "Cc"sv, "Cf"sv, "Cs"sv, "Co"sv, "Cn"sv });
}
|
||||
|
||||
static ErrorOr<void> normalize_script_extensions(PropList& script_extensions, PropList const& script_list, Vector<Alias> const& script_aliases)
|
||||
{
|
||||
// The ScriptExtensions UCD file lays out its code point ranges rather uniquely compared to
|
||||
|
@ -755,12 +722,10 @@ static ErrorOr<void> create_code_point_tables(UnicodeData& unicode_data)
|
|||
return {};
|
||||
};
|
||||
|
||||
auto general_category_metadata = TRY(PropertyMetadata::create(unicode_data.general_categories));
|
||||
auto script_metadata = TRY(PropertyMetadata::create(unicode_data.script_list));
|
||||
auto script_extension_metadata = TRY(PropertyMetadata::create(unicode_data.script_extensions));
|
||||
|
||||
for (u32 code_point = 0; code_point <= MAX_CODE_POINT; ++code_point) {
|
||||
TRY(update_property_tables(code_point, unicode_data.general_category_tables, general_category_metadata));
|
||||
TRY(update_property_tables(code_point, unicode_data.script_tables, script_metadata));
|
||||
TRY(update_property_tables(code_point, unicode_data.script_extension_tables, script_extension_metadata));
|
||||
}
|
||||
|
@ -773,7 +738,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
StringView generated_header_path;
|
||||
StringView generated_implementation_path;
|
||||
StringView unicode_data_path;
|
||||
StringView derived_general_category_path;
|
||||
StringView prop_value_alias_path;
|
||||
StringView scripts_path;
|
||||
StringView script_extensions_path;
|
||||
|
@ -782,7 +746,6 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
args_parser.add_option(generated_header_path, "Path to the Unicode Data header file to generate", "generated-header-path", 'h', "generated-header-path");
|
||||
args_parser.add_option(generated_implementation_path, "Path to the Unicode Data implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
|
||||
args_parser.add_option(unicode_data_path, "Path to UnicodeData.txt file", "unicode-data-path", 'u', "unicode-data-path");
|
||||
args_parser.add_option(derived_general_category_path, "Path to DerivedGeneralCategory.txt file", "derived-general-category-path", 'g', "derived-general-category-path");
|
||||
args_parser.add_option(prop_value_alias_path, "Path to PropertyValueAliases.txt file", "prop-value-alias-path", 'v', "prop-value-alias-path");
|
||||
args_parser.add_option(scripts_path, "Path to Scripts.txt file", "scripts-path", 'r', "scripts-path");
|
||||
args_parser.add_option(script_extensions_path, "Path to ScriptExtensions.txt file", "script-extensions-path", 'x', "script-extensions-path");
|
||||
|
@ -791,19 +754,15 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
|||
auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
|
||||
auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
|
||||
auto unicode_data_file = TRY(open_file(unicode_data_path, Core::File::OpenMode::Read));
|
||||
auto derived_general_category_file = TRY(open_file(derived_general_category_path, Core::File::OpenMode::Read));
|
||||
auto prop_value_alias_file = TRY(open_file(prop_value_alias_path, Core::File::OpenMode::Read));
|
||||
auto scripts_file = TRY(open_file(scripts_path, Core::File::OpenMode::Read));
|
||||
auto script_extensions_file = TRY(open_file(script_extensions_path, Core::File::OpenMode::Read));
|
||||
|
||||
UnicodeData unicode_data {};
|
||||
TRY(parse_prop_list(*derived_general_category_file, unicode_data.general_categories));
|
||||
TRY(parse_prop_list(*scripts_file, unicode_data.script_list));
|
||||
TRY(parse_prop_list(*script_extensions_file, unicode_data.script_extensions, true));
|
||||
|
||||
populate_general_category_unions(unicode_data.general_categories);
|
||||
TRY(parse_unicode_data(*unicode_data_file, unicode_data));
|
||||
TRY(parse_value_alias_list(*prop_value_alias_file, "gc"sv, unicode_data.general_categories.keys(), unicode_data.general_category_aliases));
|
||||
TRY(parse_value_alias_list(*prop_value_alias_file, "sc"sv, unicode_data.script_list.keys(), unicode_data.script_aliases, false));
|
||||
TRY(normalize_script_extensions(unicode_data.script_extensions, unicode_data.script_list, unicode_data.script_aliases));
|
||||
|
||||
|
|
|
@ -421,11 +421,7 @@ bool Lexer::is_whitespace() const
|
|||
auto code_point = current_code_point();
|
||||
if (code_point == NO_BREAK_SPACE || code_point == ZERO_WIDTH_NO_BREAK_SPACE)
|
||||
return true;
|
||||
|
||||
static auto space_separator_category = Unicode::general_category_from_string("Space_Separator"sv);
|
||||
if (space_separator_category.has_value())
|
||||
return Unicode::code_point_has_general_category(code_point, *space_separator_category);
|
||||
return false;
|
||||
return Unicode::code_point_has_space_separator_general_category(code_point);
|
||||
}
|
||||
|
||||
// UnicodeEscapeSequence :: https://tc39.es/ecma262/#prod-UnicodeEscapeSequence
|
||||
|
|
|
@ -780,15 +780,11 @@ ALWAYS_INLINE void OpCode_Compare::compare_character_class(MatchInput const& inp
|
|||
bool OpCode_Compare::matches_character_class(CharClass character_class, u32 ch, bool insensitive)
|
||||
{
|
||||
constexpr auto is_space_or_line_terminator = [](u32 code_point) {
|
||||
static auto space_separator = Unicode::general_category_from_string("Space_Separator"sv);
|
||||
if (!space_separator.has_value())
|
||||
return is_ascii_space(code_point);
|
||||
|
||||
if ((code_point == 0x0a) || (code_point == 0x0d) || (code_point == 0x2028) || (code_point == 0x2029))
|
||||
return true;
|
||||
if ((code_point == 0x09) || (code_point == 0x0b) || (code_point == 0x0c) || (code_point == 0xfeff))
|
||||
return true;
|
||||
return Unicode::code_point_has_general_category(code_point, *space_separator);
|
||||
return Unicode::code_point_has_space_separator_general_category(code_point);
|
||||
};
|
||||
|
||||
switch (character_class) {
|
||||
|
|
|
@ -1647,7 +1647,7 @@ bool ECMA262Parser::parse_atom_escape(ByteCode& stack, size_t& match_length_mini
|
|||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)property.value() });
|
||||
},
|
||||
[&](Unicode::GeneralCategory general_category) {
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category });
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category.value() });
|
||||
},
|
||||
[&](Script script) {
|
||||
if (script.is_extension)
|
||||
|
@ -1998,7 +1998,7 @@ bool ECMA262Parser::parse_nonempty_class_ranges(Vector<CompareTypeAndValuePair>&
|
|||
if (atom.is_property)
|
||||
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)(atom.property.value()) });
|
||||
else if (atom.is_general_category)
|
||||
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)(atom.general_category) });
|
||||
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)(atom.general_category.value()) });
|
||||
else if (atom.is_script)
|
||||
ranges.empend(CompareTypeAndValuePair { CharacterCompareType::Script, (ByteCodeValueType)(atom.script) });
|
||||
else if (atom.is_script_extension)
|
||||
|
@ -2335,7 +2335,7 @@ bool ECMA262Parser::parse_class_set_operand(Vector<regex::CompareTypeAndValuePai
|
|||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)property.value() });
|
||||
},
|
||||
[&](Unicode::GeneralCategory general_category) {
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category });
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category.value() });
|
||||
},
|
||||
[&](Script script) {
|
||||
if (script.is_extension)
|
||||
|
@ -2422,7 +2422,7 @@ bool ECMA262Parser::parse_nested_class(Vector<regex::CompareTypeAndValuePair>& c
|
|||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::Property, (ByteCodeValueType)property.value() });
|
||||
},
|
||||
[&](Unicode::GeneralCategory general_category) {
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category });
|
||||
compares.empend(CompareTypeAndValuePair { CharacterCompareType::GeneralCategory, (ByteCodeValueType)general_category.value() });
|
||||
},
|
||||
[&](Script script) {
|
||||
if (script.is_extension)
|
||||
|
|
|
@ -37,8 +37,84 @@ struct AK::Traits<Unicode::PropertyName<PropertyType>> {
|
|||
|
||||
namespace Unicode {
|
||||
|
||||
// Weak fallback definitions: the lookup yields an empty Optional and the membership test yields
// false. Being weak symbols, they give way to any strong definition of the same function at link
// time. NOTE(review): presumably the strong versions come from the generated Unicode data - confirm.
Optional<GeneralCategory> __attribute__((weak)) general_category_from_string(StringView) { return {}; }
|
||||
bool __attribute__((weak)) code_point_has_general_category(u32, GeneralCategory) { return {}; }
|
||||
// Identifiers for the grouped General Category values (LC, L, M, N, P, S, Z, C). They are numbered
// past ICU's U_CHAR_CATEGORY_COUNT, so they stay distinct from the single-category values below
// that count, and they are matched against the U_GC_*_MASK bit masks instead of one UCharCategory.
static constexpr GeneralCategory GENERAL_CATEGORY_CASED_LETTER = U_CHAR_CATEGORY_COUNT + 1;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_LETTER = U_CHAR_CATEGORY_COUNT + 2;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_MARK = U_CHAR_CATEGORY_COUNT + 3;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_NUMBER = U_CHAR_CATEGORY_COUNT + 4;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_PUNCTUATION = U_CHAR_CATEGORY_COUNT + 5;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_SYMBOL = U_CHAR_CATEGORY_COUNT + 6;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_SEPARATOR = U_CHAR_CATEGORY_COUNT + 7;
|
||||
static constexpr GeneralCategory GENERAL_CATEGORY_OTHER = U_CHAR_CATEGORY_COUNT + 8;
|
||||
// One past the last grouped identifier; used as the size of the name table built in
// general_category_from_string().
static constexpr GeneralCategory GENERAL_CATEGORY_LIMIT = U_CHAR_CATEGORY_COUNT + 9;
|
||||
|
||||
// Resolves a General Category name to its GeneralCategory identifier, or an empty Optional when
// no table entry matches. Each entry may carry a long, a short and an additional name as reported
// by ICU's u_getPropertyValueName(); the identifier returned is the index of the matching entry.
Optional<GeneralCategory> general_category_from_string(StringView general_category)
{
    // Built once, on the first call, by the immediately-invoked lambda.
    static auto name_table = []() {
        Array<PropertyName<GeneralCategory>, GENERAL_CATEGORY_LIMIT.value()> table;

        // Stores whichever names ICU knows for (icu_property, icu_value) into table[slot].
        // ICU returns a null pointer for a name it does not have; that field is then left untouched.
        auto record_names = [&table](auto icu_property, auto slot, auto icu_value) {
            auto& entry = table[slot.value()];

            if (char const* long_name = u_getPropertyValueName(icu_property, icu_value, U_LONG_PROPERTY_NAME))
                entry.long_name = StringView { long_name, strlen(long_name) };
            if (char const* short_name = u_getPropertyValueName(icu_property, icu_value, U_SHORT_PROPERTY_NAME))
                entry.short_name = StringView { short_name, strlen(short_name) };
            if (char const* additional_name = u_getPropertyValueName(icu_property, icu_value, ADDITIONAL_NAME))
                entry.additional_name = StringView { additional_name, strlen(additional_name) };
        };

        // Single categories: the table index is the ICU UCharCategory value itself.
        GeneralCategory single_category = 0;
        while (single_category < U_CHAR_CATEGORY_COUNT) {
            record_names(UCHAR_GENERAL_CATEGORY, single_category, static_cast<UCharCategory>(single_category.value()));
            ++single_category;
        }

        // Grouped categories: named through the mask property and stored at the dedicated
        // GENERAL_CATEGORY_* slots.
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_CASED_LETTER, U_GC_LC_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_LETTER, U_GC_L_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_MARK, U_GC_M_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_NUMBER, U_GC_N_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_PUNCTUATION, U_GC_P_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_SYMBOL, U_GC_S_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_SEPARATOR, U_GC_Z_MASK);
        record_names(UCHAR_GENERAL_CATEGORY_MASK, GENERAL_CATEGORY_OTHER, U_GC_C_MASK);

        return table;
    }();

    auto match = find_index(name_table.begin(), name_table.end(), general_category);
    if (match == name_table.size())
        return {};

    return static_cast<GeneralCategory>(match);
}
|
||||
|
||||
// Reports whether `code_point` belongs to `general_category`. The grouped identifiers
// (GENERAL_CATEGORY_*) are tested against ICU's U_GC_*_MASK bit masks; any other value is
// compared directly with the category ICU reports via u_charType().
bool code_point_has_general_category(u32 code_point, GeneralCategory general_category)
{
    auto icu_code_point = static_cast<UChar32>(code_point);

    // True when the code point's category bit is contained in the given union mask.
    auto in_union = [&](auto union_mask) {
        return (U_GET_GC_MASK(icu_code_point) & union_mask) != 0;
    };

    if (general_category == GENERAL_CATEGORY_CASED_LETTER)
        return in_union(U_GC_LC_MASK);
    if (general_category == GENERAL_CATEGORY_LETTER)
        return in_union(U_GC_L_MASK);
    if (general_category == GENERAL_CATEGORY_MARK)
        return in_union(U_GC_M_MASK);
    if (general_category == GENERAL_CATEGORY_NUMBER)
        return in_union(U_GC_N_MASK);
    if (general_category == GENERAL_CATEGORY_PUNCTUATION)
        return in_union(U_GC_P_MASK);
    if (general_category == GENERAL_CATEGORY_SYMBOL)
        return in_union(U_GC_S_MASK);
    if (general_category == GENERAL_CATEGORY_SEPARATOR)
        return in_union(U_GC_Z_MASK);
    if (general_category == GENERAL_CATEGORY_OTHER)
        return in_union(U_GC_C_MASK);

    // Not a grouped identifier: treat the value as a plain ICU UCharCategory.
    return u_charType(icu_code_point) == static_cast<UCharCategory>(general_category.value());
}
|
||||
|
||||
// Convenience wrapper: tests `code_point` against ICU's U_CONTROL_CHAR category without a
// name lookup.
bool code_point_has_control_general_category(u32 code_point)
{
    GeneralCategory control_category = U_CONTROL_CHAR;
    return code_point_has_general_category(code_point, control_category);
}
|
||||
|
||||
// Convenience wrapper: tests `code_point` against ICU's U_SPACE_SEPARATOR category without a
// name lookup.
bool code_point_has_space_separator_general_category(u32 code_point)
{
    GeneralCategory space_separator_category = U_SPACE_SEPARATOR;
    return code_point_has_general_category(code_point, space_separator_category);
}
|
||||
|
||||
static constexpr Property PROPERTY_ANY = UCHAR_BINARY_LIMIT + 1;
|
||||
static constexpr Property PROPERTY_ASCII = UCHAR_BINARY_LIMIT + 2;
|
||||
|
|
|
@ -29,6 +29,9 @@ struct CodePointRangeComparator {
|
|||
Optional<GeneralCategory> general_category_from_string(StringView);
|
||||
bool code_point_has_general_category(u32 code_point, GeneralCategory general_category);
|
||||
|
||||
bool code_point_has_control_general_category(u32 code_point);
|
||||
bool code_point_has_space_separator_general_category(u32 code_point);
|
||||
|
||||
Optional<Property> property_from_string(StringView);
|
||||
bool code_point_has_property(u32 code_point, Property property);
|
||||
|
||||
|
|
|
@ -13,12 +13,12 @@ namespace Unicode {
|
|||
|
||||
enum class BidirectionalClass : u8;
|
||||
enum class EmojiGroup : u8;
|
||||
enum class GeneralCategory : u8;
|
||||
enum class Script : u8;
|
||||
|
||||
struct CurrencyCode;
|
||||
struct Emoji;
|
||||
|
||||
AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(u32, GeneralCategory, CastToUnderlying, Comparison, Increment);
|
||||
AK_TYPEDEF_DISTINCT_NUMERIC_GENERAL(u32, Property, CastToUnderlying, Comparison, Increment);
|
||||
|
||||
}
|
||||
|
|
|
@ -246,11 +246,7 @@ static ErrorOr<Optional<String>> get_event_key_string(u32 code_point)
|
|||
auto is_non_control_character = [&]() {
|
||||
// A non-control character is any valid Unicode character except those that are part of the "Other, Control"
|
||||
// ("Cc") General Category.
|
||||
static auto control_general_category = Unicode::general_category_from_string("Cc"sv);
|
||||
if (!control_general_category.has_value())
|
||||
return true;
|
||||
|
||||
return !Unicode::code_point_has_general_category(code_point, *control_general_category);
|
||||
return !Unicode::code_point_has_control_general_category(code_point);
|
||||
};
|
||||
|
||||
// A key string is a string containing a 0 or 1 non-control characters ("base" characters) followed by 0 or more
|
||||
|
|
Loading…
Reference in a new issue