LibUnicode: Manually assign special code point properties
The Unicode standard defines a few extra properties that are not defined in any UCD file, so we must assign them manually.
This commit is contained in:
parent
bba3152104
commit
3f80791ed5
Notes:
sideshowbarker
2024-07-18 07:43:54 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/3f80791ed53 Pull-request: https://github.com/SerenityOS/serenity/pull/9090 Reviewed-by: https://github.com/Dexesttp Reviewed-by: https://github.com/davidot Reviewed-by: https://github.com/linusg
1 changed file with 12 additions and 3 deletions
|
@ -301,7 +301,7 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data)
|
|||
}
|
||||
}
|
||||
if (data.prop_list.is_empty())
|
||||
data.prop_list.append("None"sv);
|
||||
data.prop_list.append("Assigned"sv);
|
||||
|
||||
for (auto const& property : unicode_data.word_break_prop_list) {
|
||||
for (auto const& range : property.value) {
|
||||
|
@ -411,7 +411,7 @@ namespace Unicode {
|
|||
generate_enum("Locale"sv, "None"sv, move(unicode_data.locales));
|
||||
generate_enum("Condition"sv, "None"sv, move(unicode_data.conditions));
|
||||
generate_enum("GeneralCategory"sv, {}, move(unicode_data.general_categories));
|
||||
generate_enum("Property"sv, "None"sv, unicode_data.prop_list.keys(), move(unicode_data.prop_aliases), true);
|
||||
generate_enum("Property"sv, "Assigned"sv, unicode_data.prop_list.keys(), move(unicode_data.prop_aliases), true);
|
||||
generate_enum("WordBreakProperty"sv, "Other"sv, unicode_data.word_break_prop_list.keys());
|
||||
|
||||
generator.append(R"~~~(
|
||||
|
@ -465,7 +465,7 @@ struct UnicodeData {
|
|||
SpecialCasing const* special_casing[@special_casing_size@] {};
|
||||
u32 special_casing_size { 0 };
|
||||
|
||||
Property properties { Property::None };
|
||||
Property properties { Property::Assigned };
|
||||
WordBreakProperty word_break_property { WordBreakProperty::Other };
|
||||
};
|
||||
|
||||
|
@ -680,6 +680,15 @@ int main(int argc, char** argv)
|
|||
parse_prop_list(derived_core_prop_file, unicode_data.prop_list);
|
||||
parse_alias_list(prop_alias_file, unicode_data.prop_list, unicode_data.prop_aliases);
|
||||
parse_prop_list(word_break_file, unicode_data.word_break_prop_list);
|
||||
|
||||
// The Unicode standard defines additional properties (Any, Assigned, ASCII) which are not in
|
||||
// any UCD file. Assigned is set as the default enum value 0 so "(property & Assigned) == Assigned"
|
||||
// is always true. Any is not assigned code points here because this file only parses assigned
|
||||
// code points, whereas Any will include unassigned code points.
|
||||
// https://unicode.org/reports/tr18/#General_Category_Property
|
||||
unicode_data.prop_list.set("Any"sv, {});
|
||||
unicode_data.prop_list.set("ASCII"sv, { { 0, 0, 0x7f } });
|
||||
|
||||
parse_unicode_data(unicode_data_file, unicode_data);
|
||||
|
||||
if (generate_header)
|
||||
|
|
Loading…
Add table
Reference in a new issue