mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
LibUnicode: Generate code point abbreviations
This commit is contained in:
parent
444b2d9ec2
commit
701b7810ba
Notes:
sideshowbarker
2024-07-17 20:40:36 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/701b7810ba0 Pull-request: https://github.com/SerenityOS/serenity/pull/11993
3 changed files with 33 additions and 6 deletions
|
@ -72,6 +72,7 @@ struct CodePointName {
|
|||
struct CodePointData {
|
||||
u32 code_point { 0 };
|
||||
String name;
|
||||
Optional<StringView> abbreviation;
|
||||
u8 canonical_combining_class { 0 };
|
||||
String bidi_class;
|
||||
String decomposition_type;
|
||||
|
@ -101,6 +102,7 @@ struct UnicodeData {
|
|||
|
||||
Vector<CodePointData> code_point_data;
|
||||
|
||||
HashMap<u32, String> code_point_abbreviations;
|
||||
HashMap<u32, String> code_point_display_name_aliases;
|
||||
Vector<CodePointName> code_point_display_names;
|
||||
|
||||
|
@ -302,11 +304,12 @@ static void parse_name_aliases(Core::File& file, UnicodeData& unicode_data)
|
|||
auto alias = segments[1].trim_whitespace();
|
||||
auto reason = segments[2].trim_whitespace();
|
||||
|
||||
if (!reason.is_one_of("correction"sv, "control"sv))
|
||||
continue;
|
||||
|
||||
if (!unicode_data.code_point_display_name_aliases.contains(*code_point))
|
||||
unicode_data.code_point_display_name_aliases.set(*code_point, alias);
|
||||
if (reason == "abbreviation"sv) {
|
||||
unicode_data.code_point_abbreviations.set(*code_point, alias);
|
||||
} else if (reason.is_one_of("correction"sv, "control"sv)) {
|
||||
if (!unicode_data.code_point_display_name_aliases.contains(*code_point))
|
||||
unicode_data.code_point_display_name_aliases.set(*code_point, alias);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -476,6 +479,9 @@ static void parse_unicode_data(Core::File& file, UnicodeData& unicode_data)
|
|||
data.simple_lowercase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[13]);
|
||||
data.simple_titlecase_mapping = AK::StringUtils::convert_to_uint_from_hex<u32>(segments[14]);
|
||||
|
||||
if (auto abbreviation = unicode_data.code_point_abbreviations.get(data.code_point); abbreviation.has_value())
|
||||
data.abbreviation = *abbreviation;
|
||||
|
||||
if (!assigned_code_point_range_start.has_value())
|
||||
assigned_code_point_range_start = data.code_point;
|
||||
|
||||
|
@ -683,6 +689,11 @@ struct SpecialCaseMapping {
|
|||
u32 special_casing_size { 0 };
|
||||
};
|
||||
|
||||
struct CodePointAbbreviation {
|
||||
u32 code_point { 0 };
|
||||
StringView abbreviation {};
|
||||
};
|
||||
|
||||
template<typename MappingType>
|
||||
struct CodePointComparator {
|
||||
constexpr int operator()(u32 code_point, MappingType const& mapping)
|
||||
|
@ -707,7 +718,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
for (auto const& data : unicode_data.code_point_data) {
|
||||
auto mapping = mapping_getter(data);
|
||||
|
||||
if constexpr (IsSame<decltype(mapping), Optional<u32>>) {
|
||||
if constexpr (requires { mapping.has_value(); }) {
|
||||
if (!mapping.has_value())
|
||||
continue;
|
||||
} else {
|
||||
|
@ -724,6 +735,9 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
if constexpr (IsSame<decltype(mapping), Optional<u32>>) {
|
||||
generator.set("mapping", String::formatted("{:#x}", *mapping));
|
||||
generator.append(", @mapping@ },");
|
||||
} else if constexpr (IsSame<decltype(mapping), Optional<StringView>>) {
|
||||
generator.set("mapping", String::formatted("{}", *mapping));
|
||||
generator.append(", \"@mapping@\"sv },");
|
||||
} else {
|
||||
append_list_and_size(data.special_casing_indices, "&s_special_casing[{}]"sv);
|
||||
generator.append(" },");
|
||||
|
@ -748,6 +762,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; });
|
||||
append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; });
|
||||
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
||||
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
|
||||
|
||||
generator.append(R"~~~(
|
||||
struct CodePointRange {
|
||||
|
@ -891,6 +906,15 @@ Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
|||
|
||||
return mapping->special_casing.span().slice(0, mapping->special_casing_size);
|
||||
}
|
||||
|
||||
Optional<StringView> code_point_abbreviation(u32 code_point)
|
||||
{
|
||||
auto const* mapping = binary_search(s_abbreviation_mappings, code_point, nullptr, CodePointComparator<CodePointAbbreviation> {});
|
||||
if (mapping == nullptr)
|
||||
return {};
|
||||
|
||||
return mapping->abbreviation;
|
||||
}
|
||||
)~~~");
|
||||
|
||||
auto append_prop_search = [&](StringView enum_title, StringView enum_snake, StringView collection_name) {
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
namespace Unicode {
|
||||
|
||||
Optional<String> __attribute__((weak)) code_point_display_name(u32) { return {}; }
|
||||
Optional<StringView> __attribute__((weak)) code_point_abbreviation(u32) { return {}; }
|
||||
u32 __attribute__((weak)) canonical_combining_class(u32) { return {}; }
|
||||
Span<SpecialCasing const* const> __attribute__((weak)) special_case_mapping(u32) { return {}; }
|
||||
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
namespace Unicode {
|
||||
|
||||
Optional<String> code_point_display_name(u32 code_point);
|
||||
Optional<StringView> code_point_abbreviation(u32 code_point);
|
||||
|
||||
u32 canonical_combining_class(u32 code_point);
|
||||
Span<SpecialCasing const* const> special_case_mapping(u32 code_point);
|
||||
|
||||
|
|
Loading…
Reference in a new issue