mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
LibUnicode: Generate simple case mappings for titlecase
Note that we already generate the special casing mappings for titlecase.
This commit is contained in:
parent
6d710eeb43
commit
b562348d31
Notes:
sideshowbarker
2024-07-17 06:39:26 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/b562348d31 Pull-request: https://github.com/SerenityOS/serenity/pull/17048 Reviewed-by: https://github.com/linusg ✅
4 changed files with 36 additions and 0 deletions
|
@ -113,6 +113,7 @@ struct UnicodeData {
|
|||
|
||||
u32 simple_uppercase_mapping_size { 0 };
|
||||
u32 simple_lowercase_mapping_size { 0 };
|
||||
u32 simple_titlecase_mapping_size { 0 };
|
||||
|
||||
Vector<SpecialCasing> special_casing;
|
||||
u32 code_points_with_special_casing { 0 };
|
||||
|
@ -674,6 +675,7 @@ static ErrorOr<void> parse_unicode_data(Core::Stream::BufferedFile& file, Unicod
|
|||
unicode_data.code_points_with_non_zero_combining_class += data.canonical_combining_class != 0;
|
||||
unicode_data.simple_uppercase_mapping_size += data.simple_uppercase_mapping.has_value();
|
||||
unicode_data.simple_lowercase_mapping_size += data.simple_lowercase_mapping.has_value();
|
||||
unicode_data.simple_titlecase_mapping_size += data.simple_titlecase_mapping.has_value();
|
||||
unicode_data.code_points_with_decomposition_mapping += data.decomposition_mapping.has_value();
|
||||
|
||||
unicode_data.code_points_with_special_casing += has_special_casing;
|
||||
|
@ -978,6 +980,7 @@ static constexpr Array<@mapping_type@, @size@> s_@name@_mappings { {
|
|||
});
|
||||
append_code_point_mappings("uppercase"sv, "CodePointMapping"sv, unicode_data.simple_uppercase_mapping_size, [](auto const& data) { return data.simple_uppercase_mapping; });
|
||||
append_code_point_mappings("lowercase"sv, "CodePointMapping"sv, unicode_data.simple_lowercase_mapping_size, [](auto const& data) { return data.simple_lowercase_mapping; });
|
||||
append_code_point_mappings("titlecase"sv, "CodePointMapping"sv, unicode_data.simple_titlecase_mapping_size, [](auto const& data) { return data.simple_titlecase_mapping; });
|
||||
append_code_point_mappings("special_case"sv, "SpecialCaseMapping"sv, unicode_data.code_points_with_special_casing, [](auto const& data) { return data.special_casing_indices; });
|
||||
append_code_point_mappings("abbreviation"sv, "CodePointAbbreviation"sv, unicode_data.code_point_abbreviations.size(), [](auto const& data) { return data.abbreviation; });
|
||||
|
||||
|
@ -1138,6 +1141,7 @@ u32 @method@(u32 code_point)
|
|||
append_code_point_mapping_search("canonical_combining_class"sv, "s_combining_class_mappings"sv, "0"sv);
|
||||
append_code_point_mapping_search("to_unicode_uppercase"sv, "s_uppercase_mappings"sv, "code_point"sv);
|
||||
append_code_point_mapping_search("to_unicode_lowercase"sv, "s_lowercase_mappings"sv, "code_point"sv);
|
||||
append_code_point_mapping_search("to_unicode_titlecase"sv, "s_titlecase_mappings"sv, "code_point"sv);
|
||||
|
||||
generator.append(R"~~~(
|
||||
Span<SpecialCasing const* const> special_case_mapping(u32 code_point)
|
||||
|
|
|
@ -48,6 +48,32 @@ TEST_CASE(to_unicode_uppercase)
|
|||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u);
|
||||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u);
|
||||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu);
|
||||
|
||||
// Code points whose uppercase and titlecase mappings actually differ.
|
||||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c6u), 0x01c4u); // "dž" to "DŽ"
|
||||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c9u), 0x01c7u); // "lj" to "LJ"
|
||||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x01ccu), 0x01cau); // "nj" to "NJ"
|
||||
EXPECT_EQ(Unicode::to_unicode_uppercase(0x01f3u), 0x01f1u); // "dz" to "DZ"
|
||||
}
|
||||
|
||||
// Exercises Unicode::to_unicode_titlecase, the single-code-point titlecase conversion.
TEST_CASE(to_unicode_titlecase)
|
||||
{
|
||||
// Compares the titlecase conversion against toupper via the compare_to_ascii helper
// (presumably over the ASCII range -- confirm against the helper's definition).
compare_to_ascii(toupper, Unicode::to_unicode_titlecase);
|
||||
|
||||
// Greek omega: the lowercase letter maps to the uppercase letter, and the
// uppercase letter is returned unchanged.
EXPECT_EQ(Unicode::to_unicode_titlecase(0x03c9u), 0x03a9u); // "ω" to "Ω"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
|
||||
|
||||
// Code points encoded by ranges in UnicodeData.txt
// These are expected to come back unchanged.
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x3400u), 0x3400u);
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x3401u), 0x3401u);
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x3402u), 0x3402u);
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x4dbfu), 0x4dbfu);
|
||||
|
||||
// Code points whose uppercase and titlecase mappings actually differ.
// Each expected value here is one less than the input, i.e. the mixed-case digraph
// (e.g. U+01C5 "Dž"), whereas the uppercase test expects the all-caps form (U+01C4 "DŽ").
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c6u), 0x01c5u); // "dž" to "Dž"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj"
|
||||
EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz"
|
||||
}
|
||||
|
||||
TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
|
||||
|
|
|
@ -38,6 +38,11 @@ u32 __attribute__((weak)) to_unicode_uppercase(u32 code_point)
|
|||
return to_ascii_uppercase(code_point);
|
||||
}
|
||||
|
||||
// Fallback definition of to_unicode_titlecase. It is declared weak, so any other
// (non-weak) definition of the same symbol takes precedence at link time --
// presumably the generated lookup over s_titlecase_mappings; confirm against the
// generator output. The fallback simply delegates to to_ascii_uppercase.
u32 __attribute__((weak)) to_unicode_titlecase(u32 code_point)
|
||||
{
|
||||
return to_ascii_uppercase(code_point);
|
||||
}
|
||||
|
||||
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView string, Optional<StringView> const& locale)
|
||||
{
|
||||
StringBuilder builder;
|
||||
|
|
|
@ -38,6 +38,7 @@ u32 canonical_combining_class(u32 code_point);
|
|||
// Use the full-string transformations for full case folding.
|
||||
u32 to_unicode_lowercase(u32 code_point);
|
||||
u32 to_unicode_uppercase(u32 code_point);
|
||||
// Converts a single code point to its titlecase counterpart. The accompanying tests
// expect code points without a distinct titlecase form to be returned unchanged.
u32 to_unicode_titlecase(u32 code_point);
|
||||
|
||||
ErrorOr<DeprecatedString> to_unicode_lowercase_full(StringView, Optional<StringView> const& locale = {});
|
||||
ErrorOr<DeprecatedString> to_unicode_uppercase_full(StringView, Optional<StringView> const& locale = {});
|
||||
|
|
Loading…
Reference in a new issue