LibUnicode: Check property list when deciding if a code point is cased

This commit is contained in:
Timothy Flynn 2021-07-27 17:47:09 -04:00 committed by Andreas Kling
parent 38adfd8874
commit c45a014645
Notes: sideshowbarker 2024-07-18 07:57:18 +09:00
2 changed files with 23 additions and 3 deletions

View file

@@ -110,6 +110,14 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
result = Unicode::to_unicode_lowercase_full("A\u03A3"sv);
EXPECT_EQ(result, "a\u03C2");
// Sigma preceded by FEMININE ORDINAL INDICATOR
result = Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv);
EXPECT_EQ(result, "\u00AA\u03C2");
// Sigma preceded by ROMAN NUMERAL ONE
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
EXPECT_EQ(result, "\u2170\u03C2");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
EXPECT_EQ(result, "a\u180E\u03C2");

View file

@@ -23,18 +23,30 @@ namespace Unicode {
#if ENABLE_UNICODE_DATA
// Reports whether `property` occurs among the first `prop_list_size` entries
// of the given code point's `prop_list`.
static bool has_property(UnicodeData const& unicode_data, Property property)
{
    u32 index = 0;
    while (index < unicode_data.prop_list_size) {
        if (unicode_data.prop_list[index] == property)
            return true;
        ++index;
    }

    // Scanned every listed property without finding a match.
    return false;
}
// Reports whether the code point described by `unicode_data` is "cased".
//
// A character C is defined to be cased if and only if C has the Lowercase or Uppercase property
// or has a General_Category value of Titlecase_Letter.
//
// Lowercase is Ll + Other_Lowercase and Uppercase is Lu + Other_Uppercase (PropList.txt), so the
// general category is checked first and the property list is consulted as a fallback.
static bool is_cased_letter(UnicodeData const& unicode_data)
{
    switch (unicode_data.general_category) {
    case GeneralCategory::Ll:
    case GeneralCategory::Lu:
    case GeneralCategory::Lt:
        return true;
    default:
        // Do not return here: code points outside Ll/Lu/Lt may still be cased
        // through Other_Lowercase / Other_Uppercase.
        break;
    }

    return has_property(unicode_data, Property::OtherLowercase) || has_property(unicode_data, Property::OtherUppercase);
}
static bool is_case_ignorable(UnicodeData const& unicode_data)