Browse Source

LibUnicode: Check property list when deciding if a code point is cased

Timothy Flynn 4 years ago
parent
commit
c45a014645

+ 8 - 0
Tests/LibUnicode/TestUnicodeCharacterTypes.cpp

@@ -110,6 +110,14 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
     result = Unicode::to_unicode_lowercase_full("A\u03A3"sv);
     EXPECT_EQ(result, "a\u03C2");
 
+    // Sigma preceded by FEMININE ORDINAL INDICATOR
+    result = Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv);
+    EXPECT_EQ(result, "\u00AA\u03C2");
+
+    // Sigma preceded by ROMAN NUMERAL ONE
+    result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
+    EXPECT_EQ(result, "\u2170\u03C2");
+
     // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
     result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
     EXPECT_EQ(result, "a\u180E\u03C2");

+ 15 - 3
Userland/Libraries/LibUnicode/CharacterTypes.cpp

@@ -23,18 +23,30 @@ namespace Unicode {
 
 #if ENABLE_UNICODE_DATA
 
+static bool has_property(UnicodeData const& unicode_data, Property property) // Reports whether `property` occurs in unicode_data.prop_list.
+{
+    for (u32 i = 0; i < unicode_data.prop_list_size; ++i) { // Linear scan; only the first prop_list_size entries are examined.
+        if (unicode_data.prop_list[i] == property)
+            return true;
+    }
+
+    return false; // None of the examined entries matched.
+}
+
 static bool is_cased_letter(UnicodeData const& unicode_data)
 {
     // A character C is defined to be cased if and only if C has the Lowercase or Uppercase property
     // or has a General_Category value of Titlecase_Letter.
     switch (unicode_data.general_category) {
-    case GeneralCategory::Ll: // FIXME: Should be Ll + Other_Lowercase (PropList.txt).
-    case GeneralCategory::Lu: // FIXME: Should be Lu + Other_Uppercase (PropList.txt).
+    case GeneralCategory::Ll:
+    case GeneralCategory::Lu:
     case GeneralCategory::Lt:
         return true; // Ll, Lu and Lt are treated as cased from the general category alone.
     default:
-        return false;
+        break; // Any other category is not rejected yet; the property list is checked below.
     }
+
+    return has_property(unicode_data, Property::OtherLowercase) || has_property(unicode_data, Property::OtherUppercase);
 }
 
 static bool is_case_ignorable(UnicodeData const& unicode_data)