Ver código fonte

LibUnicode: Handle code points that are both cased and case-ignorable

Some code points fit both categories, for example U+0345
(COMBINING GREEK YPOGEGRAMMENI). When determining if a code point is a
final code point in a string, only count it as a cased letter if it is
not also case-ignorable.
Timothy Flynn 4 anos atrás
pai
commit
c4bfda7f7f

+ 4 - 0
Tests/LibUnicode/TestUnicodeCharacterTypes.cpp

@@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
     result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
     EXPECT_EQ(result, "\u2170\u03C2");
 
+    // Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
+    result = Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv);
+    EXPECT_EQ(result, "\u0345\u03C3");
+
     // Sigma preceded by A and FULL STOP
     result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
     EXPECT_EQ(result, "a.\u03C2");

+ 1 - 1
Userland/Libraries/LibUnicode/CharacterTypes.cpp

@@ -95,7 +95,7 @@ static bool is_final_code_point(Utf8View const& string, size_t index, size_t byt
         if (!unicode_data.has_value())
             return false;
 
-        if (is_cased_letter(*unicode_data))
+        if (is_cased_letter(*unicode_data) && !is_case_ignorable(*unicode_data))
             ++cased_letter_count;
         else if (!is_case_ignorable(*unicode_data))
             cased_letter_count = 0;