LibUnicode: Check word break when deciding on case-ignorable code points

Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/7827aede6fd Pull-request: https://github.com/SerenityOS/serenity/pull/9054
2024-11-22 23:50:19 +00:00 · 2021-07-27 18:27:59 -04:00 · 2021-07-27 18:27:59 -04:00 · 7827aede6f · 2024-07-18 07:57:10 +09:00
commit 7827aede6f
parent 12fb3ae033
2 changed files with 16 additions and 2 deletions
--- a/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
+++ b/Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
@@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
    result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
    EXPECT_EQ(result, "\u2170\u03C2");

+    // Sigma preceded by A and FULL STOP
+    result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
+    EXPECT_EQ(result, "a.\u03C2");
+
    // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
    result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
    EXPECT_EQ(result, "a\u180E\u03C2");
--- a/Userland/Libraries/LibUnicode/CharacterTypes.cpp
+++ b/Userland/Libraries/LibUnicode/CharacterTypes.cpp
@@ -63,9 +63,19 @@ static bool is_case_ignorable(UnicodeData const& unicode_data)
    case GeneralCategory::Sk:
        return true;
    default:
-        // FIXME: Handle word break properties (auxiliary/WordBreakProperty.txt).
-        return false;
+        break;
    }
+
+    switch (unicode_data.word_break_property) {
+    case WordBreakProperty::MidLetter:
+    case WordBreakProperty::MidNumLet:
+    case WordBreakProperty::SingleQuote:
+        return true;
+    default:
+        break;
+    }
+
+    return false;
 }

 static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)