瀏覽代碼

LibUnicode: Check word break when deciding on case-ignorable code points

Timothy Flynn 4 年之前
父節點
當前提交
7827aede6f
共有 2 個文件被更改,包括 16 次插入和 2 次删除
  1. 4 0
      Tests/LibUnicode/TestUnicodeCharacterTypes.cpp
  2. 12 2
      Userland/Libraries/LibUnicode/CharacterTypes.cpp

+ 4 - 0
Tests/LibUnicode/TestUnicodeCharacterTypes.cpp

@@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
     result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
     result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
     EXPECT_EQ(result, "\u2170\u03C2");
     EXPECT_EQ(result, "\u2170\u03C2");
 
 
+    // Sigma preceded by A and FULL STOP
+    result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
+    EXPECT_EQ(result, "a.\u03C2");
+
     // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
     // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
     result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
     result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
     EXPECT_EQ(result, "a\u180E\u03C2");
     EXPECT_EQ(result, "a\u180E\u03C2");

+ 12 - 2
Userland/Libraries/LibUnicode/CharacterTypes.cpp

@@ -63,9 +63,19 @@ static bool is_case_ignorable(UnicodeData const& unicode_data)
     case GeneralCategory::Sk:
     case GeneralCategory::Sk:
         return true;
         return true;
     default:
     default:
-        // FIXME: Handle word break properties (auxiliary/WordBreakProperty.txt).
-        return false;
+        break;
     }
     }
+
+    switch (unicode_data.word_break_property) {
+    case WordBreakProperty::MidLetter:
+    case WordBreakProperty::MidNumLet:
+    case WordBreakProperty::SingleQuote:
+        return true;
+    default:
+        break;
+    }
+
+    return false;
 }
 }
 
 
 static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)
 static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)