LibUnicode: Check Word_Break property for case-ignorable code points

This commit is contained in:
Timothy Flynn 2021-07-27 18:27:59 -04:00 committed by Andreas Kling
parent 12fb3ae033
commit 7827aede6f
Notes: sideshowbarker 2024-07-18 07:57:10 +09:00
2 changed files with 16 additions and 2 deletions

View file

@ -118,6 +118,10 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma)
result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
EXPECT_EQ(result, "\u2170\u03C2");
// Sigma preceded by A and FULL STOP
result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
EXPECT_EQ(result, "a.\u03C2");
// Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
EXPECT_EQ(result, "a\u180E\u03C2");

View file

@ -63,9 +63,19 @@ static bool is_case_ignorable(UnicodeData const& unicode_data)
case GeneralCategory::Sk:
return true;
default:
// FIXME: Handle word break properties (auxiliary/WordBreakProperty.txt).
return false;
break;
}
switch (unicode_data.word_break_property) {
case WordBreakProperty::MidLetter:
case WordBreakProperty::MidNumLet:
case WordBreakProperty::SingleQuote:
return true;
default:
break;
}
return false;
}
static bool is_final_code_point(Utf8View const& string, size_t index, size_t byte_length)