Przeglądaj źródła

LibUnicode: Fix Hangul syllable composition for specific cases

This fixes `combine_hangul_code_points`, which would try to combine
an LVT syllable with a trailing consonant, resulting in a wrong
character.

Also adds a test for this specific case.
matcool 2 lat temu
rodzic
commit
104b51b912

+ 1 - 0
Tests/LibUnicode/TestUnicodeNormalization.cpp

@@ -63,6 +63,7 @@ TEST_CASE(normalize_nfc)
 
     EXPECT_EQ(normalize("\u1103\u1161\u11B0"sv, NormalizationForm::NFC), "닭"sv);
     EXPECT_EQ(normalize("\u1100\uAC00\u11A8"sv, NormalizationForm::NFC), "\u1100\uAC01"sv);
+    EXPECT_EQ(normalize("\u1103\u1161\u11B0\u11B0"sv, NormalizationForm::NFC), "닭\u11B0");
 }
 
 TEST_CASE(normalize_nfkd)

+ 2 - 1
Userland/Libraries/LibUnicode/Normalize.cpp

@@ -111,7 +111,8 @@ static u32 combine_hangul_code_points(u32 a, u32 b)
         auto const leading_vowel_index = leading_index * HANGUL_BLOCK_COUNT + vowel_index * HANGUL_TRAILING_COUNT;
         return HANGUL_SYLLABLE_BASE + leading_vowel_index;
     }
-    if (is_hangul_code_point(a) && is_hangul_trailing(b)) {
+    // LV characters are the first in each "T block", so use this check to avoid combining LVT with T.
+    if (is_hangul_code_point(a) && (a - HANGUL_SYLLABLE_BASE) % HANGUL_TRAILING_COUNT == 0 && is_hangul_trailing(b)) {
         return a + b - HANGUL_TRAILING_BASE;
     }
     return 0;