TestUnicodeCharacterTypes.cpp 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h>
  7. #include <AK/StringView.h>
  8. #include <LibUnicode/CharacterTypes.h>
  9. #include <ctype.h>
  10. static void compare_to_ascii(auto& old_function, auto& new_function)
  11. {
  12. i64 result1 = 0;
  13. i64 result2 = 0;
  14. for (u32 i = 0; i < 0x80; ++i) {
  15. EXPECT_EQ(result1 = old_function(i), result2 = new_function(i));
  16. if (result1 != result2)
  17. dbgln("Function input value was {}.", i);
  18. }
  19. }
  20. TEST_CASE(to_unicode_lowercase)
  21. {
  22. compare_to_ascii(tolower, Unicode::to_unicode_lowercase);
  23. EXPECT_EQ(Unicode::to_unicode_lowercase(0x03c9u), 0x03c9u); // "ω" to "ω"
  24. EXPECT_EQ(Unicode::to_unicode_lowercase(0x03a9u), 0x03c9u); // "Ω" to "ω"
  25. // Code points encoded by ranges in UnicodeData.txt
  26. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3400u), 0x3400u);
  27. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3401u), 0x3401u);
  28. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3402u), 0x3402u);
  29. EXPECT_EQ(Unicode::to_unicode_lowercase(0x4dbfu), 0x4dbfu);
  30. }
  31. TEST_CASE(to_unicode_uppercase)
  32. {
  33. compare_to_ascii(toupper, Unicode::to_unicode_uppercase);
  34. EXPECT_EQ(Unicode::to_unicode_uppercase(0x03c9u), 0x03a9u); // "ω" to "Ω"
  35. EXPECT_EQ(Unicode::to_unicode_uppercase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
  36. // Code points encoded by ranges in UnicodeData.txt
  37. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3400u), 0x3400u);
  38. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u);
  39. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u);
  40. EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu);
  41. }
  42. TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
  43. {
  44. // LATIN SMALL LETTER SHARP S
  45. auto result = Unicode::to_unicode_lowercase_full("\u00DF"sv);
  46. EXPECT_EQ(result, "\u00DF");
  47. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  48. result = Unicode::to_unicode_lowercase_full("\u0130"sv);
  49. EXPECT_EQ(result, "\u0069\u0307");
  50. // LATIN SMALL LIGATURE FF
  51. result = Unicode::to_unicode_lowercase_full("\uFB00"sv);
  52. EXPECT_EQ(result, "\uFB00");
  53. // LATIN SMALL LIGATURE FI
  54. result = Unicode::to_unicode_lowercase_full("\uFB01"sv);
  55. EXPECT_EQ(result, "\uFB01");
  56. // LATIN SMALL LIGATURE FL
  57. result = Unicode::to_unicode_lowercase_full("\uFB02"sv);
  58. EXPECT_EQ(result, "\uFB02");
  59. // LATIN SMALL LIGATURE FFI
  60. result = Unicode::to_unicode_lowercase_full("\uFB03"sv);
  61. EXPECT_EQ(result, "\uFB03");
  62. // LATIN SMALL LIGATURE FFL
  63. result = Unicode::to_unicode_lowercase_full("\uFB04"sv);
  64. EXPECT_EQ(result, "\uFB04");
  65. // LATIN SMALL LIGATURE LONG S T
  66. result = Unicode::to_unicode_lowercase_full("\uFB05"sv);
  67. EXPECT_EQ(result, "\uFB05");
  68. // LATIN SMALL LIGATURE ST
  69. result = Unicode::to_unicode_lowercase_full("\uFB06"sv);
  70. EXPECT_EQ(result, "\uFB06");
  71. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  72. result = Unicode::to_unicode_lowercase_full("\u1FB7"sv);
  73. EXPECT_EQ(result, "\u1FB7");
  74. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  75. result = Unicode::to_unicode_lowercase_full("\u1FC7"sv);
  76. EXPECT_EQ(result, "\u1FC7");
  77. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  78. result = Unicode::to_unicode_lowercase_full("\u1FF7"sv);
  79. EXPECT_EQ(result, "\u1FF7");
  80. }
  81. TEST_CASE(to_unicode_lowercase_special_casing_sigma)
  82. {
  83. auto result = Unicode::to_unicode_lowercase_full("ABCI"sv);
  84. EXPECT_EQ(result, "abci");
  85. // Sigma preceded by A
  86. result = Unicode::to_unicode_lowercase_full("A\u03A3"sv);
  87. EXPECT_EQ(result, "a\u03C2");
  88. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
  89. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
  90. EXPECT_EQ(result, "a\u180E\u03C2");
  91. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
  92. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3B"sv);
  93. EXPECT_EQ(result, "a\u180E\u03C3b");
  94. // Sigma followed by A
  95. result = Unicode::to_unicode_lowercase_full("\u03A3A"sv);
  96. EXPECT_EQ(result, "\u03C3a");
  97. // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
  98. result = Unicode::to_unicode_lowercase_full("A\u03A3\u180E"sv);
  99. EXPECT_EQ(result, "a\u03C2\u180E");
  100. // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
  101. result = Unicode::to_unicode_lowercase_full("A\u03A3\u180EB"sv);
  102. EXPECT_EQ(result, "a\u03C3\u180Eb");
  103. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
  104. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180E"sv);
  105. EXPECT_EQ(result, "a\u180E\u03C2\u180E");
  106. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
  107. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180EB"sv);
  108. EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
  109. }
  110. TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
  111. {
  112. // LATIN SMALL LETTER SHARP S
  113. auto result = Unicode::to_unicode_uppercase_full("\u00DF"sv);
  114. EXPECT_EQ(result, "\u0053\u0053");
  115. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  116. result = Unicode::to_unicode_uppercase_full("\u0130"sv);
  117. EXPECT_EQ(result, "\u0130");
  118. // LATIN SMALL LIGATURE FF
  119. result = Unicode::to_unicode_uppercase_full("\uFB00"sv);
  120. EXPECT_EQ(result, "\u0046\u0046");
  121. // LATIN SMALL LIGATURE FI
  122. result = Unicode::to_unicode_uppercase_full("\uFB01"sv);
  123. EXPECT_EQ(result, "\u0046\u0049");
  124. // LATIN SMALL LIGATURE FL
  125. result = Unicode::to_unicode_uppercase_full("\uFB02"sv);
  126. EXPECT_EQ(result, "\u0046\u004C");
  127. // LATIN SMALL LIGATURE FFI
  128. result = Unicode::to_unicode_uppercase_full("\uFB03"sv);
  129. EXPECT_EQ(result, "\u0046\u0046\u0049");
  130. // LATIN SMALL LIGATURE FFL
  131. result = Unicode::to_unicode_uppercase_full("\uFB04"sv);
  132. EXPECT_EQ(result, "\u0046\u0046\u004C");
  133. // LATIN SMALL LIGATURE LONG S T
  134. result = Unicode::to_unicode_uppercase_full("\uFB05"sv);
  135. EXPECT_EQ(result, "\u0053\u0054");
  136. // LATIN SMALL LIGATURE ST
  137. result = Unicode::to_unicode_uppercase_full("\uFB06"sv);
  138. EXPECT_EQ(result, "\u0053\u0054");
  139. // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
  140. result = Unicode::to_unicode_uppercase_full("\u0390"sv);
  141. EXPECT_EQ(result, "\u0399\u0308\u0301");
  142. // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
  143. result = Unicode::to_unicode_uppercase_full("\u03B0"sv);
  144. EXPECT_EQ(result, "\u03A5\u0308\u0301");
  145. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  146. result = Unicode::to_unicode_uppercase_full("\u1FB7"sv);
  147. EXPECT_EQ(result, "\u0391\u0342\u0399");
  148. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  149. result = Unicode::to_unicode_uppercase_full("\u1FC7"sv);
  150. EXPECT_EQ(result, "\u0397\u0342\u0399");
  151. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  152. result = Unicode::to_unicode_uppercase_full("\u1FF7"sv);
  153. EXPECT_EQ(result, "\u03A9\u0342\u0399");
  154. }