TestUnicodeCharacterTypes.cpp 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h>
  7. #include <AK/StringView.h>
  8. #include <LibUnicode/CharacterTypes.h>
  9. #include <ctype.h>
  10. static void compare_to_ascii(auto& old_function, auto& new_function)
  11. {
  12. i64 result1 = 0;
  13. i64 result2 = 0;
  14. for (u32 i = 0; i < 0x80; ++i) {
  15. EXPECT_EQ(result1 = old_function(i), result2 = new_function(i));
  16. if (result1 != result2)
  17. dbgln("Function input value was {}.", i);
  18. }
  19. }
  20. TEST_CASE(to_unicode_lowercase)
  21. {
  22. compare_to_ascii(tolower, Unicode::to_unicode_lowercase);
  23. EXPECT_EQ(Unicode::to_unicode_lowercase(0x03c9u), 0x03c9u); // "ω" to "ω"
  24. EXPECT_EQ(Unicode::to_unicode_lowercase(0x03a9u), 0x03c9u); // "Ω" to "ω"
  25. // Code points encoded by ranges in UnicodeData.txt
  26. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3400u), 0x3400u);
  27. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3401u), 0x3401u);
  28. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3402u), 0x3402u);
  29. EXPECT_EQ(Unicode::to_unicode_lowercase(0x4dbfu), 0x4dbfu);
  30. }
  31. TEST_CASE(to_unicode_uppercase)
  32. {
  33. compare_to_ascii(toupper, Unicode::to_unicode_uppercase);
  34. EXPECT_EQ(Unicode::to_unicode_uppercase(0x03c9u), 0x03a9u); // "ω" to "Ω"
  35. EXPECT_EQ(Unicode::to_unicode_uppercase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
  36. // Code points encoded by ranges in UnicodeData.txt
  37. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3400u), 0x3400u);
  38. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u);
  39. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u);
  40. EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu);
  41. // Code points whose uppercase and titlecase mappings actually differ.
  42. EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c6u), 0x01c4u); // "dž" to "DŽ"
  43. EXPECT_EQ(Unicode::to_unicode_uppercase(0x01c9u), 0x01c7u); // "lj" to "LJ"
  44. EXPECT_EQ(Unicode::to_unicode_uppercase(0x01ccu), 0x01cau); // "nj" to "NJ"
  45. EXPECT_EQ(Unicode::to_unicode_uppercase(0x01f3u), 0x01f1u); // "dz" to "DZ"
  46. }
  47. TEST_CASE(to_unicode_titlecase)
  48. {
  49. compare_to_ascii(toupper, Unicode::to_unicode_titlecase);
  50. EXPECT_EQ(Unicode::to_unicode_titlecase(0x03c9u), 0x03a9u); // "ω" to "Ω"
  51. EXPECT_EQ(Unicode::to_unicode_titlecase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
  52. // Code points encoded by ranges in UnicodeData.txt
  53. EXPECT_EQ(Unicode::to_unicode_titlecase(0x3400u), 0x3400u);
  54. EXPECT_EQ(Unicode::to_unicode_titlecase(0x3401u), 0x3401u);
  55. EXPECT_EQ(Unicode::to_unicode_titlecase(0x3402u), 0x3402u);
  56. EXPECT_EQ(Unicode::to_unicode_titlecase(0x4dbfu), 0x4dbfu);
  57. // Code points whose uppercase and titlecase mappings actually differ.
  58. EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c6u), 0x01c5u); // "dž" to "Dž"
  59. EXPECT_EQ(Unicode::to_unicode_titlecase(0x01c9u), 0x01c8u); // "lj" to "Lj"
  60. EXPECT_EQ(Unicode::to_unicode_titlecase(0x01ccu), 0x01cbu); // "nj" to "Nj"
  61. EXPECT_EQ(Unicode::to_unicode_titlecase(0x01f3u), 0x01f2u); // "dz" to "Dz"
  62. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(""sv)), ""sv);
  63. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" "sv)), " "sv);
  64. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" - "sv)), " - "sv);
  65. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a"sv)), "A"sv);
  66. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("A"sv)), "A"sv);
  67. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" a"sv)), " A"sv);
  68. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("a "sv)), "A "sv);
  69. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab"sv)), "Ab"sv);
  70. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("Ab"sv)), "Ab"sv);
  71. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("aB"sv)), "Ab"sv);
  72. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("AB"sv)), "Ab"sv);
  73. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full(" ab"sv)), " Ab"sv);
  74. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("ab "sv)), "Ab "sv);
  75. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo bar baz"sv)), "Foo Bar Baz"sv);
  76. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("foo \n \r bar \t baz"sv)), "Foo \n \r Bar \t Baz"sv);
  77. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("f\"oo\" b'ar'"sv)), "F\"Oo\" B'ar'"sv);
  78. EXPECT_EQ(MUST(Unicode::to_unicode_titlecase_full("123dollars"sv)), "123Dollars"sv);
  79. }
  80. TEST_CASE(to_unicode_casefold)
  81. {
  82. for (u8 code_point = 0; code_point < 0x80; ++code_point) {
  83. auto ascii = tolower(code_point);
  84. auto unicode = MUST(Unicode::to_unicode_casefold_full({ reinterpret_cast<char const*>(&code_point), 1 }));
  85. EXPECT_EQ(unicode.bytes_as_string_view().length(), 1u);
  86. EXPECT_EQ(unicode.bytes_as_string_view()[0], ascii);
  87. }
  88. // LATIN SMALL LETTER SHARP S
  89. auto result = MUST(Unicode::to_unicode_casefold_full("\u00DF"sv));
  90. EXPECT_EQ(result, "\u0073\u0073"sv);
  91. // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
  92. result = MUST(Unicode::to_unicode_casefold_full("\u1FB3"sv));
  93. EXPECT_EQ(result, "\u03B1\u03B9"sv);
  94. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
  95. result = MUST(Unicode::to_unicode_casefold_full("\u1FB6"sv));
  96. EXPECT_EQ(result, "\u03B1\u0342"sv);
  97. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  98. result = MUST(Unicode::to_unicode_casefold_full("\u1FB7"sv));
  99. EXPECT_EQ(result, "\u03B1\u0342\u03B9"sv);
  100. }
  101. BENCHMARK_CASE(casing)
  102. {
  103. for (size_t i = 0; i < 50'000; ++i) {
  104. __test_to_unicode_lowercase();
  105. __test_to_unicode_uppercase();
  106. __test_to_unicode_titlecase();
  107. __test_to_unicode_casefold();
  108. }
  109. }
  110. TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
  111. {
  112. // LATIN SMALL LETTER SHARP S
  113. auto result = MUST(Unicode::to_unicode_lowercase_full("\u00DF"sv));
  114. EXPECT_EQ(result, "\u00DF");
  115. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  116. result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv));
  117. EXPECT_EQ(result, "\u0069\u0307");
  118. // LATIN SMALL LIGATURE FF
  119. result = MUST(Unicode::to_unicode_lowercase_full("\uFB00"sv));
  120. EXPECT_EQ(result, "\uFB00");
  121. // LATIN SMALL LIGATURE FI
  122. result = MUST(Unicode::to_unicode_lowercase_full("\uFB01"sv));
  123. EXPECT_EQ(result, "\uFB01");
  124. // LATIN SMALL LIGATURE FL
  125. result = MUST(Unicode::to_unicode_lowercase_full("\uFB02"sv));
  126. EXPECT_EQ(result, "\uFB02");
  127. // LATIN SMALL LIGATURE FFI
  128. result = MUST(Unicode::to_unicode_lowercase_full("\uFB03"sv));
  129. EXPECT_EQ(result, "\uFB03");
  130. // LATIN SMALL LIGATURE FFL
  131. result = MUST(Unicode::to_unicode_lowercase_full("\uFB04"sv));
  132. EXPECT_EQ(result, "\uFB04");
  133. // LATIN SMALL LIGATURE LONG S T
  134. result = MUST(Unicode::to_unicode_lowercase_full("\uFB05"sv));
  135. EXPECT_EQ(result, "\uFB05");
  136. // LATIN SMALL LIGATURE ST
  137. result = MUST(Unicode::to_unicode_lowercase_full("\uFB06"sv));
  138. EXPECT_EQ(result, "\uFB06");
  139. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  140. result = MUST(Unicode::to_unicode_lowercase_full("\u1FB7"sv));
  141. EXPECT_EQ(result, "\u1FB7");
  142. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  143. result = MUST(Unicode::to_unicode_lowercase_full("\u1FC7"sv));
  144. EXPECT_EQ(result, "\u1FC7");
  145. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  146. result = MUST(Unicode::to_unicode_lowercase_full("\u1FF7"sv));
  147. EXPECT_EQ(result, "\u1FF7");
  148. }
  149. TEST_CASE(to_unicode_lowercase_special_casing_sigma)
  150. {
  151. auto result = MUST(Unicode::to_unicode_lowercase_full("ABCI"sv));
  152. EXPECT_EQ(result, "abci");
  153. // Sigma preceded by A
  154. result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3"sv));
  155. EXPECT_EQ(result, "a\u03C2");
  156. // Sigma preceded by FEMININE ORDINAL INDICATOR
  157. result = MUST(Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv));
  158. EXPECT_EQ(result, "\u00AA\u03C2");
  159. // Sigma preceded by ROMAN NUMERAL ONE
  160. result = MUST(Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv));
  161. EXPECT_EQ(result, "\u2170\u03C2");
  162. // Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
  163. result = MUST(Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv));
  164. EXPECT_EQ(result, "\u0345\u03C3");
  165. // Sigma preceded by A and FULL STOP
  166. result = MUST(Unicode::to_unicode_lowercase_full("A.\u03A3"sv));
  167. EXPECT_EQ(result, "a.\u03C2");
  168. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
  169. result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv));
  170. EXPECT_EQ(result, "a\u180E\u03C2");
  171. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
  172. result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3B"sv));
  173. EXPECT_EQ(result, "a\u180E\u03C3b");
  174. // Sigma followed by A
  175. result = MUST(Unicode::to_unicode_lowercase_full("\u03A3A"sv));
  176. EXPECT_EQ(result, "\u03C3a");
  177. // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
  178. result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180E"sv));
  179. EXPECT_EQ(result, "a\u03C2\u180E");
  180. // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
  181. result = MUST(Unicode::to_unicode_lowercase_full("A\u03A3\u180EB"sv));
  182. EXPECT_EQ(result, "a\u03C3\u180Eb");
  183. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
  184. result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180E"sv));
  185. EXPECT_EQ(result, "a\u180E\u03C2\u180E");
  186. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
  187. result = MUST(Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180EB"sv));
  188. EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
  189. }
  190. TEST_CASE(to_unicode_lowercase_special_casing_i)
  191. {
  192. // LATIN CAPITAL LETTER I
  193. auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
  194. EXPECT_EQ(result, "i"sv);
  195. result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv));
  196. EXPECT_EQ(result, "\u0131"sv);
  197. result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv));
  198. EXPECT_EQ(result, "\u0131"sv);
  199. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  200. result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "en"sv));
  201. EXPECT_EQ(result, "\u0069\u0307"sv);
  202. result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "az"sv));
  203. EXPECT_EQ(result, "i"sv);
  204. result = MUST(Unicode::to_unicode_lowercase_full("\u0130"sv, "tr"sv));
  205. EXPECT_EQ(result, "i"sv);
  206. // LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
  207. result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv));
  208. EXPECT_EQ(result, "i\u0307"sv);
  209. result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv));
  210. EXPECT_EQ(result, "i"sv);
  211. result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv));
  212. EXPECT_EQ(result, "i"sv);
  213. // LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
  214. result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "en"sv));
  215. EXPECT_EQ(result, "ia\u0307"sv);
  216. result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "az"sv));
  217. EXPECT_EQ(result, "\u0131a\u0307"sv);
  218. result = MUST(Unicode::to_unicode_lowercase_full("IA\u0307"sv, "tr"sv));
  219. EXPECT_EQ(result, "\u0131a\u0307"sv);
  220. }
  221. TEST_CASE(to_unicode_lowercase_special_casing_more_above)
  222. {
  223. // LATIN CAPITAL LETTER I
  224. auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
  225. EXPECT_EQ(result, "i"sv);
  226. result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "lt"sv));
  227. EXPECT_EQ(result, "i"sv);
  228. // LATIN CAPITAL LETTER J
  229. result = MUST(Unicode::to_unicode_lowercase_full("J"sv, "en"sv));
  230. EXPECT_EQ(result, "j"sv);
  231. result = MUST(Unicode::to_unicode_lowercase_full("J"sv, "lt"sv));
  232. EXPECT_EQ(result, "j"sv);
  233. // LATIN CAPITAL LETTER I WITH OGONEK
  234. result = MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "en"sv));
  235. EXPECT_EQ(result, "\u012f"sv);
  236. result = MUST(Unicode::to_unicode_lowercase_full("\u012e"sv, "lt"sv));
  237. EXPECT_EQ(result, "\u012f"sv);
  238. // LATIN CAPITAL LETTER I followed by COMBINING GRAVE ACCENT
  239. result = MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "en"sv));
  240. EXPECT_EQ(result, "i\u0300"sv);
  241. result = MUST(Unicode::to_unicode_lowercase_full("I\u0300"sv, "lt"sv));
  242. EXPECT_EQ(result, "i\u0307\u0300"sv);
  243. // LATIN CAPITAL LETTER J followed by COMBINING GRAVE ACCENT
  244. result = MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "en"sv));
  245. EXPECT_EQ(result, "j\u0300"sv);
  246. result = MUST(Unicode::to_unicode_lowercase_full("J\u0300"sv, "lt"sv));
  247. EXPECT_EQ(result, "j\u0307\u0300"sv);
  248. // LATIN CAPITAL LETTER I WITH OGONEK followed by COMBINING GRAVE ACCENT
  249. result = MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "en"sv));
  250. EXPECT_EQ(result, "\u012f\u0300"sv);
  251. result = MUST(Unicode::to_unicode_lowercase_full("\u012e\u0300"sv, "lt"sv));
  252. EXPECT_EQ(result, "\u012f\u0307\u0300"sv);
  253. }
  254. TEST_CASE(to_unicode_lowercase_special_casing_not_before_dot)
  255. {
  256. // LATIN CAPITAL LETTER I
  257. auto result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "en"sv));
  258. EXPECT_EQ(result, "i"sv);
  259. result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "az"sv));
  260. EXPECT_EQ(result, "\u0131"sv);
  261. result = MUST(Unicode::to_unicode_lowercase_full("I"sv, "tr"sv));
  262. EXPECT_EQ(result, "\u0131"sv);
  263. // LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
  264. result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv));
  265. EXPECT_EQ(result, "i\u0307"sv);
  266. result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv));
  267. EXPECT_EQ(result, "i"sv);
  268. result = MUST(Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv));
  269. EXPECT_EQ(result, "i"sv);
  270. }
  271. TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
  272. {
  273. // LATIN SMALL LETTER SHARP S
  274. auto result = MUST(Unicode::to_unicode_uppercase_full("\u00DF"sv));
  275. EXPECT_EQ(result, "\u0053\u0053");
  276. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  277. result = MUST(Unicode::to_unicode_uppercase_full("\u0130"sv));
  278. EXPECT_EQ(result, "\u0130");
  279. // LATIN SMALL LIGATURE FF
  280. result = MUST(Unicode::to_unicode_uppercase_full("\uFB00"sv));
  281. EXPECT_EQ(result, "\u0046\u0046");
  282. // LATIN SMALL LIGATURE FI
  283. result = MUST(Unicode::to_unicode_uppercase_full("\uFB01"sv));
  284. EXPECT_EQ(result, "\u0046\u0049");
  285. // LATIN SMALL LIGATURE FL
  286. result = MUST(Unicode::to_unicode_uppercase_full("\uFB02"sv));
  287. EXPECT_EQ(result, "\u0046\u004C");
  288. // LATIN SMALL LIGATURE FFI
  289. result = MUST(Unicode::to_unicode_uppercase_full("\uFB03"sv));
  290. EXPECT_EQ(result, "\u0046\u0046\u0049");
  291. // LATIN SMALL LIGATURE FFL
  292. result = MUST(Unicode::to_unicode_uppercase_full("\uFB04"sv));
  293. EXPECT_EQ(result, "\u0046\u0046\u004C");
  294. // LATIN SMALL LIGATURE LONG S T
  295. result = MUST(Unicode::to_unicode_uppercase_full("\uFB05"sv));
  296. EXPECT_EQ(result, "\u0053\u0054");
  297. // LATIN SMALL LIGATURE ST
  298. result = MUST(Unicode::to_unicode_uppercase_full("\uFB06"sv));
  299. EXPECT_EQ(result, "\u0053\u0054");
  300. // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
  301. result = MUST(Unicode::to_unicode_uppercase_full("\u0390"sv));
  302. EXPECT_EQ(result, "\u0399\u0308\u0301");
  303. // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
  304. result = MUST(Unicode::to_unicode_uppercase_full("\u03B0"sv));
  305. EXPECT_EQ(result, "\u03A5\u0308\u0301");
  306. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  307. result = MUST(Unicode::to_unicode_uppercase_full("\u1FB7"sv));
  308. EXPECT_EQ(result, "\u0391\u0342\u0399");
  309. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  310. result = MUST(Unicode::to_unicode_uppercase_full("\u1FC7"sv));
  311. EXPECT_EQ(result, "\u0397\u0342\u0399");
  312. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  313. result = MUST(Unicode::to_unicode_uppercase_full("\u1FF7"sv));
  314. EXPECT_EQ(result, "\u03A9\u0342\u0399");
  315. }
  316. TEST_CASE(to_unicode_uppercase_special_casing_soft_dotted)
  317. {
  318. // LATIN SMALL LETTER I
  319. auto result = MUST(Unicode::to_unicode_uppercase_full("i"sv, "en"sv));
  320. EXPECT_EQ(result, "I"sv);
  321. result = MUST(Unicode::to_unicode_uppercase_full("i"sv, "lt"sv));
  322. EXPECT_EQ(result, "I"sv);
  323. // LATIN SMALL LETTER J
  324. result = MUST(Unicode::to_unicode_uppercase_full("j"sv, "en"sv));
  325. EXPECT_EQ(result, "J"sv);
  326. result = MUST(Unicode::to_unicode_uppercase_full("j"sv, "lt"sv));
  327. EXPECT_EQ(result, "J"sv);
  328. // LATIN SMALL LETTER I followed by COMBINING DOT ABOVE
  329. result = MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "en"sv));
  330. EXPECT_EQ(result, "I\u0307"sv);
  331. result = MUST(Unicode::to_unicode_uppercase_full("i\u0307"sv, "lt"sv));
  332. EXPECT_EQ(result, "I"sv);
  333. // LATIN SMALL LETTER J followed by COMBINING DOT ABOVE
  334. result = MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "en"sv));
  335. EXPECT_EQ(result, "J\u0307"sv);
  336. result = MUST(Unicode::to_unicode_uppercase_full("j\u0307"sv, "lt"sv));
  337. EXPECT_EQ(result, "J"sv);
  338. }
  339. TEST_CASE(to_unicode_titlecase_unconditional_special_casing)
  340. {
  341. // LATIN SMALL LETTER SHARP S
  342. auto result = MUST(Unicode::to_unicode_titlecase_full("\u00DF"sv));
  343. EXPECT_EQ(result, "\u0053\u0073"sv);
  344. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  345. result = MUST(Unicode::to_unicode_titlecase_full("\u0130"sv));
  346. EXPECT_EQ(result, "\u0130"sv);
  347. // LATIN SMALL LIGATURE FF
  348. result = MUST(Unicode::to_unicode_titlecase_full("\uFB00"sv));
  349. EXPECT_EQ(result, "\u0046\u0066"sv);
  350. // LATIN SMALL LIGATURE FI
  351. result = MUST(Unicode::to_unicode_titlecase_full("\uFB01"sv));
  352. EXPECT_EQ(result, "\u0046\u0069"sv);
  353. // LATIN SMALL LIGATURE FL
  354. result = MUST(Unicode::to_unicode_titlecase_full("\uFB02"sv));
  355. EXPECT_EQ(result, "\u0046\u006C"sv);
  356. // LATIN SMALL LIGATURE FFI
  357. result = MUST(Unicode::to_unicode_titlecase_full("\uFB03"sv));
  358. EXPECT_EQ(result, "\u0046\u0066\u0069"sv);
  359. // LATIN SMALL LIGATURE FFL
  360. result = MUST(Unicode::to_unicode_titlecase_full("\uFB04"sv));
  361. EXPECT_EQ(result, "\u0046\u0066\u006C"sv);
  362. // LATIN SMALL LIGATURE LONG S T
  363. result = MUST(Unicode::to_unicode_titlecase_full("\uFB05"sv));
  364. EXPECT_EQ(result, "\u0053\u0074"sv);
  365. // LATIN SMALL LIGATURE ST
  366. result = MUST(Unicode::to_unicode_titlecase_full("\uFB06"sv));
  367. EXPECT_EQ(result, "\u0053\u0074"sv);
  368. // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
  369. result = MUST(Unicode::to_unicode_titlecase_full("\u0390"sv));
  370. EXPECT_EQ(result, "\u0399\u0308\u0301"sv);
  371. // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
  372. result = MUST(Unicode::to_unicode_titlecase_full("\u03B0"sv));
  373. EXPECT_EQ(result, "\u03A5\u0308\u0301"sv);
  374. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  375. result = MUST(Unicode::to_unicode_titlecase_full("\u1FB7"sv));
  376. EXPECT_EQ(result, "\u0391\u0342\u0345"sv);
  377. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  378. result = MUST(Unicode::to_unicode_titlecase_full("\u1FC7"sv));
  379. EXPECT_EQ(result, "\u0397\u0342\u0345"sv);
  380. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  381. result = MUST(Unicode::to_unicode_titlecase_full("\u1FF7"sv));
  382. EXPECT_EQ(result, "\u03A9\u0342\u0345"sv);
  383. }
  384. TEST_CASE(to_unicode_titlecase_special_casing_i)
  385. {
  386. // LATIN SMALL LETTER I
  387. auto result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "en"sv));
  388. EXPECT_EQ(result, "I"sv);
  389. result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "az"sv));
  390. EXPECT_EQ(result, "\u0130"sv);
  391. result = MUST(Unicode::to_unicode_titlecase_full("i"sv, "tr"sv));
  392. EXPECT_EQ(result, "\u0130"sv);
  393. }
  394. TEST_CASE(general_category)
  395. {
  396. auto general_category = [](StringView name) {
  397. auto general_category = Unicode::general_category_from_string(name);
  398. VERIFY(general_category.has_value());
  399. return *general_category;
  400. };
  401. auto general_category_c = general_category("C"sv);
  402. auto general_category_other = general_category("Other"sv);
  403. EXPECT_EQ(general_category_c, general_category_other);
  404. auto general_category_cc = general_category("Cc"sv);
  405. auto general_category_control = general_category("Control"sv);
  406. EXPECT_EQ(general_category_cc, general_category_control);
  407. auto general_category_co = general_category("Co"sv);
  408. auto general_category_private_use = general_category("Private_Use"sv);
  409. EXPECT_EQ(general_category_co, general_category_private_use);
  410. auto general_category_cn = general_category("Cn"sv);
  411. auto general_category_unassigned = general_category("Unassigned"sv);
  412. EXPECT_EQ(general_category_cn, general_category_unassigned);
  413. auto general_category_lc = general_category("LC"sv);
  414. auto general_category_cased_letter = general_category("Cased_Letter"sv);
  415. EXPECT_EQ(general_category_lc, general_category_cased_letter);
  416. auto general_category_ll = general_category("Ll"sv);
  417. auto general_category_lowercase_letter = general_category("Lowercase_Letter"sv);
  418. EXPECT_EQ(general_category_ll, general_category_lowercase_letter);
  419. auto general_category_lu = general_category("Lu"sv);
  420. auto general_category_uppercase_letter = general_category("Uppercase_Letter"sv);
  421. EXPECT_EQ(general_category_lu, general_category_uppercase_letter);
  422. for (u32 code_point = 0; code_point <= 0x1f; ++code_point) {
  423. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c));
  424. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cc));
  425. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  426. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  427. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc));
  428. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  429. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  430. }
  431. for (u32 code_point = 0xe000; code_point <= 0xe100; ++code_point) {
  432. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c));
  433. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_co));
  434. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  435. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  436. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc));
  437. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  438. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  439. }
  440. for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point) {
  441. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c));
  442. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cn));
  443. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  444. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  445. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc));
  446. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  447. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  448. }
  449. for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point) {
  450. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_lc));
  451. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_ll));
  452. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_c));
  453. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  454. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  455. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  456. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  457. }
  458. for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point) {
  459. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_lc));
  460. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_lu));
  461. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_c));
  462. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  463. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  464. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  465. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  466. }
  467. }
  468. BENCHMARK_CASE(general_category_performance)
  469. {
  470. auto general_category_cased_letter = Unicode::general_category_from_string("Cased_Letter"sv).value();
  471. for (size_t i = 0; i < 1'000'000; ++i) {
  472. for (u32 code_point = 0; code_point <= 0x1f; ++code_point)
  473. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cased_letter));
  474. for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point)
  475. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cased_letter));
  476. for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point)
  477. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cased_letter));
  478. for (u32 code_point = 0xe000; code_point <= 0xe100; ++code_point)
  479. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cased_letter));
  480. for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point)
  481. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cased_letter));
  482. }
  483. }
  484. TEST_CASE(property)
  485. {
  486. auto property = [](StringView name) {
  487. auto property = Unicode::property_from_string(name);
  488. VERIFY(property.has_value());
  489. return *property;
  490. };
  491. auto property_any = property("Any"sv);
  492. auto property_assigned = property("Assigned"sv);
  493. auto property_ascii = property("ASCII"sv);
  494. auto property_white_space = property("White_Space"sv);
  495. auto property_wspace = property("WSpace"sv);
  496. auto property_space = property("space"sv);
  497. EXPECT_EQ(property_white_space, property_wspace);
  498. EXPECT_EQ(property_white_space, property_space);
  499. auto property_emoji_presentation = property("Emoji_Presentation"sv);
  500. auto property_epres = property("EPres"sv);
  501. EXPECT_EQ(property_emoji_presentation, property_epres);
  502. for (u32 code_point = 0; code_point <= 0x10ffff; code_point += 1000)
  503. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  504. for (u32 code_point = 0x101d0; code_point <= 0x101fd; ++code_point) {
  505. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  506. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  507. EXPECT(!Unicode::code_point_has_property(code_point, property_ascii));
  508. EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
  509. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  510. }
  511. for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point) {
  512. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  513. EXPECT(!Unicode::code_point_has_property(code_point, property_assigned));
  514. EXPECT(!Unicode::code_point_has_property(code_point, property_ascii));
  515. EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
  516. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  517. }
  518. for (u32 code_point = 0; code_point <= 0x7f; ++code_point) {
  519. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  520. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  521. EXPECT(Unicode::code_point_has_property(code_point, property_ascii));
  522. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  523. }
  524. for (u32 code_point = 0x9; code_point <= 0xd; ++code_point) {
  525. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  526. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  527. EXPECT(Unicode::code_point_has_property(code_point, property_ascii));
  528. EXPECT(Unicode::code_point_has_property(code_point, property_white_space));
  529. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  530. }
  531. for (u32 code_point = 0x1f3e5; code_point <= 0x1f3f0; ++code_point) {
  532. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  533. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  534. EXPECT(Unicode::code_point_has_property(code_point, property_emoji_presentation));
  535. EXPECT(!Unicode::code_point_has_property(code_point, property_ascii));
  536. EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
  537. }
  538. }
  539. TEST_CASE(script)
  540. {
  541. auto script = [](StringView name) {
  542. auto script = Unicode::script_from_string(name);
  543. VERIFY(script.has_value());
  544. return *script;
  545. };
  546. auto script_latin = script("Latin"sv);
  547. auto script_latn = script("Latn"sv);
  548. EXPECT_EQ(script_latin, script_latn);
  549. auto script_cyrillic = script("Cyrillic"sv);
  550. auto script_cyrl = script("Cyrl"sv);
  551. EXPECT_EQ(script_cyrillic, script_cyrl);
  552. auto script_greek = script("Greek"sv);
  553. auto script_grek = script("Grek"sv);
  554. EXPECT_EQ(script_greek, script_grek);
  555. for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point) {
  556. EXPECT(Unicode::code_point_has_script(code_point, script_latin));
  557. EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
  558. EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
  559. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  560. }
  561. for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point) {
  562. EXPECT(Unicode::code_point_has_script(code_point, script_latin));
  563. EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
  564. EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
  565. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  566. }
  567. for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
  568. EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
  569. EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
  570. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  571. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  572. }
  573. for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
  574. EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
  575. EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
  576. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  577. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  578. }
  579. for (u32 code_point = 0x1f80; code_point <= 0x1fb4; ++code_point) {
  580. EXPECT(Unicode::code_point_has_script(code_point, script_greek));
  581. EXPECT(Unicode::code_point_has_script_extension(code_point, script_greek));
  582. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  583. EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
  584. }
  585. }
  586. TEST_CASE(block)
  587. {
  588. for (u32 code_point = 0x0000; code_point <= 0x007F; ++code_point)
  589. EXPECT_EQ("Basic Latin"sv, Unicode::code_point_block_display_name(code_point).value());
  590. for (u32 code_point = 0x0370; code_point <= 0x03FF; ++code_point)
  591. EXPECT_EQ("Greek and Coptic"sv, Unicode::code_point_block_display_name(code_point).value());
  592. }
  593. TEST_CASE(script_extension)
  594. {
  595. auto script = [](StringView name) {
  596. auto script = Unicode::script_from_string(name);
  597. VERIFY(script.has_value());
  598. return *script;
  599. };
  600. auto script_latin = script("Latin"sv);
  601. auto script_greek = script("Greek"sv);
  602. for (u32 code_point = 0x363; code_point <= 0x36f; ++code_point) {
  603. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  604. EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
  605. }
  606. EXPECT(!Unicode::code_point_has_script(0x342, script_greek));
  607. EXPECT(Unicode::code_point_has_script_extension(0x342, script_greek));
  608. EXPECT(!Unicode::code_point_has_script(0x345, script_greek));
  609. EXPECT(Unicode::code_point_has_script_extension(0x345, script_greek));
  610. EXPECT(!Unicode::code_point_has_script(0x1dc0, script_greek));
  611. EXPECT(Unicode::code_point_has_script_extension(0x1dc0, script_greek));
  612. EXPECT(!Unicode::code_point_has_script(0x1dc1, script_greek));
  613. EXPECT(Unicode::code_point_has_script_extension(0x1dc1, script_greek));
  614. auto script_common = script("Common"sv);
  615. auto script_zyyy = script("Zyyy"sv);
  616. EXPECT_EQ(script_common, script_zyyy);
  617. EXPECT(Unicode::code_point_has_script(0x202f, script_common));
  618. EXPECT(!Unicode::code_point_has_script_extension(0x202f, script_common));
  619. EXPECT(Unicode::code_point_has_script(0x3000, script_common));
  620. EXPECT(Unicode::code_point_has_script_extension(0x3000, script_common));
  621. auto script_inherited = script("Inherited"sv);
  622. auto script_qaai = script("Qaai"sv);
  623. auto script_zinh = script("Zinh"sv);
  624. EXPECT_EQ(script_inherited, script_qaai);
  625. EXPECT_EQ(script_inherited, script_zinh);
  626. EXPECT(Unicode::code_point_has_script(0x1ced, script_inherited));
  627. EXPECT(!Unicode::code_point_has_script_extension(0x1ced, script_inherited));
  628. EXPECT(Unicode::code_point_has_script(0x101fd, script_inherited));
  629. EXPECT(Unicode::code_point_has_script_extension(0x101fd, script_inherited));
  630. }
  631. TEST_CASE(code_point_display_name)
  632. {
  633. auto code_point_display_name = [](u32 code_point) {
  634. auto name = Unicode::code_point_display_name(code_point);
  635. VERIFY(name.has_value());
  636. return name.release_value();
  637. };
  638. // Control code points.
  639. EXPECT_EQ(code_point_display_name(0), "NULL"sv);
  640. EXPECT_EQ(code_point_display_name(1), "START OF HEADING"sv);
  641. EXPECT_EQ(code_point_display_name(0xa), "LINE FEED"sv);
  642. // Ideographic code points (which already appeared in a range in UnicodeData.txt).
  643. EXPECT_EQ(code_point_display_name(0x3400), "CJK UNIFIED IDEOGRAPH-3400"sv);
  644. EXPECT_EQ(code_point_display_name(0x3401), "CJK UNIFIED IDEOGRAPH-3401"sv);
  645. EXPECT_EQ(code_point_display_name(0x3402), "CJK UNIFIED IDEOGRAPH-3402"sv);
  646. EXPECT_EQ(code_point_display_name(0x4dbf), "CJK UNIFIED IDEOGRAPH-4DBF"sv);
  647. EXPECT_EQ(code_point_display_name(0x20000), "CJK UNIFIED IDEOGRAPH-20000"sv);
  648. EXPECT_EQ(code_point_display_name(0x20001), "CJK UNIFIED IDEOGRAPH-20001"sv);
  649. EXPECT_EQ(code_point_display_name(0x20002), "CJK UNIFIED IDEOGRAPH-20002"sv);
  650. EXPECT_EQ(code_point_display_name(0x2a6df), "CJK UNIFIED IDEOGRAPH-2A6DF"sv);
  651. EXPECT(!Unicode::code_point_display_name(0x2a6e0).has_value());
  652. // Ideographic code points (which appeared individually in UnicodeData.txt and were coalesced into a range).
  653. EXPECT_EQ(code_point_display_name(0x2f800), "CJK COMPATIBILITY IDEOGRAPH-2F800"sv);
  654. EXPECT_EQ(code_point_display_name(0x2f801), "CJK COMPATIBILITY IDEOGRAPH-2F801"sv);
  655. EXPECT_EQ(code_point_display_name(0x2f802), "CJK COMPATIBILITY IDEOGRAPH-2F802"sv);
  656. EXPECT_EQ(code_point_display_name(0x2fa1d), "CJK COMPATIBILITY IDEOGRAPH-2FA1D"sv);
  657. }
  658. TEST_CASE(code_point_bidirectional_character_type)
  659. {
  660. auto code_point_bidi_class = [](u32 code_point) {
  661. auto bidi_class = Unicode::bidirectional_class(code_point);
  662. VERIFY(bidi_class.has_value());
  663. return bidi_class.release_value();
  664. };
  665. auto bidi_class_from_string = [](StringView name) {
  666. auto result = Unicode::bidirectional_class_from_string(name);
  667. VERIFY(result.has_value());
  668. return result.release_value();
  669. };
  670. // Left-to-right
  671. EXPECT_EQ(code_point_bidi_class('A'), bidi_class_from_string("L"sv));
  672. EXPECT_EQ(code_point_bidi_class('z'), bidi_class_from_string("L"sv));
  673. // European number
  674. EXPECT_EQ(code_point_bidi_class('7'), bidi_class_from_string("EN"sv));
  675. // Whitespace
  676. EXPECT_EQ(code_point_bidi_class(' '), bidi_class_from_string("WS"sv));
  677. // Arabic right-to-left (U+FEB4 ARABIC LETTER SEEN MEDIAL FORM)
  678. EXPECT_EQ(code_point_bidi_class(0xFEB4), bidi_class_from_string("AL"sv));
  679. }