TestUnicodeCharacterTypes.cpp 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. /*
  2. * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h>
  7. #include <AK/StringView.h>
  8. #include <LibUnicode/CharacterTypes.h>
  9. #include <ctype.h>
  10. static void compare_to_ascii(auto& old_function, auto& new_function)
  11. {
  12. i64 result1 = 0;
  13. i64 result2 = 0;
  14. for (u32 i = 0; i < 0x80; ++i) {
  15. EXPECT_EQ(result1 = old_function(i), result2 = new_function(i));
  16. if (result1 != result2)
  17. dbgln("Function input value was {}.", i);
  18. }
  19. }
  20. TEST_CASE(to_unicode_lowercase)
  21. {
  22. compare_to_ascii(tolower, Unicode::to_unicode_lowercase);
  23. EXPECT_EQ(Unicode::to_unicode_lowercase(0x03c9u), 0x03c9u); // "ω" to "ω"
  24. EXPECT_EQ(Unicode::to_unicode_lowercase(0x03a9u), 0x03c9u); // "Ω" to "ω"
  25. // Code points encoded by ranges in UnicodeData.txt
  26. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3400u), 0x3400u);
  27. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3401u), 0x3401u);
  28. EXPECT_EQ(Unicode::to_unicode_lowercase(0x3402u), 0x3402u);
  29. EXPECT_EQ(Unicode::to_unicode_lowercase(0x4dbfu), 0x4dbfu);
  30. }
  31. TEST_CASE(to_unicode_uppercase)
  32. {
  33. compare_to_ascii(toupper, Unicode::to_unicode_uppercase);
  34. EXPECT_EQ(Unicode::to_unicode_uppercase(0x03c9u), 0x03a9u); // "ω" to "Ω"
  35. EXPECT_EQ(Unicode::to_unicode_uppercase(0x03a9u), 0x03a9u); // "Ω" to "Ω"
  36. // Code points encoded by ranges in UnicodeData.txt
  37. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3400u), 0x3400u);
  38. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3401u), 0x3401u);
  39. EXPECT_EQ(Unicode::to_unicode_uppercase(0x3402u), 0x3402u);
  40. EXPECT_EQ(Unicode::to_unicode_uppercase(0x4dbfu), 0x4dbfu);
  41. }
  42. TEST_CASE(to_unicode_lowercase_unconditional_special_casing)
  43. {
  44. // LATIN SMALL LETTER SHARP S
  45. auto result = Unicode::to_unicode_lowercase_full("\u00DF"sv);
  46. EXPECT_EQ(result, "\u00DF");
  47. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  48. result = Unicode::to_unicode_lowercase_full("\u0130"sv);
  49. EXPECT_EQ(result, "\u0069\u0307");
  50. // LATIN SMALL LIGATURE FF
  51. result = Unicode::to_unicode_lowercase_full("\uFB00"sv);
  52. EXPECT_EQ(result, "\uFB00");
  53. // LATIN SMALL LIGATURE FI
  54. result = Unicode::to_unicode_lowercase_full("\uFB01"sv);
  55. EXPECT_EQ(result, "\uFB01");
  56. // LATIN SMALL LIGATURE FL
  57. result = Unicode::to_unicode_lowercase_full("\uFB02"sv);
  58. EXPECT_EQ(result, "\uFB02");
  59. // LATIN SMALL LIGATURE FFI
  60. result = Unicode::to_unicode_lowercase_full("\uFB03"sv);
  61. EXPECT_EQ(result, "\uFB03");
  62. // LATIN SMALL LIGATURE FFL
  63. result = Unicode::to_unicode_lowercase_full("\uFB04"sv);
  64. EXPECT_EQ(result, "\uFB04");
  65. // LATIN SMALL LIGATURE LONG S T
  66. result = Unicode::to_unicode_lowercase_full("\uFB05"sv);
  67. EXPECT_EQ(result, "\uFB05");
  68. // LATIN SMALL LIGATURE ST
  69. result = Unicode::to_unicode_lowercase_full("\uFB06"sv);
  70. EXPECT_EQ(result, "\uFB06");
  71. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  72. result = Unicode::to_unicode_lowercase_full("\u1FB7"sv);
  73. EXPECT_EQ(result, "\u1FB7");
  74. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  75. result = Unicode::to_unicode_lowercase_full("\u1FC7"sv);
  76. EXPECT_EQ(result, "\u1FC7");
  77. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  78. result = Unicode::to_unicode_lowercase_full("\u1FF7"sv);
  79. EXPECT_EQ(result, "\u1FF7");
  80. }
  81. TEST_CASE(to_unicode_lowercase_special_casing_sigma)
  82. {
  83. auto result = Unicode::to_unicode_lowercase_full("ABCI"sv);
  84. EXPECT_EQ(result, "abci");
  85. // Sigma preceded by A
  86. result = Unicode::to_unicode_lowercase_full("A\u03A3"sv);
  87. EXPECT_EQ(result, "a\u03C2");
  88. // Sigma preceded by FEMININE ORDINAL INDICATOR
  89. result = Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv);
  90. EXPECT_EQ(result, "\u00AA\u03C2");
  91. // Sigma preceded by ROMAN NUMERAL ONE
  92. result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv);
  93. EXPECT_EQ(result, "\u2170\u03C2");
  94. // Sigma preceded by COMBINING GREEK YPOGEGRAMMENI
  95. result = Unicode::to_unicode_lowercase_full("\u0345\u03A3"sv);
  96. EXPECT_EQ(result, "\u0345\u03C3");
  97. // Sigma preceded by A and FULL STOP
  98. result = Unicode::to_unicode_lowercase_full("A.\u03A3"sv);
  99. EXPECT_EQ(result, "a.\u03C2");
  100. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
  101. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv);
  102. EXPECT_EQ(result, "a\u180E\u03C2");
  103. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by B
  104. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3B"sv);
  105. EXPECT_EQ(result, "a\u180E\u03C3b");
  106. // Sigma followed by A
  107. result = Unicode::to_unicode_lowercase_full("\u03A3A"sv);
  108. EXPECT_EQ(result, "\u03C3a");
  109. // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR
  110. result = Unicode::to_unicode_lowercase_full("A\u03A3\u180E"sv);
  111. EXPECT_EQ(result, "a\u03C2\u180E");
  112. // Sigma preceded by A, followed by MONGOLIAN VOWEL SEPARATOR and B
  113. result = Unicode::to_unicode_lowercase_full("A\u03A3\u180EB"sv);
  114. EXPECT_EQ(result, "a\u03C3\u180Eb");
  115. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR
  116. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180E"sv);
  117. EXPECT_EQ(result, "a\u180E\u03C2\u180E");
  118. // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR, followed by MONGOLIAN VOWEL SEPARATOR and B
  119. result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3\u180EB"sv);
  120. EXPECT_EQ(result, "a\u180E\u03C3\u180Eb");
  121. }
  122. TEST_CASE(to_unicode_lowercase_special_casing_i)
  123. {
  124. // LATIN CAPITAL LETTER I
  125. auto result = Unicode::to_unicode_lowercase_full("I"sv, "en"sv);
  126. EXPECT_EQ(result, "i"sv);
  127. result = Unicode::to_unicode_lowercase_full("I"sv, "az"sv);
  128. EXPECT_EQ(result, "\u0131"sv);
  129. result = Unicode::to_unicode_lowercase_full("I"sv, "tr"sv);
  130. EXPECT_EQ(result, "\u0131"sv);
  131. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  132. result = Unicode::to_unicode_lowercase_full("\u0130"sv, "en"sv);
  133. EXPECT_EQ(result, "\u0069\u0307"sv);
  134. result = Unicode::to_unicode_lowercase_full("\u0130"sv, "az"sv);
  135. EXPECT_EQ(result, "i"sv);
  136. result = Unicode::to_unicode_lowercase_full("\u0130"sv, "tr"sv);
  137. EXPECT_EQ(result, "i"sv);
  138. // LATIN CAPITAL LETTER I followed by COMBINING DOT ABOVE
  139. result = Unicode::to_unicode_lowercase_full("I\u0307"sv, "en"sv);
  140. EXPECT_EQ(result, "i\u0307"sv);
  141. result = Unicode::to_unicode_lowercase_full("I\u0307"sv, "az"sv);
  142. EXPECT_EQ(result, "i"sv);
  143. result = Unicode::to_unicode_lowercase_full("I\u0307"sv, "tr"sv);
  144. EXPECT_EQ(result, "i"sv);
  145. // LATIN CAPITAL LETTER I followed by combining class 0 and COMBINING DOT ABOVE
  146. result = Unicode::to_unicode_lowercase_full("IA\u0307"sv, "en"sv);
  147. EXPECT_EQ(result, "ia\u0307"sv);
  148. result = Unicode::to_unicode_lowercase_full("IA\u0307"sv, "az"sv);
  149. EXPECT_EQ(result, "\u0131a\u0307"sv);
  150. result = Unicode::to_unicode_lowercase_full("IA\u0307"sv, "tr"sv);
  151. EXPECT_EQ(result, "\u0131a\u0307"sv);
  152. }
  153. TEST_CASE(to_unicode_uppercase_unconditional_special_casing)
  154. {
  155. // LATIN SMALL LETTER SHARP S
  156. auto result = Unicode::to_unicode_uppercase_full("\u00DF"sv);
  157. EXPECT_EQ(result, "\u0053\u0053");
  158. // LATIN CAPITAL LETTER I WITH DOT ABOVE
  159. result = Unicode::to_unicode_uppercase_full("\u0130"sv);
  160. EXPECT_EQ(result, "\u0130");
  161. // LATIN SMALL LIGATURE FF
  162. result = Unicode::to_unicode_uppercase_full("\uFB00"sv);
  163. EXPECT_EQ(result, "\u0046\u0046");
  164. // LATIN SMALL LIGATURE FI
  165. result = Unicode::to_unicode_uppercase_full("\uFB01"sv);
  166. EXPECT_EQ(result, "\u0046\u0049");
  167. // LATIN SMALL LIGATURE FL
  168. result = Unicode::to_unicode_uppercase_full("\uFB02"sv);
  169. EXPECT_EQ(result, "\u0046\u004C");
  170. // LATIN SMALL LIGATURE FFI
  171. result = Unicode::to_unicode_uppercase_full("\uFB03"sv);
  172. EXPECT_EQ(result, "\u0046\u0046\u0049");
  173. // LATIN SMALL LIGATURE FFL
  174. result = Unicode::to_unicode_uppercase_full("\uFB04"sv);
  175. EXPECT_EQ(result, "\u0046\u0046\u004C");
  176. // LATIN SMALL LIGATURE LONG S T
  177. result = Unicode::to_unicode_uppercase_full("\uFB05"sv);
  178. EXPECT_EQ(result, "\u0053\u0054");
  179. // LATIN SMALL LIGATURE ST
  180. result = Unicode::to_unicode_uppercase_full("\uFB06"sv);
  181. EXPECT_EQ(result, "\u0053\u0054");
  182. // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
  183. result = Unicode::to_unicode_uppercase_full("\u0390"sv);
  184. EXPECT_EQ(result, "\u0399\u0308\u0301");
  185. // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
  186. result = Unicode::to_unicode_uppercase_full("\u03B0"sv);
  187. EXPECT_EQ(result, "\u03A5\u0308\u0301");
  188. // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
  189. result = Unicode::to_unicode_uppercase_full("\u1FB7"sv);
  190. EXPECT_EQ(result, "\u0391\u0342\u0399");
  191. // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
  192. result = Unicode::to_unicode_uppercase_full("\u1FC7"sv);
  193. EXPECT_EQ(result, "\u0397\u0342\u0399");
  194. // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
  195. result = Unicode::to_unicode_uppercase_full("\u1FF7"sv);
  196. EXPECT_EQ(result, "\u03A9\u0342\u0399");
  197. }
  198. TEST_CASE(general_category)
  199. {
  200. auto general_category = [](StringView name) {
  201. auto general_category = Unicode::general_category_from_string(name);
  202. VERIFY(general_category.has_value());
  203. return *general_category;
  204. };
  205. auto general_category_c = general_category("C"sv);
  206. auto general_category_other = general_category("Other"sv);
  207. EXPECT_EQ(general_category_c, general_category_other);
  208. auto general_category_cc = general_category("Cc"sv);
  209. auto general_category_control = general_category("Control"sv);
  210. EXPECT_EQ(general_category_cc, general_category_control);
  211. auto general_category_co = general_category("Co"sv);
  212. auto general_category_private_use = general_category("Private_Use"sv);
  213. EXPECT_EQ(general_category_co, general_category_private_use);
  214. auto general_category_cn = general_category("Cn"sv);
  215. auto general_category_unassigned = general_category("Unassigned"sv);
  216. EXPECT_EQ(general_category_cn, general_category_unassigned);
  217. auto general_category_lc = general_category("LC"sv);
  218. auto general_category_cased_letter = general_category("Cased_Letter"sv);
  219. EXPECT_EQ(general_category_lc, general_category_cased_letter);
  220. auto general_category_ll = general_category("Ll"sv);
  221. auto general_category_lowercase_letter = general_category("Lowercase_Letter"sv);
  222. EXPECT_EQ(general_category_ll, general_category_lowercase_letter);
  223. auto general_category_lu = general_category("Lu"sv);
  224. auto general_category_uppercase_letter = general_category("Uppercase_Letter"sv);
  225. EXPECT_EQ(general_category_lu, general_category_uppercase_letter);
  226. for (u32 code_point = 0; code_point <= 0x1f; ++code_point) {
  227. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c));
  228. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cc));
  229. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  230. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  231. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc));
  232. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  233. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  234. }
  235. for (u32 code_point = 0xe000; code_point <= 0xe100; ++code_point) {
  236. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c));
  237. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_co));
  238. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  239. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  240. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc));
  241. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  242. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  243. }
  244. for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point) {
  245. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_c));
  246. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_cn));
  247. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  248. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  249. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lc));
  250. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  251. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  252. }
  253. for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point) {
  254. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_lc));
  255. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_ll));
  256. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_c));
  257. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  258. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  259. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  260. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_lu));
  261. }
  262. for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point) {
  263. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_lc));
  264. EXPECT(Unicode::code_point_has_general_category(code_point, general_category_lu));
  265. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_c));
  266. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cc));
  267. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_co));
  268. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_cn));
  269. EXPECT(!Unicode::code_point_has_general_category(code_point, general_category_ll));
  270. }
  271. }
  272. TEST_CASE(property)
  273. {
  274. auto property = [](StringView name) {
  275. auto property = Unicode::property_from_string(name);
  276. VERIFY(property.has_value());
  277. return *property;
  278. };
  279. auto property_any = property("Any"sv);
  280. auto property_assigned = property("Assigned"sv);
  281. auto property_ascii = property("ASCII"sv);
  282. auto property_white_space = property("White_Space"sv);
  283. auto property_wspace = property("WSpace"sv);
  284. auto property_space = property("space"sv);
  285. EXPECT_EQ(property_white_space, property_wspace);
  286. EXPECT_EQ(property_white_space, property_space);
  287. auto property_emoji_presentation = property("Emoji_Presentation"sv);
  288. auto property_epres = property("EPres"sv);
  289. EXPECT_EQ(property_emoji_presentation, property_epres);
  290. for (u32 code_point = 0; code_point <= 0x10ffff; code_point += 1000)
  291. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  292. for (u32 code_point = 0x101d0; code_point <= 0x101fd; ++code_point) {
  293. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  294. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  295. EXPECT(!Unicode::code_point_has_property(code_point, property_ascii));
  296. EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
  297. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  298. }
  299. for (u32 code_point = 0x101fe; code_point <= 0x1027f; ++code_point) {
  300. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  301. EXPECT(!Unicode::code_point_has_property(code_point, property_assigned));
  302. EXPECT(!Unicode::code_point_has_property(code_point, property_ascii));
  303. EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
  304. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  305. }
  306. for (u32 code_point = 0; code_point <= 0x7f; ++code_point) {
  307. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  308. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  309. EXPECT(Unicode::code_point_has_property(code_point, property_ascii));
  310. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  311. }
  312. for (u32 code_point = 0x9; code_point <= 0xd; ++code_point) {
  313. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  314. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  315. EXPECT(Unicode::code_point_has_property(code_point, property_ascii));
  316. EXPECT(Unicode::code_point_has_property(code_point, property_white_space));
  317. EXPECT(!Unicode::code_point_has_property(code_point, property_emoji_presentation));
  318. }
  319. for (u32 code_point = 0x1f3e5; code_point <= 0x1f3f0; ++code_point) {
  320. EXPECT(Unicode::code_point_has_property(code_point, property_any));
  321. EXPECT(Unicode::code_point_has_property(code_point, property_assigned));
  322. EXPECT(Unicode::code_point_has_property(code_point, property_emoji_presentation));
  323. EXPECT(!Unicode::code_point_has_property(code_point, property_ascii));
  324. EXPECT(!Unicode::code_point_has_property(code_point, property_white_space));
  325. }
  326. }
  327. TEST_CASE(script)
  328. {
  329. auto script = [](StringView name) {
  330. auto script = Unicode::script_from_string(name);
  331. VERIFY(script.has_value());
  332. return *script;
  333. };
  334. auto script_latin = script("Latin"sv);
  335. auto script_latn = script("Latn"sv);
  336. EXPECT_EQ(script_latin, script_latn);
  337. auto script_cyrillic = script("Cyrillic"sv);
  338. auto script_cyrl = script("Cyrl"sv);
  339. EXPECT_EQ(script_cyrillic, script_cyrl);
  340. auto script_greek = script("Greek"sv);
  341. auto script_grek = script("Grek"sv);
  342. EXPECT_EQ(script_greek, script_grek);
  343. for (u32 code_point = 0x41; code_point <= 0x5a; ++code_point) {
  344. EXPECT(Unicode::code_point_has_script(code_point, script_latin));
  345. EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
  346. EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
  347. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  348. }
  349. for (u32 code_point = 0x61; code_point <= 0x7a; ++code_point) {
  350. EXPECT(Unicode::code_point_has_script(code_point, script_latin));
  351. EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
  352. EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
  353. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  354. }
  355. for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
  356. EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
  357. EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
  358. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  359. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  360. }
  361. for (u32 code_point = 0x400; code_point <= 0x481; ++code_point) {
  362. EXPECT(Unicode::code_point_has_script(code_point, script_cyrillic));
  363. EXPECT(Unicode::code_point_has_script_extension(code_point, script_cyrillic));
  364. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  365. EXPECT(!Unicode::code_point_has_script(code_point, script_greek));
  366. }
  367. for (u32 code_point = 0x1f80; code_point <= 0x1fb4; ++code_point) {
  368. EXPECT(Unicode::code_point_has_script(code_point, script_greek));
  369. EXPECT(Unicode::code_point_has_script_extension(code_point, script_greek));
  370. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  371. EXPECT(!Unicode::code_point_has_script(code_point, script_cyrillic));
  372. }
  373. }
  374. TEST_CASE(script_extension)
  375. {
  376. auto script = [](StringView name) {
  377. auto script = Unicode::script_from_string(name);
  378. VERIFY(script.has_value());
  379. return *script;
  380. };
  381. auto script_latin = script("Latin"sv);
  382. auto script_greek = script("Greek"sv);
  383. for (u32 code_point = 0x363; code_point <= 0x36f; ++code_point) {
  384. EXPECT(!Unicode::code_point_has_script(code_point, script_latin));
  385. EXPECT(Unicode::code_point_has_script_extension(code_point, script_latin));
  386. }
  387. EXPECT(!Unicode::code_point_has_script(0x342, script_greek));
  388. EXPECT(Unicode::code_point_has_script_extension(0x342, script_greek));
  389. EXPECT(!Unicode::code_point_has_script(0x345, script_greek));
  390. EXPECT(Unicode::code_point_has_script_extension(0x345, script_greek));
  391. EXPECT(!Unicode::code_point_has_script(0x1dc0, script_greek));
  392. EXPECT(Unicode::code_point_has_script_extension(0x1dc0, script_greek));
  393. EXPECT(!Unicode::code_point_has_script(0x1dc1, script_greek));
  394. EXPECT(Unicode::code_point_has_script_extension(0x1dc1, script_greek));
  395. auto script_common = script("Common"sv);
  396. auto script_zyyy = script("Zyyy"sv);
  397. EXPECT_EQ(script_common, script_zyyy);
  398. EXPECT(Unicode::code_point_has_script(0x202f, script_common));
  399. EXPECT(!Unicode::code_point_has_script_extension(0x202f, script_common));
  400. EXPECT(Unicode::code_point_has_script(0x3000, script_common));
  401. EXPECT(Unicode::code_point_has_script_extension(0x3000, script_common));
  402. auto script_inherited = script("Inherited"sv);
  403. auto script_qaai = script("Qaai"sv);
  404. auto script_zinh = script("Zinh"sv);
  405. EXPECT_EQ(script_inherited, script_qaai);
  406. EXPECT_EQ(script_inherited, script_zinh);
  407. EXPECT(Unicode::code_point_has_script(0x1ced, script_inherited));
  408. EXPECT(!Unicode::code_point_has_script_extension(0x1ced, script_inherited));
  409. EXPECT(Unicode::code_point_has_script(0x101fd, script_inherited));
  410. EXPECT(Unicode::code_point_has_script_extension(0x101fd, script_inherited));
  411. }