TestWchar.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502
  1. /*
  2. * Copyright (c) 2021, the SerenityOS developers.
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibTest/TestCase.h>
  7. #include <errno.h>
  8. #include <string.h>
  9. #include <wchar.h>
  10. TEST_CASE(wcspbrk)
  11. {
  12. const wchar_t* input;
  13. wchar_t* ret;
  14. // Test empty haystack.
  15. ret = wcspbrk(L"", L"ab");
  16. EXPECT_EQ(ret, nullptr);
  17. // Test empty needle.
  18. ret = wcspbrk(L"ab", L"");
  19. EXPECT_EQ(ret, nullptr);
  20. // Test search for a single character.
  21. input = L"abcd";
  22. ret = wcspbrk(input, L"a");
  23. EXPECT_EQ(ret, input);
  24. // Test search for multiple characters, none matches.
  25. ret = wcspbrk(input, L"zxy");
  26. EXPECT_EQ(ret, nullptr);
  27. // Test search for multiple characters, last matches.
  28. ret = wcspbrk(input, L"zxyc");
  29. EXPECT_EQ(ret, input + 2);
  30. }
  31. TEST_CASE(wcsstr)
  32. {
  33. const wchar_t* input = L"abcde";
  34. wchar_t* ret;
  35. // Empty needle should return haystack.
  36. ret = wcsstr(input, L"");
  37. EXPECT_EQ(ret, input);
  38. // Test exact match.
  39. ret = wcsstr(input, input);
  40. EXPECT_EQ(ret, input);
  41. // Test match at string start.
  42. ret = wcsstr(input, L"ab");
  43. EXPECT_EQ(ret, input);
  44. // Test match at string end.
  45. ret = wcsstr(input, L"de");
  46. EXPECT_EQ(ret, input + 3);
  47. // Test no match.
  48. ret = wcsstr(input, L"z");
  49. EXPECT_EQ(ret, nullptr);
  50. // Test needle that is longer than the haystack.
  51. ret = wcsstr(input, L"abcdef");
  52. EXPECT_EQ(ret, nullptr);
  53. }
  54. TEST_CASE(wmemchr)
  55. {
  56. const wchar_t* input = L"abcde";
  57. wchar_t* ret;
  58. // Empty haystack returns nothing.
  59. ret = wmemchr(L"", L'c', 0);
  60. EXPECT_EQ(ret, nullptr);
  61. // Not included character returns nothing.
  62. ret = wmemchr(input, L'z', 5);
  63. EXPECT_EQ(ret, nullptr);
  64. // Match at string start.
  65. ret = wmemchr(input, L'a', 5);
  66. EXPECT_EQ(ret, input);
  67. // Match at string end.
  68. ret = wmemchr(input, L'e', 5);
  69. EXPECT_EQ(ret, input + 4);
  70. input = L"abcde\0fg";
  71. // Handle finding null characters.
  72. ret = wmemchr(input, L'\0', 8);
  73. EXPECT_EQ(ret, input + 5);
  74. // Don't stop at null characters.
  75. ret = wmemchr(input, L'f', 8);
  76. EXPECT_EQ(ret, input + 6);
  77. }
  78. TEST_CASE(wmemcpy)
  79. {
  80. const wchar_t* input = L"abc\0def";
  81. auto buf = static_cast<wchar_t*>(malloc(8 * sizeof(wchar_t)));
  82. if (!buf) {
  83. FAIL("Could not allocate space for copy target");
  84. return;
  85. }
  86. wchar_t* ret = wmemcpy(buf, input, 8);
  87. EXPECT_EQ(ret, buf);
  88. EXPECT_EQ(memcmp(buf, input, 8 * sizeof(wchar_t)), 0);
  89. }
  90. TEST_CASE(wmemset)
  91. {
  92. auto buf_length = 8;
  93. auto buf = static_cast<wchar_t*>(calloc(buf_length, sizeof(wchar_t)));
  94. if (!buf) {
  95. FAIL("Could not allocate memory for target buffer");
  96. return;
  97. }
  98. wchar_t* ret = wmemset(buf, L'\U0001f41e', buf_length - 1);
  99. EXPECT_EQ(ret, buf);
  100. for (int i = 0; i < buf_length - 1; i++) {
  101. EXPECT_EQ(buf[i], L'\U0001f41e');
  102. }
  103. EXPECT_EQ(buf[buf_length - 1], L'\0');
  104. free(buf);
  105. }
  106. TEST_CASE(wmemmove)
  107. {
  108. wchar_t* ret;
  109. const wchar_t* string = L"abc\0def";
  110. auto buf = static_cast<wchar_t*>(calloc(32, sizeof(wchar_t)));
  111. if (!buf) {
  112. FAIL("Could not allocate memory for target buffer");
  113. return;
  114. }
  115. // Test moving to smaller addresses.
  116. wmemcpy(buf + 3, string, 8);
  117. ret = wmemmove(buf + 1, buf + 3, 8);
  118. EXPECT_EQ(ret, buf + 1);
  119. EXPECT_EQ(memcmp(string, buf + 1, 8 * sizeof(wchar_t)), 0);
  120. // Test moving to larger addresses.
  121. wmemcpy(buf + 16, string, 8);
  122. ret = wmemmove(buf + 18, buf + 16, 8);
  123. EXPECT_EQ(ret, buf + 18);
  124. EXPECT_EQ(memcmp(string, buf + 18, 8 * sizeof(wchar_t)), 0);
  125. free(buf);
  126. }
  127. TEST_CASE(wcscoll)
  128. {
  129. // Check if wcscoll is sorting correctly. At the moment we are doing raw char comparisons,
  130. // so it's digits, then uppercase letters, then lowercase letters.
  131. // Equalness between equal strings.
  132. EXPECT(wcscoll(L"", L"") == 0);
  133. EXPECT(wcscoll(L"0", L"0") == 0);
  134. // Shorter strings before longer strings.
  135. EXPECT(wcscoll(L"", L"0") < 0);
  136. EXPECT(wcscoll(L"0", L"") > 0);
  137. EXPECT(wcscoll(L"123", L"1234") < 0);
  138. EXPECT(wcscoll(L"1234", L"123") > 0);
  139. // Order within digits.
  140. EXPECT(wcscoll(L"0", L"9") < 0);
  141. EXPECT(wcscoll(L"9", L"0") > 0);
  142. // Digits before uppercase letters.
  143. EXPECT(wcscoll(L"9", L"A") < 0);
  144. EXPECT(wcscoll(L"A", L"9") > 0);
  145. // Order within uppercase letters.
  146. EXPECT(wcscoll(L"A", L"Z") < 0);
  147. EXPECT(wcscoll(L"Z", L"A") > 0);
  148. // Uppercase letters before lowercase letters.
  149. EXPECT(wcscoll(L"Z", L"a") < 0);
  150. EXPECT(wcscoll(L"a", L"Z") > 0);
  151. // Uppercase letters before lowercase letters.
  152. EXPECT(wcscoll(L"a", L"z") < 0);
  153. EXPECT(wcscoll(L"z", L"a") > 0);
  154. }
  155. TEST_CASE(mbsinit)
  156. {
  157. // Ensure that nullptr is considered an initial state.
  158. EXPECT(mbsinit(nullptr) != 0);
  159. // Ensure that a zero-initialized state is recognized as initial state.
  160. mbstate_t state = {};
  161. EXPECT(mbsinit(&state) != 0);
  162. // Read a partial multibyte sequence (0b11011111 / 0xdf).
  163. size_t ret = mbrtowc(nullptr, "\xdf", 1, &state);
  164. if (ret != -2ul)
  165. FAIL(String::formatted("mbrtowc accepted partial multibyte sequence with return code {} (expected -2)", static_cast<ssize_t>(ret)));
  166. // Ensure that we are not in an initial state.
  167. EXPECT(mbsinit(&state) == 0);
  168. // Read the remaining multibyte sequence (0b10111111 / 0xbf).
  169. ret = mbrtowc(nullptr, "\xbf", 1, &state);
  170. if (ret != 1ul)
  171. FAIL(String::formatted("mbrtowc did not consume the expected number of bytes (1), returned {} instead", static_cast<ssize_t>(ret)));
  172. // Ensure that we are in an initial state again.
  173. EXPECT(mbsinit(&state) != 0);
  174. }
  175. TEST_CASE(mbrtowc)
  176. {
  177. size_t ret = 0;
  178. mbstate_t state = {};
  179. wchar_t wc = 0;
  180. // Ensure that we can parse normal ASCII characters.
  181. ret = mbrtowc(&wc, "Hello", 5, &state);
  182. EXPECT_EQ(ret, 1ul);
  183. EXPECT_EQ(wc, 'H');
  184. // Try two three-byte codepoints (™™), only one of which should be consumed.
  185. ret = mbrtowc(&wc, "\xe2\x84\xa2\xe2\x84\xa2", 6, &state);
  186. EXPECT_EQ(ret, 3ul);
  187. EXPECT_EQ(wc, 0x2122);
  188. // Try a null character, which should return 0 and reset the state to the initial state.
  189. ret = mbrtowc(&wc, "\x00\x00", 2, &state);
  190. EXPECT_EQ(ret, 0ul);
  191. EXPECT_EQ(wc, 0);
  192. EXPECT_NE(mbsinit(&state), 0);
  193. // Try an incomplete multibyte character.
  194. ret = mbrtowc(&wc, "\xe2\x84", 2, &state);
  195. EXPECT_EQ(ret, -2ul);
  196. EXPECT_EQ(mbsinit(&state), 0);
  197. mbstate_t incomplete_state = state;
  198. // Finish the previous multibyte character.
  199. ret = mbrtowc(&wc, "\xa2", 1, &state);
  200. EXPECT_EQ(ret, 1ul);
  201. EXPECT_EQ(wc, 0x2122);
  202. // Try an invalid multibyte sequence.
  203. // Reset the state afterwards because the effects are undefined.
  204. ret = mbrtowc(&wc, "\xff", 1, &state);
  205. EXPECT_EQ(ret, -1ul);
  206. EXPECT_EQ(errno, EILSEQ);
  207. state = {};
  208. // Try a successful conversion, but without target address.
  209. ret = mbrtowc(nullptr, "\xe2\x84\xa2\xe2\x84\xa2", 6, &state);
  210. EXPECT_EQ(ret, 3ul);
  211. // Test the "null byte shorthand". Ensure that wc is ignored.
  212. state = {};
  213. wchar_t old_wc = wc;
  214. ret = mbrtowc(&wc, nullptr, 0, &state);
  215. EXPECT_EQ(ret, 0ul);
  216. EXPECT_EQ(wc, old_wc);
  217. // Test recognition of incomplete multibyte sequences.
  218. ret = mbrtowc(nullptr, nullptr, 0, &incomplete_state);
  219. EXPECT_EQ(ret, -1ul);
  220. EXPECT_EQ(errno, EILSEQ);
  221. }
  222. TEST_CASE(wcrtomb)
  223. {
  224. char buf[MB_LEN_MAX];
  225. size_t ret = 0;
  226. // Ensure that `wc` is ignored when buf is a nullptr.
  227. ret = wcrtomb(nullptr, L'a', nullptr);
  228. EXPECT_EQ(ret, 1ul);
  229. ret = wcrtomb(nullptr, L'\U0001F41E', nullptr);
  230. EXPECT_EQ(ret, 1ul);
  231. // When the buffer is non-null, the multibyte representation is written into it.
  232. ret = wcrtomb(buf, L'a', nullptr);
  233. EXPECT_EQ(ret, 1ul);
  234. EXPECT_EQ(memcmp(buf, "a", ret), 0);
  235. ret = wcrtomb(buf, L'\U0001F41E', nullptr);
  236. EXPECT_EQ(ret, 4ul);
  237. EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", ret), 0);
  238. // When the wide character is invalid, -1 is returned and errno is set to EILSEQ.
  239. ret = wcrtomb(buf, 0x110000, nullptr);
  240. EXPECT_EQ(ret, (size_t)-1);
  241. EXPECT_EQ(errno, EILSEQ);
  242. // Replacement characters and conversion errors are not confused.
  243. ret = wcrtomb(buf, L'\uFFFD', nullptr);
  244. EXPECT_NE(ret, (size_t)-1);
  245. }
  246. TEST_CASE(wcsrtombs)
  247. {
  248. mbstate_t state = {};
  249. char buf[MB_LEN_MAX * 4];
  250. const wchar_t good_chars[] = { L'\U0001F41E', L'\U0001F41E', L'\0' };
  251. const wchar_t bad_chars[] = { L'\U0001F41E', static_cast<wchar_t>(0x1111F41E), L'\0' };
  252. const wchar_t* src;
  253. size_t ret = 0;
  254. // Convert normal and valid wchar_t values.
  255. src = good_chars;
  256. ret = wcsrtombs(buf, &src, 9, &state);
  257. EXPECT_EQ(ret, 8ul);
  258. EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e", 9), 0);
  259. EXPECT_EQ(src, nullptr);
  260. EXPECT_NE(mbsinit(&state), 0);
  261. // Stop on invalid wchar values.
  262. src = bad_chars;
  263. ret = wcsrtombs(buf, &src, 9, &state);
  264. EXPECT_EQ(ret, -1ul);
  265. EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", 4), 0);
  266. EXPECT_EQ(errno, EILSEQ);
  267. EXPECT_EQ(src, bad_chars + 1);
  268. // Valid characters but not enough space.
  269. src = good_chars;
  270. ret = wcsrtombs(buf, &src, 7, &state);
  271. EXPECT_EQ(ret, 4ul);
  272. EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", 4), 0);
  273. EXPECT_EQ(src, good_chars + 1);
  274. // Try a conversion with no destination and too short length.
  275. src = good_chars;
  276. ret = wcsrtombs(nullptr, &src, 2, &state);
  277. EXPECT_EQ(ret, 8ul);
  278. EXPECT_EQ(src, nullptr);
  279. EXPECT_NE(mbsinit(&state), 0);
  280. // Try a conversion using the internal anonymous state.
  281. src = good_chars;
  282. ret = wcsrtombs(buf, &src, 9, nullptr);
  283. EXPECT_EQ(ret, 8ul);
  284. EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e", 9), 0);
  285. EXPECT_EQ(src, nullptr);
  286. }
  287. TEST_CASE(wcsnrtombs)
  288. {
  289. mbstate_t state = {};
  290. const wchar_t good_chars[] = { L'\U0001F41E', L'\U0001F41E', L'\0' };
  291. const wchar_t* src;
  292. size_t ret = 0;
  293. // Convert nothing.
  294. src = good_chars;
  295. ret = wcsnrtombs(nullptr, &src, 0, 0, &state);
  296. EXPECT_EQ(ret, 0ul);
  297. EXPECT_EQ(src, good_chars);
  298. // Convert one wide char.
  299. src = good_chars;
  300. ret = wcsnrtombs(nullptr, &src, 1, 0, &state);
  301. EXPECT_EQ(ret, 4ul);
  302. EXPECT_EQ(src, good_chars + 1);
  303. // Encounter a null character.
  304. src = good_chars;
  305. ret = wcsnrtombs(nullptr, &src, 4, 0, &state);
  306. EXPECT_EQ(ret, 8ul);
  307. EXPECT_EQ(src, nullptr);
  308. }
  309. TEST_CASE(mbsrtowcs)
  310. {
  311. mbstate_t state = {};
  312. wchar_t buf[4];
  313. const char good_chars[] = "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e";
  314. const char bad_chars[] = "\xf0\x9f\x90\x9e\xf0\xff\x90\x9e";
  315. const char* src;
  316. size_t ret = 0;
  317. // Convert normal and valid multibyte sequences.
  318. src = good_chars;
  319. ret = mbsrtowcs(buf, &src, 3, &state);
  320. EXPECT_EQ(ret, 2ul);
  321. EXPECT_EQ(buf[0], L'\U0001F41E');
  322. EXPECT_EQ(buf[1], L'\U0001F41E');
  323. EXPECT_EQ(buf[2], L'\0');
  324. EXPECT_EQ(src, nullptr);
  325. EXPECT_NE(mbsinit(&state), 0);
  326. // Stop on invalid multibyte sequences.
  327. src = bad_chars;
  328. ret = mbsrtowcs(buf, &src, 3, &state);
  329. EXPECT_EQ(ret, -1ul);
  330. EXPECT_EQ(buf[0], L'\U0001F41E');
  331. EXPECT_EQ(errno, EILSEQ);
  332. EXPECT_EQ(src, bad_chars + 4);
  333. // Valid sequence but not enough space.
  334. src = good_chars;
  335. ret = mbsrtowcs(buf, &src, 1, &state);
  336. EXPECT_EQ(ret, 1ul);
  337. EXPECT_EQ(buf[0], L'\U0001F41E');
  338. EXPECT_EQ(src, good_chars + 4);
  339. // Try a conversion with no destination and too short length.
  340. src = good_chars;
  341. ret = mbsrtowcs(nullptr, &src, 1, &state);
  342. EXPECT_EQ(ret, 2ul);
  343. EXPECT_EQ(src, nullptr);
  344. EXPECT_NE(mbsinit(&state), 0);
  345. // Try a conversion using the internal anonymous state.
  346. src = good_chars;
  347. ret = mbsrtowcs(buf, &src, 3, nullptr);
  348. EXPECT_EQ(ret, 2ul);
  349. EXPECT_EQ(buf[0], L'\U0001F41E');
  350. EXPECT_EQ(buf[1], L'\U0001F41E');
  351. EXPECT_EQ(buf[2], L'\0');
  352. EXPECT_EQ(src, nullptr);
  353. }
  354. TEST_CASE(wcslcpy)
  355. {
  356. auto buf = static_cast<wchar_t*>(malloc(8 * sizeof(wchar_t)));
  357. if (!buf) {
  358. FAIL("Could not allocate space for copy target");
  359. return;
  360. }
  361. size_t ret;
  362. // If buffer is long enough, a straight-forward string copy is performed.
  363. ret = wcslcpy(buf, L"abc", 8);
  364. EXPECT_EQ(ret, 3ul);
  365. EXPECT_EQ(wmemcmp(L"abc", buf, 4), 0);
  366. // If buffer is (supposedly) too small, the string will be truncated.
  367. ret = wcslcpy(buf, L"1234", 4);
  368. EXPECT_EQ(ret, 4ul);
  369. EXPECT_EQ(wmemcmp(L"123", buf, 4), 0);
  370. // If the buffer is null, the length of the input is returned.
  371. ret = wcslcpy(nullptr, L"abc", 0);
  372. EXPECT_EQ(ret, 3ul);
  373. }
  374. TEST_CASE(mbrlen)
  375. {
  376. size_t ret = 0;
  377. mbstate_t state = {};
  378. // Ensure that we can parse normal ASCII characters.
  379. ret = mbrlen("Hello", 5, &state);
  380. EXPECT_EQ(ret, 1ul);
  381. // Try two three-byte codepoints (™™), only one of which should be consumed.
  382. ret = mbrlen("\xe2\x84\xa2\xe2\x84\xa2", 6, &state);
  383. EXPECT_EQ(ret, 3ul);
  384. // Try a null character, which should return 0 and reset the state to the initial state.
  385. ret = mbrlen("\x00\x00", 2, &state);
  386. EXPECT_EQ(ret, 0ul);
  387. EXPECT_NE(mbsinit(&state), 0);
  388. // Try an incomplete multibyte character.
  389. ret = mbrlen("\xe2\x84", 2, &state);
  390. EXPECT_EQ(ret, -2ul);
  391. EXPECT_EQ(mbsinit(&state), 0);
  392. // Finish the previous multibyte character.
  393. ret = mbrlen("\xa2", 1, &state);
  394. EXPECT_EQ(ret, 1ul);
  395. // Try an invalid multibyte sequence.
  396. // Reset the state afterwards because the effects are undefined.
  397. ret = mbrlen("\xff", 1, &state);
  398. EXPECT_EQ(ret, -1ul);
  399. EXPECT_EQ(errno, EILSEQ);
  400. state = {};
  401. }