123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502 |
- /*
- * Copyright (c) 2021, the SerenityOS developers.
- *
- * SPDX-License-Identifier: BSD-2-Clause
- */
- #include <LibTest/TestCase.h>
- #include <errno.h>
- #include <string.h>
- #include <wchar.h>
- TEST_CASE(wcspbrk)
- {
- const wchar_t* input;
- wchar_t* ret;
- // Test empty haystack.
- ret = wcspbrk(L"", L"ab");
- EXPECT_EQ(ret, nullptr);
- // Test empty needle.
- ret = wcspbrk(L"ab", L"");
- EXPECT_EQ(ret, nullptr);
- // Test search for a single character.
- input = L"abcd";
- ret = wcspbrk(input, L"a");
- EXPECT_EQ(ret, input);
- // Test search for multiple characters, none matches.
- ret = wcspbrk(input, L"zxy");
- EXPECT_EQ(ret, nullptr);
- // Test search for multiple characters, last matches.
- ret = wcspbrk(input, L"zxyc");
- EXPECT_EQ(ret, input + 2);
- }
- TEST_CASE(wcsstr)
- {
- const wchar_t* input = L"abcde";
- wchar_t* ret;
- // Empty needle should return haystack.
- ret = wcsstr(input, L"");
- EXPECT_EQ(ret, input);
- // Test exact match.
- ret = wcsstr(input, input);
- EXPECT_EQ(ret, input);
- // Test match at string start.
- ret = wcsstr(input, L"ab");
- EXPECT_EQ(ret, input);
- // Test match at string end.
- ret = wcsstr(input, L"de");
- EXPECT_EQ(ret, input + 3);
- // Test no match.
- ret = wcsstr(input, L"z");
- EXPECT_EQ(ret, nullptr);
- // Test needle that is longer than the haystack.
- ret = wcsstr(input, L"abcdef");
- EXPECT_EQ(ret, nullptr);
- }
- TEST_CASE(wmemchr)
- {
- const wchar_t* input = L"abcde";
- wchar_t* ret;
- // Empty haystack returns nothing.
- ret = wmemchr(L"", L'c', 0);
- EXPECT_EQ(ret, nullptr);
- // Not included character returns nothing.
- ret = wmemchr(input, L'z', 5);
- EXPECT_EQ(ret, nullptr);
- // Match at string start.
- ret = wmemchr(input, L'a', 5);
- EXPECT_EQ(ret, input);
- // Match at string end.
- ret = wmemchr(input, L'e', 5);
- EXPECT_EQ(ret, input + 4);
- input = L"abcde\0fg";
- // Handle finding null characters.
- ret = wmemchr(input, L'\0', 8);
- EXPECT_EQ(ret, input + 5);
- // Don't stop at null characters.
- ret = wmemchr(input, L'f', 8);
- EXPECT_EQ(ret, input + 6);
- }
- TEST_CASE(wmemcpy)
- {
- const wchar_t* input = L"abc\0def";
- auto buf = static_cast<wchar_t*>(malloc(8 * sizeof(wchar_t)));
- if (!buf) {
- FAIL("Could not allocate space for copy target");
- return;
- }
- wchar_t* ret = wmemcpy(buf, input, 8);
- EXPECT_EQ(ret, buf);
- EXPECT_EQ(memcmp(buf, input, 8 * sizeof(wchar_t)), 0);
- }
- TEST_CASE(wmemset)
- {
- auto buf_length = 8;
- auto buf = static_cast<wchar_t*>(calloc(buf_length, sizeof(wchar_t)));
- if (!buf) {
- FAIL("Could not allocate memory for target buffer");
- return;
- }
- wchar_t* ret = wmemset(buf, L'\U0001f41e', buf_length - 1);
- EXPECT_EQ(ret, buf);
- for (int i = 0; i < buf_length - 1; i++) {
- EXPECT_EQ(buf[i], L'\U0001f41e');
- }
- EXPECT_EQ(buf[buf_length - 1], L'\0');
- free(buf);
- }
- TEST_CASE(wmemmove)
- {
- wchar_t* ret;
- const wchar_t* string = L"abc\0def";
- auto buf = static_cast<wchar_t*>(calloc(32, sizeof(wchar_t)));
- if (!buf) {
- FAIL("Could not allocate memory for target buffer");
- return;
- }
- // Test moving to smaller addresses.
- wmemcpy(buf + 3, string, 8);
- ret = wmemmove(buf + 1, buf + 3, 8);
- EXPECT_EQ(ret, buf + 1);
- EXPECT_EQ(memcmp(string, buf + 1, 8 * sizeof(wchar_t)), 0);
- // Test moving to larger addresses.
- wmemcpy(buf + 16, string, 8);
- ret = wmemmove(buf + 18, buf + 16, 8);
- EXPECT_EQ(ret, buf + 18);
- EXPECT_EQ(memcmp(string, buf + 18, 8 * sizeof(wchar_t)), 0);
- free(buf);
- }
- TEST_CASE(wcscoll)
- {
- // Check if wcscoll is sorting correctly. At the moment we are doing raw char comparisons,
- // so it's digits, then uppercase letters, then lowercase letters.
- // Equalness between equal strings.
- EXPECT(wcscoll(L"", L"") == 0);
- EXPECT(wcscoll(L"0", L"0") == 0);
- // Shorter strings before longer strings.
- EXPECT(wcscoll(L"", L"0") < 0);
- EXPECT(wcscoll(L"0", L"") > 0);
- EXPECT(wcscoll(L"123", L"1234") < 0);
- EXPECT(wcscoll(L"1234", L"123") > 0);
- // Order within digits.
- EXPECT(wcscoll(L"0", L"9") < 0);
- EXPECT(wcscoll(L"9", L"0") > 0);
- // Digits before uppercase letters.
- EXPECT(wcscoll(L"9", L"A") < 0);
- EXPECT(wcscoll(L"A", L"9") > 0);
- // Order within uppercase letters.
- EXPECT(wcscoll(L"A", L"Z") < 0);
- EXPECT(wcscoll(L"Z", L"A") > 0);
- // Uppercase letters before lowercase letters.
- EXPECT(wcscoll(L"Z", L"a") < 0);
- EXPECT(wcscoll(L"a", L"Z") > 0);
- // Uppercase letters before lowercase letters.
- EXPECT(wcscoll(L"a", L"z") < 0);
- EXPECT(wcscoll(L"z", L"a") > 0);
- }
- TEST_CASE(mbsinit)
- {
- // Ensure that nullptr is considered an initial state.
- EXPECT(mbsinit(nullptr) != 0);
- // Ensure that a zero-initialized state is recognized as initial state.
- mbstate_t state = {};
- EXPECT(mbsinit(&state) != 0);
- // Read a partial multibyte sequence (0b11011111 / 0xdf).
- size_t ret = mbrtowc(nullptr, "\xdf", 1, &state);
- if (ret != -2ul)
- FAIL(String::formatted("mbrtowc accepted partial multibyte sequence with return code {} (expected -2)", static_cast<ssize_t>(ret)));
- // Ensure that we are not in an initial state.
- EXPECT(mbsinit(&state) == 0);
- // Read the remaining multibyte sequence (0b10111111 / 0xbf).
- ret = mbrtowc(nullptr, "\xbf", 1, &state);
- if (ret != 1ul)
- FAIL(String::formatted("mbrtowc did not consume the expected number of bytes (1), returned {} instead", static_cast<ssize_t>(ret)));
- // Ensure that we are in an initial state again.
- EXPECT(mbsinit(&state) != 0);
- }
- TEST_CASE(mbrtowc)
- {
- size_t ret = 0;
- mbstate_t state = {};
- wchar_t wc = 0;
- // Ensure that we can parse normal ASCII characters.
- ret = mbrtowc(&wc, "Hello", 5, &state);
- EXPECT_EQ(ret, 1ul);
- EXPECT_EQ(wc, 'H');
- // Try two three-byte codepoints (™™), only one of which should be consumed.
- ret = mbrtowc(&wc, "\xe2\x84\xa2\xe2\x84\xa2", 6, &state);
- EXPECT_EQ(ret, 3ul);
- EXPECT_EQ(wc, 0x2122);
- // Try a null character, which should return 0 and reset the state to the initial state.
- ret = mbrtowc(&wc, "\x00\x00", 2, &state);
- EXPECT_EQ(ret, 0ul);
- EXPECT_EQ(wc, 0);
- EXPECT_NE(mbsinit(&state), 0);
- // Try an incomplete multibyte character.
- ret = mbrtowc(&wc, "\xe2\x84", 2, &state);
- EXPECT_EQ(ret, -2ul);
- EXPECT_EQ(mbsinit(&state), 0);
- mbstate_t incomplete_state = state;
- // Finish the previous multibyte character.
- ret = mbrtowc(&wc, "\xa2", 1, &state);
- EXPECT_EQ(ret, 1ul);
- EXPECT_EQ(wc, 0x2122);
- // Try an invalid multibyte sequence.
- // Reset the state afterwards because the effects are undefined.
- ret = mbrtowc(&wc, "\xff", 1, &state);
- EXPECT_EQ(ret, -1ul);
- EXPECT_EQ(errno, EILSEQ);
- state = {};
- // Try a successful conversion, but without target address.
- ret = mbrtowc(nullptr, "\xe2\x84\xa2\xe2\x84\xa2", 6, &state);
- EXPECT_EQ(ret, 3ul);
- // Test the "null byte shorthand". Ensure that wc is ignored.
- state = {};
- wchar_t old_wc = wc;
- ret = mbrtowc(&wc, nullptr, 0, &state);
- EXPECT_EQ(ret, 0ul);
- EXPECT_EQ(wc, old_wc);
- // Test recognition of incomplete multibyte sequences.
- ret = mbrtowc(nullptr, nullptr, 0, &incomplete_state);
- EXPECT_EQ(ret, -1ul);
- EXPECT_EQ(errno, EILSEQ);
- }
- TEST_CASE(wcrtomb)
- {
- char buf[MB_LEN_MAX];
- size_t ret = 0;
- // Ensure that `wc` is ignored when buf is a nullptr.
- ret = wcrtomb(nullptr, L'a', nullptr);
- EXPECT_EQ(ret, 1ul);
- ret = wcrtomb(nullptr, L'\U0001F41E', nullptr);
- EXPECT_EQ(ret, 1ul);
- // When the buffer is non-null, the multibyte representation is written into it.
- ret = wcrtomb(buf, L'a', nullptr);
- EXPECT_EQ(ret, 1ul);
- EXPECT_EQ(memcmp(buf, "a", ret), 0);
- ret = wcrtomb(buf, L'\U0001F41E', nullptr);
- EXPECT_EQ(ret, 4ul);
- EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", ret), 0);
- // When the wide character is invalid, -1 is returned and errno is set to EILSEQ.
- ret = wcrtomb(buf, 0x110000, nullptr);
- EXPECT_EQ(ret, (size_t)-1);
- EXPECT_EQ(errno, EILSEQ);
- // Replacement characters and conversion errors are not confused.
- ret = wcrtomb(buf, L'\uFFFD', nullptr);
- EXPECT_NE(ret, (size_t)-1);
- }
- TEST_CASE(wcsrtombs)
- {
- mbstate_t state = {};
- char buf[MB_LEN_MAX * 4];
- const wchar_t good_chars[] = { L'\U0001F41E', L'\U0001F41E', L'\0' };
- const wchar_t bad_chars[] = { L'\U0001F41E', static_cast<wchar_t>(0x1111F41E), L'\0' };
- const wchar_t* src;
- size_t ret = 0;
- // Convert normal and valid wchar_t values.
- src = good_chars;
- ret = wcsrtombs(buf, &src, 9, &state);
- EXPECT_EQ(ret, 8ul);
- EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e", 9), 0);
- EXPECT_EQ(src, nullptr);
- EXPECT_NE(mbsinit(&state), 0);
- // Stop on invalid wchar values.
- src = bad_chars;
- ret = wcsrtombs(buf, &src, 9, &state);
- EXPECT_EQ(ret, -1ul);
- EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", 4), 0);
- EXPECT_EQ(errno, EILSEQ);
- EXPECT_EQ(src, bad_chars + 1);
- // Valid characters but not enough space.
- src = good_chars;
- ret = wcsrtombs(buf, &src, 7, &state);
- EXPECT_EQ(ret, 4ul);
- EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", 4), 0);
- EXPECT_EQ(src, good_chars + 1);
- // Try a conversion with no destination and too short length.
- src = good_chars;
- ret = wcsrtombs(nullptr, &src, 2, &state);
- EXPECT_EQ(ret, 8ul);
- EXPECT_EQ(src, nullptr);
- EXPECT_NE(mbsinit(&state), 0);
- // Try a conversion using the internal anonymous state.
- src = good_chars;
- ret = wcsrtombs(buf, &src, 9, nullptr);
- EXPECT_EQ(ret, 8ul);
- EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e", 9), 0);
- EXPECT_EQ(src, nullptr);
- }
- TEST_CASE(wcsnrtombs)
- {
- mbstate_t state = {};
- const wchar_t good_chars[] = { L'\U0001F41E', L'\U0001F41E', L'\0' };
- const wchar_t* src;
- size_t ret = 0;
- // Convert nothing.
- src = good_chars;
- ret = wcsnrtombs(nullptr, &src, 0, 0, &state);
- EXPECT_EQ(ret, 0ul);
- EXPECT_EQ(src, good_chars);
- // Convert one wide char.
- src = good_chars;
- ret = wcsnrtombs(nullptr, &src, 1, 0, &state);
- EXPECT_EQ(ret, 4ul);
- EXPECT_EQ(src, good_chars + 1);
- // Encounter a null character.
- src = good_chars;
- ret = wcsnrtombs(nullptr, &src, 4, 0, &state);
- EXPECT_EQ(ret, 8ul);
- EXPECT_EQ(src, nullptr);
- }
- TEST_CASE(mbsrtowcs)
- {
- mbstate_t state = {};
- wchar_t buf[4];
- const char good_chars[] = "\xf0\x9f\x90\x9e\xf0\x9f\x90\x9e";
- const char bad_chars[] = "\xf0\x9f\x90\x9e\xf0\xff\x90\x9e";
- const char* src;
- size_t ret = 0;
- // Convert normal and valid multibyte sequences.
- src = good_chars;
- ret = mbsrtowcs(buf, &src, 3, &state);
- EXPECT_EQ(ret, 2ul);
- EXPECT_EQ(buf[0], L'\U0001F41E');
- EXPECT_EQ(buf[1], L'\U0001F41E');
- EXPECT_EQ(buf[2], L'\0');
- EXPECT_EQ(src, nullptr);
- EXPECT_NE(mbsinit(&state), 0);
- // Stop on invalid multibyte sequences.
- src = bad_chars;
- ret = mbsrtowcs(buf, &src, 3, &state);
- EXPECT_EQ(ret, -1ul);
- EXPECT_EQ(buf[0], L'\U0001F41E');
- EXPECT_EQ(errno, EILSEQ);
- EXPECT_EQ(src, bad_chars + 4);
- // Valid sequence but not enough space.
- src = good_chars;
- ret = mbsrtowcs(buf, &src, 1, &state);
- EXPECT_EQ(ret, 1ul);
- EXPECT_EQ(buf[0], L'\U0001F41E');
- EXPECT_EQ(src, good_chars + 4);
- // Try a conversion with no destination and too short length.
- src = good_chars;
- ret = mbsrtowcs(nullptr, &src, 1, &state);
- EXPECT_EQ(ret, 2ul);
- EXPECT_EQ(src, nullptr);
- EXPECT_NE(mbsinit(&state), 0);
- // Try a conversion using the internal anonymous state.
- src = good_chars;
- ret = mbsrtowcs(buf, &src, 3, nullptr);
- EXPECT_EQ(ret, 2ul);
- EXPECT_EQ(buf[0], L'\U0001F41E');
- EXPECT_EQ(buf[1], L'\U0001F41E');
- EXPECT_EQ(buf[2], L'\0');
- EXPECT_EQ(src, nullptr);
- }
- TEST_CASE(wcslcpy)
- {
- auto buf = static_cast<wchar_t*>(malloc(8 * sizeof(wchar_t)));
- if (!buf) {
- FAIL("Could not allocate space for copy target");
- return;
- }
- size_t ret;
- // If buffer is long enough, a straight-forward string copy is performed.
- ret = wcslcpy(buf, L"abc", 8);
- EXPECT_EQ(ret, 3ul);
- EXPECT_EQ(wmemcmp(L"abc", buf, 4), 0);
- // If buffer is (supposedly) too small, the string will be truncated.
- ret = wcslcpy(buf, L"1234", 4);
- EXPECT_EQ(ret, 4ul);
- EXPECT_EQ(wmemcmp(L"123", buf, 4), 0);
- // If the buffer is null, the length of the input is returned.
- ret = wcslcpy(nullptr, L"abc", 0);
- EXPECT_EQ(ret, 3ul);
- }
- TEST_CASE(mbrlen)
- {
- size_t ret = 0;
- mbstate_t state = {};
- // Ensure that we can parse normal ASCII characters.
- ret = mbrlen("Hello", 5, &state);
- EXPECT_EQ(ret, 1ul);
- // Try two three-byte codepoints (™™), only one of which should be consumed.
- ret = mbrlen("\xe2\x84\xa2\xe2\x84\xa2", 6, &state);
- EXPECT_EQ(ret, 3ul);
- // Try a null character, which should return 0 and reset the state to the initial state.
- ret = mbrlen("\x00\x00", 2, &state);
- EXPECT_EQ(ret, 0ul);
- EXPECT_NE(mbsinit(&state), 0);
- // Try an incomplete multibyte character.
- ret = mbrlen("\xe2\x84", 2, &state);
- EXPECT_EQ(ret, -2ul);
- EXPECT_EQ(mbsinit(&state), 0);
- // Finish the previous multibyte character.
- ret = mbrlen("\xa2", 1, &state);
- EXPECT_EQ(ret, 1ul);
- // Try an invalid multibyte sequence.
- // Reset the state afterwards because the effects are undefined.
- ret = mbrlen("\xff", 1, &state);
- EXPECT_EQ(ret, -1ul);
- EXPECT_EQ(errno, EILSEQ);
- state = {};
- }
|