Bladeren bron

LibC: Fix up mbtowc

One more proper implementation and one less FIXME.
Tim Schumacher 3 jaren geleden
bovenliggende
commit
8df6955838
2 gewijzigde bestanden met toevoegingen van 56 en 8 verwijderingen
  1. 41 0
      Tests/LibC/TestWchar.cpp
  2. 15 8
      Userland/Libraries/LibC/stdlib.cpp

+ 41 - 0
Tests/LibC/TestWchar.cpp

@@ -539,3 +539,44 @@ TEST_CASE(mbrlen)
     EXPECT_EQ(errno, EILSEQ);
     state = {};
 }
+
+TEST_CASE(mbtowc)
+{
+    int ret = 0;
+    wchar_t wc = 0;
+
+    // Plain ASCII input: only the first character (H) is converted, so one byte is consumed.
+    ret = mbtowc(&wc, "Hello", 5);
+    EXPECT_EQ(ret, 1);
+    EXPECT_EQ(wc, 'H');
+
+    // Two three-byte codepoints (™™, U+2122 twice); only the first one should be consumed.
+    ret = mbtowc(&wc, "\xe2\x84\xa2\xe2\x84\xa2", 6);
+    EXPECT_EQ(ret, 3);
+    EXPECT_EQ(wc, 0x2122);
+
+    // A null character is expected to yield a return value of 0 and a zero wide character.
+    ret = mbtowc(&wc, "\x00\x00", 2);
+    EXPECT_EQ(ret, 0);
+    EXPECT_EQ(wc, 0);
+
+    // A three-byte sequence cut off after two bytes: expected to fail with -1 and EILSEQ.
+    ret = mbtowc(&wc, "\xe2\x84", 2);
+    EXPECT_EQ(ret, -1);
+    EXPECT_EQ(errno, EILSEQ);
+
+    // Passing a null string asks whether shift states are supported and resets the internal state.
+    ret = mbtowc(nullptr, nullptr, 2);
+    EXPECT_EQ(ret, 0); // We don't support shift states.
+    ret = mbtowc(nullptr, "\x00", 1);
+    EXPECT_EQ(ret, 0); // No error here suggests the reset cleared the leftover partial sequence.
+
+    // A byte that cannot start any valid multibyte sequence: expected to fail with -1 and EILSEQ.
+    ret = mbtowc(&wc, "\xff", 1);
+    EXPECT_EQ(ret, -1);
+    EXPECT_EQ(errno, EILSEQ); // NOTE(review): errno is never reset in this test, so this may pass on the stale EILSEQ from the incomplete-sequence case.
+
+    // A successful conversion without a target address still reports the number of bytes consumed.
+    ret = mbtowc(nullptr, "\xe2\x84\xa2\xe2\x84\xa2", 6);
+    EXPECT_EQ(ret, 3);
+}

+ 15 - 8
Userland/Libraries/LibC/stdlib.cpp

@@ -884,19 +884,26 @@ size_t mbstowcs(wchar_t* pwcs, const char* s, size_t n)
     return mbsrtowcs(pwcs, &s, n, &state);
 }
 
-int mbtowc(wchar_t* wch, const char* data, [[maybe_unused]] size_t data_size)
+int mbtowc(wchar_t* pwc, const char* s, size_t n)
 {
-    // FIXME: This needs a real implementation.
-    if (wch && data) {
-        *wch = *data;
-        return 1;
+    static mbstate_t internal_state = {}; // Conversion state that persists across calls.
+
+    // A null s resets the internal state; returning 0 reports that there are no shift states.
+    if (s == nullptr) {
+        internal_state = {};
+        return 0;
     }
 
-    if (!wch && data) {
-        return 1;
+    size_t ret = mbrtowc(pwc, s, n, &internal_state); // The actual decoding is delegated to mbrtowc.
+
+    // mbrtowc signals an incomplete character with -2; this function reports that as an illegal sequence.
+    // The internal state is undefined after such a failure, so don't bother with resetting it.
+    if (ret == -2ul) { // NOTE(review): assumes unsigned long is as wide as size_t - confirm for all targets.
+        errno = EILSEQ;
+        return -1;
     }
 
-    return 0;
+    return ret; // The size_t result (including mbrtowc's -1 on error) is implicitly converted to int.
 }
 
 int wctomb(char* s, wchar_t wc)