Przeglądaj źródła

LibC: Fix up mblen

Tim Schumacher 3 lat temu
rodzic
commit
79bcfa967b
2 zmienionych plików z 50 dodań i 3 usunięć
  1. 33 0
      Tests/LibC/TestWchar.cpp
  2. 17 3
      Userland/Libraries/LibC/stdlib.cpp

+ 33 - 0
Tests/LibC/TestWchar.cpp

@@ -580,3 +580,36 @@ TEST_CASE(mbtowc)
     ret = mbtowc(nullptr, "\xe2\x84\xa2\xe2\x84\xa2", 6);
     EXPECT_EQ(ret, 3);
 }
+
+TEST_CASE(mblen)
+{
+    int ret = 0;
+
+    // Plain ASCII input: only the first character ('H') is measured, so the expected length is 1.
+    ret = mblen("Hello", 5);
+    EXPECT_EQ(ret, 1);
+
+    // Try two three-byte codepoints (™™) back to back; only the first one should be consumed.
+    ret = mblen("\xe2\x84\xa2\xe2\x84\xa2", 6);
+    EXPECT_EQ(ret, 3);
+
+    // Try a null character, which should return 0 rather than a byte count.
+    ret = mblen("\x00\x00", 2);
+    EXPECT_EQ(ret, 0);
+
+    // Try an incomplete multibyte character (the first two bytes of the three-byte sequence used above); expected to fail with EILSEQ.
+    ret = mblen("\xe2\x84", 2);
+    EXPECT_EQ(ret, -1);
+    EXPECT_EQ(errno, EILSEQ); // NOTE(review): errno is not cleared before the call, so a stale EILSEQ would also satisfy this check.
+
+    // Ask if we support shift states (null string) and reset the internal state left behind by the incomplete character in the process.
+    ret = mblen(nullptr, 2);
+    EXPECT_EQ(ret, 0); // We don't support shift states.
+    ret = mblen("\x00", 1);
+    EXPECT_EQ(ret, 0); // No error likely means that the state was reset and is working again.
+
+    // Try an invalid multibyte sequence (a lone 0xff byte), which is expected to be rejected with EILSEQ.
+    ret = mblen("\xff", 1);
+    EXPECT_EQ(ret, -1);
+    EXPECT_EQ(errno, EILSEQ); // NOTE(review): errno was already set to EILSEQ earlier in this test unless an intervening call changed it; consider resetting it first.
+}

+ 17 - 3
Userland/Libraries/LibC/stdlib.cpp

@@ -872,10 +872,24 @@ lldiv_t lldiv(long long numerator, long long denominator)
 
 int mblen(char const* s, size_t n)
 {
-    // FIXME: Implement locale support
-    if (!s)
+    // POSIX: Equivalent to mbtowc(NULL, s, n), but we mustn't change the state of mbtowc, so mblen keeps its own private conversion state.
+    static mbstate_t internal_state = {};
+
+    // A null string means: reset the internal state and report whether the encoding has shift states (0 = it does not).
+    if (s == nullptr) {
+        internal_state = {};
         return 0;
-    return (MB_CUR_MAX > n) ? n : MB_CUR_MAX;
+    }
+
+    size_t ret = mbrtowc(nullptr, s, n, &internal_state); // Null destination: the decoded character is not stored, only the result code is used.
+
+    // Incomplete characters ((size_t)-2 from mbrtowc) get returned as illegal sequence, since this function only reports -1 for failures.
+    if (ret == -2ul) { // NOTE(review): -2ul equals (size_t)-2 only when size_t is as wide as unsigned long; confirm for all supported targets.
+        errno = EILSEQ;
+        return -1;
+    }
+
+    return ret; // Implicit size_t -> int conversion; presumably an mbrtowc failure of (size_t)-1 comes out as -1 here (verify errno is set by mbrtowc).
 }
 
 size_t mbstowcs(wchar_t* pwcs, const char* s, size_t n)