瀏覽代碼

AK: Add a method to compute UTF-16 length from a UTF-8 string

Timothy Flynn 1 年之前
父節點
當前提交
7a17c654d2
共有 3 個文件被更改,包括 19 次插入和 0 次删除
  1. 9 0
      AK/Utf16View.cpp
  2. 2 0
      AK/Utf16View.h
  3. 8 0
      Tests/AK/TestUtf16.cpp

+ 9 - 0
AK/Utf16View.cpp

@@ -129,6 +129,15 @@ ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point, Endianness
     return {};
 }
 
+size_t utf16_code_unit_length_from_utf8(StringView string)
+{
+    // FIXME: The CPU-specific implementations disagree on how they handle null inputs, so null views are
+    //        treated like an empty string and are answered here without calling into simdutf.
+    return string.is_empty()
+        ? 0
+        : simdutf::utf16_length_from_utf8(string.characters_without_null_termination(), string.length());
+}
+
 bool Utf16View::is_high_surrogate(u16 code_unit)
 {
     return (code_unit >= high_surrogate_min) && (code_unit <= high_surrogate_max);

+ 2 - 0
AK/Utf16View.h

@@ -26,6 +26,8 @@ ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&, Endianness = Endianness::Host)
 ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&, Endianness = Endianness::Host);
 ErrorOr<void> code_point_to_utf16(Utf16Data&, u32, Endianness = Endianness::Host);
 
+size_t utf16_code_unit_length_from_utf8(StringView); // Computes the UTF-16 length, in code units, of the given UTF-8 string (0 for an empty view).
+
 class Utf16View;
 
 class Utf16CodePointIterator {

+ 8 - 0
Tests/AK/TestUtf16.cpp

@@ -89,6 +89,14 @@ TEST_CASE(decode_utf16)
     EXPECT_EQ(i, expected.size());
 }
 
+TEST_CASE(utf16_code_unit_length_from_utf8) // Checks the UTF-16 code unit counts reported for several UTF-8 inputs.
+{
+    EXPECT_EQ(AK::utf16_code_unit_length_from_utf8(""sv), 0uz); // Empty input: zero code units.
+    EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("abc"sv), 3uz); // ASCII: one code unit per character.
+    EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("😀"sv), 2uz); // U+1F600 lies outside the BMP: one surrogate pair.
+    EXPECT_EQ(AK::utf16_code_unit_length_from_utf8("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv), 39uz); // Mixed scripts: 37 BMP characters (1 unit each) + 1 emoji (2 units).
+}
+
 TEST_CASE(null_view)
 {
     Utf16View view;