浏览代码

AK: Explicitly check for null data in Utf8View

The underlying CPU-specific instructions for operating on UTF-8 strings
behave differently for null inputs. Add an explicit check for this state
for consistency.
Timothy Flynn 11 月之前
父节点
当前提交
144452d638
共有 2 个文件被更改，包括 22 次插入和 0 次删除
  1. 10 0
      AK/Utf8View.cpp
  2. 12 0
      Tests/AK/TestUtf8.cpp

+ 10 - 0
AK/Utf8View.cpp

@@ -76,6 +76,10 @@ Utf8View Utf8View::unicode_substring_view(size_t code_point_offset, size_t code_
 
 size_t Utf8View::calculate_length() const
 {
+    // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string.
+    if (is_empty())
+        return 0;
+
     // FIXME: simdutf's code point length method assumes valid UTF-8, whereas Utf8View uses U+FFFD as a replacement
     //        for invalid code points. If we change Utf8View to only accept valid encodings as an invariant, we can
     //        remove this branch.
@@ -155,6 +159,12 @@ Utf8View Utf8View::trim(Utf8View const& characters, TrimMode mode) const
 
 bool Utf8View::validate(size_t& valid_bytes, AllowSurrogates allow_surrogates) const
 {
+    // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string.
+    if (is_empty()) {
+        valid_bytes = 0;
+        return true;
+    }
+
     auto result = simdutf::validate_utf8_with_errors(m_string.characters_without_null_termination(), m_string.length());
     valid_bytes = result.count;
 

+ 12 - 0
Tests/AK/TestUtf8.cpp

@@ -48,6 +48,18 @@ TEST_CASE(decode_utf8)
     EXPECT_EQ(i, expected_size);
 }
 
+TEST_CASE(null_view) // Checks that a default-constructed (null) Utf8View behaves like an empty string.
+{
+    Utf8View view; // Default-constructed: no string is supplied.
+    EXPECT(view.validate(Utf8View::AllowSurrogates::No)); // Validation must succeed in both surrogate modes.
+    EXPECT(view.validate(Utf8View::AllowSurrogates::Yes));
+    EXPECT_EQ(view.byte_length(), 0zu); // Both size queries must report zero.
+    EXPECT_EQ(view.length(), 0zu);
+
+    for ([[maybe_unused]] auto it : view) // The loop body must never run; reaching FAIL means a value was produced.
+        FAIL("Iterating a null UTF-8 string should not produce any values");
+}
+
 TEST_CASE(validate_invalid_ut8)
 {
     size_t valid_bytes;