From d19b31529f28e88aa691a6a9fbd0215c54c6e81c Mon Sep 17 00:00:00 2001 From: Timothy Flynn Date: Thu, 19 Sep 2024 14:25:38 -0400 Subject: [PATCH] AK+Meta: Update simdutf to version 5.5.0 Contains many fixes found upstream by fuzzers. Also includes fixes for CPU-specific inconsistencies with null inputs. --- AK/Utf16View.cpp | 18 ------------------ AK/Utf8View.cpp | 10 ---------- vcpkg.json | 2 +- 3 files changed, 1 insertion(+), 29 deletions(-) diff --git a/AK/Utf16View.cpp b/AK/Utf16View.cpp index 1276a37363c..449a1b0b88f 100644 --- a/AK/Utf16View.cpp +++ b/AK/Utf16View.cpp @@ -131,10 +131,6 @@ ErrorOr code_point_to_utf16(Utf16Data& string, u32 code_point, Endianness size_t utf16_code_unit_length_from_utf8(StringView string) { - // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string. - if (string.is_empty()) - return 0; - return simdutf::utf16_length_from_utf8(string.characters_without_null_termination(), string.length()); } @@ -300,10 +296,6 @@ bool Utf16View::starts_with(Utf16View const& needle) const bool Utf16View::validate() const { - // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string. - if (is_empty()) - return true; - switch (m_endianness) { case Endianness::Host: return simdutf::validate_utf16(char_data(), length_in_code_units()); @@ -317,12 +309,6 @@ bool Utf16View::validate() const bool Utf16View::validate(size_t& valid_code_units) const { - // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string. - if (is_empty()) { - valid_code_units = 0; - return true; - } - auto result = [&]() { switch (m_endianness) { case Endianness::Host: @@ -341,10 +327,6 @@ bool Utf16View::validate(size_t& valid_code_units) const size_t Utf16View::calculate_length_in_code_points() const { - // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string. - if (is_empty()) - return 0; - // FIXME: simdutf's code point length method assumes valid UTF-16, whereas Utf16View uses U+FFFD as a replacement // for invalid code points. If we change Utf16View to only accept valid encodings as an invariant, we can // remove this branch. diff --git a/AK/Utf8View.cpp b/AK/Utf8View.cpp index 71f71618be2..76d2e3a04c8 100644 --- a/AK/Utf8View.cpp +++ b/AK/Utf8View.cpp @@ -74,10 +74,6 @@ Utf8View Utf8View::unicode_substring_view(size_t code_point_offset, size_t code_ size_t Utf8View::calculate_length() const { - // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string. - if (is_empty()) - return 0; - // FIXME: simdutf's code point length method assumes valid UTF-8, whereas Utf8View uses U+FFFD as a replacement // for invalid code points. If we change Utf8View to only accept valid encodings as an invariant, we can // remove this branch. @@ -157,12 +153,6 @@ Utf8View Utf8View::trim(Utf8View const& characters, TrimMode mode) const bool Utf8View::validate(size_t& valid_bytes, AllowSurrogates allow_surrogates) const { - // FIXME: The CPU-specific implementations behave differently on null inputs. We treat null views as an empty string. - if (is_empty()) { - valid_bytes = 0; - return true; - } - auto result = simdutf::validate_utf8_with_errors(m_string.characters_without_null_termination(), m_string.length()); valid_bytes = result.count; diff --git a/vcpkg.json b/vcpkg.json index 243c92d80b3..849216a0020 100644 --- a/vcpkg.json +++ b/vcpkg.json @@ -102,7 +102,7 @@ }, { "name": "simdutf", - "version": "5.3.4#0" + "version": "5.5.0#0" }, { "name": "skia",