mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
AK: Replace UTF-16 validation and length computation with simdutf
This commit is contained in:
parent
a2bcb2ab8d
commit
32ffe9bbfc
Notes:
sideshowbarker
2024-07-18 23:46:01 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/LadybirdBrowser/ladybird/commit/32ffe9bbfca Pull-request: https://github.com/LadybirdBrowser/ladybird/pull/674 Reviewed-by: https://github.com/ADKaster
2 changed files with 21 additions and 21 deletions
|
@ -1,9 +1,11 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#define AK_DONT_REPLACE_STD
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Concepts.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
|
@ -12,6 +14,8 @@
|
|||
#include <AK/Utf32View.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
#include <simdutf.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
static constexpr u16 high_surrogate_min = 0xd800;
|
||||
|
@ -233,27 +237,27 @@ bool Utf16View::starts_with(Utf16View const& needle) const
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Utf16View::validate() const
|
||||
{
|
||||
return simdutf::validate_utf16(reinterpret_cast<char16_t const*>(m_code_units.data()), m_code_units.size());
|
||||
}
|
||||
|
||||
bool Utf16View::validate(size_t& valid_code_units) const
|
||||
{
|
||||
valid_code_units = 0;
|
||||
auto result = simdutf::validate_utf16_with_errors(reinterpret_cast<char16_t const*>(m_code_units.data()), m_code_units.size());
|
||||
valid_code_units = result.count;
|
||||
|
||||
for (auto const* ptr = begin_ptr(); ptr < end_ptr(); ++ptr) {
|
||||
if (is_high_surrogate(*ptr)) {
|
||||
if ((++ptr >= end_ptr()) || !is_low_surrogate(*ptr))
|
||||
return false;
|
||||
++valid_code_units;
|
||||
} else if (is_low_surrogate(*ptr)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
++valid_code_units;
|
||||
}
|
||||
|
||||
return true;
|
||||
return result.error == simdutf::SUCCESS;
|
||||
}
|
||||
|
||||
size_t Utf16View::calculate_length_in_code_points() const
|
||||
{
|
||||
// FIXME: simdutf's code point length method assumes valid UTF-16, whereas Utf16View uses U+FFFD as a replacement
|
||||
// for invalid code points. If we change Utf16View to only accept valid encodings as an invariant, we can
|
||||
// remove this branch.
|
||||
if (validate()) [[likely]]
|
||||
return simdutf::count_utf16(reinterpret_cast<char16_t const*>(m_code_units.data()), m_code_units.size());
|
||||
|
||||
size_t code_points = 0;
|
||||
for ([[maybe_unused]] auto code_point : *this)
|
||||
++code_points;
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
/*
|
||||
* Copyright (c) 2021-2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
* Copyright (c) 2021-2024, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
@ -113,12 +113,8 @@ public:
|
|||
|
||||
bool starts_with(Utf16View const&) const;
|
||||
|
||||
bool validate() const;
|
||||
bool validate(size_t& valid_code_units) const;
|
||||
bool validate() const
|
||||
{
|
||||
size_t valid_code_units;
|
||||
return validate(valid_code_units);
|
||||
}
|
||||
|
||||
bool equals_ignoring_case(Utf16View const&) const;
|
||||
|
||||
|
|
Loading…
Reference in a new issue