AK+Everywhere: Make UTF-8 and UTF-32 to UTF-16 converters fallible

These converters could fail to allocate the underlying storage needed to
store the UTF-16 data. Propagate these allocation errors to the callers.
This commit is contained in:
Timothy Flynn 2023-01-06 13:19:34 -05:00 committed by Linus Groh
parent d8044c5358
commit 1edb96376b
Notes: sideshowbarker 2024-07-17 18:46:30 +09:00
13 changed files with 46 additions and 35 deletions

View file

@ -5,6 +5,7 @@
*/
#include <AK/CharacterTypes.h>
#include <AK/Concepts.h>
#include <AK/StringBuilder.h>
#include <AK/StringView.h>
#include <AK/Utf16View.h>
@ -20,45 +21,46 @@ static constexpr u16 low_surrogate_max = 0xdfff;
static constexpr u32 replacement_code_point = 0xfffd;
static constexpr u32 first_supplementary_plane_code_point = 0x10000;
// Shared helper behind the public converters: walks every code point of a
// Utf8View or Utf32View and encodes it into a freshly built Utf16Data buffer.
//
// This hunk shows two forms of the helper. The first returns Utf16Data
// directly and uses ensure_capacity(); the second returns ErrorOr<Utf16Data>
// and wraps try_ensure_capacity() and code_point_to_utf16() in TRY so a
// failure is handed back to the caller.
template<typename UtfViewType>
static Utf16Data to_utf16_impl(UtfViewType const& view)
requires(IsSame<UtfViewType, Utf8View> || IsSame<UtfViewType, Utf32View>)
template<OneOf<Utf8View, Utf32View> UtfViewType>
static ErrorOr<Utf16Data> to_utf16_impl(UtfViewType const& view)
{
Utf16Data utf16_data;
// Reserve room up front, sized by view.length().
utf16_data.ensure_capacity(view.length());
TRY(utf16_data.try_ensure_capacity(view.length()));
// Encode one code point at a time; each may append one or two code units.
for (auto code_point : view)
code_point_to_utf16(utf16_data, code_point);
TRY(code_point_to_utf16(utf16_data, code_point));
return utf16_data;
}
// Converts UTF-8 text supplied as a StringView to UTF-16 by wrapping it in a
// Utf8View and delegating to to_utf16_impl(). With the ErrorOr<Utf16Data>
// signature, whatever to_utf16_impl() returns (value or error) is returned
// unchanged.
Utf16Data utf8_to_utf16(StringView utf8_view)
ErrorOr<Utf16Data> utf8_to_utf16(StringView utf8_view)
{
return to_utf16_impl(Utf8View { utf8_view });
}
// Overload taking an existing Utf8View; forwards it straight to
// to_utf16_impl() and returns that result (value or error) unchanged.
Utf16Data utf8_to_utf16(Utf8View const& utf8_view)
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const& utf8_view)
{
return to_utf16_impl(utf8_view);
}
// Converts the code points of a Utf32View to UTF-16 by forwarding to
// to_utf16_impl() and returning that result (value or error) unchanged.
Utf16Data utf32_to_utf16(Utf32View const& utf32_view)
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const& utf32_view)
{
return to_utf16_impl(utf32_view);
}
// Appends the UTF-16 encoding of `code_point` to `string`.
//
// Two forms appear in this hunk: a void one using append(), and an
// ErrorOr<void> one that wraps try_append() in TRY and ends with `return {}`
// on success.
void code_point_to_utf16(Utf16Data& string, u32 code_point)
ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point)
{
// The input is checked with is_unicode() before anything is appended.
VERIFY(is_unicode(code_point));
if (code_point < first_supplementary_plane_code_point) {
// Below 0x10000: the value is stored as a single 16-bit code unit.
string.append(static_cast<u16>(code_point));
TRY(string.try_append(static_cast<u16>(code_point)));
} else {
// 0x10000 and above: subtract 0x10000, then emit two code units. The first
// carries the bits above the low ten OR-ed into high_surrogate_min, the
// second carries the low ten bits OR-ed into low_surrogate_min.
code_point -= first_supplementary_plane_code_point;
string.append(static_cast<u16>(high_surrogate_min | (code_point >> 10)));
string.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff)));
TRY(string.try_append(static_cast<u16>(high_surrogate_min | (code_point >> 10))));
TRY(string.try_append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))));
}
return {};
}
bool Utf16View::is_high_surrogate(u16 code_unit)

View file

@ -7,6 +7,7 @@
#pragma once
#include <AK/DeprecatedString.h>
#include <AK/Error.h>
#include <AK/Format.h>
#include <AK/Forward.h>
#include <AK/Optional.h>
@ -18,10 +19,10 @@ namespace AK {
// Storage type for UTF-16 text: a Vector of u16 code units.
using Utf16Data = Vector<u16, 1>;
// Converter declarations. The hunk lists both sets: the ones returning
// Utf16Data / void, and the ones returning ErrorOr<Utf16Data> / ErrorOr<void>
// so that a failure is reported through the return value.
Utf16Data utf8_to_utf16(StringView);
Utf16Data utf8_to_utf16(Utf8View const&);
Utf16Data utf32_to_utf16(Utf32View const&);
void code_point_to_utf16(Utf16Data&, u32);
ErrorOr<Utf16Data> utf8_to_utf16(StringView);
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&);
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&);
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32);
class Utf16View;

View file

@ -14,7 +14,7 @@
TEST_CASE(decode_ascii)
{
auto string = AK::utf8_to_utf16("Hello World!11"sv);
auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
Utf16View view { string };
size_t valid_code_units = 0;
@ -33,7 +33,7 @@ TEST_CASE(decode_ascii)
TEST_CASE(decode_utf8)
{
auto string = AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv);
auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
Utf16View view { string };
size_t valid_code_units = 0;
@ -54,7 +54,7 @@ TEST_CASE(encode_utf8)
{
{
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
auto string = AK::utf8_to_utf16(utf8_string);
auto string = MUST(AK::utf8_to_utf16(utf8_string));
Utf16View view { string };
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
@ -91,7 +91,7 @@ TEST_CASE(decode_utf16)
TEST_CASE(iterate_utf16)
{
auto string = AK::utf8_to_utf16("Привет 😀"sv);
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
Utf16View view { string };
auto iterator = view.begin();
@ -263,7 +263,7 @@ TEST_CASE(decode_invalid_utf16)
TEST_CASE(substring_view)
{
auto string = AK::utf8_to_utf16("Привет 😀"sv);
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
{
Utf16View view { string };
view = view.substring_view(7, 2);

View file

@ -754,7 +754,7 @@ TEST_CASE(ECMA262_unicode_match)
for (auto& test : tests) {
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
auto subject = AK::utf8_to_utf16(test.subject);
auto subject = MUST(AK::utf8_to_utf16(test.subject));
Utf16View view { subject };
if constexpr (REGEX_DEBUG) {
@ -868,7 +868,7 @@ TEST_CASE(ECMA262_property_match)
for (auto& test : tests) {
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
auto subject = AK::utf8_to_utf16(test.subject);
auto subject = MUST(AK::utf8_to_utf16(test.subject));
Utf16View view { subject };
if constexpr (REGEX_DEBUG) {

View file

@ -142,8 +142,8 @@ public:
return 0;
}
case UTF16: {
auto utf16_view = Utf16View(utf8_to_utf16(m_values.at(index.row())));
if (utf16_view.validate())
auto utf16_data = utf8_to_utf16(m_values.at(index.row())).release_value_but_fixme_should_propagate_errors();
if (Utf16View utf16_view { utf16_data }; utf16_view.validate())
return static_cast<i32>(utf16_view.length_in_code_units() * 2);
return 0;
}

View file

@ -490,7 +490,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
{
auto string = TRY(vm.argument(0).to_string(vm));
StringBuilder escaped;
for (auto code_point : utf8_to_utf16(string)) {
for (auto code_point : TRY_OR_THROW_OOM(vm, utf8_to_utf16(string))) {
if (code_point < 256) {
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point)))
escaped.append(code_point);

View file

@ -89,7 +89,11 @@ ErrorOr<DeprecatedString, ParseRegexPatternError> parse_regex_pattern(StringView
if (unicode && unicode_sets)
return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') };
auto utf16_pattern = AK::utf8_to_utf16(pattern);
auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
if (utf16_pattern_result.is_error())
return ParseRegexPatternError { "Out of memory"sv };
auto utf16_pattern = utf16_pattern_result.release_value();
Utf16View utf16_pattern_view { utf16_pattern };
StringBuilder builder;

View file

@ -123,7 +123,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
if (code_point < 0 || code_point > 0x10FFFF)
return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
AK::code_point_to_utf16(string, static_cast<u32>(code_point));
TRY_OR_THROW_OOM(vm, code_point_to_utf16(string, static_cast<u32>(code_point)));
}
return PrimitiveString::create(vm, Utf16String(move(string)));

View file

@ -33,7 +33,7 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
// Builds a Utf16StringImpl from a StringView by converting it with
// AK::utf8_to_utf16() and passing the result to another create() overload.
// The second form unwraps the ErrorOr result with
// release_value_but_fixme_should_propagate_errors(), so a conversion error is
// not yet returned to the caller of create().
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
{
return create(AK::utf8_to_utf16(string));
return create(AK::utf8_to_utf16(string).release_value_but_fixme_should_propagate_errors());
}
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)

View file

@ -280,7 +280,7 @@ public:
return RegexStringView { Utf32View { data.data(), data.size() } };
},
[&](Utf16View) {
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() });
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
return RegexStringView { Utf16View { optional_utf16_storage } };
});

View file

@ -34,8 +34,8 @@ DeprecatedString strip_and_collapse_whitespace(StringView string)
// https://infra.spec.whatwg.org/#code-unit-prefix
bool is_code_unit_prefix(StringView potential_prefix, StringView input)
{
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix);
auto input_utf16 = utf8_to_utf16(input);
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
// 1. Let i be 0.
size_t i = 0;

View file

@ -5,6 +5,8 @@
*/
#include <AK/Utf16View.h>
#include <LibJS/Runtime/Completion.h>
#include <LibJS/Runtime/Utf16String.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/SVG/SVGTextContentElement.h>
@ -17,9 +19,10 @@ SVGTextContentElement::SVGTextContentElement(DOM::Document& document, DOM::Quali
}
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
// Returns the size of child_text_content() after conversion with
// utf8_to_utf16(), i.e. the number of u16 elements in the resulting buffer.
// Two forms appear: one returning int straight from .size(), and one
// returning WebIDL::ExceptionOr<int> that passes the conversion through
// TRY_OR_THROW_OOM(vm(), ...) and casts the size to int explicitly.
int SVGTextContentElement::get_number_of_chars() const
WebIDL::ExceptionOr<int> SVGTextContentElement::get_number_of_chars() const
{
return AK::utf8_to_utf16(child_text_content()).size();
auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(child_text_content()));
return static_cast<int>(chars.size());
}
}

View file

@ -7,6 +7,7 @@
#pragma once
#include <LibWeb/SVG/SVGGraphicsElement.h>
#include <LibWeb/WebIDL/ExceptionOr.h>
namespace Web::SVG {
@ -15,7 +16,7 @@ class SVGTextContentElement : public SVGGraphicsElement {
WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement);
public:
int get_number_of_chars() const;
WebIDL::ExceptionOr<int> get_number_of_chars() const;
protected:
SVGTextContentElement(DOM::Document&, DOM::QualifiedName);