mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-21 23:20:20 +00:00
AK+Everywhere: Make UTF-8 and UTF-32 to UTF-16 converters fallible
These conversions can fail to allocate the underlying storage needed to hold the UTF-16 data. Propagate those allocation errors to the callers.
This commit is contained in:
parent
d8044c5358
commit
1edb96376b
Notes:
sideshowbarker
2024-07-17 18:46:30 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/1edb96376b Pull-request: https://github.com/SerenityOS/serenity/pull/16895 Reviewed-by: https://github.com/linusg
13 changed files with 46 additions and 35 deletions
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
#include <AK/CharacterTypes.h>
|
||||
#include <AK/Concepts.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf16View.h>
|
||||
|
@ -20,45 +21,46 @@ static constexpr u16 low_surrogate_max = 0xdfff;
|
|||
static constexpr u32 replacement_code_point = 0xfffd;
|
||||
static constexpr u32 first_supplementary_plane_code_point = 0x10000;
|
||||
|
||||
template<typename UtfViewType>
|
||||
static Utf16Data to_utf16_impl(UtfViewType const& view)
|
||||
requires(IsSame<UtfViewType, Utf8View> || IsSame<UtfViewType, Utf32View>)
|
||||
template<OneOf<Utf8View, Utf32View> UtfViewType>
|
||||
static ErrorOr<Utf16Data> to_utf16_impl(UtfViewType const& view)
|
||||
{
|
||||
Utf16Data utf16_data;
|
||||
utf16_data.ensure_capacity(view.length());
|
||||
TRY(utf16_data.try_ensure_capacity(view.length()));
|
||||
|
||||
for (auto code_point : view)
|
||||
code_point_to_utf16(utf16_data, code_point);
|
||||
TRY(code_point_to_utf16(utf16_data, code_point));
|
||||
|
||||
return utf16_data;
|
||||
}
|
||||
|
||||
Utf16Data utf8_to_utf16(StringView utf8_view)
|
||||
ErrorOr<Utf16Data> utf8_to_utf16(StringView utf8_view)
|
||||
{
|
||||
return to_utf16_impl(Utf8View { utf8_view });
|
||||
}
|
||||
|
||||
Utf16Data utf8_to_utf16(Utf8View const& utf8_view)
|
||||
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const& utf8_view)
|
||||
{
|
||||
return to_utf16_impl(utf8_view);
|
||||
}
|
||||
|
||||
Utf16Data utf32_to_utf16(Utf32View const& utf32_view)
|
||||
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const& utf32_view)
|
||||
{
|
||||
return to_utf16_impl(utf32_view);
|
||||
}
|
||||
|
||||
void code_point_to_utf16(Utf16Data& string, u32 code_point)
|
||||
ErrorOr<void> code_point_to_utf16(Utf16Data& string, u32 code_point)
|
||||
{
|
||||
VERIFY(is_unicode(code_point));
|
||||
|
||||
if (code_point < first_supplementary_plane_code_point) {
|
||||
string.append(static_cast<u16>(code_point));
|
||||
TRY(string.try_append(static_cast<u16>(code_point)));
|
||||
} else {
|
||||
code_point -= first_supplementary_plane_code_point;
|
||||
string.append(static_cast<u16>(high_surrogate_min | (code_point >> 10)));
|
||||
string.append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff)));
|
||||
TRY(string.try_append(static_cast<u16>(high_surrogate_min | (code_point >> 10))));
|
||||
TRY(string.try_append(static_cast<u16>(low_surrogate_min | (code_point & 0x3ff))));
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
bool Utf16View::is_high_surrogate(u16 code_unit)
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <AK/DeprecatedString.h>
|
||||
#include <AK/Error.h>
|
||||
#include <AK/Format.h>
|
||||
#include <AK/Forward.h>
|
||||
#include <AK/Optional.h>
|
||||
|
@ -18,10 +19,10 @@ namespace AK {
|
|||
|
||||
using Utf16Data = Vector<u16, 1>;
|
||||
|
||||
Utf16Data utf8_to_utf16(StringView);
|
||||
Utf16Data utf8_to_utf16(Utf8View const&);
|
||||
Utf16Data utf32_to_utf16(Utf32View const&);
|
||||
void code_point_to_utf16(Utf16Data&, u32);
|
||||
ErrorOr<Utf16Data> utf8_to_utf16(StringView);
|
||||
ErrorOr<Utf16Data> utf8_to_utf16(Utf8View const&);
|
||||
ErrorOr<Utf16Data> utf32_to_utf16(Utf32View const&);
|
||||
ErrorOr<void> code_point_to_utf16(Utf16Data&, u32);
|
||||
|
||||
class Utf16View;
|
||||
|
||||
|
|
|
@ -14,7 +14,7 @@
|
|||
|
||||
TEST_CASE(decode_ascii)
|
||||
{
|
||||
auto string = AK::utf8_to_utf16("Hello World!11"sv);
|
||||
auto string = MUST(AK::utf8_to_utf16("Hello World!11"sv));
|
||||
Utf16View view { string };
|
||||
|
||||
size_t valid_code_units = 0;
|
||||
|
@ -33,7 +33,7 @@ TEST_CASE(decode_ascii)
|
|||
|
||||
TEST_CASE(decode_utf8)
|
||||
{
|
||||
auto string = AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv);
|
||||
auto string = MUST(AK::utf8_to_utf16("Привет, мир! 😀 γειά σου κόσμος こんにちは世界"sv));
|
||||
Utf16View view { string };
|
||||
|
||||
size_t valid_code_units = 0;
|
||||
|
@ -54,7 +54,7 @@ TEST_CASE(encode_utf8)
|
|||
{
|
||||
{
|
||||
DeprecatedString utf8_string("Привет, мир! 😀 γειά σου κόσμος こんにちは世界");
|
||||
auto string = AK::utf8_to_utf16(utf8_string);
|
||||
auto string = MUST(AK::utf8_to_utf16(utf8_string));
|
||||
Utf16View view { string };
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::Yes), utf8_string);
|
||||
EXPECT_EQ(view.to_utf8(Utf16View::AllowInvalidCodeUnits::No), utf8_string);
|
||||
|
@ -91,7 +91,7 @@ TEST_CASE(decode_utf16)
|
|||
|
||||
TEST_CASE(iterate_utf16)
|
||||
{
|
||||
auto string = AK::utf8_to_utf16("Привет 😀"sv);
|
||||
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
|
||||
Utf16View view { string };
|
||||
auto iterator = view.begin();
|
||||
|
||||
|
@ -263,7 +263,7 @@ TEST_CASE(decode_invalid_utf16)
|
|||
|
||||
TEST_CASE(substring_view)
|
||||
{
|
||||
auto string = AK::utf8_to_utf16("Привет 😀"sv);
|
||||
auto string = MUST(AK::utf8_to_utf16("Привет 😀"sv));
|
||||
{
|
||||
Utf16View view { string };
|
||||
view = view.substring_view(7, 2);
|
||||
|
|
|
@ -754,7 +754,7 @@ TEST_CASE(ECMA262_unicode_match)
|
|||
for (auto& test : tests) {
|
||||
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | test.options);
|
||||
|
||||
auto subject = AK::utf8_to_utf16(test.subject);
|
||||
auto subject = MUST(AK::utf8_to_utf16(test.subject));
|
||||
Utf16View view { subject };
|
||||
|
||||
if constexpr (REGEX_DEBUG) {
|
||||
|
@ -868,7 +868,7 @@ TEST_CASE(ECMA262_property_match)
|
|||
for (auto& test : tests) {
|
||||
Regex<ECMA262> re(test.pattern, (ECMAScriptFlags)regex::AllFlags::Global | regex::ECMAScriptFlags::BrowserExtended | test.options);
|
||||
|
||||
auto subject = AK::utf8_to_utf16(test.subject);
|
||||
auto subject = MUST(AK::utf8_to_utf16(test.subject));
|
||||
Utf16View view { subject };
|
||||
|
||||
if constexpr (REGEX_DEBUG) {
|
||||
|
|
|
@ -142,8 +142,8 @@ public:
|
|||
return 0;
|
||||
}
|
||||
case UTF16: {
|
||||
auto utf16_view = Utf16View(utf8_to_utf16(m_values.at(index.row())));
|
||||
if (utf16_view.validate())
|
||||
auto utf16_data = utf8_to_utf16(m_values.at(index.row())).release_value_but_fixme_should_propagate_errors();
|
||||
if (Utf16View utf16_view { utf16_data }; utf16_view.validate())
|
||||
return static_cast<i32>(utf16_view.length_in_code_units() * 2);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -490,7 +490,7 @@ JS_DEFINE_NATIVE_FUNCTION(GlobalObject::escape)
|
|||
{
|
||||
auto string = TRY(vm.argument(0).to_string(vm));
|
||||
StringBuilder escaped;
|
||||
for (auto code_point : utf8_to_utf16(string)) {
|
||||
for (auto code_point : TRY_OR_THROW_OOM(vm, utf8_to_utf16(string))) {
|
||||
if (code_point < 256) {
|
||||
if ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789@*_+-./"sv.contains(static_cast<char>(code_point)))
|
||||
escaped.append(code_point);
|
||||
|
|
|
@ -89,7 +89,11 @@ ErrorOr<DeprecatedString, ParseRegexPatternError> parse_regex_pattern(StringView
|
|||
if (unicode && unicode_sets)
|
||||
return ParseRegexPatternError { DeprecatedString::formatted(ErrorType::RegExpObjectIncompatibleFlags.message(), 'u', 'v') };
|
||||
|
||||
auto utf16_pattern = AK::utf8_to_utf16(pattern);
|
||||
auto utf16_pattern_result = AK::utf8_to_utf16(pattern);
|
||||
if (utf16_pattern_result.is_error())
|
||||
return ParseRegexPatternError { "Out of memory"sv };
|
||||
|
||||
auto utf16_pattern = utf16_pattern_result.release_value();
|
||||
Utf16View utf16_pattern_view { utf16_pattern };
|
||||
StringBuilder builder;
|
||||
|
||||
|
|
|
@ -123,7 +123,7 @@ JS_DEFINE_NATIVE_FUNCTION(StringConstructor::from_code_point)
|
|||
if (code_point < 0 || code_point > 0x10FFFF)
|
||||
return vm.throw_completion<RangeError>(ErrorType::InvalidCodePoint, next_code_point.to_string_without_side_effects());
|
||||
|
||||
AK::code_point_to_utf16(string, static_cast<u32>(code_point));
|
||||
TRY_OR_THROW_OOM(vm, code_point_to_utf16(string, static_cast<u32>(code_point)));
|
||||
}
|
||||
|
||||
return PrimitiveString::create(vm, Utf16String(move(string)));
|
||||
|
|
|
@ -33,7 +33,7 @@ NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16Data string)
|
|||
|
||||
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(StringView string)
|
||||
{
|
||||
return create(AK::utf8_to_utf16(string));
|
||||
return create(AK::utf8_to_utf16(string).release_value_but_fixme_should_propagate_errors());
|
||||
}
|
||||
|
||||
NonnullRefPtr<Utf16StringImpl> Utf16StringImpl::create(Utf16View const& view)
|
||||
|
|
|
@ -280,7 +280,7 @@ public:
|
|||
return RegexStringView { Utf32View { data.data(), data.size() } };
|
||||
},
|
||||
[&](Utf16View) {
|
||||
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() });
|
||||
optional_utf16_storage = AK::utf32_to_utf16(Utf32View { data.data(), data.size() }).release_value_but_fixme_should_propagate_errors();
|
||||
return RegexStringView { Utf16View { optional_utf16_storage } };
|
||||
});
|
||||
|
||||
|
|
|
@ -34,8 +34,8 @@ DeprecatedString strip_and_collapse_whitespace(StringView string)
|
|||
// https://infra.spec.whatwg.org/#code-unit-prefix
|
||||
bool is_code_unit_prefix(StringView potential_prefix, StringView input)
|
||||
{
|
||||
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix);
|
||||
auto input_utf16 = utf8_to_utf16(input);
|
||||
auto potential_prefix_utf16 = utf8_to_utf16(potential_prefix).release_value_but_fixme_should_propagate_errors();
|
||||
auto input_utf16 = utf8_to_utf16(input).release_value_but_fixme_should_propagate_errors();
|
||||
|
||||
// 1. Let i be 0.
|
||||
size_t i = 0;
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
*/
|
||||
|
||||
#include <AK/Utf16View.h>
|
||||
#include <LibJS/Runtime/Completion.h>
|
||||
#include <LibJS/Runtime/Utf16String.h>
|
||||
#include <LibWeb/DOM/Document.h>
|
||||
#include <LibWeb/SVG/SVGTextContentElement.h>
|
||||
|
||||
|
@ -17,9 +19,10 @@ SVGTextContentElement::SVGTextContentElement(DOM::Document& document, DOM::Quali
|
|||
}
|
||||
|
||||
// https://svgwg.org/svg2-draft/text.html#__svg__SVGTextContentElement__getNumberOfChars
|
||||
int SVGTextContentElement::get_number_of_chars() const
|
||||
WebIDL::ExceptionOr<int> SVGTextContentElement::get_number_of_chars() const
|
||||
{
|
||||
return AK::utf8_to_utf16(child_text_content()).size();
|
||||
auto chars = TRY_OR_THROW_OOM(vm(), utf8_to_utf16(child_text_content()));
|
||||
return static_cast<int>(chars.size());
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <LibWeb/SVG/SVGGraphicsElement.h>
|
||||
#include <LibWeb/WebIDL/ExceptionOr.h>
|
||||
|
||||
namespace Web::SVG {
|
||||
|
||||
|
@ -15,7 +16,7 @@ class SVGTextContentElement : public SVGGraphicsElement {
|
|||
WEB_PLATFORM_OBJECT(SVGTextContentElement, SVGGraphicsElement);
|
||||
|
||||
public:
|
||||
int get_number_of_chars() const;
|
||||
WebIDL::ExceptionOr<int> get_number_of_chars() const;
|
||||
|
||||
protected:
|
||||
SVGTextContentElement(DOM::Document&, DOM::QualifiedName);
|
||||
|
|
Loading…
Reference in a new issue