mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-25 00:50:22 +00:00
AK: Ensure short String instances are valid UTF-8
We are currently only validating long strings.
This commit is contained in:
parent
434ca78425
commit
da0d000909
Notes:
sideshowbarker
2024-07-16 23:32:35 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/da0d000909 Pull-request: https://github.com/SerenityOS/serenity/pull/17699 Reviewed-by: https://github.com/linusg ✅
3 changed files with 37 additions and 15 deletions
|
@ -11,7 +11,6 @@
|
|||
#include <AK/MemMem.h>
|
||||
#include <AK/Stream.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
@ -132,10 +131,6 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
|
|||
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
|
||||
VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT);
|
||||
|
||||
Utf8View view(StringView(utf8_data, byte_count));
|
||||
if (!view.validate())
|
||||
return Error::from_string_literal("StringData::from_utf8: Input was not valid UTF-8");
|
||||
|
||||
VERIFY(utf8_data);
|
||||
u8* buffer = nullptr;
|
||||
auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
|
||||
|
@ -143,6 +138,16 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data,
|
|||
return new_string_data;
|
||||
}
|
||||
|
||||
// Reads from `stream` into `buffer` via Stream::read_entire_buffer, then validates the bytes as UTF-8.
// Returns the read error if the read fails, or a "not valid UTF-8" error if validation fails.
// Note: the error text always names String::from_stream, whichever caller invoked this helper.
static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer)
|
||||
{
|
||||
TRY(stream.read_entire_buffer(buffer));
|
||||
|
||||
// Validate only after the read has succeeded.
if (!Utf8View { StringView { buffer } }.validate())
|
||||
return Error::from_string_literal("String::from_stream: Input was not valid UTF-8");
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_t byte_count)
|
||||
{
|
||||
// Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
|
||||
|
@ -150,12 +155,7 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::from_stream(Stream& stream, size_
|
|||
|
||||
u8* buffer = nullptr;
|
||||
auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
|
||||
Bytes new_string_bytes = { buffer, byte_count };
|
||||
TRY(stream.read_entire_buffer(new_string_bytes));
|
||||
|
||||
Utf8View view(StringView { new_string_bytes });
|
||||
if (!view.validate())
|
||||
return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8");
|
||||
TRY(read_stream_into_buffer(stream, { buffer, byte_count }));
|
||||
|
||||
return new_string_data;
|
||||
}
|
||||
|
@ -230,6 +230,9 @@ void String::destroy_string()
|
|||
|
||||
ErrorOr<String> String::from_utf8(StringView view)
|
||||
{
|
||||
if (!Utf8View { view }.validate())
|
||||
return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8");
|
||||
|
||||
if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) {
|
||||
ShortString short_string;
|
||||
if (!view.is_empty())
|
||||
|
@ -246,7 +249,7 @@ ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)
|
|||
if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) {
|
||||
ShortString short_string;
|
||||
if (byte_count > 0)
|
||||
TRY(stream.read_entire_buffer({ short_string.storage, byte_count }));
|
||||
TRY(Detail::read_stream_into_buffer(stream, { short_string.storage, byte_count }));
|
||||
short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
|
||||
return String { short_string };
|
||||
}
|
||||
|
@ -587,9 +590,6 @@ DeprecatedString String::to_deprecated_string() const
|
|||
|
||||
// Converts a DeprecatedString into a String: validates the bytes as UTF-8, then delegates to String::from_utf8.
// NOTE(review): String::from_utf8 is shown validating its input as well, so the check here looks redundant — confirm.
ErrorOr<String> String::from_deprecated_string(DeprecatedString const& deprecated_string)
|
||||
{
|
||||
Utf8View view(deprecated_string);
|
||||
if (!view.validate())
|
||||
return Error::from_string_literal("String::from_deprecated_string: Input was not valid UTF-8");
|
||||
return String::from_utf8(deprecated_string.view());
|
||||
}
|
||||
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <AK/Traits.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/UnicodeUtils.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace AK {
|
||||
|
@ -72,6 +73,7 @@ public:
|
|||
static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string)
|
||||
{
|
||||
VERIFY(string.length() <= MAX_SHORT_STRING_BYTE_COUNT);
|
||||
VERIFY(Utf8View { string }.validate());
|
||||
|
||||
ShortString short_string;
|
||||
for (size_t i = 0; i < string.length(); ++i)
|
||||
|
|
|
@ -140,6 +140,26 @@ TEST_CASE(long_streams)
|
|||
}
|
||||
}
|
||||
|
||||
// Checks that String::from_utf8 and String::from_stream report an error for byte sequences that are not valid UTF-8.
// NOTE(review): string1 is presumably longer than MAX_SHORT_STRING_BYTE_COUNT while string2/string3 are short — confirm.
TEST_CASE(invalid_utf8)
|
||||
{
|
||||
auto string1 = String::from_utf8("long string \xf4\x8f\xbf\xc0"sv); // Encoding of U+10FFFF with its final byte bumped to 0xC0, which is not a valid continuation byte
|
||||
EXPECT(string1.is_error());
|
||||
EXPECT(string1.error().string_literal().contains("Input was not valid UTF-8"sv));
|
||||
|
||||
auto string2 = String::from_utf8("\xf4\xa1\xb0\xbd"sv); // U+121C3D, above the Unicode maximum of U+10FFFF
|
||||
EXPECT(string2.is_error());
|
||||
EXPECT(string2.error().string_literal().contains("Input was not valid UTF-8"sv));
|
||||
|
||||
// Same four bytes as string2, this time fed through a stream.
AllocatingMemoryStream stream;
|
||||
MUST(stream.write_value<u8>(0xf4));
|
||||
MUST(stream.write_value<u8>(0xa1));
|
||||
MUST(stream.write_value<u8>(0xb0));
|
||||
MUST(stream.write_value<u8>(0xbd));
|
||||
auto string3 = String::from_stream(stream, stream.used_buffer_size());
|
||||
EXPECT_EQ(string3.is_error(), true);
|
||||
EXPECT(string3.error().string_literal().contains("Input was not valid UTF-8"sv));
|
||||
}
|
||||
|
||||
TEST_CASE(from_code_points)
|
||||
{
|
||||
for (u32 code_point = 0; code_point < 0x80; ++code_point) {
|
||||
|
|
Loading…
Reference in a new issue