diff --git a/AK/String.cpp b/AK/String.cpp
index 69e44d37021..e87e0c290a2 100644
--- a/AK/String.cpp
+++ b/AK/String.cpp
@@ -9,6 +9,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -23,6 +24,7 @@ public:
     static ErrorOr> create_uninitialized(size_t, u8*& buffer);
     static ErrorOr> create_substring(StringData const& superstring, size_t start, size_t byte_count);
     static ErrorOr> from_utf8(char const* utf8_bytes, size_t);
+    static ErrorOr> from_stream(Stream&, size_t byte_count);
 
     struct SubstringData {
         StringData const* superstring { nullptr };
@@ -141,6 +143,29 @@ ErrorOr> StringData::from_utf8(char const* utf8_data,
     return new_string_data;
 }
 
+// Fills a new StringData obtained from create_uninitialized() with exactly byte_count bytes read
+// from the stream, then validates those bytes as UTF-8. Propagates any error from
+// create_uninitialized() or from the stream, and fails if the bytes are not valid UTF-8.
+ErrorOr> StringData::from_stream(Stream& stream, size_t byte_count)
+{
+    // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
+    VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT);
+
+    u8* buffer = nullptr;
+    auto new_string_data = TRY(create_uninitialized(byte_count, buffer));
+    Bytes new_string_bytes = { buffer, byte_count };
+
+    // Stream::read() may return after filling only part of the buffer, which would leave the tail
+    // of the string uninitialized. read_entire_buffer() keeps reading until the buffer is full.
+    TRY(stream.read_entire_buffer(new_string_bytes));
+
+    Utf8View view(StringView { new_string_bytes });
+    if (!view.validate())
+        return Error::from_string_literal("StringData::from_stream: Input was not valid UTF-8");
+
+    return new_string_data;
+}
+
 ErrorOr> StringData::create_substring(StringData const& superstring, size_t start, size_t byte_count)
 {
     // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization.
@@ -222,6 +241,30 @@ ErrorOr String::from_utf8(StringView view)
     return String { move(data) };
 }
 
+// Creates a String by reading exactly byte_count bytes from the stream. Fails if the stream
+// reports an error or cannot fill the buffer, or if the bytes read are not valid UTF-8.
+ErrorOr String::from_stream(Stream& stream, size_t byte_count)
+{
+    if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) {
+        ShortString short_string;
+        if (byte_count > 0) {
+            Bytes short_string_bytes = { short_string.storage, byte_count };
+
+            // Stream::read() may stop early; read_entire_buffer() fills the whole buffer or fails.
+            TRY(stream.read_entire_buffer(short_string_bytes));
+
+            // Short strings must be held to the same UTF-8 guarantee as the StringData path.
+            Utf8View view(StringView { short_string_bytes });
+            if (!view.validate())
+                return Error::from_string_literal("String::from_stream: Input was not valid UTF-8");
+        }
+        short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
+        return String { short_string };
+    }
+    auto data = TRY(Detail::StringData::from_stream(stream, byte_count));
+    return String { move(data) };
+}
+
 ErrorOr String::repeated(u32 code_point, size_t count)
 {
     VERIFY(is_unicode(code_point));
diff --git a/AK/String.h b/AK/String.h
index 187f84458e3..f9d6ec48a6e 100644
--- a/AK/String.h
+++ b/AK/String.h
@@ -64,6 +64,10 @@ public:
     // Creates a new String from a sequence of UTF-8 encoded code points.
     static ErrorOr from_utf8(StringView);
 
+    // Creates a new String by reading exactly byte_count bytes from a Stream. Returns an error if the
+    // stream cannot supply that many bytes or if the bytes read are not valid UTF-8.
+    static ErrorOr from_stream(Stream&, size_t byte_count);
+
     // Creates a new String from a short sequence of UTF-8 encoded code points. If the provided string
     // does not fit in the short string storage, a compilation error will be emitted.
static AK_SHORT_STRING_CONSTEVAL String from_utf8_short_string(StringView string)
diff --git a/Tests/AK/TestString.cpp b/Tests/AK/TestString.cpp
index ae48664b837..59668da04f4 100644
--- a/Tests/AK/TestString.cpp
+++ b/Tests/AK/TestString.cpp
@@ -10,6 +10,7 @@
 
 #include
 
+#include
 #include
 #include
 #include
@@ -67,6 +68,58 @@ TEST_CASE(long_strings)
     EXPECT_EQ(string.bytes_as_string_view(), "abcdefgh"sv);
 }
 
+TEST_CASE(long_streams)
+{
+    // Fixed-size memory stream: write the text, rewind, then read it back as a String.
+    {
+        u8 backing_storage[64] = {};
+        constexpr auto expected_view = "Well, hello friends"sv;
+        FixedMemoryStream stream(Bytes { backing_storage, sizeof(backing_storage) });
+        MUST(stream.write(expected_view.bytes()));
+        MUST(stream.seek(0));
+
+        auto result = MUST(String::from_stream(stream, expected_view.length()));
+
+        EXPECT_EQ(result.is_short_string(), false);
+        EXPECT_EQ(result.bytes().size(), 19u);
+        EXPECT_EQ(result.bytes_as_string_view(), expected_view);
+    }
+
+    // Three bytes are expected to land in the short string storage.
+    {
+        AllocatingMemoryStream stream;
+        MUST(stream.write(("abc"sv).bytes()));
+
+        auto result = MUST(String::from_stream(stream, 3u));
+
+        EXPECT_EQ(result.is_short_string(), true);
+        EXPECT_EQ(result.bytes().size(), 3u);
+        EXPECT_EQ(result.bytes_as_string_view(), "abc"sv);
+    }
+
+    // Only nine of the ten written bytes are requested; the result is expected not to be a short string.
+    {
+        AllocatingMemoryStream stream;
+        MUST(stream.write(("0123456789"sv).bytes()));
+
+        auto result = MUST(String::from_stream(stream, 9u));
+
+        EXPECT_EQ(result.is_short_string(), false);
+        EXPECT_EQ(result.bytes().size(), 9u);
+        EXPECT_EQ(result.bytes_as_string_view(), "012345678"sv);
+    }
+
+    // The stream holds nothing but 0xff bytes, which is not valid UTF-8, so creation must fail.
+    {
+        AllocatingMemoryStream stream;
+        MUST(stream.write_value(0xffffffff));
+        MUST(stream.write_value(0xffffffff));
+        MUST(stream.write_value(0xffffffff));
+        auto creation_result = String::from_stream(stream, stream.used_buffer_size());
+        EXPECT_EQ(creation_result.is_error(), true);
+    }
+}
+
 TEST_CASE(from_code_points)
 {
     for (u32 code_point = 0; code_point < 0x80; ++code_point) {