AK: Introduce StringBase::replace_with_new_{short_,}string

This commit is contained in:
Dan Klishch 2023-10-28 16:43:56 -04:00 committed by Andrew Kaster
parent d6290c4684
commit dcd1fda9c8
Notes: sideshowbarker 2024-07-17 09:39:38 +09:00
4 changed files with 56 additions and 37 deletions

View file

@ -64,18 +64,6 @@ ErrorOr<NonnullRefPtr<StringData>> StringData::create_uninitialized(size_t byte_
return new_string_data; return new_string_data;
} }
// Creates a StringData holding a copy of the `byte_count` bytes found at `utf8_data`.
// Propagates the error if the underlying allocation fails.
ErrorOr<NonnullRefPtr<StringData>> StringData::from_utf8(char const* utf8_data, size_t byte_count)
{
    // Inputs that fit into MAX_SHORT_STRING_BYTE_COUNT bytes belong to the String short
    // string optimization and must never reach this function.
    VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT);
    VERIFY(utf8_data);

    // create_uninitialized() reports the start of the new storage through its out-parameter.
    u8* destination = nullptr;
    auto string_data = TRY(create_uninitialized(byte_count, destination));

    // sizeof(char) is 1, so the byte count is the copy length.
    memcpy(destination, utf8_data, byte_count);
    return string_data;
}
static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer) static ErrorOr<void> read_stream_into_buffer(Stream& stream, Bytes buffer)
{ {
TRY(stream.read_until_filled(buffer)); TRY(stream.read_until_filled(buffer));
@ -124,15 +112,12 @@ void StringData::compute_hash() const
String String::from_utf8_without_validation(ReadonlyBytes bytes) String String::from_utf8_without_validation(ReadonlyBytes bytes)
{ {
if (bytes.size() <= MAX_SHORT_STRING_BYTE_COUNT) { String result;
ShortString short_string; MUST(result.replace_with_new_string(bytes.size(), [&](Bytes buffer) {
if (!bytes.is_empty()) bytes.copy_to(buffer);
memcpy(short_string.storage, bytes.data(), bytes.size()); return ErrorOr<void> {};
short_string.byte_count_and_short_string_flag = (bytes.size() << 1) | SHORT_STRING_FLAG; }));
return String { short_string }; return result;
}
auto data = MUST(Detail::StringData::from_utf8(reinterpret_cast<char const*>(bytes.data()), bytes.size()));
return String { move(data) };
} }
ErrorOr<String> String::from_utf8(StringView view) ErrorOr<String> String::from_utf8(StringView view)
@ -140,15 +125,12 @@ ErrorOr<String> String::from_utf8(StringView view)
if (!Utf8View { view }.validate()) if (!Utf8View { view }.validate())
return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8"); return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8");
if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) { String result;
ShortString short_string; TRY(result.replace_with_new_string(view.length(), [&](Bytes buffer) {
if (!view.is_empty()) view.bytes().copy_to(buffer);
memcpy(short_string.storage, view.characters_without_null_termination(), view.length()); return ErrorOr<void> {};
short_string.byte_count_and_short_string_flag = (view.length() << 1) | SHORT_STRING_FLAG; }));
return String { short_string }; return result;
}
auto data = TRY(Detail::StringData::from_utf8(view.characters_without_null_termination(), view.length()));
return String { move(data) };
} }
ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count) ErrorOr<String> String::from_stream(Stream& stream, size_t byte_count)

View file

@ -54,11 +54,6 @@ StringBase& StringBase::operator=(StringBase const& other)
return *this; return *this;
} }
// Reports whether this string is in its short-string form, decided by passing the raw
// bits of m_data to has_short_string_bit().
bool StringBase::is_short_string() const
{
    auto const raw_bits = reinterpret_cast<uintptr_t>(m_data);
    return has_short_string_bit(raw_bits);
}
ReadonlyBytes StringBase::bytes() const ReadonlyBytes StringBase::bytes() const
{ {
if (is_short_string()) if (is_short_string())
@ -82,6 +77,17 @@ bool StringBase::operator==(StringBase const& other) const
return bytes() == other.bytes(); return bytes() == other.bytes();
} }
// Replaces the contents of this string with an uninitialized buffer of `byte_count` bytes
// and returns a span over that buffer for the caller to fill in.
//
// Requests of at most MAX_SHORT_STRING_BYTE_COUNT bytes are forwarded to
// replace_with_uninitialized_short_string(). Larger requests get their storage from
// StringData::create_uninitialized(), and an allocation failure is propagated to the caller.
ErrorOr<Bytes> StringBase::replace_with_uninitialized_buffer(size_t byte_count)
{
    if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT)
        return replace_with_uninitialized_short_string(byte_count);

    // Allocate before touching the current contents: TRY() returns early on failure, and
    // in that case m_data must not be left pointing at data that was already destroyed.
    u8* buffer = nullptr;
    auto new_data = TRY(StringData::create_uninitialized(byte_count, buffer));

    // The allocation succeeded, so it is now safe to let go of the old contents.
    destroy_string();

    // Hand the reference held by new_data over to the raw m_data pointer.
    m_data = &new_data.leak_ref();
    return Bytes { buffer, byte_count };
}
void StringBase::destroy_string() void StringBase::destroy_string()
{ {
if (!is_short_string()) if (!is_short_string())

View file

@ -55,7 +55,10 @@ public:
} }
// NOTE: This is primarily interesting to unit tests. // NOTE: This is primarily interesting to unit tests.
[[nodiscard]] bool is_short_string() const; [[nodiscard]] constexpr bool is_short_string() const
{
return (m_short_string.byte_count_and_short_string_flag & SHORT_STRING_FLAG) != 0;
}
// Returns the underlying UTF-8 encoded bytes. // Returns the underlying UTF-8 encoded bytes.
// NOTE: There is no guarantee about null-termination. // NOTE: There is no guarantee about null-termination.
@ -80,12 +83,41 @@ protected:
{ {
} }
// Replaces this string with a fresh buffer of `byte_count` bytes and lets `callback`
// fill it in. The callback receives the writable buffer and is skipped entirely for an
// empty string. Errors from acquiring the buffer or from the callback are propagated.
template<typename Func>
ErrorOr<void> replace_with_new_string(size_t byte_count, Func&& callback)
{
    auto buffer = TRY(replace_with_uninitialized_buffer(byte_count));

    // Nothing to fill in for an empty string.
    if (byte_count == 0)
        return {};

    TRY(callback(buffer));
    return {};
}
// Replaces this string with a fresh short string of `byte_count` bytes and lets
// `callback` fill in its storage. The callback receives the writable buffer and is
// skipped entirely for an empty string.
template<typename Func>
constexpr void replace_with_new_short_string(size_t byte_count, Func&& callback)
{
    auto buffer = replace_with_uninitialized_short_string(byte_count);

    // Nothing to fill in for an empty string.
    if (byte_count == 0)
        return;

    callback(buffer);
}
union { union {
ShortString m_short_string; ShortString m_short_string;
Detail::StringData const* m_data { nullptr }; Detail::StringData const* m_data { nullptr };
}; };
private: private:
ErrorOr<Bytes> replace_with_uninitialized_buffer(size_t byte_count);
// Resets this string to a short string of `byte_count` bytes and returns a span over its
// inline storage. The string must already be a short string and `byte_count` must not
// exceed MAX_SHORT_STRING_BYTE_COUNT; both conditions are enforced with VERIFY.
constexpr Bytes replace_with_uninitialized_short_string(size_t byte_count)
{
    VERIFY(is_short_string());
    VERIFY(byte_count <= MAX_SHORT_STRING_BYTE_COUNT);

    // Build the replacement first: the length lives in the bits above the short string
    // flag, which occupies the lowest bit.
    ShortString fresh {};
    fresh.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG;
    m_short_string = fresh;

    return Bytes { m_short_string.storage, byte_count };
}
void destroy_string(); void destroy_string();
}; };

View file

@ -17,7 +17,6 @@ class StringData final : public RefCounted<StringData> {
public: public:
static ErrorOr<NonnullRefPtr<StringData>> create_uninitialized(size_t, u8*& buffer); static ErrorOr<NonnullRefPtr<StringData>> create_uninitialized(size_t, u8*& buffer);
static ErrorOr<NonnullRefPtr<StringData>> create_substring(StringData const& superstring, size_t start, size_t byte_count); static ErrorOr<NonnullRefPtr<StringData>> create_substring(StringData const& superstring, size_t start, size_t byte_count);
static ErrorOr<NonnullRefPtr<StringData>> from_utf8(char const* utf8_bytes, size_t);
static ErrorOr<NonnullRefPtr<StringData>> from_stream(Stream&, size_t byte_count); static ErrorOr<NonnullRefPtr<StringData>> from_stream(Stream&, size_t byte_count);
struct SubstringData { struct SubstringData {