From dcd1fda9c8f3d134491fde9d4d55edb0ce810013 Mon Sep 17 00:00:00 2001 From: Dan Klishch Date: Sat, 28 Oct 2023 16:43:56 -0400 Subject: [PATCH] AK: Introduce StringBase::replace_with_new_{short_,}string --- AK/String.cpp | 42 ++++++++++++------------------------------ AK/StringBase.cpp | 16 +++++++++++----- AK/StringBase.h | 34 +++++++++++++++++++++++++++++++++- AK/StringInternals.h | 1 - 4 files changed, 56 insertions(+), 37 deletions(-) diff --git a/AK/String.cpp b/AK/String.cpp index 3258ce2c380..bdd2379b1ee 100644 --- a/AK/String.cpp +++ b/AK/String.cpp @@ -64,18 +64,6 @@ ErrorOr> StringData::create_uninitialized(size_t byte_ return new_string_data; } -ErrorOr> StringData::from_utf8(char const* utf8_data, size_t byte_count) -{ - // Strings of MAX_SHORT_STRING_BYTE_COUNT bytes or less should be handled by the String short string optimization. - VERIFY(byte_count > String::MAX_SHORT_STRING_BYTE_COUNT); - - VERIFY(utf8_data); - u8* buffer = nullptr; - auto new_string_data = TRY(create_uninitialized(byte_count, buffer)); - memcpy(buffer, utf8_data, byte_count * sizeof(char)); - return new_string_data; -} - static ErrorOr read_stream_into_buffer(Stream& stream, Bytes buffer) { TRY(stream.read_until_filled(buffer)); @@ -124,15 +112,12 @@ void StringData::compute_hash() const String String::from_utf8_without_validation(ReadonlyBytes bytes) { - if (bytes.size() <= MAX_SHORT_STRING_BYTE_COUNT) { - ShortString short_string; - if (!bytes.is_empty()) - memcpy(short_string.storage, bytes.data(), bytes.size()); - short_string.byte_count_and_short_string_flag = (bytes.size() << 1) | SHORT_STRING_FLAG; - return String { short_string }; - } - auto data = MUST(Detail::StringData::from_utf8(reinterpret_cast(bytes.data()), bytes.size())); - return String { move(data) }; + String result; + MUST(result.replace_with_new_string(bytes.size(), [&](Bytes buffer) { + bytes.copy_to(buffer); + return ErrorOr {}; + })); + return result; } ErrorOr String::from_utf8(StringView view) @@ -140,15 +125,12 @@ ErrorOr String::from_utf8(StringView view) if (!Utf8View { view }.validate()) return Error::from_string_literal("String::from_utf8: Input was not valid UTF-8"); - if (view.length() <= MAX_SHORT_STRING_BYTE_COUNT) { - ShortString short_string; - if (!view.is_empty()) - memcpy(short_string.storage, view.characters_without_null_termination(), view.length()); - short_string.byte_count_and_short_string_flag = (view.length() << 1) | SHORT_STRING_FLAG; - return String { short_string }; - } - auto data = TRY(Detail::StringData::from_utf8(view.characters_without_null_termination(), view.length())); - return String { move(data) }; + String result; + TRY(result.replace_with_new_string(view.length(), [&](Bytes buffer) { + view.bytes().copy_to(buffer); + return ErrorOr {}; + })); + return result; } ErrorOr String::from_stream(Stream& stream, size_t byte_count) diff --git a/AK/StringBase.cpp b/AK/StringBase.cpp index 62fa5225b37..dfb357db652 100644 --- a/AK/StringBase.cpp +++ b/AK/StringBase.cpp @@ -54,11 +54,6 @@ StringBase& StringBase::operator=(StringBase const& other) return *this; } -bool StringBase::is_short_string() const -{ - return has_short_string_bit(reinterpret_cast(m_data)); -} - ReadonlyBytes StringBase::bytes() const { if (is_short_string()) @@ -82,6 +77,17 @@ bool StringBase::operator==(StringBase const& other) const return bytes() == other.bytes(); } +ErrorOr StringBase::replace_with_uninitialized_buffer(size_t byte_count) +{ + if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT) + return replace_with_uninitialized_short_string(byte_count); + + u8* buffer = nullptr; + destroy_string(); + m_data = &TRY(StringData::create_uninitialized(byte_count, buffer)).leak_ref(); + return Bytes { buffer, byte_count }; +} + void StringBase::destroy_string() { if (!is_short_string()) diff --git a/AK/StringBase.h b/AK/StringBase.h index 5ce3ae77b74..86d4b7709e1 100644 --- a/AK/StringBase.h +++ b/AK/StringBase.h @@ -55,7 +55,10 @@ public: } // NOTE: This is primarily interesting to unit tests. - [[nodiscard]] bool is_short_string() const; + [[nodiscard]] constexpr bool is_short_string() const + { + return (m_short_string.byte_count_and_short_string_flag & SHORT_STRING_FLAG) != 0; + } // Returns the underlying UTF-8 encoded bytes. // NOTE: There is no guarantee about null-termination. @@ -80,12 +83,41 @@ protected: { } + template + ErrorOr replace_with_new_string(size_t byte_count, Func&& callback) + { + Bytes buffer = TRY(replace_with_uninitialized_buffer(byte_count)); + if (byte_count != 0) + TRY(callback(buffer)); + return {}; + } + + template + constexpr void replace_with_new_short_string(size_t byte_count, Func&& callback) + { + Bytes buffer = replace_with_uninitialized_short_string(byte_count); + if (byte_count != 0) + callback(buffer); + } + union { ShortString m_short_string; Detail::StringData const* m_data { nullptr }; }; private: + ErrorOr replace_with_uninitialized_buffer(size_t byte_count); + + constexpr Bytes replace_with_uninitialized_short_string(size_t byte_count) + { + VERIFY(is_short_string()); + VERIFY(byte_count <= MAX_SHORT_STRING_BYTE_COUNT); + + m_short_string = ShortString {}; + m_short_string.byte_count_and_short_string_flag = (byte_count << 1) | SHORT_STRING_FLAG; + return { m_short_string.storage, byte_count }; + } + void destroy_string(); }; diff --git a/AK/StringInternals.h b/AK/StringInternals.h index 64a06f18864..0e6f411d94f 100644 --- a/AK/StringInternals.h +++ b/AK/StringInternals.h @@ -17,7 +17,6 @@ class StringData final : public RefCounted { public: static ErrorOr> create_uninitialized(size_t, u8*& buffer); static ErrorOr> create_substring(StringData const& superstring, size_t start, size_t byte_count); - static ErrorOr> from_utf8(char const* utf8_bytes, size_t); static ErrorOr> from_stream(Stream&, size_t byte_count); struct SubstringData {