ladybird/AK/StringBase.cpp
Timothy Flynn 29879a69a4 AK: Construct Strings from StringBuilder without re-allocating the data
Currently, invoking StringBuilder::to_string will re-allocate the string
data to construct the String. This is wasteful both in terms of memory
and speed.

The goal here is to simply hand the string buffer over to String, and
let String take ownership of that buffer. To do this, StringBuilder must
have the same memory layout as Detail::StringData. This layout is just
the members of the StringData class followed by the string itself.

So when a StringBuilder is created, we reserve sizeof(StringData) bytes
at the front of the buffer. StringData can then construct itself into
the buffer with placement new.

Things to note:
* StringData must now be aware of the actual capacity of its buffer, as
  that can be larger than the string size.
* We must take care not to pass ownership of inlined string buffers, as
  these live on the stack.
2024-07-20 06:45:49 +02:00

137 lines
3.3 KiB
C++

/*
* Copyright (c) 2023, Dan Klishch <danilklishch@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Badge.h>
#include <AK/FlyString.h>
#include <AK/StringBase.h>
#include <AK/StringData.h>
namespace AK::Detail {
// Returns a read-only view over the inline storage, limited to the number of
// bytes reported by byte_count().
ReadonlyBytes ShortString::bytes() const
{
    auto const length = byte_count();
    return ReadonlyBytes { storage, length };
}
// Returns the length, in bytes, of the inline string.
size_t ShortString::byte_count() const
{
    // Shifting right by one discards the lowest bit of the packed field, which
    // (judging by the field's name) carries the short-string flag.
    size_t const length = byte_count_and_short_string_flag >> 1;
    return length;
}
// Constructs a StringBase backed by the given StringData.
// The pointer obtained from `data.leak_ref()` is stored directly in m_data, so
// the reference previously held by `data` is presumably transferred to this
// object rather than released (confirm against NonnullRefPtr::leak_ref()).
StringBase::StringBase(NonnullRefPtr<Detail::StringData const> data)
: m_data(&data.leak_ref())
{
}
// Copy constructor: duplicates the stored representation of `other`.
StringBase::StringBase(StringBase const& other)
    : m_data(other.m_data)
{
    // A short string needs no reference counting; nothing more to do.
    if (is_short_string())
        return;

    // Otherwise both objects now point at the same StringData, so take an
    // additional reference on it.
    m_data->ref();
}
// Move assignment: releases whatever this object currently holds and takes
// over the representation of `other`, leaving `other` flagged as a short
// string.
//
// Returns *this.
StringBase& StringBase::operator=(StringBase&& other)
{
    // Self-move must be a no-op. Without this guard we would drop our own
    // reference first and then overwrite ourselves with the short-string
    // flag, losing the string's contents. The copy assignment operator
    // performs the same identity check.
    if (this == &other)
        return *this;

    // Drop our reference to the current heap data, if any.
    if (!is_short_string())
        m_data->unref();

    // Steal the other object's representation...
    m_data = exchange(other.m_data, nullptr);
    // ...and mark the moved-from object as a short string so that it no longer
    // refers to the data we now own.
    other.m_short_string.byte_count_and_short_string_flag = SHORT_STRING_FLAG;
    return *this;
}
// Copy assignment: makes this object hold the same string as `other`.
//
// Returns *this.
StringBase& StringBase::operator=(StringBase const& other)
{
    // Assigning an object to itself changes nothing.
    if (this == &other)
        return *this;

    // Give up our reference to the current heap data (no-op for short strings).
    destroy_string();

    // Adopt the other object's representation; if that is heap data, it is
    // now shared and needs one more reference.
    m_data = other.m_data;
    if (!is_short_string())
        m_data->ref();

    return *this;
}
// Returns a read-only view of the string's bytes, taken from the StringData
// object or from the inline short-string storage, whichever is in use.
ReadonlyBytes StringBase::bytes() const
{
    if (!is_short_string())
        return m_data->bytes();

    return m_short_string.bytes();
}
// Returns the hash of the string.
u32 StringBase::hash() const
{
    // Non-short strings delegate to the StringData object.
    if (!is_short_string())
        return m_data->hash();

    // Short strings are hashed directly from their bytes.
    auto const contents = bytes();
    auto const* characters = reinterpret_cast<char const*>(contents.data());
    return string_hash(characters, contents.size());
}
// Returns the length of the string in bytes.
size_t StringBase::byte_count() const
{
    if (!is_short_string())
        return m_data->byte_count();

    // ShortString::byte_count() extracts the length from the packed
    // count-and-flag byte.
    return m_short_string.byte_count();
}
// Compares two strings for equality.
bool StringBase::operator==(StringBase const& other) const
{
    // When this string is short, the raw m_data values of both objects are
    // compared directly.
    if (is_short_string())
        return m_data == other.m_data;

    // This string is not short; a short `other` is never considered equal.
    if (other.is_short_string())
        return false;

    // Two fly strings are compared by pointer identity (presumably because fly
    // strings are deduplicated -- confirm against FlyString); anything else
    // falls back to comparing the bytes.
    bool const both_are_fly_strings = m_data->is_fly_string() && other.m_data->is_fly_string();
    return both_are_fly_strings ? m_data == other.m_data : bytes() == other.bytes();
}
// Replaces the contents of this string with the contents of `builder`.
void StringBase::replace_with_string_builder(StringBuilder& builder)
{
    auto const length = builder.length();

    // Too long for the short-string representation: release the current data
    // and let StringData build itself from the builder.
    if (length > MAX_SHORT_STRING_BYTE_COUNT) {
        destroy_string();
        m_data = &StringData::create_from_string_builder(builder).leak_ref();
        return;
    }

    // Fits into a short string: copy the builder's bytes into the new buffer.
    replace_with_new_short_string(length, [&builder](Bytes destination) {
        builder.string_view().bytes().copy_to(destination);
    });
}
// Replaces the contents of this string with an uninitialized buffer of
// `byte_count` bytes and returns a writable view of that buffer for the caller
// to fill in.
//
// Returns an error if StringData::create_uninitialized() fails; in that case
// this string is left untouched.
ErrorOr<Bytes> StringBase::replace_with_uninitialized_buffer(size_t byte_count)
{
    if (byte_count <= MAX_SHORT_STRING_BYTE_COUNT)
        return replace_with_uninitialized_short_string(byte_count);

    // Allocate the new data *before* releasing the old one. If the allocation
    // fails, TRY returns early, and releasing first would leave m_data pointing
    // at data we no longer hold a reference to.
    u8* buffer = nullptr;
    auto new_data = TRY(StringData::create_uninitialized(byte_count, buffer));

    destroy_string();
    m_data = &new_data.leak_ref();
    return Bytes { buffer, byte_count };
}
// Returns the substring of `length` bytes starting at byte offset `start`.
//
// The range [start, start + length) must lie within this string; this is
// enforced with VERIFY. Empty results yield a default-constructed StringBase,
// results of at most MAX_SHORT_STRING_BYTE_COUNT bytes are copied into a short
// string, and longer results are created via StringData::create_substring().
//
// Returns an error if StringData::create_substring() fails.
ErrorOr<StringBase> StringBase::substring_from_byte_offset_with_shared_superstring(size_t start, size_t length) const
{
    // Bounds check written so that it cannot wrap around: `start + length`
    // may overflow size_t and slip past a naive `start + length <= size` test.
    auto const total_byte_count = byte_count();
    VERIFY(start <= total_byte_count);
    VERIFY(length <= total_byte_count - start);

    if (length == 0)
        return StringBase {};

    if (length <= MAX_SHORT_STRING_BYTE_COUNT) {
        StringBase result;
        bytes().slice(start, length).copy_to(result.replace_with_uninitialized_short_string(length));
        return result;
    }

    // A result this long is built from *m_data by create_substring().
    return StringBase { TRY(Detail::StringData::create_substring(*m_data, start, length)) };
}
// Releases this object's reference to its StringData, if it has one.
// m_data itself is not reset here.
void StringBase::destroy_string()
{
    // Short strings hold no reference, so there is nothing to release.
    if (is_short_string())
        return;

    m_data->unref();
}
}