AK: Implement FlyString for the new String class

This implements a FlyString that will de-duplicate String instances. The
FlyString will store the raw encoded data of the String instance: If the
String is a short string, FlyString holds the String::ShortString bytes;
otherwise FlyString holds a pointer to the Detail::StringData.

FlyString itself does not know about String's storage or how to refcount
its Detail::StringData. It defers to String to implement these details.
This commit is contained in:
Timothy Flynn 2023-01-11 08:26:49 -05:00 committed by Andreas Kling
parent f49a65cb28
commit 1d4f287582
Notes: sideshowbarker 2024-07-17 01:49:30 +09:00
8 changed files with 445 additions and 1 deletions

View file

@ -5,6 +5,7 @@ set(AK_SOURCES
DeprecatedFlyString.cpp
DeprecatedString.cpp
FloatingPointStringConversions.cpp
FlyString.cpp
Format.cpp
FuzzyMatch.cpp
GenericLexer.cpp

169
AK/FlyString.cpp Normal file
View file

@ -0,0 +1,169 @@
/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/FlyString.h>
#include <AK/HashMap.h>
#include <AK/Singleton.h>
#include <AK/StringView.h>
#include <AK/Utf8View.h>
namespace AK {
static auto& all_fly_strings()
{
static Singleton<HashMap<StringView, uintptr_t>> table;
return *table;
}
FlyString::FlyString()
: m_data(String {}.to_fly_string_data({}))
{
}
FlyString::~FlyString()
{
String::unref_fly_string_data({}, m_data);
}
ErrorOr<FlyString> FlyString::from_utf8(StringView string)
{
return FlyString { TRY(String::from_utf8(string)) };
}
FlyString::FlyString(String const& string)
{
if (string.is_short_string()) {
m_data = string.to_fly_string_data({});
return;
}
auto it = all_fly_strings().find(string.bytes_as_string_view());
if (it == all_fly_strings().end()) {
m_data = string.to_fly_string_data({});
all_fly_strings().set(string.bytes_as_string_view(), m_data);
string.did_create_fly_string({});
} else {
m_data = it->value;
}
String::ref_fly_string_data({}, m_data);
}
FlyString::FlyString(FlyString const& other)
: m_data(other.m_data)
{
String::ref_fly_string_data({}, m_data);
}
FlyString& FlyString::operator=(FlyString const& other)
{
if (this != &other) {
m_data = other.m_data;
String::ref_fly_string_data({}, m_data);
}
return *this;
}
FlyString::FlyString(FlyString&& other)
: m_data(other.m_data)
{
other.m_data = String {}.to_fly_string_data({});
}
FlyString& FlyString::operator=(FlyString&& other)
{
m_data = other.m_data;
other.m_data = String {}.to_fly_string_data({});
return *this;
}
bool FlyString::is_empty() const
{
return bytes_as_string_view().is_empty();
}
unsigned FlyString::hash() const
{
return bytes_as_string_view().hash();
}
FlyString::operator String() const
{
return to_string();
}
String FlyString::to_string() const
{
return String::fly_string_data_to_string({}, m_data);
}
Utf8View FlyString::code_points() const
{
return Utf8View { bytes_as_string_view() };
}
ReadonlyBytes FlyString::bytes() const
{
return bytes_as_string_view().bytes();
}
StringView FlyString::bytes_as_string_view() const
{
return String::fly_string_data_to_string_view({}, m_data);
}
bool FlyString::operator==(FlyString const& other) const
{
return m_data == other.m_data;
}
bool FlyString::operator==(String const& other) const
{
if (m_data == other.to_fly_string_data({}))
return true;
return bytes_as_string_view() == other.bytes_as_string_view();
}
bool FlyString::operator==(StringView string) const
{
return bytes_as_string_view() == string;
}
bool FlyString::operator==(char const* string) const
{
return bytes_as_string_view() == string;
}
void FlyString::did_destroy_fly_string_data(Badge<Detail::StringData>, StringView string_data)
{
all_fly_strings().remove(string_data);
}
uintptr_t FlyString::data(Badge<String>) const
{
return m_data;
}
size_t FlyString::number_of_fly_strings()
{
return all_fly_strings().size();
}
unsigned Traits<FlyString>::hash(FlyString const& fly_string)
{
return fly_string.bytes_as_string_view().hash();
}
ErrorOr<void> Formatter<FlyString>::format(FormatBuilder& builder, FlyString const& fly_string)
{
return Formatter<StringView>::format(builder, fly_string.bytes_as_string_view());
}
}

73
AK/FlyString.h Normal file
View file

@ -0,0 +1,73 @@
/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Error.h>
#include <AK/Format.h>
#include <AK/Platform.h>
#include <AK/String.h>
#include <AK/Traits.h>
#include <AK/Types.h>
namespace AK {
class FlyString {
public:
FlyString();
~FlyString();
static ErrorOr<FlyString> from_utf8(StringView);
explicit FlyString(String const&);
FlyString(FlyString const&);
FlyString& operator=(FlyString const&);
FlyString(FlyString&&);
FlyString& operator=(FlyString&&);
[[nodiscard]] bool is_empty() const;
[[nodiscard]] unsigned hash() const;
explicit operator String() const;
String to_string() const;
[[nodiscard]] Utf8View code_points() const;
[[nodiscard]] ReadonlyBytes bytes() const;
[[nodiscard]] StringView bytes_as_string_view() const;
[[nodiscard]] bool operator==(FlyString const& other) const;
[[nodiscard]] bool operator==(String const&) const;
[[nodiscard]] bool operator==(StringView) const;
[[nodiscard]] bool operator==(char const*) const;
static void did_destroy_fly_string_data(Badge<Detail::StringData>, StringView);
[[nodiscard]] uintptr_t data(Badge<String>) const;
// This is primarily interesting to unit tests.
[[nodiscard]] static size_t number_of_fly_strings();
private:
// This will hold either the pointer to the Detail::StringData it represents or the raw bytes of
// an inlined short string.
uintptr_t m_data { 0 };
};
template<>
struct Traits<FlyString> : public GenericTraits<FlyString> {
static unsigned hash(FlyString const&);
};
template<>
struct Formatter<FlyString> : Formatter<StringView> {
ErrorOr<void> format(FormatBuilder&, FlyString const&);
};
}
#if USING_AK_GLOBALLY
using AK::FlyString;
#endif

View file

@ -21,6 +21,7 @@ class Bitmap;
using ByteBuffer = Detail::ByteBuffer<32>;
class CircularBuffer;
class Error;
class FlyString;
class GenericLexer;
class IPv4Address;
class JsonArray;

View file

@ -5,6 +5,7 @@
*/
#include <AK/Checked.h>
#include <AK/FlyString.h>
#include <AK/Format.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
@ -59,6 +60,9 @@ public:
return m_hash;
}
bool is_fly_string() const { return m_is_fly_string; }
void set_fly_string(bool is_fly_string) { m_is_fly_string = is_fly_string; }
private:
explicit StringData(size_t byte_count);
StringData(StringData const& superstring, size_t start, size_t byte_count);
@ -69,6 +73,7 @@ private:
mutable unsigned m_hash { 0 };
mutable bool m_has_hash { false };
bool m_substring { false };
bool m_is_fly_string { false };
u8 m_bytes_or_substring_data[0];
};
@ -95,6 +100,8 @@ StringData::StringData(StringData const& superstring, size_t start, size_t byte_
StringData::~StringData()
{
if (m_is_fly_string)
FlyString::did_destroy_fly_string_data({}, bytes_as_string_view());
if (m_substring)
substring_data().superstring->unref();
}
@ -255,6 +262,13 @@ bool String::operator==(String const& other) const
return bytes_as_string_view() == other.bytes_as_string_view();
}
bool String::operator==(FlyString const& other) const
{
if (reinterpret_cast<uintptr_t>(m_data) == other.data({}))
return true;
return bytes_as_string_view() == other.bytes_as_string_view();
}
bool String::operator==(StringView other) const
{
return bytes_as_string_view() == other;
@ -307,7 +321,7 @@ ErrorOr<String> String::replace(StringView needle, StringView replacement, Repla
bool String::is_short_string() const
{
return reinterpret_cast<uintptr_t>(m_data) & SHORT_STRING_FLAG;
return has_short_string_bit(reinterpret_cast<uintptr_t>(m_data));
}
ReadonlyBytes String::ShortString::bytes() const
@ -325,6 +339,55 @@ unsigned Traits<String>::hash(String const& string)
return string.hash();
}
String String::fly_string_data_to_string(Badge<FlyString>, uintptr_t const& data)
{
if (has_short_string_bit(data))
return String { *reinterpret_cast<ShortString const*>(&data) };
auto const* string_data = reinterpret_cast<Detail::StringData const*>(data);
return String { NonnullRefPtr<Detail::StringData>(*string_data) };
}
StringView String::fly_string_data_to_string_view(Badge<FlyString>, uintptr_t const& data)
{
if (has_short_string_bit(data)) {
auto const* short_string = reinterpret_cast<ShortString const*>(&data);
return short_string->bytes();
}
auto const* string_data = reinterpret_cast<Detail::StringData const*>(data);
return string_data->bytes_as_string_view();
}
uintptr_t String::to_fly_string_data(Badge<FlyString>) const
{
return reinterpret_cast<uintptr_t>(m_data);
}
void String::ref_fly_string_data(Badge<FlyString>, uintptr_t data)
{
if (has_short_string_bit(data))
return;
auto const* string_data = reinterpret_cast<Detail::StringData const*>(data);
string_data->ref();
}
void String::unref_fly_string_data(Badge<FlyString>, uintptr_t data)
{
if (has_short_string_bit(data))
return;
auto const* string_data = reinterpret_cast<Detail::StringData const*>(data);
string_data->unref();
}
void String::did_create_fly_string(Badge<FlyString>) const
{
VERIFY(!is_short_string());
m_data->set_fly_string(true);
}
DeprecatedString String::to_deprecated_string() const
{
return DeprecatedString(bytes_as_string_view());

View file

@ -73,6 +73,9 @@ public:
[[nodiscard]] bool operator==(String const&) const;
[[nodiscard]] bool operator!=(String const& other) const { return !(*this == other); }
[[nodiscard]] bool operator==(FlyString const&) const;
[[nodiscard]] bool operator!=(FlyString const& other) const { return !(*this == other); }
[[nodiscard]] bool operator==(StringView) const;
[[nodiscard]] bool operator!=(StringView other) const { return !(*this == other); }
@ -102,6 +105,14 @@ public:
// NOTE: This is primarily interesting to unit tests.
[[nodiscard]] bool is_short_string() const;
[[nodiscard]] static String fly_string_data_to_string(Badge<FlyString>, uintptr_t const&);
[[nodiscard]] static StringView fly_string_data_to_string_view(Badge<FlyString>, uintptr_t const&);
[[nodiscard]] uintptr_t to_fly_string_data(Badge<FlyString>) const;
static void ref_fly_string_data(Badge<FlyString>, uintptr_t);
static void unref_fly_string_data(Badge<FlyString>, uintptr_t);
void did_create_fly_string(Badge<FlyString>) const;
// FIXME: Remove these once all code has been ported to String
[[nodiscard]] DeprecatedString to_deprecated_string() const;
static ErrorOr<String> from_deprecated_string(DeprecatedString const&);
@ -110,6 +121,11 @@ private:
// NOTE: If the least significant bit of the pointer is set, this is a short string.
static constexpr uintptr_t SHORT_STRING_FLAG = 1;
static constexpr bool has_short_string_bit(uintptr_t data)
{
return (data & SHORT_STRING_FLAG) != 0;
}
struct ShortString {
ReadonlyBytes bytes() const;
size_t byte_count() const;

View file

@ -30,6 +30,7 @@ set(AK_TEST_SOURCES
TestFixedPoint.cpp
TestFloatingPoint.cpp
TestFloatingPointParsing.cpp
TestFlyString.cpp
TestFormat.cpp
TestGenericLexer.cpp
TestHashFunctions.cpp

120
Tests/AK/TestFlyString.cpp Normal file
View file

@ -0,0 +1,120 @@
/*
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibTest/TestCase.h>
#include <AK/FlyString.h>
#include <AK/String.h>
#include <AK/Try.h>
TEST_CASE(empty_string)
{
FlyString fly {};
EXPECT(fly.is_empty());
EXPECT_EQ(fly, ""sv);
// Short strings do not get stored in the fly string table.
EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
}
TEST_CASE(short_string)
{
FlyString fly1 { MUST(String::from_utf8("foo"sv)) };
EXPECT_EQ(fly1, "foo"sv);
FlyString fly2 { MUST(String::from_utf8("foo"sv)) };
EXPECT_EQ(fly2, "foo"sv);
FlyString fly3 { MUST(String::from_utf8("bar"sv)) };
EXPECT_EQ(fly3, "bar"sv);
EXPECT_EQ(fly1, fly2);
EXPECT_NE(fly1, fly3);
EXPECT_NE(fly2, fly3);
EXPECT(fly1.to_string().is_short_string());
EXPECT(fly2.to_string().is_short_string());
EXPECT(fly3.to_string().is_short_string());
// Short strings do not get stored in the fly string table.
EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
}
TEST_CASE(long_string)
{
FlyString fly1 { MUST(String::from_utf8("thisisdefinitelymorethan7bytes"sv)) };
EXPECT_EQ(fly1, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
FlyString fly2 { MUST(String::from_utf8("thisisdefinitelymorethan7bytes"sv)) };
EXPECT_EQ(fly2, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
FlyString fly3 { MUST(String::from_utf8("thisisalsoforsuremorethan7bytes"sv)) };
EXPECT_EQ(fly3, "thisisalsoforsuremorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 2u);
EXPECT_EQ(fly1, fly2);
EXPECT_NE(fly1, fly3);
EXPECT_NE(fly2, fly3);
EXPECT(!fly1.to_string().is_short_string());
EXPECT(!fly2.to_string().is_short_string());
EXPECT(!fly3.to_string().is_short_string());
}
TEST_CASE(from_string_view)
{
auto fly1 = MUST(FlyString::from_utf8("thisisdefinitelymorethan7bytes"sv));
EXPECT_EQ(fly1, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
auto fly2 = MUST(FlyString::from_utf8("thisisdefinitelymorethan7bytes"sv));
EXPECT_EQ(fly2, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
auto fly3 = MUST(FlyString::from_utf8("foo"sv));
EXPECT_EQ(fly3, "foo"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
EXPECT_EQ(fly1, fly2);
EXPECT_NE(fly1, fly3);
EXPECT_NE(fly2, fly3);
}
TEST_CASE(fly_string_keep_string_data_alive)
{
EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
{
FlyString fly {};
{
auto string = MUST(String::from_utf8("thisisdefinitelymorethan7bytes"sv));
fly = FlyString { string };
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
}
EXPECT_EQ(fly, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
}
EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
}
TEST_CASE(moved_fly_string_becomes_empty)
{
FlyString fly1 {};
EXPECT(fly1.is_empty());
FlyString fly2 { MUST(String::from_utf8("thisisdefinitelymorethan7bytes"sv)) };
EXPECT_EQ(fly2, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
fly1 = move(fly2);
EXPECT(fly2.is_empty());
EXPECT_EQ(fly1, "thisisdefinitelymorethan7bytes"sv);
EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
}