mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 07:30:19 +00:00
AK: Implement FlyString for the new String class
This implements a FlyString that will de-duplicate String instances. The FlyString will store the raw encoded data of the String instance: If the String is a short string, FlyString holds the String::ShortString bytes; otherwise FlyString holds a pointer to the Detail::StringData. FlyString itself does not know about String's storage or how to refcount its Detail::StringData. It defers to String to implement these details.
This commit is contained in:
parent
f49a65cb28
commit
1d4f287582
Notes:
sideshowbarker
2024-07-17 01:49:30 +09:00
Author: https://github.com/trflynn89 Commit: https://github.com/SerenityOS/serenity/commit/1d4f287582 Pull-request: https://github.com/SerenityOS/serenity/pull/16958
8 changed files with 445 additions and 1 deletions
|
@ -5,6 +5,7 @@ set(AK_SOURCES
|
|||
DeprecatedFlyString.cpp
|
||||
DeprecatedString.cpp
|
||||
FloatingPointStringConversions.cpp
|
||||
FlyString.cpp
|
||||
Format.cpp
|
||||
FuzzyMatch.cpp
|
||||
GenericLexer.cpp
|
||||
|
|
169
AK/FlyString.cpp
Normal file
169
AK/FlyString.cpp
Normal file
|
@ -0,0 +1,169 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/HashMap.h>
|
||||
#include <AK/Singleton.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
static auto& all_fly_strings()
|
||||
{
|
||||
static Singleton<HashMap<StringView, uintptr_t>> table;
|
||||
return *table;
|
||||
}
|
||||
|
||||
// Constructs a FlyString holding the raw data of a default-constructed String.
FlyString::FlyString()
{
    String const empty_string {};
    m_data = empty_string.to_fly_string_data({});
}
|
||||
|
||||
// Releases this FlyString's reference on its data. String decides whether the
// raw data is actually reference counted.
FlyString::~FlyString()
{
    auto const raw_data = m_data;
    String::unref_fly_string_data({}, raw_data);
}
|
||||
|
||||
// Builds a String from the given UTF-8 view and wraps it in a FlyString.
// Any error reported by String::from_utf8 is propagated to the caller.
ErrorOr<FlyString> FlyString::from_utf8(StringView string)
{
    auto decoded_string = TRY(String::from_utf8(string));
    return FlyString { decoded_string };
}
|
||||
|
||||
// Constructs a FlyString from a String, sharing the data of an already
// registered fly string with the same bytes when one exists.
FlyString::FlyString(String const& string)
{
    // Short strings are held directly in m_data and never enter the table.
    if (string.is_short_string()) {
        m_data = string.to_fly_string_data({});
        return;
    }

    auto& table = all_fly_strings();
    auto const key = string.bytes_as_string_view();

    if (auto existing = table.find(key); existing != table.end()) {
        // Another fly string with these bytes already exists; share its data.
        m_data = existing->value;
    } else {
        // First fly string with these bytes: register the String's own data
        // and mark it so the table entry is removed when the data is destroyed.
        m_data = string.to_fly_string_data({});

        table.set(key, m_data);
        string.did_create_fly_string({});
    }

    // Either way, this FlyString now holds its own reference on the data.
    String::ref_fly_string_data({}, m_data);
}
|
||||
|
||||
// Copy constructor: shares the other FlyString's data and takes a reference on it.
FlyString::FlyString(FlyString const& other)
{
    m_data = other.m_data;
    String::ref_fly_string_data({}, m_data);
}
|
||||
|
||||
// Copy assignment: releases the data currently held and shares the data of `other`.
//
// The reference on the previously held data must be dropped here; otherwise it
// is never released (the destructor only unrefs whatever m_data holds at that
// point), which would leak the string data and leave its entry in the fly
// string table.
FlyString& FlyString::operator=(FlyString const& other)
{
    if (this != &other) {
        // Take the new reference before dropping the old one so that the data
        // stays alive even if both objects already refer to the same data.
        String::ref_fly_string_data({}, other.m_data);
        String::unref_fly_string_data({}, m_data);

        m_data = other.m_data;
    }

    return *this;
}
|
||||
|
||||
// Move constructor: takes over the other FlyString's data (and the reference it
// held) and resets `other` to the data of a default-constructed String.
FlyString::FlyString(FlyString&& other)
{
    m_data = other.m_data;

    String const empty_string {};
    other.m_data = empty_string.to_fly_string_data({});
}
|
||||
|
||||
// Move assignment: releases the data currently held, takes over the data (and
// the reference) held by `other`, and resets `other` to the data of a
// default-constructed String.
//
// Self-move is a no-op. Without the guard, the object would be reset to empty
// while its reference was silently dropped.
FlyString& FlyString::operator=(FlyString&& other)
{
    if (this != &other) {
        // Drop our reference on the old data; it would otherwise never be released.
        String::unref_fly_string_data({}, m_data);

        // The reference owned by `other` is transferred to us, so no ref is needed.
        m_data = other.m_data;
        other.m_data = String {}.to_fly_string_data({});
    }

    return *this;
}
|
||||
|
||||
bool FlyString::is_empty() const
|
||||
{
|
||||
return bytes_as_string_view().is_empty();
|
||||
}
|
||||
|
||||
unsigned FlyString::hash() const
|
||||
{
|
||||
return bytes_as_string_view().hash();
|
||||
}
|
||||
|
||||
// Explicit conversion to String; produces the same result as to_string().
FlyString::operator String() const
{
    return String::fly_string_data_to_string({}, m_data);
}
|
||||
|
||||
// Returns a String built from this FlyString's raw data. String owns the
// knowledge of how that raw data is interpreted.
String FlyString::to_string() const
{
    auto string = String::fly_string_data_to_string({}, m_data);
    return string;
}
|
||||
|
||||
// Returns a Utf8View over the string's bytes.
Utf8View FlyString::code_points() const
{
    auto const view = bytes_as_string_view();
    return Utf8View { view };
}
|
||||
|
||||
// Returns the string's bytes as a read-only span.
ReadonlyBytes FlyString::bytes() const
{
    auto const view = bytes_as_string_view();
    return view.bytes();
}
|
||||
|
||||
// Returns a view of the string's bytes.
// m_data is handed to String by reference (not copied into a local) because,
// for short strings, the returned view points into the referenced storage.
StringView FlyString::bytes_as_string_view() const
{
    StringView const view = String::fly_string_data_to_string_view({}, m_data);
    return view;
}
|
||||
|
||||
// Two FlyStrings compare equal exactly when their raw data values are equal.
bool FlyString::operator==(FlyString const& other) const
{
    bool const has_same_data = m_data == other.m_data;
    return has_same_data;
}
|
||||
|
||||
// Compares against a String: identical raw data is a fast path, otherwise the
// bytes are compared.
bool FlyString::operator==(String const& other) const
{
    auto const other_data = other.to_fly_string_data({});

    return m_data == other_data
        || bytes_as_string_view() == other.bytes_as_string_view();
}
|
||||
|
||||
// Compares the string's bytes against the given view.
bool FlyString::operator==(StringView string) const
{
    auto const view = bytes_as_string_view();
    return view == string;
}
|
||||
|
||||
bool FlyString::operator==(char const* string) const
|
||||
{
|
||||
return bytes_as_string_view() == string;
|
||||
}
|
||||
|
||||
// Called by Detail::StringData to drop the table entry keyed by the given bytes.
void FlyString::did_destroy_fly_string_data(Badge<Detail::StringData>, StringView string_data)
{
    auto& table = all_fly_strings();
    table.remove(string_data);
}
|
||||
|
||||
// Exposes the raw data value to String (badge-restricted).
uintptr_t FlyString::data(Badge<String>) const
{
    uintptr_t const raw_data = m_data;
    return raw_data;
}
|
||||
|
||||
// Returns the number of entries currently in the fly string table.
size_t FlyString::number_of_fly_strings()
{
    auto const& table = all_fly_strings();
    return table.size();
}
|
||||
|
||||
// Hash trait for FlyString; FlyString::hash() hashes the string's bytes.
unsigned Traits<FlyString>::hash(FlyString const& fly_string)
{
    return fly_string.hash();
}
|
||||
|
||||
// Formats a FlyString by delegating to the StringView formatter with its bytes.
ErrorOr<void> Formatter<FlyString>::format(FormatBuilder& builder, FlyString const& fly_string)
{
    auto const view = fly_string.bytes_as_string_view();
    return Formatter<StringView>::format(builder, view);
}
|
||||
|
||||
}
|
73
AK/FlyString.h
Normal file
73
AK/FlyString.h
Normal file
|
@ -0,0 +1,73 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Error.h>
|
||||
#include <AK/Format.h>
|
||||
#include <AK/Platform.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Traits.h>
|
||||
#include <AK/Types.h>
|
||||
|
||||
namespace AK {
|
||||
|
||||
class FlyString {
|
||||
public:
|
||||
FlyString();
|
||||
~FlyString();
|
||||
|
||||
static ErrorOr<FlyString> from_utf8(StringView);
|
||||
explicit FlyString(String const&);
|
||||
|
||||
FlyString(FlyString const&);
|
||||
FlyString& operator=(FlyString const&);
|
||||
|
||||
FlyString(FlyString&&);
|
||||
FlyString& operator=(FlyString&&);
|
||||
|
||||
[[nodiscard]] bool is_empty() const;
|
||||
[[nodiscard]] unsigned hash() const;
|
||||
|
||||
explicit operator String() const;
|
||||
String to_string() const;
|
||||
|
||||
[[nodiscard]] Utf8View code_points() const;
|
||||
[[nodiscard]] ReadonlyBytes bytes() const;
|
||||
[[nodiscard]] StringView bytes_as_string_view() const;
|
||||
|
||||
[[nodiscard]] bool operator==(FlyString const& other) const;
|
||||
[[nodiscard]] bool operator==(String const&) const;
|
||||
[[nodiscard]] bool operator==(StringView) const;
|
||||
[[nodiscard]] bool operator==(char const*) const;
|
||||
|
||||
static void did_destroy_fly_string_data(Badge<Detail::StringData>, StringView);
|
||||
[[nodiscard]] uintptr_t data(Badge<String>) const;
|
||||
|
||||
// This is primarily interesting to unit tests.
|
||||
[[nodiscard]] static size_t number_of_fly_strings();
|
||||
|
||||
private:
|
||||
// This will hold either the pointer to the Detail::StringData it represents or the raw bytes of
|
||||
// an inlined short string.
|
||||
uintptr_t m_data { 0 };
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Traits<FlyString> : public GenericTraits<FlyString> {
|
||||
static unsigned hash(FlyString const&);
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Formatter<FlyString> : Formatter<StringView> {
|
||||
ErrorOr<void> format(FormatBuilder&, FlyString const&);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#if USING_AK_GLOBALLY
|
||||
using AK::FlyString;
|
||||
#endif
|
|
@ -21,6 +21,7 @@ class Bitmap;
|
|||
using ByteBuffer = Detail::ByteBuffer<32>;
|
||||
class CircularBuffer;
|
||||
class Error;
|
||||
class FlyString;
|
||||
class GenericLexer;
|
||||
class IPv4Address;
|
||||
class JsonArray;
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
*/
|
||||
|
||||
#include <AK/Checked.h>
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/Format.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
|
@ -59,6 +60,9 @@ public:
|
|||
return m_hash;
|
||||
}
|
||||
|
||||
// Whether this string data has been marked as a fly string.
bool is_fly_string() const
{
    return m_is_fly_string;
}
|
||||
// Marks or unmarks this string data as a fly string.
void set_fly_string(bool is_fly_string)
{
    m_is_fly_string = is_fly_string;
}
|
||||
|
||||
private:
|
||||
explicit StringData(size_t byte_count);
|
||||
StringData(StringData const& superstring, size_t start, size_t byte_count);
|
||||
|
@ -69,6 +73,7 @@ private:
|
|||
mutable unsigned m_hash { 0 };
|
||||
mutable bool m_has_hash { false };
|
||||
bool m_substring { false };
|
||||
bool m_is_fly_string { false };
|
||||
|
||||
u8 m_bytes_or_substring_data[0];
|
||||
};
|
||||
|
@ -95,6 +100,8 @@ StringData::StringData(StringData const& superstring, size_t start, size_t byte_
|
|||
|
||||
StringData::~StringData()
{
    // Data marked as a fly string notifies FlyString with its bytes so the
    // corresponding table entry can be dropped.
    if (is_fly_string())
        FlyString::did_destroy_fly_string_data({}, bytes_as_string_view());

    // A substring holds a reference on its superstring; release it.
    if (m_substring) {
        substring_data().superstring->unref();
    }
}
|
||||
|
@ -255,6 +262,13 @@ bool String::operator==(String const& other) const
|
|||
return bytes_as_string_view() == other.bytes_as_string_view();
|
||||
}
|
||||
|
||||
// Compares against a FlyString: identical raw data is a fast path, otherwise
// the bytes are compared.
bool String::operator==(FlyString const& other) const
{
    auto const own_raw_data = reinterpret_cast<uintptr_t>(m_data);

    return own_raw_data == other.data({})
        || bytes_as_string_view() == other.bytes_as_string_view();
}
|
||||
|
||||
bool String::operator==(StringView other) const
|
||||
{
|
||||
return bytes_as_string_view() == other;
|
||||
|
@ -307,7 +321,7 @@ ErrorOr<String> String::replace(StringView needle, StringView replacement, Repla
|
|||
|
||||
bool String::is_short_string() const
|
||||
{
|
||||
return reinterpret_cast<uintptr_t>(m_data) & SHORT_STRING_FLAG;
|
||||
return has_short_string_bit(reinterpret_cast<uintptr_t>(m_data));
|
||||
}
|
||||
|
||||
ReadonlyBytes String::ShortString::bytes() const
|
||||
|
@ -325,6 +339,55 @@ unsigned Traits<String>::hash(String const& string)
|
|||
return string.hash();
|
||||
}
|
||||
|
||||
// Rebuilds a String from a FlyString's raw data value.
String String::fly_string_data_to_string(Badge<FlyString>, uintptr_t const& data)
{
    if (!has_short_string_bit(data)) {
        // The value is a pointer to the string data; the new String refs it.
        auto const* heap_data = reinterpret_cast<Detail::StringData const*>(data);
        return String { NonnullRefPtr<Detail::StringData>(*heap_data) };
    }

    // The value itself holds the bytes of a short string.
    return String { *reinterpret_cast<ShortString const*>(&data) };
}
|
||||
|
||||
// Returns a view of the bytes described by a FlyString's raw data value.
// For short strings the view points into `data` itself, which is why the value
// is taken by reference.
StringView String::fly_string_data_to_string_view(Badge<FlyString>, uintptr_t const& data)
{
    if (!has_short_string_bit(data)) {
        auto const* heap_data = reinterpret_cast<Detail::StringData const*>(data);
        return heap_data->bytes_as_string_view();
    }

    auto const* inline_string = reinterpret_cast<ShortString const*>(&data);
    return inline_string->bytes();
}
|
||||
|
||||
// Returns this String's storage reinterpreted as a raw integer value.
uintptr_t String::to_fly_string_data(Badge<FlyString>) const
{
    auto const raw_data = reinterpret_cast<uintptr_t>(m_data);
    return raw_data;
}
|
||||
|
||||
// Takes a reference on the string data behind a raw fly string value.
// Values carrying the short string bit are not pointers, so nothing is done.
void String::ref_fly_string_data(Badge<FlyString>, uintptr_t data)
{
    if (!has_short_string_bit(data))
        reinterpret_cast<Detail::StringData const*>(data)->ref();
}
|
||||
|
||||
// Drops a reference on the string data behind a raw fly string value.
// Values carrying the short string bit are not pointers, so nothing is done.
void String::unref_fly_string_data(Badge<FlyString>, uintptr_t data)
{
    if (!has_short_string_bit(data))
        reinterpret_cast<Detail::StringData const*>(data)->unref();
}
|
||||
|
||||
// Called by FlyString after it registered this String's data; flags the data
// as a fly string. Must not be called on a short string.
void String::did_create_fly_string(Badge<FlyString>) const
{
    VERIFY(!is_short_string());

    auto* string_data = m_data;
    string_data->set_fly_string(true);
}
|
||||
|
||||
DeprecatedString String::to_deprecated_string() const
|
||||
{
|
||||
return DeprecatedString(bytes_as_string_view());
|
||||
|
|
16
AK/String.h
16
AK/String.h
|
@ -73,6 +73,9 @@ public:
|
|||
[[nodiscard]] bool operator==(String const&) const;
|
||||
// Negation of operator==(String const&).
[[nodiscard]] bool operator!=(String const& other) const
{
    bool const is_equal = *this == other;
    return !is_equal;
}
|
||||
|
||||
[[nodiscard]] bool operator==(FlyString const&) const;
|
||||
// Negation of operator==(FlyString const&).
[[nodiscard]] bool operator!=(FlyString const& other) const
{
    bool const is_equal = *this == other;
    return !is_equal;
}
|
||||
|
||||
[[nodiscard]] bool operator==(StringView) const;
|
||||
// Negation of operator==(StringView).
[[nodiscard]] bool operator!=(StringView other) const
{
    bool const is_equal = *this == other;
    return !is_equal;
}
|
||||
|
||||
|
@ -102,6 +105,14 @@ public:
|
|||
// NOTE: This is primarily interesting to unit tests.
|
||||
[[nodiscard]] bool is_short_string() const;
|
||||
|
||||
[[nodiscard]] static String fly_string_data_to_string(Badge<FlyString>, uintptr_t const&);
|
||||
[[nodiscard]] static StringView fly_string_data_to_string_view(Badge<FlyString>, uintptr_t const&);
|
||||
[[nodiscard]] uintptr_t to_fly_string_data(Badge<FlyString>) const;
|
||||
|
||||
static void ref_fly_string_data(Badge<FlyString>, uintptr_t);
|
||||
static void unref_fly_string_data(Badge<FlyString>, uintptr_t);
|
||||
void did_create_fly_string(Badge<FlyString>) const;
|
||||
|
||||
// FIXME: Remove these once all code has been ported to String
|
||||
[[nodiscard]] DeprecatedString to_deprecated_string() const;
|
||||
static ErrorOr<String> from_deprecated_string(DeprecatedString const&);
|
||||
|
@ -110,6 +121,11 @@ private:
|
|||
// NOTE: If the least significant bit of the pointer is set, this is a short string.
|
||||
static constexpr uintptr_t SHORT_STRING_FLAG = 1;
|
||||
|
||||
// Tests whether the short string flag bit is set in a raw data value.
static constexpr bool has_short_string_bit(uintptr_t data)
{
    auto const flag_bits = data & SHORT_STRING_FLAG;
    return flag_bits != 0;
}
|
||||
|
||||
struct ShortString {
|
||||
ReadonlyBytes bytes() const;
|
||||
size_t byte_count() const;
|
||||
|
|
|
@ -30,6 +30,7 @@ set(AK_TEST_SOURCES
|
|||
TestFixedPoint.cpp
|
||||
TestFloatingPoint.cpp
|
||||
TestFloatingPointParsing.cpp
|
||||
TestFlyString.cpp
|
||||
TestFormat.cpp
|
||||
TestGenericLexer.cpp
|
||||
TestHashFunctions.cpp
|
||||
|
|
120
Tests/AK/TestFlyString.cpp
Normal file
120
Tests/AK/TestFlyString.cpp
Normal file
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Tim Flynn <trflynn89@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <LibTest/TestCase.h>
|
||||
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Try.h>
|
||||
|
||||
TEST_CASE(empty_string)
{
    // A default-constructed FlyString is empty and compares equal to "".
    FlyString empty_fly {};
    EXPECT(empty_fly.is_empty());
    EXPECT_EQ(empty_fly, ""sv);

    // Short strings do not get stored in the fly string table.
    EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
}
|
||||
|
||||
TEST_CASE(short_string)
{
    // Two fly strings with the same short contents, and one that differs.
    FlyString foo_a { MUST(String::from_utf8("foo"sv)) };
    FlyString foo_b { MUST(String::from_utf8("foo"sv)) };
    FlyString bar { MUST(String::from_utf8("bar"sv)) };

    EXPECT_EQ(foo_a, "foo"sv);
    EXPECT_EQ(foo_b, "foo"sv);
    EXPECT_EQ(bar, "bar"sv);

    EXPECT_EQ(foo_a, foo_b);
    EXPECT_NE(foo_a, bar);
    EXPECT_NE(foo_b, bar);

    // Converting back to String must yield short strings again.
    EXPECT(foo_a.to_string().is_short_string());
    EXPECT(foo_b.to_string().is_short_string());
    EXPECT(bar.to_string().is_short_string());

    // Short strings do not get stored in the fly string table.
    EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
}
|
||||
|
||||
TEST_CASE(long_string)
{
    auto const first_text = "thisisdefinitelymorethan7bytes"sv;
    auto const second_text = "thisisalsoforsuremorethan7bytes"sv;

    // The first long string adds one table entry.
    FlyString fly1 { MUST(String::from_utf8(first_text)) };
    EXPECT_EQ(fly1, first_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);

    // The same contents again must not add a second entry.
    FlyString fly2 { MUST(String::from_utf8(first_text)) };
    EXPECT_EQ(fly2, first_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);

    // Different contents add a new entry.
    FlyString fly3 { MUST(String::from_utf8(second_text)) };
    EXPECT_EQ(fly3, second_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 2u);

    EXPECT_EQ(fly1, fly2);
    EXPECT_NE(fly1, fly3);
    EXPECT_NE(fly2, fly3);

    // Converting back to String must not yield short strings.
    EXPECT(!fly1.to_string().is_short_string());
    EXPECT(!fly2.to_string().is_short_string());
    EXPECT(!fly3.to_string().is_short_string());
}
|
||||
|
||||
TEST_CASE(from_string_view)
{
    auto const long_text = "thisisdefinitelymorethan7bytes"sv;

    // FlyString::from_utf8 registers long strings exactly once.
    auto fly1 = MUST(FlyString::from_utf8(long_text));
    EXPECT_EQ(fly1, long_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);

    auto fly2 = MUST(FlyString::from_utf8(long_text));
    EXPECT_EQ(fly2, long_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);

    // A short string leaves the table size untouched.
    auto fly3 = MUST(FlyString::from_utf8("foo"sv));
    EXPECT_EQ(fly3, "foo"sv);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);

    EXPECT_EQ(fly1, fly2);
    EXPECT_NE(fly1, fly3);
    EXPECT_NE(fly2, fly3);
}
|
||||
|
||||
TEST_CASE(fly_string_keep_string_data_alive)
{
    auto const long_text = "thisisdefinitelymorethan7bytes"sv;

    EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);

    {
        FlyString survivor {};

        {
            // The source String goes out of scope before the FlyString does.
            auto source = MUST(String::from_utf8(long_text));
            survivor = FlyString { source };
            EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
        }

        // The FlyString must still see the contents and keep the table entry.
        EXPECT_EQ(survivor, long_text);
        EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
    }

    // Once the last FlyString is gone, the table entry must be gone too.
    EXPECT_EQ(FlyString::number_of_fly_strings(), 0u);
}
|
||||
|
||||
TEST_CASE(moved_fly_string_becomes_empty)
{
    auto const long_text = "thisisdefinitelymorethan7bytes"sv;

    FlyString target {};
    EXPECT(target.is_empty());

    FlyString source { MUST(String::from_utf8(long_text)) };
    EXPECT_EQ(source, long_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);

    target = move(source);

    // The moved-from FlyString is empty; the contents and the single table
    // entry now belong to the target.
    EXPECT(source.is_empty());
    EXPECT_EQ(target, long_text);
    EXPECT_EQ(FlyString::number_of_fly_strings(), 1u);
}
|
Loading…
Reference in a new issue