2020-01-18 08:38:21 +00:00
|
|
|
/*
|
2024-10-04 11:19:50 +00:00
|
|
|
* Copyright (c) 2018-2020, Andreas Kling <andreas@ladybird.org>
|
2024-11-01 11:14:53 +00:00
|
|
|
* Copyright (c) 2023, Jelle Raaijmakers <jelle@ladybird.org>
|
2020-01-18 08:38:21 +00:00
|
|
|
*
|
2021-04-22 08:24:48 +00:00
|
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
2020-01-18 08:38:21 +00:00
|
|
|
*/
|
|
|
|
|
2018-10-10 09:53:07 +00:00
|
|
|
#pragma once
|
|
|
|
|
2021-11-07 13:52:20 +00:00
|
|
|
#include <AK/Concepts.h>
|
2021-11-10 22:00:21 +00:00
|
|
|
#include <AK/Error.h>
|
2023-09-24 18:34:55 +00:00
|
|
|
#include <AK/ReverseIterator.h>
|
2019-06-27 14:36:31 +00:00
|
|
|
#include <AK/StdLibExtras.h>
|
2021-09-15 22:00:33 +00:00
|
|
|
#include <AK/Traits.h>
|
2020-10-15 21:34:07 +00:00
|
|
|
#include <AK/Types.h>
|
|
|
|
#include <AK/kmalloc.h>
|
2018-10-10 09:53:07 +00:00
|
|
|
|
|
|
|
namespace AK {
|
|
|
|
|
2020-07-06 21:44:33 +00:00
|
|
|
// Value type returned by HashTable::set() / try_set() to describe what happened to the entry.
enum class HashSetResult {
    InsertedNewEntry,
    ReplacedExistingEntry,
    KeptExistingEntry,
};
|
|
|
|
|
|
|
|
// Selects how set() / try_set() should treat an entry that is already in the table.
// set() and try_set() default to Replace.
enum class HashSetExistingEntryBehavior {
    Keep,
    Replace,
};
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// BucketState doubles as both an enum and a probe length value.
|
|
|
|
// - Free: empty bucket
|
|
|
|
// - Used (implicit, values 1..254): value-1 represents probe length
|
|
|
|
// - CalculateLength: same as Used but probe length > 253, so we calculate the actual probe length
|
2022-03-07 14:10:10 +00:00
|
|
|
enum class BucketState : u8 {
|
2023-02-14 00:27:19 +00:00
|
|
|
Free = 0,
|
|
|
|
CalculateLength = 255,
|
2022-03-07 14:10:10 +00:00
|
|
|
};
|
|
|
|
|
2020-10-15 21:34:07 +00:00
|
|
|
template<typename HashTableType, typename T, typename BucketType>
|
2019-06-27 13:57:49 +00:00
|
|
|
class HashTableIterator {
|
2020-10-15 21:34:07 +00:00
|
|
|
friend HashTableType;
|
|
|
|
|
2019-06-27 13:57:49 +00:00
|
|
|
public:
|
2022-04-01 17:58:27 +00:00
|
|
|
bool operator==(HashTableIterator const& other) const { return m_bucket == other.m_bucket; }
|
|
|
|
bool operator!=(HashTableIterator const& other) const { return m_bucket != other.m_bucket; }
|
2020-10-15 21:34:07 +00:00
|
|
|
T& operator*() { return *m_bucket->slot(); }
|
|
|
|
T* operator->() { return m_bucket->slot(); }
|
|
|
|
void operator++() { skip_to_next(); }
|
2019-06-27 13:57:49 +00:00
|
|
|
|
2020-10-15 21:34:07 +00:00
|
|
|
private:
|
2019-06-27 13:57:49 +00:00
|
|
|
void skip_to_next()
|
|
|
|
{
|
2020-10-15 21:34:07 +00:00
|
|
|
if (!m_bucket)
|
|
|
|
return;
|
|
|
|
do {
|
|
|
|
++m_bucket;
|
2023-02-14 00:27:19 +00:00
|
|
|
if (m_bucket == m_end_bucket) {
|
|
|
|
m_bucket = nullptr;
|
2019-06-27 13:57:49 +00:00
|
|
|
return;
|
2023-02-14 00:27:19 +00:00
|
|
|
}
|
|
|
|
} while (m_bucket->state == BucketState::Free);
|
2019-06-27 13:57:49 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
HashTableIterator(BucketType* bucket, BucketType* end_bucket)
|
2020-10-15 21:34:07 +00:00
|
|
|
: m_bucket(bucket)
|
2023-02-14 00:27:19 +00:00
|
|
|
, m_end_bucket(end_bucket)
|
2019-06-27 13:57:49 +00:00
|
|
|
{
|
|
|
|
}
|
|
|
|
|
2020-10-15 21:34:07 +00:00
|
|
|
BucketType* m_bucket { nullptr };
|
2023-02-14 00:27:19 +00:00
|
|
|
BucketType* m_end_bucket { nullptr };
|
2019-06-27 13:57:49 +00:00
|
|
|
};
|
|
|
|
|
2021-06-13 14:26:08 +00:00
|
|
|
// Bidirectional iterator for ordered tables. It does not scan the bucket array;
// it follows the next/previous pointers stored in each bucket instead.
// A null bucket pointer represents the end iterator.
template<typename OrderedHashTableType, typename T, typename BucketType>
class OrderedHashTableIterator {
    friend OrderedHashTableType;

public:
    bool operator==(OrderedHashTableIterator const& other) const
    {
        return m_bucket == other.m_bucket;
    }

    bool operator!=(OrderedHashTableIterator const& other) const
    {
        return !(m_bucket == other.m_bucket);
    }

    T& operator*()
    {
        return *m_bucket->slot();
    }

    T* operator->()
    {
        return m_bucket->slot();
    }

    void operator++()
    {
        m_bucket = m_bucket->next;
    }

    void operator--()
    {
        m_bucket = m_bucket->previous;
    }

private:
    // The second argument is ignored; it only keeps the constructor signature
    // interchangeable with HashTableIterator's (bucket, end_bucket) form.
    OrderedHashTableIterator(BucketType* bucket, BucketType*)
        : m_bucket(bucket)
    {
    }

    BucketType* m_bucket { nullptr };
};
|
|
|
|
|
2023-09-24 18:34:55 +00:00
|
|
|
// Mirror image of OrderedHashTableIterator: incrementing follows the `previous` pointer
// and decrementing follows `next`. A null bucket pointer represents the end iterator.
template<typename OrderedHashTableType, typename T, typename BucketType>
class ReverseOrderedHashTableIterator {
    friend OrderedHashTableType;

public:
    bool operator==(ReverseOrderedHashTableIterator const& other) const
    {
        return m_bucket == other.m_bucket;
    }

    bool operator!=(ReverseOrderedHashTableIterator const& other) const
    {
        return !(m_bucket == other.m_bucket);
    }

    T& operator*()
    {
        return *m_bucket->slot();
    }

    T* operator->()
    {
        return m_bucket->slot();
    }

    void operator++()
    {
        m_bucket = m_bucket->previous;
    }

    void operator--()
    {
        m_bucket = m_bucket->next;
    }

private:
    ReverseOrderedHashTableIterator(BucketType* bucket)
        : m_bucket(bucket)
    {
    }

    BucketType* m_bucket { nullptr };
};
|
|
|
|
|
2023-09-20 22:14:35 +00:00
|
|
|
// A set data structure based on a hash table with closed hashing.
|
|
|
|
// HashTable can optionally provide ordered iteration when IsOrdered = true.
|
|
|
|
// For a (more commonly required) map data structure with key-value entries, see HashMap.
|
2021-06-13 14:26:08 +00:00
|
|
|
template<typename T, typename TraitsForT, bool IsOrdered>
|
2018-10-10 09:53:07 +00:00
|
|
|
class HashTable {
|
2023-02-14 00:27:19 +00:00
|
|
|
// Smallest number of buckets by which a rehash grows the table.
static constexpr size_t grow_capacity_at_least { 8 };
// Load factor, in percent of the capacity, at which inserting one more entry triggers growth.
static constexpr size_t grow_at_load_factor_percent { 80 };
// Growth step used when inserting, in percent of the current capacity.
static constexpr size_t grow_capacity_increase_percent { 60 };
|
2020-10-16 06:32:35 +00:00
|
|
|
|
2020-10-15 21:34:07 +00:00
|
|
|
struct Bucket {
|
2022-03-07 14:10:10 +00:00
|
|
|
BucketState state;
|
2020-10-15 21:34:07 +00:00
|
|
|
alignas(T) u8 storage[sizeof(T)];
|
|
|
|
T* slot() { return reinterpret_cast<T*>(storage); }
|
2022-10-16 22:06:11 +00:00
|
|
|
T const* slot() const { return reinterpret_cast<T const*>(storage); }
|
2020-10-15 21:34:07 +00:00
|
|
|
};
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2021-06-13 14:26:08 +00:00
|
|
|
struct OrderedBucket {
|
|
|
|
OrderedBucket* previous;
|
|
|
|
OrderedBucket* next;
|
2022-03-07 14:10:10 +00:00
|
|
|
BucketState state;
|
2021-06-13 14:26:08 +00:00
|
|
|
alignas(T) u8 storage[sizeof(T)];
|
|
|
|
T* slot() { return reinterpret_cast<T*>(storage); }
|
2022-10-16 22:06:11 +00:00
|
|
|
T const* slot() const { return reinterpret_cast<T const*>(storage); }
|
2021-06-13 14:26:08 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
using BucketType = Conditional<IsOrdered, OrderedBucket, Bucket>;
|
|
|
|
|
|
|
|
struct CollectionData {
|
|
|
|
};
|
|
|
|
|
|
|
|
struct OrderedCollectionData {
|
|
|
|
BucketType* head { nullptr };
|
|
|
|
BucketType* tail { nullptr };
|
|
|
|
};
|
|
|
|
|
|
|
|
using CollectionDataType = Conditional<IsOrdered, OrderedCollectionData, CollectionData>;
|
|
|
|
|
2018-10-10 09:53:07 +00:00
|
|
|
public:
|
2021-01-10 23:29:28 +00:00
|
|
|
HashTable() = default;
|
2021-04-11 08:24:35 +00:00
|
|
|
explicit HashTable(size_t capacity) { rehash(capacity); }
|
2020-10-17 13:44:43 +00:00
|
|
|
|
|
|
|
// Destroys every stored value (when T needs destruction) and releases the bucket array.
~HashTable()
{
    if (m_buckets == nullptr)
        return;

    if constexpr (!IsTriviallyDestructible<T>) {
        auto* const buckets_end = m_buckets + m_capacity;
        for (auto* bucket = m_buckets; bucket != buckets_end; ++bucket) {
            // Free buckets hold no live T.
            if (bucket->state == BucketState::Free)
                continue;
            bucket->slot()->~T();
        }
    }

    kfree_sized(m_buckets, size_in_bytes(m_capacity));
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2022-04-01 17:58:27 +00:00
|
|
|
// Copy constructor: allocates for the source's capacity, then inserts a copy of each of its entries.
HashTable(HashTable const& other)
{
    rehash(other.capacity());
    for (auto it = other.begin(); it != other.end(); ++it)
        set(*it);
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2022-04-01 17:58:27 +00:00
|
|
|
// Copy assignment via copy-and-swap: the previous contents are released when the local copy is destroyed.
HashTable& operator=(HashTable const& other)
{
    HashTable copy { other };
    swap(*this, copy);
    return *this;
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2020-10-17 13:08:09 +00:00
|
|
|
// Move constructor: takes over the other table's buckets and bookkeeping.
HashTable(HashTable&& other) noexcept
    : m_buckets(other.m_buckets)
    , m_collection_data(other.m_collection_data)
    , m_size(other.m_size)
    , m_capacity(other.m_capacity)
{
    // Reset `other` to the empty state; with a null bucket pointer its destructor frees nothing.
    other.m_buckets = nullptr;
    other.m_capacity = 0;
    other.m_size = 0;
    if constexpr (IsOrdered) {
        other.m_collection_data = { nullptr, nullptr };
    }
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2020-10-17 13:08:09 +00:00
|
|
|
// Move assignment via move-and-swap: the previous contents are released when the local table is destroyed.
HashTable& operator=(HashTable&& other) noexcept
{
    HashTable stolen { move(other) };
    swap(*this, stolen);
    return *this;
}
|
|
|
|
|
2020-10-17 12:44:59 +00:00
|
|
|
// Exchanges the complete state of two tables without touching any stored value.
friend void swap(HashTable& a, HashTable& b) noexcept
{
    swap(a.m_capacity, b.m_capacity);
    swap(a.m_size, b.m_size);
    swap(a.m_buckets, b.m_buckets);

    // Only ordered tables carry head/tail pointers that need to follow the buckets.
    if constexpr (IsOrdered) {
        swap(a.m_collection_data, b.m_collection_data);
    }
}
|
|
|
|
|
2021-11-06 20:12:16 +00:00
|
|
|
[[nodiscard]] bool is_empty() const { return m_size == 0; }
|
2021-04-11 08:25:22 +00:00
|
|
|
[[nodiscard]] size_t size() const { return m_size; }
|
|
|
|
[[nodiscard]] size_t capacity() const { return m_capacity; }
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2020-10-15 21:34:07 +00:00
|
|
|
template<typename U, size_t N>
|
2021-11-10 22:00:21 +00:00
|
|
|
ErrorOr<void> try_set_from(U (&from_array)[N])
|
2020-10-15 21:34:07 +00:00
|
|
|
{
|
2021-11-10 22:00:21 +00:00
|
|
|
for (size_t i = 0; i < N; ++i)
|
|
|
|
TRY(try_set(from_array[i]));
|
|
|
|
return {};
|
2021-08-14 00:07:39 +00:00
|
|
|
}
|
|
|
|
template<typename U, size_t N>
|
|
|
|
void set_from(U (&from_array)[N])
|
|
|
|
{
|
2021-11-10 22:00:21 +00:00
|
|
|
MUST(try_set_from(from_array));
|
2020-10-15 21:34:07 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// Makes sure `capacity` values can be stored without a further rehash.
ErrorOr<void> try_ensure_capacity(size_t capacity)
{
    // Callers think of "capacity" as the number of values a container can hold before it has
    // to reallocate. Internally, capacity is the number of buckets, and the table already grows
    // once it is filled to `grow_at_load_factor_percent`. Translate the requested number of
    // values into the number of buckets needed to hold them.
    auto const needed_buckets = capacity * 100 / grow_at_load_factor_percent + 1;
    if (needed_buckets > m_capacity)
        return try_rehash(needed_buckets);
    return {};
}
|
2023-02-14 00:27:19 +00:00
|
|
|
void ensure_capacity(size_t capacity)
|
2022-01-25 00:31:20 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
MUST(try_ensure_capacity(capacity));
|
2022-01-25 00:31:20 +00:00
|
|
|
}
|
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// Returns whether find() locates an entry for `value`.
[[nodiscard]] bool contains(T const& value) const
{
    auto const position = find(value);
    return position != end();
}
|
2020-08-16 09:04:00 +00:00
|
|
|
|
2021-11-07 13:52:20 +00:00
|
|
|
template<Concepts::HashCompatible<T> K>
|
|
|
|
requires(IsSame<TraitsForT, Traits<T>>) [[nodiscard]] bool contains(K const& value) const
|
|
|
|
{
|
|
|
|
return find(value) != end();
|
|
|
|
}
|
|
|
|
|
2021-06-13 14:26:08 +00:00
|
|
|
// Mutable iterator: ordered tables follow the bucket links, unordered tables scan the bucket array.
using Iterator = Conditional<
    IsOrdered,
    OrderedHashTableIterator<HashTable, T, BucketType>,
    HashTableIterator<HashTable, T, BucketType>>;
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// Iterator to the first entry: the chain head for ordered tables, otherwise the first
// non-free bucket in the array. Returns end() when the array holds no used bucket.
[[nodiscard]] Iterator begin()
{
    if constexpr (IsOrdered) {
        return Iterator(m_collection_data.head, end_bucket());
    } else {
        auto* const buckets_end = m_buckets + m_capacity;
        for (auto* bucket = m_buckets; bucket != buckets_end; ++bucket) {
            if (bucket->state != BucketState::Free)
                return Iterator(bucket, end_bucket());
        }
        return end();
    }
}
|
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// The end iterator is represented by a null bucket pointer.
[[nodiscard]] Iterator end()
{
    return Iterator { nullptr, nullptr };
}
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2021-06-13 14:26:08 +00:00
|
|
|
// Read-only counterpart of Iterator, instantiated over const table, value and bucket types.
using ConstIterator = Conditional<
    IsOrdered,
    OrderedHashTableIterator<HashTable const, T const, BucketType const>,
    HashTableIterator<HashTable const, T const, BucketType const>>;
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// Const iterator to the first entry: the chain head for ordered tables, otherwise the first
// non-free bucket in the array. Returns end() when the array holds no used bucket.
[[nodiscard]] ConstIterator begin() const
{
    if constexpr (IsOrdered) {
        return ConstIterator(m_collection_data.head, end_bucket());
    } else {
        auto const* const buckets_end = m_buckets + m_capacity;
        for (auto const* bucket = m_buckets; bucket != buckets_end; ++bucket) {
            if (bucket->state != BucketState::Free)
                return ConstIterator(bucket, end_bucket());
        }
        return end();
    }
}
|
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// The end iterator is represented by a null bucket pointer.
[[nodiscard]] ConstIterator end() const
{
    return ConstIterator { nullptr, nullptr };
}
|
|
|
|
|
2023-09-24 18:34:55 +00:00
|
|
|
// Reverse iteration exists for ordered tables only; for unordered tables the alias is `void`.
using ReverseIterator = Conditional<
    IsOrdered,
    ReverseOrderedHashTableIterator<HashTable, T, BucketType>,
    void>;
|
|
|
|
|
|
|
|
// Reverse iteration starts at the tail of the bucket chain.
[[nodiscard]] ReverseIterator rbegin()
requires(IsOrdered)
{
    auto* chain_tail = m_collection_data.tail;
    return ReverseIterator(chain_tail);
}
|
|
|
|
|
|
|
|
// The reverse end iterator is represented by a null bucket pointer.
[[nodiscard]] ReverseIterator rend()
requires(IsOrdered)
{
    return ReverseIterator { nullptr };
}
|
|
|
|
|
|
|
|
// Returns the ReverseWrapper adaptor for this table.
auto in_reverse()
{
    return ReverseWrapper::in_reverse(*this);
}
|
|
|
|
|
|
|
|
// Read-only counterpart of ReverseIterator; `void` for unordered tables.
using ReverseConstIterator = Conditional<
    IsOrdered,
    ReverseOrderedHashTableIterator<HashTable const, T const, BucketType const>,
    void>;
|
|
|
|
|
|
|
|
// Const reverse iteration starts at the tail of the bucket chain.
[[nodiscard]] ReverseConstIterator rbegin() const
requires(IsOrdered)
{
    auto const* chain_tail = m_collection_data.tail;
    return ReverseConstIterator(chain_tail);
}
|
|
|
|
|
|
|
|
// The reverse end iterator is represented by a null bucket pointer.
[[nodiscard]] ReverseConstIterator rend() const
requires(IsOrdered)
{
    return ReverseConstIterator { nullptr };
}
|
|
|
|
|
|
|
|
// Returns the ReverseWrapper adaptor for this (const) table.
auto in_reverse() const
{
    return ReverseWrapper::in_reverse(*this);
}
|
|
|
|
|
2020-10-15 21:34:07 +00:00
|
|
|
void clear()
|
|
|
|
{
|
2020-10-17 13:44:43 +00:00
|
|
|
*this = HashTable();
|
2020-10-15 21:34:07 +00:00
|
|
|
}
|
2023-02-14 00:27:19 +00:00
|
|
|
|
2021-09-20 21:43:52 +00:00
|
|
|
void clear_with_capacity()
|
|
|
|
{
|
2022-11-11 01:54:43 +00:00
|
|
|
if (m_capacity == 0)
|
|
|
|
return;
|
2022-11-14 18:25:18 +00:00
|
|
|
if constexpr (!IsTriviallyDestructible<T>) {
|
2021-09-20 21:43:52 +00:00
|
|
|
for (auto* bucket : *this)
|
|
|
|
bucket->~T();
|
|
|
|
}
|
2023-02-14 00:27:19 +00:00
|
|
|
__builtin_memset(m_buckets, 0, size_in_bytes(m_capacity));
|
2021-09-20 21:43:52 +00:00
|
|
|
m_size = 0;
|
|
|
|
|
|
|
|
if constexpr (IsOrdered)
|
|
|
|
m_collection_data = { nullptr, nullptr };
|
|
|
|
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2021-01-15 22:59:55 +00:00
|
|
|
template<typename U = T>
|
2021-11-10 22:00:21 +00:00
|
|
|
ErrorOr<HashSetResult> try_set(U&& value, HashSetExistingEntryBehavior existing_entry_behavior = HashSetExistingEntryBehavior::Replace)
|
2020-10-15 21:34:07 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
if (should_grow())
|
|
|
|
TRY(try_rehash(m_capacity * (100 + grow_capacity_increase_percent) / 100));
|
2021-06-13 14:26:08 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
return write_value(forward<U>(value), existing_entry_behavior);
|
2020-10-15 21:34:07 +00:00
|
|
|
}
|
2021-08-14 00:07:39 +00:00
|
|
|
template<typename U = T>
|
2023-05-06 14:53:22 +00:00
|
|
|
HashSetResult set(U&& value, HashSetExistingEntryBehavior existing_entry_behavior = HashSetExistingEntryBehavior::Replace)
|
2021-08-14 00:07:39 +00:00
|
|
|
{
|
2023-05-06 14:53:22 +00:00
|
|
|
return MUST(try_set(forward<U>(value), existing_entry_behavior));
|
2021-08-14 00:07:39 +00:00
|
|
|
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2021-07-12 21:23:08 +00:00
|
|
|
template<typename TUnaryPredicate>
|
2021-07-21 16:18:29 +00:00
|
|
|
[[nodiscard]] Iterator find(unsigned hash, TUnaryPredicate predicate)
|
2019-06-29 19:09:40 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
return Iterator(lookup_with_hash(hash, move(predicate)), end_bucket());
|
2019-06-29 19:09:40 +00:00
|
|
|
}
|
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// Finds the entry that TraitsForT considers equal to `value`; returns end() for an empty table.
[[nodiscard]] Iterator find(T const& value)
{
    if (is_empty())
        return end();

    // The stored entry is passed first, the searched-for value second.
    auto is_match = [&](auto& entry) { return TraitsForT::equals(entry, value); };
    return find(TraitsForT::hash(value), move(is_match));
}
|
|
|
|
|
2021-07-12 21:23:08 +00:00
|
|
|
template<typename TUnaryPredicate>
|
2021-07-21 16:18:29 +00:00
|
|
|
[[nodiscard]] ConstIterator find(unsigned hash, TUnaryPredicate predicate) const
|
2020-10-15 21:34:07 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
return ConstIterator(lookup_with_hash(hash, move(predicate)), end_bucket());
|
2020-10-15 21:34:07 +00:00
|
|
|
}
|
|
|
|
|
2021-07-21 16:18:29 +00:00
|
|
|
// Finds the entry that TraitsForT considers equal to `value`; returns end() for an empty table.
[[nodiscard]] ConstIterator find(T const& value) const
{
    if (is_empty())
        return end();

    // The stored entry is passed first, the searched-for value second.
    auto is_match = [&](auto& entry) { return TraitsForT::equals(entry, value); };
    return find(TraitsForT::hash(value), move(is_match));
}
|
2021-11-07 13:52:20 +00:00
|
|
|
// FIXME: Support for predicates, while guaranteeing that the predicate call
|
|
|
|
// does not call a non trivial constructor each time invoked
|
|
|
|
template<Concepts::HashCompatible<T> K>
|
|
|
|
requires(IsSame<TraitsForT, Traits<T>>) [[nodiscard]] Iterator find(K const& value)
|
|
|
|
{
|
2024-03-16 06:17:09 +00:00
|
|
|
if (is_empty())
|
|
|
|
return end();
|
AK+Kernel: Unify Traits<T>::equals()'s argument order on different types
There was a small mishmash of argument order, as seen on the table:
| Traits<T>::equals(U, T) | Traits<T>::equals(T, U)
============= | ======================= | =======================
uses equals() | HashMap | Vector, HashTable
defines equals() | *String[^1] | ByteBuffer
[^1]: String, DeprecatedString, their Fly-type equivalents and KString.
This mostly meant that you couldn't use a StringView for finding a value
in Vector<String>.
I'm changing the order of arguments to make the trait type itself first
(`Traits<T>::equals(T, U)`), as I think it's more expected and makes us
more consistent with the rest of the functions that put the stored type
first (like StringUtils functions and binary_serach). I've also renamed
the variable name "other" in find functions to "entry" to give more
importance to the value.
With this change, each of the following lines will now compile
successfully:
Vector<String>().contains_slow("WHF!"sv);
HashTable<String>().contains("WHF!"sv);
HashMap<ByteBuffer, int>().contains("WHF!"sv.bytes());
2023-08-21 14:38:11 +00:00
|
|
|
return find(Traits<K>::hash(value), [&](auto& entry) { return Traits<T>::equals(entry, value); });
|
2021-11-07 13:52:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template<Concepts::HashCompatible<T> K, typename TUnaryPredicate>
|
|
|
|
requires(IsSame<TraitsForT, Traits<T>>) [[nodiscard]] Iterator find(K const& value, TUnaryPredicate predicate)
|
|
|
|
{
|
2024-03-16 06:17:09 +00:00
|
|
|
if (is_empty())
|
|
|
|
return end();
|
2021-11-07 13:52:20 +00:00
|
|
|
return find(Traits<K>::hash(value), move(predicate));
|
|
|
|
}
|
|
|
|
|
|
|
|
template<Concepts::HashCompatible<T> K>
|
|
|
|
requires(IsSame<TraitsForT, Traits<T>>) [[nodiscard]] ConstIterator find(K const& value) const
|
|
|
|
{
|
2024-03-16 06:17:09 +00:00
|
|
|
if (is_empty())
|
|
|
|
return end();
|
AK+Kernel: Unify Traits<T>::equals()'s argument order on different types
There was a small mishmash of argument order, as seen on the table:
| Traits<T>::equals(U, T) | Traits<T>::equals(T, U)
============= | ======================= | =======================
uses equals() | HashMap | Vector, HashTable
defines equals() | *String[^1] | ByteBuffer
[^1]: String, DeprecatedString, their Fly-type equivalents and KString.
This mostly meant that you couldn't use a StringView for finding a value
in Vector<String>.
I'm changing the order of arguments to make the trait type itself first
(`Traits<T>::equals(T, U)`), as I think it's more expected and makes us
more consistent with the rest of the functions that put the stored type
first (like StringUtils functions and binary_serach). I've also renamed
the variable name "other" in find functions to "entry" to give more
importance to the value.
With this change, each of the following lines will now compile
successfully:
Vector<String>().contains_slow("WHF!"sv);
HashTable<String>().contains("WHF!"sv);
HashMap<ByteBuffer, int>().contains("WHF!"sv.bytes());
2023-08-21 14:38:11 +00:00
|
|
|
return find(Traits<K>::hash(value), [&](auto& entry) { return Traits<T>::equals(entry, value); });
|
2021-11-07 13:52:20 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template<Concepts::HashCompatible<T> K, typename TUnaryPredicate>
|
|
|
|
requires(IsSame<TraitsForT, Traits<T>>) [[nodiscard]] ConstIterator find(K const& value, TUnaryPredicate predicate) const
|
|
|
|
{
|
2024-03-16 06:17:09 +00:00
|
|
|
if (is_empty())
|
|
|
|
return end();
|
2021-11-07 13:52:20 +00:00
|
|
|
return find(Traits<K>::hash(value), move(predicate));
|
|
|
|
}
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2022-10-16 22:06:11 +00:00
|
|
|
// Removes the entry equal to `value`. Returns true if an entry was found and removed.
bool remove(T const& value)
{
    auto position = find(value);
    if (position == end())
        return false;

    remove(position);
    return true;
}
|
|
|
|
|
2021-12-15 14:18:30 +00:00
|
|
|
template<Concepts::HashCompatible<T> K>
|
|
|
|
requires(IsSame<TraitsForT, Traits<T>>) bool remove(K const& value)
|
|
|
|
{
|
|
|
|
auto it = find(value);
|
|
|
|
if (it != end()) {
|
|
|
|
remove(it);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// This invalidates the iterator
|
|
|
|
void remove(Iterator& iterator)
|
2020-10-15 21:34:07 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
auto* bucket = iterator.m_bucket;
|
|
|
|
VERIFY(bucket);
|
|
|
|
delete_bucket(*bucket);
|
|
|
|
iterator.m_bucket = nullptr;
|
2022-01-05 15:45:42 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
template<typename TUnaryPredicate>
|
2022-04-12 17:21:05 +00:00
|
|
|
bool remove_all_matching(TUnaryPredicate const& predicate)
|
2022-01-05 15:45:42 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
bool has_removed_anything = false;
|
2022-03-06 18:11:17 +00:00
|
|
|
for (size_t i = 0; i < m_capacity; ++i) {
|
|
|
|
auto& bucket = m_buckets[i];
|
2023-02-14 00:27:19 +00:00
|
|
|
if (bucket.state == BucketState::Free || !predicate(*bucket.slot()))
|
|
|
|
continue;
|
|
|
|
|
|
|
|
delete_bucket(bucket);
|
|
|
|
has_removed_anything = true;
|
|
|
|
|
|
|
|
// If a bucket was shifted up, reevaluate this bucket index
|
|
|
|
if (bucket.state != BucketState::Free)
|
|
|
|
--i;
|
2022-03-06 18:11:17 +00:00
|
|
|
}
|
2023-02-14 00:27:19 +00:00
|
|
|
return has_removed_anything;
|
2020-10-15 21:34:07 +00:00
|
|
|
}
|
2018-10-13 12:22:09 +00:00
|
|
|
|
2023-02-21 10:30:52 +00:00
|
|
|
// Moves the value out of the chain's tail bucket, deletes that bucket and returns the value.
// The table must not be empty.
T take_last()
requires(IsOrdered)
{
    VERIFY(!is_empty());

    auto& tail_bucket = *m_collection_data.tail;
    T element = move(*tail_bucket.slot());
    delete_bucket(tail_bucket);
    return element;
}
|
|
|
|
|
|
|
|
// Moves the value out of the chain's head bucket, deletes that bucket and returns the value.
// The table must not be empty.
T take_first()
requires(IsOrdered)
{
    VERIFY(!is_empty());

    auto& head_bucket = *m_collection_data.head;
    T element = move(*head_bucket.slot());
    delete_bucket(head_bucket);
    return element;
}
|
|
|
|
|
2023-04-28 12:02:38 +00:00
|
|
|
// Copies all entries into a Vector, in iteration order.
[[nodiscard]] Vector<T> values() const
{
    Vector<T> result;
    // Reserve room for every entry up front; the appends below do not check capacity.
    result.ensure_capacity(size());
    for (auto it = begin(); it != end(); ++it)
        result.unchecked_append(*it);
    return result;
}
|
|
|
|
|
2018-10-10 09:53:07 +00:00
|
|
|
private:
|
2023-02-14 00:27:19 +00:00
|
|
|
bool should_grow() const { return ((m_size + 1) * 100) >= (m_capacity * grow_at_load_factor_percent); }
|
|
|
|
static constexpr size_t size_in_bytes(size_t capacity) { return sizeof(BucketType) * capacity; }
|
2021-06-13 14:26:08 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// Second argument handed to iterator constructors: the chain tail for ordered tables,
// otherwise the address one past the last bucket of the array.
BucketType* end_bucket()
{
    if constexpr (IsOrdered) {
        return m_collection_data.tail;
    } else {
        return m_buckets + m_capacity;
    }
}
|
2023-02-14 00:27:19 +00:00
|
|
|
// Const overload; forwards to the non-const end_bucket() and returns its result as a pointer to const.
BucketType const* end_bucket() const
{
    auto* mutable_this = const_cast<HashTable*>(this);
    return mutable_this->end_bucket();
}
|
|
|
|
|
2021-11-10 22:00:21 +00:00
|
|
|
// Allocates a new, larger bucket array and moves every entry into it.
// Returns ENOMEM, leaving the table untouched, if the allocation fails.
ErrorOr<void> try_rehash(size_t new_capacity)
{
    // Grow by at least grow_capacity_at_least buckets, then let kmalloc_good_size() choose
    // the allocation size and use every bucket that fits into it.
    new_capacity = max(new_capacity, m_capacity + grow_capacity_at_least);
    new_capacity = kmalloc_good_size(size_in_bytes(new_capacity)) / sizeof(BucketType);
    VERIFY(new_capacity >= size());

    // Capture the old storage and an iterator into it before any member is overwritten.
    auto* previous_buckets = m_buckets;
    auto const previous_size_in_bytes = size_in_bytes(m_capacity);
    Iterator source = begin();

    auto* allocation = kcalloc(1, size_in_bytes(new_capacity));
    if (allocation == nullptr)
        return Error::from_errno(ENOMEM);

    m_buckets = static_cast<BucketType*>(allocation);
    m_capacity = new_capacity;
    if constexpr (IsOrdered) {
        m_collection_data = { nullptr, nullptr };
    }

    // Without a previous allocation there is nothing to move over.
    if (previous_buckets == nullptr)
        return {};

    // Move each old entry into the new array, then destroy the moved-from original.
    m_size = 0;
    while (source != end()) {
        write_value(move(*source), HashSetExistingEntryBehavior::Keep);
        source->~T();
        ++source;
    }

    kfree_sized(previous_buckets, previous_size_in_bytes);
    return {};
}
|
|
|
|
void rehash(size_t new_capacity)
|
|
|
|
{
|
2021-11-10 22:00:21 +00:00
|
|
|
MUST(try_rehash(new_capacity));
|
2018-11-07 00:38:51 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
template<typename TUnaryPredicate>
|
|
|
|
[[nodiscard]] BucketType* lookup_with_hash(unsigned hash, TUnaryPredicate predicate) const
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
if (is_empty())
|
|
|
|
return nullptr;
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
hash %= m_capacity;
|
|
|
|
for (;;) {
|
|
|
|
auto* bucket = &m_buckets[hash];
|
|
|
|
if (bucket->state == BucketState::Free)
|
|
|
|
return nullptr;
|
|
|
|
if (predicate(*bucket->slot()))
|
|
|
|
return bucket;
|
|
|
|
if (++hash == m_capacity) [[unlikely]]
|
|
|
|
hash = 0;
|
|
|
|
}
|
|
|
|
}
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is at the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) assuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
size_t used_bucket_probe_length(BucketType const& bucket) const
|
|
|
|
{
|
|
|
|
VERIFY(bucket.state != BucketState::Free);
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
if (bucket.state == BucketState::CalculateLength) {
|
|
|
|
size_t ideal_bucket_index = TraitsForT::hash(*bucket.slot()) % m_capacity;
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
VERIFY(&bucket >= m_buckets);
|
|
|
|
size_t actual_bucket_index = &bucket - m_buckets;
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
if (actual_bucket_index < ideal_bucket_index)
|
|
|
|
return m_capacity + actual_bucket_index - ideal_bucket_index;
|
|
|
|
return actual_bucket_index - ideal_bucket_index;
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
return static_cast<u8>(bucket.state) - 1;
|
AK: Rehash HashTable in-place instead of shrinking
As seen on TV, HashTable can get "thrashed", i.e. it has a bunch of
deleted buckets that count towards the load factor. This means that hash
tables which are large enough for their contents need to be resized.
This was fixed in 9d8da16 with a workaround that shrinks the HashTable
back down in these cases, as after the resize and re-hash the load
factor is very low again. However, that's not a good solution. If you
insert and remove repeatedly around a size boundary, you might get
frequent resizes, which involve frequent re-allocations.
The new solution is an in-place rehashing algorithm that I came up with.
(Do complain to me, I'm at fault.) Basically, it iterates the buckets
and re-hashes the used buckets while marking the deleted slots empty.
The issue arises with collisions in the re-hash. For this reason, there
are two kinds of used buckets during the re-hashing: the normal "used"
buckets, which are old and are treated as free space, and the
"re-hashed" buckets, which are new and treated as used space, i.e. they
trigger probing. Therefore, the procedure for relocating a bucket's
contents is as follows:
- Locate the "real" bucket of the contents with the hash. That bucket is
the starting point for the target bucket, and the current (old) bucket
is the bucket we want to move.
- While we still need to move the bucket:
- If we're the target, something strange happened last iteration or we
just re-hashed to the same location. We're done.
- If the target is empty or deleted, just move the bucket. We're done.
- If the target is a re-hashed full bucket, we probe by double-hashing
our hash as usual. Henceforth, we move our target for the next
iteration.
- If the target is an old full bucket, we swap the target and to-move
buckets. Therefore, the bucket to move is a the correct location and the
former target, which still needs to find a new place, is now in the
bucket to move. So we can just continue with the loop; the target is
re-obtained from the bucket to move. This happens for each and every
bucket, though some buckets are "coincidentally" moved before their
point of iteration is reached. Either way, this guarantees full in-place
movement (even without stack storage) and therefore space complexity of
O(1). Time complexity is amortized O(2n) asssuming a good hashing
function.
This leads to a performance improvement of ~30% on the benchmark
introduced with the last commit.
Co-authored-by: Hendiadyoin1 <leon.a@serenityos.org>
2022-03-07 22:56:54 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// Encodes a probe length as a BucketState: lengths 0..253 are stored as `probe_length + 1`
// (states 1..254); anything longer maps to BucketState::CalculateLength.
ALWAYS_INLINE constexpr BucketState bucket_state_for_probe_length(size_t probe_length)
{
    return probe_length <= 253
        ? static_cast<BucketState>(probe_length + 1)
        : BucketState::CalculateLength;
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
template<typename U = T>
|
|
|
|
HashSetResult write_value(U&& value, HashSetExistingEntryBehavior existing_entry_behavior)
|
2020-10-15 21:34:07 +00:00
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
auto update_collection_for_new_bucket = [&](BucketType& bucket) {
|
|
|
|
if constexpr (IsOrdered) {
|
|
|
|
if (!m_collection_data.head) [[unlikely]] {
|
|
|
|
m_collection_data.head = &bucket;
|
|
|
|
} else {
|
|
|
|
bucket.previous = m_collection_data.tail;
|
|
|
|
m_collection_data.tail->next = &bucket;
|
|
|
|
}
|
|
|
|
m_collection_data.tail = &bucket;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
auto update_collection_for_swapped_buckets = [&](BucketType* left_bucket, BucketType* right_bucket) {
|
|
|
|
if constexpr (IsOrdered) {
|
|
|
|
if (m_collection_data.head == left_bucket)
|
|
|
|
m_collection_data.head = right_bucket;
|
|
|
|
else if (m_collection_data.head == right_bucket)
|
|
|
|
m_collection_data.head = left_bucket;
|
|
|
|
if (m_collection_data.tail == left_bucket)
|
|
|
|
m_collection_data.tail = right_bucket;
|
|
|
|
else if (m_collection_data.tail == right_bucket)
|
|
|
|
m_collection_data.tail = left_bucket;
|
|
|
|
|
|
|
|
if (left_bucket->previous) {
|
|
|
|
if (left_bucket->previous == left_bucket)
|
|
|
|
left_bucket->previous = right_bucket;
|
|
|
|
left_bucket->previous->next = left_bucket;
|
|
|
|
}
|
|
|
|
if (left_bucket->next) {
|
|
|
|
if (left_bucket->next == left_bucket)
|
|
|
|
left_bucket->next = right_bucket;
|
|
|
|
left_bucket->next->previous = left_bucket;
|
|
|
|
}
|
2020-10-15 21:34:07 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
if (right_bucket->previous && right_bucket->previous != left_bucket)
|
|
|
|
right_bucket->previous->next = right_bucket;
|
|
|
|
if (right_bucket->next && right_bucket->next != left_bucket)
|
|
|
|
right_bucket->next->previous = right_bucket;
|
|
|
|
}
|
|
|
|
};
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
auto bucket_index = TraitsForT::hash(value) % m_capacity;
|
|
|
|
size_t probe_length = 0;
|
2020-10-15 21:34:07 +00:00
|
|
|
for (;;) {
|
2023-02-14 00:27:19 +00:00
|
|
|
auto* bucket = &m_buckets[bucket_index];
|
|
|
|
|
|
|
|
// We found a free bucket, write to it and stop
|
|
|
|
if (bucket->state == BucketState::Free) {
|
|
|
|
new (bucket->slot()) T(forward<U>(value));
|
|
|
|
bucket->state = bucket_state_for_probe_length(probe_length);
|
|
|
|
update_collection_for_new_bucket(*bucket);
|
|
|
|
++m_size;
|
|
|
|
return HashSetResult::InsertedNewEntry;
|
|
|
|
}
|
2021-04-02 02:02:33 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// The bucket is already used, does it have an identical value?
|
|
|
|
if (TraitsForT::equals(*bucket->slot(), static_cast<T const&>(value))) {
|
|
|
|
if (existing_entry_behavior == HashSetExistingEntryBehavior::Replace) {
|
|
|
|
(*bucket->slot()) = forward<U>(value);
|
|
|
|
return HashSetResult::ReplacedExistingEntry;
|
|
|
|
}
|
|
|
|
return HashSetResult::KeptExistingEntry;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Robin hood: if our probe length is larger (poor) than this bucket's (rich), steal its position!
|
|
|
|
// This ensures that we will always traverse buckets in order of probe length.
|
|
|
|
auto target_probe_length = used_bucket_probe_length(*bucket);
|
|
|
|
if (probe_length > target_probe_length) {
|
|
|
|
// Copy out bucket
|
|
|
|
BucketType bucket_to_move = move(*bucket);
|
|
|
|
update_collection_for_swapped_buckets(bucket, &bucket_to_move);
|
|
|
|
|
|
|
|
// Write new bucket
|
|
|
|
new (bucket->slot()) T(forward<U>(value));
|
|
|
|
bucket->state = bucket_state_for_probe_length(probe_length);
|
|
|
|
probe_length = target_probe_length;
|
|
|
|
if constexpr (IsOrdered)
|
|
|
|
bucket->next = nullptr;
|
|
|
|
update_collection_for_new_bucket(*bucket);
|
|
|
|
++m_size;
|
|
|
|
|
|
|
|
// Find a free bucket, swapping with smaller probe length buckets along the way
|
|
|
|
for (;;) {
|
|
|
|
if (++bucket_index == m_capacity) [[unlikely]]
|
|
|
|
bucket_index = 0;
|
|
|
|
bucket = &m_buckets[bucket_index];
|
|
|
|
++probe_length;
|
|
|
|
|
|
|
|
if (bucket->state == BucketState::Free) {
|
|
|
|
*bucket = move(bucket_to_move);
|
|
|
|
bucket->state = bucket_state_for_probe_length(probe_length);
|
|
|
|
update_collection_for_swapped_buckets(&bucket_to_move, bucket);
|
|
|
|
break;
|
|
|
|
}
|
2021-04-02 02:02:33 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
target_probe_length = used_bucket_probe_length(*bucket);
|
|
|
|
if (probe_length > target_probe_length) {
|
|
|
|
swap(bucket_to_move, *bucket);
|
|
|
|
bucket->state = bucket_state_for_probe_length(probe_length);
|
|
|
|
probe_length = target_probe_length;
|
|
|
|
update_collection_for_swapped_buckets(&bucket_to_move, bucket);
|
|
|
|
}
|
|
|
|
}
|
2021-04-02 02:02:33 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
return HashSetResult::InsertedNewEntry;
|
2021-04-02 02:02:33 +00:00
|
|
|
}
|
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// Try next bucket
|
|
|
|
if (++bucket_index == m_capacity) [[unlikely]]
|
|
|
|
bucket_index = 0;
|
|
|
|
++probe_length;
|
2020-10-15 21:34:07 +00:00
|
|
|
}
|
|
|
|
}
|
2018-10-10 09:53:07 +00:00
|
|
|
|
2022-03-06 18:11:17 +00:00
|
|
|
void delete_bucket(auto& bucket)
|
|
|
|
{
|
2023-02-14 00:27:19 +00:00
|
|
|
VERIFY(bucket.state != BucketState::Free);
|
2022-03-06 18:11:17 +00:00
|
|
|
|
2023-02-14 00:27:19 +00:00
|
|
|
// Delete the bucket
|
|
|
|
bucket.slot()->~T();
|
2022-03-06 18:11:17 +00:00
|
|
|
if constexpr (IsOrdered) {
|
|
|
|
if (bucket.previous)
|
|
|
|
bucket.previous->next = bucket.next;
|
|
|
|
else
|
|
|
|
m_collection_data.head = bucket.next;
|
|
|
|
if (bucket.next)
|
|
|
|
bucket.next->previous = bucket.previous;
|
|
|
|
else
|
|
|
|
m_collection_data.tail = bucket.previous;
|
2022-06-23 15:00:41 +00:00
|
|
|
bucket.previous = nullptr;
|
2022-06-22 18:06:28 +00:00
|
|
|
bucket.next = nullptr;
|
2022-03-06 18:11:17 +00:00
|
|
|
}
|
2023-02-14 00:27:19 +00:00
|
|
|
--m_size;
|
|
|
|
|
|
|
|
// If we deleted a bucket, we need to make sure to shift up all buckets after it to ensure
|
|
|
|
// that we can still probe for buckets with collisions, and we automatically optimize the
|
|
|
|
// probe lengths. To do so, we shift the following buckets up until we reach a free bucket,
|
|
|
|
// or a bucket with a probe length of 0 (the ideal index for that bucket).
|
2023-05-06 14:28:34 +00:00
|
|
|
auto update_bucket_neighbors = [&](BucketType* bucket) {
|
2023-02-14 00:27:19 +00:00
|
|
|
if constexpr (IsOrdered) {
|
|
|
|
if (bucket->previous)
|
|
|
|
bucket->previous->next = bucket;
|
2023-02-21 10:27:29 +00:00
|
|
|
else
|
|
|
|
m_collection_data.head = bucket;
|
2023-02-14 00:27:19 +00:00
|
|
|
if (bucket->next)
|
|
|
|
bucket->next->previous = bucket;
|
2023-02-21 10:27:29 +00:00
|
|
|
else
|
|
|
|
m_collection_data.tail = bucket;
|
2023-02-14 00:27:19 +00:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
VERIFY(&bucket >= m_buckets);
|
|
|
|
size_t shift_to_index = &bucket - m_buckets;
|
|
|
|
VERIFY(shift_to_index < m_capacity);
|
|
|
|
size_t shift_from_index = shift_to_index;
|
|
|
|
for (;;) {
|
|
|
|
if (++shift_from_index == m_capacity) [[unlikely]]
|
|
|
|
shift_from_index = 0;
|
|
|
|
|
|
|
|
auto* shift_from_bucket = &m_buckets[shift_from_index];
|
|
|
|
if (shift_from_bucket->state == BucketState::Free)
|
|
|
|
break;
|
|
|
|
|
|
|
|
auto shift_from_probe_length = used_bucket_probe_length(*shift_from_bucket);
|
|
|
|
if (shift_from_probe_length == 0)
|
|
|
|
break;
|
|
|
|
|
|
|
|
auto* shift_to_bucket = &m_buckets[shift_to_index];
|
|
|
|
*shift_to_bucket = move(*shift_from_bucket);
|
2023-03-15 17:37:55 +00:00
|
|
|
if constexpr (IsOrdered) {
|
|
|
|
shift_from_bucket->previous = nullptr;
|
|
|
|
shift_from_bucket->next = nullptr;
|
|
|
|
}
|
2023-02-14 00:27:19 +00:00
|
|
|
shift_to_bucket->state = bucket_state_for_probe_length(shift_from_probe_length - 1);
|
2023-05-06 14:28:34 +00:00
|
|
|
update_bucket_neighbors(shift_to_bucket);
|
2023-02-14 00:27:19 +00:00
|
|
|
|
|
|
|
if (++shift_to_index == m_capacity) [[unlikely]]
|
|
|
|
shift_to_index = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Mark last bucket as free
|
|
|
|
m_buckets[shift_to_index].state = BucketState::Free;
|
2022-03-06 18:11:17 +00:00
|
|
|
}
|
|
|
|
|
2021-06-13 14:26:08 +00:00
|
|
|
// Bucket array, null by default. The probing loops index it with positions that are kept
// below m_capacity (indices wrap to 0 on reaching m_capacity).
BucketType* m_buckets { nullptr };
|
|
|
|
|
|
|
|
// Per-collection bookkeeping. When IsOrdered, delete_bucket() maintains its `head` and `tail`
// bucket pointers. NOTE(review): presumably an empty type for unordered tables, which is why
// it is marked [[no_unique_address]] - confirm against the CollectionDataType definition.
[[no_unique_address]] CollectionDataType m_collection_data;
|
2020-10-15 21:34:07 +00:00
|
|
|
// Number of entries currently stored: incremented when a new entry is inserted and
// decremented by delete_bucket().
size_t m_size { 0 };
|
|
|
|
// Exclusive upper bound for indices into m_buckets; probing wraps back to index 0 when an
// index reaches this value.
size_t m_capacity { 0 };
|
|
|
|
};
|
2018-10-10 09:53:07 +00:00
|
|
|
}
|
|
|
|
|
2022-11-26 11:18:30 +00:00
|
|
|
#if USING_AK_GLOBALLY
|
|
|
|
using AK::HashSetResult;
|
2018-10-10 09:53:07 +00:00
|
|
|
using AK::HashTable;
|
2021-06-13 14:26:08 +00:00
|
|
|
using AK::OrderedHashTable;
|
2022-11-26 11:18:30 +00:00
|
|
|
#endif
|