From 4a81c79909cb44f11e4bef0ef5753f912bea9cfa Mon Sep 17 00:00:00 2001 From: Hediadyoin1 Date: Sun, 13 Jun 2021 16:26:08 +0200 Subject: [PATCH] AK: Add Ordering support to HashTable and HashMap Adds a IsOrdered flag to Hashtable and HashMap, which allows iteration in insertion order --- AK/Forward.h | 10 +++- AK/HashMap.h | 4 +- AK/HashTable.h | 148 +++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 136 insertions(+), 26 deletions(-) diff --git a/AK/Forward.h b/AK/Forward.h index 1d8ee746398..46c2ca12a8e 100644 --- a/AK/Forward.h +++ b/AK/Forward.h @@ -72,12 +72,18 @@ class CircularQueue; template struct Traits; -template> +template, bool IsOrdered = false> class HashTable; -template> +template> +using OrderedHashTable = HashTable; + +template, bool IsOrdered = false> class HashMap; +template +using OrderedHashMap = HashMap; + template class Badge; diff --git a/AK/HashMap.h b/AK/HashMap.h index ef06025db10..26a461ed88b 100644 --- a/AK/HashMap.h +++ b/AK/HashMap.h @@ -19,7 +19,7 @@ namespace AK { -template +template class HashMap { private: struct Entry { @@ -68,7 +68,7 @@ public: } void remove_one_randomly() { m_table.remove(m_table.begin()); } - using HashTableType = HashTable; + using HashTableType = HashTable; using IteratorType = typename HashTableType::Iterator; using ConstIteratorType = typename HashTableType::ConstIterator; diff --git a/AK/HashTable.h b/AK/HashTable.h index 9c4c2f62274..b8322324397 100644 --- a/AK/HashTable.h +++ b/AK/HashTable.h @@ -6,6 +6,7 @@ #pragma once +#include #include #include #include @@ -57,7 +58,28 @@ private: BucketType* m_bucket { nullptr }; }; -template +template +class OrderedHashTableIterator { + friend OrderedHashTableType; + +public: + bool operator==(const OrderedHashTableIterator& other) const { return m_bucket == other.m_bucket; } + bool operator!=(const OrderedHashTableIterator& other) const { return m_bucket != other.m_bucket; } + T& operator*() { return *m_bucket->slot(); } + T* operator->() { return m_bucket->slot(); } + void operator++() { m_bucket = m_bucket->next; } + void operator--() { m_bucket = m_bucket->previous; } + +private: + explicit OrderedHashTableIterator(BucketType* bucket) + : m_bucket(bucket) + { + } + + BucketType* m_bucket { nullptr }; +}; + +template class HashTable { static constexpr size_t load_factor_in_percent = 60; @@ -71,6 +93,28 @@ class HashTable { const T* slot() const { return reinterpret_cast(storage); } }; + struct OrderedBucket { + OrderedBucket* previous; + OrderedBucket* next; + bool used; + bool deleted; + alignas(T) u8 storage[sizeof(T)]; + T* slot() { return reinterpret_cast(storage); } + const T* slot() const { return reinterpret_cast(storage); } + }; + + using BucketType = Conditional; + + struct CollectionData { + }; + + struct OrderedCollectionData { + BucketType* head { nullptr }; + BucketType* tail { nullptr }; + }; + + using CollectionDataType = Conditional; + public: HashTable() = default; explicit HashTable(size_t capacity) { rehash(capacity); } @@ -104,6 +148,7 @@ public: HashTable(HashTable&& other) noexcept : m_buckets(other.m_buckets) + , m_collection_data(other.m_collection_data) , m_size(other.m_size) , m_capacity(other.m_capacity) , m_deleted_count(other.m_deleted_count) @@ -112,6 +157,8 @@ public: other.m_capacity = 0; other.m_deleted_count = 0; other.m_buckets = nullptr; + if constexpr (IsOrdered) + other.m_collection_data = { nullptr, nullptr }; } HashTable& operator=(HashTable&& other) noexcept @@ -127,6 +174,9 @@ public: swap(a.m_size, b.m_size); swap(a.m_capacity, b.m_capacity); swap(a.m_deleted_count, b.m_deleted_count); + + if constexpr (IsOrdered) + swap(a.m_collection_data, b.m_collection_data); } [[nodiscard]] bool is_empty() const { return !m_size; } @@ -152,10 +202,15 @@ public: return find(value) != end(); } - using Iterator = HashTableIterator; + using Iterator = Conditional, + HashTableIterator>; Iterator begin() { + if constexpr (IsOrdered) + return Iterator(m_collection_data.head); + for (size_t i = 0; i < m_capacity; ++i) { if (m_buckets[i].used) return Iterator(&m_buckets[i]); @@ -168,10 +223,15 @@ public: return Iterator(nullptr); } - using ConstIterator = HashTableIterator; + using ConstIterator = Conditional, + HashTableIterator>; ConstIterator begin() const { + if constexpr (IsOrdered) + return ConstIterator(m_collection_data.head); + for (size_t i = 0; i < m_capacity; ++i) { if (m_buckets[i].used) return ConstIterator(&m_buckets[i]); @@ -206,6 +266,17 @@ public: bucket.deleted = false; --m_deleted_count; } + + if constexpr (IsOrdered) { + if (!m_collection_data.head) [[unlikely]] { + m_collection_data.head = &bucket; + } else { + bucket.previous = m_collection_data.tail; + m_collection_data.tail->next = &bucket; + } + m_collection_data.tail = &bucket; + } + ++m_size; return HashSetResult::InsertedNewEntry; } @@ -247,13 +318,28 @@ public: VERIFY(iterator.m_bucket); auto& bucket = *iterator.m_bucket; VERIFY(bucket.used); - VERIFY(!bucket.end); VERIFY(!bucket.deleted); + + if constexpr (!IsOrdered) + VERIFY(!bucket.end); + bucket.slot()->~T(); bucket.used = false; bucket.deleted = true; --m_size; ++m_deleted_count; + + if constexpr (IsOrdered) { + if (bucket.previous) + bucket.previous->next = bucket.next; + else + m_collection_data.head = bucket.next; + + if (bucket.next) + bucket.next->previous = bucket.previous; + else + m_collection_data.tail = bucket.previous; + } } private: @@ -262,39 +348,55 @@ private: auto& bucket = lookup_for_writing(value); new (bucket.slot()) T(move(value)); bucket.used = true; + + if constexpr (IsOrdered) { + if (!m_collection_data.head) [[unlikely]] { + m_collection_data.head = &bucket; + } else { + bucket.previous = m_collection_data.tail; + m_collection_data.tail->next = &bucket; + } + m_collection_data.tail = &bucket; + } } void rehash(size_t new_capacity) { new_capacity = max(new_capacity, static_cast(4)); - new_capacity = kmalloc_good_size(new_capacity * sizeof(Bucket)) / sizeof(Bucket); + new_capacity = kmalloc_good_size(new_capacity * sizeof(BucketType)) / sizeof(BucketType); auto* old_buckets = m_buckets; - auto old_capacity = m_capacity; + Iterator old_iter = begin(); + + if constexpr (IsOrdered) { + m_buckets = (BucketType*)kmalloc(sizeof(BucketType) * (new_capacity)); + __builtin_memset(m_buckets, 0, sizeof(BucketType) * (new_capacity)); + + m_collection_data = { nullptr, nullptr }; + } else { + m_buckets = (BucketType*)kmalloc(sizeof(BucketType) * (new_capacity + 1)); + __builtin_memset(m_buckets, 0, sizeof(BucketType) * (new_capacity + 1)); + } - m_buckets = (Bucket*)kmalloc(sizeof(Bucket) * (new_capacity + 1)); - __builtin_memset(m_buckets, 0, sizeof(Bucket) * (new_capacity + 1)); m_capacity = new_capacity; m_deleted_count = 0; - m_buckets[m_capacity].end = true; + if constexpr (!IsOrdered) + m_buckets[m_capacity].end = true; if (!old_buckets) return; - for (size_t i = 0; i < old_capacity; ++i) { - auto& old_bucket = old_buckets[i]; - if (old_bucket.used) { - insert_during_rehash(move(*old_bucket.slot())); - old_bucket.slot()->~T(); - } + for (auto it = move(old_iter); it != end(); ++it) { + insert_during_rehash(move(*it)); + it->~T(); } kfree(old_buckets); } template - Bucket* lookup_with_hash(unsigned hash, Finder finder) const + BucketType* lookup_with_hash(unsigned hash, Finder finder) const { if (is_empty()) return nullptr; @@ -312,18 +414,18 @@ private: } } - const Bucket* lookup_for_reading(const T& value) const + const BucketType* lookup_for_reading(const T& value) const { return lookup_with_hash(TraitsForT::hash(value), [&value](auto& entry) { return TraitsForT::equals(entry, value); }); } - Bucket& lookup_for_writing(const T& value) + BucketType& lookup_for_writing(const T& value) { if (should_grow()) rehash(capacity() * 2); auto hash = TraitsForT::hash(value); - Bucket* first_empty_bucket = nullptr; + BucketType* first_empty_bucket = nullptr; for (;;) { auto& bucket = m_buckets[hash % m_capacity]; @@ -335,7 +437,7 @@ private: first_empty_bucket = &bucket; if (!bucket.deleted) - return *const_cast(first_empty_bucket); + return *const_cast(first_empty_bucket); } hash = double_hash(hash); @@ -345,12 +447,14 @@ private: [[nodiscard]] size_t used_bucket_count() const { return m_size + m_deleted_count; } [[nodiscard]] bool should_grow() const { return ((used_bucket_count() + 1) * 100) >= (m_capacity * load_factor_in_percent); } - Bucket* m_buckets { nullptr }; + BucketType* m_buckets { nullptr }; + + [[no_unique_address]] CollectionDataType m_collection_data; size_t m_size { 0 }; size_t m_capacity { 0 }; size_t m_deleted_count { 0 }; }; - } using AK::HashTable; +using AK::OrderedHashTable;