4 years ago · 267eb3b329
--- a/Tests/LibSQL/TestSqlHashIndex.cpp
+++ b/Tests/LibSQL/TestSqlHashIndex.cpp
@@ -0,0 +1,329 @@
 
															+/*
														
 
															+ * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
														
 
															+ *
														
 
															+ * SPDX-License-Identifier: BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#include <LibSQL/HashIndex.h>
														
 
															+#include <LibSQL/Heap.h>
														
 
															+#include <LibSQL/Meta.h>
														
 
															+#include <LibSQL/Tuple.h>
														
 
															+#include <LibSQL/Value.h>
														
 
															+#include <LibTest/TestCase.h>
														
 
															+#include <unistd.h>
														
 
															+
														
 
															+// Integer key values used by the tests below, in insertion order. A test with
															+// num_keys == N uses the first N entries; keys[i] is paired with pointers[i].
															+// clang-format off
															+static constexpr int keys[] = {
															+    39, 87, 77, 42, 98, 40, 53,  8, 37, 12,
															+    90, 72, 73, 11, 88, 22, 10, 82, 25, 61,
															+    97, 18, 60, 68, 21,  3, 58, 29, 13, 17,
															+    89, 81, 16, 64,  5, 41, 36, 91, 38, 24,
															+    32, 50, 34, 94, 49, 47,  1,  6, 44, 76,
															+};
															+// clang-format on
														
 
															+// Pointer values stored alongside the keys above: pointers[i] is the pointer set
															+// on the key built from keys[i]. Values repeat; only the keys are looked up.
															+// clang-format off
															+static constexpr u32 pointers[] = {
															+    92,  4, 50, 47, 68, 73, 24, 28, 50, 93,
															+    60, 36, 92, 72, 53, 26, 91, 84, 25, 43,
															+    88, 12, 62, 35, 96, 27, 96, 27, 99, 30,
															+    21, 89, 54, 60, 37, 68, 35, 55, 80,  2,
															+    33, 26, 93, 70, 45, 44,  3, 66, 75,  4,
															+};
															+// clang-format on
														
 
															+
														
 
															+// Prototypes for the test helpers defined further down in this file.
															+// Opens or creates the hash index stored in the given heap.
															+NonnullRefPtr<SQL::HashIndex> setup_hash_index(SQL::Heap& heap);
														
 
															+// Inserts num_keys keys, reopens the database and looks each key up again.
															+void insert_and_get_to_and_from_hash_index(int num_keys);
														
 
															+// Inserts num_keys keys, reopens the database and iterates over the whole index.
															+void insert_into_and_scan_hash_index(int num_keys);
														
 
															+
														
 
															+// Builds a HashIndex on top of `heap` for a two-column key:
															+// an ascending Integer "key_value" followed by an ascending Text "text_value".
															+//
															+// The pointer of the index's directory is kept in user value slot 0 of the heap.
															+// When that slot is still 0, a new record pointer is requested from the heap
															+// and written back to slot 0 before the index is constructed.
															+NonnullRefPtr<SQL::HashIndex> setup_hash_index(SQL::Heap& heap)
															+{
															+    SQL::TupleDescriptor key_layout;
															+    key_layout.append({ "key_value", SQL::SQLType::Integer, SQL::Order::Ascending });
															+    key_layout.append({ "text_value", SQL::SQLType::Text, SQL::Order::Ascending });
															+
															+    auto directory_block = heap.user_value(0);
															+    if (!directory_block) {
															+        directory_block = heap.new_record_pointer();
															+        heap.set_user_value(0, directory_block);
															+    }
															+    return SQL::HashIndex::construct(heap, key_layout, directory_block);
															+}
														
 
															+
														
 
															+// Round-trip test helper: stores the first `num_keys` entries of keys[] /
															+// pointers[] in a hash index backed by "test.db", then constructs a second
															+// Heap/HashIndex pair on the same file and expects every key to be found with
															+// the pointer it was inserted with.
															+//
															+// "test.db" is unlinked when the function returns, whatever the outcome.
															+void insert_and_get_to_and_from_hash_index(int num_keys)
															+{
															+    ScopeGuard guard([]() { unlink("test.db"); });
															+    {
															+        auto heap = SQL::Heap::construct("test.db");
															+        auto hash_index = setup_hash_index(heap);
															+
															+        for (auto ix = 0; ix < num_keys; ix++) {
															+            SQL::Key k(hash_index->descriptor());
															+            k[0] = keys[ix];
															+            k[1] = String::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]);
															+            k.set_pointer(pointers[ix]);
															+            hash_index->insert(k);
															+        }
															+#ifdef LIST_HASH_INDEX
															+        hash_index->list_hash();
															+#endif
															+    }
															+
															+    {
															+        auto heap = SQL::Heap::construct("test.db");
															+        auto hash_index = setup_hash_index(heap);
															+
															+        for (auto ix = 0; ix < num_keys; ix++) {
															+            // Rebuild exactly the key that was inserted; only the pointer is left unset.
															+            SQL::Key k(hash_index->descriptor());
															+            k[0] = keys[ix];
															+            k[1] = String::formatted("The key value is {} and the pointer is {}", keys[ix], pointers[ix]);
															+            auto pointer_opt = hash_index->get(k);
															+            EXPECT(pointer_opt.has_value());
															+            // Only read the value when the lookup produced one. Calling value() on an
															+            // empty Optional would presumably trip an assertion and take down the whole
															+            // test binary instead of recording a single failed expectation.
															+            if (pointer_opt.has_value()) {
															+                EXPECT_EQ(pointer_opt.value(), pointers[ix]);
															+            }
															+        }
															+    }
															+}
														
 
															+
														
 
															+// Insert-then-get round trips for a growing number of keys. Each case passes
															+// its key count to insert_and_get_to_and_from_hash_index(), which uses that
															+// many leading entries of keys[] and pointers[].
															+TEST_CASE(hash_index_one_key)
															+{
															+    insert_and_get_to_and_from_hash_index(1);
															+}
															+
															+TEST_CASE(hash_index_four_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(4);
															+}
															+
															+TEST_CASE(hash_index_five_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(5);
															+}
															+
															+TEST_CASE(hash_index_10_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(10);
															+}
															+
															+TEST_CASE(hash_index_13_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(13);
															+}
															+
															+TEST_CASE(hash_index_20_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(20);
															+}
															+
															+TEST_CASE(hash_index_25_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(25);
															+}
															+
															+TEST_CASE(hash_index_30_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(30);
															+}
															+
															+TEST_CASE(hash_index_35_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(35);
															+}
															+
															+TEST_CASE(hash_index_40_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(40);
															+}
															+
															+TEST_CASE(hash_index_45_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(45);
															+}
															+
															+// Uses every entry of keys[] / pointers[].
															+TEST_CASE(hash_index_50_keys)
															+{
															+    insert_and_get_to_and_from_hash_index(50);
															+}
														
 
															+
														
 
															+// Scan test helper: stores the first `num_keys` entries of keys[] / pointers[]
															+// in a hash index backed by "test.db", then constructs a second Heap/HashIndex
															+// pair on the same file and walks it with the index iterator.
															+//
															+// Expectations: every visited key that matches an entry of keys[] carries the
															+// matching pointer, no entry is visited twice, exactly num_keys keys are
															+// visited, and every inserted key is visited.
															+//
															+// "test.db" is unlinked when the function returns.
															+void insert_into_and_scan_hash_index(int num_keys)
															+{
															+    ScopeGuard guard([]() { unlink("test.db"); });
															+    {
															+        auto heap = SQL::Heap::construct("test.db");
															+        auto hash_index = setup_hash_index(heap);
															+
															+        for (auto entry = 0; entry < num_keys; entry++) {
															+            SQL::Key new_key(hash_index->descriptor());
															+            new_key[0] = keys[entry];
															+            new_key[1] = String::formatted("The key value is {} and the pointer is {}", keys[entry], pointers[entry]);
															+            new_key.set_pointer(pointers[entry]);
															+            hash_index->insert(new_key);
															+        }
															+#ifdef LIST_HASH_INDEX
															+        hash_index->list_hash();
															+#endif
															+    }
															+
															+    {
															+        auto heap = SQL::Heap::construct("test.db");
															+        auto hash_index = setup_hash_index(heap);
															+
															+        // seen[i] records whether keys[i] has been produced by the iterator yet.
															+        Vector<bool> seen;
															+        for (auto entry = 0; entry < num_keys; entry++)
															+            seen.append(false);
															+
															+        int count = 0;
															+        for (auto iter = hash_index->begin(); !iter.is_end(); iter++, count++) {
															+            auto key = *iter;
															+            auto key_value = (int)key[0];
															+            // Find which inserted entry this key corresponds to (first match wins).
															+            for (auto ix = 0; ix < num_keys; ix++) {
															+                if (keys[ix] != key_value)
															+                    continue;
															+                EXPECT_EQ(key.pointer(), pointers[ix]);
															+                if (seen[ix])
															+                    FAIL(String::formatted("Key {}, index {} already found previously", key_value, ix));
															+                seen[ix] = true;
															+                break;
															+            }
															+        }
															+
															+#ifdef LIST_HASH_INDEX
															+        hash_index->list_hash();
															+#endif
															+        EXPECT_EQ(count, num_keys);
															+        for (auto entry = 0; entry < num_keys; entry++) {
															+            if (seen[entry])
															+                continue;
															+            FAIL(String::formatted("Key {}, index {} not found", keys[entry], entry));
															+        }
															+    }
															+}
														
 
															+
														
 
															+// Insert-then-scan tests for a growing number of keys. Each case passes its key
															+// count to insert_into_and_scan_hash_index(), which uses that many leading
															+// entries of keys[] and pointers[].
															+TEST_CASE(hash_index_scan_one_key)
															+{
															+    insert_into_and_scan_hash_index(1);
															+}
															+
															+TEST_CASE(hash_index_scan_four_keys)
															+{
															+    insert_into_and_scan_hash_index(4);
															+}
															+
															+TEST_CASE(hash_index_scan_five_keys)
															+{
															+    insert_into_and_scan_hash_index(5);
															+}
															+
															+TEST_CASE(hash_index_scan_10_keys)
															+{
															+    insert_into_and_scan_hash_index(10);
															+}
															+
															+TEST_CASE(hash_index_scan_15_keys)
															+{
															+    insert_into_and_scan_hash_index(15);
															+}
															+
															+TEST_CASE(hash_index_scan_20_keys)
															+{
															+    insert_into_and_scan_hash_index(20);
															+}
															+
															+TEST_CASE(hash_index_scan_30_keys)
															+{
															+    insert_into_and_scan_hash_index(30);
															+}
															+
															+TEST_CASE(hash_index_scan_40_keys)
															+{
															+    insert_into_and_scan_hash_index(40);
															+}
															+
															+// Uses every entry of keys[] / pointers[].
															+TEST_CASE(hash_index_scan_50_keys)
															+{
															+    insert_into_and_scan_hash_index(50);
															+}
														
--- a/Userland/Libraries/LibSQL/CMakeLists.txt
+++ b/Userland/Libraries/LibSQL/CMakeLists.txt
@@ -1,6 +1,7 @@
 
															 set(SOURCES
														
 
															         BTree.cpp
														
 
															         BTreeIterator.cpp
														
 
															+        HashIndex.cpp
														
 
															         Heap.cpp
														
 
															         Index.cpp
														
 
															         Key.cpp
														
--- a/Userland/Libraries/LibSQL/Forward.h
+++ b/Userland/Libraries/LibSQL/Forward.h
@@ -35,6 +35,10 @@ class ErrorStatement;
 
															 class ExistsExpression;
														
 
															 class Expression;
														
 
															 class GroupByClause;
														
 
															+class HashBucket;
														
 
															+class HashDirectoryNode;
														
 
															+class HashIndex;
														
 
															+class HashIndexIterator;
														
 
															 class Heap;
														
 
															 class InChainedExpression;
														
 
															 class Index;
														
--- a/Userland/Libraries/LibSQL/HashIndex.cpp
+++ b/Userland/Libraries/LibSQL/HashIndex.cpp
@@ -0,0 +1,423 @@
 
															+/*
														
 
															+ * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
														
 
															+ *
														
 
															+ * SPDX-License-Identifier: BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#include <LibSQL/HashIndex.h>
														
 
															+#include <LibSQL/Heap.h>
														
 
															+#include <LibSQL/Key.h>
														
 
															+#include <LibSQL/Serialize.h>
														
 
															+
														
 
															+namespace SQL {
														
 
															+
														
 
															+// Creates the in-memory directory node with number `node_number` of `index`.
															+// The node's block pointer is whatever index.node_pointer(node_number) returns.
															+// `offset` is remembered in m_offset; serialize() uses it as the position of
															+// this node's first bucket within the index's bucket list.
															+HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 node_number, size_t offset)
														
 
															+    : IndexNode(index.node_pointer(node_number))
														
 
															+    , m_hash_index(index)
														
 
															+    , m_node_number(node_number)
														
 
															+    , m_offset(offset)
														
 
															+{
														
 
															+}
														
 
															+
														
 
															+// Rebuilds a directory node from `buffer`, the contents of the block at `pointer`.
															+//
															+// Fields are read in this order:
															+//   1. global depth            (written directly into index.m_global_depth)
															+//   2. number of bucket pointers held by this node
															+//   3. pointer of the next directory node, 0 meaning there is none
															+//   4. for every bucket: its block pointer, then its local depth
															+//
															+// Side effects on `index`: a non-zero next-node pointer is appended to its
															+// m_nodes list (otherwise this node is flagged as the last one), and one bucket
															+// is appended per (pointer, local depth) pair.
															+//
															+// NOTE(review): the bucket index passed to append_bucket() restarts at 0 for
															+// every directory node; confirm this is intended when a directory spans
															+// several nodes.
															+HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 pointer, ByteBuffer& buffer)
															+    : IndexNode(pointer)
															+    , m_hash_index(index)
															+{
															+    dbgln_if(SQL_DEBUG, "Deserializing Hash Directory Node");
															+    size_t read_position = 0;
															+    deserialize_from<u32>(buffer, read_position, index.m_global_depth);
															+
															+    u32 bucket_count;
															+    deserialize_from<u32>(buffer, read_position, bucket_count);
															+    dbgln_if(SQL_DEBUG, "Global Depth {}, #Bucket pointers {}", index.global_depth(), bucket_count);
															+
															+    u32 successor;
															+    deserialize_from<u32>(buffer, read_position, successor);
															+    if (!successor) {
															+        dbgln_if(SQL_DEBUG, "This is the last directory node");
															+        m_is_last = true;
															+    } else {
															+        dbgln_if(SQL_DEBUG, "Next node {}", successor);
															+        m_hash_index.m_nodes.append(successor);
															+    }
															+
															+    for (auto bucket_number = 0u; bucket_number < bucket_count; bucket_number++) {
															+        u32 bucket_pointer;
															+        deserialize_from(buffer, read_position, bucket_pointer);
															+        u32 local_depth;
															+        deserialize_from(buffer, read_position, local_depth);
															+        dbgln_if(SQL_DEBUG, "Bucket pointer {} local depth {}", bucket_pointer, local_depth);
															+        index.append_bucket(bucket_number, local_depth, bucket_pointer);
															+    }
															+}
														
 
															+
														
 
															+// Writes this directory node to `buffer`.
															+//
															+// Fields are written in this order:
															+//   1. the index's global depth
															+//   2. number_of_pointers()
															+//   3. the pointer of the next directory node, or 0 when this is the last one
															+//   4. for each of this node's buckets (taken from the index's bucket list
															+//      starting at m_offset): its block pointer, then its local depth
															+void HashDirectoryNode::serialize(ByteBuffer& buffer) const
															+{
															+    dbgln_if(SQL_DEBUG, "Serializing directory node #{}. Offset {}", m_node_number, m_offset);
															+    serialize_to(buffer, m_hash_index.global_depth());
															+    serialize_to(buffer, number_of_pointers());
															+    dbgln_if(SQL_DEBUG, "Global depth {}, #bucket pointers {}", m_hash_index.global_depth(), number_of_pointers());
															+
															+    u32 next_node;
															+    // Compare without subtracting from size(): "size() - 1" wraps around when the
															+    // node list is empty, which would make the test pass and index out of bounds.
															+    if (m_node_number + 1 < m_hash_index.m_nodes.size()) {
															+        next_node = m_hash_index.m_nodes[m_node_number + 1];
															+        dbgln_if(SQL_DEBUG, "Next directory node pointer {}", next_node);
															+    } else {
															+        next_node = 0u;
															+        dbgln_if(SQL_DEBUG, "This is the last directory node");
															+    }
															+
															+    serialize_to(buffer, next_node);
															+    for (auto ix = 0u; ix < number_of_pointers(); ix++) {
															+        auto& bucket = m_hash_index.m_buckets[m_offset + ix];
															+        dbgln_if(SQL_DEBUG, "Bucket pointer {} local depth {}", bucket->pointer(), bucket->local_depth());
															+        serialize_to(buffer, bucket->pointer());
															+        serialize_to(buffer, bucket->local_depth());
															+    }
															+}
														
 
															+
														
 
															+// Creates a bucket of `hash_index` with directory position `index`, the given
															+// local depth, and `pointer` as its block pointer. No entries are read here;
															+// inflate() loads them from the block later.
															+HashBucket::HashBucket(HashIndex& hash_index, u32 index, u32 local_depth, u32 pointer)
														
 
															+    : IndexNode(pointer)
														
 
															+    , m_hash_index(hash_index)
														
 
															+    , m_local_depth(local_depth)
														
 
															+    , m_index(index)
														
 
															+{
														
 
															+}
														
 
															+
														
 
															+// Writes this bucket to `buffer`: first the local depth, then the number of
															+// entries, then every entry of m_entries in order (each key serializes itself).
															+void HashBucket::serialize(ByteBuffer& buffer) const
															+{
															+    dbgln_if(SQL_DEBUG, "Serializing bucket: pointer {}, index #{}, local depth {} size {}",
															+        pointer(), index(), local_depth(), size());
															+    dbgln_if(SQL_DEBUG, "key_length: {} max_entries: {}", m_hash_index.descriptor().data_length(), max_entries_in_bucket());
															+
															+    // Prolog: local depth followed by the entry count.
															+    serialize_to(buffer, local_depth());
															+    serialize_to(buffer, size());
															+    dbgln_if(SQL_DEBUG, "buffer size after prolog {}", buffer.size());
															+
															+    for (auto& entry : m_entries) {
															+        entry.serialize(buffer);
															+        dbgln_if(SQL_DEBUG, "Key {} buffer size {}", entry.to_string(), buffer.size());
															+    }
															+}
														
 
															+
														
 
															+// Loads this bucket's contents from its block, once.
															+//
															+// Does nothing when the bucket is already inflated or has no block pointer.
															+// Otherwise reads the block through the owning index and deserializes, in
															+// order: the local depth, the number of keys, and then that many keys, which
															+// are appended to m_entries. The bucket is marked inflated afterwards.
															+void HashBucket::inflate()
															+{
															+    if (m_inflated)
															+        return;
															+    if (!pointer())
															+        return;
															+
															+    dbgln_if(SQL_DEBUG, "Inflating Hash Bucket {}", pointer());
															+    auto block = m_hash_index.read_block(pointer());
															+    size_t read_position = 0;
															+
															+    deserialize_from(block, read_position, m_local_depth);
															+    dbgln_if(SQL_DEBUG, "Bucket Local Depth {}", m_local_depth);
															+
															+    u32 key_count;
															+    deserialize_from(block, read_position, key_count);
															+    dbgln_if(SQL_DEBUG, "Bucket has {} keys", key_count);
															+
															+    for (auto key_number = 0u; key_number < key_count; key_number++) {
															+        // The Key constructor consumes its bytes from the block at read_position.
															+        Key stored_key(m_hash_index.descriptor(), block, read_position);
															+        dbgln_if(SQL_DEBUG, "Key {}: {}", key_number, stored_key.to_string());
															+        m_entries.append(stored_key);
															+    }
															+    m_inflated = true;
															+}
														
 
															+
														
 
															+// Returns how many keys fit into one bucket block: the block size minus two
															+// u32 values (matching the local depth and entry count that serialize() writes
															+// first), divided by the space taken per key, i.e. the descriptor's data
															+// length plus one u32.
															+size_t HashBucket::max_entries_in_bucket() const
															+{
															+    auto const space_per_key = m_hash_index.descriptor().data_length() + sizeof(u32);
															+    auto const space_for_keys = BLOCKSIZE - 2 * sizeof(u32);
															+    return space_for_keys / space_per_key;
															+}
														
 
															+
														
 
															+// Looks `key` up in this bucket.
															+//
															+// On a hit, the stored entry's pointer is copied into `key` (which is why it
															+// is taken by non-const reference) and also returned. On a miss an empty
															+// Optional is returned and `key` is left untouched.
															+Optional<u32> HashBucket::get(Key& key)
															+{
															+    auto position = find_key_in_bucket(key);
															+    if (!position.has_value())
															+        return {};
															+
															+    auto& stored = m_entries[position.value()];
															+    key.set_pointer(stored.pointer());
															+    return stored.pointer();
															+}
														
 
															+
														
 
															+// Adds `key` to this bucket.
															+//
															+// Returns false without changing anything when an equal key is already
															+// present, or when the bucket already holds max_entries_in_bucket() entries.
															+// Otherwise the key is appended, the bucket is handed to the index's
															+// write-ahead log, and true is returned.
															+bool HashBucket::insert(Key const& key)
															+{
															+    inflate();
															+
															+    // The duplicate check runs first; the capacity check only if it passes.
															+    if (find_key_in_bucket(key).has_value() || size() >= max_entries_in_bucket())
															+        return false;
															+
															+    m_entries.append(key);
															+    m_hash_index.add_to_write_ahead_log(this);
															+    return true;
															+}
														
 
															+
														
 
															+// Linear search for `key` among this bucket's entries.
															+// Returns the position of the first entry that compares equal to `key`, or an
															+// empty Optional when there is none.
															+Optional<size_t> HashBucket::find_key_in_bucket(Key const& key)
															+{
															+    for (auto position = 0u; position < size(); position++) {
															+        auto& candidate = entries()[position];
															+        if (!(candidate == key))
															+            continue;
															+        return position;
															+    }
															+    return {};
															+}
														
 
															+
														
 
															+// Returns the first bucket after this one (by index) that contains at least
															+// one entry, or nullptr when there is none. Every bucket examined on the way
															+// is inflated so that its size can be checked.
															+HashBucket const* HashBucket::next_bucket()
															+{
															+    auto candidate_index = m_index + 1;
															+    while (candidate_index < m_hash_index.size()) {
															+        auto candidate = m_hash_index.get_bucket_by_index(candidate_index);
															+        candidate->inflate();
															+        if (candidate->size())
															+            return candidate;
															+        candidate_index++;
															+    }
															+    return nullptr;
															+}
														
 
															+
														
 
															+// Returns the nearest bucket before this one (by index) that has a block
															+// pointer, or nullptr when there is none.
															+//
															+// The loop counter runs one ahead of the index being examined, so it never
															+// has to go below zero. m_index is presumably unsigned (it is initialized from
															+// a u32), in which case starting at "m_index - 1" would wrap around for
															+// bucket 0; counting down to "ix > 0" directly also never looked at bucket 0.
															+HashBucket const* HashBucket::previous_bucket()
															+{
															+    for (auto ix = m_index; ix > 0; ix--) {
															+        auto bucket = m_hash_index.get_bucket_by_index(ix - 1);
															+        if (bucket->pointer())
															+            return bucket;
															+    }
															+    return nullptr;
															+}
														
 
															+
														
 
															+// Returns the entry at position `ix`. The bucket is inflated first; an `ix`
															+// that is not below size() fails the VERIFY.
															+Key const& HashBucket::operator[](size_t ix)
														
 
															+{
														
 
															+    inflate();
														
 
															+    VERIFY(ix < size());
														
 
															+    return m_entries[ix];
														
 
															+}
														
 
															+
														
 
															+// Debug helper: prints a one-line summary of this bucket via warnln(),
															+// followed by one line per entry with the key and its hash. A bucket without
															+// a block pointer is tagged " (VIRTUAL)" in the summary.
															+void HashBucket::list_bucket()
															+{
															+    auto const* virtual_marker = pointer() ? "" : " (VIRTUAL)";
															+    warnln("Bucket #{} size {} local depth {} pointer {}{}",
															+        index(), size(), local_depth(), pointer(), virtual_marker);
															+    for (auto& entry : entries())
															+        warnln("  {} hash {}", entry.to_string(), entry.hash());
															+}
														
 
															+
														
 
															+// Opens the hash index whose first directory node is stored in block
															+// `first_node` of `heap`, or sets up a new, empty index when the heap has no
															+// such block.
															+HashIndex::HashIndex(Heap& heap, TupleDescriptor const& descriptor, u32 first_node)
														
 
															+    : Index(heap, descriptor, true, first_node)
														
 
															+    , m_nodes()
														
 
															+    , m_buckets()
														
 
															+{
														
 
															+    // NOTE(review): a fresh record pointer is installed via set_pointer() here,
															+    // but the code below keeps using `first_node` (still 0 in this case) for
															+    // has_block() and m_nodes.append() - confirm that this is intended.
															+    if (!first_node) {
														
 
															+        set_pointer(new_record_pointer());
														
 
															+    }
														
 
															+    if (this->heap().has_block(first_node)) {
														
 
															+        // Existing index: read the chain of directory nodes. Constructing a
															+        // HashDirectoryNode from a block appends that node's buckets to this
															+        // index and, unless it is the last node, appends the pointer of its
															+        // successor to m_nodes - which is where the next pointer is taken from.
															+        u32 pointer = first_node;
														
 
															+        do {
														
 
															+            VERIFY(this->heap().has_block(pointer));
														
 
															+            auto buffer = read_block(pointer);
														
 
															+            auto node = HashDirectoryNode(*this, pointer, buffer);
														
 
															+            if (node.is_last())
														
 
															+                break;
														
 
															+            pointer = m_nodes.last(); // FIXME Ugly
														
 
															+        } while (pointer);
														
 
															+    } else {
														
 
															+        // New index: start with two buckets (indexes 0 and 1) of local depth 1,
															+        // each with its own new record pointer. They are flagged as inflated,
															+        // so inflate() will not try to read them from a block, and are queued
															+        // in the write-ahead log together with the directory.
															+        auto bucket = append_bucket(0u, 1u, new_record_pointer());
														
 
															+        bucket->m_inflated = true;
														
 
															+        add_to_write_ahead_log(bucket);
														
 
															+        bucket = append_bucket(1u, 1u, new_record_pointer());
														
 
															+        bucket->m_inflated = true;
														
 
															+        add_to_write_ahead_log(bucket);
														
 
															+        m_nodes.append(first_node);
														
 
															+        write_directory_to_write_ahead_log();
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+HashBucket* HashIndex::get_bucket(u32 index)
														
 
															+{
														
 
															+    VERIFY(index < m_buckets.size());
														
 
															+    auto divisor = size() / 2;
														
 
															+    while (!m_buckets[index]->pointer()) {
														
 
															+        VERIFY(divisor > 1);
														
 
															+        index = index % divisor;
														
 
															+        divisor /= 2;
														
 
															+    }
														
 
															+    auto& bucket = m_buckets[index];
														
 
															+    return bucket;
														
 
															+}
														
 
															+
														
 
															+HashBucket* HashIndex::get_bucket_for_insert(Key const& key)
														
 
															+{
														
 
															+    auto key_hash = key.hash();
														
 
															+
														
 
															+    do {
														
 
															+        auto bucket = get_bucket(key_hash % size());
														
 
															+        if (bucket->size() < bucket->max_entries_in_bucket()) {
														
 
															+            return bucket;
														
 
															+        }
														
 
															+
														
 
															+        // We previously doubled the directory but the target bucket is
														
 
															+        // still at an older depth. Create new buckets at the current global
														
 
															+        // depth and allocate the contents of the existing buckets to the
														
 
															+        // newly created ones:
														
 
															+        while (bucket->local_depth() < global_depth()) {
														
 
															+            auto base_index = bucket->index();
														
 
															+            auto step = 1 << (global_depth() - bucket->local_depth());
														
 
															+            for (auto ix = base_index + step; ix < size(); ix += step) {
														
 
															+                auto& sub_bucket = m_buckets[ix];
														
 
															+                sub_bucket->set_local_depth(bucket->local_depth() + 1);
														
 
															+                for (auto entry_index = (int)bucket->m_entries.size() - 1; entry_index >= 0; entry_index--) {
														
 
															+                    if (bucket->m_entries[entry_index].hash() % size() == ix) {
														
 
															+                        if (!sub_bucket->pointer()) {
														
 
															+                            sub_bucket->set_pointer(new_record_pointer());
														
 
															+                        }
														
 
															+                        sub_bucket->insert(bucket->m_entries.take(entry_index));
														
 
															+                    }
														
 
															+                }
														
 
															+                if (m_buckets[ix]->pointer())
														
 
															+                    add_to_write_ahead_log(m_buckets[ix]);
														
 
															+            }
														
 
															+            bucket->set_local_depth(bucket->local_depth() + 1);
														
 
															+            add_to_write_ahead_log(bucket);
														
 
															+            write_directory_to_write_ahead_log();
														
 
															+
														
 
															+            auto bucket_after_redistribution = get_bucket(key_hash % size());
														
 
															+            if (bucket_after_redistribution->size() < bucket_after_redistribution->max_entries_in_bucket()) {
														
 
															+                return bucket_after_redistribution;
														
 
															+            }
														
 
															+        }
														
 
															+        expand();
														
 
															+    } while (true);
														
 
															+}
														
 
															+
														
 
															+void HashIndex::expand()
														
 
															+{
														
 
															+    auto sz = size();
														
 
															+    for (auto i = 0u; i < sz; i++) {
														
 
															+        auto bucket = get_bucket(i);
														
 
															+        bucket = append_bucket(sz + i, bucket->local_depth(), 0u);
														
 
															+        bucket->m_inflated = true;
														
 
															+    }
														
 
															+    m_global_depth++;
														
 
															+    write_directory_to_write_ahead_log();
														
 
															+}
														
 
															+
														
 
															+void HashIndex::write_directory_to_write_ahead_log()
														
 
															+{
														
 
															+    auto num_nodes_required = (size() / HashDirectoryNode::max_pointers_in_node()) + 1;
														
 
															+    while (m_nodes.size() < num_nodes_required)
														
 
															+        m_nodes.append(new_record_pointer());
														
 
															+
														
 
															+    size_t offset = 0u;
														
 
															+    size_t num_node = 0u;
														
 
															+    while (offset < size()) {
														
 
															+        HashDirectoryNode node(*this, num_node, offset);
														
 
															+        add_to_write_ahead_log(node.as_index_node());
														
 
															+        offset += node.number_of_pointers();
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+HashBucket* HashIndex::append_bucket(u32 index, u32 local_depth, u32 pointer)
														
 
															+{
														
 
															+    m_buckets.append(make<HashBucket>(*this, index, local_depth, pointer));
														
 
															+    return m_buckets.last();
														
 
															+}
														
 
															+
														
 
															+HashBucket* HashIndex::get_bucket_by_index(u32 index)
														
 
															+{
														
 
															+    if (index >= size())
														
 
															+        return nullptr;
														
 
															+    return m_buckets[index];
														
 
															+}
														
 
															+
														
 
															+Optional<u32> HashIndex::get(Key& key)
														
 
															+{
														
 
															+    auto hash = key.hash();
														
 
															+    auto bucket_index = hash % size();
														
 
															+    auto bucket = get_bucket(bucket_index);
														
 
															+    return bucket->get(key);
														
 
															+}
														
 
															+
														
 
															+bool HashIndex::insert(Key const& key)
														
 
															+{
														
 
															+    auto bucket = get_bucket_for_insert(key);
														
 
															+    bucket->insert(key);
														
 
															+    return true;
														
 
															+}
														
 
															+
														
 
															+HashIndexIterator HashIndex::begin()
														
 
															+{
														
 
															+    return HashIndexIterator(get_bucket(0));
														
 
															+}
														
 
															+
														
 
															+HashIndexIterator HashIndex::end()
														
 
															+{
														
 
															+    return HashIndexIterator::end();
														
 
															+}
														
 
															+
														
 
															+HashIndexIterator HashIndex::find(Key const& key)
														
 
															+{
														
 
															+    auto hash = key.hash();
														
 
															+    auto bucket_index = hash % size();
														
 
															+    auto bucket = get_bucket(bucket_index);
														
 
															+    auto optional_index = bucket->find_key_in_bucket(key);
														
 
															+    if (!optional_index.has_value())
														
 
															+        return end();
														
 
															+    return HashIndexIterator(bucket, optional_index.value());
														
 
															+}
														
 
															+
														
 
															+void HashIndex::list_hash()
														
 
															+{
														
 
															+    warnln("Number of buckets: {} (Global depth {})", size(), global_depth());
														
 
															+    warn("Directory pointer(s): ");
														
 
															+    for (auto ptr : m_nodes) {
														
 
															+        warn("{}, ", ptr);
														
 
															+    }
														
 
															+    warnln();
														
 
															+
														
 
															+    bool first_bucket = true;
														
 
															+    for (auto& bucket : m_buckets) {
														
 
															+        if (first_bucket) {
														
 
															+            warnln("Max. keys in bucket {}", bucket->max_entries_in_bucket());
														
 
															+            first_bucket = false;
														
 
															+        }
														
 
															+        bucket->list_bucket();
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+HashIndexIterator::HashIndexIterator(HashBucket const* bucket, size_t index)
														
 
															+    : m_current(bucket)
														
 
															+    , m_index(index)
														
 
															+{
														
 
															+    VERIFY(!m_current || !index || (index < m_current->size()));
														
 
															+    while (m_current && (m_current->size() == 0)) {
														
 
															+        m_current = m_current->next_bucket();
														
 
															+        m_index = 0;
														
 
															+    }
														
 
															+}
														
 
															+
														
 
															+HashIndexIterator HashIndexIterator::next()
														
 
															+{
														
 
															+    if (is_end())
														
 
															+        return *this;
														
 
															+    if (m_index < (m_current->size() - 1))
														
 
															+        return HashIndexIterator(m_current.ptr(), m_index + 1);
														
 
															+    return HashIndexIterator(m_current->next_bucket());
														
 
															+}
														
 
															+
														
 
															+HashIndexIterator HashIndexIterator::previous()
														
 
															+{
														
 
															+    TODO();
														
 
															+}
														
 
															+
														
 
															+bool HashIndexIterator::operator==(HashIndexIterator const& other) const
														
 
															+{
														
 
															+    if (is_end())
														
 
															+        return other.is_end();
														
 
															+    if (other.is_end())
														
 
															+        return false;
														
 
															+    VERIFY(&other.m_current->hash_index() == &m_current->hash_index());
														
 
															+    return (m_current.ptr() == other.m_current.ptr()) && (m_index == other.m_index);
														
 
															+}
														
 
															+
														
 
															+bool HashIndexIterator::operator==(Key const& other) const
														
 
															+{
														
 
															+    if (is_end())
														
 
															+        return false;
														
 
															+    if (other.is_null())
														
 
															+        return false;
														
 
															+    return (**this).compare(other);
														
 
															+}
														
 
															+
														
 
															+}
														
--- a/Userland/Libraries/LibSQL/HashIndex.h
+++ b/Userland/Libraries/LibSQL/HashIndex.h
@@ -0,0 +1,188 @@
 
															+/*
														
 
															+ * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
														
 
															+ *
														
 
															+ * SPDX-License-Identifier: BSD-2-Clause
														
 
															+ */
														
 
															+
														
 
															+#pragma once
														
 
															+
														
 
															+#include <AK/WeakPtr.h>
														
 
															+#include <LibCore/Object.h>
														
 
															+#include <LibSQL/Forward.h>
														
 
															+#include <LibSQL/Heap.h>
														
 
															+#include <LibSQL/Index.h>
														
 
															+#include <LibSQL/Key.h>
														
 
															+
														
 
															+namespace SQL {
														
 
															+
														
 
															+/**
														
 
															+ * The HashIndex class is a straightforward implementation of a persisted
														
 
															+ * extendible hash table (see
														
 
															+ * https://en.wikipedia.org/wiki/Extendible_hashing).
														
 
															+ */
														
 
															+
														
 
															+class HashBucket : public IndexNode
														
 
															+    , public Weakable<HashBucket> {
														
 
															+public:
														
 
															+    HashBucket(HashIndex&, u32 index, u32 local_depth, u32 pointer);
														
 
															+    ~HashBucket() override = default;
														
 
															+    Optional<u32> get(Key&);
														
 
															+    bool insert(Key const&);
														
 
															+    Vector<Key> const& entries()
														
 
															+    {
														
 
															+        inflate();
														
 
															+        return m_entries;
														
 
															+    }
														
 
															+    Key const& operator[](size_t);
														
 
															+    Key const& operator[](size_t ix) const
														
 
															+    {
														
 
															+        VERIFY(ix < m_entries.size());
														
 
															+        return m_entries[ix];
														
 
															+    }
														
 
															+    [[nodiscard]] u32 local_depth() const { return m_local_depth; }
														
 
															+    [[nodiscard]] u32 size() { return entries().size(); }
														
 
															+    [[nodiscard]] u32 size() const { return m_entries.size(); }
														
 
															+    [[nodiscard]] u32 index() const { return m_index; }
														
 
															+    void serialize(ByteBuffer&) const override;
														
 
															+    IndexNode* as_index_node() override { return dynamic_cast<IndexNode*>(this); }
														
 
															+    [[nodiscard]] HashIndex const& hash_index() const { return m_hash_index; }
														
 
															+    [[nodiscard]] HashBucket const* next_bucket();
														
 
															+    [[nodiscard]] HashBucket const* previous_bucket();
														
 
															+    void list_bucket();
														
 
															+
														
 
															+private:
														
 
															+    Optional<size_t> find_key_in_bucket(Key const&);
														
 
															+    void set_index(u32 index) { m_index = index; }
														
 
															+    void set_local_depth(u32 depth) { m_local_depth = depth; }
														
 
															+    [[nodiscard]] size_t max_entries_in_bucket() const;
														
 
															+    void inflate();
														
 
															+
														
 
															+    HashIndex& m_hash_index;
														
 
															+    u32 m_local_depth { 1 };
														
 
															+    u32 m_index { 0 };
														
 
															+    Vector<Key> m_entries;
														
 
															+    bool m_inflated { false };
														
 
															+
														
 
															+    friend HashIndex;
														
 
															+};
														
 
															+
														
 
															+class HashIndex : public Index {
														
 
															+    C_OBJECT(HashIndex);
														
 
															+
														
 
															+public:
														
 
															+    ~HashIndex() override = default;
														
 
															+
														
 
															+    Optional<u32> get(Key&);
														
 
															+    bool insert(Key const&);
														
 
															+    bool insert(Key const&& entry) { return insert(entry); }
														
 
															+    HashIndexIterator find(Key const&);
														
 
															+    HashIndexIterator begin();
														
 
															+    static HashIndexIterator end();
														
 
															+
														
 
															+    [[nodiscard]] u32 global_depth() const { return m_global_depth; }
														
 
															+    [[nodiscard]] u32 size() const { return 1 << m_global_depth; }
														
 
															+    [[nodiscard]] HashBucket* get_bucket(u32);
														
 
															+    [[nodiscard]] u32 node_pointer(u32 node_number) const { return m_nodes[node_number]; }
														
 
															+    [[nodiscard]] u32 first_node_pointer() const { return m_nodes[0]; }
														
 
															+    [[nodiscard]] size_t nodes() const { return m_nodes.size(); }
														
 
															+    void list_hash();
														
 
															+
														
 
															+private:
														
 
															+    HashIndex(Heap&, TupleDescriptor const&, u32);
														
 
															+
														
 
															+    void expand();
														
 
															+    void write_directory_to_write_ahead_log();
														
 
															+    HashBucket* append_bucket(u32 index, u32 local_depth, u32 pointer);
														
 
															+    HashBucket* get_bucket_for_insert(Key const&);
														
 
															+    [[nodiscard]] HashBucket* get_bucket_by_index(u32 index);
														
 
															+
														
 
															+    u32 m_global_depth { 1 };
														
 
															+    Vector<u32> m_nodes;
														
 
															+    Vector<OwnPtr<HashBucket>> m_buckets;
														
 
															+
														
 
															+    friend HashBucket;
														
 
															+    friend HashDirectoryNode;
														
 
															+};
														
 
															+
														
 
															+class HashDirectoryNode : public IndexNode {
														
 
															+public:
														
 
															+    HashDirectoryNode(HashIndex&, u32, size_t);
														
 
															+    HashDirectoryNode(HashIndex&, u32, ByteBuffer&);
														
 
															+    HashDirectoryNode(HashDirectoryNode const& other) = default;
														
 
															+    void serialize(ByteBuffer&) const override;
														
 
															+    IndexNode* as_index_node() override { return dynamic_cast<IndexNode*>(this); }
														
 
															+    [[nodiscard]] u32 number_of_pointers() const { return min(max_pointers_in_node(), m_hash_index.size() - m_offset); }
														
 
															+    [[nodiscard]] bool is_last() const { return m_is_last; }
														
 
															+    static constexpr size_t max_pointers_in_node() { return (BLOCKSIZE - 3 * sizeof(u32)) / (2 * sizeof(u32)); }
														
 
															+
														
 
															+private:
														
 
															+    HashIndex& m_hash_index;
														
 
															+    size_t m_node_number { 0 };
														
 
															+    size_t m_offset { 0 };
														
 
															+    bool m_is_last { false };
														
 
															+};
														
 
															+
														
 
															+class HashIndexIterator {
														
 
															+public:
														
 
															+    [[nodiscard]] bool is_end() const { return !m_current; }
														
 
															+
														
 
															+    bool operator==(HashIndexIterator const& other) const;
														
 
															+    bool operator!=(HashIndexIterator const& other) const { return !(*this == other); }
														
 
															+    bool operator==(Key const& other) const;
														
 
															+    bool operator!=(Key const& other) const { return !(*this == other); }
														
 
															+
														
 
															+    HashIndexIterator operator++()
														
 
															+    {
														
 
															+        *this = next();
														
 
															+        return *this;
														
 
															+    }
														
 
															+
														
 
															+    HashIndexIterator operator++(int)
														
 
															+    {
														
 
															+        *this = next();
														
 
															+        return *this;
														
 
															+    }
														
 
															+
														
 
															+    HashIndexIterator operator--()
														
 
															+    {
														
 
															+        *this = previous();
														
 
															+        return *this;
														
 
															+    }
														
 
															+
														
 
															+    HashIndexIterator const operator--(int)
														
 
															+    {
														
 
															+        *this = previous();
														
 
															+        return *this;
														
 
															+    }
														
 
															+
														
 
															+    Key const& operator*() const
														
 
															+    {
														
 
															+        VERIFY(!is_end());
														
 
															+        return (*m_current)[m_index];
														
 
															+    }
														
 
															+
														
 
															+    Key const& operator->() const
														
 
															+    {
														
 
															+        VERIFY(!is_end());
														
 
															+        return (*m_current)[m_index];
														
 
															+    }
														
 
															+
														
 
															+    HashIndexIterator& operator=(HashIndexIterator const&) = default;
														
 
															+    HashIndexIterator(HashIndexIterator const&) = default;
														
 
															+
														
 
															+private:
														
 
															+    HashIndexIterator() = default;
														
 
															+    explicit HashIndexIterator(HashBucket const*, size_t key_index = 0);
														
 
															+    static HashIndexIterator end() { return HashIndexIterator(); }
														
 
															+
														
 
															+    [[nodiscard]] HashIndexIterator next();
														
 
															+    [[nodiscard]] HashIndexIterator previous();
														
 
															+    [[nodiscard]] Key key() const { return **this; }
														
 
															+
														
 
															+    WeakPtr<HashBucket> m_current;
														
 
															+    size_t m_index { 0 };
														
 
															+
														
 
															+    friend HashIndex;
														
 
															+};
														
 
															+
														
 
															+}