LibSQL: BTree index, Heap, and Meta objects for SQL Storage layer

Unfortunately this patch is quite large.

The main pieces of functionality included are a BTree index implementation
and the Heap class, which manages persistent storage.

Also included is Key, a subclass of the Tuple class that is a
specialization for index key tuples. This "dragged in" the Meta layer,
which has classes defining SQL objects like tables and indexes.
This commit is contained in:
Jan de Visser 2021-06-17 13:47:42 -04:00 committed by Andreas Kling
parent 2a46529170
commit 224804b424
Notes: sideshowbarker 2024-07-18 12:00:41 +09:00
15 changed files with 2153 additions and 0 deletions

View file

@ -0,0 +1,311 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <unistd.h>
#include <AK/ScopeGuard.h>
#include <LibSQL/BTree.h>
#include <LibSQL/Heap.h>
#include <LibSQL/Key.h>
#include <LibSQL/Meta.h>
#include <LibSQL/TupleDescriptor.h>
#include <LibSQL/Value.h>
#include <LibTest/TestCase.h>
// Test fixture: 50 distinct integer key values, in the (unsorted) order in
// which the tests insert them into the BTree. keys[i] is stored together with
// pointers[i].
constexpr static int keys[] = {
39,
87,
77,
42,
98,
40,
53,
8,
37,
12,
90,
72,
73,
11,
88,
22,
10,
82,
25,
61,
97,
18,
60,
68,
21,
3,
58,
29,
13,
17,
89,
81,
16,
64,
5,
41,
36,
91,
38,
24,
32,
50,
34,
94,
49,
47,
1,
6,
44,
76,
};
// Test fixture: 50 pointer values; pointers[i] is the value attached to
// keys[i] via Key::set_pointer(). Unlike the keys, these values are not
// unique (e.g. 92, 50 and 96 each occur twice).
constexpr static u32 pointers[] = {
92,
4,
50,
47,
68,
73,
24,
28,
50,
93,
60,
36,
92,
72,
53,
26,
91,
84,
25,
43,
88,
12,
62,
35,
96,
27,
96,
27,
99,
30,
21,
89,
54,
60,
37,
68,
35,
55,
80,
2,
33,
26,
93,
70,
45,
44,
3,
66,
75,
4,
};
NonnullRefPtr<SQL::BTree> setup_btree(SQL::Heap& heap);
void insert_and_get_to_and_from_btree(int num_keys);
void insert_into_and_scan_btree(int num_keys);
/**
 * Creates a unique BTree with a single ascending integer column
 * ("key_value") on top of the given heap.
 *
 * The block pointer of the tree's root is kept in user value slot 0 of the
 * heap, so that the tree can be located again when the heap is reopened. If
 * the slot is still empty, a fresh block pointer is allocated and stored.
 *
 * The heap must outlive the returned tree: the tree's on_new_root callback
 * refers to it.
 */
NonnullRefPtr<SQL::BTree> setup_btree(SQL::Heap& heap)
{
    SQL::TupleDescriptor tuple_descriptor;
    tuple_descriptor.append({ "key_value", SQL::SQLType::Integer, SQL::Order::Ascending });

    auto root_pointer = heap.user_value(0);
    if (!root_pointer) {
        root_pointer = heap.new_record_pointer();
        heap.set_user_value(0, root_pointer);
    }

    auto btree = SQL::BTree::construct(heap, tuple_descriptor, true, root_pointer);
    // Capture the tree object itself (by raw pointer value) instead of
    // capturing the local variable `btree` by reference: the local ceases to
    // exist when this function returns, whereas the callback is stored in the
    // tree and invoked later. A raw pointer rather than a RefPtr copy is used
    // so the tree does not keep itself alive through its own callback.
    btree->on_new_root = [&heap, tree = btree.ptr()]() {
        heap.set_user_value(0, tree->root());
    };
    return btree;
}
/**
 * Round-trip test helper: inserts the first num_keys entries of the
 * keys/pointers fixture tables into a BTree backed by "test.db", then reopens
 * the file and verifies that every key can be looked up and yields the
 * pointer it was stored with.
 *
 * num_keys must not exceed the number of entries in the fixture tables; this
 * is not checked here.
 */
void insert_and_get_to_and_from_btree(int num_keys)
{
// Remove the database file again when this function returns.
ScopeGuard guard([]() { unlink("test.db"); });
{
// First pass: build the tree. Heap and tree live in their own scope so that
// they are released before the file is opened a second time below.
auto heap = SQL::Heap::construct("test.db");
auto btree = setup_btree(heap);
for (auto ix = 0; ix < num_keys; ix++) {
SQL::Key k(btree->descriptor());
k[0] = keys[ix];
k.set_pointer(pointers[ix]);
btree->insert(k);
}
#ifdef LIST_TREE
btree->list_tree();
#endif
}
{
// Second pass: open the file again and look up every key that was inserted.
auto heap = SQL::Heap::construct("test.db");
auto btree = setup_btree(heap);
for (auto ix = 0; ix < num_keys; ix++) {
SQL::Key k(btree->descriptor());
k[0] = keys[ix];
auto pointer_opt = btree->get(k);
EXPECT(pointer_opt.has_value());
EXPECT_EQ(pointer_opt.value(), pointers[ix]);
}
}
}
/**
 * Scan test helper: inserts the first num_keys entries of the keys/pointers
 * fixture tables into a BTree backed by "test.db", then reopens the file and
 * iterates over the whole tree. The scan must visit exactly num_keys entries,
 * in strictly ascending key order, each carrying the pointer it was stored
 * with.
 *
 * num_keys must not exceed the number of entries in the fixture tables; this
 * is not checked here.
 */
void insert_into_and_scan_btree(int num_keys)
{
// Remove the database file again when this function returns.
ScopeGuard guard([]() { unlink("test.db"); });
{
// First pass: build the tree in its own scope, so heap and tree are released
// before the file is opened a second time below.
auto heap = SQL::Heap::construct("test.db");
auto btree = setup_btree(heap);
for (auto ix = 0; ix < num_keys; ix++) {
SQL::Key k(btree->descriptor());
k[0] = keys[ix];
k.set_pointer(pointers[ix]);
btree->insert(k);
}
#ifdef LIST_TREE
btree->list_tree();
#endif
}
{
// Second pass: open the file again and walk the tree from the beginning.
auto heap = SQL::Heap::construct("test.db");
auto btree = setup_btree(heap);
int count = 0;
// Key seen in the previous iteration; has length 0 until the first entry.
SQL::Tuple prev;
for (auto iter = btree->begin(); !iter.is_end(); iter++, count++) {
auto key = (*iter);
if (prev.length()) {
EXPECT(prev < key);
}
auto key_value = (int)key[0];
// Find the fixture entry for this key value to check the stored pointer.
for (auto ix = 0; ix < num_keys; ix++) {
if (keys[ix] == key_value) {
EXPECT_EQ(key.pointer(), pointers[ix]);
break;
}
}
prev = key;
}
EXPECT_EQ(count, num_keys);
}
}
// Insert-and-lookup round trips with an increasing number of keys, from a
// single key up to all 50 entries of the fixture tables.
TEST_CASE(btree_one_key)
{
insert_and_get_to_and_from_btree(1);
}
TEST_CASE(btree_four_keys)
{
insert_and_get_to_and_from_btree(4);
}
TEST_CASE(btree_five_keys)
{
insert_and_get_to_and_from_btree(5);
}
TEST_CASE(btree_10_keys)
{
insert_and_get_to_and_from_btree(10);
}
TEST_CASE(btree_13_keys)
{
insert_and_get_to_and_from_btree(13);
}
TEST_CASE(btree_20_keys)
{
insert_and_get_to_and_from_btree(20);
}
TEST_CASE(btree_25_keys)
{
insert_and_get_to_and_from_btree(25);
}
TEST_CASE(btree_30_keys)
{
insert_and_get_to_and_from_btree(30);
}
TEST_CASE(btree_35_keys)
{
insert_and_get_to_and_from_btree(35);
}
TEST_CASE(btree_40_keys)
{
insert_and_get_to_and_from_btree(40);
}
TEST_CASE(btree_45_keys)
{
insert_and_get_to_and_from_btree(45);
}
TEST_CASE(btree_50_keys)
{
insert_and_get_to_and_from_btree(50);
}
// Insert-and-scan tests with an increasing number of keys.
TEST_CASE(btree_scan_one_key)
{
insert_into_and_scan_btree(1);
}
TEST_CASE(btree_scan_four_keys)
{
insert_into_and_scan_btree(4);
}
TEST_CASE(btree_scan_five_keys)
{
insert_into_and_scan_btree(5);
}
TEST_CASE(btree_scan_10_keys)
{
insert_into_and_scan_btree(10);
}
TEST_CASE(btree_scan_15_keys)
{
insert_into_and_scan_btree(15);
}
// Insert-and-scan test with 30 keys. (This previously passed 15, silently
// duplicating btree_scan_15_keys instead of exercising a 30-key tree.)
TEST_CASE(btree_scan_30_keys)
{
    insert_into_and_scan_btree(30);
}
// Insert-and-scan test using all 50 entries of the fixture tables.
TEST_CASE(btree_scan_50_keys)
{
insert_into_and_scan_btree(50);
}

View file

@ -0,0 +1,113 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Format.h>
#include <LibSQL/BTree.h>
#include <LibSQL/Meta.h>
namespace SQL {
// Constructs a BTree index on the given heap. `pointer` is the block pointer
// of the root node, or 0 if no root block has been assigned yet. The root
// node itself is not created here; initialize_root() creates or loads it the
// first time it is needed.
BTree::BTree(Heap& heap, TupleDescriptor const& descriptor, bool unique, u32 pointer)
: Index(heap, descriptor, unique, pointer)
, m_root(nullptr)
{
}
// Convenience constructor: same as above, with `unique` set to true.
BTree::BTree(Heap& heap, TupleDescriptor const& descriptor, u32 pointer)
: BTree(heap, descriptor, true, pointer)
{
}
// Returns an iterator positioned at the first key of the tree, initializing
// the root node first if necessary. The index -1 tells the BTreeIterator
// constructor to descend from the root to the left-most leaf; for an empty
// tree that constructor yields an end iterator.
BTreeIterator BTree::begin()
{
if (!m_root)
initialize_root();
VERIFY(m_root);
return BTreeIterator(m_root, -1);
}
// Returns the end iterator: an iterator without a current node, for which
// is_end() is true.
BTreeIterator BTree::end()
{
return BTreeIterator(nullptr, -1);
}
/**
 * Creates the in-memory root node of the tree. Three situations are handled:
 *
 *  - The index has no root block pointer yet: a new block pointer is
 *    allocated, an empty root node is created for it and on_new_root (if set)
 *    is invoked.
 *  - The index has a root block pointer that lies at or beyond the current
 *    size of the heap: an empty root node is created for that pointer.
 *  - Otherwise the block is read from the heap and the root node is
 *    constructed from its contents.
 */
void BTree::initialize_root()
{
    if (!pointer()) {
        set_pointer(new_record_pointer());
        m_root = make<TreeNode>(*this, nullptr, pointer());
        if (on_new_root)
            on_new_root();
        return;
    }

    if (!(pointer() < heap().size())) {
        // The block has been reserved but not been written yet.
        m_root = make<TreeNode>(*this, nullptr, pointer());
        return;
    }

    auto buffer = read_block(pointer());
    size_t offset = 0;
    m_root = make<TreeNode>(*this, nullptr, pointer(), buffer, offset);
}
// Replaces the root of the tree with a newly allocated node and returns it.
// A new block pointer is allocated for the index, the current root is
// released from m_root (leak_ptr) and handed to the new node's constructor,
// the new root is added to the write-ahead log and on_new_root (if set) is
// invoked.
// NOTE(review): presumably the new node takes over ownership of the old root
// as its first child; the TreeNode constructor is not visible here - confirm.
TreeNode* BTree::new_root()
{
set_pointer(new_record_pointer());
m_root = make<TreeNode>(*this, nullptr, m_root.leak_ptr(), pointer());
add_to_write_ahead_log(m_root->as_index_node());
if (on_new_root)
on_new_root();
return m_root;
}
// Inserts the key into the tree by delegating to the root node, which is
// initialized first if necessary. Returns the result of TreeNode::insert().
bool BTree::insert(Key const& key)
{
if (!m_root)
initialize_root();
VERIFY(m_root);
return m_root->insert(key);
}
// Delegates to TreeNode::update_key_pointer() on the root node, which is
// initialized first if necessary, and returns its result.
bool BTree::update_key_pointer(Key const& key)
{
if (!m_root)
initialize_root();
VERIFY(m_root);
return m_root->update_key_pointer(key);
}
// Looks up the key by delegating to TreeNode::get() on the root node, which
// is initialized first if necessary. The test suite expects the returned
// Optional to hold the pointer that was stored with the key.
Optional<u32> BTree::get(Key& key)
{
if (!m_root)
initialize_root();
VERIFY(m_root);
return m_root->get(key);
}
/**
 * Returns an iterator positioned at the entry matching the given key, or the
 * end iterator if there is no such entry.
 *
 * The search starts in the node returned by node_for(key) and walks up
 * towards the root. In every node visited the entries are checked in order:
 * a match of 0 is a hit, a positive match ends the search without a result.
 */
BTreeIterator BTree::find(Key const& key)
{
    if (!m_root)
        initialize_root();
    VERIFY(m_root);

    auto node = m_root->node_for(key);
    while (node) {
        for (auto ix = 0u; ix < node->size(); ix++) {
            auto match = (*node)[ix].match(key);
            if (match > 0)
                return end();
            if (match == 0)
                return BTreeIterator(node, (int)ix);
        }
        node = node->up();
    }
    return end();
}
// Debugging aid: asks the root node (initialized first if necessary) to list
// itself via TreeNode::list_node(), passing 0 as its argument.
void BTree::list_tree()
{
if (!m_root)
initialize_root();
m_root->list_node(0);
}
}

View file

@ -0,0 +1,205 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Function.h>
#include <AK/NonnullRefPtr.h>
#include <AK/NonnullRefPtrVector.h>
#include <AK/Optional.h>
#include <AK/RefPtr.h>
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibCore/File.h>
#include <LibCore/Object.h>
#include <LibSQL/Forward.h>
#include <LibSQL/Heap.h>
#include <LibSQL/Index.h>
#include <LibSQL/Key.h>
namespace SQL {
/**
* The BTree class models a B-Tree index. It contains a collection of
* Key objects organized in TreeNode objects. Keys can be inserted,
* located, deleted, and the set can be traversed in sort order. All keys in
* a tree have the same underlying structure. A BTree's TreeNodes and
* the keys it includes are lazily loaded from the Heap when needed.
*
* The classes implementing the B-Tree functionality are BTree, TreeNode,
* BTreeIterator, and DownPointer (a smart pointer-like helper class).
*/
/**
 * Smart pointer-like helper linking a TreeNode (the owner) to one of its
 * child nodes. It holds the block pointer of the child and, once available,
 * the in-memory child node itself.
 * NOTE(review): presumably node() loads the child on demand through
 * inflate(); the implementation is not part of this header - confirm.
 */
class DownPointer {
public:
explicit DownPointer(TreeNode*, u32 = 0);
DownPointer(TreeNode*, TreeNode*);
DownPointer(DownPointer const&);
DownPointer(TreeNode*, DownPointer&);
~DownPointer() = default;
// Block pointer of the node this object refers to.
[[nodiscard]] u32 pointer() const { return m_pointer; }
TreeNode* node();
private:
void inflate();
// The node this down pointer belongs to.
TreeNode* m_owner;
u32 m_pointer { 0 };
// The in-memory child node; null as long as it is not available.
OwnPtr<TreeNode> m_node { nullptr };
friend TreeNode;
};
/**
 * A node of a BTree. A node holds a list of keys (m_entries) and a list of
 * DownPointers to child nodes (m_down), and knows its parent (m_up, null for
 * the root) and the tree it belongs to.
 */
class TreeNode : public IndexNode {
public:
TreeNode(BTree&, TreeNode*, u32 = 0);
TreeNode(BTree&, TreeNode*, TreeNode*, u32 = 0);
// Constructs a node from the contents of a block buffer; see
// BTree::initialize_root() for a use.
TreeNode(BTree&, TreeNode*, u32 pointer, ByteBuffer&, size_t&);
~TreeNode() override = default;
[[nodiscard]] BTree& tree() const { return m_tree; }
// Parent node; null for the root node.
[[nodiscard]] TreeNode* up() const { return m_up; }
// Number of keys in this node.
[[nodiscard]] size_t size() const { return m_entries.size(); }
// Returns a copy of the keys in this node.
[[nodiscard]] Vector<Key> entries() const { return m_entries; }
[[nodiscard]] u32 down_pointer(size_t) const;
[[nodiscard]] TreeNode* down_node(size_t);
[[nodiscard]] bool is_leaf() const { return m_is_leaf; }
[[nodiscard]] size_t max_keys_in_node();
Key const& operator[](size_t) const;
bool insert(Key const&);
bool update_key_pointer(Key const&);
TreeNode* node_for(Key const&);
Optional<u32> get(Key&);
void serialize(ByteBuffer&) const override;
IndexNode* as_index_node() override { return dynamic_cast<IndexNode*>(this); }
private:
TreeNode(BTree&, TreeNode*, DownPointer&, u32 = 0);
void dump_if(int, String&& = "");
bool insert_in_leaf(Key const&);
void just_insert(Key const&, TreeNode* = nullptr);
void split();
void list_node(int);
BTree& m_tree;
TreeNode* m_up;
Vector<Key> m_entries;
bool m_is_leaf { true };
Vector<DownPointer> m_down;
friend BTree;
friend BTreeIterator;
};
/**
 * A B-Tree index. Instances are created through construct() (see C_OBJECT);
 * the constructors are private. The root node is created or loaded lazily by
 * initialize_root() when the tree is first used.
 */
class BTree : public Index {
C_OBJECT(BTree);
public:
~BTree() override = default;
// Block pointer of the root node, or 0 if the root has not been initialized.
u32 root() const { return (m_root) ? m_root->pointer() : 0; }
bool insert(Key const&);
bool update_key_pointer(Key const&);
Optional<u32> get(Key&);
BTreeIterator find(Key const& key);
BTreeIterator begin();
static BTreeIterator end();
void list_tree();
// Invoked, if set, whenever the tree gets a new root block pointer, so the
// owner can record where the root lives.
Function<void(void)> on_new_root;
private:
BTree(Heap& heap, TupleDescriptor const&, bool unique, u32 pointer);
BTree(Heap& heap, TupleDescriptor const&, u32 pointer);
void initialize_root();
TreeNode* new_root();
OwnPtr<TreeNode> m_root { nullptr };
friend BTreeIterator;
friend DownPointer;
friend TreeNode;
};
/**
 * Iterator over the keys of a BTree, in key order. An iterator is either
 * positioned at an entry of a tree node (m_current / m_index) or is the end
 * iterator (is_end() is true, no current node).
 *
 * Iterators are obtained from BTree::begin(), BTree::end() and BTree::find();
 * the constructor is private.
 */
class BTreeIterator {
public:
    [[nodiscard]] bool is_end() const { return m_where == Where::End; }
    [[nodiscard]] size_t index() const { return m_index; }

    // Replaces the key at the current position; see the implementation for
    // the conditions under which this is refused.
    bool update(Key const&);

    // Comparison with another iterator of the same tree. The end iterator
    // compares greater than every other iterator.
    bool operator==(BTreeIterator const& other) const { return cmp(other) == 0; }
    bool operator!=(BTreeIterator const& other) const { return cmp(other) != 0; }
    bool operator<(BTreeIterator const& other) const { return cmp(other) < 0; }
    bool operator>(BTreeIterator const& other) const { return cmp(other) > 0; }
    bool operator<=(BTreeIterator const& other) const { return cmp(other) <= 0; }
    bool operator>=(BTreeIterator const& other) const { return cmp(other) >= 0; }

    // Comparison of the key at the current position with the given key.
    bool operator==(Key const& other) const { return cmp(other) == 0; }
    bool operator!=(Key const& other) const { return cmp(other) != 0; }
    bool operator<(Key const& other) const { return cmp(other) < 0; }
    bool operator>(Key const& other) const { return cmp(other) > 0; }
    bool operator<=(Key const& other) const { return cmp(other) <= 0; }
    bool operator>=(Key const& other) const { return cmp(other) >= 0; }

    // Prefix increment: moves to the next entry and returns this iterator.
    BTreeIterator& operator++()
    {
        *this = next();
        return *this;
    }

    // Postfix increment: moves to the next entry, but returns the position
    // the iterator had before it was moved.
    BTreeIterator operator++(int)
    {
        auto before = *this;
        *this = next();
        return before;
    }

    // Prefix decrement: moves to the previous entry and returns this iterator.
    BTreeIterator& operator--()
    {
        *this = previous();
        return *this;
    }

    // Postfix decrement: moves to the previous entry, but returns the
    // position the iterator had before it was moved.
    BTreeIterator const operator--(int)
    {
        auto before = *this;
        *this = previous();
        return before;
    }

    // Returns the key at the current position. Must not be used on the end
    // iterator.
    Key const& operator*() const
    {
        VERIFY(!is_end());
        return (*m_current)[m_index];
    }

    // Member access to the key at the current position. This returns a
    // pointer: operator-> has to yield a pointer (or an object that itself
    // provides operator->) for `iter->member` to be well-formed.
    Key const* operator->() const
    {
        VERIFY(!is_end());
        return &(*m_current)[m_index];
    }

    BTreeIterator& operator=(BTreeIterator const&);
    BTreeIterator(BTreeIterator const&) = default;

private:
    // A null node yields the end iterator; a negative index positions the
    // iterator at the first entry of the tree below the given node.
    BTreeIterator(TreeNode*, int index);
    static BTreeIterator end() { return BTreeIterator(nullptr, -1); }

    [[nodiscard]] int cmp(BTreeIterator const&) const;
    [[nodiscard]] int cmp(Key const&) const;
    [[nodiscard]] BTreeIterator next() const;
    [[nodiscard]] BTreeIterator previous() const;
    [[nodiscard]] Key key() const;

    enum class Where {
        Valid,
        End
    };

    Where m_where { Where::Valid };
    TreeNode* m_current { nullptr };
    int m_index { -1 };

    friend BTree;
};
}

View file

@ -0,0 +1,249 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Format.h>
#include <LibSQL/BTree.h>
namespace SQL {
// Constructs an iterator.
//  - A null node yields the end iterator.
//  - A negative index means "first entry below this node": the constructor
//    descends along the first down pointer until it reaches a leaf (or an
//    empty node). If that node is empty the result is the end iterator,
//    otherwise the iterator points at entry 0 of that node.
//  - Otherwise the iterator points at entry `index` of `node`; the index must
//    be smaller than the number of entries in the node.
BTreeIterator::BTreeIterator(TreeNode* node, int index)
: m_current(node)
, m_index(index)
{
if (!node) {
m_where = Where::End;
} else {
if (index < 0) {
while (!node->is_leaf() && (node->size() != 0)) {
node = node->down_node(0);
}
if (node->size() == 0) {
m_where = Where::End;
m_current = nullptr;
m_index = -1;
} else {
m_where = Where::Valid;
m_current = node;
m_index = 0;
}
} else {
VERIFY(m_index < (int)m_current->size());
}
}
}
/**
 * Three-way comparison with another iterator of the same tree. Returns a
 * negative value, 0 or a positive value if this iterator is positioned
 * before, at or after the other iterator.
 *
 * The end iterator compares equal to another end iterator and greater than
 * any other iterator. Iterators into the same node compare the keys they
 * point at. Iterators into different nodes compare the last key of this
 * iterator's node with the first key of the other iterator's node.
 *
 * NOTE(review): the different-node rule is an approximation; it is not exact
 * when one of the nodes is an ancestor of the other. Only the node identity
 * test is corrected here.
 */
int BTreeIterator::cmp(BTreeIterator const& other) const
{
    if (is_end())
        return (other.is_end()) ? 0 : 1;
    if (other.is_end())
        return -1;
    VERIFY(&other.m_current->tree() == &m_current->tree());
    VERIFY((m_current->size() > 0) && (other.m_current->size() > 0));
    // Compare the node pointers themselves. Comparing the addresses of the
    // two m_current members (as was done before) is true for any two distinct
    // iterator objects, even when both point into the same node.
    if (m_current != other.m_current)
        return (*m_current)[m_current->size() - 1].compare((*(other.m_current))[0]);
    return (*m_current)[m_index].compare((*(other.m_current))[other.m_index]);
}
/**
 * Three-way comparison of the key at the current position with the given
 * key. The end iterator compares greater than any key (1); any valid
 * position compares less than a null key (-1); otherwise the result of
 * comparing the two keys is returned.
 */
int BTreeIterator::cmp(Key const& other) const
{
    if (is_end())
        return 1;
    return other.is_null() ? -1 : key().compare(other);
}
// Returns an iterator positioned at the entry following the current one in
// key order, or the end iterator if the current entry is the last one of the
// tree (or if this already is the end iterator). The iterator itself is not
// modified.
BTreeIterator BTreeIterator::next() const
{
if (is_end())
return end();
auto ix = m_index;
auto node = m_current;
if (ix < (int)(node->size() - 1)) {
if (node->is_leaf()) {
// We're in the middle of a leaf node. The next entry is
// the next entry of the node:
return BTreeIterator(node, ix + 1);
} else {
/*
 * We're in the middle of a non-leaf node. The iterator's
 * next value is all the way down to the right, first entry.
 *
 *             |
 *       +--+--+--+--+
 *       |  |##|  |  |
 *       +--+--+--+--+
 *      /   |  |  |   \
 *             |
 *       +--+--+--+--+
 *       |  |  |  |  |
 *       +--+--+--+--+
 *      /
 * +--+--+--+--+
 * |++|  |  |  |
 * +--+--+--+--+
 */
ix++;
while (!node->is_leaf()) {
node = node->down_node(ix);
ix = 0;
}
}
VERIFY(node->is_leaf() && (ix < (int)node->size()));
return BTreeIterator(node, ix);
}
if (node->is_leaf()) {
// We're currently at the last entry of a leaf node. We need to check
// one or more levels up until we end up in the "middle" of a node.
// If one level up we're still at the end of the node, we need
// to keep going up until we hit the root node. If we're at the
// end of the root node, we reached the end of the btree.
for (auto up = node->up(); up; up = node->up()) {
for (size_t i = 0; i < up->size(); i++) {
// One level up, try to find the entry with the current
// node's pointer as the left pointer:
if (up->down_pointer(i) == node->pointer())
// Found it. This is the iterator's next value:
return BTreeIterator(up, (int)i);
}
// We didn't find the current node's pointer as a left pointer. So
// it must be the right pointer all the way at the end and we need
// to go one more level up:
node = up;
}
// We reached the root node and we're still at the end of the node.
// That means we're at the end of the btree.
return end();
}
// If we're at the end of a non-leaf node, we need to follow the
// right pointer down until we find a leaf:
TreeNode* down;
for (down = node->down_node(node->size()); !down->is_leaf(); down = down->down_node(0))
;
return BTreeIterator(down, 0);
}
// FIXME Reverse iterating doesn't quite work; we don't recognize the
//       end (which is really the beginning) of the tree.
/**
 * Returns an iterator positioned at the entry preceding the current one in
 * key order. For the end iterator the end iterator is returned. When the
 * current entry is the first entry of the tree, an iterator at that same
 * first entry is returned (see the FIXME above). The iterator itself is not
 * modified.
 */
BTreeIterator BTreeIterator::previous() const
{
    if (is_end()) {
        return end();
    }

    auto node = m_current;
    auto ix = m_index;
    if (ix > 0) {
        if (node->is_leaf()) {
            // We're in the middle of a leaf node. The previous entry is
            // the previous entry of the node:
            return BTreeIterator(node, ix - 1);
        }
        /*
         * We're in the middle of a non-leaf node. The iterator's
         * previous value is all the way down to the left, last entry.
         *
         *                |
         *       +--+--+--+--+
         *       |  |  |##|  |
         *       +--+--+--+--+
         *      /   |  |  |   \
         *             |
         *       +--+--+--+--+
         *       |  |  |  |  |
         *       +--+--+--+--+
         *                    \
         *               +--+--+--+--+
         *               |  |  |  |++|
         *               +--+--+--+--+
         */
        while (!node->is_leaf()) {
            // First step: the down pointer to the left of the current entry.
            // Every following step: the right-most down pointer, whose index
            // equals the number of entries in the node.
            node = node->down_node(ix);
            ix = (int)node->size();
        }
        // At this point ix is the entry count of the leaf, i.e. one past its
        // last entry. The previous value is the last entry of the leaf;
        // returning ix itself would trip the bounds check in the constructor.
        VERIFY(node->is_leaf() && (node->size() > 0));
        return BTreeIterator(node, (int)node->size() - 1);
    }

    if (node->is_leaf()) {
        // We're currently at the first entry of a leaf node. We need to check
        // one or more levels up until we end up in the "middle" of a node.
        // If one level up we're still at the start of the node, we need
        // to keep going up until we hit the root node. If we're at the
        // start of the root node, we reached the start of the btree.
        auto stash_current = node;
        for (auto up = node->up(); up; up = node->up()) {
            for (size_t i = up->size(); i > 0; i--) {
                // One level up, try to find the entry with the current
                // node's pointer as the right pointer:
                if (up->down_pointer(i) == node->pointer()) {
                    // Found it. This is the iterator's previous value:
                    node = up;
                    ix = (int)i - 1;
                    return BTreeIterator(node, ix);
                }
            }
            // We didn't find the current node's pointer as a right pointer.
            // So it must be the left pointer all the way at the start and we
            // need to go one more level up:
            node = up;
        }
        // We reached the root node and we're still at the start of the node.
        // That means we're at the start of the btree.
        return BTreeIterator(stash_current, 0);
    }

    // If we're at the start of a non-leaf node, we need to follow the
    // left pointer down until we find a leaf:
    TreeNode* down = node->down_node(0);
    while (!down->is_leaf())
        down = down->down_node(down->size());
    return BTreeIterator(down, down->size() - 1);
}
/**
 * Returns a copy of the key at the current position, or a default-constructed
 * Key for the end iterator.
 */
Key BTreeIterator::key() const
{
    if (!is_end())
        return (*m_current)[m_index];
    return {};
}
// Replaces the key at the current position with new_value and adds the
// modified node to the write-ahead log. Returns true if the entry now holds
// new_value and false if the update was refused. The update is refused
//  - for the end iterator,
//  - if the tree does not allow duplicates and new_value equals the previous
//    or the next entry,
//  - if new_value would not sort between the previous and the next entry.
// If the current entry already equals new_value (same key, same pointer)
// nothing is changed and true is returned.
bool BTreeIterator::update(Key const& new_value)
{
if (is_end())
return false;
if ((cmp(new_value) == 0) && (key().pointer() == new_value.pointer()))
return true;
auto previous_iter = previous();
auto next_iter = next();
if (!m_current->tree().duplicates_allowed() && ((previous_iter == new_value) || (next_iter == new_value))) {
return false;
}
if ((previous_iter > new_value) || (next_iter < new_value))
return false;
// We are friend of BTree and TreeNode. Don't know how I feel about that.
m_current->m_entries[m_index] = new_value;
m_current->tree().add_to_write_ahead_log(m_current);
return true;
}
/**
 * Copy assignment: takes over the position (node, index and end state) of
 * the other iterator. Self-assignment is a no-op.
 */
BTreeIterator& BTreeIterator::operator=(BTreeIterator const& other)
{
    if (&other == this)
        return *this;

    m_where = other.m_where;
    m_current = other.m_current;
    m_index = other.m_index;
    return *this;
}
}

View file

@ -1,8 +1,15 @@
# List of source files (SOURCES), kept in alphabetical order.
set(SOURCES
BTree.cpp
BTreeIterator.cpp
Heap.cpp
Index.cpp
Key.cpp
Lexer.cpp
Meta.cpp
Parser.cpp
SyntaxHighlighter.cpp
Token.cpp
TreeNode.cpp
Tuple.cpp
Value.cpp
)

View file

@ -13,10 +13,13 @@ class ASTNode;
class BetweenExpression;
class BinaryOperatorExpression;
class BlobLiteral;
class BTree;
class BTreeIterator;
class CaseExpression;
class CastExpression;
class ChainedExpression;
class CollateExpression;
class ColumnDef;
class ColumnDefinition;
class ColumnNameExpression;
class CommonTableExpression;
@ -32,13 +35,19 @@ class ErrorStatement;
class ExistsExpression;
class Expression;
class GroupByClause;
class Heap;
class InChainedExpression;
class Index;
class IndexNode;
class IndexDef;
class InSelectionExpression;
class Insert;
class InTableExpression;
class InvertibleNestedDoubleExpression;
class InvertibleNestedExpression;
class IsExpression;
class Key;
class KeyPartDef;
class Lexer;
class LimitClause;
class MatchExpression;
@ -54,12 +63,15 @@ class RenameColumn;
class RenameTable;
class ResultColumn;
class ReturningClause;
class Row;
class Select;
class SignedNumber;
class Statement;
class StringLiteral;
class TableDef;
class TableOrSubquery;
class Token;
class TreeNode;
class Tuple;
class TypeName;
class UnaryOperatorExpression;

View file

@ -0,0 +1,219 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Format.h>
#include <AK/QuickSort.h>
#include <AK/String.h>
#include <LibCore/IODevice.h>
#include <LibSQL/Heap.h>
#include <LibSQL/Serialize.h>
#include <sys/stat.h>
#include <sys/types.h>
namespace SQL {
/**
 * Opens (or creates) the heap stored in the file with the given name.
 *
 * If the file already has contents, the number of blocks is derived from the
 * file size and the bookkeeping data is read from block 0. Otherwise block 0
 * is initialized with default values. Failing to stat (for any reason other
 * than the file not existing) or to open the file is fatal.
 */
Heap::Heap(String file_name)
{
    set_name(move(file_name));

    // Determine the size of the file; a file that does not exist yet counts
    // as empty.
    size_t file_size = 0;
    struct stat stat_buffer;
    if (stat(name().characters(), &stat_buffer) == 0) {
        file_size = stat_buffer.st_size;
    } else if (errno != ENOENT) {
        perror("stat");
        VERIFY_NOT_REACHED();
    }

    bool const has_contents = file_size > 0;
    if (has_contents)
        m_next_block = m_end_of_file = file_size / BLOCKSIZE;

    auto file_or_error = Core::File::open(name(), Core::OpenMode::ReadWrite);
    if (file_or_error.is_error()) {
        warnln("Couldn't open '{}': {}", name(), file_or_error.error());
        VERIFY_NOT_REACHED();
    }
    m_file = file_or_error.value();

    if (has_contents)
        read_zero_block();
    else
        initialize_zero_block();
}
/**
 * Returns the contents of the given block. A block that has a pending entry
 * in the write-ahead log is served from the log; otherwise the block is read
 * from the file. Returns an error string if nothing could be read from the
 * file. Asking for a block that has not been handed out yet, or failing to
 * seek to the block, is fatal.
 */
Result<ByteBuffer, String> Heap::read_block(u32 block)
{
    if (auto pending = m_write_ahead_log.get(block); pending.has_value())
        return pending.value();

    VERIFY(block < m_next_block);
    dbgln_if(SQL_DEBUG, "Read heap block {}", block);
    if (!seek_block(block))
        VERIFY_NOT_REACHED();

    auto contents = m_file->read(BLOCKSIZE);
    if (contents.is_empty())
        return String("Could not read block");
    return contents;
}
/**
 * Writes the buffer to the given block of the file. The buffer must not be
 * larger than BLOCKSIZE; a smaller buffer is grown to BLOCKSIZE and padded
 * with zeroes first (this modifies the caller's buffer). Writing the block at
 * the current end of the file extends the file by one block.
 *
 * Returns true if the write succeeded. Writing a block that has not been
 * handed out yet, or failing to seek to the block, is fatal.
 */
bool Heap::write_block(u32 block, ByteBuffer& buffer)
{
    VERIFY(block < m_next_block);
    if (!seek_block(block))
        VERIFY_NOT_REACHED();
    dbgln_if(SQL_DEBUG, "Write heap block {} size {}", block, buffer.size());
    VERIFY(buffer.size() <= BLOCKSIZE);

    auto const original_size = buffer.size();
    if (original_size < BLOCKSIZE) {
        buffer.resize(BLOCKSIZE);
        memset(buffer.offset_pointer((int)original_size), 0, BLOCKSIZE - original_size);
    }

    if (!m_file->write(buffer.data(), (int)buffer.size()))
        return false;
    if (block == m_end_of_file)
        m_end_of_file++;
    return true;
}
/**
 * Positions the file at the start of the given block. The block directly
 * following the last block of the file is reached by seeking to the end of
 * the file; blocks further beyond the end cannot be reached.
 *
 * Returns true on success. On failure a warning is printed and false is
 * returned.
 */
bool Heap::seek_block(u32 block)
{
    if (block > m_end_of_file) {
        warnln("Seeking block {} of file {} which is beyond the end of the file", block, name());
        return false;
    }

    if (block == m_end_of_file) {
        // Initialized, because it is printed below even when the seek failed
        // and did not report a position.
        off_t pos = 0;
        if (!m_file->seek(0, Core::SeekMode::FromEndPosition, &pos)) {
            warnln("Could not seek block {} from file {}, which is at the end of the file", block, name());
            warnln("FD: {} Position: {} error: {}", m_file->fd(), pos, m_file->error_string());
            return false;
        }
        return true;
    }

    // Compute the byte offset in 64 bits: a 32-bit product of block number
    // and block size wraps around for offsets of 4 GiB and beyond.
    if (!m_file->seek(static_cast<i64>(block) * BLOCKSIZE)) {
        warnln("Could not seek block {} of file {}. The current size is {} blocks",
            block, name(), m_end_of_file);
        return false;
    }
    return true;
}
/**
 * Hands out the block number for a new record. If the free list is not empty
 * its first block is reused: the next free list entry is read from the start
 * of that block and block 0 is updated accordingly. Otherwise the next block
 * number past the blocks handed out so far is returned. Being unable to read
 * the free list block is fatal.
 */
u32 Heap::new_record_pointer()
{
    if (!m_free_list)
        return m_next_block++;

    auto block_or_error = read_block(m_free_list);
    if (block_or_error.is_error()) {
        warnln("FREE LIST CORRUPTION");
        VERIFY_NOT_REACHED();
    }

    auto const reused_pointer = m_free_list;
    size_t offset = 0;
    deserialize_from<u32>(block_or_error.value(), offset, m_free_list);
    update_zero_block();
    return reused_pointer;
}
void Heap::flush()
{
Vector<u32> blocks;
for (auto& wal_entry : m_write_ahead_log) {
blocks.append(wal_entry.key);
}
quick_sort(blocks);
for (auto& block : blocks) {
auto buffer_or_empty = m_write_ahead_log.get(block);
if (buffer_or_empty->is_empty()) {
VERIFY_NOT_REACHED();
}
dbgln_if(SQL_DEBUG, "Flushing block {} to {}", block, name());
write_block(block, buffer_or_empty.value());
}
m_write_ahead_log.clear();
}
// Layout of block 0 of a heap file. The block starts with the file
// identifier; the remaining constants are the byte offsets within the block
// of the 32-bit fields read by read_zero_block() and written by
// update_zero_block(). The user values are an array of 32-bit values starting
// at USER_VALUES_OFFSET.
constexpr static const char* FILE_ID = "SerenitySQL ";
constexpr static int VERSION_OFFSET = 12;
constexpr static int SCHEMAS_ROOT_OFFSET = 16;
constexpr static int TABLES_ROOT_OFFSET = 20;
constexpr static int TABLE_COLUMNS_ROOT_OFFSET = 24;
constexpr static int FREE_LIST_OFFSET = 28;
constexpr static int USER_VALUES_OFFSET = 32;
/**
 * Reads block 0 of the file and loads the bookkeeping data it contains: the
 * version, the root pointers of the schemas, tables and table columns
 * indexes, the head of the free list and the user values.
 *
 * Being unable to read the block, or finding a block that does not start
 * with the file identifier, is fatal.
 */
void Heap::read_zero_block()
{
    auto bytes_or_error = read_block(0);
    if (bytes_or_error.is_error())
        VERIFY_NOT_REACHED();
    auto buffer = bytes_or_error.value();

    // The block has to start with the file identifier.
    if (memcmp(buffer.offset_pointer(0), FILE_ID, strlen(FILE_ID)) != 0) {
        warnln("Corrupt zero page in {}", name());
        VERIFY_NOT_REACHED();
    }

    dbgln_if(SQL_DEBUG, "Read zero block from {}", name());
    memcpy(&m_version, buffer.offset_pointer(VERSION_OFFSET), sizeof(u32));
    dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF));
    memcpy(&m_schemas_root, buffer.offset_pointer(SCHEMAS_ROOT_OFFSET), sizeof(u32));
    // Log the value that was just read (this used to print m_tables_root,
    // which has not even been loaded at this point).
    dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root);
    memcpy(&m_tables_root, buffer.offset_pointer(TABLES_ROOT_OFFSET), sizeof(u32));
    dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root);
    memcpy(&m_table_columns_root, buffer.offset_pointer(TABLE_COLUMNS_ROOT_OFFSET), sizeof(u32));
    dbgln_if(SQL_DEBUG, "Table columns root node: {}", m_table_columns_root);
    memcpy(&m_free_list, buffer.offset_pointer(FREE_LIST_OFFSET), sizeof(u32));
    dbgln_if(SQL_DEBUG, "Free list: {}", m_free_list);
    memcpy(m_user_values.data(), buffer.offset_pointer(USER_VALUES_OFFSET), m_user_values.size() * sizeof(u32));
    for (auto ix = 0u; ix < m_user_values.size(); ix++) {
        if (m_user_values[ix]) {
            dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]);
        }
    }
}
/**
 * Builds a fresh image of block 0 from the current bookkeeping data (file
 * identifier, version, index root pointers, free list head and user values)
 * and adds it to the write-ahead log. Nothing is written to the file here.
 */
void Heap::update_zero_block()
{
    auto zero_block = ByteBuffer::create_zeroed(BLOCKSIZE);
    zero_block.overwrite(0, FILE_ID, strlen(FILE_ID));

    dbgln_if(SQL_DEBUG, "Write zero block to {}", name());

    dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF));
    zero_block.overwrite(VERSION_OFFSET, &m_version, sizeof(u32));

    dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root);
    zero_block.overwrite(SCHEMAS_ROOT_OFFSET, &m_schemas_root, sizeof(u32));

    dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root);
    zero_block.overwrite(TABLES_ROOT_OFFSET, &m_tables_root, sizeof(u32));

    dbgln_if(SQL_DEBUG, "Table Columns root node: {}", m_table_columns_root);
    zero_block.overwrite(TABLE_COLUMNS_ROOT_OFFSET, &m_table_columns_root, sizeof(u32));

    dbgln_if(SQL_DEBUG, "Free list: {}", m_free_list);
    zero_block.overwrite(FREE_LIST_OFFSET, &m_free_list, sizeof(u32));

    for (auto ix = 0u; ix < m_user_values.size(); ix++) {
        if (m_user_values[ix]) {
            dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]);
        }
    }
    zero_block.overwrite(USER_VALUES_OFFSET, m_user_values.data(), m_user_values.size() * sizeof(u32));

    add_to_wal(0, zero_block);
}
/**
 * Resets the bookkeeping data to the state of an empty heap (version 1, no
 * index roots, empty free list, all user values 0, block 1 as the next block
 * to hand out) and queues the resulting block 0 via update_zero_block().
 */
void Heap::initialize_zero_block()
{
    m_version = 0x00000001;
    m_next_block = 1;
    m_free_list = 0;

    m_schemas_root = 0;
    m_tables_root = 0;
    m_table_columns_root = 0;

    for (auto ix = 0u; ix < m_user_values.size(); ix++)
        m_user_values[ix] = 0u;

    update_zero_block();
}
}

View file

@ -0,0 +1,105 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Debug.h>
#include <AK/HashMap.h>
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibCore/File.h>
#include <LibCore/Object.h>
#include <LibSQL/Meta.h>
#include <LibSQL/Serialize.h>
namespace SQL {
constexpr static u32 BLOCKSIZE = 1024;
/**
 * A Heap is a logical container for database (SQL) data. Conceptually a
 * Heap can be a database file, or a memory block, or another storage medium.
 * It contains data structures, like B-Trees, hash index tables, or tuple
 * stores (basically a list of data tuples).
 *
 * A Heap can be thought of as the backing storage of a single database. It's
 * assumed that a single SQL database is backed by a single Heap.
 *
 * Currently only B-Trees and tuple stores are implemented.
 *
 * This implementation stores the heap in a file that is divided into blocks
 * of BLOCKSIZE bytes. Block 0 holds bookkeeping data (version, index root
 * pointers, free list head and user values). Modified blocks are collected
 * in a write-ahead log and written to the file by flush(), which is also
 * called from the destructor.
 */
class Heap : public Core::Object {
C_OBJECT(Heap);
public:
explicit Heap(String);
virtual ~Heap() override { flush(); }
// Number of blocks in the file.
u32 size() const { return m_end_of_file; }
Result<ByteBuffer, String> read_block(u32);
bool write_block(u32, ByteBuffer&);
u32 new_record_pointer();
[[nodiscard]] bool has_block(u32 block) const { return block < size(); }
// The root pointer setters below update the in-memory value and queue a new
// image of block 0 in the write-ahead log.
u32 schemas_root() const { return m_schemas_root; }
void set_schemas_root(u32 root)
{
m_schemas_root = root;
update_zero_block();
}
u32 tables_root() const { return m_tables_root; }
void set_tables_root(u32 root)
{
m_tables_root = root;
update_zero_block();
}
u32 table_columns_root() const { return m_table_columns_root; }
void set_table_columns_root(u32 root)
{
m_table_columns_root = root;
update_zero_block();
}
u32 version() const { return m_version; }
// User values: 16 slots of 32-bit values stored in block 0 for use by the
// client of the heap. The index must be a valid slot number.
u32 user_value(size_t index) const
{
VERIFY(index < m_user_values.size());
return m_user_values[index];
}
void set_user_value(size_t index, u32 value)
{
VERIFY(index < m_user_values.size());
m_user_values[index] = value;
update_zero_block();
}
// Queues the buffer as the new contents of the block, replacing any contents
// queued for that block earlier. Nothing is written until flush().
void add_to_wal(u32 block, ByteBuffer& buffer) { m_write_ahead_log.set(block, buffer); }
void flush();
private:
bool seek_block(u32);
void read_zero_block();
void initialize_zero_block();
void update_zero_block();
RefPtr<Core::File> m_file;
// First block of the free list; 0 if the free list is empty.
u32 m_free_list { 0 };
// Block number that new_record_pointer() hands out next when the free list
// is empty.
u32 m_next_block { 1 };
// Number of blocks currently in the file.
u32 m_end_of_file { 1 };
u32 m_schemas_root { 0 };
u32 m_tables_root { 0 };
u32 m_table_columns_root { 0 };
u32 m_version { 0x00000001 };
Array<u32, 16> m_user_values;
// Pending block contents, keyed by block number.
HashMap<u32, ByteBuffer> m_write_ahead_log;
};
}

View file

@ -0,0 +1,46 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibSQL/Heap.h>
#include <LibSQL/Index.h>
#include <LibSQL/Meta.h>
namespace SQL {
// Constructs an index on the given heap. `descriptor` describes the key
// tuples of the index, `unique` states whether duplicate keys are disallowed
// and `pointer` is the block pointer at which the index is rooted.
Index::Index(Heap& heap, TupleDescriptor const& descriptor, bool unique, u32 pointer)
: m_heap(heap)
, m_descriptor(descriptor)
, m_unique(unique)
, m_pointer(pointer)
{
}
// Same as above, but leaves m_unique at its default value (false).
// NOTE(review): the three-argument BTree constructor passes unique = true
// instead - confirm that the differing defaults are intended.
Index::Index(Heap& heap, TupleDescriptor const& descriptor, u32 pointer)
: m_heap(heap)
, m_descriptor(descriptor)
, m_pointer(pointer)
{
}
/**
 * Reads the given block from the heap and returns its contents. A read error
 * is reported with a warning and is fatal.
 */
ByteBuffer Index::read_block(u32 block)
{
    auto block_or_error = m_heap.read_block(block);
    if (block_or_error.is_error()) {
        warnln("Error reading block {}: {}", block, block_or_error.error());
        VERIFY_NOT_REACHED();
    }
    return block_or_error.value();
}
/**
 * Serializes the node and queues the result in the heap's write-ahead log
 * under the node's block pointer. The node must have a non-zero pointer.
 */
void Index::add_to_write_ahead_log(IndexNode* node)
{
    VERIFY(node->pointer());
    ByteBuffer serialized_node;
    node->serialize(serialized_node);
    m_heap.add_to_wal(node->pointer(), serialized_node);
}
}

View file

@ -0,0 +1,63 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibCore/Object.h>
#include <LibSQL/Forward.h>
#include <LibSQL/Meta.h>
namespace SQL {
/**
 * Base class for the nodes of an index. A node is identified by its block
 * pointer and can serialize itself into a buffer, which is how
 * Index::add_to_write_ahead_log() queues a node for writing.
 */
class IndexNode {
public:
virtual ~IndexNode() = default;
// Block pointer of this node.
[[nodiscard]] u32 pointer() const { return m_pointer; }
// Writes the contents of the node to the buffer.
virtual void serialize(ByteBuffer&) const = 0;
virtual IndexNode* as_index_node() = 0;
protected:
explicit IndexNode(u32 pointer)
: m_pointer(pointer)
{
}
void set_pointer(u32 pointer) { m_pointer = pointer; }
private:
u32 m_pointer;
};
/**
 * Abstract base class for indexes stored in a Heap. It keeps the heap, the
 * descriptor of the key tuples, the uniqueness flag and the block pointer at
 * which the index is rooted, and gives subclasses access to the heap's
 * blocks and write-ahead log.
 */
class Index : public Core::Object {
C_OBJECT_ABSTRACT(Index);
public:
~Index() override = default;
// Returns a copy of the descriptor of the key tuples of this index.
TupleDescriptor descriptor() const { return m_descriptor; }
[[nodiscard]] bool duplicates_allowed() const { return !m_unique; }
[[nodiscard]] bool unique() const { return m_unique; }
[[nodiscard]] u32 pointer() const { return m_pointer; }
protected:
Index(Heap& heap, TupleDescriptor const&, bool unique, u32 pointer);
Index(Heap& heap, TupleDescriptor const&, u32 pointer);
[[nodiscard]] Heap const& heap() const { return m_heap; }
[[nodiscard]] Heap& heap() { return m_heap; }
void set_pointer(u32 pointer) { m_pointer = pointer; }
// Asks the heap for the block number of a new record.
u32 new_record_pointer() { return m_heap.new_record_pointer(); }
ByteBuffer read_block(u32);
void add_to_write_ahead_log(IndexNode*);
private:
// The heap is referenced, not owned; it has to outlive the index.
Heap& m_heap;
TupleDescriptor m_descriptor;
bool m_unique { false };
u32 m_pointer { 0 };
};
}

View file

@ -0,0 +1,39 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibSQL/Key.h>
#include <LibSQL/Meta.h>
namespace SQL {
// Constructs a key on top of a default-constructed Tuple.
Key::Key()
: Tuple()
{
}
// Constructs a key for tuples with the given descriptor.
Key::Key(TupleDescriptor const& descriptor)
: Tuple(descriptor)
{
}
// Constructs a key for the given index definition. The tuple descriptor is
// derived from the index definition, which is remembered and available
// through index().
Key::Key(RefPtr<IndexDef> index)
: Tuple(index->to_tuple_descriptor())
, m_index(index)
{
}
// Constructs a key for tuples with the given descriptor from the buffer
// contents at `offset`, by delegating to the corresponding Tuple constructor.
Key::Key(TupleDescriptor const& descriptor, ByteBuffer& buffer, size_t& offset)
: Tuple(descriptor, buffer, offset)
{
}
// Constructs a key for the given index definition and deserializes its
// contents from the buffer, starting at `offset`.
// This delegates to the index definition constructor, so that the index
// definition is remembered and index() returns it. Delegating to the
// descriptor constructor (as was done before) left m_index null, unlike a
// key created through Key(RefPtr<IndexDef>).
Key::Key(RefPtr<IndexDef> index, ByteBuffer& buffer, size_t& offset)
    : Key(index)
{
    deserialize(buffer, offset);
}
}

View file

@ -0,0 +1,30 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/RefPtr.h>
#include <LibSQL/Forward.h>
#include <LibSQL/Tuple.h>
namespace SQL {
// A Tuple specialization used for index keys. In addition to the tuple data it
// can carry a reference to the IndexDef it was created from.
class Key : public Tuple {
public:
Key();
explicit Key(TupleDescriptor const&);
explicit Key(RefPtr<IndexDef>);
// The two overloads below additionally read the key's contents from the
// buffer, starting at `offset`.
Key(TupleDescriptor const&, ByteBuffer&, size_t& offset);
Key(RefPtr<IndexDef>, ByteBuffer&, size_t& offset);
Key(Key const&) = default;
// May be null: only the constructors taking an IndexDef can set it.
RefPtr<IndexDef> index() const { return m_index; }
// Tuple's length plus one u32 (presumably the record pointer stored next to
// the key — confirm against Tuple's serialization).
[[nodiscard]] virtual size_t data_length() const override { return Tuple::data_length() + sizeof(u32); }
private:
RefPtr<IndexDef> m_index;
};
}

View file

@ -0,0 +1,201 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibSQL/Key.h>
#include <LibSQL/Meta.h>
#include <LibSQL/Type.h>
namespace SQL {
// Creates a schema definition with the given name.
SchemaDef::SchemaDef(String name)
    : Relation(move(name))
{
}

// Creates a schema definition named after the "schema_name" column of `key`.
// The column's string conversion is unwrapped with value(), so it must yield
// a string.
SchemaDef::SchemaDef(Key const& key)
    : Relation(key["schema_name"].to_string().value())
{
}
// Returns the key describing this schema: its name in "schema_name", with the
// key's pointer set to this relation's pointer.
Key SchemaDef::key() const
{
    Key schema_key(index_def()->to_tuple_descriptor());
    schema_key["schema_name"] = name();
    schema_key.set_pointer(pointer());
    return schema_key;
}
// Returns an unfilled key laid out according to the "$schema" index
// definition.
Key SchemaDef::make_key()
{
    auto definition = index_def();
    return Key(definition);
}
// Builds the definition of the "$schema" index: a unique index with a single
// ascending text column, "schema_name".
//
// A new IndexDef is constructed on every call (the object is not static), so
// the emptiness check below is taken each time.
NonnullRefPtr<IndexDef> SchemaDef::index_def()
{
    NonnullRefPtr<IndexDef> definition = IndexDef::construct("$schema", true, 0);
    if (definition->size() == 0)
        definition->append_column("schema_name", SQLType::Text, Order::Ascending);
    return definition;
}
// Creates a column definition named `name` of type `sql_type`, owned by
// `parent`, and records `column_number` as its position.
ColumnDef::ColumnDef(Relation* parent, size_t column_number, String name, SQLType sql_type)
    : Relation(move(name), parent)
    , m_index(column_number)
    , m_type(sql_type)
{
}
// Returns the "$column" key describing this column: the hash of the parent
// relation, the column number, the column name and the numeric SQL type.
// The parent relation is dereferenced without a null check.
Key ColumnDef::key() const
{
    Key column_key(index_def());
    column_key["table_hash"] = parent_relation()->hash();
    column_key["column_number"] = static_cast<int>(column_number());
    column_key["column_name"] = name();
    column_key["column_type"] = static_cast<int>(type());
    return column_key;
}
// Returns a "$column" key in which only "table_hash" is filled in, using the
// hash of `table_def`'s key.
Key ColumnDef::make_key(TableDef const& table_def)
{
    auto partial_key = Key(index_def());
    partial_key["table_hash"] = table_def.key().hash();
    return partial_key;
}
// Builds the definition of the "$column" index: a unique index over
// table_hash, column_number, column_name and column_type, all ascending.
//
// A new IndexDef is constructed on every call (the object is not static), so
// the emptiness check below is taken each time.
NonnullRefPtr<IndexDef> ColumnDef::index_def()
{
    NonnullRefPtr<IndexDef> definition = IndexDef::construct("$column", true, 0);
    if (definition->size() == 0) {
        definition->append_column("table_hash", SQLType::Integer, Order::Ascending);
        definition->append_column("column_number", SQLType::Integer, Order::Ascending);
        definition->append_column("column_name", SQLType::Text, Order::Ascending);
        definition->append_column("column_type", SQLType::Integer, Order::Ascending);
    }
    return definition;
}
// Creates a key part owned by `index`. Its column number is the number of
// parts `index` holds at construction time; `index` is dereferenced without a
// null check.
KeyPartDef::KeyPartDef(IndexDef* index, String name, SQLType sql_type, Order sort_order)
    : ColumnDef(index, index->size(), move(name), sql_type)
    , m_sort_order(sort_order)
{
}
// Creates an index definition owned by `table` with an empty key definition.
IndexDef::IndexDef(TableDef* table, String name, bool unique, u32 pointer)
    : Relation(move(name), pointer, table)
    , m_key_definition()
    , m_unique(unique)
{
}

// Creates an index definition that has no owning table.
IndexDef::IndexDef(String name, bool unique, u32 pointer)
    : IndexDef(nullptr, move(name), unique, pointer)
{
}
// Appends a new key part with the given name, type and sort order to the end
// of this index's key definition.
void IndexDef::append_column(String name, SQLType sql_type, Order sort_order)
{
    m_key_definition.append(KeyPartDef::construct(this, move(name), sql_type, sort_order));
}
// Translates the key definition into a TupleDescriptor, one element per key
// part (name, type, sort order), in definition order.
TupleDescriptor IndexDef::to_tuple_descriptor() const
{
    TupleDescriptor descriptor;
    for (size_t ix = 0; ix < m_key_definition.size(); ++ix) {
        auto const& part = m_key_definition[ix];
        descriptor.append({ part.name(), part.type(), part.sort_order() });
    }
    return descriptor;
}
// Returns the "$index" key describing this index: the hash of the parent
// relation's key, the index name, and 1 or 0 for unique / non-unique.
// The parent relation is dereferenced without a null check.
Key IndexDef::key() const
{
    Key index_key(index_def()->to_tuple_descriptor());
    index_key["table_hash"] = parent_relation()->key().hash();
    index_key["index_name"] = name();
    index_key["unique"] = unique() ? 1 : 0;
    return index_key;
}
// Returns an "$index" key in which only "table_hash" is filled in, using the
// hash of `table_def`'s key.
Key IndexDef::make_key(TableDef const& table_def)
{
    auto partial_key = Key(index_def());
    partial_key["table_hash"] = table_def.key().hash();
    return partial_key;
}
// Builds the definition of the "$index" index: a unique index over
// table_hash, index_name and unique, all ascending.
//
// A new IndexDef is constructed on every call (the object is not static), so
// the emptiness check below is taken each time.
NonnullRefPtr<IndexDef> IndexDef::index_def()
{
    NonnullRefPtr<IndexDef> definition = IndexDef::construct("$index", true, 0);
    if (definition->size() == 0) {
        definition->append_column("table_hash", SQLType::Integer, Order::Ascending);
        definition->append_column("index_name", SQLType::Text, Order::Ascending);
        definition->append_column("unique", SQLType::Integer, Order::Ascending);
    }
    return definition;
}
// Creates a table definition named `name` owned by `schema`, with no columns
// and no indexes.
TableDef::TableDef(SchemaDef* schema, String name)
    : Relation(move(name), schema)
    , m_columns()
    , m_indexes()
{
}
// Translates the column list into a TupleDescriptor, one element per column
// (name, type), in column order. Every element is marked ascending.
TupleDescriptor TableDef::to_tuple_descriptor() const
{
    TupleDescriptor descriptor;
    for (size_t ix = 0; ix < m_columns.size(); ++ix) {
        auto const& column = m_columns[ix];
        descriptor.append({ column.name(), column.type(), Order::Ascending });
    }
    return descriptor;
}
// Returns the "$table" key describing this table: the hash of the parent
// relation's key and the table name, with the key's pointer set to this
// relation's pointer. The parent relation is dereferenced without a null
// check.
Key TableDef::key() const
{
    Key table_key(index_def()->to_tuple_descriptor());
    table_key["schema_hash"] = parent_relation()->key().hash();
    table_key["table_name"] = name();
    table_key.set_pointer(pointer());
    return table_key;
}
// Appends a new column with the given name and type; its column number is the
// number of columns present before the call.
void TableDef::append_column(String name, SQLType sql_type)
{
    m_columns.append(ColumnDef::construct(this, num_columns(), move(name), sql_type));
}
// Appends a column described by a "$column" key: the name is taken from
// "column_name" and the type from the integer stored in "column_type".
void TableDef::append_column(Key const& column)
{
    auto column_name = static_cast<String>(column["column_name"]);
    auto column_type = static_cast<SQLType>(static_cast<int>(column["column_type"]));
    append_column(column_name, column_type);
}
// Convenience overload: uses the key of `schema_def` as the schema key.
Key TableDef::make_key(SchemaDef const& schema_def)
{
    auto schema_key = schema_def.key();
    return TableDef::make_key(schema_key);
}

// Returns a "$table" key in which only "schema_hash" is filled in, using the
// hash of `schema_key`.
Key TableDef::make_key(Key const& schema_key)
{
    auto partial_key = Key(index_def());
    partial_key["schema_hash"] = schema_key.hash();
    return partial_key;
}
// Builds the definition of the "$table" index: a unique index over
// schema_hash and table_name, both ascending.
//
// A new IndexDef is constructed on every call (the object is not static), so
// the emptiness check below is taken each time.
NonnullRefPtr<IndexDef> TableDef::index_def()
{
    NonnullRefPtr<IndexDef> definition = IndexDef::construct("$table", true, 0);
    if (definition->size() == 0) {
        definition->append_column("schema_hash", SQLType::Integer, Order::Ascending);
        definition->append_column("table_name", SQLType::Text, Order::Ascending);
    }
    return definition;
}
}

View file

@ -0,0 +1,149 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/NonnullOwnPtr.h>
#include <AK/NonnullOwnPtrVector.h>
#include <AK/NonnullRefPtr.h>
#include <AK/Result.h>
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibCore/Object.h>
#include <LibSQL/AST.h>
#include <LibSQL/Forward.h>
#include <LibSQL/Key.h>
#include <LibSQL/Type.h>
namespace SQL {
/**
* This file declares objects describing tables, indexes, and columns.
* It remains to be seen if this will survive in its current form.
*/
// Common base class of the schema, table, column and index definition classes
// declared below. The relation's name is stored through Core::Object's
// set_name(), and the owning relation (if any) is its Core::Object parent.
class Relation : public Core::Object {
C_OBJECT_ABSTRACT(Relation);
public:
// Hash of the key produced by key(); builds a new key on every call.
u32 hash() const { return key().hash(); }
u32 pointer() const { return m_pointer; }
void set_pointer(u32 pointer) { m_pointer = pointer; }
~Relation() override = default;
// Returns the key that identifies this object; implemented by each subclass.
virtual Key key() const = 0;
// The Core::Object parent viewed as a Relation; null when there is no parent
// or the parent is not a Relation.
Relation const* parent_relation() const { return dynamic_cast<Relation const*>(parent()); }
protected:
// Creates a relation with an explicit pointer value.
Relation(String name, u32 pointer, Relation* parent = nullptr)
: Core::Object(parent)
, m_pointer(pointer)
{
set_name(move(name));
}
// Creates a relation whose pointer is 0.
explicit Relation(String name, Relation* parent = nullptr)
: Core::Object(parent)
, m_pointer(0)
{
set_name(move(name));
}
private:
u32 m_pointer { 0 };
};
// Definition of a schema. A schema has no members of its own beyond what
// Relation provides (name and pointer).
class SchemaDef : public Relation {
C_OBJECT(SchemaDef);
public:
Key key() const override;
// Definition of the index that schema keys are laid out by.
static NonnullRefPtr<IndexDef> index_def();
// Returns an unfilled key laid out according to index_def().
static Key make_key();
private:
// Construct from a name, or from a key carrying the name.
explicit SchemaDef(String);
explicit SchemaDef(Key const&);
};
// Definition of a single column: its SQL type and its position within the
// owning relation.
class ColumnDef : public Relation {
C_OBJECT(ColumnDef);
public:
Key key() const override;
SQLType type() const { return m_type; }
// Position of the column; stored in m_index.
size_t column_number() const { return m_index; }
// Definition of the index that column keys are laid out by.
static NonnullRefPtr<IndexDef> index_def();
// Returns a column key pre-filled with the hash of the given table.
static Key make_key(TableDef const&);
protected:
// Arguments: owning relation, column number, name, type.
ColumnDef(Relation*, size_t, String, SQLType);
private:
size_t m_index;
SQLType m_type { SQLType::Text };
};
// A column that takes part in an index key; adds a sort order to ColumnDef.
class KeyPartDef : public ColumnDef {
C_OBJECT(KeyPartDef);
public:
// Arguments: owning index, name, type, sort order (ascending by default).
// NOTE(review): unlike the other definition classes this constructor is
// public — confirm whether that is intended.
KeyPartDef(IndexDef*, String, SQLType, Order = Order::Ascending);
Order sort_order() const { return m_sort_order; }
private:
Order m_sort_order { Order::Ascending };
};
// Definition of an index: an ordered list of key parts plus a uniqueness
// flag.
class IndexDef : public Relation {
C_OBJECT(IndexDef);
public:
~IndexDef() override = default;
// Returns the key parts by value, i.e. the vector is copied on every call.
NonnullRefPtrVector<KeyPartDef> key_definition() const { return m_key_definition; }
bool unique() const { return m_unique; }
// Number of key parts.
[[nodiscard]] size_t size() const { return m_key_definition.size(); }
// Appends a key part; the sort order defaults to ascending.
void append_column(String, SQLType, Order = Order::Ascending);
Key key() const override;
// Descriptor with one element per key part, in definition order.
[[nodiscard]] TupleDescriptor to_tuple_descriptor() const;
// Definition of the index that index keys themselves are laid out by.
static NonnullRefPtr<IndexDef> index_def();
// Returns an index key pre-filled with the hash of the given table.
static Key make_key(TableDef const& table_def);
private:
// Both constructors default `unique` to true, whereas the member
// initializer of m_unique below is false.
IndexDef(TableDef*, String, bool unique = true, u32 pointer = 0);
explicit IndexDef(String, bool unique = true, u32 pointer = 0);
NonnullRefPtrVector<KeyPartDef> m_key_definition;
bool m_unique { false };
friend TableDef;
};
// Definition of a table: an ordered list of columns and a list of indexes.
class TableDef : public Relation {
    C_OBJECT(TableDef);

public:
    Key key() const override;
    // Appends a column with the given name and type.
    void append_column(String, SQLType);
    // Appends a column described by a column key ("column_name" and
    // "column_type" are read from it).
    void append_column(Key const&);
    // The two counters only read state, so they are const-qualified and can
    // be called through a `TableDef const&`.
    size_t num_columns() const { return m_columns.size(); }
    size_t num_indexes() const { return m_indexes.size(); }
    // Both accessors return the vector by value, i.e. a copy on every call.
    NonnullRefPtrVector<ColumnDef> columns() const { return m_columns; }
    NonnullRefPtrVector<IndexDef> indexes() const { return m_indexes; }
    // Descriptor with one element per column, in column order.
    [[nodiscard]] TupleDescriptor to_tuple_descriptor() const;
    // Definition of the index that table keys are laid out by.
    static NonnullRefPtr<IndexDef> index_def();
    // Return a table key pre-filled with the hash of the given schema (or of
    // the given schema key).
    static Key make_key(SchemaDef const& schema_def);
    static Key make_key(Key const& schema_key);

private:
    explicit TableDef(SchemaDef*, String);
    NonnullRefPtrVector<ColumnDef> m_columns;
    NonnullRefPtrVector<IndexDef> m_indexes;
};
}

View file

@ -0,0 +1,404 @@
/*
* Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Debug.h>
#include <AK/Format.h>
#include <AK/NonnullOwnPtr.h>
#include <AK/StringBuilder.h>
#include <LibSQL/BTree.h>
#include <LibSQL/Serialize.h>
namespace SQL {
// Creates a down pointer that only knows the child's block pointer; the child
// node itself is not loaded.
DownPointer::DownPointer(TreeNode* owner, u32 pointer)
    : m_owner(owner)
    , m_pointer(pointer)
    , m_node(nullptr)
{
}

// Creates a down pointer that takes ownership of `node`. A null `node` yields
// pointer 0 and no child.
DownPointer::DownPointer(TreeNode* owner, TreeNode* node)
    : m_owner(owner)
    , m_pointer(node ? node->pointer() : 0)
    , m_node(adopt_own_if_nonnull(node))
{
}

// Creates a down pointer for `owner` that takes over the pointer value and the
// loaded child (if any) of `down`; `down` no longer holds the child afterwards.
DownPointer::DownPointer(TreeNode* owner, DownPointer& down)
    : m_owner(owner)
    , m_pointer(down.m_pointer)
    , m_node(move(down.m_node))
{
}
// "Copy" constructor that actually steals the loaded child from `other`.
DownPointer::DownPointer(DownPointer const& other)
    : m_owner(other.m_owner)
    , m_pointer(other.pointer())
{
    if (!other.m_node) {
        m_node = nullptr;
        return;
    }
    // FIXME This is gross. We modify the other object which we promised
    //       to be const. However, this particular constructor is needed
    //       when we take DownPointers from the Vector they live in when
    //       we split a node. The original object is going to go away, so
    //       there is no harm done. However, it's yucky. If anybody has
    //       a better idea...
    m_node = move(const_cast<DownPointer&>(other).m_node);
}
// Returns the child node, loading it first if necessary. The result is null
// when nothing is loaded and the pointer is 0.
TreeNode* DownPointer::node()
{
    // inflate() returns immediately when the child is already loaded.
    inflate();
    return m_node;
}
void DownPointer::inflate()
{
if (m_node || !m_pointer)
return;
auto buffer = m_owner->tree().read_block(m_pointer);
size_t offset = 0;
m_node = make<TreeNode>(m_owner->tree(), m_owner, m_pointer, buffer, offset);
}
// Creates an empty leaf node: no entries and a single null down pointer.
TreeNode::TreeNode(BTree& tree, TreeNode* up, u32 pointer)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    m_is_leaf = true;
    m_down.append(DownPointer(this, nullptr));
}
// Creates a node without entries whose first down pointer is taken over from
// `left`. A loaded child behind `left` is re-parented to this node. The node
// is a leaf when `left` has pointer 0. When `pointer` is 0 a new record
// pointer is requested from the tree.
TreeNode::TreeNode(BTree& tree, TreeNode* up, DownPointer& left, u32 pointer)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    // Must happen before the DownPointer below takes the child out of `left`.
    if (left.m_node != nullptr)
        left.m_node->m_up = this;
    m_down.append(DownPointer(this, left));
    m_is_leaf = (left.pointer() == 0);
    if (pointer == 0)
        set_pointer(m_tree.new_record_pointer());
}
// Creates a node without entries whose first down pointer owns `left`.
//
// `left` may be null: DownPointer(this, left) accepts a null node (yielding
// pointer 0), so the leaf computation must not dereference it. A node without
// a left child, or whose left child has pointer 0, is a leaf.
TreeNode::TreeNode(BTree& tree, TreeNode* up, TreeNode* left, u32 pointer)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    m_down.append(DownPointer(this, left));
    m_is_leaf = (left == nullptr) || (left->pointer() == 0);
}
// Reconstructs a node from its serialized form in `buffer`, starting at
// `at_offset` (which is advanced by the reads).
//
// Layout read: a u32 entry count; then, per entry, a u32 down pointer followed
// by the key; then one trailing u32 down pointer. A down pointer of 0 marks a
// leaf, and all down pointers of a node must agree on that.
//
// An entry count of 0 is followed by no further data. In that case the node is
// set up like a freshly created empty leaf (one null down pointer, leaf flag
// set) so that m_is_leaf is always assigned and m_down always holds one more
// element than m_entries.
TreeNode::TreeNode(BTree& tree, TreeNode* up, u32 pointer, ByteBuffer& buffer, size_t& at_offset)
    : IndexNode(pointer)
    , m_tree(tree)
    , m_up(up)
    , m_entries()
    , m_down()
{
    u32 nodes;
    deserialize_from<u32>(buffer, at_offset, nodes);
    dbgln_if(SQL_DEBUG, "Deserializing node. Size {}", nodes);
    if (nodes == 0) {
        m_down.append(DownPointer(this, nullptr));
        m_is_leaf = true;
        return;
    }
    for (u32 i = 0; i < nodes; i++) {
        u32 left;
        deserialize_from<u32>(buffer, at_offset, left);
        dbgln_if(SQL_DEBUG, "Down[{}] {}", i, left);
        // The first down pointer decides whether this is a leaf; the others
        // have to be consistent with it.
        if (!m_down.is_empty())
            VERIFY((left == 0) == m_is_leaf);
        else
            m_is_leaf = (left == 0);
        m_entries.append(Key(m_tree.descriptor(), buffer, at_offset));
        m_down.empend(this, left);
    }
    u32 right;
    deserialize_from<u32>(buffer, at_offset, right);
    dbgln_if(SQL_DEBUG, "Right {}", right);
    VERIFY((right == 0) == m_is_leaf);
    m_down.empend(this, right);
}
// Inserts `key` into the subtree rooted at this node. Returns false when the
// tree does not allow duplicates and an equal key is already present.
//
// split() moves median keys up into interior nodes, so an equal key may live
// above the leaf where the new key would be placed. The uniqueness check is
// therefore done on every level while descending, not only in the leaf.
bool TreeNode::insert(Key const& key)
{
    dbgln_if(SQL_DEBUG, "[#{}] INSERT({})", pointer(), key.to_string());
    if (is_leaf())
        return insert_in_leaf(key);

    if (!m_tree.duplicates_allowed()) {
        for (auto& entry : m_entries) {
            if (key == entry) {
                dbgln_if(SQL_DEBUG, "[#{}] duplicate key {}", pointer(), key.to_string());
                return false;
            }
        }
    }

    // Descend into the child left of the first entry greater than the key, or
    // into the rightmost child when there is none (same choice as node_for()).
    for (size_t ix = 0; ix < size(); ix++) {
        if (key < m_entries[ix])
            return down_node(ix)->insert(key);
    }
    return down_node(size())->insert(key);
}
// Finds the entry equal to `key` in the subtree rooted at this node and sets
// its pointer to key.pointer(). The node is added to the write-ahead log only
// when the pointer actually changes. Returns false when no equal entry exists.
//
// Entries are examined on every level while descending, the same way get()
// does, because split() moves median keys into interior nodes; looking only
// at the leaf would miss those.
bool TreeNode::update_key_pointer(Key const& key)
{
    dbgln_if(SQL_DEBUG, "[#{}] UPDATE({}, {})", pointer(), key.to_string(), key.pointer());
    for (auto ix = 0u; ix < size(); ix++) {
        if (key == m_entries[ix]) {
            dbgln_if(SQL_DEBUG, "[#{}] {} == {}",
                pointer(), key.to_string(), m_entries[ix].to_string());
            if (m_entries[ix].pointer() != key.pointer()) {
                m_entries[ix].set_pointer(key.pointer());
                dump_if(SQL_DEBUG, "To WAL");
                tree().add_to_write_ahead_log(this);
            }
            return true;
        }
        // In an interior node, a key smaller than this entry can only be in
        // the child to its left.
        if (!is_leaf() && key < m_entries[ix])
            return down_node(ix)->update_key_pointer(key);
    }
    if (is_leaf())
        return false;
    // Greater than every entry here: continue in the rightmost child.
    return down_node(size())->update_key_pointer(key);
}
// Inserts `key` into this node, which must be a leaf. When the tree does not
// allow duplicates and this leaf already holds an equal entry, nothing is
// inserted and false is returned.
bool TreeNode::insert_in_leaf(Key const& key)
{
    VERIFY(is_leaf());
    bool const must_be_unique = !m_tree.duplicates_allowed();
    if (must_be_unique) {
        for (size_t ix = 0; ix < m_entries.size(); ++ix) {
            if (!(key == m_entries[ix]))
                continue;
            dbgln_if(SQL_DEBUG, "[#{}] duplicate key {}", pointer(), key.to_string());
            return false;
        }
    }
    dbgln_if(SQL_DEBUG, "[#{}] insert_in_leaf({})", pointer(), key.to_string());
    just_insert(key, nullptr);
    return true;
}
// Computes how many keys a node may hold: the block size minus two u32s,
// divided by the per-key size (descriptor length plus one u32), lowered by one
// when that quotient is even so that the result is odd.
size_t TreeNode::max_keys_in_node()
{
    auto descriptor = m_tree.descriptor();
    auto const bytes_per_key = descriptor.data_length() + sizeof(u32);
    auto const capacity = (BLOCKSIZE - 2 * sizeof(u32)) / bytes_per_key;
    return (capacity % 2) == 0 ? capacity - 1 : capacity;
}
// Returns the entry at position `ix`; asserts that `ix` is within size().
Key const& TreeNode::operator[](size_t ix) const
{
    VERIFY(ix < size());
    auto const& entry = m_entries[ix];
    return entry;
}
// Returns the block pointer of the down pointer at position `ix`; asserts that
// `ix` is a valid down pointer position.
u32 TreeNode::down_pointer(size_t ix) const
{
    VERIFY(ix < m_down.size());
    auto const& down = m_down[ix];
    return down.pointer();
}
// Returns the child node behind the down pointer at position `ix` (loading it
// if needed); asserts that `ix` is a valid down pointer position.
TreeNode* TreeNode::down_node(size_t ix)
{
    VERIFY(ix < m_down.size());
    auto& down = m_down[ix];
    return down.node();
}
// Returns the leaf in which `key` belongs: descends through the child left of
// the first entry greater than `key`, or through the rightmost child when no
// entry is greater.
TreeNode* TreeNode::node_for(Key const& key)
{
    dump_if(SQL_DEBUG, String::formatted("node_for(Key {})", key.to_string()));
    if (is_leaf())
        return this;
    size_t const count = size();
    for (size_t slot = 0; slot < count; ++slot) {
        if (!(key < m_entries[slot]))
            continue;
        dbgln_if(SQL_DEBUG, "[{}] {} < {} v{}",
            pointer(), (String)key, (String)m_entries[slot], m_down[slot].pointer());
        return down_node(slot)->node_for(key);
    }
    dbgln_if(SQL_DEBUG, "[#{}] {} >= {} v{}",
        pointer(), key.to_string(), (String)m_entries[size() - 1], m_down[size()].pointer());
    return down_node(count)->node_for(key);
}
// Looks `key` up in the subtree rooted at this node. On a match the entry's
// pointer is written into `key` and returned; otherwise an empty Optional is
// returned. Reaching a node without entries is treated as a fatal error.
Optional<u32> TreeNode::get(Key& key)
{
    dump_if(SQL_DEBUG, String::formatted("get({})", key.to_string()));
    for (auto ix = 0u; ix < size(); ix++) {
        auto& entry = m_entries[ix];
        if (key < entry) {
            // Smaller than this entry: either continue in the child to its
            // left or, in a leaf, give up.
            if (!is_leaf()) {
                dbgln_if(SQL_DEBUG, "[{}] {} < {} ({} -> {})",
                    pointer(), key.to_string(), (String)entry,
                    ix, m_down[ix].pointer());
                return down_node(ix)->get(key);
            }
            dbgln_if(SQL_DEBUG, "[#{}] {} < {} -> 0",
                pointer(), key.to_string(), (String)entry);
            return {};
        }
        if (key == entry) {
            dbgln_if(SQL_DEBUG, "[#{}] {} == {} -> {}",
                pointer(), key.to_string(), (String)entry,
                entry.pointer());
            key.set_pointer(entry.pointer());
            return entry.pointer();
        }
    }
    if (m_entries.is_empty()) {
        dbgln_if(SQL_DEBUG, "[#{}] {} Empty node??", pointer(), key.to_string());
        VERIFY_NOT_REACHED();
    }
    // Greater than every entry in this node.
    if (!is_leaf()) {
        dbgln_if(SQL_DEBUG, "[#{}] {} > {} ({} -> {})",
            pointer(), key.to_string(), (String)m_entries[size() - 1],
            size(), m_down[size()].pointer());
        return down_node(size())->get(key);
    }
    dbgln_if(SQL_DEBUG, "[#{}] {} > {} -> 0",
        pointer(), key.to_string(), (String)m_entries[size() - 1]);
    return {};
}
// Writes this node to `buffer`: the entry count as u32; then, per entry, its
// left down pointer as u32 followed by the serialized key; then the rightmost
// down pointer as u32. Leaves write 0 for every down pointer. A node without
// entries writes only the count.
void TreeNode::serialize(ByteBuffer& buffer) const
{
    u32 entry_count = size();
    serialize_to<u32>(buffer, entry_count);
    if (entry_count == 0)
        return;
    bool const leaf = is_leaf();
    for (u32 ix = 0; ix < entry_count; ++ix) {
        dbgln_if(SQL_DEBUG, "Serializing Left[{}] = {}", ix, m_down[ix].pointer());
        serialize_to<u32>(buffer, leaf ? 0u : m_down[ix].pointer());
        m_entries[ix].serialize(buffer);
    }
    dbgln_if(SQL_DEBUG, "Serializing Right = {}", m_down[size()].pointer());
    serialize_to<u32>(buffer, leaf ? 0u : m_down[size()].pointer());
}
// Places `key` into this node in sorted position (before the first entry that
// is greater, or at the end) and installs `right` as the down pointer to the
// right of it. Afterwards the node is split when it holds more than
// max_keys_in_node() entries; otherwise it is added to the write-ahead log.
void TreeNode::just_insert(Key const& key, TreeNode* right)
{
    dbgln_if(SQL_DEBUG, "[#{}] just_insert({}, right = {})",
        pointer(), (String)key, (right) ? right->pointer() : 0);
    dump_if(SQL_DEBUG, "Before");

    // Locate the first entry greater than the key.
    size_t slot = 0;
    while (slot < size() && !(key < m_entries[slot]))
        ++slot;

    if (slot < size()) {
        m_entries.insert(slot, key);
        VERIFY(is_leaf() == (right == nullptr));
        m_down.insert(slot + 1, DownPointer(this, right));
    } else {
        m_entries.append(key);
        m_down.empend(this, right);
    }

    if (size() > max_keys_in_node()) {
        split();
        return;
    }
    dump_if(SQL_DEBUG, "To WAL");
    tree().add_to_write_ahead_log(this);
}
// Splits this node in two and moves its median key one level up.
//
// The entries and down pointers from position max_keys_in_node() / 2 + 1
// onwards are moved to a newly allocated right-hand node; the last entry that
// remains here afterwards is the median, which is removed and inserted into
// the parent with the new node as its right child. When this node has no
// parent, a new root is requested from the tree first. Both halves are added
// to the write-ahead log before the parent is updated.
//
// The new node is created with a raw `new`; ownership passes to the parent
// when just_insert() wraps it in a DownPointer.
void TreeNode::split()
{
dump_if(SQL_DEBUG, "Splitting node");
if (!m_up)
// Make new m_up. This is the new root node.
m_up = m_tree.new_root();
// Take the left pointer for the new node:
DownPointer left = m_down.take(max_keys_in_node() / 2 + 1);
// Create the new right node:
auto* new_node = new TreeNode(tree(), m_up, left);
// Move the rightmost keys from this node to the new right node:
while (m_entries.size() > max_keys_in_node() / 2 + 1) {
// Entry and down pointer are taken from the same position, so each moved
// key keeps the child that was to its right.
auto entry = m_entries.take(max_keys_in_node() / 2 + 1);
auto down = m_down.take(max_keys_in_node() / 2 + 1);
// Reparent to new right node:
if (down.m_node != nullptr) {
down.m_node->m_up = new_node;
}
new_node->m_entries.append(entry);
new_node->m_down.append(down);
}
// Move the median key in the node one level up. Its right node will
// be the new node:
auto median = m_entries.take_last();
dump_if(SQL_DEBUG, "Split Left To WAL");
tree().add_to_write_ahead_log(this);
new_node->dump_if(SQL_DEBUG, "Split Right to WAL");
tree().add_to_write_ahead_log(new_node);
// This may in turn split the parent.
m_up->just_insert(median, new_node);
}
// Writes a one-line description of this node to the debug log when `flag` is
// non-zero: node pointer, optional `msg`, parent pointer (or "*" when there is
// none), the entries (interleaved with down pointers for interior nodes), the
// size and a leaf marker. For leaves every down pointer is asserted to be 0.
void TreeNode::dump_if(int flag, String&& msg)
{
    if (!flag)
        return;
    StringBuilder builder;
    builder.appendff("[#{}] ", pointer());
    if (!msg.is_empty())
        builder.appendff("{}", msg);
    builder.append(": ");
    if (m_up)
        builder.appendff("[^{}] -> ", m_up->pointer());
    else
        builder.append("* -> ");
    for (size_t ix = 0; ix < m_entries.size(); ix++) {
        if (!is_leaf())
            builder.appendff("[v{}] ", m_down[ix].pointer());
        else
            VERIFY(m_down[ix].pointer() == 0);
        builder.appendff("'{}' ", (String)m_entries[ix]);
    }
    if (!is_leaf()) {
        builder.appendff("[v{}]", m_down[size()].pointer());
    } else {
        VERIFY(m_down[size()].pointer() == 0);
    }
    builder.appendff(" (size {}", (int)size());
    if (is_leaf()) {
        builder.append(", leaf");
    }
    builder.append(")");
    // The built text contains key values and the caller's message, so it is
    // passed as an argument rather than as the format string; otherwise any
    // '{' or '}' in it would be interpreted as a format placeholder.
    dbgln("{}", builder.build());
}
void TreeNode::list_node(int indent)
{
auto do_indent = [&]() {
for (int i = 0; i < indent; ++i) {
warn(" ");
}
};
do_indent();
warnln("--> #{}", pointer());
for (auto ix = 0u; ix < size(); ix++) {
if (!is_leaf()) {
down_node(ix)->list_node(indent + 2);
}
do_indent();
warnln("{}", m_entries[ix].to_string());
}
if (!is_leaf()) {
down_node(size())->list_node(indent + 2);
}
}
}