HashIndex.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <LibSQL/HashIndex.h>
  7. #include <LibSQL/Heap.h>
  8. #include <LibSQL/Key.h>
  9. #include <LibSQL/Serializer.h>
  10. namespace SQL {
// Construct a directory node for an existing directory page: `node_number`
// selects the page (its heap pointer comes from the index's node table) and
// `offset` is the index of the first bucket pointer this page covers.
HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 node_number, size_t offset)
    : IndexNode(index.node_pointer(node_number))
    , m_hash_index(index)
    , m_node_number(node_number)
    , m_offset(offset)
{
}
// Construct a shell directory node pointing at heap block `pointer`,
// typically as a target for deserialization.
HashDirectoryNode::HashDirectoryNode(HashIndex& index, u32 pointer)
    : IndexNode(pointer)
    , m_hash_index(index)
{
}
// Read one directory page: global depth, the number of bucket pointers on
// this page, the block index of the next directory page (0 terminates the
// chain), then one (bucket pointer, local depth) pair per slot, each of
// which is appended to the index's bucket vector. Layout must match
// HashDirectoryNode::serialize().
void HashDirectoryNode::deserialize(Serializer& serializer)
{
    dbgln_if(SQL_DEBUG, "Deserializing Hash Directory Node");
    // Global depth is stored on every page; the value read last wins.
    m_hash_index.m_global_depth = serializer.deserialize<u32>();
    auto size = serializer.deserialize<u32>();
    dbgln_if(SQL_DEBUG, "Global Depth {}, #Bucket pointers {}", m_hash_index.global_depth(), size);
    auto next_node = serializer.deserialize<u32>();
    if (next_node) {
        dbgln_if(SQL_DEBUG, "Next node {}", next_node);
        // Recording the next page here is what drives the read loop in the
        // HashIndex constructor.
        m_hash_index.m_nodes.append(next_node);
    } else {
        dbgln_if(SQL_DEBUG, "This is the last directory node");
        m_is_last = true;
    }
    for (auto ix = 0u; ix < size; ix++) {
        auto bucket_pointer = serializer.deserialize<u32>();
        auto local_depth = serializer.deserialize<u32>();
        dbgln_if(SQL_DEBUG, "--Index {} bucket pointer {} local depth {}", ix, bucket_pointer, local_depth);
        m_hash_index.append_bucket(ix, local_depth, bucket_pointer);
    }
}
// Write one directory page: global depth, pointer count, the next page's
// block index (0 when this is the last page), then a (block index, local
// depth) pair for each bucket pointer covered by this page, starting at
// m_offset. Layout must match HashDirectoryNode::deserialize().
void HashDirectoryNode::serialize(Serializer& serializer) const
{
    dbgln_if(SQL_DEBUG, "Serializing directory node #{}. Offset {}", m_node_number, m_offset);
    serializer.serialize<u32>((u32)m_hash_index.global_depth());
    serializer.serialize<u32>(number_of_pointers());
    dbgln_if(SQL_DEBUG, "Global depth {}, #bucket pointers {}", m_hash_index.global_depth(), number_of_pointers());
    u32 next_node;
    if (m_node_number < (m_hash_index.m_nodes.size() - 1)) {
        // Chain to the following directory page.
        next_node = m_hash_index.m_nodes[m_node_number + 1];
        dbgln_if(SQL_DEBUG, "Next directory node pointer {}", next_node);
    } else {
        // 0 marks the end of the directory chain.
        next_node = 0u;
        dbgln_if(SQL_DEBUG, "This is the last directory node");
    }
    serializer.serialize<u32>(next_node);
    for (auto ix = 0u; ix < number_of_pointers(); ix++) {
        auto& bucket = m_hash_index.m_buckets[m_offset + ix];
        dbgln_if(SQL_DEBUG, "Bucket index #{} block_index {} local depth {} size {}", ix, bucket->block_index(), bucket->local_depth(), bucket->size());
        serializer.serialize<u32>(bucket->block_index());
        serializer.serialize<u32>(bucket->local_depth());
    }
}
// A bucket of the extendible hash directory. `index` is the bucket's slot
// in the directory, `local_depth` its split depth, and `pointer` the heap
// block backing it (0 for a virtual bucket with no storage allocated yet).
HashBucket::HashBucket(HashIndex& hash_index, Block::Index index, u32 local_depth, Block::Index pointer)
    : IndexNode(pointer)
    , m_hash_index(hash_index)
    , m_local_depth(local_depth)
    , m_index(index)
{
}
// Write this bucket's page: local depth, entry count, then each key.
// Layout must match HashBucket::deserialize() and HashBucket::length().
void HashBucket::serialize(Serializer& serializer) const
{
    dbgln_if(SQL_DEBUG, "Serializing bucket: block_index {}, index #{}, local depth {} size {}",
        block_index(), index(), local_depth(), size());
    serializer.serialize<u32>(local_depth());
    serializer.serialize<u32>(size());
    for (auto& key : m_entries)
        serializer.serialize<Key>(key);
}
// Populate ("inflate") this bucket from its backing block. A no-op when the
// bucket was already inflated or is virtual (block index 0). Layout must
// match HashBucket::serialize().
void HashBucket::deserialize(Serializer& serializer)
{
    if (m_inflated || !block_index())
        return;
    dbgln_if(SQL_DEBUG, "Inflating Hash Bucket {}", block_index());
    m_local_depth = serializer.deserialize<u32>();
    dbgln_if(SQL_DEBUG, "Bucket Local Depth {}", m_local_depth);
    auto size = serializer.deserialize<u32>();
    dbgln_if(SQL_DEBUG, "Bucket has {} keys", size);
    for (auto ix = 0u; ix < size; ix++) {
        // Keys are deserialized against this index's tuple descriptor.
        auto key = serializer.deserialize<Key>(m_hash_index.descriptor());
        dbgln_if(SQL_DEBUG, "Key {}: {}", ix, key.to_deprecated_string());
        m_entries.append(key);
    }
    m_inflated = true;
}
  98. size_t HashBucket::length() const
  99. {
  100. size_t len = 2 * sizeof(u32);
  101. for (auto& key : m_entries)
  102. len += key.length();
  103. return len;
  104. }
// Look `key` up in this bucket. On a hit, the stored entry's block index
// (the tuple pointer) is both written back into `key` and returned;
// otherwise an empty Optional is returned.
Optional<u32> HashBucket::get(Key& key)
{
    auto optional_index = find_key_in_bucket(key);
    if (optional_index.has_value()) {
        auto& k = m_entries[optional_index.value()];
        key.set_block_index(k.block_index());
        return k.block_index();
    }
    return {};
}
// Insert `key` into this bucket and persist the bucket to storage.
// Returns false when the key is already present, or when appending it
// would make the serialized bucket exceed the block size.
bool HashBucket::insert(Key const& key)
{
    // Make sure the current contents are loaded before checking/updating.
    if (!m_inflated)
        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
    // Duplicate keys are rejected.
    if (find_key_in_bucket(key).has_value())
        return false;
    if (length() + key.length() > Block::DATA_SIZE) {
        dbgln_if(SQL_DEBUG, "Adding key {} would make length exceed block size", key.to_deprecated_string());
        return false;
    }
    m_entries.append(key);
    m_hash_index.serializer().serialize_and_write(*this);
    return true;
}
  129. Optional<size_t> HashBucket::find_key_in_bucket(Key const& key)
  130. {
  131. for (auto ix = 0u; ix < size(); ix++) {
  132. auto& k = entries()[ix];
  133. if (k == key)
  134. return ix;
  135. }
  136. return {};
  137. }
// Return the next non-empty bucket after this one, re-reading each
// candidate bucket from storage; nullptr when no later bucket has entries.
// Used by the iterator to skip empty buckets.
HashBucket const* HashBucket::next_bucket()
{
    for (auto ix = m_index + 1; ix < m_hash_index.size(); ix++) {
        auto bucket = m_hash_index.get_bucket_by_index(ix);
        m_hash_index.serializer().deserialize_block_to<HashBucket>(bucket->block_index(), *bucket);
        if (bucket->size())
            return bucket;
    }
    return nullptr;
}
  148. HashBucket const* HashBucket::previous_bucket()
  149. {
  150. for (auto ix = m_index - 1; ix > 0; ix--) {
  151. auto bucket = m_hash_index.get_bucket_by_index(ix);
  152. if (bucket->block_index() > 0)
  153. return bucket;
  154. }
  155. return nullptr;
  156. }
// Accessor for the bucket's keys; inflates the bucket from storage on
// first use.
Vector<Key> const& HashBucket::entries()
{
    if (!m_inflated)
        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
    return m_entries;
}
// Indexed access to the bucket's keys; inflates the bucket from storage on
// first use. Bounds checking is left to the underlying Vector.
Key const& HashBucket::operator[](size_t ix)
{
    if (!m_inflated)
        m_hash_index.serializer().deserialize_block_to(block_index(), *this);
    return m_entries[ix];
}
// Const indexed access. Cannot inflate; assumes the bucket has already been
// deserialized (or is empty).
Key const& HashBucket::operator[](size_t ix) const
{
    return m_entries[ix];
}
// Debug helper: dump this bucket's metadata and all keys (with their
// hashes) to stderr.
void HashBucket::list_bucket()
{
    warnln("Bucket #{} size {} local depth {} block_index {}{}",
        index(), size(), local_depth(), block_index(), (block_index() > 0 ? "" : " (VIRTUAL)"));
    for (auto& key : entries())
        warnln("  {} hash {}", key.to_deprecated_string(), key.hash());
}
// Open or create a hash index rooted at `first_node`. If the root block
// already exists on the heap, the directory pages are read in by following
// the chain of next-node pointers; otherwise a fresh index is initialized
// with two physical buckets at local depth 1 and the directory is written.
HashIndex::HashIndex(Serializer& serializer, NonnullRefPtr<TupleDescriptor> const& descriptor, Block::Index first_node)
    : Index(serializer, descriptor, true, first_node)
{
    if (first_node == 0)
        set_block_index(request_new_block_index());
    if (serializer.has_block(first_node)) {
        Block::Index block_index = first_node;
        do {
            VERIFY(serializer.has_block(block_index));
            // Deserializing a directory node appends the next page's block
            // index to m_nodes (see HashDirectoryNode::deserialize), which
            // is how this loop advances.
            auto node = serializer.deserialize_block<HashDirectoryNode>(block_index, *this, block_index);
            if (node.is_last())
                break;
            block_index = m_nodes.last(); // FIXME Ugly
        } while (block_index);
    } else {
        // Brand-new index: two buckets, global depth effectively 1.
        auto bucket = append_bucket(0u, 1u, request_new_block_index());
        bucket->m_inflated = true;
        serializer.serialize_and_write(*bucket);
        bucket = append_bucket(1u, 1u, request_new_block_index());
        bucket->m_inflated = true;
        serializer.serialize_and_write(*bucket);
        m_nodes.append(first_node);
        write_directory();
    }
}
// Return the bucket backing directory slot `index`. Slots created by
// expand() start out virtual (block index 0, no storage); such a slot
// delegates to the lower-depth bucket it was split from by repeatedly
// reducing the slot number modulo ever-smaller directory sizes.
HashBucket* HashIndex::get_bucket(u32 index)
{
    VERIFY(index < m_buckets.size());
    auto divisor = size() / 2;
    while (m_buckets[index]->block_index() == 0) {
        // The original two depth-1 buckets always have storage, so the
        // chain must terminate before divisor reaches 1.
        VERIFY(divisor > 1);
        index = index % divisor;
        divisor /= 2;
    }
    auto& bucket = m_buckets[index];
    return bucket;
}
// Find a bucket with enough free space for `key`, redistributing entries
// into deeper buckets and doubling the directory as needed until one is
// found. The returned bucket is the one `key` hashes to at the (possibly
// grown) current directory size.
HashBucket* HashIndex::get_bucket_for_insert(Key const& key)
{
    auto key_hash = key.hash();
    do {
        dbgln_if(SQL_DEBUG, "HashIndex::get_bucket_for_insert({}) bucket {} of {}", key.to_deprecated_string(), key_hash % size(), size());
        auto bucket = get_bucket(key_hash % size());
        if (bucket->length() + key.length() < Block::DATA_SIZE)
            return bucket;
        dbgln_if(SQL_DEBUG, "Bucket is full (bucket size {}/length {} key length {}). Expanding directory", bucket->size(), bucket->length(), key.length());
        // We previously doubled the directory but the target bucket is
        // still at an older depth. Create new buckets at the current global
        // depth and allocate the contents of the existing buckets to the
        // newly created ones:
        while (bucket->local_depth() < global_depth()) {
            auto base_index = bucket->index();
            // `step` is the distance between directory slots that alias
            // this bucket at its current (shallower) depth.
            auto step = 1 << (global_depth() - bucket->local_depth());
            auto total_moved = 0;
            for (auto ix = base_index + step; ix < size(); ix += step) {
                auto& sub_bucket = m_buckets[ix];
                sub_bucket->set_local_depth(bucket->local_depth() + 1);
                auto moved = 0;
                // Walk backwards so take() doesn't disturb entries not yet
                // visited.
                for (auto entry_index = (int)bucket->m_entries.size() - 1; entry_index >= 0; entry_index--) {
                    if (bucket->m_entries[entry_index].hash() % size() == ix) {
                        // Materialize the virtual bucket on first use.
                        if (!sub_bucket->block_index())
                            sub_bucket->set_block_index(request_new_block_index());
                        sub_bucket->insert(bucket->m_entries.take(entry_index));
                        moved++;
                    }
                }
                if (moved > 0) {
                    dbgln_if(SQL_DEBUG, "Moved {} entries from bucket #{} to #{}", moved, base_index, ix);
                    serializer().serialize_and_write(*sub_bucket);
                }
                total_moved += moved;
            }
            if (total_moved)
                dbgln_if(SQL_DEBUG, "Redistributed {} entries from bucket #{}", total_moved, base_index);
            else
                dbgln_if(SQL_DEBUG, "Nothing redistributed from bucket #{}", base_index);
            bucket->set_local_depth(bucket->local_depth() + 1);
            serializer().serialize_and_write(*bucket);
            write_directory();
            // Redistribution may have made room in the slot the key hashes
            // to; if so we are done.
            auto bucket_after_redistribution = get_bucket(key_hash % size());
            if (bucket_after_redistribution->length() + key.length() < Block::DATA_SIZE)
                return bucket_after_redistribution;
        }
        // Target bucket is already at global depth and still full: double
        // the directory and try again.
        expand();
    } while (true);
    VERIFY_NOT_REACHED();
}
// Double the directory: for every existing slot i, add a virtual bucket
// (block index 0) at slot (size + i) carrying the source bucket's local
// depth, then bump the global depth and rewrite the directory. Entries are
// physically redistributed later, in get_bucket_for_insert().
void HashIndex::expand()
{
    auto sz = size();
    dbgln_if(SQL_DEBUG, "Expanding directory from {} to {} buckets", sz, 2 * sz);
    for (auto i = 0u; i < sz; i++) {
        // NOTE: bucket->local_depth() is evaluated before append_bucket()
        // can grow m_buckets, so the pointer is not used after a potential
        // reallocation; `bucket` is immediately re-pointed at the new slot.
        auto bucket = get_bucket(i);
        bucket = append_bucket(sz + i, bucket->local_depth(), 0u);
        bucket->m_inflated = true;
    }
    m_global_depth++;
    write_directory();
}
  279. void HashIndex::write_directory()
  280. {
  281. auto num_nodes_required = (size() / HashDirectoryNode::max_pointers_in_node()) + 1;
  282. while (m_nodes.size() < num_nodes_required)
  283. m_nodes.append(request_new_block_index());
  284. size_t offset = 0u;
  285. size_t num_node = 0u;
  286. while (offset < size()) {
  287. HashDirectoryNode node(*this, num_node, offset);
  288. serializer().serialize_and_write(node);
  289. offset += node.number_of_pointers();
  290. }
  291. }
  292. HashBucket* HashIndex::append_bucket(u32 index, u32 local_depth, u32 pointer)
  293. {
  294. m_buckets.append(make<HashBucket>(*this, index, local_depth, pointer));
  295. return m_buckets.last();
  296. }
  297. HashBucket* HashIndex::get_bucket_by_index(u32 index)
  298. {
  299. if (index >= size())
  300. return nullptr;
  301. return m_buckets[index];
  302. }
// Look `key` up in the index: hash it to a directory slot and search the
// backing bucket. On success the entry's tuple pointer is returned (and
// also written back into `key` by HashBucket::get); empty otherwise.
Optional<u32> HashIndex::get(Key& key)
{
    auto hash = key.hash();
    auto bucket_index = hash % size();
    dbgln_if(SQL_DEBUG, "HashIndex::get({}) bucket_index {}", key.to_deprecated_string(), bucket_index);
    auto bucket = get_bucket(bucket_index);
    if constexpr (SQL_DEBUG)
        bucket->list_bucket();
    return bucket->get(key);
}
// Insert `key` into the index, growing/redistributing the directory first
// if the target bucket is full.
// NOTE(review): bucket->insert() returns false for a duplicate key, but the
// result is ignored and this function always reports true — confirm callers
// do not rely on duplicate detection through this return value.
bool HashIndex::insert(Key const& key)
{
    dbgln_if(SQL_DEBUG, "HashIndex::insert({})", key.to_deprecated_string());
    auto bucket = get_bucket_for_insert(key);
    bucket->insert(key);
    if constexpr (SQL_DEBUG)
        bucket->list_bucket();
    return true;
}
  322. HashIndexIterator HashIndex::begin()
  323. {
  324. return HashIndexIterator(get_bucket(0));
  325. }
// Sentinel end iterator; delegates to the iterator's own end() factory.
HashIndexIterator HashIndex::end()
{
    return HashIndexIterator::end();
}
  330. HashIndexIterator HashIndex::find(Key const& key)
  331. {
  332. auto hash = key.hash();
  333. auto bucket_index = hash % size();
  334. auto bucket = get_bucket(bucket_index);
  335. auto optional_index = bucket->find_key_in_bucket(key);
  336. if (!optional_index.has_value())
  337. return end();
  338. return HashIndexIterator(bucket, optional_index.value());
  339. }
// Debug helper: dump the directory layout and every bucket's contents to
// stderr.
void HashIndex::list_hash()
{
    warnln("Number of buckets: {} (Global depth {})", size(), global_depth());
    warn("Directory pointer(s): ");
    for (auto ptr : m_nodes)
        warn("{}, ", ptr);
    warnln();
    for (auto& bucket : m_buckets)
        bucket->list_bucket();
}
// Position the iterator on `bucket` at entry `index`, then advance past
// empty buckets so a non-end iterator always refers to a real entry.
// m_current becoming nullptr makes this the end iterator.
HashIndexIterator::HashIndexIterator(HashBucket const* bucket, size_t index)
    : m_current(bucket)
    , m_index(index)
{
    // A non-zero starting index must lie inside the bucket.
    VERIFY(!m_current || !index || (index < m_current->size()));
    while (m_current && (m_current->size() == 0)) {
        m_current = m_current->next_bucket();
        m_index = 0;
    }
}
// Return an iterator advanced by one entry: step within the current bucket
// if possible, otherwise move to the next non-empty bucket (which may
// produce the end iterator). Advancing the end iterator is a no-op.
HashIndexIterator HashIndexIterator::next()
{
    if (is_end())
        return *this;
    if (m_index < (m_current->size() - 1))
        return HashIndexIterator(m_current.ptr(), m_index + 1);
    return HashIndexIterator(m_current->next_bucket());
}
// Backward iteration is not implemented yet; aborts if called.
HashIndexIterator HashIndexIterator::previous()
{
    TODO();
}
// Two iterators are equal when both are at end, or when they refer to the
// same bucket object and the same entry position. Comparing iterators from
// different indexes is a programming error (caught by the VERIFY).
bool HashIndexIterator::operator==(HashIndexIterator const& other) const
{
    if (is_end())
        return other.is_end();
    if (other.is_end())
        return false;
    VERIFY(&other.m_current->hash_index() == &m_current->hash_index());
    return (m_current.ptr() == other.m_current.ptr()) && (m_index == other.m_index);
}
  381. bool HashIndexIterator::operator==(Key const& other) const
  382. {
  383. if (is_end())
  384. return false;
  385. if (other.is_null())
  386. return false;
  387. return (**this).compare(other);
  388. }
  389. }