HashTable.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. #pragma once
  2. #include "Assertions.h"
  3. #include "DoublyLinkedList.h"
  4. #include "Traits.h"
  5. #include "StdLibExtras.h"
  6. #include "kstdio.h"
  7. //#define HASHTABLE_DEBUG
  8. namespace AK {
  9. template<typename T, typename = Traits<T>> class HashTable;
  10. template<typename T, typename TraitsForT>
  11. class HashTable {
  12. private:
  13. struct Bucket {
  14. DoublyLinkedList<T> chain;
  15. };
  16. public:
  17. HashTable() { }
  18. explicit HashTable(HashTable&& other)
  19. : m_buckets(other.m_buckets)
  20. , m_size(other.m_size)
  21. , m_capacity(other.m_capacity)
  22. {
  23. other.m_size = 0;
  24. other.m_capacity = 0;
  25. other.m_buckets = nullptr;
  26. }
  27. HashTable& operator=(HashTable&& other)
  28. {
  29. if (this != &other) {
  30. clear();
  31. m_buckets = other.m_buckets;
  32. m_size = other.m_size;
  33. m_capacity = other.m_capacity;
  34. other.m_size = 0;
  35. other.m_capacity = 0;
  36. other.m_buckets = nullptr;
  37. }
  38. return *this;
  39. }
  40. ~HashTable() { clear(); }
  41. bool is_empty() const { return !m_size; }
  42. unsigned size() const { return m_size; }
  43. unsigned capacity() const { return m_capacity; }
  44. void set(const T&);
  45. void set(T&&);
  46. bool contains(const T&) const;
  47. void clear();
  48. void dump() const;
  49. class Iterator {
  50. public:
  51. bool operator!=(const Iterator& other) const
  52. {
  53. if (m_is_end && other.m_is_end)
  54. return false;
  55. return &m_table != &other.m_table
  56. || m_is_end != other.m_is_end
  57. || m_bucket_index != other.m_bucket_index
  58. || m_bucket_iterator != other.m_bucket_iterator;
  59. }
  60. bool operator==(const Iterator& other) const { return !(*this != other); }
  61. T& operator*()
  62. {
  63. #ifdef HASHTABLE_DEBUG
  64. kprintf("retrieve { bucket_index: %u, is_end: %u }\n", m_bucket_index, m_is_end);
  65. #endif
  66. return *m_bucket_iterator;
  67. }
  68. Iterator& operator++()
  69. {
  70. skip_to_next();
  71. return *this;
  72. }
  73. void skip_to_next()
  74. {
  75. #ifdef HASHTABLE_DEBUG
  76. unsigned pass = 0;
  77. #endif
  78. while (!m_is_end) {
  79. #ifdef HASHTABLE_DEBUG
  80. ++pass;
  81. kprintf("skip_to_next pass %u, m_bucket_index=%u\n", pass, m_bucket_index);
  82. #endif
  83. if (m_bucket_iterator.is_end()) {
  84. ++m_bucket_index;
  85. if (m_bucket_index >= m_table.capacity()) {
  86. m_is_end = true;
  87. return;
  88. }
  89. m_bucket_iterator = m_table.m_buckets[m_bucket_index].chain.begin();
  90. } else {
  91. ++m_bucket_iterator;
  92. }
  93. if (!m_bucket_iterator.is_end())
  94. return;
  95. }
  96. }
  97. private:
  98. friend class HashTable;
  99. explicit Iterator(HashTable& table, bool is_end, typename DoublyLinkedList<T>::Iterator bucket_iterator = DoublyLinkedList<T>::Iterator::universal_end(), unsigned bucket_index = 0)
  100. : m_table(table)
  101. , m_bucket_index(bucket_index)
  102. , m_is_end(is_end)
  103. , m_bucket_iterator(bucket_iterator)
  104. {
  105. if (!is_end && !m_table.is_empty() && !(m_bucket_iterator != DoublyLinkedList<T>::Iterator::universal_end())) {
  106. #ifdef HASHTABLE_DEBUG
  107. kprintf("bucket iterator init!\n");
  108. #endif
  109. m_bucket_iterator = m_table.m_buckets[0].chain.begin();
  110. if (m_bucket_iterator.is_end())
  111. skip_to_next();
  112. }
  113. }
  114. HashTable& m_table;
  115. unsigned m_bucket_index { 0 };
  116. bool m_is_end { false };
  117. typename DoublyLinkedList<T>::Iterator m_bucket_iterator;
  118. };
  119. Iterator begin() { return Iterator(*this, is_empty()); }
  120. Iterator end() { return Iterator(*this, true); }
  121. class ConstIterator {
  122. public:
  123. bool operator!=(const ConstIterator& other) const
  124. {
  125. if (m_is_end && other.m_is_end)
  126. return false;
  127. return &m_table != &other.m_table
  128. || m_is_end != other.m_is_end
  129. || m_bucket_index != other.m_bucket_index
  130. || m_bucket_iterator != other.m_bucket_iterator;
  131. }
  132. bool operator==(const ConstIterator& other) const { return !(*this != other); }
  133. const T& operator*() const
  134. {
  135. #ifdef HASHTABLE_DEBUG
  136. kprintf("retrieve { bucket_index: %u, is_end: %u }\n", m_bucket_index, m_is_end);
  137. #endif
  138. return *m_bucket_iterator;
  139. }
  140. ConstIterator& operator++()
  141. {
  142. skip_to_next();
  143. return *this;
  144. }
  145. void skip_to_next()
  146. {
  147. #ifdef HASHTABLE_DEBUG
  148. unsigned pass = 0;
  149. #endif
  150. while (!m_is_end) {
  151. #ifdef HASHTABLE_DEBUG
  152. ++pass;
  153. kprintf("skip_to_next pass %u, m_bucket_index=%u\n", pass, m_bucket_index);
  154. #endif
  155. if (m_bucket_iterator.is_end()) {
  156. ++m_bucket_index;
  157. if (m_bucket_index >= m_table.capacity()) {
  158. m_is_end = true;
  159. return;
  160. }
  161. const DoublyLinkedList<T>& chain = m_table.m_buckets[m_bucket_index].chain;
  162. m_bucket_iterator = chain.begin();
  163. } else {
  164. ++m_bucket_iterator;
  165. }
  166. if (!m_bucket_iterator.is_end())
  167. return;
  168. }
  169. }
  170. private:
  171. friend class HashTable;
  172. ConstIterator(const HashTable& table, bool is_end, typename DoublyLinkedList<T>::ConstIterator bucket_iterator = DoublyLinkedList<T>::ConstIterator::universal_end(), unsigned bucket_index = 0)
  173. : m_table(table)
  174. , m_bucket_index(bucket_index)
  175. , m_is_end(is_end)
  176. , m_bucket_iterator(bucket_iterator)
  177. {
  178. if (!is_end && !m_table.is_empty() && !(m_bucket_iterator != DoublyLinkedList<T>::ConstIterator::universal_end())) {
  179. #ifdef HASHTABLE_DEBUG
  180. kprintf("const bucket iterator init!\n");
  181. #endif
  182. const DoublyLinkedList<T>& chain = m_table.m_buckets[0].chain;
  183. m_bucket_iterator = chain.begin();
  184. if (m_bucket_iterator.is_end())
  185. skip_to_next();
  186. }
  187. }
  188. const HashTable& m_table;
  189. unsigned m_bucket_index { 0 };
  190. bool m_is_end { false };
  191. typename DoublyLinkedList<T>::ConstIterator m_bucket_iterator;
  192. };
  193. ConstIterator begin() const { return ConstIterator(*this, is_empty()); }
  194. ConstIterator end() const { return ConstIterator(*this, true); }
  195. Iterator find(const T&);
  196. ConstIterator find(const T&) const;
  197. void remove(const T& value)
  198. {
  199. auto it = find(value);
  200. if (it != end())
  201. remove(it);
  202. }
  203. void remove(Iterator);
  204. private:
  205. Bucket& lookup(const T&, unsigned* bucket_index = nullptr);
  206. const Bucket& lookup(const T&, unsigned* bucket_index = nullptr) const;
  207. void rehash(unsigned capacity);
  208. void insert(const T&);
  209. void insert(T&&);
  210. Bucket* m_buckets { nullptr };
  211. unsigned m_size { 0 };
  212. unsigned m_capacity { 0 };
  213. };
  214. template<typename T, typename TraitsForT>
  215. void HashTable<T, TraitsForT>::set(T&& value)
  216. {
  217. if (!m_capacity)
  218. rehash(1);
  219. auto& bucket = lookup(value);
  220. for (auto& e : bucket.chain) {
  221. if (e == value)
  222. return;
  223. }
  224. if (size() >= capacity()) {
  225. rehash(size() + 1);
  226. insert(move(value));
  227. } else {
  228. bucket.chain.append(move(value));
  229. }
  230. m_size++;
  231. }
  232. template<typename T, typename TraitsForT>
  233. void HashTable<T, TraitsForT>::set(const T& value)
  234. {
  235. if (!m_capacity)
  236. rehash(1);
  237. auto& bucket = lookup(value);
  238. for (auto& e : bucket.chain) {
  239. if (e == value)
  240. return;
  241. }
  242. if (size() >= capacity()) {
  243. rehash(size() + 1);
  244. insert(value);
  245. } else {
  246. bucket.chain.append(value);
  247. }
  248. m_size++;
  249. }
  250. template<typename T, typename TraitsForT>
  251. void HashTable<T, TraitsForT>::rehash(unsigned new_capacity)
  252. {
  253. new_capacity *= 2;
  254. #ifdef HASHTABLE_DEBUG
  255. kprintf("rehash to %u buckets\n", new_capacity);
  256. #endif
  257. auto* new_buckets = new Bucket[new_capacity];
  258. auto* old_buckets = m_buckets;
  259. unsigned old_capacity = m_capacity;
  260. m_buckets = new_buckets;
  261. m_capacity = new_capacity;
  262. #ifdef HASHTABLE_DEBUG
  263. kprintf("reinsert %u buckets\n", old_capacity);
  264. #endif
  265. for (unsigned i = 0; i < old_capacity; ++i) {
  266. for (auto& value : old_buckets[i].chain) {
  267. insert(move(value));
  268. }
  269. }
  270. delete [] old_buckets;
  271. }
  272. template<typename T, typename TraitsForT>
  273. void HashTable<T, TraitsForT>::clear()
  274. {
  275. if (m_buckets) {
  276. delete [] m_buckets;
  277. m_buckets = nullptr;
  278. }
  279. m_capacity = 0;
  280. m_size = 0;
  281. }
  282. template<typename T, typename TraitsForT>
  283. void HashTable<T, TraitsForT>::insert(T&& value)
  284. {
  285. auto& bucket = lookup(value);
  286. bucket.chain.append(move(value));
  287. }
  288. template<typename T, typename TraitsForT>
  289. void HashTable<T, TraitsForT>::insert(const T& value)
  290. {
  291. auto& bucket = lookup(value);
  292. bucket.chain.append(value);
  293. }
  294. template<typename T, typename TraitsForT>
  295. bool HashTable<T, TraitsForT>::contains(const T& value) const
  296. {
  297. if (is_empty())
  298. return false;
  299. auto& bucket = lookup(value);
  300. for (auto& e : bucket.chain) {
  301. if (e == value)
  302. return true;
  303. }
  304. return false;
  305. }
  306. template<typename T, typename TraitsForT>
  307. auto HashTable<T, TraitsForT>::find(const T& value) -> Iterator
  308. {
  309. if (is_empty())
  310. return end();
  311. unsigned bucket_index;
  312. auto& bucket = lookup(value, &bucket_index);
  313. auto bucket_iterator = bucket.chain.find(value);
  314. if (bucket_iterator != bucket.chain.end())
  315. return Iterator(*this, false, bucket_iterator, bucket_index);
  316. return end();
  317. }
  318. template<typename T, typename TraitsForT>
  319. auto HashTable<T, TraitsForT>::find(const T& value) const -> ConstIterator
  320. {
  321. if (is_empty())
  322. return end();
  323. unsigned bucket_index;
  324. auto& bucket = lookup(value, &bucket_index);
  325. auto bucket_iterator = bucket.chain.find(value);
  326. if (bucket_iterator != bucket.chain.end())
  327. return ConstIterator(*this, false, bucket_iterator, bucket_index);
  328. return end();
  329. }
  330. template<typename T, typename TraitsForT>
  331. void HashTable<T, TraitsForT>::remove(Iterator it)
  332. {
  333. ASSERT(!is_empty());
  334. m_buckets[it.m_bucket_index].chain.remove(it.m_bucket_iterator);
  335. --m_size;
  336. }
  337. template<typename T, typename TraitsForT>
  338. typename HashTable<T, TraitsForT>::Bucket& HashTable<T, TraitsForT>::lookup(const T& value, unsigned* bucket_index)
  339. {
  340. unsigned hash = TraitsForT::hash(value);
  341. #ifdef HASHTABLE_DEBUG
  342. kprintf("hash for ");
  343. TraitsForT::dump(value);
  344. kprintf(" is %u\n", hash);
  345. #endif
  346. if (bucket_index)
  347. *bucket_index = hash % m_capacity;
  348. return m_buckets[hash % m_capacity];
  349. }
  350. template<typename T, typename TraitsForT>
  351. const typename HashTable<T, TraitsForT>::Bucket& HashTable<T, TraitsForT>::lookup(const T& value, unsigned* bucket_index) const
  352. {
  353. unsigned hash = TraitsForT::hash(value);
  354. #ifdef HASHTABLE_DEBUG
  355. kprintf("hash for ");
  356. TraitsForT::dump(value);
  357. kprintf(" is %u\n", hash);
  358. #endif
  359. if (bucket_index)
  360. *bucket_index = hash % m_capacity;
  361. return m_buckets[hash % m_capacity];
  362. }
  363. template<typename T, typename TraitsForT>
  364. void HashTable<T, TraitsForT>::dump() const
  365. {
  366. kprintf("HashTable{%p} m_size=%u, m_capacity=%u, m_buckets=%p\n", this, m_size, m_capacity, m_buckets);
  367. for (unsigned i = 0; i < m_capacity; ++i) {
  368. auto& bucket = m_buckets[i];
  369. kprintf("Bucket %u\n", i);
  370. for (auto& e : bucket.chain) {
  371. kprintf(" > ");
  372. TraitsForT::dump(e);
  373. kprintf("\n");
  374. }
  375. }
  376. }
  377. }
  378. using AK::HashTable;