Heap.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367
  1. /*
  2. * Copyright (c) 2021, Jan de Visser <jan@de-visser.net>
  3. * Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl>
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/ByteString.h>
  8. #include <AK/Format.h>
  9. #include <AK/QuickSort.h>
  10. #include <LibCore/System.h>
  11. #include <LibSQL/Heap.h>
  12. #include <sys/stat.h>
  13. namespace SQL {
  14. ErrorOr<NonnullRefPtr<Heap>> Heap::create(ByteString file_name)
  15. {
  16. return adopt_nonnull_ref_or_enomem(new (nothrow) Heap(move(file_name)));
  17. }
  18. Heap::Heap(ByteString file_name)
  19. : m_name(move(file_name))
  20. {
  21. }
  22. Heap::~Heap()
  23. {
  24. if (m_file && !m_write_ahead_log.is_empty()) {
  25. if (auto maybe_error = flush(); maybe_error.is_error())
  26. warnln("~Heap({}): {}", name(), maybe_error.error());
  27. }
  28. }
  29. ErrorOr<void> Heap::open()
  30. {
  31. VERIFY(!m_file);
  32. size_t file_size = 0;
  33. struct stat stat_buffer;
  34. if (stat(name().characters(), &stat_buffer) != 0) {
  35. if (errno != ENOENT) {
  36. warnln("Heap::open({}): could not stat: {}"sv, name(), strerror(errno));
  37. return Error::from_string_literal("Heap::open(): could not stat file");
  38. }
  39. } else if (!S_ISREG(stat_buffer.st_mode)) {
  40. warnln("Heap::open({}): can only use regular files"sv, name());
  41. return Error::from_string_literal("Heap::open(): can only use regular files");
  42. } else {
  43. file_size = stat_buffer.st_size;
  44. }
  45. if (file_size > 0) {
  46. m_next_block = file_size / Block::SIZE;
  47. m_highest_block_written = m_next_block - 1;
  48. }
  49. auto file = TRY(Core::File::open(name(), Core::File::OpenMode::ReadWrite));
  50. m_file = TRY(Core::InputBufferedFile::create(move(file)));
  51. if (file_size > 0) {
  52. if (auto error_maybe = read_zero_block(); error_maybe.is_error()) {
  53. m_file = nullptr;
  54. return error_maybe.release_error();
  55. }
  56. } else {
  57. TRY(initialize_zero_block());
  58. }
  59. // FIXME: We should more gracefully handle version incompatibilities. For now, we drop the database.
  60. if (m_version != VERSION) {
  61. dbgln_if(SQL_DEBUG, "Heap file {} opened has incompatible version {}. Deleting for version {}.", name(), m_version, VERSION);
  62. m_file = nullptr;
  63. TRY(Core::System::unlink(name()));
  64. return open();
  65. }
  66. // Perform a heap scan to find all free blocks
  67. // FIXME: this is very inefficient; store free blocks in a persistent heap structure
  68. for (Block::Index index = 1; index <= m_highest_block_written; ++index) {
  69. auto block_data = TRY(read_raw_block(index));
  70. auto size_in_bytes = *reinterpret_cast<u32*>(block_data.data());
  71. if (size_in_bytes == 0)
  72. TRY(m_free_block_indices.try_append(index));
  73. }
  74. dbgln_if(SQL_DEBUG, "Heap file {} opened; number of blocks = {}; free blocks = {}", name(), m_highest_block_written, m_free_block_indices.size());
  75. return {};
  76. }
  77. ErrorOr<size_t> Heap::file_size_in_bytes() const
  78. {
  79. TRY(m_file->seek(0, SeekMode::FromEndPosition));
  80. return TRY(m_file->tell());
  81. }
  82. bool Heap::has_block(Block::Index index) const
  83. {
  84. return (index <= m_highest_block_written || m_write_ahead_log.contains(index))
  85. && !m_free_block_indices.contains_slow(index);
  86. }
  87. Block::Index Heap::request_new_block_index()
  88. {
  89. if (!m_free_block_indices.is_empty())
  90. return m_free_block_indices.take_last();
  91. return m_next_block++;
  92. }
  93. ErrorOr<ByteBuffer> Heap::read_storage(Block::Index index)
  94. {
  95. dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index);
  96. // Reconstruct the data storage from a potential chain of blocks
  97. ByteBuffer data;
  98. while (index > 0) {
  99. auto block = TRY(read_block(index));
  100. dbgln_if(SQL_DEBUG, " -> {} bytes", block.size_in_bytes());
  101. TRY(data.try_append(block.data().bytes().slice(0, block.size_in_bytes())));
  102. index = block.next_block();
  103. }
  104. return data;
  105. }
  106. ErrorOr<void> Heap::write_storage(Block::Index index, ReadonlyBytes data)
  107. {
  108. dbgln_if(SQL_DEBUG, "{}({}, {} bytes)", __FUNCTION__, index, data.size());
  109. if (index == 0)
  110. return Error::from_string_view("Writing to zero block is not allowed"sv);
  111. if (data.is_empty())
  112. return Error::from_string_view("Writing empty data is not allowed"sv);
  113. if (m_free_block_indices.contains_slow(index))
  114. return Error::from_string_view("Invalid write to a free block index"sv);
  115. // Split up the storage across multiple blocks if necessary, creating a chain
  116. u32 remaining_size = static_cast<u32>(data.size());
  117. u32 offset_in_data = 0;
  118. Block::Index existing_next_block_index = 0;
  119. while (remaining_size > 0) {
  120. auto block_data_size = AK::min(remaining_size, Block::DATA_SIZE);
  121. remaining_size -= block_data_size;
  122. ByteBuffer block_data;
  123. if (has_block(index)) {
  124. auto existing_block = TRY(read_block(index));
  125. block_data = existing_block.data();
  126. TRY(block_data.try_resize(block_data_size));
  127. existing_next_block_index = existing_block.next_block();
  128. } else {
  129. block_data = TRY(ByteBuffer::create_uninitialized(block_data_size));
  130. existing_next_block_index = 0;
  131. }
  132. Block::Index next_block_index = existing_next_block_index;
  133. if (next_block_index == 0 && remaining_size > 0)
  134. next_block_index = request_new_block_index();
  135. else if (remaining_size == 0)
  136. next_block_index = 0;
  137. block_data.bytes().overwrite(0, data.offset(offset_in_data), block_data_size);
  138. TRY(write_block({ index, block_data_size, next_block_index, move(block_data) }));
  139. index = next_block_index;
  140. offset_in_data += block_data_size;
  141. }
  142. // Free remaining blocks in existing chain, if any
  143. if (existing_next_block_index > 0)
  144. TRY(free_storage(existing_next_block_index));
  145. return {};
  146. }
  147. ErrorOr<ByteBuffer> Heap::read_raw_block(Block::Index index)
  148. {
  149. VERIFY(m_file);
  150. VERIFY(index < m_next_block);
  151. if (auto wal_entry = m_write_ahead_log.get(index); wal_entry.has_value())
  152. return wal_entry.value();
  153. TRY(m_file->seek(index * Block::SIZE, SeekMode::SetPosition));
  154. auto buffer = TRY(ByteBuffer::create_uninitialized(Block::SIZE));
  155. TRY(m_file->read_until_filled(buffer));
  156. return buffer;
  157. }
  158. ErrorOr<Block> Heap::read_block(Block::Index index)
  159. {
  160. dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index);
  161. auto buffer = TRY(read_raw_block(index));
  162. auto size_in_bytes = *reinterpret_cast<u32*>(buffer.offset_pointer(0));
  163. auto next_block = *reinterpret_cast<Block::Index*>(buffer.offset_pointer(sizeof(u32)));
  164. auto data = TRY(buffer.slice(Block::HEADER_SIZE, Block::DATA_SIZE));
  165. return Block { index, size_in_bytes, next_block, move(data) };
  166. }
  167. ErrorOr<void> Heap::write_raw_block(Block::Index index, ReadonlyBytes data)
  168. {
  169. dbgln_if(SQL_DEBUG, "Write raw block {}", index);
  170. VERIFY(m_file);
  171. VERIFY(data.size() == Block::SIZE);
  172. TRY(m_file->seek(index * Block::SIZE, SeekMode::SetPosition));
  173. TRY(m_file->write_until_depleted(data));
  174. if (index > m_highest_block_written)
  175. m_highest_block_written = index;
  176. return {};
  177. }
  178. ErrorOr<void> Heap::write_raw_block_to_wal(Block::Index index, ByteBuffer&& data)
  179. {
  180. dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index);
  181. VERIFY(index < m_next_block);
  182. VERIFY(data.size() == Block::SIZE);
  183. TRY(m_write_ahead_log.try_set(index, move(data)));
  184. return {};
  185. }
  186. ErrorOr<void> Heap::write_block(Block const& block)
  187. {
  188. dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, block.index());
  189. VERIFY(block.index() < m_next_block);
  190. VERIFY(block.next_block() < m_next_block);
  191. VERIFY(block.size_in_bytes() > 0);
  192. VERIFY(block.data().size() <= Block::DATA_SIZE);
  193. auto size_in_bytes = block.size_in_bytes();
  194. auto next_block = block.next_block();
  195. auto heap_data = TRY(ByteBuffer::create_zeroed(Block::SIZE));
  196. heap_data.overwrite(0, &size_in_bytes, sizeof(size_in_bytes));
  197. heap_data.overwrite(sizeof(size_in_bytes), &next_block, sizeof(next_block));
  198. block.data().bytes().copy_to(heap_data.bytes().slice(Block::HEADER_SIZE));
  199. return write_raw_block_to_wal(block.index(), move(heap_data));
  200. }
  201. ErrorOr<void> Heap::free_storage(Block::Index index)
  202. {
  203. dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index);
  204. VERIFY(index > 0);
  205. while (index > 0) {
  206. auto block = TRY(read_block(index));
  207. TRY(free_block(block));
  208. index = block.next_block();
  209. }
  210. return {};
  211. }
  212. ErrorOr<void> Heap::free_block(Block const& block)
  213. {
  214. auto index = block.index();
  215. dbgln_if(SQL_DEBUG, "{}({})", __FUNCTION__, index);
  216. VERIFY(index > 0);
  217. VERIFY(has_block(index));
  218. // Zero out freed blocks to facilitate a free block scan upon opening the database later
  219. auto zeroed_data = TRY(ByteBuffer::create_zeroed(Block::SIZE));
  220. TRY(write_raw_block_to_wal(index, move(zeroed_data)));
  221. return m_free_block_indices.try_append(index);
  222. }
  223. ErrorOr<void> Heap::flush()
  224. {
  225. VERIFY(m_file);
  226. auto indices = m_write_ahead_log.keys();
  227. quick_sort(indices);
  228. for (auto index : indices) {
  229. dbgln_if(SQL_DEBUG, "Flushing block {}", index);
  230. auto& data = m_write_ahead_log.get(index).value();
  231. TRY(write_raw_block(index, data));
  232. }
  233. m_write_ahead_log.clear();
  234. dbgln_if(SQL_DEBUG, "WAL flushed; new number of blocks = {}", m_highest_block_written);
  235. return {};
  236. }
  237. constexpr static auto FILE_ID = "SerenitySQL "sv;
  238. constexpr static auto VERSION_OFFSET = FILE_ID.length();
  239. constexpr static auto SCHEMAS_ROOT_OFFSET = VERSION_OFFSET + sizeof(u32);
  240. constexpr static auto TABLES_ROOT_OFFSET = SCHEMAS_ROOT_OFFSET + sizeof(u32);
  241. constexpr static auto TABLE_COLUMNS_ROOT_OFFSET = TABLES_ROOT_OFFSET + sizeof(u32);
  242. constexpr static auto USER_VALUES_OFFSET = TABLE_COLUMNS_ROOT_OFFSET + sizeof(u32);
  243. ErrorOr<void> Heap::read_zero_block()
  244. {
  245. dbgln_if(SQL_DEBUG, "Read zero block from {}", name());
  246. auto block = TRY(read_raw_block(0));
  247. auto file_id_buffer = TRY(block.slice(0, FILE_ID.length()));
  248. auto file_id = StringView(file_id_buffer);
  249. if (file_id != FILE_ID) {
  250. warnln("{}: Zero page corrupt. This is probably not a {} heap file"sv, name(), FILE_ID);
  251. return Error::from_string_literal("Heap()::read_zero_block(): Zero page corrupt. This is probably not a SerenitySQL heap file");
  252. }
  253. memcpy(&m_version, block.offset_pointer(VERSION_OFFSET), sizeof(u32));
  254. dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF));
  255. memcpy(&m_schemas_root, block.offset_pointer(SCHEMAS_ROOT_OFFSET), sizeof(u32));
  256. dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root);
  257. memcpy(&m_tables_root, block.offset_pointer(TABLES_ROOT_OFFSET), sizeof(u32));
  258. dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root);
  259. memcpy(&m_table_columns_root, block.offset_pointer(TABLE_COLUMNS_ROOT_OFFSET), sizeof(u32));
  260. dbgln_if(SQL_DEBUG, "Table columns root node: {}", m_table_columns_root);
  261. memcpy(m_user_values.data(), block.offset_pointer(USER_VALUES_OFFSET), m_user_values.size() * sizeof(u32));
  262. for (auto ix = 0u; ix < m_user_values.size(); ix++) {
  263. if (m_user_values[ix])
  264. dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]);
  265. }
  266. return {};
  267. }
  268. ErrorOr<void> Heap::update_zero_block()
  269. {
  270. dbgln_if(SQL_DEBUG, "Write zero block to {}", name());
  271. dbgln_if(SQL_DEBUG, "Version: {}.{}", (m_version & 0xFFFF0000) >> 16, (m_version & 0x0000FFFF));
  272. dbgln_if(SQL_DEBUG, "Schemas root node: {}", m_schemas_root);
  273. dbgln_if(SQL_DEBUG, "Tables root node: {}", m_tables_root);
  274. dbgln_if(SQL_DEBUG, "Table Columns root node: {}", m_table_columns_root);
  275. for (auto ix = 0u; ix < m_user_values.size(); ix++) {
  276. if (m_user_values[ix] > 0)
  277. dbgln_if(SQL_DEBUG, "User value {}: {}", ix, m_user_values[ix]);
  278. }
  279. auto buffer = TRY(ByteBuffer::create_zeroed(Block::SIZE));
  280. auto buffer_bytes = buffer.bytes();
  281. buffer_bytes.overwrite(0, FILE_ID.characters_without_null_termination(), FILE_ID.length());
  282. buffer_bytes.overwrite(VERSION_OFFSET, &m_version, sizeof(u32));
  283. buffer_bytes.overwrite(SCHEMAS_ROOT_OFFSET, &m_schemas_root, sizeof(u32));
  284. buffer_bytes.overwrite(TABLES_ROOT_OFFSET, &m_tables_root, sizeof(u32));
  285. buffer_bytes.overwrite(TABLE_COLUMNS_ROOT_OFFSET, &m_table_columns_root, sizeof(u32));
  286. buffer_bytes.overwrite(USER_VALUES_OFFSET, m_user_values.data(), m_user_values.size() * sizeof(u32));
  287. return write_raw_block_to_wal(0, move(buffer));
  288. }
  289. ErrorOr<void> Heap::initialize_zero_block()
  290. {
  291. m_version = VERSION;
  292. m_schemas_root = 0;
  293. m_tables_root = 0;
  294. m_table_columns_root = 0;
  295. m_next_block = 1;
  296. m_highest_block_written = 0;
  297. for (auto& user : m_user_values)
  298. user = 0u;
  299. return update_zero_block();
  300. }
  301. }