kmalloc.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Assertions.h>
  7. #include <AK/Types.h>
  8. #include <Kernel/Arch/PageDirectory.h>
  9. #include <Kernel/Debug.h>
  10. #include <Kernel/Heap/Heap.h>
  11. #include <Kernel/Heap/kmalloc.h>
  12. #include <Kernel/KSyms.h>
  13. #include <Kernel/Locking/Spinlock.h>
  14. #include <Kernel/Memory/MemoryManager.h>
  15. #include <Kernel/Panic.h>
  16. #include <Kernel/PerformanceManager.h>
  17. #include <Kernel/Sections.h>
  18. #include <Kernel/StdLib.h>
  19. #if ARCH(I386)
  20. static constexpr size_t CHUNK_SIZE = 32;
  21. #elif ARCH(X86_64) || ARCH(AARCH64)
  22. static constexpr size_t CHUNK_SIZE = 64;
  23. #else
  24. # error Unknown architecture
  25. #endif
  26. static_assert(is_power_of_two(CHUNK_SIZE));
  27. static constexpr size_t INITIAL_KMALLOC_MEMORY_SIZE = 2 * MiB;
  28. // Treat the heap as logically separate from .bss
  29. __attribute__((section(".heap"))) static u8 initial_kmalloc_memory[INITIAL_KMALLOC_MEMORY_SIZE];
  30. namespace std {
  31. const nothrow_t nothrow;
  32. }
  33. // FIXME: Figure out whether this can be MemoryManager.
  34. static RecursiveSpinlock s_lock { LockRank::None }; // needs to be recursive because of dump_backtrace()
  35. struct KmallocSubheap {
  36. KmallocSubheap(u8* base, size_t size)
  37. : allocator(base, size)
  38. {
  39. }
  40. IntrusiveListNode<KmallocSubheap> list_node;
  41. using List = IntrusiveList<&KmallocSubheap::list_node>;
  42. Heap<CHUNK_SIZE, KMALLOC_SCRUB_BYTE, KFREE_SCRUB_BYTE> allocator;
  43. };
  44. class KmallocSlabBlock {
  45. public:
  46. static constexpr size_t block_size = 64 * KiB;
  47. static constexpr FlatPtr block_mask = ~(block_size - 1);
  48. KmallocSlabBlock(size_t slab_size)
  49. : m_slab_size(slab_size)
  50. , m_slab_count((block_size - sizeof(KmallocSlabBlock)) / slab_size)
  51. {
  52. for (size_t i = 0; i < m_slab_count; ++i) {
  53. auto* freelist_entry = (FreelistEntry*)(void*)(&m_data[i * slab_size]);
  54. freelist_entry->next = m_freelist;
  55. m_freelist = freelist_entry;
  56. }
  57. }
  58. void* allocate()
  59. {
  60. VERIFY(m_freelist);
  61. ++m_allocated_slabs;
  62. return exchange(m_freelist, m_freelist->next);
  63. }
  64. void deallocate(void* ptr)
  65. {
  66. VERIFY(ptr >= &m_data && ptr < ((u8*)this + block_size));
  67. --m_allocated_slabs;
  68. auto* freelist_entry = (FreelistEntry*)ptr;
  69. freelist_entry->next = m_freelist;
  70. m_freelist = freelist_entry;
  71. }
  72. bool is_full() const
  73. {
  74. return m_freelist == nullptr;
  75. }
  76. size_t allocated_bytes() const
  77. {
  78. return m_allocated_slabs * m_slab_size;
  79. }
  80. size_t free_bytes() const
  81. {
  82. return (m_slab_count - m_allocated_slabs) * m_slab_size;
  83. }
  84. IntrusiveListNode<KmallocSlabBlock> list_node;
  85. using List = IntrusiveList<&KmallocSlabBlock::list_node>;
  86. private:
  87. struct FreelistEntry {
  88. FreelistEntry* next;
  89. };
  90. FreelistEntry* m_freelist { nullptr };
  91. size_t m_slab_size { 0 };
  92. size_t m_slab_count { 0 };
  93. size_t m_allocated_slabs { 0 };
  94. [[gnu::aligned(16)]] u8 m_data[];
  95. };
  96. class KmallocSlabheap {
  97. public:
  98. KmallocSlabheap(size_t slab_size)
  99. : m_slab_size(slab_size)
  100. {
  101. }
  102. size_t slab_size() const { return m_slab_size; }
  103. void* allocate()
  104. {
  105. if (m_usable_blocks.is_empty()) {
  106. // FIXME: This allocation wastes `block_size` bytes due to the implementation of kmalloc_aligned().
  107. // Handle this with a custom VM+page allocator instead of using kmalloc_aligned().
  108. auto* slot = kmalloc_aligned(KmallocSlabBlock::block_size, KmallocSlabBlock::block_size);
  109. if (!slot) {
  110. // FIXME: Dare to return nullptr!
  111. PANIC("OOM while growing slabheap ({})", m_slab_size);
  112. }
  113. auto* block = new (slot) KmallocSlabBlock(m_slab_size);
  114. m_usable_blocks.append(*block);
  115. }
  116. auto* block = m_usable_blocks.first();
  117. auto* ptr = block->allocate();
  118. if (block->is_full())
  119. m_full_blocks.append(*block);
  120. memset(ptr, KMALLOC_SCRUB_BYTE, m_slab_size);
  121. return ptr;
  122. }
  123. void deallocate(void* ptr)
  124. {
  125. memset(ptr, KFREE_SCRUB_BYTE, m_slab_size);
  126. auto* block = (KmallocSlabBlock*)((FlatPtr)ptr & KmallocSlabBlock::block_mask);
  127. bool block_was_full = block->is_full();
  128. block->deallocate(ptr);
  129. if (block_was_full)
  130. m_usable_blocks.append(*block);
  131. }
  132. size_t allocated_bytes() const
  133. {
  134. size_t total = m_full_blocks.size_slow() * KmallocSlabBlock::block_size;
  135. for (auto const& slab_block : m_usable_blocks)
  136. total += slab_block.allocated_bytes();
  137. return total;
  138. }
  139. size_t free_bytes() const
  140. {
  141. size_t total = 0;
  142. for (auto const& slab_block : m_usable_blocks)
  143. total += slab_block.free_bytes();
  144. return total;
  145. }
  146. bool try_purge()
  147. {
  148. bool did_purge = false;
  149. // Note: We cannot remove children from the list when using a structured loop,
  150. // Because we need to advance the iterator before we delete the underlying
  151. // value, so we have to iterate manually
  152. auto block = m_usable_blocks.begin();
  153. while (block != m_usable_blocks.end()) {
  154. if (block->allocated_bytes() != 0) {
  155. ++block;
  156. continue;
  157. }
  158. auto& block_to_remove = *block;
  159. ++block;
  160. block_to_remove.list_node.remove();
  161. block_to_remove.~KmallocSlabBlock();
  162. kfree_aligned(&block_to_remove);
  163. did_purge = true;
  164. }
  165. return did_purge;
  166. }
  167. private:
  168. size_t m_slab_size { 0 };
  169. KmallocSlabBlock::List m_usable_blocks;
  170. KmallocSlabBlock::List m_full_blocks;
  171. };
  172. struct KmallocGlobalData {
  173. static constexpr size_t minimum_subheap_size = 1 * MiB;
  174. KmallocGlobalData(u8* initial_heap, size_t initial_heap_size)
  175. {
  176. add_subheap(initial_heap, initial_heap_size);
  177. }
  178. void add_subheap(u8* storage, size_t storage_size)
  179. {
  180. dbgln_if(KMALLOC_DEBUG, "Adding kmalloc subheap @ {} with size {}", storage, storage_size);
  181. static_assert(sizeof(KmallocSubheap) <= PAGE_SIZE);
  182. auto* subheap = new (storage) KmallocSubheap(storage + PAGE_SIZE, storage_size - PAGE_SIZE);
  183. subheaps.append(*subheap);
  184. }
  185. void* allocate(size_t size)
  186. {
  187. VERIFY(!expansion_in_progress);
  188. for (auto& slabheap : slabheaps) {
  189. if (size <= slabheap.slab_size())
  190. return slabheap.allocate();
  191. }
  192. for (auto& subheap : subheaps) {
  193. if (auto* ptr = subheap.allocator.allocate(size))
  194. return ptr;
  195. }
  196. // NOTE: This size calculation is a mirror of kmalloc_aligned(KmallocSlabBlock)
  197. if (size <= KmallocSlabBlock::block_size * 2 + sizeof(ptrdiff_t) + sizeof(size_t)) {
  198. // FIXME: We should propagate a freed pointer, to find the specific subheap it belonged to
  199. // This would save us iterating over them in the next step and remove a recursion
  200. bool did_purge = false;
  201. for (auto& slabheap : slabheaps) {
  202. if (slabheap.try_purge()) {
  203. dbgln_if(KMALLOC_DEBUG, "Kmalloc purged block(s) from slabheap of size {} to avoid expansion", slabheap.slab_size());
  204. did_purge = true;
  205. break;
  206. }
  207. }
  208. if (did_purge)
  209. return allocate(size);
  210. }
  211. if (!try_expand(size)) {
  212. PANIC("OOM when trying to expand kmalloc heap.");
  213. }
  214. return allocate(size);
  215. }
  216. void deallocate(void* ptr, size_t size)
  217. {
  218. VERIFY(!expansion_in_progress);
  219. VERIFY(is_valid_kmalloc_address(VirtualAddress { ptr }));
  220. for (auto& slabheap : slabheaps) {
  221. if (size <= slabheap.slab_size())
  222. return slabheap.deallocate(ptr);
  223. }
  224. for (auto& subheap : subheaps) {
  225. if (subheap.allocator.contains(ptr)) {
  226. subheap.allocator.deallocate(ptr);
  227. return;
  228. }
  229. }
  230. PANIC("Bogus pointer passed to kfree_sized({:p}, {})", ptr, size);
  231. }
  232. size_t allocated_bytes() const
  233. {
  234. size_t total = 0;
  235. for (auto const& subheap : subheaps)
  236. total += subheap.allocator.allocated_bytes();
  237. for (auto const& slabheap : slabheaps)
  238. total += slabheap.allocated_bytes();
  239. return total;
  240. }
  241. size_t free_bytes() const
  242. {
  243. size_t total = 0;
  244. for (auto const& subheap : subheaps)
  245. total += subheap.allocator.free_bytes();
  246. for (auto const& slabheap : slabheaps)
  247. total += slabheap.free_bytes();
  248. return total;
  249. }
  250. bool try_expand(size_t allocation_request)
  251. {
  252. VERIFY(!expansion_in_progress);
  253. TemporaryChange change(expansion_in_progress, true);
  254. auto new_subheap_base = expansion_data->next_virtual_address;
  255. Checked<size_t> padded_allocation_request = allocation_request;
  256. padded_allocation_request *= 2;
  257. padded_allocation_request += PAGE_SIZE;
  258. if (padded_allocation_request.has_overflow()) {
  259. PANIC("Integer overflow during kmalloc heap expansion");
  260. }
  261. auto rounded_allocation_request = Memory::page_round_up(padded_allocation_request.value());
  262. if (rounded_allocation_request.is_error()) {
  263. PANIC("Integer overflow computing pages for kmalloc heap expansion");
  264. }
  265. size_t new_subheap_size = max(minimum_subheap_size, rounded_allocation_request.value());
  266. dbgln_if(KMALLOC_DEBUG, "Unable to allocate {}, expanding kmalloc heap", allocation_request);
  267. if (!expansion_data->virtual_range.contains(new_subheap_base, new_subheap_size)) {
  268. // FIXME: Dare to return false and allow kmalloc() to fail!
  269. PANIC("Out of address space when expanding kmalloc heap.");
  270. }
  271. auto physical_pages_or_error = MM.commit_physical_pages(new_subheap_size / PAGE_SIZE);
  272. if (physical_pages_or_error.is_error()) {
  273. // FIXME: Dare to return false!
  274. PANIC("Out of physical pages when expanding kmalloc heap.");
  275. }
  276. auto physical_pages = physical_pages_or_error.release_value();
  277. expansion_data->next_virtual_address = expansion_data->next_virtual_address.offset(new_subheap_size);
  278. auto cpu_supports_nx = Processor::current().has_nx();
  279. SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock());
  280. SpinlockLocker mm_locker(Memory::s_mm_lock);
  281. for (auto vaddr = new_subheap_base; !physical_pages.is_empty(); vaddr = vaddr.offset(PAGE_SIZE)) {
  282. // FIXME: We currently leak physical memory when mapping it into the kmalloc heap.
  283. auto& page = physical_pages.take_one().leak_ref();
  284. auto* pte = MM.pte(MM.kernel_page_directory(), vaddr);
  285. VERIFY(pte);
  286. pte->set_physical_page_base(page.paddr().get());
  287. pte->set_global(true);
  288. pte->set_user_allowed(false);
  289. pte->set_writable(true);
  290. if (cpu_supports_nx)
  291. pte->set_execute_disabled(true);
  292. pte->set_present(true);
  293. }
  294. add_subheap(new_subheap_base.as_ptr(), new_subheap_size);
  295. return true;
  296. }
  297. void enable_expansion()
  298. {
  299. // FIXME: This range can be much bigger on 64-bit, but we need to figure something out for 32-bit.
  300. auto reserved_region = MUST(MM.allocate_unbacked_region_anywhere(64 * MiB, 1 * MiB));
  301. expansion_data = KmallocGlobalData::ExpansionData {
  302. .virtual_range = reserved_region->range(),
  303. .next_virtual_address = reserved_region->range().base(),
  304. };
  305. // Make sure the entire kmalloc VM range is backed by page tables.
  306. // This avoids having to deal with lazy page table allocation during heap expansion.
  307. SpinlockLocker pd_locker(MM.kernel_page_directory().get_lock());
  308. SpinlockLocker mm_locker(Memory::s_mm_lock);
  309. for (auto vaddr = reserved_region->range().base(); vaddr < reserved_region->range().end(); vaddr = vaddr.offset(PAGE_SIZE)) {
  310. MM.ensure_pte(MM.kernel_page_directory(), vaddr);
  311. }
  312. (void)reserved_region.leak_ptr();
  313. }
  314. struct ExpansionData {
  315. Memory::VirtualRange virtual_range;
  316. VirtualAddress next_virtual_address;
  317. };
  318. Optional<ExpansionData> expansion_data;
  319. bool is_valid_kmalloc_address(VirtualAddress vaddr) const
  320. {
  321. if (vaddr.as_ptr() >= initial_kmalloc_memory && vaddr.as_ptr() < (initial_kmalloc_memory + INITIAL_KMALLOC_MEMORY_SIZE))
  322. return true;
  323. if (!expansion_data.has_value())
  324. return false;
  325. return expansion_data->virtual_range.contains(vaddr);
  326. }
  327. KmallocSubheap::List subheaps;
  328. KmallocSlabheap slabheaps[6] = { 16, 32, 64, 128, 256, 512 };
  329. bool expansion_in_progress { false };
  330. };
  331. READONLY_AFTER_INIT static KmallocGlobalData* g_kmalloc_global;
  332. alignas(KmallocGlobalData) static u8 g_kmalloc_global_heap[sizeof(KmallocGlobalData)];
  333. static size_t g_kmalloc_call_count;
  334. static size_t g_kfree_call_count;
  335. static size_t g_nested_kfree_calls;
  336. bool g_dump_kmalloc_stacks;
  337. void kmalloc_enable_expand()
  338. {
  339. g_kmalloc_global->enable_expansion();
  340. }
  341. static inline void kmalloc_verify_nospinlock_held()
  342. {
  343. // Catch bad callers allocating under spinlock.
  344. if constexpr (KMALLOC_VERIFY_NO_SPINLOCK_HELD) {
  345. VERIFY(!Processor::in_critical());
  346. }
  347. }
  348. UNMAP_AFTER_INIT void kmalloc_init()
  349. {
  350. // Zero out heap since it's placed after end_of_kernel_bss.
  351. memset(initial_kmalloc_memory, 0, sizeof(initial_kmalloc_memory));
  352. g_kmalloc_global = new (g_kmalloc_global_heap) KmallocGlobalData(initial_kmalloc_memory, sizeof(initial_kmalloc_memory));
  353. s_lock.initialize();
  354. }
  355. void* kmalloc(size_t size)
  356. {
  357. kmalloc_verify_nospinlock_held();
  358. SpinlockLocker lock(s_lock);
  359. ++g_kmalloc_call_count;
  360. if (g_dump_kmalloc_stacks && Kernel::g_kernel_symbols_available) {
  361. dbgln("kmalloc({})", size);
  362. Kernel::dump_backtrace();
  363. }
  364. void* ptr = g_kmalloc_global->allocate(size);
  365. Thread* current_thread = Thread::current();
  366. if (!current_thread)
  367. current_thread = Processor::idle_thread();
  368. if (current_thread) {
  369. // FIXME: By the time we check this, we have already allocated above.
  370. // This means that in the case of an infinite recursion, we can't catch it this way.
  371. VERIFY(current_thread->is_allocation_enabled());
  372. PerformanceManager::add_kmalloc_perf_event(*current_thread, size, (FlatPtr)ptr);
  373. }
  374. return ptr;
  375. }
  376. void* kcalloc(size_t count, size_t size)
  377. {
  378. if (Checked<size_t>::multiplication_would_overflow(count, size))
  379. return nullptr;
  380. size_t new_size = count * size;
  381. auto* ptr = kmalloc(new_size);
  382. // FIXME: Avoid redundantly scrubbing the memory in kmalloc()
  383. if (ptr)
  384. memset(ptr, 0, new_size);
  385. return ptr;
  386. }
  387. void kfree_sized(void* ptr, size_t size)
  388. {
  389. if (!ptr)
  390. return;
  391. VERIFY(size > 0);
  392. kmalloc_verify_nospinlock_held();
  393. SpinlockLocker lock(s_lock);
  394. ++g_kfree_call_count;
  395. ++g_nested_kfree_calls;
  396. if (g_nested_kfree_calls == 1) {
  397. Thread* current_thread = Thread::current();
  398. if (!current_thread)
  399. current_thread = Processor::idle_thread();
  400. if (current_thread) {
  401. VERIFY(current_thread->is_allocation_enabled());
  402. PerformanceManager::add_kfree_perf_event(*current_thread, 0, (FlatPtr)ptr);
  403. }
  404. }
  405. g_kmalloc_global->deallocate(ptr, size);
  406. --g_nested_kfree_calls;
  407. }
  408. size_t kmalloc_good_size(size_t size)
  409. {
  410. VERIFY(size > 0);
  411. // NOTE: There's no need to take the kmalloc lock, as the kmalloc slab-heaps (and their sizes) are constant
  412. for (auto const& slabheap : g_kmalloc_global->slabheaps) {
  413. if (size <= slabheap.slab_size())
  414. return slabheap.slab_size();
  415. }
  416. return round_up_to_power_of_two(size + Heap<CHUNK_SIZE>::AllocationHeaderSize, CHUNK_SIZE) - Heap<CHUNK_SIZE>::AllocationHeaderSize;
  417. }
  418. void* kmalloc_aligned(size_t size, size_t alignment)
  419. {
  420. Checked<size_t> real_allocation_size = size;
  421. real_allocation_size += alignment;
  422. real_allocation_size += sizeof(ptrdiff_t) + sizeof(size_t);
  423. void* ptr = kmalloc(real_allocation_size.value());
  424. if (ptr == nullptr)
  425. return nullptr;
  426. size_t max_addr = (size_t)ptr + alignment;
  427. void* aligned_ptr = (void*)(max_addr - (max_addr % alignment));
  428. ((ptrdiff_t*)aligned_ptr)[-1] = (ptrdiff_t)((u8*)aligned_ptr - (u8*)ptr);
  429. ((size_t*)aligned_ptr)[-2] = real_allocation_size.value();
  430. return aligned_ptr;
  431. }
  432. void* operator new(size_t size)
  433. {
  434. void* ptr = kmalloc(size);
  435. VERIFY(ptr);
  436. return ptr;
  437. }
  438. void* operator new(size_t size, std::nothrow_t const&) noexcept
  439. {
  440. return kmalloc(size);
  441. }
  442. void* operator new(size_t size, std::align_val_t al)
  443. {
  444. void* ptr = kmalloc_aligned(size, (size_t)al);
  445. VERIFY(ptr);
  446. return ptr;
  447. }
  448. void* operator new(size_t size, std::align_val_t al, std::nothrow_t const&) noexcept
  449. {
  450. return kmalloc_aligned(size, (size_t)al);
  451. }
  452. void* operator new[](size_t size)
  453. {
  454. void* ptr = kmalloc(size);
  455. VERIFY(ptr);
  456. return ptr;
  457. }
  458. void* operator new[](size_t size, std::nothrow_t const&) noexcept
  459. {
  460. return kmalloc(size);
  461. }
  462. void operator delete(void*) noexcept
  463. {
  464. // All deletes in kernel code should have a known size.
  465. VERIFY_NOT_REACHED();
  466. }
  467. void operator delete(void* ptr, size_t size) noexcept
  468. {
  469. return kfree_sized(ptr, size);
  470. }
  471. void operator delete(void* ptr, size_t, std::align_val_t) noexcept
  472. {
  473. return kfree_aligned(ptr);
  474. }
  475. void operator delete[](void*) noexcept
  476. {
  477. // All deletes in kernel code should have a known size.
  478. VERIFY_NOT_REACHED();
  479. }
  480. void operator delete[](void* ptr, size_t size) noexcept
  481. {
  482. return kfree_sized(ptr, size);
  483. }
  484. void get_kmalloc_stats(kmalloc_stats& stats)
  485. {
  486. SpinlockLocker lock(s_lock);
  487. stats.bytes_allocated = g_kmalloc_global->allocated_bytes();
  488. stats.bytes_free = g_kmalloc_global->free_bytes();
  489. stats.kmalloc_call_count = g_kmalloc_call_count;
  490. stats.kfree_call_count = g_kfree_call_count;
  491. }