MemoryManager.cpp 32 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945
  1. #include "MemoryManager.h"
  2. #include <AK/Assertions.h>
  3. #include <AK/kstdio.h>
  4. #include <AK/kmalloc.h>
  5. #include "i386.h"
  6. #include "StdLib.h"
  7. #include "Process.h"
  8. #include <LibC/errno_numbers.h>
  9. #include "CMOS.h"
  10. //#define MM_DEBUG
  11. //#define PAGE_FAULT_DEBUG
  12. static MemoryManager* s_the;
  13. MemoryManager& MM
  14. {
  15. return *s_the;
  16. }
  17. MemoryManager::MemoryManager()
  18. {
  19. // FIXME: This is not the best way to do memory map detection.
  20. // Rewrite to use BIOS int 15,e820 once we have VM86 support.
  21. word base_memory = (CMOS::read(0x16) << 8) | CMOS::read(0x15);
  22. word ext_memory = (CMOS::read(0x18) << 8) | CMOS::read(0x17);
  23. kprintf("%u kB base memory\n", base_memory);
  24. kprintf("%u kB extended memory\n", ext_memory);
  25. m_ram_size = ext_memory * 1024;
  26. m_kernel_page_directory = PageDirectory::create_at_fixed_address(PhysicalAddress(0x4000));
  27. m_page_table_zero = (dword*)0x6000;
  28. initialize_paging();
  29. }
  30. MemoryManager::~MemoryManager()
  31. {
  32. }
  33. PageDirectory::PageDirectory(PhysicalAddress paddr)
  34. {
  35. m_directory_page = PhysicalPage::create_eternal(paddr, true);
  36. }
  37. PageDirectory::PageDirectory()
  38. {
  39. MM.populate_page_directory(*this);
  40. }
  41. void MemoryManager::populate_page_directory(PageDirectory& page_directory)
  42. {
  43. page_directory.m_directory_page = allocate_supervisor_physical_page();
  44. page_directory.entries()[0] = kernel_page_directory().entries()[0];
  45. }
  46. void MemoryManager::initialize_paging()
  47. {
  48. static_assert(sizeof(MemoryManager::PageDirectoryEntry) == 4);
  49. static_assert(sizeof(MemoryManager::PageTableEntry) == 4);
  50. memset(m_page_table_zero, 0, PAGE_SIZE);
  51. #ifdef MM_DEBUG
  52. dbgprintf("MM: Kernel page directory @ %p\n", kernel_page_directory().cr3());
  53. #endif
  54. #ifdef MM_DEBUG
  55. dbgprintf("MM: Protect against null dereferences\n");
  56. #endif
  57. // Make null dereferences crash.
  58. map_protected(LinearAddress(0), PAGE_SIZE);
  59. #ifdef MM_DEBUG
  60. dbgprintf("MM: Identity map bottom 4MB\n");
  61. #endif
  62. // The bottom 4 MB (except for the null page) are identity mapped & supervisor only.
  63. // Every process shares these mappings.
  64. create_identity_mapping(kernel_page_directory(), LinearAddress(PAGE_SIZE), (4 * MB) - PAGE_SIZE);
  65. // Basic memory map:
  66. // 0 -> 512 kB Kernel code. Root page directory & PDE 0.
  67. // (last page before 1MB) Used by quickmap_page().
  68. // 1 MB -> 2 MB kmalloc_eternal() space.
  69. // 2 MB -> 3 MB kmalloc() space.
  70. // 3 MB -> 4 MB Supervisor physical pages (available for allocation!)
  71. // 4 MB -> (max) MB Userspace physical pages (available for allocation!)
  72. for (size_t i = (2 * MB); i < (4 * MB); i += PAGE_SIZE)
  73. m_free_supervisor_physical_pages.append(PhysicalPage::create_eternal(PhysicalAddress(i), true));
  74. dbgprintf("MM: 4MB-%uMB available for allocation\n", m_ram_size / 1048576);
  75. for (size_t i = (4 * MB); i < m_ram_size; i += PAGE_SIZE)
  76. m_free_physical_pages.append(PhysicalPage::create_eternal(PhysicalAddress(i), false));
  77. m_quickmap_addr = LinearAddress((1 * MB) - PAGE_SIZE);
  78. #ifdef MM_DEBUG
  79. dbgprintf("MM: Quickmap will use P%x\n", m_quickmap_addr.get());
  80. dbgprintf("MM: Installing page directory\n");
  81. #endif
  82. asm volatile("movl %%eax, %%cr3"::"a"(kernel_page_directory().cr3()));
  83. asm volatile(
  84. "movl %%cr0, %%eax\n"
  85. "orl $0x80000001, %%eax\n"
  86. "movl %%eax, %%cr0\n"
  87. :::"%eax", "memory");
  88. }
  89. RetainPtr<PhysicalPage> MemoryManager::allocate_page_table(PageDirectory& page_directory, unsigned index)
  90. {
  91. ASSERT(!page_directory.m_physical_pages.contains(index));
  92. auto physical_page = allocate_supervisor_physical_page();
  93. if (!physical_page)
  94. return nullptr;
  95. page_directory.m_physical_pages.set(index, physical_page.copy_ref());
  96. return physical_page;
  97. }
  98. void MemoryManager::remove_identity_mapping(PageDirectory& page_directory, LinearAddress laddr, size_t size)
  99. {
  100. InterruptDisabler disabler;
  101. // FIXME: ASSERT(laddr is 4KB aligned);
  102. for (dword offset = 0; offset < size; offset += PAGE_SIZE) {
  103. auto pte_address = laddr.offset(offset);
  104. auto pte = ensure_pte(page_directory, pte_address);
  105. pte.set_physical_page_base(0);
  106. pte.set_user_allowed(false);
  107. pte.set_present(true);
  108. pte.set_writable(true);
  109. flush_tlb(pte_address);
  110. }
  111. }
  112. auto MemoryManager::ensure_pte(PageDirectory& page_directory, LinearAddress laddr) -> PageTableEntry
  113. {
  114. ASSERT_INTERRUPTS_DISABLED();
  115. dword page_directory_index = (laddr.get() >> 22) & 0x3ff;
  116. dword page_table_index = (laddr.get() >> 12) & 0x3ff;
  117. PageDirectoryEntry pde = PageDirectoryEntry(&page_directory.entries()[page_directory_index]);
  118. if (!pde.is_present()) {
  119. #ifdef MM_DEBUG
  120. dbgprintf("MM: PDE %u not present (requested for L%x), allocating\n", page_directory_index, laddr.get());
  121. #endif
  122. if (page_directory_index == 0) {
  123. ASSERT(&page_directory == m_kernel_page_directory.ptr());
  124. pde.set_page_table_base((dword)m_page_table_zero);
  125. pde.set_user_allowed(false);
  126. pde.set_present(true);
  127. pde.set_writable(true);
  128. } else {
  129. ASSERT(&page_directory != m_kernel_page_directory.ptr());
  130. auto page_table = allocate_page_table(page_directory, page_directory_index);
  131. #ifdef MM_DEBUG
  132. dbgprintf("MM: PD K%x (%s) at P%x allocated page table #%u (for L%x) at P%x\n",
  133. &page_directory,
  134. &page_directory == m_kernel_page_directory.ptr() ? "Kernel" : "User",
  135. page_directory.cr3(),
  136. page_directory_index,
  137. laddr.get(),
  138. page_table->paddr().get());
  139. #endif
  140. pde.set_page_table_base(page_table->paddr().get());
  141. pde.set_user_allowed(true);
  142. pde.set_present(true);
  143. pde.set_writable(true);
  144. page_directory.m_physical_pages.set(page_directory_index, move(page_table));
  145. }
  146. }
  147. return PageTableEntry(&pde.page_table_base()[page_table_index]);
  148. }
  149. void MemoryManager::map_protected(LinearAddress laddr, size_t length)
  150. {
  151. InterruptDisabler disabler;
  152. // FIXME: ASSERT(linearAddress is 4KB aligned);
  153. for (dword offset = 0; offset < length; offset += PAGE_SIZE) {
  154. auto pte_address = laddr.offset(offset);
  155. auto pte = ensure_pte(kernel_page_directory(), pte_address);
  156. pte.set_physical_page_base(pte_address.get());
  157. pte.set_user_allowed(false);
  158. pte.set_present(false);
  159. pte.set_writable(false);
  160. flush_tlb(pte_address);
  161. }
  162. }
  163. void MemoryManager::create_identity_mapping(PageDirectory& page_directory, LinearAddress laddr, size_t size)
  164. {
  165. InterruptDisabler disabler;
  166. ASSERT((laddr.get() & ~PAGE_MASK) == 0);
  167. for (dword offset = 0; offset < size; offset += PAGE_SIZE) {
  168. auto pte_address = laddr.offset(offset);
  169. auto pte = ensure_pte(page_directory, pte_address);
  170. pte.set_physical_page_base(pte_address.get());
  171. pte.set_user_allowed(false);
  172. pte.set_present(true);
  173. pte.set_writable(true);
  174. page_directory.flush(pte_address);
  175. }
  176. }
  177. void MemoryManager::initialize()
  178. {
  179. s_the = new MemoryManager;
  180. }
  181. Region* MemoryManager::region_from_laddr(Process& process, LinearAddress laddr)
  182. {
  183. ASSERT_INTERRUPTS_DISABLED();
  184. // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure!
  185. for (auto& region : process.m_regions) {
  186. if (region->contains(laddr))
  187. return region.ptr();
  188. }
  189. dbgprintf("%s(%u) Couldn't find region for L%x (CR3=%x)\n", process.name().characters(), process.pid(), laddr.get(), process.page_directory().cr3());
  190. return nullptr;
  191. }
  192. const Region* MemoryManager::region_from_laddr(const Process& process, LinearAddress laddr)
  193. {
  194. // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure!
  195. for (auto& region : process.m_regions) {
  196. if (region->contains(laddr))
  197. return region.ptr();
  198. }
  199. dbgprintf("%s(%u) Couldn't find region for L%x (CR3=%x)\n", process.name().characters(), process.pid(), laddr.get(), process.page_directory().cr3());
  200. return nullptr;
  201. }
  202. bool MemoryManager::zero_page(Region& region, unsigned page_index_in_region)
  203. {
  204. ASSERT_INTERRUPTS_DISABLED();
  205. auto& vmo = region.vmo();
  206. auto& vmo_page = vmo.physical_pages()[region.first_page_index() + page_index_in_region];
  207. sti();
  208. LOCKER(vmo.m_paging_lock);
  209. cli();
  210. if (!vmo_page.is_null()) {
  211. #ifdef PAGE_FAULT_DEBUG
  212. dbgprintf("MM: zero_page() but page already present. Fine with me!\n");
  213. #endif
  214. remap_region_page(region, page_index_in_region, true);
  215. return true;
  216. }
  217. auto physical_page = allocate_physical_page(ShouldZeroFill::Yes);
  218. #ifdef PAGE_FAULT_DEBUG
  219. dbgprintf(" >> ZERO P%x\n", physical_page->paddr().get());
  220. #endif
  221. region.m_cow_map.set(page_index_in_region, false);
  222. vmo.physical_pages()[page_index_in_region] = move(physical_page);
  223. remap_region_page(region, page_index_in_region, true);
  224. return true;
  225. }
  226. bool MemoryManager::copy_on_write(Region& region, unsigned page_index_in_region)
  227. {
  228. ASSERT_INTERRUPTS_DISABLED();
  229. auto& vmo = region.vmo();
  230. if (vmo.physical_pages()[page_index_in_region]->retain_count() == 1) {
  231. #ifdef PAGE_FAULT_DEBUG
  232. dbgprintf(" >> It's a COW page but nobody is sharing it anymore. Remap r/w\n");
  233. #endif
  234. region.m_cow_map.set(page_index_in_region, false);
  235. remap_region_page(region, page_index_in_region, true);
  236. return true;
  237. }
  238. #ifdef PAGE_FAULT_DEBUG
  239. dbgprintf(" >> It's a COW page and it's time to COW!\n");
  240. #endif
  241. auto physical_page_to_copy = move(vmo.physical_pages()[page_index_in_region]);
  242. auto physical_page = allocate_physical_page(ShouldZeroFill::No);
  243. byte* dest_ptr = quickmap_page(*physical_page);
  244. const byte* src_ptr = region.laddr().offset(page_index_in_region * PAGE_SIZE).as_ptr();
  245. #ifdef PAGE_FAULT_DEBUG
  246. dbgprintf(" >> COW P%x <- P%x\n", physical_page->paddr().get(), physical_page_to_copy->paddr().get());
  247. #endif
  248. memcpy(dest_ptr, src_ptr, PAGE_SIZE);
  249. vmo.physical_pages()[page_index_in_region] = move(physical_page);
  250. unquickmap_page();
  251. region.m_cow_map.set(page_index_in_region, false);
  252. remap_region_page(region, page_index_in_region, true);
  253. return true;
  254. }
  255. bool Region::page_in()
  256. {
  257. ASSERT(m_page_directory);
  258. ASSERT(!vmo().is_anonymous());
  259. ASSERT(vmo().inode());
  260. #ifdef MM_DEBUG
  261. dbgprintf("MM: page_in %u pages\n", page_count());
  262. #endif
  263. for (size_t i = 0; i < page_count(); ++i) {
  264. auto& vmo_page = vmo().physical_pages()[first_page_index() + i];
  265. if (vmo_page.is_null()) {
  266. bool success = MM.page_in_from_inode(*this, i);
  267. if (!success)
  268. return false;
  269. }
  270. MM.remap_region_page(*this, i, true);
  271. }
  272. return true;
  273. }
  274. bool MemoryManager::page_in_from_inode(Region& region, unsigned page_index_in_region)
  275. {
  276. ASSERT(region.page_directory());
  277. auto& vmo = region.vmo();
  278. ASSERT(!vmo.is_anonymous());
  279. ASSERT(vmo.inode());
  280. auto& vmo_page = vmo.physical_pages()[region.first_page_index() + page_index_in_region];
  281. InterruptFlagSaver saver;
  282. sti();
  283. LOCKER(vmo.m_paging_lock);
  284. cli();
  285. if (!vmo_page.is_null()) {
  286. dbgprintf("MM: page_in_from_inode() but page already present. Fine with me!\n");
  287. remap_region_page(region, page_index_in_region, true);
  288. return true;
  289. }
  290. #ifdef MM_DEBUG
  291. dbgprintf("MM: page_in_from_inode ready to read from inode\n");
  292. #endif
  293. sti();
  294. byte page_buffer[PAGE_SIZE];
  295. auto& inode = *vmo.inode();
  296. auto nread = inode.read_bytes(vmo.inode_offset() + ((region.first_page_index() + page_index_in_region) * PAGE_SIZE), PAGE_SIZE, page_buffer, nullptr);
  297. if (nread < 0) {
  298. kprintf("MM: page_in_from_inode had error (%d) while reading!\n", nread);
  299. return false;
  300. }
  301. if (nread < PAGE_SIZE) {
  302. // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
  303. memset(page_buffer + nread, 0, PAGE_SIZE - nread);
  304. }
  305. cli();
  306. vmo_page = allocate_physical_page(ShouldZeroFill::No);
  307. if (vmo_page.is_null()) {
  308. kprintf("MM: page_in_from_inode was unable to allocate a physical page\n");
  309. return false;
  310. }
  311. remap_region_page(region, page_index_in_region, true);
  312. byte* dest_ptr = region.laddr().offset(page_index_in_region * PAGE_SIZE).as_ptr();
  313. memcpy(dest_ptr, page_buffer, PAGE_SIZE);
  314. return true;
  315. }
  316. PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
  317. {
  318. ASSERT_INTERRUPTS_DISABLED();
  319. #ifdef PAGE_FAULT_DEBUG
  320. dbgprintf("MM: handle_page_fault(%w) at L%x\n", fault.code(), fault.laddr().get());
  321. #endif
  322. ASSERT(fault.laddr() != m_quickmap_addr);
  323. auto* region = region_from_laddr(*current, fault.laddr());
  324. if (!region) {
  325. kprintf("NP(error) fault at invalid address L%x\n", fault.laddr().get());
  326. return PageFaultResponse::ShouldCrash;
  327. }
  328. auto page_index_in_region = region->page_index_from_address(fault.laddr());
  329. if (fault.is_not_present()) {
  330. if (region->vmo().inode()) {
  331. #ifdef PAGE_FAULT_DEBUG
  332. dbgprintf("NP(inode) fault in Region{%p}[%u]\n", region, page_index_in_region);
  333. #endif
  334. page_in_from_inode(*region, page_index_in_region);
  335. return PageFaultResponse::Continue;
  336. } else {
  337. #ifdef PAGE_FAULT_DEBUG
  338. dbgprintf("NP(zero) fault in Region{%p}[%u]\n", region, page_index_in_region);
  339. #endif
  340. zero_page(*region, page_index_in_region);
  341. return PageFaultResponse::Continue;
  342. }
  343. } else if (fault.is_protection_violation()) {
  344. if (region->m_cow_map.get(page_index_in_region)) {
  345. #ifdef PAGE_FAULT_DEBUG
  346. dbgprintf("PV(cow) fault in Region{%p}[%u]\n", region, page_index_in_region);
  347. #endif
  348. bool success = copy_on_write(*region, page_index_in_region);
  349. ASSERT(success);
  350. return PageFaultResponse::Continue;
  351. }
  352. kprintf("PV(error) fault in Region{%p}[%u]\n", region, page_index_in_region);
  353. } else {
  354. ASSERT_NOT_REACHED();
  355. }
  356. return PageFaultResponse::ShouldCrash;
  357. }
  358. RetainPtr<PhysicalPage> MemoryManager::allocate_physical_page(ShouldZeroFill should_zero_fill)
  359. {
  360. InterruptDisabler disabler;
  361. if (1 > m_free_physical_pages.size()) {
  362. kprintf("FUCK! No physical pages available.\n");
  363. ASSERT_NOT_REACHED();
  364. return { };
  365. }
  366. #ifdef MM_DEBUG
  367. dbgprintf("MM: allocate_physical_page vending P%x (%u remaining)\n", m_free_physical_pages.last()->paddr().get(), m_free_physical_pages.size());
  368. #endif
  369. auto physical_page = m_free_physical_pages.take_last();
  370. if (should_zero_fill == ShouldZeroFill::Yes) {
  371. auto* ptr = (dword*)quickmap_page(*physical_page);
  372. fast_dword_fill(ptr, 0, PAGE_SIZE / sizeof(dword));
  373. unquickmap_page();
  374. }
  375. return physical_page;
  376. }
  377. RetainPtr<PhysicalPage> MemoryManager::allocate_supervisor_physical_page()
  378. {
  379. InterruptDisabler disabler;
  380. if (1 > m_free_supervisor_physical_pages.size()) {
  381. kprintf("FUCK! No physical pages available.\n");
  382. ASSERT_NOT_REACHED();
  383. return { };
  384. }
  385. #ifdef MM_DEBUG
  386. dbgprintf("MM: allocate_supervisor_physical_page vending P%x (%u remaining)\n", m_free_supervisor_physical_pages.last()->paddr().get(), m_free_supervisor_physical_pages.size());
  387. #endif
  388. auto physical_page = m_free_supervisor_physical_pages.take_last();
  389. fast_dword_fill((dword*)physical_page->paddr().as_ptr(), 0, PAGE_SIZE / sizeof(dword));
  390. return physical_page;
  391. }
  392. void MemoryManager::enter_process_paging_scope(Process& process)
  393. {
  394. InterruptDisabler disabler;
  395. current->m_tss.cr3 = process.page_directory().cr3();
  396. asm volatile("movl %%eax, %%cr3"::"a"(process.page_directory().cr3()):"memory");
  397. }
  398. void MemoryManager::flush_entire_tlb()
  399. {
  400. asm volatile(
  401. "mov %%cr3, %%eax\n"
  402. "mov %%eax, %%cr3\n"
  403. ::: "%eax", "memory"
  404. );
  405. }
  406. void MemoryManager::flush_tlb(LinearAddress laddr)
  407. {
  408. asm volatile("invlpg %0": :"m" (*(char*)laddr.get()) : "memory");
  409. }
  410. byte* MemoryManager::quickmap_page(PhysicalPage& physical_page)
  411. {
  412. ASSERT_INTERRUPTS_DISABLED();
  413. ASSERT(!m_quickmap_in_use);
  414. m_quickmap_in_use = true;
  415. auto page_laddr = m_quickmap_addr;
  416. auto pte = ensure_pte(kernel_page_directory(), page_laddr);
  417. pte.set_physical_page_base(physical_page.paddr().get());
  418. pte.set_present(true);
  419. pte.set_writable(true);
  420. pte.set_user_allowed(false);
  421. flush_tlb(page_laddr);
  422. ASSERT((dword)pte.physical_page_base() == physical_page.paddr().get());
  423. #ifdef MM_DEBUG
  424. dbgprintf("MM: >> quickmap_page L%x => P%x @ PTE=%p\n", page_laddr, physical_page.paddr().get(), pte.ptr());
  425. #endif
  426. return page_laddr.as_ptr();
  427. }
  428. void MemoryManager::unquickmap_page()
  429. {
  430. ASSERT_INTERRUPTS_DISABLED();
  431. ASSERT(m_quickmap_in_use);
  432. auto page_laddr = m_quickmap_addr;
  433. auto pte = ensure_pte(kernel_page_directory(), page_laddr);
  434. #ifdef MM_DEBUG
  435. auto old_physical_address = pte.physical_page_base();
  436. #endif
  437. pte.set_physical_page_base(0);
  438. pte.set_present(false);
  439. pte.set_writable(false);
  440. flush_tlb(page_laddr);
  441. #ifdef MM_DEBUG
  442. dbgprintf("MM: >> unquickmap_page L%x =/> P%x\n", page_laddr, old_physical_address);
  443. #endif
  444. m_quickmap_in_use = false;
  445. }
  446. void MemoryManager::remap_region_page(Region& region, unsigned page_index_in_region, bool user_allowed)
  447. {
  448. ASSERT(region.page_directory());
  449. InterruptDisabler disabler;
  450. auto page_laddr = region.laddr().offset(page_index_in_region * PAGE_SIZE);
  451. auto pte = ensure_pte(*region.page_directory(), page_laddr);
  452. auto& physical_page = region.vmo().physical_pages()[page_index_in_region];
  453. ASSERT(physical_page);
  454. pte.set_physical_page_base(physical_page->paddr().get());
  455. pte.set_present(true); // FIXME: Maybe we should use the is_readable flag here?
  456. if (region.m_cow_map.get(page_index_in_region))
  457. pte.set_writable(false);
  458. else
  459. pte.set_writable(region.is_writable());
  460. pte.set_cache_disabled(!region.vmo().m_allow_cpu_caching);
  461. pte.set_write_through(!region.vmo().m_allow_cpu_caching);
  462. pte.set_user_allowed(user_allowed);
  463. region.page_directory()->flush(page_laddr);
  464. #ifdef MM_DEBUG
  465. dbgprintf("MM: >> remap_region_page (PD=%x, PTE=P%x) '%s' L%x => P%x (@%p)\n", region.page_directory()->cr3(), pte.ptr(), region.name().characters(), page_laddr.get(), physical_page->paddr().get(), physical_page.ptr());
  466. #endif
  467. }
  468. void MemoryManager::remap_region(PageDirectory& page_directory, Region& region)
  469. {
  470. InterruptDisabler disabler;
  471. ASSERT(region.page_directory() == &page_directory);
  472. map_region_at_address(page_directory, region, region.laddr(), true);
  473. }
  474. void MemoryManager::map_region_at_address(PageDirectory& page_directory, Region& region, LinearAddress laddr, bool user_allowed)
  475. {
  476. InterruptDisabler disabler;
  477. region.set_page_directory(page_directory);
  478. auto& vmo = region.vmo();
  479. #ifdef MM_DEBUG
  480. dbgprintf("MM: map_region_at_address will map VMO pages %u - %u (VMO page count: %u)\n", region.first_page_index(), region.last_page_index(), vmo.page_count());
  481. #endif
  482. for (size_t i = 0; i < region.page_count(); ++i) {
  483. auto page_laddr = laddr.offset(i * PAGE_SIZE);
  484. auto pte = ensure_pte(page_directory, page_laddr);
  485. auto& physical_page = vmo.physical_pages()[region.first_page_index() + i];
  486. if (physical_page) {
  487. pte.set_physical_page_base(physical_page->paddr().get());
  488. pte.set_present(true); // FIXME: Maybe we should use the is_readable flag here?
  489. // FIXME: It seems wrong that the *region* cow map is essentially using *VMO* relative indices.
  490. if (region.m_cow_map.get(region.first_page_index() + i))
  491. pte.set_writable(false);
  492. else
  493. pte.set_writable(region.is_writable());
  494. pte.set_cache_disabled(!region.vmo().m_allow_cpu_caching);
  495. pte.set_write_through(!region.vmo().m_allow_cpu_caching);
  496. } else {
  497. pte.set_physical_page_base(0);
  498. pte.set_present(false);
  499. pte.set_writable(region.is_writable());
  500. }
  501. pte.set_user_allowed(user_allowed);
  502. page_directory.flush(page_laddr);
  503. #ifdef MM_DEBUG
  504. dbgprintf("MM: >> map_region_at_address (PD=%x) '%s' L%x => P%x (@%p)\n", &page_directory, region.name().characters(), page_laddr, physical_page ? physical_page->paddr().get() : 0, physical_page.ptr());
  505. #endif
  506. }
  507. }
  508. bool MemoryManager::unmap_region(Region& region)
  509. {
  510. ASSERT(region.page_directory());
  511. InterruptDisabler disabler;
  512. for (size_t i = 0; i < region.page_count(); ++i) {
  513. auto laddr = region.laddr().offset(i * PAGE_SIZE);
  514. auto pte = ensure_pte(*region.page_directory(), laddr);
  515. pte.set_physical_page_base(0);
  516. pte.set_present(false);
  517. pte.set_writable(false);
  518. pte.set_user_allowed(false);
  519. region.page_directory()->flush(laddr);
  520. #ifdef MM_DEBUG
  521. auto& physical_page = region.vmo().physical_pages()[region.first_page_index() + i];
  522. dbgprintf("MM: >> Unmapped L%x => P%x <<\n", laddr, physical_page ? physical_page->paddr().get() : 0);
  523. #endif
  524. }
  525. region.release_page_directory();
  526. return true;
  527. }
  528. bool MemoryManager::map_region(Process& process, Region& region)
  529. {
  530. map_region_at_address(process.page_directory(), region, region.laddr(), true);
  531. return true;
  532. }
  533. bool MemoryManager::validate_user_read(const Process& process, LinearAddress laddr) const
  534. {
  535. auto* region = region_from_laddr(process, laddr);
  536. return region && region->is_readable();
  537. }
  538. bool MemoryManager::validate_user_write(const Process& process, LinearAddress laddr) const
  539. {
  540. auto* region = region_from_laddr(process, laddr);
  541. return region && region->is_writable();
  542. }
  543. Retained<Region> Region::clone()
  544. {
  545. if (m_shared || (m_readable && !m_writable)) {
  546. dbgprintf("%s<%u> Region::clone(): sharing %s (L%x)\n",
  547. current->name().characters(),
  548. current->pid(),
  549. m_name.characters(),
  550. laddr().get());
  551. // Create a new region backed by the same VMObject.
  552. return adopt(*new Region(laddr(), size(), m_vmo.copy_ref(), m_offset_in_vmo, String(m_name), m_readable, m_writable));
  553. }
  554. dbgprintf("%s<%u> Region::clone(): cowing %s (L%x)\n",
  555. current->name().characters(),
  556. current->pid(),
  557. m_name.characters(),
  558. laddr().get());
  559. // Set up a COW region. The parent (this) region becomes COW as well!
  560. for (size_t i = 0; i < page_count(); ++i)
  561. m_cow_map.set(i, true);
  562. MM.remap_region(current->page_directory(), *this);
  563. return adopt(*new Region(laddr(), size(), m_vmo->clone(), m_offset_in_vmo, String(m_name), m_readable, m_writable, true));
  564. }
  565. Region::Region(LinearAddress a, size_t s, String&& n, bool r, bool w, bool cow)
  566. : m_laddr(a)
  567. , m_size(s)
  568. , m_vmo(VMObject::create_anonymous(s))
  569. , m_name(move(n))
  570. , m_readable(r)
  571. , m_writable(w)
  572. , m_cow_map(Bitmap::create(m_vmo->page_count(), cow))
  573. {
  574. m_vmo->set_name(m_name);
  575. MM.register_region(*this);
  576. }
  577. Region::Region(LinearAddress a, size_t s, RetainPtr<Inode>&& inode, String&& n, bool r, bool w)
  578. : m_laddr(a)
  579. , m_size(s)
  580. , m_vmo(VMObject::create_file_backed(move(inode)))
  581. , m_name(move(n))
  582. , m_readable(r)
  583. , m_writable(w)
  584. , m_cow_map(Bitmap::create(m_vmo->page_count()))
  585. {
  586. MM.register_region(*this);
  587. }
  588. Region::Region(LinearAddress a, size_t s, Retained<VMObject>&& vmo, size_t offset_in_vmo, String&& n, bool r, bool w, bool cow)
  589. : m_laddr(a)
  590. , m_size(s)
  591. , m_offset_in_vmo(offset_in_vmo)
  592. , m_vmo(move(vmo))
  593. , m_name(move(n))
  594. , m_readable(r)
  595. , m_writable(w)
  596. , m_cow_map(Bitmap::create(m_vmo->page_count(), cow))
  597. {
  598. MM.register_region(*this);
  599. }
  600. Region::~Region()
  601. {
  602. if (m_page_directory) {
  603. MM.unmap_region(*this);
  604. ASSERT(!m_page_directory);
  605. }
  606. MM.unregister_region(*this);
  607. }
  608. Retained<PhysicalPage> PhysicalPage::create_eternal(PhysicalAddress paddr, bool supervisor)
  609. {
  610. void* slot = kmalloc_eternal(sizeof(PhysicalPage));
  611. new (slot) PhysicalPage(paddr, supervisor);
  612. return adopt(*(PhysicalPage*)slot);
  613. }
  614. Retained<PhysicalPage> PhysicalPage::create(PhysicalAddress paddr, bool supervisor)
  615. {
  616. void* slot = kmalloc(sizeof(PhysicalPage));
  617. new (slot) PhysicalPage(paddr, supervisor, false);
  618. return adopt(*(PhysicalPage*)slot);
  619. }
  620. PhysicalPage::PhysicalPage(PhysicalAddress paddr, bool supervisor, bool may_return_to_freelist)
  621. : m_may_return_to_freelist(may_return_to_freelist)
  622. , m_supervisor(supervisor)
  623. , m_paddr(paddr)
  624. {
  625. }
  626. void PhysicalPage::return_to_freelist()
  627. {
  628. ASSERT((paddr().get() & ~PAGE_MASK) == 0);
  629. InterruptDisabler disabler;
  630. m_retain_count = 1;
  631. if (m_supervisor)
  632. MM.m_free_supervisor_physical_pages.append(adopt(*this));
  633. else
  634. MM.m_free_physical_pages.append(adopt(*this));
  635. #ifdef MM_DEBUG
  636. dbgprintf("MM: P%x released to freelist\n", m_paddr.get());
  637. #endif
  638. }
  639. Retained<VMObject> VMObject::create_file_backed(RetainPtr<Inode>&& inode)
  640. {
  641. InterruptDisabler disabler;
  642. if (inode->vmo())
  643. return *inode->vmo();
  644. auto vmo = adopt(*new VMObject(move(inode)));
  645. vmo->inode()->set_vmo(*vmo);
  646. return vmo;
  647. }
  648. Retained<VMObject> VMObject::create_anonymous(size_t size)
  649. {
  650. size = ceil_div(size, PAGE_SIZE) * PAGE_SIZE;
  651. return adopt(*new VMObject(size));
  652. }
  653. Retained<VMObject> VMObject::create_for_physical_range(PhysicalAddress paddr, size_t size)
  654. {
  655. size = ceil_div(size, PAGE_SIZE) * PAGE_SIZE;
  656. auto vmo = adopt(*new VMObject(paddr, size));
  657. vmo->m_allow_cpu_caching = false;
  658. return vmo;
  659. }
  660. Retained<VMObject> VMObject::clone()
  661. {
  662. return adopt(*new VMObject(*this));
  663. }
  664. VMObject::VMObject(VMObject& other)
  665. : m_name(other.m_name)
  666. , m_anonymous(other.m_anonymous)
  667. , m_inode_offset(other.m_inode_offset)
  668. , m_size(other.m_size)
  669. , m_inode(other.m_inode)
  670. , m_physical_pages(other.m_physical_pages)
  671. {
  672. MM.register_vmo(*this);
  673. }
  674. VMObject::VMObject(size_t size)
  675. : m_anonymous(true)
  676. , m_size(size)
  677. {
  678. MM.register_vmo(*this);
  679. m_physical_pages.resize(page_count());
  680. }
  681. VMObject::VMObject(PhysicalAddress paddr, size_t size)
  682. : m_anonymous(true)
  683. , m_size(size)
  684. {
  685. MM.register_vmo(*this);
  686. for (size_t i = 0; i < size; i += PAGE_SIZE) {
  687. m_physical_pages.append(PhysicalPage::create(paddr.offset(i), false));
  688. }
  689. ASSERT(m_physical_pages.size() == page_count());
  690. }
  691. VMObject::VMObject(RetainPtr<Inode>&& inode)
  692. : m_inode(move(inode))
  693. {
  694. ASSERT(m_inode);
  695. m_size = ceil_div(m_inode->size(), PAGE_SIZE) * PAGE_SIZE;
  696. m_physical_pages.resize(page_count());
  697. MM.register_vmo(*this);
  698. }
  699. VMObject::~VMObject()
  700. {
  701. if (m_inode)
  702. ASSERT(m_inode->vmo() == this);
  703. MM.unregister_vmo(*this);
  704. }
  705. template<typename Callback>
  706. void VMObject::for_each_region(Callback callback)
  707. {
  708. // FIXME: Figure out a better data structure so we don't have to walk every single region every time an inode changes.
  709. // Perhaps VMObject could have a Vector<Region*> with all of his mappers?
  710. for (auto* region : MM.m_regions) {
  711. if (&region->vmo() == this)
  712. callback(*region);
  713. }
  714. }
  715. void VMObject::inode_size_changed(Badge<Inode>, size_t old_size, size_t new_size)
  716. {
  717. (void)old_size;
  718. InterruptDisabler disabler;
  719. size_t old_page_count = page_count();
  720. m_size = new_size;
  721. if (page_count() > old_page_count) {
  722. // Add null pages and let the fault handler page these in when that day comes.
  723. for (size_t i = old_page_count; i < page_count(); ++i)
  724. m_physical_pages.append(nullptr);
  725. } else {
  726. // Prune the no-longer valid pages. I'm not sure this is actually correct behavior.
  727. for (size_t i = page_count(); i < old_page_count; ++i)
  728. m_physical_pages.take_last();
  729. }
  730. // FIXME: Consolidate with inode_contents_changed() so we only do a single walk.
  731. for_each_region([] (Region& region) {
  732. ASSERT(region.page_directory());
  733. MM.remap_region(*region.page_directory(), region);
  734. });
  735. }
  736. void VMObject::inode_contents_changed(Badge<Inode>, off_t offset, ssize_t size, const byte* data)
  737. {
  738. (void)size;
  739. (void)data;
  740. InterruptDisabler disabler;
  741. ASSERT(offset >= 0);
  742. // FIXME: Only invalidate the parts that actually changed.
  743. for (auto& physical_page : m_physical_pages)
  744. physical_page = nullptr;
  745. #if 0
  746. size_t current_offset = offset;
  747. size_t remaining_bytes = size;
  748. const byte* data_ptr = data;
  749. auto to_page_index = [] (size_t offset) -> size_t {
  750. return offset / PAGE_SIZE;
  751. };
  752. if (current_offset & PAGE_MASK) {
  753. size_t page_index = to_page_index(current_offset);
  754. size_t bytes_to_copy = min(size, PAGE_SIZE - (current_offset & PAGE_MASK));
  755. if (m_physical_pages[page_index]) {
  756. auto* ptr = MM.quickmap_page(*m_physical_pages[page_index]);
  757. memcpy(ptr, data_ptr, bytes_to_copy);
  758. MM.unquickmap_page();
  759. }
  760. current_offset += bytes_to_copy;
  761. data += bytes_to_copy;
  762. remaining_bytes -= bytes_to_copy;
  763. }
  764. for (size_t page_index = to_page_index(current_offset); page_index < m_physical_pages.size(); ++page_index) {
  765. size_t bytes_to_copy = PAGE_SIZE - (current_offset & PAGE_MASK);
  766. if (m_physical_pages[page_index]) {
  767. auto* ptr = MM.quickmap_page(*m_physical_pages[page_index]);
  768. memcpy(ptr, data_ptr, bytes_to_copy);
  769. MM.unquickmap_page();
  770. }
  771. current_offset += bytes_to_copy;
  772. data += bytes_to_copy;
  773. }
  774. #endif
  775. // FIXME: Consolidate with inode_size_changed() so we only do a single walk.
  776. for_each_region([] (Region& region) {
  777. ASSERT(region.page_directory());
  778. MM.remap_region(*region.page_directory(), region);
  779. });
  780. }
  781. int Region::commit()
  782. {
  783. InterruptDisabler disabler;
  784. #ifdef MM_DEBUG
  785. dbgprintf("MM: commit %u pages in Region %p (VMO=%p) at L%x\n", vmo().page_count(), this, &vmo(), laddr().get());
  786. #endif
  787. for (size_t i = first_page_index(); i <= last_page_index(); ++i) {
  788. if (!vmo().physical_pages()[i].is_null())
  789. continue;
  790. auto physical_page = MM.allocate_physical_page(MemoryManager::ShouldZeroFill::Yes);
  791. if (!physical_page) {
  792. kprintf("MM: commit was unable to allocate a physical page\n");
  793. return -ENOMEM;
  794. }
  795. vmo().physical_pages()[i] = move(physical_page);
  796. MM.remap_region_page(*this, i, true);
  797. }
  798. return 0;
  799. }
  800. void MemoryManager::register_vmo(VMObject& vmo)
  801. {
  802. InterruptDisabler disabler;
  803. m_vmos.set(&vmo);
  804. }
  805. void MemoryManager::unregister_vmo(VMObject& vmo)
  806. {
  807. InterruptDisabler disabler;
  808. m_vmos.remove(&vmo);
  809. }
  810. void MemoryManager::register_region(Region& region)
  811. {
  812. InterruptDisabler disabler;
  813. m_regions.set(&region);
  814. }
  815. void MemoryManager::unregister_region(Region& region)
  816. {
  817. InterruptDisabler disabler;
  818. m_regions.remove(&region);
  819. }
  820. size_t Region::amount_resident() const
  821. {
  822. size_t bytes = 0;
  823. for (size_t i = 0; i < page_count(); ++i) {
  824. if (m_vmo->physical_pages()[first_page_index() + i])
  825. bytes += PAGE_SIZE;
  826. }
  827. return bytes;
  828. }
  829. size_t Region::amount_shared() const
  830. {
  831. size_t bytes = 0;
  832. for (size_t i = 0; i < page_count(); ++i) {
  833. auto& physical_page = m_vmo->physical_pages()[first_page_index() + i];
  834. if (physical_page && physical_page->retain_count() > 1)
  835. bytes += PAGE_SIZE;
  836. }
  837. return bytes;
  838. }
  839. PageDirectory::~PageDirectory()
  840. {
  841. #ifdef MM_DEBUG
  842. dbgprintf("MM: ~PageDirectory K%x\n", this);
  843. #endif
  844. }
  845. void PageDirectory::flush(LinearAddress laddr)
  846. {
  847. if (&current->page_directory() == this)
  848. MM.flush_tlb(laddr);
  849. }