// MemoryManager.cpp
  1. #include "MemoryManager.h"
  2. #include <AK/Assertions.h>
  3. #include <AK/kstdio.h>
  4. #include "i386.h"
  5. #include "StdLib.h"
  6. #include "Process.h"
  7. #include <LibC/errno_numbers.h>
  8. #include "CMOS.h"
  9. //#define MM_DEBUG
  10. //#define PAGE_FAULT_DEBUG
  11. static MemoryManager* s_the;
  12. unsigned MemoryManager::s_user_physical_pages_in_existence;
  13. unsigned MemoryManager::s_super_physical_pages_in_existence;
  14. MemoryManager& MM
  15. {
  16. return *s_the;
  17. }
  18. MemoryManager::MemoryManager()
  19. {
  20. // FIXME: This is not the best way to do memory map detection.
  21. // Rewrite to use BIOS int 15,e820 once we have VM86 support.
  22. word base_memory = (CMOS::read(0x16) << 8) | CMOS::read(0x15);
  23. word ext_memory = (CMOS::read(0x18) << 8) | CMOS::read(0x17);
  24. kprintf("%u kB base memory\n", base_memory);
  25. kprintf("%u kB extended memory\n", ext_memory);
  26. m_ram_size = ext_memory * 1024;
  27. m_kernel_page_directory = PageDirectory::create_at_fixed_address(PhysicalAddress(0x4000));
  28. m_page_table_zero = (dword*)0x6000;
  29. initialize_paging();
  30. kprintf("MM initialized.\n");
  31. }
  32. MemoryManager::~MemoryManager()
  33. {
  34. }
  35. PageDirectory::PageDirectory(PhysicalAddress paddr)
  36. {
  37. m_directory_page = PhysicalPage::create_eternal(paddr, true);
  38. }
  39. PageDirectory::PageDirectory()
  40. {
  41. MM.populate_page_directory(*this);
  42. }
  43. void MemoryManager::populate_page_directory(PageDirectory& page_directory)
  44. {
  45. page_directory.m_directory_page = allocate_supervisor_physical_page();
  46. page_directory.entries()[0] = kernel_page_directory().entries()[0];
  47. // Defer to the kernel page tables for 0xC0000000-0xFFFFFFFF
  48. for (int i = 768; i < 1024; ++i)
  49. page_directory.entries()[i] = kernel_page_directory().entries()[i];
  50. }
  51. void MemoryManager::initialize_paging()
  52. {
  53. static_assert(sizeof(MemoryManager::PageDirectoryEntry) == 4);
  54. static_assert(sizeof(MemoryManager::PageTableEntry) == 4);
  55. memset(m_page_table_zero, 0, PAGE_SIZE);
  56. #ifdef MM_DEBUG
  57. dbgprintf("MM: Kernel page directory @ %p\n", kernel_page_directory().cr3());
  58. #endif
  59. #ifdef MM_DEBUG
  60. dbgprintf("MM: Protect against null dereferences\n");
  61. #endif
  62. // Make null dereferences crash.
  63. map_protected(LinearAddress(0), PAGE_SIZE);
  64. #ifdef MM_DEBUG
  65. dbgprintf("MM: Identity map bottom 4MB\n");
  66. #endif
  67. // The bottom 4 MB (except for the null page) are identity mapped & supervisor only.
  68. // Every process shares these mappings.
  69. create_identity_mapping(kernel_page_directory(), LinearAddress(PAGE_SIZE), (4 * MB) - PAGE_SIZE);
  70. // Basic memory map:
  71. // 0 -> 512 kB Kernel code. Root page directory & PDE 0.
  72. // (last page before 1MB) Used by quickmap_page().
  73. // 1 MB -> 2 MB kmalloc_eternal() space.
  74. // 2 MB -> 3 MB kmalloc() space.
  75. // 3 MB -> 4 MB Supervisor physical pages (available for allocation!)
  76. // 4 MB -> (max) MB Userspace physical pages (available for allocation!)
  77. for (size_t i = (2 * MB); i < (4 * MB); i += PAGE_SIZE)
  78. m_free_supervisor_physical_pages.append(PhysicalPage::create_eternal(PhysicalAddress(i), true));
  79. dbgprintf("MM: 4MB-%uMB available for allocation\n", m_ram_size / 1048576);
  80. for (size_t i = (4 * MB); i < m_ram_size; i += PAGE_SIZE)
  81. m_free_physical_pages.append(PhysicalPage::create_eternal(PhysicalAddress(i), false));
  82. m_quickmap_addr = LinearAddress((1 * MB) - PAGE_SIZE);
  83. #ifdef MM_DEBUG
  84. dbgprintf("MM: Quickmap will use P%x\n", m_quickmap_addr.get());
  85. dbgprintf("MM: Installing page directory\n");
  86. #endif
  87. asm volatile("movl %%eax, %%cr3"::"a"(kernel_page_directory().cr3()));
  88. asm volatile(
  89. "movl %%cr0, %%eax\n"
  90. "orl $0x80000001, %%eax\n"
  91. "movl %%eax, %%cr0\n"
  92. :::"%eax", "memory");
  93. #ifdef MM_DEBUG
  94. dbgprintf("MM: Paging initialized.\n");
  95. #endif
  96. }
  97. RetainPtr<PhysicalPage> MemoryManager::allocate_page_table(PageDirectory& page_directory, unsigned index)
  98. {
  99. ASSERT(!page_directory.m_physical_pages.contains(index));
  100. auto physical_page = allocate_supervisor_physical_page();
  101. if (!physical_page)
  102. return nullptr;
  103. page_directory.m_physical_pages.set(index, physical_page.copy_ref());
  104. return physical_page;
  105. }
  106. void MemoryManager::remove_identity_mapping(PageDirectory& page_directory, LinearAddress laddr, size_t size)
  107. {
  108. InterruptDisabler disabler;
  109. // FIXME: ASSERT(laddr is 4KB aligned);
  110. for (dword offset = 0; offset < size; offset += PAGE_SIZE) {
  111. auto pte_address = laddr.offset(offset);
  112. auto pte = ensure_pte(page_directory, pte_address);
  113. pte.set_physical_page_base(0);
  114. pte.set_user_allowed(false);
  115. pte.set_present(true);
  116. pte.set_writable(true);
  117. flush_tlb(pte_address);
  118. }
  119. }
  120. auto MemoryManager::ensure_pte(PageDirectory& page_directory, LinearAddress laddr) -> PageTableEntry
  121. {
  122. ASSERT_INTERRUPTS_DISABLED();
  123. dword page_directory_index = (laddr.get() >> 22) & 0x3ff;
  124. dword page_table_index = (laddr.get() >> 12) & 0x3ff;
  125. PageDirectoryEntry pde = PageDirectoryEntry(&page_directory.entries()[page_directory_index]);
  126. if (!pde.is_present()) {
  127. #ifdef MM_DEBUG
  128. dbgprintf("MM: PDE %u not present (requested for L%x), allocating\n", page_directory_index, laddr.get());
  129. #endif
  130. if (page_directory_index == 0) {
  131. ASSERT(&page_directory == m_kernel_page_directory.ptr());
  132. pde.set_page_table_base((dword)m_page_table_zero);
  133. pde.set_user_allowed(false);
  134. pde.set_present(true);
  135. pde.set_writable(true);
  136. } else {
  137. //ASSERT(&page_directory != m_kernel_page_directory.ptr());
  138. auto page_table = allocate_page_table(page_directory, page_directory_index);
  139. #ifdef MM_DEBUG
  140. dbgprintf("MM: PD K%x (%s) at P%x allocated page table #%u (for L%x) at P%x\n",
  141. &page_directory,
  142. &page_directory == m_kernel_page_directory.ptr() ? "Kernel" : "User",
  143. page_directory.cr3(),
  144. page_directory_index,
  145. laddr.get(),
  146. page_table->paddr().get());
  147. #endif
  148. pde.set_page_table_base(page_table->paddr().get());
  149. pde.set_user_allowed(true);
  150. pde.set_present(true);
  151. pde.set_writable(true);
  152. page_directory.m_physical_pages.set(page_directory_index, move(page_table));
  153. }
  154. }
  155. return PageTableEntry(&pde.page_table_base()[page_table_index]);
  156. }
  157. void MemoryManager::map_protected(LinearAddress laddr, size_t length)
  158. {
  159. InterruptDisabler disabler;
  160. // FIXME: ASSERT(linearAddress is 4KB aligned);
  161. for (dword offset = 0; offset < length; offset += PAGE_SIZE) {
  162. auto pte_address = laddr.offset(offset);
  163. auto pte = ensure_pte(kernel_page_directory(), pte_address);
  164. pte.set_physical_page_base(pte_address.get());
  165. pte.set_user_allowed(false);
  166. pte.set_present(false);
  167. pte.set_writable(false);
  168. flush_tlb(pte_address);
  169. }
  170. }
  171. void MemoryManager::create_identity_mapping(PageDirectory& page_directory, LinearAddress laddr, size_t size)
  172. {
  173. InterruptDisabler disabler;
  174. ASSERT((laddr.get() & ~PAGE_MASK) == 0);
  175. for (dword offset = 0; offset < size; offset += PAGE_SIZE) {
  176. auto pte_address = laddr.offset(offset);
  177. auto pte = ensure_pte(page_directory, pte_address);
  178. pte.set_physical_page_base(pte_address.get());
  179. pte.set_user_allowed(false);
  180. pte.set_present(true);
  181. pte.set_writable(true);
  182. page_directory.flush(pte_address);
  183. }
  184. }
  185. void MemoryManager::initialize()
  186. {
  187. s_the = new MemoryManager;
  188. }
  189. Region* MemoryManager::region_from_laddr(Process& process, LinearAddress laddr)
  190. {
  191. ASSERT_INTERRUPTS_DISABLED();
  192. // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure!
  193. for (auto& region : process.m_regions) {
  194. if (region->contains(laddr))
  195. return region.ptr();
  196. }
  197. dbgprintf("%s(%u) Couldn't find region for L%x (CR3=%x)\n", process.name().characters(), process.pid(), laddr.get(), process.page_directory().cr3());
  198. return nullptr;
  199. }
  200. const Region* MemoryManager::region_from_laddr(const Process& process, LinearAddress laddr)
  201. {
  202. // FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure!
  203. for (auto& region : process.m_regions) {
  204. if (region->contains(laddr))
  205. return region.ptr();
  206. }
  207. dbgprintf("%s(%u) Couldn't find region for L%x (CR3=%x)\n", process.name().characters(), process.pid(), laddr.get(), process.page_directory().cr3());
  208. return nullptr;
  209. }
  210. bool MemoryManager::zero_page(Region& region, unsigned page_index_in_region)
  211. {
  212. ASSERT_INTERRUPTS_DISABLED();
  213. auto& vmo = region.vmo();
  214. auto& vmo_page = vmo.physical_pages()[region.first_page_index() + page_index_in_region];
  215. sti();
  216. LOCKER(vmo.m_paging_lock);
  217. cli();
  218. if (!vmo_page.is_null()) {
  219. #ifdef PAGE_FAULT_DEBUG
  220. dbgprintf("MM: zero_page() but page already present. Fine with me!\n");
  221. #endif
  222. remap_region_page(region, page_index_in_region, true);
  223. return true;
  224. }
  225. auto physical_page = allocate_physical_page(ShouldZeroFill::Yes);
  226. #ifdef PAGE_FAULT_DEBUG
  227. dbgprintf(" >> ZERO P%x\n", physical_page->paddr().get());
  228. #endif
  229. region.m_cow_map.set(page_index_in_region, false);
  230. vmo.physical_pages()[page_index_in_region] = move(physical_page);
  231. remap_region_page(region, page_index_in_region, true);
  232. return true;
  233. }
  234. bool MemoryManager::copy_on_write(Region& region, unsigned page_index_in_region)
  235. {
  236. ASSERT_INTERRUPTS_DISABLED();
  237. auto& vmo = region.vmo();
  238. if (vmo.physical_pages()[page_index_in_region]->retain_count() == 1) {
  239. #ifdef PAGE_FAULT_DEBUG
  240. dbgprintf(" >> It's a COW page but nobody is sharing it anymore. Remap r/w\n");
  241. #endif
  242. region.m_cow_map.set(page_index_in_region, false);
  243. remap_region_page(region, page_index_in_region, true);
  244. return true;
  245. }
  246. #ifdef PAGE_FAULT_DEBUG
  247. dbgprintf(" >> It's a COW page and it's time to COW!\n");
  248. #endif
  249. auto physical_page_to_copy = move(vmo.physical_pages()[page_index_in_region]);
  250. auto physical_page = allocate_physical_page(ShouldZeroFill::No);
  251. byte* dest_ptr = quickmap_page(*physical_page);
  252. const byte* src_ptr = region.laddr().offset(page_index_in_region * PAGE_SIZE).as_ptr();
  253. #ifdef PAGE_FAULT_DEBUG
  254. dbgprintf(" >> COW P%x <- P%x\n", physical_page->paddr().get(), physical_page_to_copy->paddr().get());
  255. #endif
  256. memcpy(dest_ptr, src_ptr, PAGE_SIZE);
  257. vmo.physical_pages()[page_index_in_region] = move(physical_page);
  258. unquickmap_page();
  259. region.m_cow_map.set(page_index_in_region, false);
  260. remap_region_page(region, page_index_in_region, true);
  261. return true;
  262. }
  263. bool Region::page_in()
  264. {
  265. ASSERT(m_page_directory);
  266. ASSERT(!vmo().is_anonymous());
  267. ASSERT(vmo().inode());
  268. #ifdef MM_DEBUG
  269. dbgprintf("MM: page_in %u pages\n", page_count());
  270. #endif
  271. for (size_t i = 0; i < page_count(); ++i) {
  272. auto& vmo_page = vmo().physical_pages()[first_page_index() + i];
  273. if (vmo_page.is_null()) {
  274. bool success = MM.page_in_from_inode(*this, i);
  275. if (!success)
  276. return false;
  277. }
  278. MM.remap_region_page(*this, i, true);
  279. }
  280. return true;
  281. }
  282. bool MemoryManager::page_in_from_inode(Region& region, unsigned page_index_in_region)
  283. {
  284. ASSERT(region.page_directory());
  285. auto& vmo = region.vmo();
  286. ASSERT(!vmo.is_anonymous());
  287. ASSERT(vmo.inode());
  288. auto& vmo_page = vmo.physical_pages()[region.first_page_index() + page_index_in_region];
  289. InterruptFlagSaver saver;
  290. sti();
  291. LOCKER(vmo.m_paging_lock);
  292. cli();
  293. if (!vmo_page.is_null()) {
  294. dbgprintf("MM: page_in_from_inode() but page already present. Fine with me!\n");
  295. remap_region_page(region, page_index_in_region, true);
  296. return true;
  297. }
  298. #ifdef MM_DEBUG
  299. dbgprintf("MM: page_in_from_inode ready to read from inode\n");
  300. #endif
  301. sti();
  302. byte page_buffer[PAGE_SIZE];
  303. auto& inode = *vmo.inode();
  304. auto nread = inode.read_bytes(vmo.inode_offset() + ((region.first_page_index() + page_index_in_region) * PAGE_SIZE), PAGE_SIZE, page_buffer, nullptr);
  305. if (nread < 0) {
  306. kprintf("MM: page_in_from_inode had error (%d) while reading!\n", nread);
  307. return false;
  308. }
  309. if (nread < PAGE_SIZE) {
  310. // If we read less than a page, zero out the rest to avoid leaking uninitialized data.
  311. memset(page_buffer + nread, 0, PAGE_SIZE - nread);
  312. }
  313. cli();
  314. vmo_page = allocate_physical_page(ShouldZeroFill::No);
  315. if (vmo_page.is_null()) {
  316. kprintf("MM: page_in_from_inode was unable to allocate a physical page\n");
  317. return false;
  318. }
  319. remap_region_page(region, page_index_in_region, true);
  320. byte* dest_ptr = region.laddr().offset(page_index_in_region * PAGE_SIZE).as_ptr();
  321. memcpy(dest_ptr, page_buffer, PAGE_SIZE);
  322. return true;
  323. }
  324. PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
  325. {
  326. ASSERT_INTERRUPTS_DISABLED();
  327. ASSERT(current);
  328. #ifdef PAGE_FAULT_DEBUG
  329. dbgprintf("MM: handle_page_fault(%w) at L%x\n", fault.code(), fault.laddr().get());
  330. #endif
  331. ASSERT(fault.laddr() != m_quickmap_addr);
  332. auto* region = region_from_laddr(current->process(), fault.laddr());
  333. if (!region) {
  334. kprintf("NP(error) fault at invalid address L%x\n", fault.laddr().get());
  335. return PageFaultResponse::ShouldCrash;
  336. }
  337. auto page_index_in_region = region->page_index_from_address(fault.laddr());
  338. if (fault.is_not_present()) {
  339. if (region->vmo().inode()) {
  340. #ifdef PAGE_FAULT_DEBUG
  341. dbgprintf("NP(inode) fault in Region{%p}[%u]\n", region, page_index_in_region);
  342. #endif
  343. page_in_from_inode(*region, page_index_in_region);
  344. return PageFaultResponse::Continue;
  345. } else {
  346. #ifdef PAGE_FAULT_DEBUG
  347. dbgprintf("NP(zero) fault in Region{%p}[%u]\n", region, page_index_in_region);
  348. #endif
  349. zero_page(*region, page_index_in_region);
  350. return PageFaultResponse::Continue;
  351. }
  352. } else if (fault.is_protection_violation()) {
  353. if (region->m_cow_map.get(page_index_in_region)) {
  354. #ifdef PAGE_FAULT_DEBUG
  355. dbgprintf("PV(cow) fault in Region{%p}[%u]\n", region, page_index_in_region);
  356. #endif
  357. bool success = copy_on_write(*region, page_index_in_region);
  358. ASSERT(success);
  359. return PageFaultResponse::Continue;
  360. }
  361. kprintf("PV(error) fault in Region{%p}[%u] at L%x\n", region, page_index_in_region, fault.laddr().get());
  362. } else {
  363. ASSERT_NOT_REACHED();
  364. }
  365. return PageFaultResponse::ShouldCrash;
  366. }
  367. RetainPtr<PhysicalPage> MemoryManager::allocate_physical_page(ShouldZeroFill should_zero_fill)
  368. {
  369. InterruptDisabler disabler;
  370. if (1 > m_free_physical_pages.size()) {
  371. kprintf("FUCK! No physical pages available.\n");
  372. ASSERT_NOT_REACHED();
  373. return { };
  374. }
  375. #ifdef MM_DEBUG
  376. dbgprintf("MM: allocate_physical_page vending P%x (%u remaining)\n", m_free_physical_pages.last()->paddr().get(), m_free_physical_pages.size());
  377. #endif
  378. auto physical_page = m_free_physical_pages.take_last();
  379. if (should_zero_fill == ShouldZeroFill::Yes) {
  380. auto* ptr = (dword*)quickmap_page(*physical_page);
  381. fast_dword_fill(ptr, 0, PAGE_SIZE / sizeof(dword));
  382. unquickmap_page();
  383. }
  384. return physical_page;
  385. }
  386. RetainPtr<PhysicalPage> MemoryManager::allocate_supervisor_physical_page()
  387. {
  388. InterruptDisabler disabler;
  389. if (1 > m_free_supervisor_physical_pages.size()) {
  390. kprintf("FUCK! No physical pages available.\n");
  391. ASSERT_NOT_REACHED();
  392. return { };
  393. }
  394. #ifdef MM_DEBUG
  395. dbgprintf("MM: allocate_supervisor_physical_page vending P%x (%u remaining)\n", m_free_supervisor_physical_pages.last()->paddr().get(), m_free_supervisor_physical_pages.size());
  396. #endif
  397. auto physical_page = m_free_supervisor_physical_pages.take_last();
  398. fast_dword_fill((dword*)physical_page->paddr().as_ptr(), 0, PAGE_SIZE / sizeof(dword));
  399. return physical_page;
  400. }
  401. void MemoryManager::enter_process_paging_scope(Process& process)
  402. {
  403. ASSERT(current);
  404. InterruptDisabler disabler;
  405. current->tss().cr3 = process.page_directory().cr3();
  406. asm volatile("movl %%eax, %%cr3"::"a"(process.page_directory().cr3()):"memory");
  407. }
  408. void MemoryManager::enter_kernel_paging_scope()
  409. {
  410. InterruptDisabler disabler;
  411. asm volatile("movl %%eax, %%cr3"::"a"(kernel_page_directory().cr3()):"memory");
  412. }
  413. void MemoryManager::flush_entire_tlb()
  414. {
  415. asm volatile(
  416. "mov %%cr3, %%eax\n"
  417. "mov %%eax, %%cr3\n"
  418. ::: "%eax", "memory"
  419. );
  420. }
  421. void MemoryManager::flush_tlb(LinearAddress laddr)
  422. {
  423. asm volatile("invlpg %0": :"m" (*(char*)laddr.get()) : "memory");
  424. }
  425. void MemoryManager::map_for_kernel(LinearAddress laddr, PhysicalAddress paddr)
  426. {
  427. auto pte = ensure_pte(kernel_page_directory(), laddr);
  428. pte.set_physical_page_base(paddr.get());
  429. pte.set_present(true);
  430. pte.set_writable(true);
  431. pte.set_user_allowed(false);
  432. flush_tlb(laddr);
  433. }
  434. byte* MemoryManager::quickmap_page(PhysicalPage& physical_page)
  435. {
  436. ASSERT_INTERRUPTS_DISABLED();
  437. ASSERT(!m_quickmap_in_use);
  438. m_quickmap_in_use = true;
  439. auto page_laddr = m_quickmap_addr;
  440. auto pte = ensure_pte(kernel_page_directory(), page_laddr);
  441. pte.set_physical_page_base(physical_page.paddr().get());
  442. pte.set_present(true);
  443. pte.set_writable(true);
  444. pte.set_user_allowed(false);
  445. flush_tlb(page_laddr);
  446. ASSERT((dword)pte.physical_page_base() == physical_page.paddr().get());
  447. #ifdef MM_DEBUG
  448. dbgprintf("MM: >> quickmap_page L%x => P%x @ PTE=%p\n", page_laddr, physical_page.paddr().get(), pte.ptr());
  449. #endif
  450. return page_laddr.as_ptr();
  451. }
  452. void MemoryManager::unquickmap_page()
  453. {
  454. ASSERT_INTERRUPTS_DISABLED();
  455. ASSERT(m_quickmap_in_use);
  456. auto page_laddr = m_quickmap_addr;
  457. auto pte = ensure_pte(kernel_page_directory(), page_laddr);
  458. #ifdef MM_DEBUG
  459. auto old_physical_address = pte.physical_page_base();
  460. #endif
  461. pte.set_physical_page_base(0);
  462. pte.set_present(false);
  463. pte.set_writable(false);
  464. flush_tlb(page_laddr);
  465. #ifdef MM_DEBUG
  466. dbgprintf("MM: >> unquickmap_page L%x =/> P%x\n", page_laddr, old_physical_address);
  467. #endif
  468. m_quickmap_in_use = false;
  469. }
  470. void MemoryManager::remap_region_page(Region& region, unsigned page_index_in_region, bool user_allowed)
  471. {
  472. ASSERT(region.page_directory());
  473. InterruptDisabler disabler;
  474. auto page_laddr = region.laddr().offset(page_index_in_region * PAGE_SIZE);
  475. auto pte = ensure_pte(*region.page_directory(), page_laddr);
  476. auto& physical_page = region.vmo().physical_pages()[page_index_in_region];
  477. ASSERT(physical_page);
  478. pte.set_physical_page_base(physical_page->paddr().get());
  479. pte.set_present(true); // FIXME: Maybe we should use the is_readable flag here?
  480. if (region.m_cow_map.get(page_index_in_region))
  481. pte.set_writable(false);
  482. else
  483. pte.set_writable(region.is_writable());
  484. pte.set_cache_disabled(!region.vmo().m_allow_cpu_caching);
  485. pte.set_write_through(!region.vmo().m_allow_cpu_caching);
  486. pte.set_user_allowed(user_allowed);
  487. region.page_directory()->flush(page_laddr);
  488. #ifdef MM_DEBUG
  489. dbgprintf("MM: >> remap_region_page (PD=%x, PTE=P%x) '%s' L%x => P%x (@%p)\n", region.page_directory()->cr3(), pte.ptr(), region.name().characters(), page_laddr.get(), physical_page->paddr().get(), physical_page.ptr());
  490. #endif
  491. }
  492. void MemoryManager::remap_region(PageDirectory& page_directory, Region& region)
  493. {
  494. InterruptDisabler disabler;
  495. ASSERT(region.page_directory() == &page_directory);
  496. map_region_at_address(page_directory, region, region.laddr(), true);
  497. }
  498. void MemoryManager::map_region_at_address(PageDirectory& page_directory, Region& region, LinearAddress laddr, bool user_allowed)
  499. {
  500. InterruptDisabler disabler;
  501. region.set_page_directory(page_directory);
  502. auto& vmo = region.vmo();
  503. #ifdef MM_DEBUG
  504. dbgprintf("MM: map_region_at_address will map VMO pages %u - %u (VMO page count: %u)\n", region.first_page_index(), region.last_page_index(), vmo.page_count());
  505. #endif
  506. for (size_t i = 0; i < region.page_count(); ++i) {
  507. auto page_laddr = laddr.offset(i * PAGE_SIZE);
  508. auto pte = ensure_pte(page_directory, page_laddr);
  509. auto& physical_page = vmo.physical_pages()[region.first_page_index() + i];
  510. if (physical_page) {
  511. pte.set_physical_page_base(physical_page->paddr().get());
  512. pte.set_present(true); // FIXME: Maybe we should use the is_readable flag here?
  513. // FIXME: It seems wrong that the *region* cow map is essentially using *VMO* relative indices.
  514. if (region.m_cow_map.get(region.first_page_index() + i))
  515. pte.set_writable(false);
  516. else
  517. pte.set_writable(region.is_writable());
  518. pte.set_cache_disabled(!region.vmo().m_allow_cpu_caching);
  519. pte.set_write_through(!region.vmo().m_allow_cpu_caching);
  520. } else {
  521. pte.set_physical_page_base(0);
  522. pte.set_present(false);
  523. pte.set_writable(region.is_writable());
  524. }
  525. pte.set_user_allowed(user_allowed);
  526. page_directory.flush(page_laddr);
  527. #ifdef MM_DEBUG
  528. dbgprintf("MM: >> map_region_at_address (PD=%x) '%s' L%x => P%x (@%p)\n", &page_directory, region.name().characters(), page_laddr, physical_page ? physical_page->paddr().get() : 0, physical_page.ptr());
  529. #endif
  530. }
  531. }
  532. bool MemoryManager::unmap_region(Region& region)
  533. {
  534. ASSERT(region.page_directory());
  535. InterruptDisabler disabler;
  536. for (size_t i = 0; i < region.page_count(); ++i) {
  537. auto laddr = region.laddr().offset(i * PAGE_SIZE);
  538. auto pte = ensure_pte(*region.page_directory(), laddr);
  539. pte.set_physical_page_base(0);
  540. pte.set_present(false);
  541. pte.set_writable(false);
  542. pte.set_user_allowed(false);
  543. region.page_directory()->flush(laddr);
  544. #ifdef MM_DEBUG
  545. auto& physical_page = region.vmo().physical_pages()[region.first_page_index() + i];
  546. dbgprintf("MM: >> Unmapped L%x => P%x <<\n", laddr, physical_page ? physical_page->paddr().get() : 0);
  547. #endif
  548. }
  549. region.release_page_directory();
  550. return true;
  551. }
  552. bool MemoryManager::map_region(Process& process, Region& region)
  553. {
  554. map_region_at_address(process.page_directory(), region, region.laddr(), true);
  555. return true;
  556. }
  557. bool MemoryManager::validate_user_read(const Process& process, LinearAddress laddr) const
  558. {
  559. auto* region = region_from_laddr(process, laddr);
  560. return region && region->is_readable();
  561. }
  562. bool MemoryManager::validate_user_write(const Process& process, LinearAddress laddr) const
  563. {
  564. auto* region = region_from_laddr(process, laddr);
  565. return region && region->is_writable();
  566. }
  567. Retained<Region> Region::clone()
  568. {
  569. ASSERT(current);
  570. if (m_shared || (m_readable && !m_writable)) {
  571. #ifdef MM_DEBUG
  572. dbgprintf("%s<%u> Region::clone(): sharing %s (L%x)\n",
  573. current->process().name().characters(),
  574. current->pid(),
  575. m_name.characters(),
  576. laddr().get());
  577. #endif
  578. // Create a new region backed by the same VMObject.
  579. return adopt(*new Region(laddr(), size(), m_vmo.copy_ref(), m_offset_in_vmo, String(m_name), m_readable, m_writable));
  580. }
  581. #ifdef MM_DEBUG
  582. dbgprintf("%s<%u> Region::clone(): cowing %s (L%x)\n",
  583. current->process().name().characters(),
  584. current->pid(),
  585. m_name.characters(),
  586. laddr().get());
  587. #endif
  588. // Set up a COW region. The parent (this) region becomes COW as well!
  589. for (size_t i = 0; i < page_count(); ++i)
  590. m_cow_map.set(i, true);
  591. MM.remap_region(current->process().page_directory(), *this);
  592. return adopt(*new Region(laddr(), size(), m_vmo->clone(), m_offset_in_vmo, String(m_name), m_readable, m_writable, true));
  593. }
  594. Region::Region(LinearAddress a, size_t s, String&& n, bool r, bool w, bool cow)
  595. : m_laddr(a)
  596. , m_size(s)
  597. , m_vmo(VMObject::create_anonymous(s))
  598. , m_name(move(n))
  599. , m_readable(r)
  600. , m_writable(w)
  601. , m_cow_map(Bitmap::create(m_vmo->page_count(), cow))
  602. {
  603. m_vmo->set_name(m_name);
  604. MM.register_region(*this);
  605. }
  606. Region::Region(LinearAddress a, size_t s, RetainPtr<Inode>&& inode, String&& n, bool r, bool w)
  607. : m_laddr(a)
  608. , m_size(s)
  609. , m_vmo(VMObject::create_file_backed(move(inode)))
  610. , m_name(move(n))
  611. , m_readable(r)
  612. , m_writable(w)
  613. , m_cow_map(Bitmap::create(m_vmo->page_count()))
  614. {
  615. MM.register_region(*this);
  616. }
  617. Region::Region(LinearAddress a, size_t s, Retained<VMObject>&& vmo, size_t offset_in_vmo, String&& n, bool r, bool w, bool cow)
  618. : m_laddr(a)
  619. , m_size(s)
  620. , m_offset_in_vmo(offset_in_vmo)
  621. , m_vmo(move(vmo))
  622. , m_name(move(n))
  623. , m_readable(r)
  624. , m_writable(w)
  625. , m_cow_map(Bitmap::create(m_vmo->page_count(), cow))
  626. {
  627. MM.register_region(*this);
  628. }
  629. Region::~Region()
  630. {
  631. if (m_page_directory) {
  632. MM.unmap_region(*this);
  633. ASSERT(!m_page_directory);
  634. }
  635. MM.unregister_region(*this);
  636. }
  637. Retained<PhysicalPage> PhysicalPage::create_eternal(PhysicalAddress paddr, bool supervisor)
  638. {
  639. void* slot = kmalloc_eternal(sizeof(PhysicalPage));
  640. new (slot) PhysicalPage(paddr, supervisor);
  641. return adopt(*(PhysicalPage*)slot);
  642. }
  643. Retained<PhysicalPage> PhysicalPage::create(PhysicalAddress paddr, bool supervisor)
  644. {
  645. void* slot = kmalloc(sizeof(PhysicalPage));
  646. new (slot) PhysicalPage(paddr, supervisor, false);
  647. return adopt(*(PhysicalPage*)slot);
  648. }
  649. PhysicalPage::PhysicalPage(PhysicalAddress paddr, bool supervisor, bool may_return_to_freelist)
  650. : m_may_return_to_freelist(may_return_to_freelist)
  651. , m_supervisor(supervisor)
  652. , m_paddr(paddr)
  653. {
  654. if (supervisor)
  655. ++MemoryManager::s_super_physical_pages_in_existence;
  656. else
  657. ++MemoryManager::s_user_physical_pages_in_existence;
  658. }
  659. void PhysicalPage::return_to_freelist()
  660. {
  661. ASSERT((paddr().get() & ~PAGE_MASK) == 0);
  662. InterruptDisabler disabler;
  663. m_retain_count = 1;
  664. if (m_supervisor)
  665. MM.m_free_supervisor_physical_pages.append(adopt(*this));
  666. else
  667. MM.m_free_physical_pages.append(adopt(*this));
  668. #ifdef MM_DEBUG
  669. dbgprintf("MM: P%x released to freelist\n", m_paddr.get());
  670. #endif
  671. }
  672. Retained<VMObject> VMObject::create_file_backed(RetainPtr<Inode>&& inode)
  673. {
  674. InterruptDisabler disabler;
  675. if (inode->vmo())
  676. return *inode->vmo();
  677. auto vmo = adopt(*new VMObject(move(inode)));
  678. vmo->inode()->set_vmo(*vmo);
  679. return vmo;
  680. }
  681. Retained<VMObject> VMObject::create_anonymous(size_t size)
  682. {
  683. size = ceil_div(size, PAGE_SIZE) * PAGE_SIZE;
  684. return adopt(*new VMObject(size));
  685. }
  686. Retained<VMObject> VMObject::create_for_physical_range(PhysicalAddress paddr, size_t size)
  687. {
  688. size = ceil_div(size, PAGE_SIZE) * PAGE_SIZE;
  689. auto vmo = adopt(*new VMObject(paddr, size));
  690. vmo->m_allow_cpu_caching = false;
  691. return vmo;
  692. }
  693. Retained<VMObject> VMObject::clone()
  694. {
  695. return adopt(*new VMObject(*this));
  696. }
  697. VMObject::VMObject(VMObject& other)
  698. : m_name(other.m_name)
  699. , m_anonymous(other.m_anonymous)
  700. , m_inode_offset(other.m_inode_offset)
  701. , m_size(other.m_size)
  702. , m_inode(other.m_inode)
  703. , m_physical_pages(other.m_physical_pages)
  704. {
  705. MM.register_vmo(*this);
  706. }
  707. VMObject::VMObject(size_t size)
  708. : m_anonymous(true)
  709. , m_size(size)
  710. {
  711. MM.register_vmo(*this);
  712. m_physical_pages.resize(page_count());
  713. }
  714. VMObject::VMObject(PhysicalAddress paddr, size_t size)
  715. : m_anonymous(true)
  716. , m_size(size)
  717. {
  718. MM.register_vmo(*this);
  719. for (size_t i = 0; i < size; i += PAGE_SIZE) {
  720. m_physical_pages.append(PhysicalPage::create(paddr.offset(i), false));
  721. }
  722. ASSERT(m_physical_pages.size() == page_count());
  723. }
  724. VMObject::VMObject(RetainPtr<Inode>&& inode)
  725. : m_inode(move(inode))
  726. {
  727. ASSERT(m_inode);
  728. m_size = ceil_div(m_inode->size(), PAGE_SIZE) * PAGE_SIZE;
  729. m_physical_pages.resize(page_count());
  730. MM.register_vmo(*this);
  731. }
  732. VMObject::~VMObject()
  733. {
  734. if (m_inode)
  735. ASSERT(m_inode->vmo() == this);
  736. MM.unregister_vmo(*this);
  737. }
  738. template<typename Callback>
  739. void VMObject::for_each_region(Callback callback)
  740. {
  741. // FIXME: Figure out a better data structure so we don't have to walk every single region every time an inode changes.
  742. // Perhaps VMObject could have a Vector<Region*> with all of his mappers?
  743. for (auto* region : MM.m_regions) {
  744. if (&region->vmo() == this)
  745. callback(*region);
  746. }
  747. }
  748. void VMObject::inode_size_changed(Badge<Inode>, size_t old_size, size_t new_size)
  749. {
  750. (void)old_size;
  751. InterruptDisabler disabler;
  752. size_t old_page_count = page_count();
  753. m_size = new_size;
  754. if (page_count() > old_page_count) {
  755. // Add null pages and let the fault handler page these in when that day comes.
  756. for (size_t i = old_page_count; i < page_count(); ++i)
  757. m_physical_pages.append(nullptr);
  758. } else {
  759. // Prune the no-longer valid pages. I'm not sure this is actually correct behavior.
  760. for (size_t i = page_count(); i < old_page_count; ++i)
  761. m_physical_pages.take_last();
  762. }
  763. // FIXME: Consolidate with inode_contents_changed() so we only do a single walk.
  764. for_each_region([] (Region& region) {
  765. ASSERT(region.page_directory());
  766. MM.remap_region(*region.page_directory(), region);
  767. });
  768. }
  769. void VMObject::inode_contents_changed(Badge<Inode>, off_t offset, ssize_t size, const byte* data)
  770. {
  771. (void)size;
  772. (void)data;
  773. InterruptDisabler disabler;
  774. ASSERT(offset >= 0);
  775. // FIXME: Only invalidate the parts that actually changed.
  776. for (auto& physical_page : m_physical_pages)
  777. physical_page = nullptr;
  778. #if 0
  779. size_t current_offset = offset;
  780. size_t remaining_bytes = size;
  781. const byte* data_ptr = data;
  782. auto to_page_index = [] (size_t offset) -> size_t {
  783. return offset / PAGE_SIZE;
  784. };
  785. if (current_offset & PAGE_MASK) {
  786. size_t page_index = to_page_index(current_offset);
  787. size_t bytes_to_copy = min(size, PAGE_SIZE - (current_offset & PAGE_MASK));
  788. if (m_physical_pages[page_index]) {
  789. auto* ptr = MM.quickmap_page(*m_physical_pages[page_index]);
  790. memcpy(ptr, data_ptr, bytes_to_copy);
  791. MM.unquickmap_page();
  792. }
  793. current_offset += bytes_to_copy;
  794. data += bytes_to_copy;
  795. remaining_bytes -= bytes_to_copy;
  796. }
  797. for (size_t page_index = to_page_index(current_offset); page_index < m_physical_pages.size(); ++page_index) {
  798. size_t bytes_to_copy = PAGE_SIZE - (current_offset & PAGE_MASK);
  799. if (m_physical_pages[page_index]) {
  800. auto* ptr = MM.quickmap_page(*m_physical_pages[page_index]);
  801. memcpy(ptr, data_ptr, bytes_to_copy);
  802. MM.unquickmap_page();
  803. }
  804. current_offset += bytes_to_copy;
  805. data += bytes_to_copy;
  806. }
  807. #endif
  808. // FIXME: Consolidate with inode_size_changed() so we only do a single walk.
  809. for_each_region([] (Region& region) {
  810. ASSERT(region.page_directory());
  811. MM.remap_region(*region.page_directory(), region);
  812. });
  813. }
  814. int Region::commit()
  815. {
  816. InterruptDisabler disabler;
  817. #ifdef MM_DEBUG
  818. dbgprintf("MM: commit %u pages in Region %p (VMO=%p) at L%x\n", vmo().page_count(), this, &vmo(), laddr().get());
  819. #endif
  820. for (size_t i = first_page_index(); i <= last_page_index(); ++i) {
  821. if (!vmo().physical_pages()[i].is_null())
  822. continue;
  823. auto physical_page = MM.allocate_physical_page(MemoryManager::ShouldZeroFill::Yes);
  824. if (!physical_page) {
  825. kprintf("MM: commit was unable to allocate a physical page\n");
  826. return -ENOMEM;
  827. }
  828. vmo().physical_pages()[i] = move(physical_page);
  829. MM.remap_region_page(*this, i, true);
  830. }
  831. return 0;
  832. }
  833. void MemoryManager::register_vmo(VMObject& vmo)
  834. {
  835. InterruptDisabler disabler;
  836. m_vmos.set(&vmo);
  837. }
  838. void MemoryManager::unregister_vmo(VMObject& vmo)
  839. {
  840. InterruptDisabler disabler;
  841. m_vmos.remove(&vmo);
  842. }
  843. void MemoryManager::register_region(Region& region)
  844. {
  845. InterruptDisabler disabler;
  846. m_regions.set(&region);
  847. }
  848. void MemoryManager::unregister_region(Region& region)
  849. {
  850. InterruptDisabler disabler;
  851. m_regions.remove(&region);
  852. }
  853. size_t Region::amount_resident() const
  854. {
  855. size_t bytes = 0;
  856. for (size_t i = 0; i < page_count(); ++i) {
  857. if (m_vmo->physical_pages()[first_page_index() + i])
  858. bytes += PAGE_SIZE;
  859. }
  860. return bytes;
  861. }
  862. size_t Region::amount_shared() const
  863. {
  864. size_t bytes = 0;
  865. for (size_t i = 0; i < page_count(); ++i) {
  866. auto& physical_page = m_vmo->physical_pages()[first_page_index() + i];
  867. if (physical_page && physical_page->retain_count() > 1)
  868. bytes += PAGE_SIZE;
  869. }
  870. return bytes;
  871. }
  872. PageDirectory::~PageDirectory()
  873. {
  874. #ifdef MM_DEBUG
  875. dbgprintf("MM: ~PageDirectory K%x\n", this);
  876. #endif
  877. }
  878. void PageDirectory::flush(LinearAddress laddr)
  879. {
  880. #ifdef MM_DEBUG
  881. dbgprintf("MM: Flush page L%x\n", laddr.get());
  882. #endif
  883. if (!current)
  884. return;
  885. if (&current->process().page_directory() == this)
  886. MM.flush_tlb(laddr);
  887. }
  888. ProcessPagingScope::ProcessPagingScope(Process& process)
  889. {
  890. ASSERT(current);
  891. MM.enter_process_paging_scope(process);
  892. }
  893. ProcessPagingScope::~ProcessPagingScope()
  894. {
  895. MM.enter_process_paging_scope(current->process());
  896. }
  897. KernelPagingScope::KernelPagingScope()
  898. {
  899. ASSERT(current);
  900. MM.enter_kernel_paging_scope();
  901. }
  902. KernelPagingScope::~KernelPagingScope()
  903. {
  904. MM.enter_process_paging_scope(current->process());
  905. }