AddressSpace.cpp

/*
 * Copyright (c) 2021-2022, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/API/MemoryLayout.h>
#include <Kernel/Arch/CPU.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/AddressSpace.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Scheduler.h>

namespace Kernel::Memory {

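// Create a new address space. A child created from a parent inherits the parent's
// total range; otherwise the userspace base is offset by a small random, page-aligned amount.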
ErrorOr<NonnullOwnPtr<AddressSpace>> AddressSpace::try_create(AddressSpace const* parent)
{
    auto page_directory = TRY(PageDirectory::try_create_for_userspace());

    VirtualRange total_range = [&]() -> VirtualRange {
        if (parent)
            return parent->m_total_range;
        constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
        FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
        size_t random_offset = (get_fast_random<u8>() % 32 * MiB) & PAGE_MASK;
        FlatPtr base = userspace_range_base + random_offset;

        return VirtualRange(VirtualAddress { base }, userspace_range_ceiling - base);
    }();

    auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(move(page_directory), total_range)));
    space->page_directory().set_space({}, *space);
    return space;
}

AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory, VirtualRange total_range)
    : m_page_directory(move(page_directory))
    , m_total_range(total_range)
{
}

AddressSpace::~AddressSpace()
{
    delete_all_regions_assuming_they_are_unmapped();
}

void AddressSpace::delete_all_regions_assuming_they_are_unmapped()
{
    // FIXME: This could definitely be done in a more efficient manner.
    while (!m_regions.is_empty()) {
        auto& region = *m_regions.begin();
        m_regions.remove(region.vaddr().get());
        delete &region;
    }
}

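// Unmap an mmap'ed range. Three cases are handled below: the range exactly matches an
// existing region, the range is contained within a single region (which is split around
// it), or the range intersects multiple regions.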
ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
{
    if (!size)
        return EINVAL;

    auto range_to_unmap = TRY(VirtualRange::expand_to_page_boundaries(addr.get(), size));

    if (!is_user_range(range_to_unmap))
        return EFAULT;

    if (auto* whole_region = find_region_from_range(range_to_unmap)) {
        if (!whole_region->is_mmap())
            return EPERM;

        PerformanceManager::add_unmap_perf_event(Process::current(), whole_region->range());

        deallocate_region(*whole_region);
        return {};
    }

    if (auto* old_region = find_region_containing(range_to_unmap)) {
        if (!old_region->is_mmap())
            return EPERM;

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address, but don't deallocate it yet.
        auto region = take_region(*old_region);

        // We manually unmap the old region here, specifying that we *don't* want the VM deallocated.
        region->unmap(Region::ShouldDeallocateVirtualRange::No);

        auto new_regions = TRY(try_split_region_around_range(*region, range_to_unmap));

        // And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
        for (auto* new_region : new_regions) {
            // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
            // leaves the caller in an undefined state.
            TRY(new_region->map(page_directory()));
        }

        PerformanceManager::add_unmap_perf_event(Process::current(), range_to_unmap);

        return {};
    }

    // Try again while checking multiple regions at a time.
    auto const& regions = TRY(find_regions_intersecting(range_to_unmap));
    if (regions.is_empty())
        return {};

    // Check if any of the regions is not mmap'ed, to not accidentally
    // error out with just half a region map left.
    for (auto* region : regions) {
        if (!region->is_mmap())
            return EPERM;
    }

    Vector<Region*, 2> new_regions;

    for (auto* old_region : regions) {
        // If it's a full match we can remove the entire old region.
        if (old_region->range().intersect(range_to_unmap).size() == old_region->size()) {
            deallocate_region(*old_region);
            continue;
        }

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address, but don't deallocate it yet.
        auto region = take_region(*old_region);

        // We manually unmap the old region here, specifying that we *don't* want the VM deallocated.
        region->unmap(Region::ShouldDeallocateVirtualRange::No);

        // Otherwise, split the regions and collect them for future mapping.
        auto split_regions = TRY(try_split_region_around_range(*region, range_to_unmap));
        TRY(new_regions.try_extend(split_regions));
    }

    // And finally map the new region(s) into our page directory.
    for (auto* new_region : new_regions) {
        // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
        // leaves the caller in an undefined state.
        TRY(new_region->map(page_directory()));
    }

    PerformanceManager::add_unmap_perf_event(Process::current(), range_to_unmap);

    return {};
}

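// Find a free window of at least `size` bytes (plus alignment slack) by walking the gaps
// between existing regions, finally considering the space between the last region and the
// end of the total range.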
ErrorOr<VirtualRange> AddressSpace::try_allocate_anywhere(size_t size, size_t alignment)
{
    if (!size)
        return EINVAL;

    VERIFY((size % PAGE_SIZE) == 0);
    VERIFY((alignment % PAGE_SIZE) == 0);

    if (Checked<size_t>::addition_would_overflow(size, alignment))
        return EOVERFLOW;

    VirtualAddress window_start = m_total_range.base();

    for (auto it = m_regions.begin(); !it.is_end(); ++it) {
        auto& region = *it;

        if (window_start == region.vaddr()) {
            window_start = region.range().end();
            continue;
        }

        VirtualRange available_range { window_start, region.vaddr().get() - window_start.get() };

        window_start = region.range().end();

        // FIXME: This check is probably excluding some valid candidates when using a large alignment.
        if (available_range.size() < (size + alignment))
            continue;

        FlatPtr initial_base = available_range.base().get();
        FlatPtr aligned_base = round_up_to_power_of_two(initial_base, alignment);

        return VirtualRange { VirtualAddress(aligned_base), size };
    }

    VirtualRange available_range { window_start, m_total_range.end().get() - window_start.get() };
    if (m_total_range.contains(available_range))
        return available_range;

    dmesgln("VirtualRangeAllocator: Failed to allocate anywhere: size={}, alignment={}", size, alignment);
    return ENOMEM;
}

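// Check whether the exact requested range is available: it must lie within the total range
// and must not overlap the nearest existing region below it or that region's successor.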
ErrorOr<VirtualRange> AddressSpace::try_allocate_specific(VirtualAddress base, size_t size)
{
    if (!size)
        return EINVAL;

    VERIFY(base.is_page_aligned());
    VERIFY((size % PAGE_SIZE) == 0);

    VirtualRange const range { base, size };
    if (!m_total_range.contains(range))
        return ENOMEM;

    auto* region = m_regions.find_largest_not_above(base.get());
    if (!region) {
        // The range can be accommodated below the current lowest range.
        return range;
    }

    if (region->range().intersects(range)) {
        // Requested range overlaps an existing range.
        return ENOMEM;
    }

    auto it = m_regions.begin_from(region->vaddr().get());
    VERIFY(!it.is_end());
    ++it;

    if (it.is_end()) {
        // The range can be accommodated above the nearest range.
        return range;
    }

    if (it->range().intersects(range)) {
        // Requested range overlaps the next neighbor.
        return ENOMEM;
    }

    // Requested range fits between first region and its next neighbor.
    return range;
}

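// Try a bounded number of random, aligned candidate addresses; if none of them can be
// allocated, fall back to try_allocate_anywhere().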
ErrorOr<VirtualRange> AddressSpace::try_allocate_randomized(size_t size, size_t alignment)
{
    if (!size)
        return EINVAL;

    VERIFY((size % PAGE_SIZE) == 0);
    VERIFY((alignment % PAGE_SIZE) == 0);

    // FIXME: I'm sure there's a smarter way to do this.
    constexpr size_t maximum_randomization_attempts = 1000;
    for (size_t i = 0; i < maximum_randomization_attempts; ++i) {
        VirtualAddress random_address { round_up_to_power_of_two(get_fast_random<FlatPtr>() % m_total_range.end().get(), alignment) };

        if (!m_total_range.contains(random_address, size))
            continue;

        auto range_or_error = try_allocate_specific(random_address, size);
        if (!range_or_error.is_error())
            return range_or_error.release_value();
    }

    return try_allocate_anywhere(size, alignment);
}

ErrorOr<VirtualRange> AddressSpace::try_allocate_range(VirtualAddress vaddr, size_t size, size_t alignment)
{
    vaddr.mask(PAGE_MASK);
    size = TRY(page_round_up(size));
    if (vaddr.is_null())
        return try_allocate_anywhere(size, alignment);
    return try_allocate_specific(vaddr, size);
}

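// Create a new region over a sub-range of source_region, backed by the same VMObject and
// inheriting its name, access flags, cacheability, shared/syscall/mmap/stack flags, and
// per-page CoW state.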
ErrorOr<Region*> AddressSpace::try_allocate_split_region(Region const& source_region, VirtualRange const& range, size_t offset_in_vmobject)
{
    OwnPtr<KString> region_name;
    if (!source_region.name().is_null())
        region_name = TRY(KString::try_create(source_region.name()));

    auto new_region = TRY(Region::try_create_user_accessible(
        range, source_region.vmobject(), offset_in_vmobject, move(region_name), source_region.access(), source_region.is_cacheable() ? Region::Cacheable::Yes : Region::Cacheable::No, source_region.is_shared()));
    new_region->set_syscall_region(source_region.is_syscall_region());
    new_region->set_mmap(source_region.is_mmap());
    new_region->set_stack(source_region.is_stack());
    size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
    for (size_t i = 0; i < new_region->page_count(); ++i) {
        if (source_region.should_cow(page_offset_in_source_region + i))
            TRY(new_region->set_should_cow(i, true));
    }
    return add_region(move(new_region));
}

ErrorOr<Region*> AddressSpace::allocate_region(VirtualRange const& range, StringView name, int prot, AllocationStrategy strategy)
{
    VERIFY(range.is_valid());
    OwnPtr<KString> region_name;
    if (!name.is_null())
        region_name = TRY(KString::try_create(name));
    auto vmobject = TRY(AnonymousVMObject::try_create_with_size(range.size(), strategy));
    auto region = TRY(Region::try_create_user_accessible(range, move(vmobject), 0, move(region_name), prot_to_region_access_flags(prot), Region::Cacheable::Yes, false));
    TRY(region->map(page_directory(), ShouldFlushTLB::No));
    return add_region(move(region));
}

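// Create a region that maps (part of) an existing VMObject. The requested window must lie
// within the VMObject; PROT_NONE regions are attached to the page directory but no page
// table mappings are set up for them.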
ErrorOr<Region*> AddressSpace::allocate_region_with_vmobject(VirtualRange const& range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, StringView name, int prot, bool shared)
{
    VERIFY(range.is_valid());
    size_t end_in_vmobject = offset_in_vmobject + range.size();
    if (end_in_vmobject <= offset_in_vmobject) {
        dbgln("allocate_region_with_vmobject: Overflow (offset + size)");
        return EINVAL;
    }
    if (offset_in_vmobject >= vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
        return EINVAL;
    }
    if (end_in_vmobject > vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
        return EINVAL;
    }
    offset_in_vmobject &= PAGE_MASK;
    OwnPtr<KString> region_name;
    if (!name.is_null())
        region_name = TRY(KString::try_create(name));
    auto region = TRY(Region::try_create_user_accessible(range, move(vmobject), offset_in_vmobject, move(region_name), prot_to_region_access_flags(prot), Region::Cacheable::Yes, shared));
    if (prot == PROT_NONE) {
        // For PROT_NONE mappings, we don't have to set up any page table mappings.
        // We do still need to attach the region to the page_directory though.
        SpinlockLocker mm_locker(s_mm_lock);
        region->set_page_directory(page_directory());
    } else {
        TRY(region->map(page_directory(), ShouldFlushTLB::No));
    }
    return add_region(move(region));
}

void AddressSpace::deallocate_region(Region& region)
{
    (void)take_region(region);
}

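// Remove the region from the tree and hand ownership back to the caller. This reverses
// the intentional leak performed by add_region() below.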
NonnullOwnPtr<Region> AddressSpace::take_region(Region& region)
{
    SpinlockLocker lock(m_lock);

    auto did_remove = m_regions.remove(region.vaddr().get());
    VERIFY(did_remove);
    return NonnullOwnPtr { NonnullOwnPtr<Region>::Adopt, region };
}

Region* AddressSpace::find_region_from_range(VirtualRange const& range)
{
    SpinlockLocker lock(m_lock);
    auto* found_region = m_regions.find(range.base().get());
    if (!found_region)
        return nullptr;
    auto& region = *found_region;
    auto rounded_range_size = page_round_up(range.size());
    if (rounded_range_size.is_error() || region.size() != rounded_range_size.value())
        return nullptr;
    return &region;
}

Region* AddressSpace::find_region_containing(VirtualRange const& range)
{
    SpinlockLocker lock(m_lock);
    auto* candidate = m_regions.find_largest_not_above(range.base().get());
    if (!candidate)
        return nullptr;
    return (*candidate).range().contains(range) ? candidate : nullptr;
}

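// Collect all regions that intersect the given range.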
ErrorOr<Vector<Region*>> AddressSpace::find_regions_intersecting(VirtualRange const& range)
{
    Vector<Region*> regions = {};
    size_t total_size_collected = 0;

    SpinlockLocker lock(m_lock);

    auto* found_region = m_regions.find_largest_not_above(range.base().get());
    if (!found_region)
        return regions;
    for (auto iter = m_regions.begin_from((*found_region).vaddr().get()); !iter.is_end(); ++iter) {
        auto const& iter_range = (*iter).range();
        if (iter_range.base() < range.end() && iter_range.end() > range.base()) {
            TRY(regions.try_append(&*iter));

            total_size_collected += (*iter).size() - iter_range.intersect(range).size();
            if (total_size_collected == range.size())
                break;
        }
    }

    return regions;
}

ErrorOr<Region*> AddressSpace::add_region(NonnullOwnPtr<Region> region)
{
    SpinlockLocker lock(m_lock);
    // NOTE: We leak the region into the IRBT here. It must be deleted or readopted when removed from the tree.
    auto* ptr = region.leak_ptr();
    m_regions.insert(ptr->vaddr().get(), *ptr);
    return ptr;
}

// Carve out a virtual address range from a region and return the two regions on either side
ErrorOr<Vector<Region*, 2>> AddressSpace::try_split_region_around_range(Region const& source_region, VirtualRange const& desired_range)
{
    VirtualRange old_region_range = source_region.range();
    auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);

    VERIFY(!remaining_ranges_after_unmap.is_empty());
    auto try_make_replacement_region = [&](VirtualRange const& new_range) -> ErrorOr<Region*> {
        VERIFY(old_region_range.contains(new_range));
        size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
        return try_allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
    };
    Vector<Region*, 2> new_regions;
    for (auto& new_range : remaining_ranges_after_unmap) {
        auto* new_region = TRY(try_make_replacement_region(new_range));
        new_regions.unchecked_append(new_region);
    }
    return new_regions;
}

void AddressSpace::dump_regions()
{
    dbgln("Process regions:");
#if ARCH(I386)
    char const* addr_padding = "";
#else
    char const* addr_padding = " ";
#endif
    dbgln("BEGIN{} END{} SIZE{} ACCESS NAME",
        addr_padding, addr_padding, addr_padding);

    SpinlockLocker lock(m_lock);

    for (auto const& region : m_regions) {
        dbgln("{:p} -- {:p} {:p} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
            region.is_readable() ? 'R' : ' ',
            region.is_writable() ? 'W' : ' ',
            region.is_executable() ? 'X' : ' ',
            region.is_shared() ? 'S' : ' ',
            region.is_stack() ? 'T' : ' ',
            region.is_syscall_region() ? 'C' : ' ',
            region.name());
    }

    MM.dump_kernel_regions();
}

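// Runs on the finalizer thread: unmap every region in one batch while holding the page
// directory and memory manager locks, then delete the region objects.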
void AddressSpace::remove_all_regions(Badge<Process>)
{
    VERIFY(Thread::current() == g_finalizer);
    SpinlockLocker locker(m_lock);
    {
        SpinlockLocker pd_locker(m_page_directory->get_lock());
        SpinlockLocker mm_locker(s_mm_lock);
        for (auto& region : m_regions)
            region.unmap_with_locks_held(Region::ShouldDeallocateVirtualRange::No, ShouldFlushTLB::No, pd_locker, mm_locker);
    }

    delete_all_regions_assuming_they_are_unmapped();
}

size_t AddressSpace::amount_dirty_private() const
{
    SpinlockLocker lock(m_lock);
    // FIXME: This gets a bit more complicated for Regions sharing the same underlying VMObject.
    //        The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping.
    //        That's probably a situation that needs to be looked at in general.
    size_t amount = 0;
    for (auto const& region : m_regions) {
        if (!region.is_shared())
            amount += region.amount_dirty();
    }
    return amount;
}

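// Sum amount_clean() over the distinct InodeVMObjects referenced by this space's regions
// (deduplicated via a HashTable so a shared VMObject is only counted once).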
ErrorOr<size_t> AddressSpace::amount_clean_inode() const
{
    SpinlockLocker lock(m_lock);
    HashTable<InodeVMObject const*> vmobjects;
    for (auto const& region : m_regions) {
        if (region.vmobject().is_inode())
            TRY(vmobjects.try_set(&static_cast<InodeVMObject const&>(region.vmobject())));
    }
    size_t amount = 0;
    for (auto& vmobject : vmobjects)
        amount += vmobject->amount_clean();
    return amount;
}

size_t AddressSpace::amount_virtual() const
{
    SpinlockLocker lock(m_lock);
    size_t amount = 0;
    for (auto const& region : m_regions) {
        amount += region.size();
    }
    return amount;
}

size_t AddressSpace::amount_resident() const
{
    SpinlockLocker lock(m_lock);
    // FIXME: This will double count if multiple regions use the same physical page.
    size_t amount = 0;
    for (auto const& region : m_regions) {
        amount += region.amount_resident();
    }
    return amount;
}

size_t AddressSpace::amount_shared() const
{
    SpinlockLocker lock(m_lock);
    // FIXME: This will double count if multiple regions use the same physical page.
    // FIXME: It doesn't work at the moment, since it relies on PhysicalPage ref counts,
    //        and each PhysicalPage is only reffed by its VMObject. This needs to be refactored
    //        so that every Region contributes +1 ref to each of its PhysicalPages.
    size_t amount = 0;
    for (auto const& region : m_regions) {
        amount += region.amount_shared();
    }
    return amount;
}

size_t AddressSpace::amount_purgeable_volatile() const
{
    SpinlockLocker lock(m_lock);
    size_t amount = 0;
    for (auto const& region : m_regions) {
        if (!region.vmobject().is_anonymous())
            continue;
        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
        if (vmobject.is_purgeable() && vmobject.is_volatile())
            amount += region.amount_resident();
    }
    return amount;
}

size_t AddressSpace::amount_purgeable_nonvolatile() const
{
    SpinlockLocker lock(m_lock);
    size_t amount = 0;
    for (auto const& region : m_regions) {
        if (!region.vmobject().is_anonymous())
            continue;
        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
        if (vmobject.is_purgeable() && !vmobject.is_volatile())
            amount += region.amount_resident();
    }
    return amount;
}

}