/*
 * Copyright (c) 2021-2022, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/API/MemoryLayout.h>
#include <Kernel/Arch/CPU.h>
#include <Kernel/Locking/Spinlock.h>
#include <Kernel/Memory/AddressSpace.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/InodeVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Scheduler.h>

namespace Kernel::Memory {
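
// Creates a new AddressSpace. If a parent address space is given, the new space
// inherits the parent's total virtual range; otherwise the userspace range spans
// from USER_RANGE_BASE (plus a small page-aligned random offset) up to
// USER_RANGE_CEILING.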
ErrorOr<NonnullOwnPtr<AddressSpace>> AddressSpace::try_create(AddressSpace const* parent)
{
    auto page_directory = TRY(PageDirectory::try_create_for_userspace());

    VirtualRange total_range = [&]() -> VirtualRange {
        if (parent)
            return parent->m_region_tree.total_range();
        constexpr FlatPtr userspace_range_base = USER_RANGE_BASE;
        FlatPtr const userspace_range_ceiling = USER_RANGE_CEILING;
        // Note: Due to operator precedence this is (random % 2) * MiB, so the offset is either 0 or 1 MiB (both already page-aligned).
        size_t random_offset = (get_fast_random<u8>() % 2 * MiB) & PAGE_MASK;
        FlatPtr base = userspace_range_base + random_offset;
        return VirtualRange(VirtualAddress { base }, userspace_range_ceiling - base);
    }();

    auto space = TRY(adopt_nonnull_own_or_enomem(new (nothrow) AddressSpace(move(page_directory), total_range)));
    space->page_directory().set_space({}, *space);
    return space;
}

AddressSpace::AddressSpace(NonnullRefPtr<PageDirectory> page_directory, VirtualRange total_range)
    : m_page_directory(move(page_directory))
    , m_region_tree(total_range)
{
}

AddressSpace::~AddressSpace() = default;
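
// Unmaps an mmap()ed range. Handles three cases: the range exactly matches one
// region, the range lies inside a single region (which is split around it), or
// the range intersects several regions. Returns EPERM if any affected region
// was not created by mmap().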
ErrorOr<void> AddressSpace::unmap_mmap_range(VirtualAddress addr, size_t size)
{
    if (!size)
        return EINVAL;

    auto range_to_unmap = TRY(VirtualRange::expand_to_page_boundaries(addr.get(), size));

    if (!is_user_range(range_to_unmap))
        return EFAULT;

    if (auto* whole_region = find_region_from_range(range_to_unmap)) {
        if (!whole_region->is_mmap())
            return EPERM;

        PerformanceManager::add_unmap_perf_event(Process::current(), whole_region->range());

        deallocate_region(*whole_region);
        return {};
    }

    if (auto* old_region = find_region_containing(range_to_unmap)) {
        if (!old_region->is_mmap())
            return EPERM;

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address.
        auto region = take_region(*old_region);
        region->unmap();

        auto new_regions = TRY(try_split_region_around_range(*region, range_to_unmap));

        // And finally we map the new region(s) using our page directory (they were just allocated and don't have one).
        for (auto* new_region : new_regions) {
            // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
            // leaves the caller in an undefined state.
            TRY(new_region->map(page_directory()));
        }

        PerformanceManager::add_unmap_perf_event(Process::current(), range_to_unmap);
        return {};
    }

    // Try again while checking multiple regions at a time.
    auto const& regions = TRY(find_regions_intersecting(range_to_unmap));
    if (regions.is_empty())
        return {};

    // Check up front that every intersecting region is mmap'ed, so we don't error out
    // partway through and leave the range only half unmapped.
    for (auto* region : regions) {
        if (!region->is_mmap())
            return EPERM;
    }

    Vector<Region*, 2> new_regions;

    for (auto* old_region : regions) {
        // If it's a full match we can remove the entire old region.
        if (old_region->range().intersect(range_to_unmap).size() == old_region->size()) {
            deallocate_region(*old_region);
            continue;
        }

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address.
        auto region = take_region(*old_region);
        region->unmap();

        // Otherwise, split the regions and collect them for future mapping.
        auto split_regions = TRY(try_split_region_around_range(*region, range_to_unmap));
        TRY(new_regions.try_extend(split_regions));
    }

    // And finally map the new region(s) into our page directory.
    for (auto* new_region : new_regions) {
        // TODO: Ideally we should do this in a way that can be rolled back on failure, as failing here
        // leaves the caller in an undefined state.
        TRY(new_region->map(page_directory()));
    }

    PerformanceManager::add_unmap_perf_event(Process::current(), range_to_unmap);
    return {};
}
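
// Creates a new Region covering the given sub-range, backed by the same VMObject as
// source_region at the given offset, and copies over the source's access flags,
// cacheability, shared/stack/mmap/syscall attributes and per-page CoW state. The new
// region is placed at exactly that range in the region tree; the caller maps it.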
ErrorOr<Region*> AddressSpace::try_allocate_split_region(Region const& source_region, VirtualRange const& range, size_t offset_in_vmobject)
{
    OwnPtr<KString> region_name;
    if (!source_region.name().is_null())
        region_name = TRY(KString::try_create(source_region.name()));

    auto new_region = TRY(Region::create_unplaced(
        source_region.vmobject(), offset_in_vmobject, move(region_name), source_region.access(), source_region.is_cacheable() ? Region::Cacheable::Yes : Region::Cacheable::No, source_region.is_shared()));
    new_region->set_syscall_region(source_region.is_syscall_region());
    new_region->set_mmap(source_region.is_mmap());
    new_region->set_stack(source_region.is_stack());

    size_t page_offset_in_source_region = (offset_in_vmobject - source_region.offset_in_vmobject()) / PAGE_SIZE;
    for (size_t i = 0; i < new_region->page_count(); ++i) {
        if (source_region.should_cow(page_offset_in_source_region + i))
            TRY(new_region->set_should_cow(i, true));
    }

    SpinlockLocker locker(m_lock);
    TRY(m_region_tree.place_specifically(*new_region, range));
    return new_region.leak_ptr();
}
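
// Allocates a region backed by a fresh AnonymousVMObject of the (page-rounded)
// requested size. A null requested_address means "place anywhere", subject to the
// randomization setting; otherwise the region is placed at exactly the requested,
// page-aligned address. The region is mapped without flushing the TLB.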
ErrorOr<Region*> AddressSpace::allocate_region(RandomizeVirtualAddress randomize_virtual_address, VirtualAddress requested_address, size_t requested_size, size_t requested_alignment, StringView name, int prot, AllocationStrategy strategy)
{
    if (!requested_address.is_page_aligned())
        return EINVAL;
    auto size = TRY(Memory::page_round_up(requested_size));
    auto alignment = TRY(Memory::page_round_up(requested_alignment));

    OwnPtr<KString> region_name;
    if (!name.is_null())
        region_name = TRY(KString::try_create(name));

    auto vmobject = TRY(AnonymousVMObject::try_create_with_size(size, strategy));
    auto region = TRY(Region::create_unplaced(move(vmobject), 0, move(region_name), prot_to_region_access_flags(prot)));

    if (requested_address.is_null()) {
        TRY(m_region_tree.place_anywhere(*region, randomize_virtual_address, size, alignment));
    } else {
        TRY(m_region_tree.place_specifically(*region, VirtualRange { requested_address, size }));
    }

    TRY(region->map(page_directory(), ShouldFlushTLB::No));
    return region.leak_ptr();
}
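
// Allocates a region backed by an existing VMObject at the given byte offset. The
// shorter overload forwards to the full overload below with RandomizeVirtualAddress::Yes
// and page-size alignment. For PROT_NONE mappings the region is attached to the page
// directory but no page table entries are set up.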
ErrorOr<Region*> AddressSpace::allocate_region_with_vmobject(VirtualRange requested_range, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, StringView name, int prot, bool shared)
{
    return allocate_region_with_vmobject(RandomizeVirtualAddress::Yes, requested_range.base(), requested_range.size(), PAGE_SIZE, move(vmobject), offset_in_vmobject, name, prot, shared);
}

ErrorOr<Region*> AddressSpace::allocate_region_with_vmobject(RandomizeVirtualAddress randomize_virtual_address, VirtualAddress requested_address, size_t requested_size, size_t requested_alignment, NonnullRefPtr<VMObject> vmobject, size_t offset_in_vmobject, StringView name, int prot, bool shared)
{
    if (!requested_address.is_page_aligned())
        return EINVAL;
    auto size = TRY(page_round_up(requested_size));
    auto alignment = TRY(page_round_up(requested_alignment));

    if (Checked<size_t>::addition_would_overflow(offset_in_vmobject, requested_size))
        return EOVERFLOW;

    size_t end_in_vmobject = offset_in_vmobject + requested_size;
    if (offset_in_vmobject >= vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an offset past the end of its VMObject.");
        return EINVAL;
    }
    if (end_in_vmobject > vmobject->size()) {
        dbgln("allocate_region_with_vmobject: Attempt to allocate a region with an end past the end of its VMObject.");
        return EINVAL;
    }
    offset_in_vmobject &= PAGE_MASK;

    OwnPtr<KString> region_name;
    if (!name.is_null())
        region_name = TRY(KString::try_create(name));

    auto region = TRY(Region::create_unplaced(move(vmobject), offset_in_vmobject, move(region_name), prot_to_region_access_flags(prot), Region::Cacheable::Yes, shared));

    SpinlockLocker locker(m_lock);

    if (requested_address.is_null())
        TRY(m_region_tree.place_anywhere(*region, randomize_virtual_address, size, alignment));
    else
        TRY(m_region_tree.place_specifically(*region, VirtualRange { VirtualAddress { requested_address }, size }));

    ArmedScopeGuard remove_region_from_tree_on_failure = [this, &region]() {
        // At this point the region is already part of the Process region tree, so we have to make sure
        // we remove it from the tree before returning an error, or else the Region tree will contain
        // a dangling pointer to the freed Region instance.
        m_region_tree.remove(*region);
    };

    if (prot == PROT_NONE) {
        // For PROT_NONE mappings, we don't have to set up any page table mappings.
        // We do still need to attach the region to the page_directory though.
        region->set_page_directory(page_directory());
    } else {
        TRY(region->map(page_directory(), ShouldFlushTLB::No));
    }

    remove_region_from_tree_on_failure.disarm();
    return region.leak_ptr();
}
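
// deallocate_region() removes a region from the region tree and destroys it.
// take_region() removes a region from the tree and transfers ownership to the caller.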
void AddressSpace::deallocate_region(Region& region)
{
    (void)take_region(region);
}

NonnullOwnPtr<Region> AddressSpace::take_region(Region& region)
{
    auto did_remove = m_region_tree.remove(region);
    VERIFY(did_remove);
    return NonnullOwnPtr { NonnullOwnPtr<Region>::Adopt, region };
}
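
// Returns the region whose base address equals the range's base and whose size matches
// the page-rounded range size, or nullptr if there is no exact match.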
Region* AddressSpace::find_region_from_range(VirtualRange const& range)
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    auto* found_region = m_region_tree.regions().find(range.base().get());
    if (!found_region)
        return nullptr;
    auto& region = *found_region;
    auto rounded_range_size = page_round_up(range.size());
    if (rounded_range_size.is_error() || region.size() != rounded_range_size.value())
        return nullptr;
    return &region;
}

Region* AddressSpace::find_region_containing(VirtualRange const& range)
{
    return m_region_tree.find_region_containing(range);
}
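
// Collects pointers to every region that intersects the given range, walking the
// region tree starting from the last region whose base is not above the range's base.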
ErrorOr<Vector<Region*, 4>> AddressSpace::find_regions_intersecting(VirtualRange const& range)
{
    Vector<Region*, 4> regions = {};
    size_t total_size_collected = 0;

    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());

    auto* found_region = m_region_tree.regions().find_largest_not_above(range.base().get());
    if (!found_region)
        return regions;
    for (auto iter = m_region_tree.regions().begin_from(*found_region); !iter.is_end(); ++iter) {
        auto const& iter_range = (*iter).range();
        if (iter_range.base() < range.end() && iter_range.end() > range.base()) {
            TRY(regions.try_append(&*iter));

            total_size_collected += (*iter).size() - iter_range.intersect(range).size();
            if (total_size_collected == range.size())
                break;
        }
    }

    return regions;
}

// Carve out a virtual address range from a region and return the two regions on either side.
ErrorOr<Vector<Region*, 2>> AddressSpace::try_split_region_around_range(Region const& source_region, VirtualRange const& desired_range)
{
    VirtualRange old_region_range = source_region.range();
    auto remaining_ranges_after_unmap = old_region_range.carve(desired_range);

    VERIFY(!remaining_ranges_after_unmap.is_empty());
    auto try_make_replacement_region = [&](VirtualRange const& new_range) -> ErrorOr<Region*> {
        VERIFY(old_region_range.contains(new_range));
        size_t new_range_offset_in_vmobject = source_region.offset_in_vmobject() + (new_range.base().get() - old_region_range.base().get());
        return try_allocate_split_region(source_region, new_range, new_range_offset_in_vmobject);
    };

    Vector<Region*, 2> new_regions;
    for (auto& new_range : remaining_ranges_after_unmap) {
        auto* new_region = TRY(try_make_replacement_region(new_range));
        new_regions.unchecked_append(new_region);
    }
    return new_regions;
}
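
// Prints a table of all regions in this address space (address range, size, and
// R/W/X/Shared/sTack/sysCall flags) to the debug log, followed by the kernel regions.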
void AddressSpace::dump_regions()
{
    dbgln("Process regions:");
#if ARCH(I386)
    char const* addr_padding = "";
#else
    char const* addr_padding = " ";
#endif
    dbgln("BEGIN{} END{} SIZE{} ACCESS NAME",
        addr_padding, addr_padding, addr_padding);

    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());

    for (auto const& region : m_region_tree.regions()) {
        dbgln("{:p} -- {:p} {:p} {:c}{:c}{:c}{:c}{:c}{:c} {}", region.vaddr().get(), region.vaddr().offset(region.size() - 1).get(), region.size(),
            region.is_readable() ? 'R' : ' ',
            region.is_writable() ? 'W' : ' ',
            region.is_executable() ? 'X' : ' ',
            region.is_shared() ? 'S' : ' ',
            region.is_stack() ? 'T' : ' ',
            region.is_syscall_region() ? 'C' : ' ',
            region.name());
    }
    MM.dump_kernel_regions();
}
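
// Tears down the whole address space on the finalizer thread during process teardown:
// every region is unmapped while the page directory, MM and region tree locks are held,
// and the regions themselves are then deleted from the tree.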
void AddressSpace::remove_all_regions(Badge<Process>)
{
    VERIFY(Thread::current() == g_finalizer);
    SpinlockLocker locker(m_lock);
    {
        SpinlockLocker pd_locker(m_page_directory->get_lock());
        SpinlockLocker mm_locker(s_mm_lock);
        SpinlockLocker tree_locker(m_region_tree.get_lock());
        for (auto& region : m_region_tree.regions())
            region.unmap_with_locks_held(ShouldFlushTLB::No, pd_locker, mm_locker);
    }

    m_region_tree.delete_all_regions_assuming_they_are_unmapped();
}
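
// The accounting helpers below walk the region tree under the space and tree locks to
// sum up per-process memory statistics (dirty private, clean inode-backed, virtual,
// resident, shared, and purgeable amounts).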
size_t AddressSpace::amount_dirty_private() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    // FIXME: This gets a bit more complicated for Regions sharing the same underlying VMObject.
    //        The main issue I'm thinking of is when the VMObject has physical pages that none of the Regions are mapping.
    //        That's probably a situation that needs to be looked at in general.
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        if (!region.is_shared())
            amount += region.amount_dirty();
    }
    return amount;
}

ErrorOr<size_t> AddressSpace::amount_clean_inode() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    HashTable<InodeVMObject const*> vmobjects;
    for (auto const& region : m_region_tree.regions()) {
        if (region.vmobject().is_inode())
            TRY(vmobjects.try_set(&static_cast<InodeVMObject const&>(region.vmobject())));
    }
    size_t amount = 0;
    for (auto& vmobject : vmobjects)
        amount += vmobject->amount_clean();
    return amount;
}

size_t AddressSpace::amount_virtual() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        amount += region.size();
    }
    return amount;
}

size_t AddressSpace::amount_resident() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    // FIXME: This will double count if multiple regions use the same physical page.
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        amount += region.amount_resident();
    }
    return amount;
}

size_t AddressSpace::amount_shared() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    // FIXME: This will double count if multiple regions use the same physical page.
    // FIXME: It doesn't work at the moment, since it relies on PhysicalPage ref counts,
    //        and each PhysicalPage is only reffed by its VMObject. This needs to be refactored
    //        so that every Region contributes +1 ref to each of its PhysicalPages.
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        amount += region.amount_shared();
    }
    return amount;
}

size_t AddressSpace::amount_purgeable_volatile() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        if (!region.vmobject().is_anonymous())
            continue;
        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
        if (vmobject.is_purgeable() && vmobject.is_volatile())
            amount += region.amount_resident();
    }
    return amount;
}

size_t AddressSpace::amount_purgeable_nonvolatile() const
{
    SpinlockLocker lock(m_lock);
    SpinlockLocker tree_locker(m_region_tree.get_lock());
    size_t amount = 0;
    for (auto const& region : m_region_tree.regions()) {
        if (!region.vmobject().is_anonymous())
            continue;
        auto const& vmobject = static_cast<AnonymousVMObject const&>(region.vmobject());
        if (vmobject.is_purgeable() && !vmobject.is_volatile())
            amount += region.amount_resident();
    }
    return amount;
}

}