mmap.cpp

/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2021, Leon Albrecht <leon2002.la@gmail.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <Kernel/Arch/x86/MSR.h>
#include <Kernel/Arch/x86/SmapDisabler.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/Memory/AnonymousVMObject.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PageDirectory.h>
#include <Kernel/Memory/PrivateInodeVMObject.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <LibC/limits.h>
#include <LibELF/Validation.h>

namespace Kernel {

static bool should_make_executable_exception_for_dynamic_loader(bool make_readable, bool make_writable, bool make_executable, Memory::Region const& region)
{
    // Normally we don't allow W -> X transitions, but we have to make an exception
    // for the dynamic loader, which needs to do this after performing text relocations.

    // FIXME: Investigate whether we could get rid of all text relocations entirely.

    // The exception is only made if all of the following criteria are fulfilled:

    // The region must be RW
    if (!(region.is_readable() && region.is_writable() && !region.is_executable()))
        return false;

    // The region wants to become RX
    if (!(make_readable && !make_writable && make_executable))
        return false;

    // The region is backed by a file
    if (!region.vmobject().is_inode())
        return false;

    // The file mapping is private, not shared (no relocations in a shared mapping!)
    if (!region.vmobject().is_private_inode())
        return false;

    auto& inode_vm = static_cast<Memory::InodeVMObject const&>(region.vmobject());
    auto& inode = inode_vm.inode();

    ElfW(Ehdr) header;
    auto buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&header);
    auto result = inode.read_bytes(0, sizeof(header), buffer, nullptr);
    if (result.is_error() || result.value() != sizeof(header))
        return false;

    // The file is a valid ELF binary
    if (!ELF::validate_elf_header(header, inode.size()))
        return false;

    // The file is an ELF shared object
    if (header.e_type != ET_DYN)
        return false;

    // FIXME: Are there any additional checks/validations we could do here?
    return true;
}
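
// Validates the requested protection bits against the kernel's W^X policy:
// anonymous and stack mappings may never be executable, no mapping may be both
// writable and executable, stacks must be readable and writable, and (when an
// existing region is given) W -> X and X -> W transitions are refused unless
// the dynamic loader exception above applies.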
static bool validate_mmap_prot(int prot, bool map_stack, bool map_anonymous, Memory::Region const* region = nullptr)
{
    bool make_readable = prot & PROT_READ;
    bool make_writable = prot & PROT_WRITE;
    bool make_executable = prot & PROT_EXEC;

    if (map_anonymous && make_executable)
        return false;

    if (make_writable && make_executable)
        return false;

    if (map_stack) {
        if (make_executable)
            return false;
        if (!make_readable || !make_writable)
            return false;
    }

    if (region) {
        if (make_writable && region->has_been_executable())
            return false;

        if (make_executable && region->has_been_writable()) {
            if (should_make_executable_exception_for_dynamic_loader(make_readable, make_writable, make_executable, *region))
                return true;
            return false;
        }
    }

    return true;
}
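
// Checks the requested protection against the backing inode: PROT_READ requires
// read permission on the file, and for shared mappings PROT_WRITE also requires
// write permission. Writable and executable mappings of the same shared inode
// VMObject are mutually exclusive.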
static bool validate_inode_mmap_prot(const Process& process, int prot, const Inode& inode, bool map_shared)
{
    auto metadata = inode.metadata();
    if ((prot & PROT_READ) && !metadata.may_read(process))
        return false;

    if (map_shared) {
        // FIXME: What about readonly filesystem mounts? We cannot make a
        //        decision here without knowing the mount flags, so we would need to
        //        keep a Custody or something from mmap time.
        if ((prot & PROT_WRITE) && !metadata.may_write(process))
            return false;
        if (auto shared_vmobject = inode.shared_vmobject()) {
            if ((prot & PROT_EXEC) && shared_vmobject->writable_mappings())
                return false;
            if ((prot & PROT_WRITE) && shared_vmobject->executable_mappings())
                return false;
        }
    }
    return true;
}
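
// sys$mmap: validates the SC_mmap_params copied from userspace, allocates a
// virtual range (randomized, fixed, or hinted), and backs it with either an
// anonymous VMObject or a file-backed one before returning the mapping's base
// address. Illustrative userspace call (the LibC wrapper packs these arguments
// into SC_mmap_params):
//     mmap(nullptr, 0x1000, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);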
KResultOr<FlatPtr> Process::sys$mmap(Userspace<const Syscall::SC_mmap_params*> user_params)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    Syscall::SC_mmap_params params;
    if (!copy_from_user(&params, user_params))
        return EFAULT;

    FlatPtr addr = params.addr;
    auto size = params.size;
    auto alignment = params.alignment;
    auto prot = params.prot;
    auto flags = params.flags;
    auto fd = params.fd;
    auto offset = params.offset;

    if (prot & PROT_EXEC) {
        REQUIRE_PROMISE(prot_exec);
    }

    if (flags & MAP_FIXED) {
        REQUIRE_PROMISE(map_fixed);
    }

    if (alignment & ~PAGE_MASK)
        return EINVAL;

    if (Memory::page_round_up_would_wrap(size))
        return EINVAL;

    if (!Memory::is_user_range(VirtualAddress(addr), Memory::page_round_up(size)))
        return EFAULT;

    OwnPtr<KString> name;
    if (params.name.characters) {
        if (params.name.length > PATH_MAX)
            return ENAMETOOLONG;
        auto name_or_error = try_copy_kstring_from_user(params.name);
        if (name_or_error.is_error())
            return name_or_error.error();
        name = name_or_error.release_value();
    }

    if (size == 0)
        return EINVAL;
    if ((FlatPtr)addr & ~PAGE_MASK)
        return EINVAL;

    bool map_shared = flags & MAP_SHARED;
    bool map_anonymous = flags & MAP_ANONYMOUS;
    bool map_private = flags & MAP_PRIVATE;
    bool map_stack = flags & MAP_STACK;
    bool map_fixed = flags & MAP_FIXED;
    bool map_noreserve = flags & MAP_NORESERVE;
    bool map_randomized = flags & MAP_RANDOMIZED;

    if (map_shared && map_private)
        return EINVAL;

    if (!map_shared && !map_private)
        return EINVAL;

    if (map_fixed && map_randomized)
        return EINVAL;

    if (!validate_mmap_prot(prot, map_stack, map_anonymous))
        return EINVAL;

    if (map_stack && (!map_private || !map_anonymous))
        return EINVAL;

    Memory::Region* region = nullptr;
    Optional<Memory::VirtualRange> range;

    if (map_randomized) {
        range = address_space().page_directory().range_allocator().allocate_randomized(Memory::page_round_up(size), alignment);
    } else {
        range = address_space().allocate_range(VirtualAddress(addr), size, alignment);
        if (!range.has_value()) {
            if (addr && !map_fixed) {
                // If there's an address but MAP_FIXED wasn't specified, the address is just a hint.
                range = address_space().allocate_range({}, size, alignment);
            }
        }
    }

    if (!range.has_value())
        return ENOMEM;
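
    // A virtual range has been reserved; now back it with memory. Anonymous
    // mappings get a fresh (optionally purgeable) AnonymousVMObject, while
    // file-backed mappings go through the file description's mmap() hook.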
    if (map_anonymous) {
        auto strategy = map_noreserve ? AllocationStrategy::None : AllocationStrategy::Reserve;
        RefPtr<Memory::AnonymousVMObject> vmobject;
        if (flags & MAP_PURGEABLE) {
            auto maybe_vmobject = Memory::AnonymousVMObject::try_create_purgeable_with_size(Memory::page_round_up(size), strategy);
            if (maybe_vmobject.is_error())
                return maybe_vmobject.error();
            vmobject = maybe_vmobject.release_value();
        } else {
            auto maybe_vmobject = Memory::AnonymousVMObject::try_create_with_size(Memory::page_round_up(size), strategy);
            if (maybe_vmobject.is_error())
                return maybe_vmobject.error();
            vmobject = maybe_vmobject.release_value();
        }

        auto region_or_error = address_space().allocate_region_with_vmobject(range.value(), vmobject.release_nonnull(), 0, {}, prot, map_shared);
        if (region_or_error.is_error())
            return region_or_error.error().error();
        region = region_or_error.value();
    } else {
        if (offset < 0)
            return EINVAL;
        if (static_cast<size_t>(offset) & ~PAGE_MASK)
            return EINVAL;
        auto description = fds().file_description(fd);
        if (!description)
            return EBADF;
        if (description->is_directory())
            return ENODEV;
        // Require read access even when read protection is not requested.
        if (!description->is_readable())
            return EACCES;
        if (map_shared) {
            if ((prot & PROT_WRITE) && !description->is_writable())
                return EACCES;
        }
        if (description->inode()) {
            if (!validate_inode_mmap_prot(*this, prot, *description->inode(), map_shared))
                return EACCES;
        }

        auto region_or_error = description->mmap(*this, range.value(), static_cast<u64>(offset), prot, map_shared);
        if (region_or_error.is_error())
            return region_or_error.error().error();
        region = region_or_error.value();
    }

    if (!region)
        return ENOMEM;

    region->set_mmap(true);
    if (map_shared)
        region->set_shared(true);
    if (map_stack)
        region->set_stack(true);
    region->set_name(move(name));

    PerformanceManager::add_mmap_perf_event(*this, *region);

    return region->vaddr().get();
}
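
// Helper that expands an arbitrary (address, size) pair outward to page
// boundaries, rejecting ranges that would overflow or wrap the address space.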
static KResultOr<Memory::VirtualRange> expand_range_to_page_boundaries(FlatPtr address, size_t size)
{
    if (Memory::page_round_up_would_wrap(size))
        return EINVAL;

    if ((address + size) < address)
        return EINVAL;

    if (Memory::page_round_up_would_wrap(address + size))
        return EINVAL;

    auto base = VirtualAddress { address }.page_base();
    auto end = Memory::page_round_up(address + size);

    return Memory::VirtualRange { base, end - base.get() };
}
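
// sys$mprotect handles three cases, in order: the range matches an existing
// region exactly, the range is fully contained in a single region (which gets
// split around it), or the range spans several regions (each of which is
// remapped or split as needed).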
KResultOr<FlatPtr> Process::sys$mprotect(Userspace<void*> addr, size_t size, int prot)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    if (prot & PROT_EXEC) {
        REQUIRE_PROMISE(prot_exec);
    }

    auto range_or_error = expand_range_to_page_boundaries(addr, size);
    if (range_or_error.is_error())
        return range_or_error.error();

    auto range_to_mprotect = range_or_error.value();
    if (!range_to_mprotect.size())
        return EINVAL;

    if (!is_user_range(range_to_mprotect))
        return EFAULT;

    if (auto* whole_region = address_space().find_region_from_range(range_to_mprotect)) {
        if (!whole_region->is_mmap())
            return EPERM;
        if (!validate_mmap_prot(prot, whole_region->is_stack(), whole_region->vmobject().is_anonymous(), whole_region))
            return EINVAL;
        if (whole_region->access() == Memory::prot_to_region_access_flags(prot))
            return 0;
        if (whole_region->vmobject().is_inode()
            && !validate_inode_mmap_prot(*this, prot, static_cast<Memory::InodeVMObject const&>(whole_region->vmobject()).inode(), whole_region->is_shared())) {
            return EACCES;
        }
        whole_region->set_readable(prot & PROT_READ);
        whole_region->set_writable(prot & PROT_WRITE);
        whole_region->set_executable(prot & PROT_EXEC);

        whole_region->remap();
        return 0;
    }

    // Check if we can carve out the desired range from an existing region
    if (auto* old_region = address_space().find_region_containing(range_to_mprotect)) {
        if (!old_region->is_mmap())
            return EPERM;
        if (!validate_mmap_prot(prot, old_region->is_stack(), old_region->vmobject().is_anonymous(), old_region))
            return EINVAL;
        if (old_region->access() == Memory::prot_to_region_access_flags(prot))
            return 0;
        if (old_region->vmobject().is_inode()
            && !validate_inode_mmap_prot(*this, prot, static_cast<Memory::InodeVMObject const&>(old_region->vmobject()).inode(), old_region->is_shared())) {
            return EACCES;
        }

        // Remove the old region from our regions tree, since we're going to add another region
        // with the exact same start address, but don't deallocate it yet.
        auto region = address_space().take_region(*old_region);

        // Unmap the old region here, specifying that we *don't* want the VM deallocated.
        region->unmap(Memory::Region::ShouldDeallocateVirtualRange::No);

        // This vector is the region(s) adjacent to our range.
        // We need to allocate a new region for the range we wanted to change permission bits on.
        auto adjacent_regions_or_error = address_space().try_split_region_around_range(*region, range_to_mprotect);
        if (adjacent_regions_or_error.is_error())
            return adjacent_regions_or_error.error();
        auto& adjacent_regions = adjacent_regions_or_error.value();

        size_t new_range_offset_in_vmobject = region->offset_in_vmobject() + (range_to_mprotect.base().get() - region->range().base().get());
        auto new_region_or_error = address_space().try_allocate_split_region(*region, range_to_mprotect, new_range_offset_in_vmobject);
        if (new_region_or_error.is_error())
            return new_region_or_error.error();
        auto& new_region = *new_region_or_error.value();
        new_region.set_readable(prot & PROT_READ);
        new_region.set_writable(prot & PROT_WRITE);
        new_region.set_executable(prot & PROT_EXEC);

        // Map the new regions using our page directory (they were just allocated and don't have one).
        for (auto* adjacent_region : adjacent_regions) {
            if (!adjacent_region->map(address_space().page_directory()))
                return ENOMEM;
        }
        if (!new_region.map(address_space().page_directory()))
            return ENOMEM;
        return 0;
    }
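
    // Finally, the range may span several existing regions. Validate all of
    // them first, then adjust each intersecting region in turn.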
    if (const auto& regions = address_space().find_regions_intersecting(range_to_mprotect); regions.size()) {
        size_t full_size_found = 0;
        // First pass: validate all intersecting regions before changing anything.
        for (const auto* region : regions) {
            if (!region->is_mmap())
                return EPERM;
            if (!validate_mmap_prot(prot, region->is_stack(), region->vmobject().is_anonymous(), region))
                return EINVAL;
            if (region->access() == Memory::prot_to_region_access_flags(prot))
                return 0;
            if (region->vmobject().is_inode()
                && !validate_inode_mmap_prot(*this, prot, static_cast<Memory::InodeVMObject const&>(region->vmobject()).inode(), region->is_shared())) {
                return EACCES;
            }
            full_size_found += region->range().intersect(range_to_mprotect).size();
        }

        if (full_size_found != range_to_mprotect.size())
            return ENOMEM;

        // Second pass: apply the protection change to each intersecting region.
        for (auto* old_region : regions) {
            const auto intersection_to_mprotect = range_to_mprotect.intersect(old_region->range());
            // The region is fully covered by the range; just change its permissions in place.
            if (intersection_to_mprotect == old_region->range()) {
                old_region->set_readable(prot & PROT_READ);
                old_region->set_writable(prot & PROT_WRITE);
                old_region->set_executable(prot & PROT_EXEC);

                old_region->remap();
                continue;
            }
            // Remove the old region from our regions tree, since we're going to add another region
            // with the exact same start address, but don't deallocate it yet.
            auto region = address_space().take_region(*old_region);

            // Unmap the old region here, specifying that we *don't* want the VM deallocated.
            region->unmap(Memory::Region::ShouldDeallocateVirtualRange::No);

            // This vector is the region(s) adjacent to our range.
            // We need to allocate a new region for the range we wanted to change permission bits on.
            auto adjacent_regions_or_error = address_space().try_split_region_around_range(*old_region, intersection_to_mprotect);
            if (adjacent_regions_or_error.is_error())
                return adjacent_regions_or_error.error();
            auto& adjacent_regions = adjacent_regions_or_error.value();

            // There should only be one adjacent region here, since the range extends past at least one end of this region.
            VERIFY(adjacent_regions.size() == 1);

            size_t new_range_offset_in_vmobject = old_region->offset_in_vmobject() + (intersection_to_mprotect.base().get() - old_region->range().base().get());
            auto new_region_or_error = address_space().try_allocate_split_region(*region, intersection_to_mprotect, new_range_offset_in_vmobject);
            if (new_region_or_error.is_error())
                return new_region_or_error.error();

            auto& new_region = *new_region_or_error.value();
            new_region.set_readable(prot & PROT_READ);
            new_region.set_writable(prot & PROT_WRITE);
            new_region.set_executable(prot & PROT_EXEC);

            // Map the new region and the adjacent region (if any) using our page directory;
            // they were just allocated and don't have one.
            if (adjacent_regions.size())
                if (!adjacent_regions[0]->map(address_space().page_directory()))
                    return ENOMEM;

            if (!new_region.map(address_space().page_directory()))
                return ENOMEM;
        }

        return 0;
    }

    return EINVAL;
}
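
// sys$madvise only supports MADV_SET_VOLATILE and MADV_SET_NONVOLATILE, and
// only on purgeable anonymous regions. It returns 1 if the region's contents
// had been purged, 0 otherwise.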
KResultOr<FlatPtr> Process::sys$madvise(Userspace<void*> address, size_t size, int advice)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    auto range_or_error = expand_range_to_page_boundaries(address, size);
    if (range_or_error.is_error())
        return range_or_error.error();

    auto range_to_madvise = range_or_error.value();
    if (!range_to_madvise.size())
        return EINVAL;

    if (!is_user_range(range_to_madvise))
        return EFAULT;

    auto* region = address_space().find_region_from_range(range_to_madvise);
    if (!region)
        return EINVAL;
    if (!region->is_mmap())
        return EPERM;

    bool set_volatile = advice & MADV_SET_VOLATILE;
    bool set_nonvolatile = advice & MADV_SET_NONVOLATILE;
    if (set_volatile && set_nonvolatile)
        return EINVAL;

    if (set_volatile || set_nonvolatile) {
        if (!region->vmobject().is_anonymous())
            return EINVAL;
        auto& vmobject = static_cast<Memory::AnonymousVMObject&>(region->vmobject());
        if (!vmobject.is_purgeable())
            return EINVAL;

        bool was_purged = false;
        auto result = vmobject.set_volatile(set_volatile, was_purged);
        if (result.is_error())
            return result.error();
        return was_purged ? 1 : 0;
    }
    return EINVAL;
}
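
// sys$set_mmap_name copies a new name from userspace, attaches it to an
// existing mmap'ed region, and records an mmap perf event for the renamed region.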
KResultOr<FlatPtr> Process::sys$set_mmap_name(Userspace<const Syscall::SC_set_mmap_name_params*> user_params)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    Syscall::SC_set_mmap_name_params params;
    if (!copy_from_user(&params, user_params))
        return EFAULT;

    if (params.name.length > PATH_MAX)
        return ENAMETOOLONG;

    auto name_or_error = try_copy_kstring_from_user(params.name);
    if (name_or_error.is_error())
        return name_or_error.error();
    auto name = name_or_error.release_value();

    auto range_or_error = expand_range_to_page_boundaries((FlatPtr)params.addr, params.size);
    if (range_or_error.is_error())
        return range_or_error.error();

    auto range = range_or_error.value();
    auto* region = address_space().find_region_from_range(range);
    if (!region)
        return EINVAL;
    if (!region->is_mmap())
        return EPERM;

    region->set_name(move(name));
    PerformanceManager::add_mmap_perf_event(*this, *region);
    return 0;
}
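
// sys$munmap delegates the actual work to AddressSpace::unmap_mmap_range().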
KResultOr<FlatPtr> Process::sys$munmap(Userspace<void*> addr, size_t size)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    auto result = address_space().unmap_mmap_range(VirtualAddress { addr }, size);
    if (result.is_error())
        return result;
    return 0;
}
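
// sys$mremap only implements one specific case: remapping a shared, inode-backed
// region as MAP_PRIVATE (without MAP_ANONYMOUS or MAP_NORESERVE), which rebacks
// the same virtual range with a PrivateInodeVMObject. All other requests return
// ENOTIMPL.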
KResultOr<FlatPtr> Process::sys$mremap(Userspace<const Syscall::SC_mremap_params*> user_params)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    Syscall::SC_mremap_params params {};
    if (!copy_from_user(&params, user_params))
        return EFAULT;

    auto range_or_error = expand_range_to_page_boundaries((FlatPtr)params.old_address, params.old_size);
    if (range_or_error.is_error())
        return range_or_error.error().error();

    auto old_range = range_or_error.value();

    auto* old_region = address_space().find_region_from_range(old_range);
    if (!old_region)
        return EINVAL;

    if (!old_region->is_mmap())
        return EPERM;

    if (old_region->vmobject().is_shared_inode() && params.flags & MAP_PRIVATE && !(params.flags & (MAP_ANONYMOUS | MAP_NORESERVE))) {
        auto range = old_region->range();
        auto old_prot = region_access_flags_to_prot(old_region->access());
        auto old_offset = old_region->offset_in_vmobject();
        NonnullRefPtr inode = static_cast<Memory::SharedInodeVMObject&>(old_region->vmobject()).inode();

        auto new_vmobject = Memory::PrivateInodeVMObject::try_create_with_inode(inode);
        if (!new_vmobject)
            return ENOMEM;

        auto old_name = old_region->take_name();

        // Unmap without deallocating the VM range since we're going to reuse it.
        old_region->unmap(Memory::Region::ShouldDeallocateVirtualRange::No);
        address_space().deallocate_region(*old_region);

        auto new_region_or_error = address_space().allocate_region_with_vmobject(range, new_vmobject.release_nonnull(), old_offset, old_name->view(), old_prot, false);
        if (new_region_or_error.is_error())
            return new_region_or_error.error().error();

        auto& new_region = *new_region_or_error.value();
        new_region.set_mmap(true);
        return new_region.vaddr().get();
    }

    dbgln("sys$mremap: Unimplemented remap request (flags={})", params.flags);
    return ENOTIMPL;
}
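
// sys$allocate_tls sets up the master TLS region for a single-threaded process:
// it allocates a read/write region, copies the initial TLS image from userspace,
// creates the main thread's thread-specific region, and points the TLS base at
// it (GDT entry on i386, FS_BASE MSR on x86_64).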
KResultOr<FlatPtr> Process::sys$allocate_tls(Userspace<const char*> initial_data, size_t size)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    REQUIRE_PROMISE(stdio);

    if (!size || size % PAGE_SIZE != 0)
        return EINVAL;

    if (!m_master_tls_region.is_null())
        return EEXIST;

    if (thread_count() != 1)
        return EFAULT;

    Thread* main_thread = nullptr;
    bool multiple_threads = false;
    for_each_thread([&main_thread, &multiple_threads](auto& thread) {
        if (main_thread)
            multiple_threads = true;
        main_thread = &thread;
        return IterationDecision::Break;
    });
    VERIFY(main_thread);

    if (multiple_threads)
        return EINVAL;

    auto range = address_space().allocate_range({}, size);
    if (!range.has_value())
        return ENOMEM;

    auto region_or_error = address_space().allocate_region(range.value(), String("Master TLS"), PROT_READ | PROT_WRITE);
    if (region_or_error.is_error())
        return region_or_error.error().error();

    m_master_tls_region = region_or_error.value()->make_weak_ptr();
    m_master_tls_size = size;
    m_master_tls_alignment = PAGE_SIZE;

    {
        Kernel::SmapDisabler disabler;
        void* fault_at;
        if (!Kernel::safe_memcpy((char*)m_master_tls_region.unsafe_ptr()->vaddr().as_ptr(), (char*)initial_data.ptr(), size, fault_at))
            return EFAULT;
    }

    auto tsr_result = main_thread->make_thread_specific_region({});
    if (tsr_result.is_error())
        return EFAULT;

#if ARCH(I386)
    auto& tls_descriptor = Processor::current().get_gdt_entry(GDT_SELECTOR_TLS);
    tls_descriptor.set_base(main_thread->thread_specific_data());
    tls_descriptor.set_limit(main_thread->thread_specific_region_size());
#else
    MSR fs_base_msr(MSR_FS_BASE);
    fs_base_msr.set(main_thread->thread_specific_data().get());
#endif

    return m_master_tls_region.unsafe_ptr()->vaddr().get();
}
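
// sys$msyscall implements syscall-region enforcement: a null address locks
// down the address space so that syscalls are only accepted from regions
// previously marked here (the check itself happens in the syscall handler,
// outside this file); a non-null address marks that mmap'ed region as a
// syscall region. Once enforcement is enabled, no further regions can be marked.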
KResultOr<FlatPtr> Process::sys$msyscall(Userspace<void*> address)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this)
    if (address_space().enforces_syscall_regions())
        return EPERM;

    if (!address) {
        address_space().set_enforces_syscall_regions(true);
        return 0;
    }

    if (!Memory::is_user_address(VirtualAddress { address }))
        return EFAULT;

    auto* region = address_space().find_region_containing(Memory::VirtualRange { VirtualAddress { address }, 1 });
    if (!region)
        return EINVAL;

    if (!region->is_mmap())
        return EINVAL;

    region->set_syscall_region(true);
    return 0;
}

}