/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/ScopeGuard.h>
#include <AK/TemporaryChange.h>
#include <AK/WeakPtr.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/Memory/AllocationStrategy.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PageDirectory.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/Panic.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Time/TimeManagement.h>
#include <LibC/limits.h>
#include <LibELF/AuxiliaryVector.h>
#include <LibELF/Image.h>
#include <LibELF/Validation.h>

namespace Kernel {

extern Memory::Region* g_signal_trampoline_region;

struct LoadResult {
    OwnPtr<Memory::AddressSpace> space;
    FlatPtr load_base { 0 };
    FlatPtr entry_eip { 0 };
    size_t size { 0 };
    WeakPtr<Memory::Region> tls_region;
    size_t tls_size { 0 };
    size_t tls_alignment { 0 };
    WeakPtr<Memory::Region> stack_region;
};

static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, String executable_path, int main_program_fd);
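
// Check whether the argv and envp strings, plus their null-terminated
// pointer arrays, will fit on the new main thread's stack. Each of the two
// is capped at 1/8 of the default userspace stack size so that program
// startup still has room to work with.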
static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment)
{
    size_t total_arguments_size = 0;
    size_t total_environment_size = 0;

    for (auto& a : arguments)
        total_arguments_size += a.length() + 1;
    for (auto& e : environment)
        total_environment_size += e.length() + 1;

    total_arguments_size += sizeof(char*) * (arguments.size() + 1);
    total_environment_size += sizeof(char*) * (environment.size() + 1);

    static constexpr size_t max_arguments_size = Thread::default_userspace_stack_size / 8;
    static constexpr size_t max_environment_size = Thread::default_userspace_stack_size / 8;

    if (total_arguments_size > max_arguments_size)
        return false;
    if (total_environment_size > max_environment_size)
        return false;

    // FIXME: This doesn't account for the size of the auxiliary vector
    return true;
}
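
// Build the initial userspace stack for the new main thread. The argv/envp
// strings (and any auxv strings) are copied to the top of the stack first,
// then the auxiliary vector and the null-terminated envp and argv pointer
// arrays are pushed below them. On i386 the entry point receives
// argc/argv/envp on the stack; on x86_64 they go in rdi/rsi/rdx.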
static KResultOr<FlatPtr> make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Memory::Region& region, Vector<String> arguments,
    Vector<String> environment, Vector<ELF::AuxiliaryValue> auxiliary_values)
{
    FlatPtr new_sp = region.range().end().get();

    // Add some bits of randomness to the user stack pointer.
    new_sp -= round_up_to_power_of_two(get_fast_random<u32>() % 4096, 16);

    auto push_on_new_stack = [&new_sp](FlatPtr value) {
        new_sp -= sizeof(FlatPtr);
        Userspace<FlatPtr*> stack_ptr = new_sp;
        return copy_to_user(stack_ptr, &value);
    };

    auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) {
        new_sp -= sizeof(auxv_t);
        Userspace<auxv_t*> stack_ptr = new_sp;
        return copy_to_user(stack_ptr, &value);
    };

    auto push_string_on_new_stack = [&new_sp](const String& string) {
        new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr));
        Userspace<FlatPtr*> stack_ptr = new_sp;
        return copy_to_user(stack_ptr, string.characters(), string.length() + 1);
    };

    Vector<FlatPtr> argv_entries;
    for (auto& argument : arguments) {
        push_string_on_new_stack(argument);
        if (!argv_entries.try_append(new_sp))
            return ENOMEM;
    }

    Vector<FlatPtr> env_entries;
    for (auto& variable : environment) {
        push_string_on_new_stack(variable);
        if (!env_entries.try_append(new_sp))
            return ENOMEM;
    }

    for (auto& value : auxiliary_values) {
        if (!value.optional_string.is_empty()) {
            push_string_on_new_stack(value.optional_string);
            value.auxv.a_un.a_ptr = (void*)new_sp;
        }
    }

    for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) {
        auto& value = auxiliary_values[i];
        push_aux_value_on_new_stack(value.auxv);
    }

    push_on_new_stack(0);
    for (ssize_t i = env_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(env_entries[i]);
    FlatPtr envp = new_sp;

    push_on_new_stack(0);
    for (ssize_t i = argv_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(argv_entries[i]);
    FlatPtr argv = new_sp;

    // NOTE: The stack needs to be 16-byte aligned.
    new_sp -= new_sp % 16;

#if ARCH(I386)
    // GCC assumes that the return address has been pushed to the stack when it enters the function,
    // so we need to reserve an extra pointer's worth of bytes below this to make GCC's stack alignment
    // calculations work.
    new_sp -= sizeof(void*);

    push_on_new_stack(envp);
    push_on_new_stack(argv);
    push_on_new_stack(argv_entries.size());
#else
    regs.rdi = argv_entries.size();
    regs.rsi = argv;
    regs.rdx = envp;
#endif

    VERIFY(new_sp % 16 == 0);

    // FIXME: The way we're setting up the stack and passing arguments to the entry point isn't ABI-compliant
    return new_sp;
}

struct RequiredLoadRange {
    FlatPtr start { 0 };
    FlatPtr end { 0 };
};
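
// Walk the PT_LOAD program headers of an ELF object to determine the lowest
// and highest virtual addresses it needs when loaded.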
static KResultOr<RequiredLoadRange> get_required_load_range(FileDescription& program_description)
{
    auto& inode = *(program_description.inode());
    auto vmobject = Memory::SharedInodeVMObject::try_create_with_inode(inode);
    if (!vmobject) {
        dbgln("get_required_load_range: Unable to allocate SharedInodeVMObject");
        return ENOMEM;
    }

    size_t executable_size = inode.size();

    auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, Memory::page_round_up(executable_size), "ELF memory range calculation", Memory::Region::Access::Read);
    if (!region) {
        dbgln("Could not allocate memory for ELF");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid()) {
        return EINVAL;
    }

    RequiredLoadRange range {};
    elf_image.for_each_program_header([&range](const auto& pheader) {
        if (pheader.type() != PT_LOAD)
            return;
        auto region_start = (FlatPtr)pheader.vaddr().as_ptr();
        auto region_end = region_start + pheader.size_in_memory();
        if (range.start == 0 || region_start < range.start)
            range.start = region_start;
        if (range.end == 0 || region_end > range.end)
            range.end = region_end;
    });

    VERIFY(range.end > range.start);
    return range;
}
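
// Pick a randomized base offset for loading (ASLR). ET_DYN objects may land
// anywhere in the load range; for ET_EXEC, the gaps around the main
// program's fixed load range constrain where an interpreter can go, and we
// demand at least 10 MiB of slack for the randomization to be meaningful.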
static KResultOr<FlatPtr> get_load_offset(const ElfW(Ehdr) & main_program_header, FileDescription& main_program_description, FileDescription* interpreter_description)
{
    constexpr FlatPtr load_range_start = 0x08000000;
    constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
    constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB;

    auto random_load_offset_in_range([](auto start, auto size) {
        return Memory::page_round_down(start + get_good_random<FlatPtr>() % size);
    });

    if (main_program_header.e_type == ET_DYN) {
        return random_load_offset_in_range(load_range_start, load_range_size);
    }

    if (main_program_header.e_type != ET_EXEC)
        return EINVAL;

    auto main_program_load_range_result = get_required_load_range(main_program_description);
    if (main_program_load_range_result.is_error())
        return main_program_load_range_result.error();

    auto main_program_load_range = main_program_load_range_result.value();

    RequiredLoadRange selected_range {};

    if (interpreter_description) {
        auto interpreter_load_range_result = get_required_load_range(*interpreter_description);
        if (interpreter_load_range_result.is_error())
            return interpreter_load_range_result.error();

        auto interpreter_size_in_memory = interpreter_load_range_result.value().end - interpreter_load_range_result.value().start;
        auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory;

        // No intersection
        if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end)
            return random_load_offset_in_range(load_range_start, load_range_size);

        RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start };
        RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };

        // Select larger part
        if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
            selected_range = first_available_part;
        else
            selected_range = second_available_part;
    } else
        selected_range = main_program_load_range;

    // If main program is too big and leaves us without enough space for adequate loader randomization
    if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size)
        return E2BIG;

    return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
}

enum class ShouldAllocateTls {
    No,
    Yes,
};

enum class ShouldAllowSyscalls {
    No,
    Yes,
};
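
// Map an ELF object into the new address space: writable PT_LOAD segments
// are copied into freshly allocated regions, read-only/executable ones are
// mapped directly from the inode's VM object, and a PT_TLS segment (if any)
// becomes the master TLS region. Also allocates the main thread's stack.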
static KResultOr<LoadResult> load_elf_object(NonnullOwnPtr<Memory::AddressSpace> new_space, FileDescription& object_description,
    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = Memory::SharedInodeVMObject::try_create_with_inode(inode);
    if (!vmobject) {
        dbgln("load_elf_object: Unable to allocate SharedInodeVMObject");
        return ENOMEM;
    }

    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();

    auto executable_region = MM.allocate_kernel_region_with_vmobject(*vmobject, Memory::page_round_up(executable_size), "ELF loading", Memory::Region::Access::Read);
    if (!executable_region) {
        dbgln("Could not allocate memory for ELF loading");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid())
        return ENOEXEC;

    Memory::Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;

    String elf_name = object_description.absolute_path();
    VERIFY(!Processor::in_critical());

    Memory::MemoryManager::enter_space(*new_space);

    KResult ph_load_result = KSuccess;
    elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) {
        if (program_header.type() == PT_TLS) {
            VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
            VERIFY(program_header.size_in_memory());

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            auto range = new_space->allocate_range({}, program_header.size_in_memory());
            if (!range.has_value()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }

            auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            master_tls_region = region_or_error.value();
            master_tls_size = program_header.size_in_memory();
            master_tls_alignment = program_header.alignment();

            if (!copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }
        if (program_header.type() != PT_LOAD)
            return IterationDecision::Continue;

        if (program_header.is_writable()) {
            // Writable section: create a copy in memory.
            VERIFY(program_header.size_in_memory());
            VERIFY(program_header.alignment() == PAGE_SIZE);

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            int prot = 0;
            if (program_header.is_readable())
                prot |= PROT_READ;
            if (program_header.is_writable())
                prot |= PROT_WRITE;
            auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : "");

            auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
            auto range_end = VirtualAddress { Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };

            auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get());
            if (!range.has_value()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }
            auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            // It's not always the case with PIE executables (and very well shouldn't be) that the
            // virtual address in the program header matches the one we end up giving the process.
            // In order to copy the data image correctly into memory, we need to copy the data starting at
            // the right initial page offset into the pages allocated for the elf_alloc-XX section.
            // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
            // the .text and .data PT_LOAD sections of the executable.
            // Accessing it would definitely be a bug.
            auto page_offset = program_header.vaddr();
            page_offset.mask(~PAGE_MASK);
            if (!copy_to_user((u8*)region_or_error.value()->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }

        // Non-writable section: map the executable itself in memory.
        VERIFY(program_header.size_in_memory());
        VERIFY(program_header.alignment() == PAGE_SIZE);
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        auto range_end = VirtualAddress { Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };
        auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get());
        if (!range.has_value()) {
            ph_load_result = ENOMEM;
            return IterationDecision::Break;
        }
        auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true);
        if (region_or_error.is_error()) {
            ph_load_result = region_or_error.error();
            return IterationDecision::Break;
        }
        if (should_allow_syscalls == ShouldAllowSyscalls::Yes)
            region_or_error.value()->set_syscall_region(true);
        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region_or_error.value()->vaddr().as_ptr();
        return IterationDecision::Continue;
    });

    if (ph_load_result.is_error()) {
        dbgln("do_exec: Failure loading program ({})", ph_load_result.error());
        return ph_load_result;
    }

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! ({})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size);
    if (!stack_range.has_value()) {
        dbgln("do_exec: Failed to allocate VM range for stack");
        return ENOMEM;
    }

    auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
    if (stack_region_or_error.is_error())
        return stack_region_or_error.error();
    auto& stack_region = *stack_region_or_error.value();
    stack_region.set_stack(true);

    return LoadResult {
        move(new_space),
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        AK::try_make_weak_ptr(master_tls_region),
        master_tls_size,
        master_tls_alignment,
        stack_region.make_weak_ptr()
    };
}
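
// Create the new address space and load either the main program itself
// (static executable) or its dynamic loader into it. When an interpreter is
// used, TLS setup is deferred to the userspace loader.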
KResultOr<LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description,
    RefPtr<FileDescription> interpreter_description, const ElfW(Ehdr) & main_program_header)
{
    auto new_space = Memory::AddressSpace::try_create(nullptr);
    if (!new_space)
        return ENOMEM;

    ScopeGuard space_guard([&]() {
        Memory::MemoryManager::enter_process_paging_scope(*this);
    });

    auto load_offset = get_load_offset(main_program_header, main_program_description, interpreter_description);
    if (load_offset.is_error()) {
        return load_offset.error();
    }

    if (interpreter_description.is_null()) {
        auto result = load_elf_object(new_space.release_nonnull(), main_program_description, load_offset.value(), ShouldAllocateTls::Yes, ShouldAllowSyscalls::No);
        if (result.is_error())
            return result.error();

        m_master_tls_region = result.value().tls_region;
        m_master_tls_size = result.value().tls_size;
        m_master_tls_alignment = result.value().tls_alignment;

        return result;
    }

    auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, load_offset.value(), ShouldAllocateTls::No, ShouldAllowSyscalls::Yes);
    if (interpreter_load_result.is_error())
        return interpreter_load_result.error();

    // TLS allocation will be done in userspace by the loader
    VERIFY(!interpreter_load_result.value().tls_region);
    VERIFY(!interpreter_load_result.value().tls_alignment);
    VERIFY(!interpreter_load_result.value().tls_size);

    return interpreter_load_result;
}
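
// The core of exec(). After the new program has been validated and loaded
// into a fresh address space, we commit: the old address space, extra
// threads and FD_CLOEXEC descriptors are torn down, credentials are updated
// for set-id executables, and the new main thread is set up to start at the
// entry point with its freshly built stack.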
KResult Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment,
    RefPtr<FileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags, const ElfW(Ehdr) & main_program_header)
{
    VERIFY(is_user_process());
    VERIFY(!Processor::in_critical());
    auto path = main_program_description->absolute_path();

    dbgln_if(EXEC_DEBUG, "do_exec: {}", path);

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(arguments, environment))
        return E2BIG;

    auto parts = path.split('/');
    if (parts.is_empty())
        return ENOENT;

    auto main_program_metadata = main_program_description->metadata();

    auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header);
    if (load_result_or_error.is_error()) {
        dbgln("do_exec: Failed to load main program or interpreter for {}", path);
        return load_result_or_error.error();
    }

    auto signal_trampoline_range = load_result_or_error.value().space->allocate_range({}, PAGE_SIZE);
    if (!signal_trampoline_range.has_value()) {
        dbgln("do_exec: Failed to allocate VM for signal trampoline");
        return ENOMEM;
    }

    // We commit to the new executable at this point. There is no turning back!

    // Prevent other processes from attaching to us with ptrace while we're doing this.
    MutexLocker ptrace_locker(ptrace_lock());

    // Disable profiling temporarily in case it's running on this process.
    auto was_profiling = m_profiling;
    TemporaryChange profiling_disabler(m_profiling, false);

    kill_threads_except_self();

    auto& load_result = load_result_or_error.value();

    bool executable_is_setid = false;

    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_protected_values.euid = main_program_metadata.uid;
            m_protected_values.suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_protected_values.egid = main_program_metadata.gid;
            m_protected_values.sgid = main_program_metadata.gid;
        }
    }

    set_dumpable(!executable_is_setid);

    {
        // We must disable global profiling (especially kfree tracing) here because
        // we might otherwise end up walking the stack into the process' space that
        // is about to be destroyed.
        TemporaryChange global_profiling_disabler(g_profiling_all_threads, false);
        m_space = load_result.space.release_nonnull();
    }
    Memory::MemoryManager::enter_space(*m_space);

    auto signal_trampoline_region = m_space->allocate_region_with_vmobject(signal_trampoline_range.value(), g_signal_trampoline_region->vmobject(), 0, "Signal trampoline", PROT_READ | PROT_EXEC, true);
    if (signal_trampoline_region.is_error()) {
        VERIFY_NOT_REACHED();
    }
    signal_trampoline_region.value()->set_syscall_region(true);

    m_executable = main_program_description->custody();
    m_arguments = arguments;
    m_environment = environment;

    m_veil_state = VeilState::None;
    m_unveiled_paths.clear();
    m_unveiled_paths.set_metadata({ "/", UnveilAccess::None, false });

    for (auto& property : m_coredump_properties)
        property = {};

    auto current_thread = Thread::current();
    current_thread->clear_signals();

    clear_futex_queues_on_exec();

    fds().change_each([&](auto& file_description_metadata) {
        if (file_description_metadata.is_valid() && file_description_metadata.flags() & FD_CLOEXEC)
            file_description_metadata = {};
    });

    int main_program_fd = -1;
    if (interpreter_description) {
        auto main_program_fd_wrapper = m_fds.allocate().release_value();
        VERIFY(main_program_fd_wrapper.fd >= 0);
        auto seek_result = main_program_description->seek(0, SEEK_SET);
        VERIFY(!seek_result.is_error());
        main_program_description->set_readable(true);
        m_fds[main_program_fd_wrapper.fd].set(move(main_program_description), FD_CLOEXEC);
        main_program_fd = main_program_fd_wrapper.fd;
    }

    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    VERIFY(new_main_thread);

    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, uid(), euid(), gid(), egid(), path, main_program_fd);

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    // and we don't want to deal with faults after this point.
    auto make_stack_result = make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), move(arguments), move(environment), move(auxv));
    if (make_stack_result.is_error())
        return make_stack_result.error();
    FlatPtr new_userspace_sp = make_stack_result.value();

    if (wait_for_tracer_at_next_execve()) {
        // Make sure we release the ptrace lock here or the tracer will block forever.
        ptrace_locker.unlock();
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);
    } else {
        // NOTE: The lock was already released above if we stopped for a tracer;
        //       unlocking it twice would assert.
        ptrace_locker.unlock(); // unlock before disabling interrupts as well
    }

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that sti()'d on exit, we might get a timer tick too soon in exec().
    Processor::enter_critical();
    prev_flags = cpu_flags();
    cli();

    // NOTE: Be careful to not trigger any page faults below!

    m_name = parts.take_last();
    new_main_thread->set_name(KString::try_create(m_name));

    {
        ProtectedDataMutationScope scope { *this };
        m_protected_values.promises = m_protected_values.execpromises.load();
        m_protected_values.has_promises = m_protected_values.has_execpromises.load();

        m_protected_values.execpromises = 0;
        m_protected_values.has_execpromises = false;

        m_protected_values.signal_trampoline = signal_trampoline_region.value()->vaddr();

        // FIXME: PID/TID ISSUE
        m_protected_values.pid = new_main_thread->tid().value();
    }

    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error()) {
        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
        VERIFY_NOT_REACHED();
    }

    new_main_thread->reset_fpu_state();

    auto& regs = new_main_thread->m_regs;
#if ARCH(I386)
    regs.cs = GDT_SELECTOR_CODE3 | 3;
    regs.ds = GDT_SELECTOR_DATA3 | 3;
    regs.es = GDT_SELECTOR_DATA3 | 3;
    regs.ss = GDT_SELECTOR_DATA3 | 3;
    regs.fs = GDT_SELECTOR_DATA3 | 3;
    regs.gs = GDT_SELECTOR_TLS | 3;
    regs.eip = load_result.entry_eip;
    regs.esp = new_userspace_sp;
#else
    regs.rip = load_result.entry_eip;
    regs.rsp = new_userspace_sp;
#endif
    regs.cr3 = address_space().page_directory().cr3();

    {
        TemporaryChange profiling_disabler(m_profiling, was_profiling);
        PerformanceManager::add_process_exec_event(*this);
    }

    {
        SpinlockLocker lock(g_scheduler_lock);
        new_main_thread->set_state(Thread::State::Runnable);
    }
    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_if_locked(lock_count_to_restore);
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());
    return KSuccess;
}
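
// Assemble the auxiliary vector (auxv) handed to the new program: page
// size, load base, entry point, real/effective IDs, platform and clock
// info, 16 random bytes, and the executable's path and file descriptor.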
static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, String executable_path, int main_program_fd)
{
    Vector<ELF::AuxiliaryValue> auxv;
    // PHDR/EXECFD
    // PH*
    auxv.append({ ELF::AuxiliaryValue::PageSize, PAGE_SIZE });
    auxv.append({ ELF::AuxiliaryValue::BaseAddress, (void*)load_base });

    auxv.append({ ELF::AuxiliaryValue::Entry, (void*)entry_eip });
    // NOTELF
    auxv.append({ ELF::AuxiliaryValue::Uid, (long)uid.value() });
    auxv.append({ ELF::AuxiliaryValue::EUid, (long)euid.value() });
    auxv.append({ ELF::AuxiliaryValue::Gid, (long)gid.value() });
    auxv.append({ ELF::AuxiliaryValue::EGid, (long)egid.value() });

    auxv.append({ ELF::AuxiliaryValue::Platform, Processor::platform_string() });
    // FIXME: This is platform specific
    auxv.append({ ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() });

    auxv.append({ ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });

    // FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
    auxv.append({ ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 });

    char random_bytes[16] {};
    get_fast_random_bytes((u8*)random_bytes, sizeof(random_bytes));

    auxv.append({ ELF::AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });
    auxv.append({ ELF::AuxiliaryValue::ExecFilename, executable_path });
    auxv.append({ ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd });
    auxv.append({ ELF::AuxiliaryValue::Null, 0L });
    return auxv;
}
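
// Parse a "#!" line from the first page of an executable into its
// space-separated words, e.g. "#!/bin/sh -e" => { "/bin/sh", "-e" }.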
static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
{
    int word_start = 2;
    int word_length = 0;
    if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
        Vector<String> interpreter_words;

        for (int i = 2; i < nread; ++i) {
            if (first_page[i] == '\n') {
                break;
            }

            if (first_page[i] != ' ') {
                ++word_length;
            }

            if (first_page[i] == ' ') {
                if (word_length > 0) {
                    interpreter_words.append(String(&first_page[word_start], word_length));
                }
                word_length = 0;
                word_start = i + 1;
            }
        }

        if (word_length > 0)
            interpreter_words.append(String(&first_page[word_start], word_length));

        if (!interpreter_words.is_empty())
            return interpreter_words;
    }

    return ENOEXEC;
}
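
// If the main program has a PT_INTERP header, open and validate the named
// program interpreter (it must itself be a well-formed ELF image with no
// interpreter of its own). Returns a null description for an
// interpreter-less ET_DYN image, and plain KSuccess for a static ET_EXEC.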
KResultOr<RefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, const ElfW(Ehdr) & main_program_header, int nread, size_t file_size)
{
    // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
    String interpreter_path;
    if (!ELF::validate_program_headers(main_program_header, file_size, (const u8*)&main_program_header, nread, &interpreter_path)) {
        dbgln("exec({}): File has invalid ELF Program headers", path);
        return ENOEXEC;
    }

    if (!interpreter_path.is_empty()) {
        dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path);
        auto interp_result = VirtualFileSystem::the().open(interpreter_path, O_EXEC, 0, current_directory());
        if (interp_result.is_error()) {
            dbgln("exec({}): Unable to open program interpreter {}", path, interpreter_path);
            return interp_result.error();
        }
        auto interpreter_description = interp_result.value();
        auto interp_metadata = interpreter_description->metadata();

        VERIFY(interpreter_description->inode());

        // Validate the program interpreter as a valid elf binary.
        // If your program interpreter is a #! file or something, it's time to stop playing games :)
        if (interp_metadata.size < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        char first_page[PAGE_SIZE] = {};
        auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
        auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page));
        if (nread_or_error.is_error())
            return ENOEXEC;
        nread = nread_or_error.value();

        if (nread < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        auto elf_header = (ElfW(Ehdr)*)first_page;
        if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
        String interpreter_interpreter_path;
        if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        if (!interpreter_interpreter_path.is_empty()) {
            dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_description->absolute_path(), interpreter_interpreter_path);
            return ELOOP;
        }

        return interpreter_description;
    }

    if (main_program_header.e_type == ET_REL) {
        // We can't exec an ET_REL, that's just an object file from the compiler
        return ENOEXEC;
    }
    if (main_program_header.e_type == ET_DYN) {
        // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible
        // for its own relocation (i.e. it's /usr/lib/Loader.so)
        if (path != "/usr/lib/Loader.so")
            dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path);
        return nullptr;
    }

    // No interpreter, but the path refers to a valid ELF image.
    return KResult(KSuccess);
}
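
// Front half of exec(): open the path, figure out the binary format
// (shebang script or ELF), recurse for shebangs, then hand off to
// do_exec(). On success for the current thread we never return from here;
// we context-switch straight into the new program.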
KResult Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
{
    if (recursion_depth > 2) {
        dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path);
        return ELOOP;
    }

    // Open the file to check what kind of binary format it is
    // Currently supported formats:
    //    - #! interpreted file
    //    - ELF32
    //        * ET_EXEC binary that just gets loaded
    //        * ET_DYN binary that requires a program interpreter
    //
    auto file_or_error = VirtualFileSystem::the().open(path, O_EXEC, 0, current_directory());
    if (file_or_error.is_error())
        return file_or_error.error();
    auto description = file_or_error.release_value();
    auto metadata = description->metadata();

    if (!metadata.is_regular_file())
        return EACCES;

    // Always gonna need at least 3 bytes. These are for "#!X".
    if (metadata.size < 3)
        return ENOEXEC;

    VERIFY(description->inode());

    // Read the first page of the program into memory so we can validate the binfmt of it
    char first_page[PAGE_SIZE];
    auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
    auto nread_or_error = description->read(first_page_buffer, sizeof(first_page));
    if (nread_or_error.is_error())
        return ENOEXEC;

    // 1) #! interpreted file
    auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value());
    if (!shebang_result.is_error()) {
        auto shebang_words = shebang_result.release_value();
        auto shebang_path = shebang_words.first();
        arguments[0] = move(path);
        if (!arguments.try_prepend(move(shebang_words)))
            return ENOMEM;
        return exec(move(shebang_path), move(arguments), move(environment), ++recursion_depth);
    }

    // 2) ELF binary (ET_EXEC or ET_DYN)
    if (nread_or_error.value() < (int)sizeof(ElfW(Ehdr)))
        return ENOEXEC;
    auto main_program_header = (ElfW(Ehdr)*)first_page;
    if (!ELF::validate_elf_header(*main_program_header, metadata.size)) {
        dbgln("exec({}): File has invalid ELF header", path);
        return ENOEXEC;
    }

    auto elf_result = find_elf_interpreter_for_executable(path, *main_program_header, nread_or_error.value(), metadata.size);
    // Assume a static ELF executable by default
    RefPtr<FileDescription> interpreter_description;
    // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
    if (!elf_result.is_error()) {
        // It's a dynamic ELF executable, with or without an interpreter. Do not allocate TLS
        interpreter_description = elf_result.value();
    } else if (elf_result.error().is_error())
        return elf_result.error();

    // The bulk of exec() is done by do_exec(), which ensures that all locals
    // are cleaned up by the time we yield-teleport below.
    Thread* new_main_thread = nullptr;
    u32 prev_flags = 0;
    auto result = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header);
    if (result.is_error())
        return result;

    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());

    auto current_thread = Thread::current();
    if (current_thread == new_main_thread) {
        // We need to enter the scheduler lock before changing the state
        // and it will be released after the context switch into that
        // thread. We should also still be in our critical section
        VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
        VERIFY(Processor::in_critical() == 1);
        g_scheduler_lock.lock();
        current_thread->set_state(Thread::State::Running);
        Processor::assume_context(*current_thread, prev_flags);
        VERIFY_NOT_REACHED();
    }
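
    // Bit 9 (0x200) of EFLAGS is IF, the interrupt-enable flag: restore
    // interrupts if the caller had them enabled when we entered.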
    if (prev_flags & 0x200)
        sti();
    Processor::leave_critical();
    return KSuccess;
}
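
// The execve(2) syscall entry point: copy the parameter struct, the path,
// and the argument/environment string lists in from userspace, then call
// exec(), which does not return on success.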
KResultOr<FlatPtr> Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this);
    REQUIRE_PROMISE(exec);

    // NOTE: Be extremely careful with allocating any kernel memory in exec().
    //       On success, the kernel stack will be lost.
    Syscall::SC_execve_params params;
    if (!copy_from_user(&params, user_params))
        return EFAULT;

    if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
        return E2BIG;

    String path;
    {
        auto path_arg = get_syscall_path_argument(params.path);
        if (path_arg.is_error())
            return path_arg.error();
        path = path_arg.value()->view();
    }

    auto copy_user_strings = [](const auto& list, auto& output) {
        if (!list.length)
            return true;
        Checked<size_t> size = sizeof(*list.strings);
        size *= list.length;
        if (size.has_overflow())
            return false;
        Vector<Syscall::StringArgument, 32> strings;
        if (!strings.try_resize(list.length))
            return false;
        if (!copy_from_user(strings.data(), list.strings, size.value()))
            return false;
        for (size_t i = 0; i < list.length; ++i) {
            auto string_or_error = try_copy_kstring_from_user(strings[i]);
            if (string_or_error.is_error()) {
                // FIXME: Propagate the error.
                return false;
            }
            // FIXME: Don't convert to String here, use KString all the way.
            auto string = String(string_or_error.value()->view());
            if (!output.try_append(move(string)))
                return false;
        }
        return true;
    };

    Vector<String> arguments;
    if (!copy_user_strings(params.arguments, arguments))
        return EFAULT;

    Vector<String> environment;
    if (!copy_user_strings(params.environment, environment))
        return EFAULT;

    auto result = exec(move(path), move(arguments), move(environment));
    VERIFY(result.is_error()); // We should never continue after a successful exec!
    return result.error();
}

}