/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/ScopeGuard.h>
#include <AK/TemporaryChange.h>
#include <AK/WeakPtr.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/Memory/AllocationStrategy.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/PageDirectory.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/Panic.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Time/TimeManagement.h>
#include <LibC/limits.h>
#include <LibELF/AuxiliaryVector.h>
#include <LibELF/Image.h>
#include <LibELF/Validation.h>

namespace Kernel {

extern Memory::Region* g_signal_trampoline_region;

struct LoadResult {
    OwnPtr<Memory::AddressSpace> space;
    FlatPtr load_base { 0 };
    FlatPtr entry_eip { 0 };
    size_t size { 0 };
    WeakPtr<Memory::Region> tls_region;
    size_t tls_size { 0 };
    size_t tls_alignment { 0 };
    WeakPtr<Memory::Region> stack_region;
};

static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, String executable_path, int main_program_fd);
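
// Rejects argument/environment lists that wouldn't leave enough room on the
// initial userspace stack; each list is capped at 1/8 of the default stack size.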
static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment)
{
    size_t total_arguments_size = 0;
    size_t total_environment_size = 0;

    for (auto& a : arguments)
        total_arguments_size += a.length() + 1;
    for (auto& e : environment)
        total_environment_size += e.length() + 1;

    total_arguments_size += sizeof(char*) * (arguments.size() + 1);
    total_environment_size += sizeof(char*) * (environment.size() + 1);

    static constexpr size_t max_arguments_size = Thread::default_userspace_stack_size / 8;
    static constexpr size_t max_environment_size = Thread::default_userspace_stack_size / 8;

    if (total_arguments_size > max_arguments_size)
        return false;

    if (total_environment_size > max_environment_size)
        return false;

    // FIXME: This doesn't account for the size of the auxiliary vector
    return true;
}

static KResultOr<FlatPtr> make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Memory::Region& region, Vector<String> arguments,
    Vector<String> environment, Vector<ELF::AuxiliaryValue> auxiliary_values)
{
    FlatPtr new_sp = region.range().end().get();

    // Add some bits of randomness to the user stack pointer.
    new_sp -= round_up_to_power_of_two(get_fast_random<u32>() % 4096, 16);

    auto push_on_new_stack = [&new_sp](FlatPtr value) {
        new_sp -= sizeof(FlatPtr);
        Userspace<FlatPtr*> stack_ptr = new_sp;
        return copy_to_user(stack_ptr, &value);
    };

    auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) {
        new_sp -= sizeof(auxv_t);
        Userspace<auxv_t*> stack_ptr = new_sp;
        return copy_to_user(stack_ptr, &value);
    };

    auto push_string_on_new_stack = [&new_sp](const String& string) {
        new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr));
        Userspace<FlatPtr*> stack_ptr = new_sp;
        return copy_to_user(stack_ptr, string.characters(), string.length() + 1);
    };

    Vector<FlatPtr> argv_entries;
    for (auto& argument : arguments) {
        if (!push_string_on_new_stack(argument))
            return EFAULT;
        if (!argv_entries.try_append(new_sp))
            return ENOMEM;
    }

    Vector<FlatPtr> env_entries;
    for (auto& variable : environment) {
        if (!push_string_on_new_stack(variable))
            return EFAULT;
        if (!env_entries.try_append(new_sp))
            return ENOMEM;
    }

    for (auto& value : auxiliary_values) {
        if (!value.optional_string.is_empty()) {
            if (!push_string_on_new_stack(value.optional_string))
                return EFAULT;
            value.auxv.a_un.a_ptr = (void*)new_sp;
        }
    }

    for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) {
        auto& value = auxiliary_values[i];
        if (!push_aux_value_on_new_stack(value.auxv))
            return EFAULT;
    }

    if (!push_on_new_stack(0))
        return EFAULT;
    for (ssize_t i = env_entries.size() - 1; i >= 0; --i) {
        if (!push_on_new_stack(env_entries[i]))
            return EFAULT;
    }
    FlatPtr envp = new_sp;

    if (!push_on_new_stack(0))
        return EFAULT;
    for (ssize_t i = argv_entries.size() - 1; i >= 0; --i) {
        if (!push_on_new_stack(argv_entries[i]))
            return EFAULT;
    }
    FlatPtr argv = new_sp;
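
    // The new stack now looks like this, from low to high addresses (the gap at
    // the very top was randomized above):
    //
    //     argv ->  argv[0] .. argv[argc - 1], nullptr
    //     envp ->  envp[0] .. envp[n - 1], nullptr
    //              auxv entries (ending with the AT_NULL entry)
    //              copies of the argument, environment and auxv strings
    //     region.range().end()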

    // NOTE: The stack needs to be 16-byte aligned.
    new_sp -= new_sp % 16;

#if ARCH(I386)
    // GCC assumes that the return address has been pushed to the stack when it enters the function,
    // so we need to reserve an extra pointer's worth of bytes below this to make GCC's stack alignment
    // calculations work
    new_sp -= sizeof(void*);

    if (!push_on_new_stack(envp))
        return EFAULT;
    if (!push_on_new_stack(argv))
        return EFAULT;
    if (!push_on_new_stack(argv_entries.size()))
        return EFAULT;
#else
    regs.rdi = argv_entries.size();
    regs.rsi = argv;
    regs.rdx = envp;
#endif

    VERIFY(new_sp % 16 == 0);

    // FIXME: The way we're setting up the stack and passing arguments to the entry point isn't ABI-compliant
    return new_sp;
}

struct RequiredLoadRange {
    FlatPtr start { 0 };
    FlatPtr end { 0 };
};
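
// Maps the ELF image into kernel memory just long enough to find the lowest
// and highest virtual addresses its PT_LOAD program headers require.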
static KResultOr<RequiredLoadRange> get_required_load_range(FileDescription& program_description)
{
    auto& inode = *(program_description.inode());
    auto vmobject = Memory::SharedInodeVMObject::try_create_with_inode(inode);
    if (!vmobject) {
        dbgln("get_required_load_range: Unable to allocate SharedInodeVMObject");
        return ENOMEM;
    }

    size_t executable_size = inode.size();

    auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, Memory::page_round_up(executable_size), "ELF memory range calculation", Memory::Region::Access::Read);
    if (!region) {
        dbgln("Could not allocate memory for ELF");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid()) {
        return EINVAL;
    }

    RequiredLoadRange range {};
    elf_image.for_each_program_header([&range](const auto& pheader) {
        if (pheader.type() != PT_LOAD)
            return;

        auto region_start = (FlatPtr)pheader.vaddr().as_ptr();
        auto region_end = region_start + pheader.size_in_memory();
        if (range.start == 0 || region_start < range.start)
            range.start = region_start;
        if (range.end == 0 || region_end > range.end)
            range.end = region_end;
    });

    VERIFY(range.end > range.start);
    return range;
}

static KResultOr<FlatPtr> get_load_offset(const ElfW(Ehdr) & main_program_header, FileDescription& main_program_description, FileDescription* interpreter_description)
{
    constexpr FlatPtr load_range_start = 0x08000000;
    constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
    constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB;

    auto random_load_offset_in_range([](auto start, auto size) {
        return Memory::page_round_down(start + get_good_random<FlatPtr>() % size);
    });

    if (main_program_header.e_type == ET_DYN) {
        return random_load_offset_in_range(load_range_start, load_range_size);
    }

    if (main_program_header.e_type != ET_EXEC)
        return EINVAL;

    auto main_program_load_range_result = get_required_load_range(main_program_description);
    if (main_program_load_range_result.is_error())
        return main_program_load_range_result.error();

    auto main_program_load_range = main_program_load_range_result.value();

    RequiredLoadRange selected_range {};

    if (interpreter_description) {
        auto interpreter_load_range_result = get_required_load_range(*interpreter_description);
        if (interpreter_load_range_result.is_error())
            return interpreter_load_range_result.error();

        auto interpreter_size_in_memory = interpreter_load_range_result.value().end - interpreter_load_range_result.value().start;
        auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory;

        // No intersection
        if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end)
            return random_load_offset_in_range(load_range_start, load_range_size);

        RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start };
        RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };

        // Select larger part
        if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
            selected_range = first_available_part;
        else
            selected_range = second_available_part;
    } else
        selected_range = main_program_load_range;
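
    // For example (illustrative addresses): if an ET_EXEC main program occupies
    // 0x0a000000-0x0b000000, the interpreter can go in either
    // 0x08000000-0x0a000000 or 0x0b000000-(end of the load range); we picked the
    // larger gap above so the interpreter still gets meaningful randomization.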

    // If main program is too big and leaves us without enough space for adequate loader randomization
    if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size)
        return E2BIG;

    return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
}

enum class ShouldAllocateTls {
    No,
    Yes,
};

enum class ShouldAllowSyscalls {
    No,
    Yes,
};

static KResultOr<LoadResult> load_elf_object(NonnullOwnPtr<Memory::AddressSpace> new_space, FileDescription& object_description,
    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = Memory::SharedInodeVMObject::try_create_with_inode(inode);
    if (!vmobject) {
        dbgln("load_elf_object: Unable to allocate SharedInodeVMObject");
        return ENOMEM;
    }

    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();

    auto executable_region = MM.allocate_kernel_region_with_vmobject(*vmobject, Memory::page_round_up(executable_size), "ELF loading", Memory::Region::Access::Read);
    if (!executable_region) {
        dbgln("Could not allocate memory for ELF loading");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid())
        return ENOEXEC;

    Memory::Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;

    String elf_name = object_description.absolute_path();
    VERIFY(!Processor::in_critical());

    Memory::MemoryManager::enter_space(*new_space);

    KResult ph_load_result = KSuccess;
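
    // Walk the program headers and build the new address space:
    //  - PT_TLS: allocate and fill the master TLS region (main program only)
    //  - writable PT_LOAD: allocate fresh anonymous pages and copy the data in
    //  - read-only PT_LOAD: map pages straight from the executable's VMObject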
    elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) {
        if (program_header.type() == PT_TLS) {
            VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
            VERIFY(program_header.size_in_memory());

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            auto range = new_space->allocate_range({}, program_header.size_in_memory());
            if (!range.has_value()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }

            auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            master_tls_region = region_or_error.value();
            master_tls_size = program_header.size_in_memory();
            master_tls_alignment = program_header.alignment();

            if (!copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }

        if (program_header.type() != PT_LOAD)
            return IterationDecision::Continue;

        if (program_header.size_in_memory() == 0)
            return IterationDecision::Continue;

        if (program_header.is_writable()) {
            // Writable section: create a copy in memory.
            VERIFY(program_header.size_in_memory());
            VERIFY(program_header.alignment() == PAGE_SIZE);

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            int prot = 0;
            if (program_header.is_readable())
                prot |= PROT_READ;
            if (program_header.is_writable())
                prot |= PROT_WRITE;
            auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : "");

            auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
            auto range_end = VirtualAddress { Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };

            auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get());
            if (!range.has_value()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }

            auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            // It's not always the case with PIE executables (and very well shouldn't be) that the
            // virtual address in the program header matches the one we end up giving the process.
            // In order to copy the data image correctly into memory, we need to copy the data starting at
            // the right initial page offset into the pages allocated for the elf_alloc-XX section.
            // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
            //        the .text and .data PT_LOAD sections of the executable.
            //        Accessing it would definitely be a bug.
            auto page_offset = program_header.vaddr();
            page_offset.mask(~PAGE_MASK);
            if (!copy_to_user((u8*)region_or_error.value()->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }

        // Non-writable section: map the executable itself in memory.
        VERIFY(program_header.size_in_memory());
        VERIFY(program_header.alignment() == PAGE_SIZE);

        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        auto range_end = VirtualAddress { Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };

        auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get());
        if (!range.has_value()) {
            ph_load_result = ENOMEM;
            return IterationDecision::Break;
        }

        auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true);
        if (region_or_error.is_error()) {
            ph_load_result = region_or_error.error();
            return IterationDecision::Break;
        }

        if (should_allow_syscalls == ShouldAllowSyscalls::Yes)
            region_or_error.value()->set_syscall_region(true);
        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region_or_error.value()->vaddr().as_ptr();
        return IterationDecision::Continue;
    });

    if (ph_load_result.is_error()) {
        dbgln("do_exec: Failure loading program ({})", ph_load_result.error());
        return ph_load_result;
    }

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! ({})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size);
    if (!stack_range.has_value()) {
        dbgln("do_exec: Failed to allocate VM range for stack");
        return ENOMEM;
    }

    auto* stack_region = TRY(new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
    stack_region->set_stack(true);

    return LoadResult {
        move(new_space),
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        AK::try_make_weak_ptr(master_tls_region),
        master_tls_size,
        master_tls_alignment,
        stack_region->make_weak_ptr()
    };
}

KResultOr<LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description,
    RefPtr<FileDescription> interpreter_description, const ElfW(Ehdr) & main_program_header)
{
    auto new_space = Memory::AddressSpace::try_create(nullptr);
    if (!new_space)
        return ENOMEM;
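
    // Whether loading succeeds or fails, leave with the CPU back in this process's
    // current paging scope; on success, do_exec() enters the new address space
    // separately once it commits to the new executable.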
    ScopeGuard space_guard([&]() {
        Memory::MemoryManager::enter_process_paging_scope(*this);
    });

    auto load_offset = get_load_offset(main_program_header, main_program_description, interpreter_description);
    if (load_offset.is_error()) {
        return load_offset.error();
    }

    if (interpreter_description.is_null()) {
        auto result = load_elf_object(new_space.release_nonnull(), main_program_description, load_offset.value(), ShouldAllocateTls::Yes, ShouldAllowSyscalls::No);
        if (result.is_error())
            return result.error();

        m_master_tls_region = result.value().tls_region;
        m_master_tls_size = result.value().tls_size;
        m_master_tls_alignment = result.value().tls_alignment;

        return result;
    }

    auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, load_offset.value(), ShouldAllocateTls::No, ShouldAllowSyscalls::Yes);
    if (interpreter_load_result.is_error())
        return interpreter_load_result.error();

    // TLS allocation will be done in userspace by the loader
    VERIFY(!interpreter_load_result.value().tls_region);
    VERIFY(!interpreter_load_result.value().tls_alignment);
    VERIFY(!interpreter_load_result.value().tls_size);

    return interpreter_load_result;
}

KResult Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment,
    RefPtr<FileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags, const ElfW(Ehdr) & main_program_header)
{
    VERIFY(is_user_process());
    VERIFY(!Processor::in_critical());
    auto path = main_program_description->absolute_path();

    dbgln_if(EXEC_DEBUG, "do_exec: {}", path);

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(arguments, environment))
        return E2BIG;

    auto parts = path.split('/');
    if (parts.is_empty())
        return ENOENT;

    auto main_program_metadata = main_program_description->metadata();

    auto load_result = TRY(load(main_program_description, interpreter_description, main_program_header));

    auto signal_trampoline_range = load_result.space->allocate_range({}, PAGE_SIZE);
    if (!signal_trampoline_range.has_value()) {
        dbgln("do_exec: Failed to allocate VM for signal trampoline");
        return ENOMEM;
    }

    // We commit to the new executable at this point. There is no turning back!

    // Prevent other processes from attaching to us with ptrace while we're doing this.
    MutexLocker ptrace_locker(ptrace_lock());

    // Disable profiling temporarily in case it's running on this process.
    auto was_profiling = m_profiling;
    TemporaryChange profiling_disabler(m_profiling, false);

    kill_threads_except_self();

    bool executable_is_setid = false;

    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_protected_values.euid = main_program_metadata.uid;
            m_protected_values.suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_protected_values.egid = main_program_metadata.gid;
            m_protected_values.sgid = main_program_metadata.gid;
        }
    }

    set_dumpable(!executable_is_setid);

    {
        // We must disable global profiling (especially kfree tracing) here because
        // we might otherwise end up walking the stack into the process' space that
        // is about to be destroyed.
        TemporaryChange global_profiling_disabler(g_profiling_all_threads, false);
        m_space = load_result.space.release_nonnull();
    }
    Memory::MemoryManager::enter_space(*m_space);

    auto signal_trampoline_region = m_space->allocate_region_with_vmobject(signal_trampoline_range.value(), g_signal_trampoline_region->vmobject(), 0, "Signal trampoline", PROT_READ | PROT_EXEC, true);
    if (signal_trampoline_region.is_error()) {
        VERIFY_NOT_REACHED();
    }
    signal_trampoline_region.value()->set_syscall_region(true);

    m_executable = main_program_description->custody();
    m_arguments = arguments;
    m_environment = environment;

    m_veil_state = VeilState::None;
    m_unveiled_paths.clear();
    m_unveiled_paths.set_metadata({ "/", UnveilAccess::None, false });

    for (auto& property : m_coredump_properties)
        property = {};

    auto current_thread = Thread::current();
    current_thread->clear_signals();

    clear_futex_queues_on_exec();

    fds().change_each([&](auto& file_description_metadata) {
        if (file_description_metadata.is_valid() && file_description_metadata.flags() & FD_CLOEXEC)
            file_description_metadata = {};
    });

    int main_program_fd = -1;
    if (interpreter_description) {
        auto main_program_fd_wrapper = m_fds.allocate().release_value();
        VERIFY(main_program_fd_wrapper.fd >= 0);
        auto seek_result = main_program_description->seek(0, SEEK_SET);
        VERIFY(!seek_result.is_error());
        main_program_description->set_readable(true);
        m_fds[main_program_fd_wrapper.fd].set(move(main_program_description), FD_CLOEXEC);
        main_program_fd = main_program_fd_wrapper.fd;
    }

    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    VERIFY(new_main_thread);

    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, uid(), euid(), gid(), egid(), path, main_program_fd);

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    //       and we don't want to deal with faults after this point.
    auto make_stack_result = make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), move(arguments), move(environment), move(auxv));
    if (make_stack_result.is_error())
        return make_stack_result.error();
    FlatPtr new_userspace_sp = make_stack_result.value();

    if (wait_for_tracer_at_next_execve()) {
        // Make sure we release the ptrace lock here or the tracer will block forever.
        ptrace_locker.unlock();
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);
    } else {
        // Unlock regardless before disabling interrupts: we must always release the
        // ptrace lock after checking the tracer status, or we'd invite TOCTOU ptrace issues.
        ptrace_locker.unlock();
    }

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that sti()'d on exit, we might get a timer tick too soon in exec().
    Processor::enter_critical();
    prev_flags = cpu_flags();
    cli();

    // NOTE: Be careful to not trigger any page faults below!

    m_name = parts.take_last();
    new_main_thread->set_name(KString::try_create(m_name));

    {
        ProtectedDataMutationScope scope { *this };
        m_protected_values.promises = m_protected_values.execpromises.load();
        m_protected_values.has_promises = m_protected_values.has_execpromises.load();

        m_protected_values.execpromises = 0;
        m_protected_values.has_execpromises = false;

        m_protected_values.signal_trampoline = signal_trampoline_region.value()->vaddr();

        // FIXME: PID/TID ISSUE
        m_protected_values.pid = new_main_thread->tid().value();
    }

    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error()) {
        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
        VERIFY_NOT_REACHED();
    }

    new_main_thread->reset_fpu_state();
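
    // Set the new main thread's initial register state: userspace segment selectors
    // (i386 only), the program entry point, and the stack pointer we just built.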
    auto& regs = new_main_thread->m_regs;
#if ARCH(I386)
    regs.cs = GDT_SELECTOR_CODE3 | 3;
    regs.ds = GDT_SELECTOR_DATA3 | 3;
    regs.es = GDT_SELECTOR_DATA3 | 3;
    regs.ss = GDT_SELECTOR_DATA3 | 3;
    regs.fs = GDT_SELECTOR_DATA3 | 3;
    regs.gs = GDT_SELECTOR_TLS | 3;
    regs.eip = load_result.entry_eip;
    regs.esp = new_userspace_sp;
#else
    regs.rip = load_result.entry_eip;
    regs.rsp = new_userspace_sp;
#endif
    regs.cr3 = address_space().page_directory().cr3();

    {
        TemporaryChange profiling_disabler(m_profiling, was_profiling);
        PerformanceManager::add_process_exec_event(*this);
    }

    {
        SpinlockLocker lock(g_scheduler_lock);
        new_main_thread->set_state(Thread::State::Runnable);
    }
    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_if_locked(lock_count_to_restore);
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());
    return KSuccess;
}
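
// Builds the ELF auxiliary vector that userspace (in particular Loader.so and
// LibC) reads to discover the page size, entry point, UIDs/GIDs, random bytes, etc.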
static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, String executable_path, int main_program_fd)
{
    Vector<ELF::AuxiliaryValue> auxv;
    // PHDR/EXECFD
    // PH*
    auxv.append({ ELF::AuxiliaryValue::PageSize, PAGE_SIZE });
    auxv.append({ ELF::AuxiliaryValue::BaseAddress, (void*)load_base });

    auxv.append({ ELF::AuxiliaryValue::Entry, (void*)entry_eip });
    // NOTELF

    auxv.append({ ELF::AuxiliaryValue::Uid, (long)uid.value() });
    auxv.append({ ELF::AuxiliaryValue::EUid, (long)euid.value() });
    auxv.append({ ELF::AuxiliaryValue::Gid, (long)gid.value() });
    auxv.append({ ELF::AuxiliaryValue::EGid, (long)egid.value() });

    auxv.append({ ELF::AuxiliaryValue::Platform, Processor::platform_string() });
    // FIXME: This is platform specific
    auxv.append({ ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() });

    auxv.append({ ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });

    // FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
    auxv.append({ ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 });

    char random_bytes[16] {};
    get_fast_random_bytes({ (u8*)random_bytes, sizeof(random_bytes) });

    auxv.append({ ELF::AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });

    auxv.append({ ELF::AuxiliaryValue::ExecFilename, executable_path });

    auxv.append({ ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd });

    auxv.append({ ELF::AuxiliaryValue::Null, 0L });
    return auxv;
}
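
// Splits a "#!" line from the first page of an executable into its
// whitespace-separated words; for example "#!/bin/sh -e\n" yields
// { "/bin/sh", "-e" }. Returns ENOEXEC if there is no shebang line.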
static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
{
    int word_start = 2;
    int word_length = 0;
    if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
        Vector<String> interpreter_words;

        for (int i = 2; i < nread; ++i) {
            if (first_page[i] == '\n') {
                break;
            }

            if (first_page[i] != ' ') {
                ++word_length;
            }

            if (first_page[i] == ' ') {
                if (word_length > 0) {
                    interpreter_words.append(String(&first_page[word_start], word_length));
                }
                word_length = 0;
                word_start = i + 1;
            }
        }

        if (word_length > 0)
            interpreter_words.append(String(&first_page[word_start], word_length));

        if (!interpreter_words.is_empty())
            return interpreter_words;
    }

    return ENOEXEC;
}
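
// Determines which interpreter (if any) should be used to load this executable:
// returns a FileDescription for a PT_INTERP program interpreter, nullptr for a
// self-relocating ET_DYN image such as /usr/lib/Loader.so, or KSuccess for a
// plain static executable.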
KResultOr<RefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, const ElfW(Ehdr) & main_program_header, int nread, size_t file_size)
{
    // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
    String interpreter_path;
    if (!ELF::validate_program_headers(main_program_header, file_size, (const u8*)&main_program_header, nread, &interpreter_path)) {
        dbgln("exec({}): File has invalid ELF Program headers", path);
        return ENOEXEC;
    }

    if (!interpreter_path.is_empty()) {
        dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path);
        auto interp_result = VirtualFileSystem::the().open(interpreter_path, O_EXEC, 0, current_directory());
        if (interp_result.is_error()) {
            dbgln("exec({}): Unable to open program interpreter {}", path, interpreter_path);
            return interp_result.error();
        }
        auto interpreter_description = interp_result.value();
        auto interp_metadata = interpreter_description->metadata();

        VERIFY(interpreter_description->inode());

        // Validate the program interpreter as a valid elf binary.
        // If your program interpreter is a #! file or something, it's time to stop playing games :)
        if (interp_metadata.size < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        char first_page[PAGE_SIZE] = {};
        auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
        auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page));
        if (nread_or_error.is_error())
            return ENOEXEC;

        nread = nread_or_error.value();

        if (nread < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        auto elf_header = (ElfW(Ehdr)*)first_page;
        if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
        String interpreter_interpreter_path;
        if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        if (!interpreter_interpreter_path.is_empty()) {
            dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_description->absolute_path(), interpreter_interpreter_path);
            return ELOOP;
        }

        return interpreter_description;
    }

    if (main_program_header.e_type == ET_REL) {
        // We can't exec an ET_REL, that's just an object file from the compiler
        return ENOEXEC;
    }
    if (main_program_header.e_type == ET_DYN) {
        // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible
        // for its own relocation (i.e. it's /usr/lib/Loader.so)
        if (path != "/usr/lib/Loader.so")
            dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path);
        return nullptr;
    }

    // No interpreter, but, path refers to a valid elf image
    return KResult(KSuccess);
}

KResult Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
{
    if (recursion_depth > 2) {
        dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path);
        return ELOOP;
    }

    // Open the file to check what kind of binary format it is
    // Currently supported formats:
    //    - #! interpreted file
    //    - ELF32 (or ELF64 on x86_64)
    //        * ET_EXEC binary that just gets loaded
    //        * ET_DYN binary that requires a program interpreter
    //
    auto description = TRY(VirtualFileSystem::the().open(path, O_EXEC, 0, current_directory()));
    auto metadata = description->metadata();

    if (!metadata.is_regular_file())
        return EACCES;

    // Always gonna need at least 3 bytes. These are for "#!X".
    if (metadata.size < 3)
        return ENOEXEC;

    VERIFY(description->inode());

    // Read the first page of the program into memory so we can validate the binfmt of it
    char first_page[PAGE_SIZE];
    auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
    auto nread_or_error = description->read(first_page_buffer, sizeof(first_page));
    if (nread_or_error.is_error())
        return ENOEXEC;

    // 1) #! interpreted file
    auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value());
    if (!shebang_result.is_error()) {
        auto shebang_words = shebang_result.release_value();
        auto shebang_path = shebang_words.first();

        // NOTE: The caller may have passed an empty argv; make sure slot 0 exists
        //       before we replace it with the script path.
        if (arguments.is_empty() && !arguments.try_append(String::empty()))
            return ENOMEM;
        arguments[0] = move(path);
        if (!arguments.try_prepend(move(shebang_words)))
            return ENOMEM;
        return exec(move(shebang_path), move(arguments), move(environment), ++recursion_depth);
    }
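
    // (For example: exec("/home/anon/script.sh", { "script.sh" }) where the script
    //  starts with "#!/bin/sh" recurses above as
    //  exec("/bin/sh", { "/bin/sh", "/home/anon/script.sh" }).)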

    // 2) ELF executable
    if (nread_or_error.value() < (int)sizeof(ElfW(Ehdr)))
        return ENOEXEC;
    auto main_program_header = (ElfW(Ehdr)*)first_page;

    if (!ELF::validate_elf_header(*main_program_header, metadata.size)) {
        dbgln("exec({}): File has invalid ELF header", path);
        return ENOEXEC;
    }

    auto elf_result = find_elf_interpreter_for_executable(path, *main_program_header, nread_or_error.value(), metadata.size);
    // Assume a static ELF executable by default
    RefPtr<FileDescription> interpreter_description;
    // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
    if (!elf_result.is_error()) {
        // It's a dynamic ELF executable, with or without an interpreter. Do not allocate TLS
        interpreter_description = elf_result.value();
    } else if (elf_result.error().is_error())
        return elf_result.error();

    // The bulk of exec() is done by do_exec(), which ensures that all locals
    // are cleaned up by the time we yield-teleport below.
    Thread* new_main_thread = nullptr;
    u32 prev_flags = 0;
    auto result = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header);
    if (result.is_error())
        return result;

    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());

    auto current_thread = Thread::current();
    if (current_thread == new_main_thread) {
        // We need to enter the scheduler lock before changing the state
        // and it will be released after the context switch into that
        // thread. We should also still be in our critical section
        VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
        VERIFY(Processor::in_critical() == 1);
        g_scheduler_lock.lock();
        current_thread->set_state(Thread::State::Running);
        Processor::assume_context(*current_thread, prev_flags);
        VERIFY_NOT_REACHED();
    }
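
    // EFLAGS bit 9 (0x200) is the interrupt flag: re-enable interrupts only if they
    // were enabled before we entered the critical section.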
    if (prev_flags & 0x200)
        sti();
    Processor::leave_critical();
    return KSuccess;
}

KResultOr<FlatPtr> Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this);
    REQUIRE_PROMISE(exec);

    // NOTE: Be extremely careful with allocating any kernel memory in exec().
    //       On success, the kernel stack will be lost.
    Syscall::SC_execve_params params;
    if (!copy_from_user(&params, user_params))
        return EFAULT;

    if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
        return E2BIG;

    String path;
    {
        auto path_arg = get_syscall_path_argument(params.path);
        if (path_arg.is_error())
            return path_arg.error();
        path = path_arg.value()->view();
    }
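
    // Copies a { length, pointer } string list out of userspace into a
    // Vector<String>, with overflow-checked size arithmetic.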
    auto copy_user_strings = [](const auto& list, auto& output) {
        if (!list.length)
            return true;

        Checked<size_t> size = sizeof(*list.strings);
        size *= list.length;
        if (size.has_overflow())
            return false;

        Vector<Syscall::StringArgument, 32> strings;
        if (!strings.try_resize(list.length))
            return false;

        if (!copy_from_user(strings.data(), list.strings, size.value()))
            return false;

        for (size_t i = 0; i < list.length; ++i) {
            auto string_or_error = try_copy_kstring_from_user(strings[i]);
            if (string_or_error.is_error()) {
                // FIXME: Propagate the error.
                return false;
            }
            // FIXME: Don't convert to String here, use KString all the way.
            auto string = String(string_or_error.value()->view());
            if (!output.try_append(move(string)))
                return false;
        }
        return true;
    };

    Vector<String> arguments;
    if (!copy_user_strings(params.arguments, arguments))
        return EFAULT;

    Vector<String> environment;
    if (!copy_user_strings(params.environment, environment))
        return EFAULT;

    auto result = exec(move(path), move(arguments), move(environment));
    VERIFY(result.is_error()); // We should never continue after a successful exec!
    return result.error();
}

}