execve.cpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/LexicalPath.h>
  7. #include <AK/ScopeGuard.h>
  8. #include <AK/TemporaryChange.h>
  9. #include <AK/WeakPtr.h>
  10. #include <Kernel/Debug.h>
  11. #include <Kernel/FileSystem/Custody.h>
  12. #include <Kernel/FileSystem/FileDescription.h>
  13. #include <Kernel/Panic.h>
  14. #include <Kernel/PerformanceManager.h>
  15. #include <Kernel/Process.h>
  16. #include <Kernel/Random.h>
  17. #include <Kernel/Time/TimeManagement.h>
  18. #include <Kernel/VM/AllocationStrategy.h>
  19. #include <Kernel/VM/MemoryManager.h>
  20. #include <Kernel/VM/PageDirectory.h>
  21. #include <Kernel/VM/Region.h>
  22. #include <Kernel/VM/SharedInodeVMObject.h>
  23. #include <LibC/limits.h>
  24. #include <LibELF/AuxiliaryVector.h>
  25. #include <LibELF/Image.h>
  26. #include <LibELF/Validation.h>
  27. namespace Kernel {
  28. extern Region* g_signal_trampoline_region;
// Result of mapping one ELF object (main program or dynamic loader) into a
// freshly created address space. Returned by load_elf_object() and consumed
// by Process::do_exec().
struct LoadResult {
    OwnPtr<Space> space;             // The new address space the object was mapped into.
    FlatPtr load_base { 0 };         // Address the first PT_LOAD segment (file offset 0) was mapped at.
    FlatPtr entry_eip { 0 };         // Entry point, already adjusted by the load offset.
    size_t size { 0 };               // Size of the executable file on disk.
    WeakPtr<Region> tls_region;      // Master TLS image region (only when a PT_TLS header was loaded).
    size_t tls_size { 0 };           // size_in_memory() of the PT_TLS header, if any.
    size_t tls_alignment { 0 };      // alignment() of the PT_TLS header, if any.
    WeakPtr<Region> stack_region;    // Main thread's userspace stack region.
};
  39. static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd);
  40. static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment)
  41. {
  42. size_t total_arguments_size = 0;
  43. size_t total_environment_size = 0;
  44. for (auto& a : arguments)
  45. total_arguments_size += a.length() + 1;
  46. for (auto& e : environment)
  47. total_environment_size += e.length() + 1;
  48. total_arguments_size += sizeof(char*) * (arguments.size() + 1);
  49. total_environment_size += sizeof(char*) * (environment.size() + 1);
  50. static constexpr size_t max_arguments_size = Thread::default_userspace_stack_size / 8;
  51. static constexpr size_t max_environment_size = Thread::default_userspace_stack_size / 8;
  52. if (total_arguments_size > max_arguments_size)
  53. return false;
  54. if (total_environment_size > max_environment_size)
  55. return false;
  56. // FIXME: This doesn't account for the size of the auxiliary vector
  57. return true;
  58. }
  59. static KResultOr<FlatPtr> make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Region& region, Vector<String> arguments,
  60. Vector<String> environment, Vector<ELF::AuxiliaryValue> auxiliary_values)
  61. {
  62. FlatPtr new_sp = region.range().end().get();
  63. // Add some bits of randomness to the user stack pointer.
  64. new_sp -= round_up_to_power_of_two(get_fast_random<u32>() % 4096, 16);
  65. auto push_on_new_stack = [&new_sp](FlatPtr value) {
  66. new_sp -= sizeof(FlatPtr);
  67. Userspace<FlatPtr*> stack_ptr = new_sp;
  68. return copy_to_user(stack_ptr, &value);
  69. };
  70. auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) {
  71. new_sp -= sizeof(auxv_t);
  72. Userspace<auxv_t*> stack_ptr = new_sp;
  73. return copy_to_user(stack_ptr, &value);
  74. };
  75. auto push_string_on_new_stack = [&new_sp](const String& string) {
  76. new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr));
  77. Userspace<FlatPtr*> stack_ptr = new_sp;
  78. return copy_to_user(stack_ptr, string.characters(), string.length() + 1);
  79. };
  80. Vector<FlatPtr> argv_entries;
  81. for (auto& argument : arguments) {
  82. push_string_on_new_stack(argument);
  83. if (!argv_entries.try_append(new_sp))
  84. return ENOMEM;
  85. }
  86. Vector<FlatPtr> env_entries;
  87. for (auto& variable : environment) {
  88. push_string_on_new_stack(variable);
  89. if (!env_entries.try_append(new_sp))
  90. return ENOMEM;
  91. }
  92. for (auto& value : auxiliary_values) {
  93. if (!value.optional_string.is_empty()) {
  94. push_string_on_new_stack(value.optional_string);
  95. value.auxv.a_un.a_ptr = (void*)new_sp;
  96. }
  97. }
  98. for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) {
  99. auto& value = auxiliary_values[i];
  100. push_aux_value_on_new_stack(value.auxv);
  101. }
  102. push_on_new_stack(0);
  103. for (ssize_t i = env_entries.size() - 1; i >= 0; --i)
  104. push_on_new_stack(env_entries[i]);
  105. FlatPtr envp = new_sp;
  106. push_on_new_stack(0);
  107. for (ssize_t i = argv_entries.size() - 1; i >= 0; --i)
  108. push_on_new_stack(argv_entries[i]);
  109. FlatPtr argv = new_sp;
  110. // NOTE: The stack needs to be 16-byte aligned.
  111. new_sp -= new_sp % 16;
  112. #if ARCH(I386)
  113. // GCC assumes that the return address has been pushed to the stack when it enters the function,
  114. // so we need to reserve an extra pointer's worth of bytes below this to make GCC's stack alignment
  115. // calculations work
  116. new_sp -= sizeof(void*);
  117. push_on_new_stack(envp);
  118. push_on_new_stack(argv);
  119. push_on_new_stack(argv_entries.size());
  120. #else
  121. regs.rdi = argv;
  122. regs.rsi = argv_entries.size();
  123. regs.rdx = envp;
  124. #endif
  125. push_on_new_stack(0); // return address
  126. VERIFY((new_sp + sizeof(void*)) % 16 == 0);
  127. return new_sp;
  128. }
// Contiguous virtual address span required by an ELF object's PT_LOAD
// program headers. {0, 0} means "not yet populated".
struct RequiredLoadRange {
    FlatPtr start { 0 };
    FlatPtr end { 0 };
};
  133. static KResultOr<RequiredLoadRange> get_required_load_range(FileDescription& program_description)
  134. {
  135. auto& inode = *(program_description.inode());
  136. auto vmobject = SharedInodeVMObject::create_with_inode(inode);
  137. size_t executable_size = inode.size();
  138. auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, page_round_up(executable_size), "ELF memory range calculation", Region::Access::Read);
  139. if (!region) {
  140. dbgln("Could not allocate memory for ELF");
  141. return ENOMEM;
  142. }
  143. auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size);
  144. if (!elf_image.is_valid()) {
  145. return EINVAL;
  146. }
  147. RequiredLoadRange range {};
  148. elf_image.for_each_program_header([&range](const auto& pheader) {
  149. if (pheader.type() != PT_LOAD)
  150. return;
  151. auto region_start = (FlatPtr)pheader.vaddr().as_ptr();
  152. auto region_end = region_start + pheader.size_in_memory();
  153. if (range.start == 0 || region_start < range.start)
  154. range.start = region_start;
  155. if (range.end == 0 || region_end > range.end)
  156. range.end = region_end;
  157. });
  158. VERIFY(range.end > range.start);
  159. return range;
  160. };
  161. static KResultOr<FlatPtr> get_load_offset(const ElfW(Ehdr) & main_program_header, FileDescription& main_program_description, FileDescription* interpreter_description)
  162. {
  163. constexpr FlatPtr load_range_start = 0x08000000;
  164. constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
  165. constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB;
  166. auto random_load_offset_in_range([](auto start, auto size) {
  167. return page_round_down(start + get_good_random<FlatPtr>() % size);
  168. });
  169. if (main_program_header.e_type == ET_DYN) {
  170. return random_load_offset_in_range(load_range_start, load_range_size);
  171. }
  172. if (main_program_header.e_type != ET_EXEC)
  173. return EINVAL;
  174. auto main_program_load_range_result = get_required_load_range(main_program_description);
  175. if (main_program_load_range_result.is_error())
  176. return main_program_load_range_result.error();
  177. auto main_program_load_range = main_program_load_range_result.value();
  178. RequiredLoadRange selected_range {};
  179. if (interpreter_description) {
  180. auto interpreter_load_range_result = get_required_load_range(*interpreter_description);
  181. if (interpreter_load_range_result.is_error())
  182. return interpreter_load_range_result.error();
  183. auto interpreter_size_in_memory = interpreter_load_range_result.value().end - interpreter_load_range_result.value().start;
  184. auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory;
  185. // No intersection
  186. if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end)
  187. return random_load_offset_in_range(load_range_start, load_range_size);
  188. RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start };
  189. RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };
  190. // Select larger part
  191. if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
  192. selected_range = first_available_part;
  193. else
  194. selected_range = second_available_part;
  195. } else
  196. selected_range = main_program_load_range;
  197. // If main program is too big and leaves us without enough space for adequate loader randomization
  198. if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size)
  199. return E2BIG;
  200. return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
  201. }
// Whether load_elf_object() should allocate a master TLS region when it
// encounters a PT_TLS program header (done for the main program; the dynamic
// loader handles its own TLS from userspace).
enum class ShouldAllocateTls {
    No,
    Yes,
};

// Whether regions mapped from the object are marked as syscall regions
// (granted to the dynamic loader, denied to the main program image).
enum class ShouldAllowSyscalls {
    No,
    Yes,
};
// Maps an ELF object (main program or dynamic loader) into `new_space`:
// walks the program headers, allocating a master TLS image for PT_TLS,
// copying writable PT_LOAD segments, and file-mapping read-only/executable
// PT_LOAD segments directly from the inode's shared VM object. Also allocates
// the main thread's userspace stack. On success, ownership of `new_space`
// moves into the returned LoadResult.
//
// NOTE: This enters `new_space` (see MemoryManager::enter_space below) and
// does NOT switch back — the caller (Process::load) restores the paging scope
// via its ScopeGuard.
static KResultOr<LoadResult> load_elf_object(NonnullOwnPtr<Space> new_space, FileDescription& object_description,
    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = SharedInodeVMObject::create_with_inode(inode);

    // Refuse to execute a binary that someone has mapped writable: its text
    // could change under us.
    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();

    // Temporary kernel mapping of the whole file, used as the copy source.
    auto executable_region = MM.allocate_kernel_region_with_vmobject(*vmobject, page_round_up(executable_size), "ELF loading", Region::Access::Read);
    if (!executable_region) {
        dbgln("Could not allocate memory for ELF loading");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid())
        return ENOEXEC;

    Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;

    String elf_name = object_description.absolute_path();
    VERIFY(!Processor::current().in_critical());

    // Switch to the new address space so copy_to_user() below targets it.
    MemoryManager::enter_space(*new_space);

    // Errors inside the iteration callback are reported through this and
    // checked after the loop.
    KResult ph_load_result = KSuccess;
    elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) {
        if (program_header.type() == PT_TLS) {
            VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
            VERIFY(program_header.size_in_memory());

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            auto range = new_space->allocate_range({}, program_header.size_in_memory());
            if (!range.has_value()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }

            auto region_or_error = new_space->allocate_region(range.value(), String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            master_tls_region = region_or_error.value();
            master_tls_size = program_header.size_in_memory();
            master_tls_alignment = program_header.alignment();

            // Copy the TLS initialization image into the new region.
            if (!copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }
        if (program_header.type() != PT_LOAD)
            return IterationDecision::Continue;

        if (program_header.is_writable()) {
            // Writable section: create a copy in memory.
            VERIFY(program_header.size_in_memory());
            VERIFY(program_header.alignment() == PAGE_SIZE);

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            int prot = 0;
            if (program_header.is_readable())
                prot |= PROT_READ;
            if (program_header.is_writable())
                prot |= PROT_WRITE;
            auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : "");

            // Page-align the requested [vaddr, vaddr + size_in_memory) span.
            auto range_base = VirtualAddress { page_round_down(program_header.vaddr().offset(load_offset).get()) };
            auto range_end = VirtualAddress { page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };

            auto range = new_space->allocate_range(range_base, range_end.get() - range_base.get());
            if (!range.has_value()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }
            auto region_or_error = new_space->allocate_region(range.value(), region_name, prot, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            // It's not always the case with PIE executables (and very well shouldn't be) that the
            // virtual address in the program header matches the one we end up giving the process.
            // In order to copy the data image correctly into memory, we need to copy the data starting at
            // the right initial page offset into the pages allocated for the elf_alloc-XX section.
            // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
            //        the .text and .data PT_LOAD sections of the executable.
            //        Accessing it would definitely be a bug.
            auto page_offset = program_header.vaddr();
            page_offset.mask(~PAGE_MASK);
            if (!copy_to_user((u8*)region_or_error.value()->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }

        // Non-writable section: map the executable itself in memory.
        VERIFY(program_header.size_in_memory());
        VERIFY(program_header.alignment() == PAGE_SIZE);
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;
        auto range = new_space->allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory());
        if (!range.has_value()) {
            ph_load_result = ENOMEM;
            return IterationDecision::Break;
        }
        auto region_or_error = new_space->allocate_region_with_vmobject(range.value(), *vmobject, program_header.offset(), elf_name, prot, true);
        if (region_or_error.is_error()) {
            ph_load_result = region_or_error.error();
            return IterationDecision::Break;
        }
        if (should_allow_syscalls == ShouldAllowSyscalls::Yes)
            region_or_error.value()->set_syscall_region(true);
        // The segment at file offset 0 defines where the object's image begins.
        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region_or_error.value()->vaddr().as_ptr();
        return IterationDecision::Continue;
    });

    if (ph_load_result.is_error()) {
        dbgln("do_exec: Failure loading program ({})", ph_load_result.error());
        return ph_load_result;
    }

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! {})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    // Allocate the main thread's userspace stack in the new address space.
    auto stack_range = new_space->allocate_range({}, Thread::default_userspace_stack_size);
    if (!stack_range.has_value()) {
        dbgln("do_exec: Failed to allocate VM range for stack");
        return ENOMEM;
    }

    auto stack_region_or_error = new_space->allocate_region(stack_range.value(), "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
    if (stack_region_or_error.is_error())
        return stack_region_or_error.error();
    auto& stack_region = *stack_region_or_error.value();
    stack_region.set_stack(true);

    return LoadResult {
        move(new_space),
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        AK::try_make_weak_ptr(master_tls_region),
        master_tls_size,
        master_tls_alignment,
        stack_region.make_weak_ptr()
    };
}
// Creates a new address space and loads either the main program (statically
// linked case) or its ELF interpreter (dynamically linked case) into it.
// The interpreter, when present, maps the main program itself from userspace
// later, via the file descriptor do_exec() passes through the auxiliary vector.
KResultOr<LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description,
    RefPtr<FileDescription> interpreter_description, const ElfW(Ehdr) & main_program_header)
{
    auto new_space = Space::create(*this, nullptr);
    if (!new_space)
        return ENOMEM;

    // load_elf_object() switches into the new space; make sure every exit
    // path returns to this process's current paging scope.
    ScopeGuard space_guard([&]() {
        MemoryManager::enter_process_paging_scope(*this);
    });

    auto load_offset = get_load_offset(main_program_header, main_program_description, interpreter_description);
    if (load_offset.is_error()) {
        return load_offset.error();
    }

    if (interpreter_description.is_null()) {
        // Statically linked: load the main program and allocate its TLS here.
        auto result = load_elf_object(new_space.release_nonnull(), main_program_description, load_offset.value(), ShouldAllocateTls::Yes, ShouldAllowSyscalls::No);
        if (result.is_error())
            return result.error();

        m_master_tls_region = result.value().tls_region;
        m_master_tls_size = result.value().tls_size;
        m_master_tls_alignment = result.value().tls_alignment;

        return result;
    }

    // Dynamically linked: only the interpreter is mapped into the new space.
    auto interpreter_load_result = load_elf_object(new_space.release_nonnull(), *interpreter_description, load_offset.value(), ShouldAllocateTls::No, ShouldAllowSyscalls::Yes);
    if (interpreter_load_result.is_error())
        return interpreter_load_result.error();

    // TLS allocation will be done in userspace by the loader
    VERIFY(!interpreter_load_result.value().tls_region);
    VERIFY(!interpreter_load_result.value().tls_alignment);
    VERIFY(!interpreter_load_result.value().tls_size);

    return interpreter_load_result;
}
// The heart of exec(): replaces this process's image with the given program.
// Performs all fallible work (loading, VM range allocation) first, then
// commits — after the "no turning back" point any failure is fatal to the
// process. On success the (new) main thread is made Runnable and the caller
// finishes the context switch; we return with interrupts disabled and in a
// critical section (prev_flags tells the caller how to leave it).
KResult Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment,
    RefPtr<FileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags, const ElfW(Ehdr) & main_program_header)
{
    VERIFY(is_user_process());
    VERIFY(!Processor::current().in_critical());
    auto path = main_program_description->absolute_path();

    dbgln_if(EXEC_DEBUG, "do_exec: {}", path);

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(arguments, environment))
        return E2BIG;

    // The last path component becomes the new process name (set below).
    auto parts = path.split('/');
    if (parts.is_empty())
        return ENOENT;

    // Snapshot metadata before loading; used for setuid/setgid handling below.
    auto main_program_metadata = main_program_description->metadata();

    auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header);
    if (load_result_or_error.is_error()) {
        dbgln("do_exec: Failed to load main program or interpreter for {}", path);
        return load_result_or_error.error();
    }

    // Reserve a range for the signal trampoline now, while failure is still recoverable.
    auto signal_trampoline_range = load_result_or_error.value().space->allocate_range({}, PAGE_SIZE);
    if (!signal_trampoline_range.has_value()) {
        dbgln("do_exec: Failed to allocate VM for signal trampoline");
        return ENOMEM;
    }

    // We commit to the new executable at this point. There is no turning back!

    // Prevent other processes from attaching to us with ptrace while we're doing this.
    Locker ptrace_locker(ptrace_lock());

    // Disable profiling temporarily in case it's running on this process.
    auto was_profiling = m_profiling;
    TemporaryChange profiling_disabler(m_profiling, false);

    kill_threads_except_self();

    auto& load_result = load_result_or_error.value();

    // Apply setuid/setgid unless the mount forbids it (MS_NOSUID).
    bool executable_is_setid = false;
    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_euid = main_program_metadata.uid;
            m_suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_egid = main_program_metadata.gid;
            m_sgid = main_program_metadata.gid;
        }
    }

    // setid processes must not be dumpable (coredumps could leak secrets).
    set_dumpable(!executable_is_setid);

    {
        // We must disable global profiling (especially kfree tracing) here because
        // we might otherwise end up walking the stack into the process' space that
        // is about to be destroyed.
        TemporaryChange global_profiling_disabler(g_profiling_all_threads, false);
        m_space = load_result.space.release_nonnull();
    }
    MemoryManager::enter_space(*m_space);

    // Map the shared signal trampoline into the reserved range.
    auto signal_trampoline_region = m_space->allocate_region_with_vmobject(signal_trampoline_range.value(), g_signal_trampoline_region->vmobject(), 0, "Signal trampoline", PROT_READ | PROT_EXEC, true);
    if (signal_trampoline_region.is_error()) {
        // The range was pre-allocated above, so mapping into it must succeed.
        VERIFY_NOT_REACHED();
    }

    signal_trampoline_region.value()->set_syscall_region(true);

    // Reset per-exec process state: veil, coredump metadata, pending signals, futexes.
    m_executable = main_program_description->custody();
    m_arguments = arguments;
    m_environment = environment;

    m_veil_state = VeilState::None;
    m_unveiled_paths.clear();
    m_unveiled_paths.set_metadata({ "/", UnveilAccess::None, false });

    m_coredump_metadata.clear();

    auto current_thread = Thread::current();
    current_thread->clear_signals();

    clear_futex_queues_on_exec();

    // Close all file descriptors marked close-on-exec.
    for (size_t i = 0; i < m_fds.size(); ++i) {
        auto& description_and_flags = m_fds[i];
        if (description_and_flags.description() && description_and_flags.flags() & FD_CLOEXEC)
            description_and_flags = {};
    }

    // For dynamically linked programs, hand the interpreter an fd to the main
    // program (passed via the auxiliary vector) so it can map it from userspace.
    int main_program_fd = -1;
    if (interpreter_description) {
        main_program_fd = alloc_fd();
        VERIFY(main_program_fd >= 0);
        auto seek_result = main_program_description->seek(0, SEEK_SET);
        VERIFY(!seek_result.is_error());
        main_program_description->set_readable(true);
        m_fds[main_program_fd].set(move(main_program_description), FD_CLOEXEC);
    }

    // Find the surviving thread that becomes the new main thread.
    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    VERIFY(new_main_thread);

    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, uid(), euid(), gid(), egid(), path, main_program_fd);

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    //       and we don't want to deal with faults after this point.
    auto make_stack_result = make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), move(arguments), move(environment), move(auxv));
    if (make_stack_result.is_error())
        return make_stack_result.error();
    FlatPtr new_userspace_sp = make_stack_result.value();

    if (wait_for_tracer_at_next_execve()) {
        // Make sure we release the ptrace lock here or the tracer will block forever.
        ptrace_locker.unlock();
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);
    }

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec().
    Processor::current().enter_critical(prev_flags);

    // NOTE: Be careful to not trigger any page faults below!

    m_name = parts.take_last();
    new_main_thread->set_name(m_name);

    {
        ProtectedDataMutationScope scope { *this };
        m_promises = m_execpromises;
        m_has_promises = m_has_execpromises;

        m_execpromises = 0;
        m_has_execpromises = false;

        m_signal_trampoline = signal_trampoline_region.value()->vaddr();

        // FIXME: PID/TID ISSUE
        m_pid = new_main_thread->tid().value();
    }

    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error()) {
        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
        VERIFY_NOT_REACHED();
    }
    new_main_thread->reset_fpu_state();

    // Set up the new thread's initial register state for userspace entry.
    auto& regs = new_main_thread->m_regs;
#if ARCH(I386)
    regs.cs = GDT_SELECTOR_CODE3 | 3;
    regs.ds = GDT_SELECTOR_DATA3 | 3;
    regs.es = GDT_SELECTOR_DATA3 | 3;
    regs.ss = GDT_SELECTOR_DATA3 | 3;
    regs.fs = GDT_SELECTOR_DATA3 | 3;
    regs.gs = GDT_SELECTOR_TLS | 3;
    regs.eip = load_result.entry_eip;
    regs.esp = new_userspace_sp;
#else
    regs.rip = load_result.entry_eip;
    regs.rsp = new_userspace_sp;
#endif
    regs.cr3 = space().page_directory().cr3();

    {
        TemporaryChange profiling_disabler(m_profiling, was_profiling);
        PerformanceManager::add_process_exec_event(*this);
    }

    {
        ScopedSpinLock lock(g_scheduler_lock);
        new_main_thread->set_state(Thread::State::Runnable);
    }
    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_if_locked(lock_count_to_restore);
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current().in_critical());
    return KSuccess;
}
  553. static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd)
  554. {
  555. Vector<ELF::AuxiliaryValue> auxv;
  556. // PHDR/EXECFD
  557. // PH*
  558. auxv.append({ ELF::AuxiliaryValue::PageSize, PAGE_SIZE });
  559. auxv.append({ ELF::AuxiliaryValue::BaseAddress, (void*)load_base });
  560. auxv.append({ ELF::AuxiliaryValue::Entry, (void*)entry_eip });
  561. // NOTELF
  562. auxv.append({ ELF::AuxiliaryValue::Uid, (long)uid });
  563. auxv.append({ ELF::AuxiliaryValue::EUid, (long)euid });
  564. auxv.append({ ELF::AuxiliaryValue::Gid, (long)gid });
  565. auxv.append({ ELF::AuxiliaryValue::EGid, (long)egid });
  566. auxv.append({ ELF::AuxiliaryValue::Platform, Processor::current().platform_string() });
  567. // FIXME: This is platform specific
  568. auxv.append({ ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() });
  569. auxv.append({ ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });
  570. // FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
  571. auxv.append({ ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 });
  572. char random_bytes[16] {};
  573. get_fast_random_bytes((u8*)random_bytes, sizeof(random_bytes));
  574. auxv.append({ ELF::AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });
  575. auxv.append({ ELF::AuxiliaryValue::ExecFilename, executable_path });
  576. auxv.append({ ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd });
  577. auxv.append({ ELF::AuxiliaryValue::Null, 0L });
  578. return auxv;
  579. }
  580. static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
  581. {
  582. int word_start = 2;
  583. int word_length = 0;
  584. if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
  585. Vector<String> interpreter_words;
  586. for (int i = 2; i < nread; ++i) {
  587. if (first_page[i] == '\n') {
  588. break;
  589. }
  590. if (first_page[i] != ' ') {
  591. ++word_length;
  592. }
  593. if (first_page[i] == ' ') {
  594. if (word_length > 0) {
  595. interpreter_words.append(String(&first_page[word_start], word_length));
  596. }
  597. word_length = 0;
  598. word_start = i + 1;
  599. }
  600. }
  601. if (word_length > 0)
  602. interpreter_words.append(String(&first_page[word_start], word_length));
  603. if (!interpreter_words.is_empty())
  604. return interpreter_words;
  605. }
  606. return ENOEXEC;
  607. }
// Decides how an ELF image should be loaded, based on its program headers:
//  - returns an open FileDescription for the PT_INTERP program interpreter, if the image requests one;
//  - returns nullptr for an ET_DYN image with no PT_INTERP (it relocates itself, e.g. /usr/lib/Loader.so);
//  - returns KSuccess with no description for a plain static executable that validates;
//  - returns ENOEXEC for malformed images and ELOOP for an interpreter that itself wants an interpreter.
// `nread` is how many bytes of the caller's first-page buffer are valid; `file_size` is the
// full on-disk size, used to bounds-check the program header table.
KResultOr<RefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, const ElfW(Ehdr) & main_program_header, int nread, size_t file_size)
{
    // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
    String interpreter_path;
    if (!ELF::validate_program_headers(main_program_header, file_size, (const u8*)&main_program_header, nread, &interpreter_path)) {
        dbgln("exec({}): File has invalid ELF Program headers", path);
        return ENOEXEC;
    }

    if (!interpreter_path.is_empty()) {
        // The image names a PT_INTERP interpreter; open and validate it before handing it back.
        dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path);
        auto interp_result = VFS::the().open(interpreter_path, O_EXEC, 0, current_directory());
        if (interp_result.is_error()) {
            dbgln("exec({}): Unable to open program interpreter {}", path, interpreter_path);
            return interp_result.error();
        }
        auto interpreter_description = interp_result.value();
        auto interp_metadata = interpreter_description->metadata();

        VERIFY(interpreter_description->inode());

        // Validate the program interpreter as a valid elf binary.
        // If your program interpreter is a #! file or something, it's time to stop playing games :)
        if (interp_metadata.size < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        char first_page[PAGE_SIZE] = {};
        auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
        auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page));
        if (nread_or_error.is_error())
            return ENOEXEC;
        // From here on, `nread` refers to the interpreter's first page, not the main program's.
        nread = nread_or_error.value();

        if (nread < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        auto elf_header = (ElfW(Ehdr)*)first_page;
        if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
        String interpreter_interpreter_path;
        if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        // An interpreter that requests its own interpreter would recurse; refuse it outright.
        if (!interpreter_interpreter_path.is_empty()) {
            dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_description->absolute_path(), interpreter_interpreter_path);
            return ELOOP;
        }

        return interpreter_description;
    }

    if (main_program_header.e_type == ET_REL) {
        // We can't exec an ET_REL, that's just an object file from the compiler
        return ENOEXEC;
    }
    if (main_program_header.e_type == ET_DYN) {
        // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible
        // for its own relocation (i.e. it's /usr/lib/Loader.so)
        if (path != "/usr/lib/Loader.so")
            dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path);
        return nullptr;
    }

    // No interpreter, but, path refers to a valid elf image
    return KResult(KSuccess);
}
  669. KResult Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
  670. {
  671. if (recursion_depth > 2) {
  672. dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path);
  673. return ELOOP;
  674. }
  675. // Open the file to check what kind of binary format it is
  676. // Currently supported formats:
  677. // - #! interpreted file
  678. // - ELF32
  679. // * ET_EXEC binary that just gets loaded
  680. // * ET_DYN binary that requires a program interpreter
  681. //
  682. auto file_or_error = VFS::the().open(path, O_EXEC, 0, current_directory());
  683. if (file_or_error.is_error())
  684. return file_or_error.error();
  685. auto description = file_or_error.release_value();
  686. auto metadata = description->metadata();
  687. if (!metadata.is_regular_file())
  688. return EACCES;
  689. // Always gonna need at least 3 bytes. these are for #!X
  690. if (metadata.size < 3)
  691. return ENOEXEC;
  692. VERIFY(description->inode());
  693. // Read the first page of the program into memory so we can validate the binfmt of it
  694. char first_page[PAGE_SIZE];
  695. auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
  696. auto nread_or_error = description->read(first_page_buffer, sizeof(first_page));
  697. if (nread_or_error.is_error())
  698. return ENOEXEC;
  699. // 1) #! interpreted file
  700. auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value());
  701. if (!shebang_result.is_error()) {
  702. auto shebang_words = shebang_result.release_value();
  703. auto shebang_path = shebang_words.first();
  704. arguments[0] = move(path);
  705. if (!arguments.try_prepend(move(shebang_words)))
  706. return ENOMEM;
  707. return exec(move(shebang_path), move(arguments), move(environment), ++recursion_depth);
  708. }
  709. // #2) ELF32 for i386
  710. if (nread_or_error.value() < (int)sizeof(ElfW(Ehdr)))
  711. return ENOEXEC;
  712. auto main_program_header = (ElfW(Ehdr)*)first_page;
  713. if (!ELF::validate_elf_header(*main_program_header, metadata.size)) {
  714. dbgln("exec({}): File has invalid ELF header", path);
  715. return ENOEXEC;
  716. }
  717. auto elf_result = find_elf_interpreter_for_executable(path, *main_program_header, nread_or_error.value(), metadata.size);
  718. // Assume a static ELF executable by default
  719. RefPtr<FileDescription> interpreter_description;
  720. // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
  721. if (!elf_result.is_error()) {
  722. // It's a dynamic ELF executable, with or without an interpreter. Do not allocate TLS
  723. interpreter_description = elf_result.value();
  724. } else if (elf_result.error().is_error())
  725. return elf_result.error();
  726. // The bulk of exec() is done by do_exec(), which ensures that all locals
  727. // are cleaned up by the time we yield-teleport below.
  728. Thread* new_main_thread = nullptr;
  729. u32 prev_flags = 0;
  730. auto result = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header);
  731. if (result.is_error())
  732. return result;
  733. VERIFY_INTERRUPTS_DISABLED();
  734. VERIFY(Processor::current().in_critical());
  735. auto current_thread = Thread::current();
  736. if (current_thread == new_main_thread) {
  737. // We need to enter the scheduler lock before changing the state
  738. // and it will be released after the context switch into that
  739. // thread. We should also still be in our critical section
  740. VERIFY(!g_scheduler_lock.own_lock());
  741. VERIFY(Processor::current().in_critical() == 1);
  742. g_scheduler_lock.lock();
  743. current_thread->set_state(Thread::State::Running);
  744. Processor::assume_context(*current_thread, prev_flags);
  745. VERIFY_NOT_REACHED();
  746. }
  747. Processor::current().leave_critical(prev_flags);
  748. return KSuccess;
  749. }
  750. KResultOr<FlatPtr> Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
  751. {
  752. REQUIRE_PROMISE(exec);
  753. // NOTE: Be extremely careful with allocating any kernel memory in exec().
  754. // On success, the kernel stack will be lost.
  755. Syscall::SC_execve_params params;
  756. if (!copy_from_user(&params, user_params))
  757. return EFAULT;
  758. if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
  759. return E2BIG;
  760. String path;
  761. {
  762. auto path_arg = get_syscall_path_argument(params.path);
  763. if (path_arg.is_error())
  764. return path_arg.error();
  765. path = path_arg.value()->view();
  766. }
  767. auto copy_user_strings = [](const auto& list, auto& output) {
  768. if (!list.length)
  769. return true;
  770. Checked size = sizeof(*list.strings);
  771. size *= list.length;
  772. if (size.has_overflow())
  773. return false;
  774. Vector<Syscall::StringArgument, 32> strings;
  775. if (!strings.try_resize(list.length))
  776. return false;
  777. if (!copy_from_user(strings.data(), list.strings, list.length * sizeof(*list.strings)))
  778. return false;
  779. for (size_t i = 0; i < list.length; ++i) {
  780. auto string = copy_string_from_user(strings[i]);
  781. if (string.is_null())
  782. return false;
  783. if (!output.try_append(move(string)))
  784. return false;
  785. }
  786. return true;
  787. };
  788. Vector<String> arguments;
  789. if (!copy_user_strings(params.arguments, arguments))
  790. return EFAULT;
  791. Vector<String> environment;
  792. if (!copy_user_strings(params.environment, environment))
  793. return EFAULT;
  794. auto result = exec(move(path), move(arguments), move(environment));
  795. VERIFY(result.is_error()); // We should never continue after a successful exec!
  796. return result.error();
  797. }
  798. }