execve.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ScopeGuard.h>
  7. #include <AK/TemporaryChange.h>
  8. #include <AK/WeakPtr.h>
  9. #include <Kernel/Debug.h>
  10. #include <Kernel/FileSystem/Custody.h>
  11. #include <Kernel/FileSystem/OpenFileDescription.h>
  12. #include <Kernel/Memory/AllocationStrategy.h>
  13. #include <Kernel/Memory/MemoryManager.h>
  14. #include <Kernel/Memory/PageDirectory.h>
  15. #include <Kernel/Memory/Region.h>
  16. #include <Kernel/Memory/SharedInodeVMObject.h>
  17. #include <Kernel/Panic.h>
  18. #include <Kernel/PerformanceManager.h>
  19. #include <Kernel/Process.h>
  20. #include <Kernel/Random.h>
  21. #include <Kernel/Time/TimeManagement.h>
  22. #include <LibC/limits.h>
  23. #include <LibELF/AuxiliaryVector.h>
  24. #include <LibELF/Image.h>
  25. #include <LibELF/Validation.h>
  26. namespace Kernel {
  27. extern Memory::Region* g_signal_trampoline_region;
// Result of loading an ELF object into a fresh address space.
// Produced by load_elf_object() and consumed by Process::load()/do_exec().
struct LoadResult {
    // The new address space the object was loaded into.
    OwnPtr<Memory::AddressSpace> space;
    // Address the object's first segment (file offset 0) was mapped at.
    FlatPtr load_base { 0 };
    // Entry point address, already adjusted by the load offset.
    FlatPtr entry_eip { 0 };
    size_t size { 0 };
    // Master TLS image from a PT_TLS header, if any.
    // Weak: the region itself is owned by `space`.
    WeakPtr<Memory::Region> tls_region;
    size_t tls_size { 0 };
    size_t tls_alignment { 0 };
    // The main thread's userspace stack region (owned by `space`).
    WeakPtr<Memory::Region> stack_region;
};
  38. static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, StringView executable_path, Optional<Process::ScopedDescriptionAllocation> const& main_program_fd_allocation);
  39. static bool validate_stack_size(NonnullOwnPtrVector<KString> const& arguments, NonnullOwnPtrVector<KString>& environment)
  40. {
  41. size_t total_arguments_size = 0;
  42. size_t total_environment_size = 0;
  43. for (auto& a : arguments)
  44. total_arguments_size += a.length() + 1;
  45. for (auto& e : environment)
  46. total_environment_size += e.length() + 1;
  47. total_arguments_size += sizeof(char*) * (arguments.size() + 1);
  48. total_environment_size += sizeof(char*) * (environment.size() + 1);
  49. static constexpr size_t max_arguments_size = Thread::default_userspace_stack_size / 8;
  50. static constexpr size_t max_environment_size = Thread::default_userspace_stack_size / 8;
  51. if (total_arguments_size > max_arguments_size)
  52. return false;
  53. if (total_environment_size > max_environment_size)
  54. return false;
  55. // FIXME: This doesn't account for the size of the auxiliary vector
  56. return true;
  57. }
// Builds the initial userspace stack for the new main thread: the
// argv/envp string data, the auxiliary vector, and the null-terminated
// argv[]/envp[] pointer arrays. On x86_64 the argument registers
// (rdi/rsi/rdx) are set instead of pushing argc/argv/envp.
// Returns the final (16-byte aligned) userspace stack pointer.
static ErrorOr<FlatPtr> make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Memory::Region& region, NonnullOwnPtrVector<KString> const& arguments,
    NonnullOwnPtrVector<KString> const& environment, Vector<ELF::AuxiliaryValue> auxiliary_values)
{
    FlatPtr new_sp = region.range().end().get();

    // Add some bits of randomness to the user stack pointer.
    new_sp -= round_up_to_power_of_two(get_fast_random<u32>() % 4096, 16);

    // Pushes a single pointer-sized value onto the nascent user stack.
    auto push_on_new_stack = [&new_sp](FlatPtr value) {
        new_sp -= sizeof(FlatPtr);
        Userspace<FlatPtr*> stack_ptr = new_sp;
        auto result = copy_to_user(stack_ptr, &value);
        VERIFY(!result.is_error());
    };

    // Pushes one auxv_t record onto the nascent user stack.
    auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) {
        new_sp -= sizeof(auxv_t);
        Userspace<auxv_t*> stack_ptr = new_sp;
        auto result = copy_to_user(stack_ptr, &value);
        VERIFY(!result.is_error());
    };

    // Copies a NUL-terminated string onto the stack, padded to FlatPtr
    // size so the stack pointer stays pointer-aligned.
    auto push_string_on_new_stack = [&new_sp](StringView string) {
        new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr));
        Userspace<FlatPtr*> stack_ptr = new_sp;
        auto result = copy_to_user(stack_ptr, string.characters_without_null_termination(), string.length() + 1);
        VERIFY(!result.is_error());
    };

    // Copy the argument strings, remembering where each one landed.
    Vector<FlatPtr> argv_entries;
    for (auto& argument : arguments) {
        push_string_on_new_stack(argument.view());
        TRY(argv_entries.try_append(new_sp));
    }

    Vector<FlatPtr> env_entries;
    for (auto& variable : environment) {
        push_string_on_new_stack(variable.view());
        TRY(env_entries.try_append(new_sp));
    }

    // Push any strings referenced by auxiliary values and patch the
    // auxv entries to point at their on-stack copies.
    for (auto& value : auxiliary_values) {
        if (!value.optional_string.is_empty()) {
            push_string_on_new_stack(value.optional_string);
            value.auxv.a_un.a_ptr = (void*)new_sp;
        }
    }

    // Push the auxiliary vector in reverse so it reads in order in memory.
    for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) {
        auto& value = auxiliary_values[i];
        push_aux_value_on_new_stack(value.auxv);
    }

    // envp[] array, null-terminated, pushed in reverse.
    push_on_new_stack(0);
    for (ssize_t i = env_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(env_entries[i]);
    FlatPtr envp = new_sp;

    // argv[] array, null-terminated, pushed in reverse.
    push_on_new_stack(0);
    for (ssize_t i = argv_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(argv_entries[i]);
    FlatPtr argv = new_sp;

    // NOTE: The stack needs to be 16-byte aligned.
    new_sp -= new_sp % 16;

#if ARCH(I386)
    // GCC assumes that the return address has been pushed to the stack when it enters the function,
    // so we need to reserve an extra pointer's worth of bytes below this to make GCC's stack alignment
    // calculations work
    new_sp -= sizeof(void*);

    push_on_new_stack(envp);
    push_on_new_stack(argv);
    push_on_new_stack(argv_entries.size());
#else
    regs.rdi = argv_entries.size();
    regs.rsi = argv;
    regs.rdx = envp;
#endif

    VERIFY(new_sp % 16 == 0);

    // FIXME: The way we're setting up the stack and passing arguments to the entry point isn't ABI-compliant
    return new_sp;
}
// Virtual address range [start, end) that an ELF object's PT_LOAD
// segments require; both members zero means "not yet computed".
struct RequiredLoadRange {
    FlatPtr start { 0 };
    FlatPtr end { 0 };
};
  133. static ErrorOr<RequiredLoadRange> get_required_load_range(OpenFileDescription& program_description)
  134. {
  135. auto& inode = *(program_description.inode());
  136. auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode));
  137. size_t executable_size = inode.size();
  138. auto region = TRY(MM.allocate_kernel_region_with_vmobject(*vmobject, Memory::page_round_up(executable_size), "ELF memory range calculation", Memory::Region::Access::Read));
  139. auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size);
  140. if (!elf_image.is_valid()) {
  141. return EINVAL;
  142. }
  143. RequiredLoadRange range {};
  144. elf_image.for_each_program_header([&range](const auto& pheader) {
  145. if (pheader.type() != PT_LOAD)
  146. return;
  147. auto region_start = (FlatPtr)pheader.vaddr().as_ptr();
  148. auto region_end = region_start + pheader.size_in_memory();
  149. if (range.start == 0 || region_start < range.start)
  150. range.start = region_start;
  151. if (range.end == 0 || region_end > range.end)
  152. range.end = region_end;
  153. });
  154. VERIFY(range.end > range.start);
  155. return range;
  156. };
// Picks a randomized base offset at which to load the executable (ASLR).
// ET_DYN (PIE) objects can go anywhere within the load range. ET_EXEC
// objects are fixed, so when an interpreter must also be loaded we pick
// a random offset within the larger of the two gaps the main program
// leaves in the load range. Returns EINVAL for other ELF types and
// E2BIG when too little space remains for meaningful randomization.
static ErrorOr<FlatPtr> get_load_offset(const ElfW(Ehdr) & main_program_header, OpenFileDescription& main_program_description, OpenFileDescription* interpreter_description)
{
    constexpr FlatPtr load_range_start = 0x08000000;
    constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
    constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB;

    // Page-aligned random offset within [start, start + size).
    auto random_load_offset_in_range([](auto start, auto size) {
        return Memory::page_round_down(start + get_good_random<FlatPtr>() % size);
    });

    if (main_program_header.e_type == ET_DYN) {
        return random_load_offset_in_range(load_range_start, load_range_size);
    }

    if (main_program_header.e_type != ET_EXEC)
        return EINVAL;

    auto main_program_load_range = TRY(get_required_load_range(main_program_description));

    RequiredLoadRange selected_range {};

    if (interpreter_description) {
        auto interpreter_load_range = TRY(get_required_load_range(*interpreter_description));
        auto interpreter_size_in_memory = interpreter_load_range.end - interpreter_load_range.start;
        // Last base address at which the interpreter still fits entirely in range.
        auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory;

        // No intersection
        if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end)
            return random_load_offset_in_range(load_range_start, load_range_size);

        // The main program splits the load range in two candidate gaps.
        RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start };
        RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };

        // Select larger part
        if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
            selected_range = first_available_part;
        else
            selected_range = second_available_part;
    } else
        selected_range = main_program_load_range;

    // If main program is too big and leaves us without enough space for adequate loader randomization
    if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size)
        return E2BIG;

    return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
}
// Whether load_elf_object() should allocate a master TLS region from a
// PT_TLS header. Used for the main (statically linked) program; when an
// interpreter is loaded instead, TLS setup is left to userspace.
enum class ShouldAllocateTls {
    No,
    Yes,
};

// Whether the mapped executable regions should be flagged as allowed to
// make syscalls (granted when loading the dynamic loader).
enum class ShouldAllowSyscalls {
    No,
    Yes,
};
// Loads an ELF object (main program or interpreter) into `new_space`:
// maps read-only PT_LOAD segments directly from the file's VMObject,
// copies writable segments and the PT_TLS image into fresh anonymous
// regions, and allocates the main thread's stack. Switches the CPU into
// `new_space` so the copy_to_user() calls below target the image being
// built; the caller is responsible for switching back on failure.
static ErrorOr<LoadResult> load_elf_object(NonnullOwnPtr<Memory::AddressSpace> new_space, OpenFileDescription& object_description,
    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode));

    // Refuse to execute a binary that is currently mapped writable
    // anywhere; its text could change underneath us.
    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();

    // Map the whole file into kernel memory so we can parse its headers.
    auto executable_region = TRY(MM.allocate_kernel_region_with_vmobject(*vmobject, Memory::page_round_up(executable_size), "ELF loading", Memory::Region::Access::Read));
    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);

    if (!elf_image.is_valid())
        return ENOEXEC;

    Memory::Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;

    auto elf_name = TRY(object_description.pseudo_path());
    VERIFY(!Processor::in_critical());

    // Switch into the new address space; subsequent copy_to_user() calls
    // write into the process image being constructed.
    Memory::MemoryManager::enter_address_space(*new_space);

    // PT_TLS: allocate and fill the master TLS image.
    auto load_tls_section = [&](auto& program_header) -> ErrorOr<void> {
        VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
        VERIFY(program_header.size_in_memory());

        // Guard against a header whose data lies outside the mapped file.
        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
            dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
            return ENOEXEC;
        }

        auto range = TRY(new_space->try_allocate_range({}, program_header.size_in_memory()));
        master_tls_region = TRY(new_space->allocate_region(range, String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
        master_tls_size = program_header.size_in_memory();
        master_tls_alignment = program_header.alignment();

        TRY(copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image()));
        return {};
    };

    auto load_writable_section = [&](auto& program_header) -> ErrorOr<void> {
        // Writable section: create a copy in memory.
        VERIFY(program_header.alignment() == PAGE_SIZE);

        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
            dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
            return ENOEXEC;
        }

        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : "");

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        auto range_end = VirtualAddress { Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };

        auto range = TRY(new_space->try_allocate_range(range_base, range_end.get() - range_base.get()));
        auto region = TRY(new_space->allocate_region(range, region_name, prot, AllocationStrategy::Reserve));

        // It's not always the case with PIE executables (and very well shouldn't be) that the
        // virtual address in the program header matches the one we end up giving the process.
        // In order to copy the data image correctly into memory, we need to copy the data starting at
        // the right initial page offset into the pages allocated for the elf_alloc-XX section.
        // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
        //        the .text and .data PT_LOAD sections of the executable.
        //        Accessing it would definitely be a bug.
        auto page_offset = program_header.vaddr();
        page_offset.mask(~PAGE_MASK);
        TRY(copy_to_user((u8*)region->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image()));
        return {};
    };

    auto load_section = [&](auto& program_header) -> ErrorOr<void> {
        if (program_header.size_in_memory() == 0)
            return {};

        if (program_header.is_writable())
            return load_writable_section(program_header);

        // Non-writable section: map the executable itself in memory.
        VERIFY(program_header.alignment() == PAGE_SIZE);
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        auto range_end = VirtualAddress { Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()) };
        auto range = TRY(new_space->try_allocate_range(range_base, range_end.get() - range_base.get()));
        auto region = TRY(new_space->allocate_region_with_vmobject(range, *vmobject, program_header.offset(), elf_name->view(), prot, true));

        if (should_allow_syscalls == ShouldAllowSyscalls::Yes)
            region->set_syscall_region(true);
        // The segment at file offset 0 establishes the load base address.
        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region->vaddr().as_ptr();
        return {};
    };

    // Dispatch one program header to the appropriate loader above.
    auto load_elf_program_header = [&](auto& program_header) -> ErrorOr<void> {
        if (program_header.type() == PT_TLS)
            return load_tls_section(program_header);

        if (program_header.type() == PT_LOAD)
            return load_section(program_header);

        // NOTE: We ignore other program header types.
        return {};
    };

    // Iterate all program headers, bailing out on the first error.
    TRY([&] {
        ErrorOr<void> result;
        elf_image.for_each_program_header([&](ELF::Image::ProgramHeader const& program_header) {
            result = load_elf_program_header(program_header);
            return result.is_error() ? IterationDecision::Break : IterationDecision::Continue;
        });
        return result;
    }());

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! {})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    // Set up the main thread's userspace stack.
    auto stack_range = TRY(new_space->try_allocate_range({}, Thread::default_userspace_stack_size));
    auto* stack_region = TRY(new_space->allocate_region(stack_range, "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
    stack_region->set_stack(true);

    return LoadResult {
        move(new_space),
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        AK::try_make_weak_ptr(master_tls_region),
        master_tls_size,
        master_tls_alignment,
        stack_region->make_weak_ptr()
    };
}
// Loads the program image for exec into a brand-new address space.
// For statically linked executables the main program is loaded directly
// (including its master TLS); for dynamically linked ones only the ELF
// interpreter is loaded here — it maps the main program from userspace.
// The ScopeGuard ensures we are back in this process' current address
// space when we leave (load_elf_object switches into the new one).
ErrorOr<LoadResult>
Process::load(NonnullRefPtr<OpenFileDescription> main_program_description,
    RefPtr<OpenFileDescription> interpreter_description, const ElfW(Ehdr) & main_program_header)
{
    auto new_space = TRY(Memory::AddressSpace::try_create(nullptr));

    ScopeGuard space_guard([&]() {
        // Switch back to this process' address space on every exit path.
        Memory::MemoryManager::enter_process_address_space(*this);
    });

    auto load_offset = TRY(get_load_offset(main_program_header, main_program_description, interpreter_description));

    if (interpreter_description.is_null()) {
        // Statically linked: load the main program itself, TLS included.
        auto load_result = TRY(load_elf_object(move(new_space), main_program_description, load_offset, ShouldAllocateTls::Yes, ShouldAllowSyscalls::No));
        m_master_tls_region = load_result.tls_region;
        m_master_tls_size = load_result.tls_size;
        m_master_tls_alignment = load_result.tls_alignment;
        return load_result;
    }

    // Dynamically linked: load only the interpreter, with syscalls allowed.
    auto interpreter_load_result = TRY(load_elf_object(move(new_space), *interpreter_description, load_offset, ShouldAllocateTls::No, ShouldAllowSyscalls::Yes));

    // TLS allocation will be done in userspace by the loader
    VERIFY(!interpreter_load_result.tls_region);
    VERIFY(!interpreter_load_result.tls_alignment);
    VERIFY(!interpreter_load_result.tls_size);

    return interpreter_load_result;
}
// The heart of sys$execve(): commits this process to a new executable
// image. After the "no turning back" point it kills all other threads,
// swaps in the freshly built address space, resets per-exec state
// (signals, CLOEXEC fds, unveil, futexes), builds the new userspace
// stack, and leaves the chosen main thread Runnable. Returns inside a
// critical section with interrupts disabled; the caller finishes the
// transition into the new program.
ErrorOr<void> Process::do_exec(NonnullRefPtr<OpenFileDescription> main_program_description, NonnullOwnPtrVector<KString> arguments, NonnullOwnPtrVector<KString> environment,
    RefPtr<OpenFileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags, const ElfW(Ehdr) & main_program_header)
{
    VERIFY(is_user_process());
    VERIFY(!Processor::in_critical());

    // Although we *could* handle a pseudo_path here, trying to execute something that doesn't have
    // a custody (e.g. BlockDevice or RandomDevice) is pretty suspicious anyway.
    auto path = TRY(main_program_description->original_absolute_path());

    dbgln_if(EXEC_DEBUG, "do_exec: {}", path);

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(arguments, environment))
        return E2BIG;

    // Derive the new process/thread name from the last path component.
    // FIXME: split_view() currently allocates (Vector) without checking for failure.
    auto parts = path->view().split_view('/');
    if (parts.is_empty())
        return ENOENT;

    auto new_process_name = TRY(KString::try_create(parts.last()));
    auto new_main_thread_name = TRY(new_process_name->try_clone());

    auto load_result = TRY(load(main_program_description, interpreter_description, main_program_header));

    // NOTE: We don't need the interpreter executable description after this point.
    //       We destroy it here to prevent it from getting destroyed when we return from this function.
    //       That's important because when we're returning from this function, we're in a very delicate
    //       state where we can't block (e.g by trying to acquire a mutex in description teardown.)
    bool has_interpreter = interpreter_description;
    interpreter_description = nullptr;

    // Map the global signal trampoline into the new space as a syscall region.
    auto signal_trampoline_range = TRY(load_result.space->try_allocate_range({}, PAGE_SIZE));
    auto signal_trampoline_region = TRY(load_result.space->allocate_region_with_vmobject(signal_trampoline_range, g_signal_trampoline_region->vmobject(), 0, "Signal trampoline", PROT_READ | PROT_EXEC, true));
    signal_trampoline_region->set_syscall_region(true);

    // (For dynamically linked executable) Allocate an FD for passing the main executable to the dynamic loader.
    Optional<ScopedDescriptionAllocation> main_program_fd_allocation;
    if (has_interpreter)
        main_program_fd_allocation = TRY(m_fds.allocate());

    // We commit to the new executable at this point. There is no turning back!

    // Prevent other processes from attaching to us with ptrace while we're doing this.
    MutexLocker ptrace_locker(ptrace_lock());

    // Disable profiling temporarily in case it's running on this process.
    auto was_profiling = m_profiling;
    TemporaryChange profiling_disabler(m_profiling, false);

    kill_threads_except_self();

    // Apply setuid/setgid bits unless the mount is flagged MS_NOSUID.
    bool executable_is_setid = false;
    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        auto main_program_metadata = main_program_description->metadata();
        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_protected_values.euid = main_program_metadata.uid;
            m_protected_values.suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            ProtectedDataMutationScope scope { *this };
            m_protected_values.egid = main_program_metadata.gid;
            m_protected_values.sgid = main_program_metadata.gid;
        }
    }

    // setid processes must not be dumpable.
    set_dumpable(!executable_is_setid);

    {
        // We must disable global profiling (especially kfree tracing) here because
        // we might otherwise end up walking the stack into the process' space that
        // is about to be destroyed.
        TemporaryChange global_profiling_disabler(g_profiling_all_threads, false);
        m_space = load_result.space.release_nonnull();
    }
    Memory::MemoryManager::enter_address_space(*m_space);

    m_executable = main_program_description->custody();
    m_arguments = move(arguments);
    m_environment = move(environment);

    // Reset per-exec state: unveil, coredump properties, signals, futexes.
    m_veil_state = VeilState::None;
    m_unveiled_paths.clear();
    m_unveiled_paths.set_metadata({ "/", UnveilAccess::None, false });

    for (auto& property : m_coredump_properties)
        property = {};

    auto* current_thread = Thread::current();
    current_thread->reset_signals_for_exec();

    clear_futex_queues_on_exec();

    // Close all file descriptors marked close-on-exec.
    fds().change_each([&](auto& file_description_metadata) {
        if (file_description_metadata.is_valid() && file_description_metadata.flags() & FD_CLOEXEC)
            file_description_metadata = {};
    });

    if (main_program_fd_allocation.has_value()) {
        main_program_description->set_readable(true);
        m_fds[main_program_fd_allocation->fd].set(move(main_program_description), FD_CLOEXEC);
    }

    // Pick the surviving thread as the new main thread.
    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    VERIFY(new_main_thread);

    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, uid(), euid(), gid(), egid(), path->view(), main_program_fd_allocation);

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    //       and we don't want to deal with faults after this point.
    auto new_userspace_sp = TRY(make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), m_arguments, m_environment, move(auxv)));

    if (wait_for_tracer_at_next_execve()) {
        // Make sure we release the ptrace lock here or the tracer will block forever.
        ptrace_locker.unlock();
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);
    } else {
        // Unlock regardless before disabling interrupts.
        // Ensure we always unlock after checking ptrace status to avoid TOCTOU ptrace issues
        ptrace_locker.unlock();
    }

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that sti()'d on exit, we might timer tick'd too soon in exec().
    Processor::enter_critical();
    prev_flags = cpu_flags();
    cli();

    // NOTE: Be careful to not trigger any page faults below!

    m_name = move(new_process_name);
    new_main_thread->set_name(move(new_main_thread_name));

    {
        ProtectedDataMutationScope scope { *this };
        // Promote execpromises to promises for the new image.
        m_protected_values.promises = m_protected_values.execpromises.load();
        m_protected_values.has_promises = m_protected_values.has_execpromises.load();

        m_protected_values.execpromises = 0;
        m_protected_values.has_execpromises = false;

        m_protected_values.signal_trampoline = signal_trampoline_region->vaddr();

        // FIXME: PID/TID ISSUE
        m_protected_values.pid = new_main_thread->tid().value();
    }

    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error()) {
        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
        VERIFY_NOT_REACHED();
    }

    new_main_thread->reset_fpu_state();

    // Set up the initial register state for the new program.
    auto& regs = new_main_thread->m_regs;
#if ARCH(I386)
    regs.cs = GDT_SELECTOR_CODE3 | 3;
    regs.ds = GDT_SELECTOR_DATA3 | 3;
    regs.es = GDT_SELECTOR_DATA3 | 3;
    regs.ss = GDT_SELECTOR_DATA3 | 3;
    regs.fs = GDT_SELECTOR_DATA3 | 3;
    regs.gs = GDT_SELECTOR_TLS | 3;
    regs.eip = load_result.entry_eip;
    regs.esp = new_userspace_sp;
#else
    regs.rip = load_result.entry_eip;
    regs.rsp = new_userspace_sp;
#endif
    regs.cr3 = address_space().page_directory().cr3();

    {
        // Restore the previous profiling state just to record the exec event.
        TemporaryChange profiling_disabler(m_profiling, was_profiling);
        PerformanceManager::add_process_exec_event(*this);
    }

    {
        SpinlockLocker lock(g_scheduler_lock);
        new_main_thread->set_state(Thread::State::Runnable);
    }
    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_if_locked(lock_count_to_restore);
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());
    return {};
}
// Builds the ELF auxiliary vector (auxv) describing the new process
// image for userspace: load base, entry point, real/effective ids,
// platform info, 16 random bytes, the executable path, and (for
// dynamically linked programs) the main program's file descriptor.
// The vector is terminated by a Null entry.
static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, StringView executable_path, Optional<Process::ScopedDescriptionAllocation> const& main_program_fd_allocation)
{
    Vector<ELF::AuxiliaryValue> auxv;
    // PHDR/EXECFD
    // PH*
    auxv.append({ ELF::AuxiliaryValue::PageSize, PAGE_SIZE });
    auxv.append({ ELF::AuxiliaryValue::BaseAddress, (void*)load_base });

    auxv.append({ ELF::AuxiliaryValue::Entry, (void*)entry_eip });
    // NOTELF

    auxv.append({ ELF::AuxiliaryValue::Uid, (long)uid.value() });
    auxv.append({ ELF::AuxiliaryValue::EUid, (long)euid.value() });
    auxv.append({ ELF::AuxiliaryValue::Gid, (long)gid.value() });
    auxv.append({ ELF::AuxiliaryValue::EGid, (long)egid.value() });

    auxv.append({ ELF::AuxiliaryValue::Platform, Processor::platform_string() });
    // FIXME: This is platform specific
    auxv.append({ ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() });

    auxv.append({ ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });

    // "Secure" (AT_SECURE-style) flag: set when real and effective ids differ.
    // FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
    auxv.append({ ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 });

    // 16 random bytes for userspace (e.g. stack canary seeding).
    char random_bytes[16] {};
    get_fast_random_bytes({ (u8*)random_bytes, sizeof(random_bytes) });

    auxv.append({ ELF::AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });

    auxv.append({ ELF::AuxiliaryValue::ExecFilename, executable_path });

    if (main_program_fd_allocation.has_value())
        auxv.append({ ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd_allocation->fd });

    // Terminator — must remain the last entry.
    auxv.append({ ELF::AuxiliaryValue::Null, 0L });
    return auxv;
}
  534. static ErrorOr<NonnullOwnPtrVector<KString>> find_shebang_interpreter_for_executable(char const first_page[], size_t nread)
  535. {
  536. int word_start = 2;
  537. size_t word_length = 0;
  538. if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
  539. NonnullOwnPtrVector<KString> interpreter_words;
  540. for (size_t i = 2; i < nread; ++i) {
  541. if (first_page[i] == '\n') {
  542. break;
  543. }
  544. if (first_page[i] != ' ') {
  545. ++word_length;
  546. }
  547. if (first_page[i] == ' ') {
  548. if (word_length > 0) {
  549. auto word = TRY(KString::try_create(StringView { &first_page[word_start], word_length }));
  550. interpreter_words.append(move(word));
  551. }
  552. word_length = 0;
  553. word_start = i + 1;
  554. }
  555. }
  556. if (word_length > 0) {
  557. auto word = TRY(KString::try_create(StringView { &first_page[word_start], word_length }));
  558. interpreter_words.append(move(word));
  559. }
  560. if (!interpreter_words.is_empty())
  561. return interpreter_words;
  562. }
  563. return ENOEXEC;
  564. }
// Determines which (if any) program interpreter (PT_INTERP, e.g. the dynamic
// loader) should be used to run the ELF image described by
// `main_executable_header`.
//
// Returns:
//   - an OpenFileDescription for the interpreter, if the executable names one
//     and it validates as a sane ELF binary itself;
//   - nullptr if the image needs no interpreter (ET_EXEC, or an ET_DYN image
//     that relocates itself, i.e. /usr/lib/Loader.so);
//   - ENOEXEC / ELOOP on validation failure.
ErrorOr<RefPtr<OpenFileDescription>> Process::find_elf_interpreter_for_executable(StringView path, ElfW(Ehdr) const& main_executable_header, size_t main_executable_header_size, size_t file_size)
{
    // Not using ErrorOr here because we'll want to do the same thing in userspace in the RTLD
    String interpreter_path;
    // Validates the program headers and, as a side effect, extracts the
    // PT_INTERP path (if any) into interpreter_path.
    if (!ELF::validate_program_headers(main_executable_header, file_size, (u8 const*)&main_executable_header, main_executable_header_size, &interpreter_path)) {
        dbgln("exec({}): File has invalid ELF Program headers", path);
        return ENOEXEC;
    }

    if (!interpreter_path.is_empty()) {
        dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path);
        auto interpreter_description = TRY(VirtualFileSystem::the().open(interpreter_path, O_EXEC, 0, current_directory()));
        auto interp_metadata = interpreter_description->metadata();

        VERIFY(interpreter_description->inode());

        // Validate the program interpreter as a valid elf binary.
        // If your program interpreter is a #! file or something, it's time to stop playing games :)
        if (interp_metadata.size < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        // Zero-initialized so a short read leaves deterministic bytes.
        char first_page[PAGE_SIZE] = {};
        auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
        auto nread = TRY(interpreter_description->read(first_page_buffer, sizeof(first_page)));

        if (nread < sizeof(ElfW(Ehdr)))
            return ENOEXEC;

        auto elf_header = (ElfW(Ehdr)*)first_page;
        if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_path);
            return ENOEXEC;
        }

        // Not using ErrorOr here because we'll want to do the same thing in userspace in the RTLD
        String interpreter_interpreter_path;
        if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_path);
            return ENOEXEC;
        }

        // Refuse chained interpreters: the interpreter itself must not request
        // its own PT_INTERP, or we could recurse indefinitely.
        if (!interpreter_interpreter_path.is_empty()) {
            dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_path, interpreter_interpreter_path);
            return ELOOP;
        }

        return interpreter_description;
    }

    if (main_executable_header.e_type == ET_REL) {
        // We can't exec an ET_REL, that's just an object file from the compiler
        return ENOEXEC;
    }
    if (main_executable_header.e_type == ET_DYN) {
        // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible
        // for its own relocation (i.e. it's /usr/lib/Loader.so)
        if (path != "/usr/lib/Loader.so")
            dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path);
        return nullptr;
    }

    // No interpreter, but, path refers to a valid elf image
    return nullptr;
}
  618. ErrorOr<void> Process::exec(NonnullOwnPtr<KString> path, NonnullOwnPtrVector<KString> arguments, NonnullOwnPtrVector<KString> environment, int recursion_depth)
  619. {
  620. if (recursion_depth > 2) {
  621. dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path);
  622. return ELOOP;
  623. }
  624. // Open the file to check what kind of binary format it is
  625. // Currently supported formats:
  626. // - #! interpreted file
  627. // - ELF32
  628. // * ET_EXEC binary that just gets loaded
  629. // * ET_DYN binary that requires a program interpreter
  630. //
  631. auto description = TRY(VirtualFileSystem::the().open(path->view(), O_EXEC, 0, current_directory()));
  632. auto metadata = description->metadata();
  633. if (!metadata.is_regular_file())
  634. return EACCES;
  635. // Always gonna need at least 3 bytes. these are for #!X
  636. if (metadata.size < 3)
  637. return ENOEXEC;
  638. VERIFY(description->inode());
  639. // Read the first page of the program into memory so we can validate the binfmt of it
  640. char first_page[PAGE_SIZE];
  641. auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
  642. auto nread = TRY(description->read(first_page_buffer, sizeof(first_page)));
  643. // 1) #! interpreted file
  644. auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread);
  645. if (!shebang_result.is_error()) {
  646. auto shebang_words = shebang_result.release_value();
  647. auto shebang_path = TRY(shebang_words.first().try_clone());
  648. arguments.ptr_at(0) = move(path);
  649. TRY(arguments.try_prepend(move(shebang_words)));
  650. return exec(move(shebang_path), move(arguments), move(environment), ++recursion_depth);
  651. }
  652. // #2) ELF32 for i386
  653. if (nread < sizeof(ElfW(Ehdr)))
  654. return ENOEXEC;
  655. auto main_program_header = (ElfW(Ehdr)*)first_page;
  656. if (!ELF::validate_elf_header(*main_program_header, metadata.size)) {
  657. dbgln("exec({}): File has invalid ELF header", path);
  658. return ENOEXEC;
  659. }
  660. // The bulk of exec() is done by do_exec(), which ensures that all locals
  661. // are cleaned up by the time we yield-teleport below.
  662. Thread* new_main_thread = nullptr;
  663. u32 prev_flags = 0;
  664. auto interpreter_description = TRY(find_elf_interpreter_for_executable(path->view(), *main_program_header, nread, metadata.size));
  665. TRY(do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header));
  666. VERIFY_INTERRUPTS_DISABLED();
  667. VERIFY(Processor::in_critical());
  668. auto current_thread = Thread::current();
  669. if (current_thread == new_main_thread) {
  670. // We need to enter the scheduler lock before changing the state
  671. // and it will be released after the context switch into that
  672. // thread. We should also still be in our critical section
  673. VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
  674. VERIFY(Processor::in_critical() == 1);
  675. g_scheduler_lock.lock();
  676. current_thread->set_state(Thread::State::Running);
  677. Processor::assume_context(*current_thread, prev_flags);
  678. VERIFY_NOT_REACHED();
  679. }
  680. // NOTE: This code path is taken in the non-syscall case, i.e when the kernel spawns
  681. // a userspace process directly (such as /bin/SystemServer on startup)
  682. if (prev_flags & 0x200)
  683. sti();
  684. Processor::leave_critical();
  685. return {};
  686. }
  687. ErrorOr<FlatPtr> Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
  688. {
  689. VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this);
  690. REQUIRE_PROMISE(exec);
  691. // NOTE: Be extremely careful with allocating any kernel memory in exec().
  692. // On success, the kernel stack will be lost.
  693. auto params = TRY(copy_typed_from_user(user_params));
  694. if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
  695. return E2BIG;
  696. auto path = TRY(get_syscall_path_argument(params.path));
  697. auto copy_user_strings = [](const auto& list, auto& output) -> ErrorOr<void> {
  698. if (!list.length)
  699. return {};
  700. Checked<size_t> size = sizeof(*list.strings);
  701. size *= list.length;
  702. if (size.has_overflow())
  703. return EOVERFLOW;
  704. Vector<Syscall::StringArgument, 32> strings;
  705. TRY(strings.try_resize(list.length));
  706. TRY(copy_from_user(strings.data(), list.strings, size.value()));
  707. for (size_t i = 0; i < list.length; ++i) {
  708. auto string = TRY(try_copy_kstring_from_user(strings[i]));
  709. TRY(output.try_append(move(string)));
  710. }
  711. return {};
  712. };
  713. NonnullOwnPtrVector<KString> arguments;
  714. TRY(copy_user_strings(params.arguments, arguments));
  715. NonnullOwnPtrVector<KString> environment;
  716. TRY(copy_user_strings(params.environment, environment));
  717. TRY(exec(move(path), move(arguments), move(environment)));
  718. // We should never continue after a successful exec!
  719. VERIFY_NOT_REACHED();
  720. }
  721. }