// Kernel/Syscalls/execve.cpp
/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 * Copyright (c) 2022, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
#include <AK/ScopeGuard.h>
#include <AK/TemporaryChange.h>
#include <Kernel/Arch/CPU.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/OpenFileDescription.h>
#include <Kernel/FileSystem/VirtualFileSystem.h>
#include <Kernel/Memory/MemoryManager.h>
#include <Kernel/Memory/Region.h>
#include <Kernel/Memory/SharedInodeVMObject.h>
#include <Kernel/Panic.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Scheduler.h>
#include <Kernel/Time/TimeManagement.h>
#include <LibELF/AuxiliaryVector.h>
#include <LibELF/Image.h>
#include <LibELF/Validation.h>
namespace Kernel {

extern Memory::Region* g_signal_trampoline_region;
// Result of mapping one ELF object (main program or dynamic loader) into a
// freshly created address space. Ownership of the space is carried along so
// the caller can install it on the process once loading succeeds.
struct LoadResult {
    OwnPtr<Memory::AddressSpace> space;
    FlatPtr load_base { 0 };  // Address the object's first PT_LOAD segment ended up at.
    FlatPtr entry_eip { 0 };  // Entry point, already adjusted by the load offset.
    size_t size { 0 };        // Size of the executable file image.
    // Master TLS image region, set only when loading with ShouldAllocateTls::Yes
    // and the object has a PT_TLS header. Weak: the region lives in `space`.
    LockWeakPtr<Memory::Region> tls_region;
    size_t tls_size { 0 };
    size_t tls_alignment { 0 };
    // Main thread's userspace stack region (also owned by `space`).
    LockWeakPtr<Memory::Region> stack_region;
};
// Number of entries generate_auxiliary_vector() produces; must stay in sync
// with the initializer list in that function.
static constexpr size_t auxiliary_vector_size = 15;
static Array<ELF::AuxiliaryValue, auxiliary_vector_size> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, StringView executable_path, Optional<Process::ScopedDescriptionAllocation> const& main_program_fd_allocation);
  40. static bool validate_stack_size(Vector<NonnullOwnPtr<KString>> const& arguments, Vector<NonnullOwnPtr<KString>>& environment, Array<ELF::AuxiliaryValue, auxiliary_vector_size> const& auxiliary)
  41. {
  42. size_t total_arguments_size = 0;
  43. size_t total_environment_size = 0;
  44. size_t total_auxiliary_size = 0;
  45. for (auto const& a : arguments)
  46. total_arguments_size += a->length() + 1;
  47. for (auto const& e : environment)
  48. total_environment_size += e->length() + 1;
  49. for (auto const& v : auxiliary) {
  50. if (!v.optional_string.is_empty())
  51. total_auxiliary_size += round_up_to_power_of_two(v.optional_string.length() + 1, sizeof(FlatPtr));
  52. if (v.auxv.a_type == ELF::AuxiliaryValue::Random)
  53. total_auxiliary_size += round_up_to_power_of_two(16, sizeof(FlatPtr));
  54. }
  55. total_arguments_size += sizeof(char*) * (arguments.size() + 1);
  56. total_environment_size += sizeof(char*) * (environment.size() + 1);
  57. total_auxiliary_size += sizeof(auxv_t) * auxiliary.size();
  58. if (total_arguments_size > Process::max_arguments_size)
  59. return false;
  60. if (total_environment_size > Process::max_environment_size)
  61. return false;
  62. if (total_auxiliary_size > Process::max_auxiliary_size)
  63. return false;
  64. return true;
  65. }
// Builds the initial userspace stack for the new main thread inside the
// (already current) new address space: pushes the argument, environment and
// auxiliary strings, then the auxv array, envp[] and argv[] pointer tables
// (each null-terminated), 16-byte aligns the stack, and seeds the registers
// that carry argc/argv/envp to the program entry point.
//
// Returns the resulting userspace stack pointer.
static ErrorOr<FlatPtr> make_userspace_context_for_main_thread([[maybe_unused]] ThreadRegisters& regs, Memory::Region& region, Vector<NonnullOwnPtr<KString>> const& arguments,
    Vector<NonnullOwnPtr<KString>> const& environment, Array<ELF::AuxiliaryValue, auxiliary_vector_size> auxiliary_values)
{
    FlatPtr new_sp = region.range().end().get();

    // Add some bits of randomness to the user stack pointer.
    new_sp -= round_up_to_power_of_two(get_fast_random<u32>() % 4096, 16);

    // NOTE: These helpers copy directly into the new process's memory; the
    // copies cannot fail since the stack region was just allocated (VERIFY).
    auto push_on_new_stack = [&new_sp](FlatPtr value) {
        new_sp -= sizeof(FlatPtr);
        Userspace<FlatPtr*> stack_ptr = new_sp;
        auto result = copy_to_user(stack_ptr, &value);
        VERIFY(!result.is_error());
    };

    auto push_aux_value_on_new_stack = [&new_sp](auxv_t value) {
        new_sp -= sizeof(auxv_t);
        Userspace<auxv_t*> stack_ptr = new_sp;
        auto result = copy_to_user(stack_ptr, &value);
        VERIFY(!result.is_error());
    };

    // Strings are null-terminated and padded to FlatPtr alignment
    // (validate_stack_size() accounts for the same rounding).
    auto push_string_on_new_stack = [&new_sp](StringView string) {
        new_sp -= round_up_to_power_of_two(string.length() + 1, sizeof(FlatPtr));
        Userspace<FlatPtr*> stack_ptr = new_sp;
        auto result = copy_to_user(stack_ptr, string.characters_without_null_termination(), string.length() + 1);
        VERIFY(!result.is_error());
    };

    // Push the argv strings, remembering each string's address for the
    // argv[] pointer table below.
    Vector<FlatPtr> argv_entries;
    for (auto const& argument : arguments) {
        push_string_on_new_stack(argument->view());
        TRY(argv_entries.try_append(new_sp));
    }

    // Same for the environment strings.
    Vector<FlatPtr> env_entries;
    for (auto const& variable : environment) {
        push_string_on_new_stack(variable->view());
        TRY(env_entries.try_append(new_sp));
    }

    // Push auxiliary payloads (strings, AT_RANDOM bytes) and patch each
    // auxv entry to point at its pushed payload.
    for (auto& value : auxiliary_values) {
        if (!value.optional_string.is_empty()) {
            push_string_on_new_stack(value.optional_string);
            value.auxv.a_un.a_ptr = (void*)new_sp;
        }
        if (value.auxv.a_type == ELF::AuxiliaryValue::Random) {
            u8 random_bytes[16] {};
            get_fast_random_bytes({ random_bytes, sizeof(random_bytes) });
            push_string_on_new_stack({ random_bytes, sizeof(random_bytes) });
            value.auxv.a_un.a_ptr = (void*)new_sp;
        }
    }

    // Push the auxv entries in reverse so they end up in forward order
    // (the stack grows downward).
    for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) {
        auto& value = auxiliary_values[i];
        push_aux_value_on_new_stack(value.auxv);
    }

    // envp[]: null terminator first, then the pointers in reverse.
    push_on_new_stack(0);

    for (ssize_t i = env_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(env_entries[i]);
    FlatPtr envp = new_sp;

    // argv[]: same layout as envp[].
    push_on_new_stack(0);
    for (ssize_t i = argv_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(argv_entries[i]);
    FlatPtr argv = new_sp;

    // NOTE: The stack needs to be 16-byte aligned.
    new_sp -= new_sp % 16;

    // Hand argc/argv/envp to the entry point in the architecture's first
    // three argument registers.
#if ARCH(X86_64)
    regs.rdi = argv_entries.size();
    regs.rsi = argv;
    regs.rdx = envp;
#elif ARCH(AARCH64)
    regs.x[0] = argv_entries.size();
    regs.x[1] = argv;
    regs.x[2] = envp;
#else
#    error Unknown architecture
#endif

    VERIFY(new_sp % 16 == 0);

    // FIXME: The way we're setting up the stack and passing arguments to the entry point isn't ABI-compliant
    return new_sp;
}
// The [start, end) virtual address span that an ELF object's PT_LOAD headers
// require, before any load offset is applied.
struct RequiredLoadRange {
    FlatPtr start { 0 };
    FlatPtr end { 0 };
};
  145. static ErrorOr<RequiredLoadRange> get_required_load_range(OpenFileDescription& program_description)
  146. {
  147. auto& inode = *(program_description.inode());
  148. auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode));
  149. size_t executable_size = inode.size();
  150. size_t rounded_executable_size = TRY(Memory::page_round_up(executable_size));
  151. auto region = TRY(MM.allocate_kernel_region_with_vmobject(*vmobject, rounded_executable_size, "ELF memory range calculation"sv, Memory::Region::Access::Read));
  152. auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size);
  153. if (!elf_image.is_valid()) {
  154. return EINVAL;
  155. }
  156. RequiredLoadRange range {};
  157. elf_image.for_each_program_header([&range](auto const& pheader) {
  158. if (pheader.type() != PT_LOAD)
  159. return;
  160. auto region_start = (FlatPtr)pheader.vaddr().as_ptr();
  161. auto region_end = region_start + pheader.size_in_memory();
  162. if (range.start == 0 || region_start < range.start)
  163. range.start = region_start;
  164. if (range.end == 0 || region_end > range.end)
  165. range.end = region_end;
  166. });
  167. VERIFY(range.end > range.start);
  168. return range;
  169. };
// Picks a randomized load offset (ASLR) for the object being loaded.
//
// ET_DYN (PIE) objects can go anywhere in the 256 MiB window starting at
// load_range_start. ET_EXEC objects are fixed, so when an interpreter must
// also be loaded we pick a random offset for the interpreter inside the
// largest gap the fixed main program leaves in that window.
//
// Returns EINVAL for any other ELF type, and E2BIG when the remaining gap is
// too small for adequate randomization.
static ErrorOr<FlatPtr> get_load_offset(const ElfW(Ehdr) & main_program_header, OpenFileDescription& main_program_description, OpenFileDescription* interpreter_description)
{
    constexpr FlatPtr load_range_start = 0x08000000;
    constexpr FlatPtr load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
    constexpr FlatPtr minimum_load_offset_randomization_size = 10 * MiB;

    auto random_load_offset_in_range([](auto start, auto size) {
        return Memory::page_round_down(start + get_good_random<FlatPtr>() % size);
    });

    if (main_program_header.e_type == ET_DYN) {
        return random_load_offset_in_range(load_range_start, load_range_size);
    }

    if (main_program_header.e_type != ET_EXEC)
        return EINVAL;

    auto main_program_load_range = TRY(get_required_load_range(main_program_description));

    RequiredLoadRange selected_range {};

    if (interpreter_description) {
        auto interpreter_load_range = TRY(get_required_load_range(*interpreter_description));
        auto interpreter_size_in_memory = interpreter_load_range.end - interpreter_load_range.start;
        // Highest start address that still leaves room for the whole interpreter.
        auto interpreter_load_range_end = load_range_start + load_range_size - interpreter_size_in_memory;

        // No intersection
        if (main_program_load_range.end < load_range_start || main_program_load_range.start > interpreter_load_range_end)
            return random_load_offset_in_range(load_range_start, load_range_size);

        // The fixed main program splits the window in two; pick the bigger gap.
        RequiredLoadRange first_available_part = { load_range_start, main_program_load_range.start };
        RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };

        // Select larger part
        if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
            selected_range = first_available_part;
        else
            selected_range = second_available_part;
    } else
        selected_range = main_program_load_range;

    // If main program is too big and leaves us without enough space for adequate loader randomization
    if (selected_range.end - selected_range.start < minimum_load_offset_randomization_size)
        return E2BIG;

    return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
}
// Whether load_elf_object() should allocate a master TLS region from a
// PT_TLS header. Process::load() passes Yes for a statically loaded main
// program and No for the dynamic loader (TLS is then set up in userspace).
enum class ShouldAllocateTls {
    No,
    Yes,
};
// Whether regions mapped from the object are flagged as syscall regions
// (Yes is used when loading the dynamic loader; see Process::load()).
enum class ShouldAllowSyscalls {
    No,
    Yes,
};
// Maps a single ELF object into `new_space`, handling PT_LOAD (read-only
// segments are mapped from the file's VM object, writable segments get
// private copies), PT_TLS (optional master TLS image) and PT_GNU_STACK
// (stack size hint). Switches the CPU into `new_space` while copying
// segment data, so the caller must not be in a critical section.
//
// Returns ETXTBSY if the file is currently mapped writable anywhere,
// ENOEXEC for invalid images, or a LoadResult owning the populated space.
static ErrorOr<LoadResult> load_elf_object(NonnullOwnPtr<Memory::AddressSpace> new_space, OpenFileDescription& object_description,
    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode));

    // Refuse classic "write while exec" races up front.
    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();
    size_t rounded_executable_size = TRY(Memory::page_round_up(executable_size));

    // Map the whole file into kernel memory so we can parse and copy from it.
    auto executable_region = TRY(MM.allocate_kernel_region_with_vmobject(*vmobject, rounded_executable_size, "ELF loading"sv, Memory::Region::Access::Read));
    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);

    if (!elf_image.is_valid())
        return ENOEXEC;

    Memory::Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;
    size_t stack_size = 0;

    auto elf_name = TRY(object_description.pseudo_path());
    VERIFY(!Processor::in_critical());

    // The copy_to_user() calls below target the *new* address space.
    Memory::MemoryManager::enter_address_space(*new_space);

    // PT_TLS: allocate the master TLS image and copy the initializer into it.
    auto load_tls_section = [&](auto& program_header) -> ErrorOr<void> {
        VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
        VERIFY(program_header.size_in_memory());

        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
            dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
            return ENOEXEC;
        }

        auto region_name = TRY(KString::formatted("{} (master-tls)", elf_name));
        master_tls_region = TRY(new_space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, program_header.size_in_memory(), PAGE_SIZE, region_name->view(), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
        master_tls_size = program_header.size_in_memory();
        master_tls_alignment = program_header.alignment();

        TRY(copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image()));
        return {};
    };

    auto load_writable_section = [&](auto& program_header) -> ErrorOr<void> {
        // Writable section: create a copy in memory.
        VERIFY(program_header.alignment() % PAGE_SIZE == 0);

        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
            dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
            return ENOEXEC;
        }

        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        auto region_name = TRY(KString::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : ""));

        // Round the segment's span out to whole pages.
        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        size_t rounded_range_end = TRY(Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()));
        auto range_end = VirtualAddress { rounded_range_end };

        auto region = TRY(new_space->allocate_region(Memory::RandomizeVirtualAddress::Yes, range_base, range_end.get() - range_base.get(), PAGE_SIZE, region_name->view(), prot, AllocationStrategy::Reserve));

        // It's not always the case with PIE executables (and very well shouldn't be) that the
        // virtual address in the program header matches the one we end up giving the process.
        // In order to copy the data image correctly into memory, we need to copy the data starting at
        // the right initial page offset into the pages allocated for the elf_alloc-XX section.
        // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
        //        the .text and .data PT_LOAD sections of the executable.
        //        Accessing it would definitely be a bug.
        auto page_offset = program_header.vaddr();
        page_offset.mask(~PAGE_MASK);
        TRY(copy_to_user((u8*)region->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image()));
        return {};
    };

    auto load_section = [&](auto& program_header) -> ErrorOr<void> {
        if (program_header.size_in_memory() == 0)
            return {};

        if (program_header.is_writable())
            return load_writable_section(program_header);

        // Non-writable section: map the executable itself in memory.
        VERIFY(program_header.alignment() % PAGE_SIZE == 0);
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;

        auto range_base = VirtualAddress { Memory::page_round_down(program_header.vaddr().offset(load_offset).get()) };
        size_t rounded_range_end = TRY(Memory::page_round_up(program_header.vaddr().offset(load_offset).offset(program_header.size_in_memory()).get()));
        auto range_end = VirtualAddress { rounded_range_end };
        auto region = TRY(new_space->allocate_region_with_vmobject(Memory::RandomizeVirtualAddress::Yes, range_base, range_end.get() - range_base.get(), program_header.alignment(), *vmobject, program_header.offset(), elf_name->view(), prot, true));
        if (should_allow_syscalls == ShouldAllowSyscalls::Yes)
            region->set_syscall_region(true);
        // The segment at file offset 0 defines where the object's base landed.
        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region->vaddr().as_ptr();
        return {};
    };

    auto load_elf_program_header = [&](auto& program_header) -> ErrorOr<void> {
        if (program_header.type() == PT_TLS)
            return load_tls_section(program_header);

        if (program_header.type() == PT_LOAD)
            return load_section(program_header);

        if (program_header.type() == PT_GNU_STACK) {
            stack_size = program_header.size_in_memory();
        }

        // NOTE: We ignore other program header types.
        return {};
    };

    // for_each_program_header() can't propagate errors itself, so capture the
    // first failure and bail out of the iteration.
    TRY([&] {
        ErrorOr<void> result;
        elf_image.for_each_program_header([&](ELF::Image::ProgramHeader const& program_header) {
            result = load_elf_program_header(program_header);
            return result.is_error() ? IterationDecision::Break : IterationDecision::Continue;
        });
        return result;
    }());

    // Fall back to the default stack size if PT_GNU_STACK didn't specify one.
    if (stack_size == 0) {
        stack_size = Thread::default_userspace_stack_size;
    }

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! {})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    auto* stack_region = TRY(new_space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, stack_size, PAGE_SIZE, "Stack (Main thread)"sv, PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
    stack_region->set_stack(true);

    return LoadResult {
        move(new_space),
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        TRY(AK::try_make_weak_ptr_if_nonnull(master_tls_region)),
        master_tls_size,
        master_tls_alignment,
        TRY(stack_region->try_make_weak_ptr())
    };
}
// Loads the new program image into a fresh address space. If an interpreter
// (dynamic loader) is given, only the interpreter is mapped here — it will
// map the main program itself in userspace. Otherwise the main program is
// loaded directly, including its master TLS image.
//
// NOTE: load_elf_object() leaves the CPU in the new address space; the scope
// guard switches back to this process's current space on every exit path.
ErrorOr<LoadResult>
Process::load(NonnullLockRefPtr<OpenFileDescription> main_program_description,
    LockRefPtr<OpenFileDescription> interpreter_description, const ElfW(Ehdr) & main_program_header)
{
    auto new_space = TRY(Memory::AddressSpace::try_create(nullptr));

    ScopeGuard space_guard([&]() {
        Memory::MemoryManager::enter_process_address_space(*this);
    });

    auto load_offset = TRY(get_load_offset(main_program_header, main_program_description, interpreter_description));

    if (interpreter_description.is_null()) {
        // Statically linked: load the main program and adopt its TLS info.
        auto load_result = TRY(load_elf_object(move(new_space), main_program_description, load_offset, ShouldAllocateTls::Yes, ShouldAllowSyscalls::No));
        m_master_tls_region = load_result.tls_region;
        m_master_tls_size = load_result.tls_size;
        m_master_tls_alignment = load_result.tls_alignment;
        return load_result;
    }

    auto interpreter_load_result = TRY(load_elf_object(move(new_space), *interpreter_description, load_offset, ShouldAllocateTls::No, ShouldAllowSyscalls::Yes));

    // TLS allocation will be done in userspace by the loader
    VERIFY(!interpreter_load_result.tls_region);
    VERIFY(!interpreter_load_result.tls_alignment);
    VERIFY(!interpreter_load_result.tls_size);

    return interpreter_load_result;
}
  366. void Process::clear_signal_handlers_for_exec()
  367. {
  368. // Comments are as they are presented in the POSIX specification, but slightly out of order.
  369. for (size_t signal = 0; signal < m_signal_action_data.size(); signal++) {
  370. // Except for SIGCHLD, signals set to be ignored by the calling process image shall be set to be ignored by the new process image.
  371. // If the SIGCHLD signal is set to be ignored by the calling process image, it is unspecified whether the SIGCHLD signal is set
  372. // to be ignored or to the default action in the new process image.
  373. if (signal != SIGCHLD && m_signal_action_data[signal].handler_or_sigaction.get() == reinterpret_cast<FlatPtr>(SIG_IGN)) {
  374. m_signal_action_data[signal] = {};
  375. m_signal_action_data[signal].handler_or_sigaction.set(reinterpret_cast<FlatPtr>(SIG_IGN));
  376. continue;
  377. }
  378. // Signals set to the default action in the calling process image shall be set to the default action in the new process image.
  379. // Signals set to be caught by the calling process image shall be set to the default action in the new process image.
  380. m_signal_action_data[signal] = {};
  381. }
  382. }
// The point-of-no-return core of execve(): loads the new image, computes the
// new credentials (setuid/setgid), then tears down the old process state
// (threads, signal handlers, veil, CLOEXEC fds) and installs the new address
// space, stack, and registers on the surviving main thread.
//
// On success, returns with interrupts disabled and in a critical section —
// the caller is expected to context-switch into the new image. Everything
// before the "We commit to the new executable" marker may fail safely;
// failures after it are fatal (see the VERIFY_NOT_REACHED below).
ErrorOr<void> Process::do_exec(NonnullLockRefPtr<OpenFileDescription> main_program_description, Vector<NonnullOwnPtr<KString>> arguments, Vector<NonnullOwnPtr<KString>> environment,
    LockRefPtr<OpenFileDescription> interpreter_description, Thread*& new_main_thread, InterruptsState& previous_interrupts_state, const ElfW(Ehdr) & main_program_header)
{
    VERIFY(is_user_process());
    VERIFY(!Processor::in_critical());
    auto main_program_metadata = main_program_description->metadata();
    // NOTE: Don't allow running SUID binaries at all if we are in a jail.
    TRY(Process::current().jail().with([&](auto const& my_jail) -> ErrorOr<void> {
        if (my_jail && (main_program_metadata.is_setuid() || main_program_metadata.is_setgid())) {
            return Error::from_errno(EPERM);
        }
        return {};
    }));

    // Although we *could* handle a pseudo_path here, trying to execute something that doesn't have
    // a custody (e.g. BlockDevice or RandomDevice) is pretty suspicious anyway.
    auto path = TRY(main_program_description->original_absolute_path());

    dbgln_if(EXEC_DEBUG, "do_exec: {}", path);

    // The new process/thread name is the executable's basename.
    auto last_part = path->view().find_last_split_view('/');
    auto new_process_name = TRY(KString::try_create(last_part));
    auto new_main_thread_name = TRY(new_process_name->try_clone());

    auto load_result = TRY(load(main_program_description, interpreter_description, main_program_header));

    // NOTE: We don't need the interpreter executable description after this point.
    //       We destroy it here to prevent it from getting destroyed when we return from this function.
    //       That's important because when we're returning from this function, we're in a very delicate
    //       state where we can't block (e.g by trying to acquire a mutex in description teardown.)
    bool has_interpreter = interpreter_description;
    interpreter_description = nullptr;

    // Map the shared signal trampoline into the new space as a syscall region.
    auto* signal_trampoline_region = TRY(load_result.space->allocate_region_with_vmobject(Memory::RandomizeVirtualAddress::Yes, {}, PAGE_SIZE, PAGE_SIZE, g_signal_trampoline_region->vmobject(), 0, "Signal trampoline"sv, PROT_READ | PROT_EXEC, true));
    signal_trampoline_region->set_syscall_region(true);

    // (For dynamically linked executable) Allocate an FD for passing the main executable to the dynamic loader.
    Optional<ScopedDescriptionAllocation> main_program_fd_allocation;
    if (has_interpreter)
        main_program_fd_allocation = TRY(allocate_fd());

    auto old_credentials = this->credentials();
    auto new_credentials = old_credentials;
    auto old_process_attached_jail = m_attached_jail.with([&](auto& jail) -> RefPtr<Jail> { return jail; });

    // setuid/setgid executables change the effective and saved IDs, unless
    // the filesystem is mounted nosuid.
    bool executable_is_setid = false;

    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        auto new_euid = old_credentials->euid();
        auto new_egid = old_credentials->egid();
        auto new_suid = old_credentials->suid();
        auto new_sgid = old_credentials->sgid();

        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            new_euid = main_program_metadata.uid;
            new_suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            new_egid = main_program_metadata.gid;
            new_sgid = main_program_metadata.gid;
        }

        if (executable_is_setid) {
            new_credentials = TRY(Credentials::create(
                old_credentials->uid(),
                old_credentials->gid(),
                new_euid,
                new_egid,
                new_suid,
                new_sgid,
                old_credentials->extra_gids(),
                old_credentials->sid(),
                old_credentials->pgid()));
        }
    }

    // We commit to the new executable at this point. There is no turning back!

    // Prevent other processes from attaching to us with ptrace while we're doing this.
    MutexLocker ptrace_locker(ptrace_lock());

    // Disable profiling temporarily in case it's running on this process.
    auto was_profiling = m_profiling;
    TemporaryChange profiling_disabler(m_profiling, false);

    kill_threads_except_self();

    // setid executables must not be dumpable (core dumps would leak secrets).
    with_mutable_protected_data([&](auto& protected_data) {
        protected_data.credentials = move(new_credentials);
        protected_data.dumpable = !executable_is_setid;
        protected_data.executable_is_setid = executable_is_setid;
    });

    // We make sure to enter the new address space before destroying the old one.
    // This ensures that the process always has a valid page directory.
    Memory::MemoryManager::enter_address_space(*load_result.space);

    m_space.with([&](auto& space) { space = load_result.space.release_nonnull(); });

    m_executable.with([&](auto& executable) { executable = main_program_description->custody(); });
    m_arguments = move(arguments);
    m_attached_jail.with([&](auto& jail) {
        jail = old_process_attached_jail;
    });
    m_environment = move(environment);

    TRY(m_unveil_data.with([&](auto& unveil_data) -> ErrorOr<void> {
        TRY(m_exec_unveil_data.with([&](auto& exec_unveil_data) -> ErrorOr<void> {
            // Note: If we have exec unveil data being waiting to be dispatched
            // to the current execve'd program, then we apply the unveil data and
            // ensure it is locked in the new program.
            if (exec_unveil_data.state == VeilState::Dropped) {
                unveil_data.state = VeilState::LockedInherited;
                exec_unveil_data.state = VeilState::None;
                unveil_data.paths = TRY(exec_unveil_data.paths.deep_copy());
            } else {
                unveil_data.state = VeilState::None;
                exec_unveil_data.state = VeilState::None;
                unveil_data.paths.clear();
                unveil_data.paths.set_metadata({ TRY(KString::try_create("/"sv)), UnveilAccess::None, false });
            }
            exec_unveil_data.paths.clear();
            exec_unveil_data.paths.set_metadata({ TRY(KString::try_create("/"sv)), UnveilAccess::None, false });
            return {};
        }));
        return {};
    }));

    // Coredump metadata does not survive exec.
    m_coredump_properties.for_each([](auto& property) {
        property = {};
    });

    auto* current_thread = Thread::current();
    current_thread->reset_signals_for_exec();

    clear_signal_handlers_for_exec();
    clear_futex_queues_on_exec();

    // Close all FD_CLOEXEC file descriptors.
    m_fds.with_exclusive([&](auto& fds) {
        fds.change_each([&](auto& file_description_metadata) {
            if (file_description_metadata.is_valid() && file_description_metadata.flags() & FD_CLOEXEC)
                file_description_metadata = {};
        });
    });

    // Hand the main executable to the dynamic loader via the reserved fd.
    if (main_program_fd_allocation.has_value()) {
        main_program_description->set_readable(true);
        m_fds.with_exclusive([&](auto& fds) { fds[main_program_fd_allocation->fd].set(move(main_program_description), FD_CLOEXEC); });
    }

    // The thread calling exec becomes the new main thread; if exec was called
    // on another process's behalf, pick the (single) surviving thread.
    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    VERIFY(new_main_thread);

    auto credentials = this->credentials();
    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, credentials->uid(), credentials->euid(), credentials->gid(), credentials->egid(), path->view(), main_program_fd_allocation);

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(m_arguments, m_environment, auxv))
        return E2BIG;

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    //       and we don't want to deal with faults after this point.
    auto new_userspace_sp = TRY(make_userspace_context_for_main_thread(new_main_thread->regs(), *load_result.stack_region.unsafe_ptr(), m_arguments, m_environment, move(auxv)));

    set_name(move(new_process_name));
    new_main_thread->set_name(move(new_main_thread_name));

    if (wait_for_tracer_at_next_execve()) {
        // Make sure we release the ptrace lock here or the tracer will block forever.
        ptrace_locker.unlock();
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);
    } else {
        // Unlock regardless before disabling interrupts.
        // Ensure we always unlock after checking ptrace status to avoid TOCTOU ptrace issues
        ptrace_locker.unlock();
    }

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that calls enable_interrupts() on exit, we might timer tick'd too soon in exec().
    Processor::enter_critical();
    previous_interrupts_state = processor_interrupts_state();
    Processor::disable_interrupts();

    // NOTE: Be careful to not trigger any page faults below!

    // Promote execpromises to promises for the new image (pledge semantics).
    with_mutable_protected_data([&](auto& protected_data) {
        protected_data.promises = protected_data.execpromises.load();
        protected_data.has_promises = protected_data.has_execpromises.load();

        protected_data.execpromises = 0;
        protected_data.has_execpromises = false;

        protected_data.signal_trampoline = signal_trampoline_region->vaddr();

        // FIXME: PID/TID ISSUE
        protected_data.pid = new_main_thread->tid().value();
    });

    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error()) {
        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
        VERIFY_NOT_REACHED();
    }
    new_main_thread->reset_fpu_state();

    auto& regs = new_main_thread->m_regs;
    address_space().with([&](auto& space) {
        regs.set_exec_state(load_result.entry_eip, new_userspace_sp, *space);
    });

    {
        // Re-enable profiling (if it was on) and record the exec event.
        TemporaryChange profiling_disabler(m_profiling, was_profiling);
        PerformanceManager::add_process_exec_event(*this);
    }

    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_exclusive_if_locked(lock_count_to_restore);
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());
    return {};
}
// Builds the ELF auxiliary vector (auxv) that is placed on the new program's
// initial stack. Userspace (the dynamic loader / LibC) reads these entries to
// discover the page size, load base, entry point, credentials, and so on.
static Array<ELF::AuxiliaryValue, auxiliary_vector_size> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, UserID uid, UserID euid, GroupID gid, GroupID egid, StringView executable_path, Optional<Process::ScopedDescriptionAllocation> const& main_program_fd_allocation)
{
    return { {
        // PHDR/EXECFD
        // PH*
        { ELF::AuxiliaryValue::PageSize, PAGE_SIZE },
        { ELF::AuxiliaryValue::BaseAddress, (void*)load_base },
        { ELF::AuxiliaryValue::Entry, (void*)entry_eip },
        // NOTELF
        { ELF::AuxiliaryValue::Uid, (long)uid.value() },
        { ELF::AuxiliaryValue::EUid, (long)euid.value() },
        { ELF::AuxiliaryValue::Gid, (long)gid.value() },
        { ELF::AuxiliaryValue::EGid, (long)egid.value() },
        { ELF::AuxiliaryValue::Platform, Processor::platform_string() },
        // FIXME: This is platform specific
#if ARCH(X86_64)
        { ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() },
#elif ARCH(AARCH64)
        { ELF::AuxiliaryValue::HwCap, (long)0 },
#else
#    error "Unknown architecture"
#endif
        { ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() },
        // FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
        // Secure is nonzero when the effective ids differ from the real ids
        // (i.e. a set-uid/set-gid execution), analogous to Linux AT_SECURE.
        { ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 },
        { ELF::AuxiliaryValue::Random, nullptr },
        { ELF::AuxiliaryValue::ExecFilename, executable_path },
        // Pass the main program's file descriptor along if one was allocated;
        // otherwise emit a placeholder entry the loader ignores.
        main_program_fd_allocation.has_value() ? ELF::AuxiliaryValue { ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd_allocation->fd } : ELF::AuxiliaryValue { ELF::AuxiliaryValue::Ignore, 0L },
        // The vector is terminated by a Null (AT_NULL) entry.
        { ELF::AuxiliaryValue::Null, 0L },
    } };
}
  604. static ErrorOr<Vector<NonnullOwnPtr<KString>>> find_shebang_interpreter_for_executable(char const first_page[], size_t nread)
  605. {
  606. int word_start = 2;
  607. size_t word_length = 0;
  608. if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
  609. Vector<NonnullOwnPtr<KString>> interpreter_words;
  610. for (size_t i = 2; i < nread; ++i) {
  611. if (first_page[i] == '\n') {
  612. break;
  613. }
  614. if (first_page[i] != ' ') {
  615. ++word_length;
  616. }
  617. if (first_page[i] == ' ') {
  618. if (word_length > 0) {
  619. auto word = TRY(KString::try_create(StringView { &first_page[word_start], word_length }));
  620. TRY(interpreter_words.try_append(move(word)));
  621. }
  622. word_length = 0;
  623. word_start = i + 1;
  624. }
  625. }
  626. if (word_length > 0) {
  627. auto word = TRY(KString::try_create(StringView { &first_page[word_start], word_length }));
  628. TRY(interpreter_words.try_append(move(word)));
  629. }
  630. if (!interpreter_words.is_empty())
  631. return interpreter_words;
  632. }
  633. return ENOEXEC;
  634. }
// Resolves the ELF program interpreter (PT_INTERP) requested by an executable, if any.
// Returns an open description of the interpreter on success, or nullptr when the image
// is a valid ELF that needs no interpreter. The interpreter itself must be a plain ELF
// image without its own PT_INTERP — nested interpreters are rejected with ELOOP.
ErrorOr<LockRefPtr<OpenFileDescription>> Process::find_elf_interpreter_for_executable(StringView path, ElfW(Ehdr) const& main_executable_header, size_t main_executable_header_size, size_t file_size)
{
    // Not using ErrorOr here because we'll want to do the same thing in userspace in the RTLD
    StringBuilder interpreter_path_builder;
    // validate_program_headers also extracts the PT_INTERP path (if present) into the builder.
    if (!TRY(ELF::validate_program_headers(main_executable_header, file_size, { &main_executable_header, main_executable_header_size }, &interpreter_path_builder))) {
        dbgln("exec({}): File has invalid ELF Program headers", path);
        return ENOEXEC;
    }
    auto interpreter_path = interpreter_path_builder.string_view();
    if (!interpreter_path.is_empty()) {
        // The executable requested a program interpreter (e.g. /usr/lib/Loader.so).
        dbgln_if(EXEC_DEBUG, "exec({}): Using program interpreter {}", path, interpreter_path);
        auto interpreter_description = TRY(VirtualFileSystem::the().open(credentials(), interpreter_path, O_EXEC, 0, current_directory()));
        auto interp_metadata = interpreter_description->metadata();
        VERIFY(interpreter_description->inode());
        // Validate the program interpreter as a valid elf binary.
        // If your program interpreter is a #! file or something, it's time to stop playing games :)
        if (interp_metadata.size < (int)sizeof(ElfW(Ehdr)))
            return ENOEXEC;
        // Read the interpreter's first page so we can validate its headers in turn.
        char first_page[PAGE_SIZE] = {};
        auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
        auto nread = TRY(interpreter_description->read(first_page_buffer, sizeof(first_page)));
        if (nread < sizeof(ElfW(Ehdr)))
            return ENOEXEC;
        auto* elf_header = (ElfW(Ehdr)*)first_page;
        if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_path);
            return ENOEXEC;
        }
        // Not using ErrorOr here because we'll want to do the same thing in userspace in the RTLD
        StringBuilder interpreter_interpreter_path_builder;
        if (!TRY(ELF::validate_program_headers(*elf_header, interp_metadata.size, { first_page, nread }, &interpreter_interpreter_path_builder))) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_path);
            return ENOEXEC;
        }
        auto interpreter_interpreter_path = interpreter_interpreter_path_builder.string_view();
        // The interpreter must be self-relocating; refuse one that asks for its own interpreter.
        if (!interpreter_interpreter_path.is_empty()) {
            dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_path, interpreter_interpreter_path);
            return ELOOP;
        }
        return interpreter_description;
    }
    if (main_executable_header.e_type == ET_REL) {
        // We can't exec an ET_REL, that's just an object file from the compiler
        return ENOEXEC;
    }
    if (main_executable_header.e_type == ET_DYN) {
        // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible
        // for its own relocation (i.e. it's /usr/lib/Loader.so)
        if (path != "/usr/lib/Loader.so")
            dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path);
        return nullptr;
    }
    // No interpreter, but, path refers to a valid elf image
    return nullptr;
}
  690. ErrorOr<void> Process::exec(NonnullOwnPtr<KString> path, Vector<NonnullOwnPtr<KString>> arguments, Vector<NonnullOwnPtr<KString>> environment, Thread*& new_main_thread, InterruptsState& previous_interrupts_state, int recursion_depth)
  691. {
  692. if (recursion_depth > 2) {
  693. dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path);
  694. return ELOOP;
  695. }
  696. // Open the file to check what kind of binary format it is
  697. // Currently supported formats:
  698. // - #! interpreted file
  699. // - ELF32
  700. // * ET_EXEC binary that just gets loaded
  701. // * ET_DYN binary that requires a program interpreter
  702. //
  703. auto description = TRY(VirtualFileSystem::the().open(credentials(), path->view(), O_EXEC, 0, current_directory()));
  704. auto metadata = description->metadata();
  705. if (!metadata.is_regular_file())
  706. return EACCES;
  707. // Always gonna need at least 3 bytes. these are for #!X
  708. if (metadata.size < 3)
  709. return ENOEXEC;
  710. VERIFY(description->inode());
  711. // Read the first page of the program into memory so we can validate the binfmt of it
  712. char first_page[PAGE_SIZE];
  713. auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
  714. auto nread = TRY(description->read(first_page_buffer, sizeof(first_page)));
  715. // 1) #! interpreted file
  716. auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread);
  717. if (!shebang_result.is_error()) {
  718. auto shebang_words = shebang_result.release_value();
  719. auto shebang_path = TRY(shebang_words.first()->try_clone());
  720. arguments[0] = move(path);
  721. TRY(arguments.try_prepend(move(shebang_words)));
  722. return exec(move(shebang_path), move(arguments), move(environment), new_main_thread, previous_interrupts_state, ++recursion_depth);
  723. }
  724. // #2) ELF32 for i386
  725. if (nread < sizeof(ElfW(Ehdr)))
  726. return ENOEXEC;
  727. auto const* main_program_header = (ElfW(Ehdr)*)first_page;
  728. if (!ELF::validate_elf_header(*main_program_header, metadata.size)) {
  729. dbgln("exec({}): File has invalid ELF header", path);
  730. return ENOEXEC;
  731. }
  732. auto interpreter_description = TRY(find_elf_interpreter_for_executable(path->view(), *main_program_header, nread, metadata.size));
  733. return do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, previous_interrupts_state, *main_program_header);
  734. }
// The execve(2) syscall: replaces the current process image with a new executable.
// On success, the calling thread does not return here normally — it resumes at the
// new program's entry point via Processor::assume_context() below.
ErrorOr<FlatPtr> Process::sys$execve(Userspace<Syscall::SC_execve_params const*> user_params)
{
    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this);
    TRY(require_promise(Pledge::exec));
    Thread* new_main_thread = nullptr;
    InterruptsState previous_interrupts_state = InterruptsState::Enabled;
    // NOTE: Be extremely careful with allocating any kernel memory in this function.
    // On success, the kernel stack will be lost.
    // The explicit block scope below is specifically placed to minimize the number
    // of stack locals in this function.
    {
        auto params = TRY(copy_typed_from_user(user_params));
        // Reject oversized argument/environment lists up front.
        if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
            return E2BIG;
        // NOTE: The caller is expected to always pass at least one argument by convention,
        // the program path that was passed as params.path.
        if (params.arguments.length == 0)
            return EINVAL;
        auto path = TRY(get_syscall_path_argument(params.path));
        // Copies an array of userspace string arguments (argv or envp) into kernel KStrings.
        auto copy_user_strings = [](auto const& list, auto& output) -> ErrorOr<void> {
            if (!list.length)
                return {};
            // Guard the descriptor-array byte count against overflow before copying.
            Checked<size_t> size = sizeof(*list.strings);
            size *= list.length;
            if (size.has_overflow())
                return EOVERFLOW;
            Vector<Syscall::StringArgument, 32> strings;
            TRY(strings.try_resize(list.length));
            TRY(copy_from_user(strings.data(), list.strings, size.value()));
            for (size_t i = 0; i < list.length; ++i) {
                auto string = TRY(try_copy_kstring_from_user(strings[i]));
                TRY(output.try_append(move(string)));
            }
            return {};
        };
        Vector<NonnullOwnPtr<KString>> arguments;
        TRY(copy_user_strings(params.arguments, arguments));
        Vector<NonnullOwnPtr<KString>> environment;
        TRY(copy_user_strings(params.environment, environment));
        TRY(exec(move(path), move(arguments), move(environment), new_main_thread, previous_interrupts_state));
    }
    // NOTE: If we're here, the exec has succeeded and we've got a new executable image!
    // We will not return normally from this function. Instead, the next time we
    // get scheduled, it'll be at the entry point of the new executable.
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::in_critical());
    auto* current_thread = Thread::current();
    if (current_thread == new_main_thread) {
        // We need to enter the scheduler lock before changing the state
        // and it will be released after the context switch into that
        // thread. We should also still be in our critical section
        VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
        VERIFY(Processor::in_critical() == 1);
        g_scheduler_lock.lock();
        current_thread->set_state(Thread::State::Running);
#if ARCH(X86_64)
        // 0x200 is the IF (interrupt enable) bit in RFLAGS; restore the interrupt
        // state the caller had before exec disabled interrupts.
        FlatPtr prev_flags = previous_interrupts_state == InterruptsState::Enabled ? 0x200 : 0;
        Processor::assume_context(*current_thread, prev_flags);
        VERIFY_NOT_REACHED();
#elif ARCH(AARCH64)
        TODO_AARCH64();
#else
#    error Unknown architecture
#endif
    }
    // NOTE: This code path is taken in the non-syscall case, i.e when the kernel spawns
    // a userspace process directly (such as /bin/SystemServer on startup)
    restore_processor_interrupts_state(previous_interrupts_state);
    Processor::leave_critical();
    return 0;
}
  806. }