execve.cpp

/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <AK/LexicalPath.h>
#include <AK/ScopeGuard.h>
#include <AK/TemporaryChange.h>
#include <AK/WeakPtr.h>
#include <Kernel/Debug.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/FileSystem/FileDescription.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
#include <Kernel/Random.h>
#include <Kernel/Time/TimeManagement.h>
#include <Kernel/VM/AllocationStrategy.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/Region.h>
#include <Kernel/VM/SharedInodeVMObject.h>
#include <LibC/limits.h>
#include <LibELF/AuxiliaryVector.h>
#include <LibELF/Image.h>
#include <LibELF/Validation.h>

namespace Kernel {

static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd);

static bool validate_stack_size(const Vector<String>& arguments, const Vector<String>& environment)
{
    size_t total_arguments_size = 0;
    size_t total_environment_size = 0;

    for (auto& a : arguments)
        total_arguments_size += a.length() + 1;
    for (auto& e : environment)
        total_environment_size += e.length() + 1;

    total_arguments_size += sizeof(char*) * (arguments.size() + 1);
    total_environment_size += sizeof(char*) * (environment.size() + 1);

    static constexpr size_t max_arguments_size = Thread::default_userspace_stack_size / 8;
    static constexpr size_t max_environment_size = Thread::default_userspace_stack_size / 8;

    if (total_arguments_size > max_arguments_size)
        return false;

    if (total_environment_size > max_environment_size)
        return false;

    // FIXME: This doesn't account for the size of the auxiliary vector
    return true;
}
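
// Builds the initial stack for the new main thread, working downwards from the
// top of the stack region. The resulting layout is roughly (high to low):
//
//     argument and environment strings (NUL-terminated, word-aligned)
//     auxv strings (e.g. the random bytes and the executable path)
//     auxv entries, ending with the AT_NULL entry
//     envp[] pointer array, NULL-terminated
//     argv[] pointer array, NULL-terminated
//     (padding so that esp ends up 16-byte aligned)
//     envp, argv, argc
//     0 (effectively a null return address, since _start never returns)
//
// The returned value is the new thread's initial esp.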
static KResultOr<FlatPtr> make_userspace_stack_for_main_thread(Region& region, Vector<String> arguments, Vector<String> environment, Vector<ELF::AuxiliaryValue> auxiliary_values)
{
    FlatPtr new_esp = region.vaddr().offset(Thread::default_userspace_stack_size).get();

    auto push_on_new_stack = [&new_esp](u32 value) {
        new_esp -= 4;
        Userspace<u32*> stack_ptr = new_esp;
        return copy_to_user(stack_ptr, &value);
    };

    auto push_aux_value_on_new_stack = [&new_esp](auxv_t value) {
        new_esp -= sizeof(auxv_t);
        Userspace<auxv_t*> stack_ptr = new_esp;
        return copy_to_user(stack_ptr, &value);
    };

    auto push_string_on_new_stack = [&new_esp](const String& string) {
        new_esp -= round_up_to_power_of_two(string.length() + 1, 4);
        Userspace<u32*> stack_ptr = new_esp;
        return copy_to_user(stack_ptr, string.characters(), string.length() + 1);
    };

    Vector<FlatPtr> argv_entries;
    for (auto& argument : arguments) {
        push_string_on_new_stack(argument);
        argv_entries.append(new_esp);
    }

    Vector<FlatPtr> env_entries;
    for (auto& variable : environment) {
        push_string_on_new_stack(variable);
        env_entries.append(new_esp);
    }

    for (auto& value : auxiliary_values) {
        if (!value.optional_string.is_empty()) {
            push_string_on_new_stack(value.optional_string);
            value.auxv.a_un.a_ptr = (void*)new_esp;
        }
    }

    for (ssize_t i = auxiliary_values.size() - 1; i >= 0; --i) {
        auto& value = auxiliary_values[i];
        push_aux_value_on_new_stack(value.auxv);
    }

    push_on_new_stack(0);
    for (ssize_t i = env_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(env_entries[i]);
    FlatPtr envp = new_esp;

    push_on_new_stack(0);
    for (ssize_t i = argv_entries.size() - 1; i >= 0; --i)
        push_on_new_stack(argv_entries[i]);
    FlatPtr argv = new_esp;

    // NOTE: The stack needs to be 16-byte aligned.
    new_esp -= new_esp % 16;

    push_on_new_stack((FlatPtr)envp);
    push_on_new_stack((FlatPtr)argv);
    push_on_new_stack((FlatPtr)argv_entries.size());
    push_on_new_stack(0);

    return new_esp;
}
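
// Maps one ELF object (the main program or the dynamic loader) into this
// process's address space. The strategy differs per program header type:
//
//  - PT_TLS: a fresh anonymous region is allocated and the TLS image is
//    copied into it; it becomes the master TLS template for new threads.
//  - Writable PT_LOAD: an anonymous region is allocated and the segment data
//    is copied in, so writes never touch the on-disk inode.
//  - Read-only PT_LOAD: the SharedInodeVMObject is mapped directly, so the
//    pages are shared with every other process running the same executable.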
KResultOr<Process::LoadResult> Process::load_elf_object(FileDescription& object_description, FlatPtr load_offset, ShouldAllocateTls should_allocate_tls)
{
    auto& inode = *(object_description.inode());
    auto vmobject = SharedInodeVMObject::create_with_inode(inode);
    if (vmobject->writable_mappings()) {
        dbgln("Refusing to execute a write-mapped program");
        return ETXTBSY;
    }

    size_t executable_size = inode.size();

    auto executable_region = MM.allocate_kernel_region_with_vmobject(*vmobject, PAGE_ROUND_UP(executable_size), "ELF loading", Region::Access::Read);
    if (!executable_region) {
        dbgln("Could not allocate memory for ELF loading");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(executable_region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid())
        return ENOEXEC;

    Region* master_tls_region { nullptr };
    size_t master_tls_size = 0;
    size_t master_tls_alignment = 0;
    FlatPtr load_base_address = 0;

    String elf_name = object_description.absolute_path();
    ASSERT(!Processor::current().in_critical());

    KResult ph_load_result = KSuccess;
    elf_image.for_each_program_header([&](const ELF::Image::ProgramHeader& program_header) {
        if (program_header.type() == PT_TLS) {
            ASSERT(should_allocate_tls == ShouldAllocateTls::Yes);
            ASSERT(program_header.size_in_memory());

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            auto range = allocate_range({}, program_header.size_in_memory());
            if (!range.is_valid()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }

            auto region_or_error = allocate_region(range, String::formatted("{} (master-tls)", elf_name), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            master_tls_region = region_or_error.value();
            master_tls_size = program_header.size_in_memory();
            master_tls_alignment = program_header.alignment();

            if (!copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }

        if (program_header.type() != PT_LOAD)
            return IterationDecision::Continue;

        if (program_header.is_writable()) {
            // Writable section: create a copy in memory.
            ASSERT(program_header.size_in_memory());
            ASSERT(program_header.alignment() == PAGE_SIZE);

            if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
                dbgln("Shenanigans! Writable ELF PT_LOAD header sneaks outside of executable.");
                ph_load_result = ENOEXEC;
                return IterationDecision::Break;
            }

            int prot = 0;
            if (program_header.is_readable())
                prot |= PROT_READ;
            if (program_header.is_writable())
                prot |= PROT_WRITE;
            auto region_name = String::formatted("{} (data-{}{})", elf_name, program_header.is_readable() ? "r" : "", program_header.is_writable() ? "w" : "");

            auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory());
            if (!range.is_valid()) {
                ph_load_result = ENOMEM;
                return IterationDecision::Break;
            }

            auto region_or_error = allocate_region(range, region_name, prot, AllocationStrategy::Reserve);
            if (region_or_error.is_error()) {
                ph_load_result = region_or_error.error();
                return IterationDecision::Break;
            }

            // It's not always the case with PIE executables (and very well shouldn't be) that the
            // virtual address in the program header matches the one we end up giving the process.
            // In order to copy the data image correctly into memory, we need to copy the data starting at
            // the right initial page offset into the pages allocated for the elf_alloc-XX section.
            // FIXME: There's an opportunity to munmap, or at least mprotect, the padding space between
            //     the .text and .data PT_LOAD sections of the executable.
            //     Accessing it would definitely be a bug.
            auto page_offset = program_header.vaddr();
            page_offset.mask(~PAGE_MASK);
            if (!copy_to_user((u8*)region_or_error.value()->vaddr().as_ptr() + page_offset.get(), program_header.raw_data(), program_header.size_in_image())) {
                ph_load_result = EFAULT;
                return IterationDecision::Break;
            }
            return IterationDecision::Continue;
        }

        // Non-writable section: map the executable itself in memory.
        ASSERT(program_header.size_in_memory());
        ASSERT(program_header.alignment() == PAGE_SIZE);
        int prot = 0;
        if (program_header.is_readable())
            prot |= PROT_READ;
        if (program_header.is_writable())
            prot |= PROT_WRITE;
        if (program_header.is_executable())
            prot |= PROT_EXEC;

        auto range = allocate_range(program_header.vaddr().offset(load_offset), program_header.size_in_memory());
        if (!range.is_valid()) {
            ph_load_result = ENOMEM;
            return IterationDecision::Break;
        }

        auto region_or_error = allocate_region_with_vmobject(range, *vmobject, program_header.offset(), elf_name, prot, true);
        if (region_or_error.is_error()) {
            ph_load_result = region_or_error.error();
            return IterationDecision::Break;
        }

        if (program_header.offset() == 0)
            load_base_address = (FlatPtr)region_or_error.value()->vaddr().as_ptr();
        return IterationDecision::Continue;
    });

    if (ph_load_result.is_error()) {
        dbgln("do_exec: Failure loading program ({})", ph_load_result.error());
        return ph_load_result;
    }

    if (!elf_image.entry().offset(load_offset).get()) {
        dbgln("do_exec: Failure loading program, entry pointer is invalid! ({})", elf_image.entry().offset(load_offset));
        return ENOEXEC;
    }

    auto stack_range = allocate_range({}, Thread::default_userspace_stack_size);
    if (!stack_range.is_valid()) {
        dbgln("do_exec: Failed to allocate VM range for stack");
        return ENOMEM;
    }

    auto stack_region_or_error = allocate_region(stack_range, "Stack (Main thread)", PROT_READ | PROT_WRITE, AllocationStrategy::Reserve);
    if (stack_region_or_error.is_error())
        return stack_region_or_error.error();
    auto& stack_region = *stack_region_or_error.value();
    stack_region.set_stack(true);

    return LoadResult {
        load_base_address,
        elf_image.entry().offset(load_offset).get(),
        executable_size,
        VirtualAddress(elf_image.program_header_table_offset()).offset(load_offset).get(),
        elf_image.program_header_count(),
        AK::try_make_weak_ptr(master_tls_region),
        master_tls_size,
        master_tls_alignment,
        stack_region.make_weak_ptr()
    };
}
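
// Process::load() swaps in a brand new page directory and region list before
// loading anything, and arms a scope guard that rolls both back if loading
// fails. This is what makes a failed exec() recoverable: the old address
// space is only thrown away once loading has fully succeeded and the guard
// has been disarmed.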
KResultOr<Process::LoadResult> Process::load(NonnullRefPtr<FileDescription> main_program_description, RefPtr<FileDescription> interpreter_description, const Elf32_Ehdr& main_program_header)
{
    RefPtr<PageDirectory> old_page_directory;
    NonnullOwnPtrVector<Region> old_regions;

    {
        auto page_directory = PageDirectory::create_for_userspace(*this);
        if (!page_directory)
            return ENOMEM;

        // Need to make sure we don't swap contexts in the middle
        ScopedCritical critical;
        old_page_directory = move(m_page_directory);
        old_regions = move(m_regions);
        m_page_directory = page_directory.release_nonnull();
        MM.enter_process_paging_scope(*this);
    }

    ArmedScopeGuard rollback_regions_guard([&]() {
        ASSERT(Process::current() == this);
        // Need to make sure we don't swap contexts in the middle
        ScopedCritical critical;
        // Explicitly clear m_regions *before* restoring the page directory,
        // otherwise we may silently corrupt memory!
        m_regions.clear();
        // Now that we freed the regions, revert to the original page directory
        // and restore the original regions
        m_page_directory = move(old_page_directory);
        MM.enter_process_paging_scope(*this);
        m_regions = move(old_regions);
    });

    if (interpreter_description.is_null()) {
        auto result = load_elf_object(main_program_description, FlatPtr { 0 }, ShouldAllocateTls::Yes);
        if (result.is_error())
            return result.error();
        rollback_regions_guard.disarm();
        return result;
    }

    auto interpreter_load_offset = get_interpreter_load_offset(main_program_header, main_program_description, *interpreter_description);
    if (interpreter_load_offset.is_error()) {
        return interpreter_load_offset.error();
    }

    auto interpreter_load_result = load_elf_object(*interpreter_description, interpreter_load_offset.value(), ShouldAllocateTls::No);
    if (interpreter_load_result.is_error())
        return interpreter_load_result.error();

    // TLS allocation will be done in userspace by the loader
    ASSERT(!interpreter_load_result.value().tls_region);
    ASSERT(!interpreter_load_result.value().tls_alignment);
    ASSERT(!interpreter_load_result.value().tls_size);

    rollback_regions_guard.disarm();
    return interpreter_load_result;
}
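
// RequiredLoadRange describes the lowest and highest virtual addresses that
// an ELF object's PT_LOAD headers will occupy, which is all the interpreter
// placement logic below needs to know about an image.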
struct RequiredLoadRange {
    FlatPtr start { 0 };
    FlatPtr end { 0 };
};

static KResultOr<RequiredLoadRange> get_required_load_range(FileDescription& program_description)
{
    auto& inode = *(program_description.inode());
    auto vmobject = SharedInodeVMObject::create_with_inode(inode);

    size_t executable_size = inode.size();

    auto region = MM.allocate_kernel_region_with_vmobject(*vmobject, PAGE_ROUND_UP(executable_size), "ELF memory range calculation", Region::Access::Read);
    if (!region) {
        dbgln("Could not allocate memory for ELF");
        return ENOMEM;
    }

    auto elf_image = ELF::Image(region->vaddr().as_ptr(), executable_size);
    if (!elf_image.is_valid()) {
        return EINVAL;
    }

    RequiredLoadRange range {};
    elf_image.for_each_program_header([&range](const auto& pheader) {
        if (pheader.type() != PT_LOAD)
            return IterationDecision::Continue;

        auto region_start = (FlatPtr)pheader.vaddr().as_ptr();
        auto region_end = region_start + pheader.size_in_memory();
        if (range.start == 0 || region_start < range.start)
            range.start = region_start;
        if (range.end == 0 || region_end > range.end)
            range.end = region_end;
        return IterationDecision::Continue;
    });

    ASSERT(range.end > range.start);
    return range;
}
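
// Picks a randomized base address for the dynamic loader. For ET_DYN main
// programs, any page-aligned offset in the 256 MiB window works. For ET_EXEC
// programs the loader must avoid the fixed range the main program occupies,
// so the larger of the two gaps around it is chosen; if the surviving gap is
// smaller than 10 MiB, there isn't enough room for meaningful randomization
// and the exec is refused with E2BIG.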
KResultOr<FlatPtr> Process::get_interpreter_load_offset(const Elf32_Ehdr& main_program_header, FileDescription& main_program_description, FileDescription& interpreter_description)
{
    constexpr FlatPtr interpreter_load_range_start = 0x08000000;
    constexpr FlatPtr interpreter_load_range_size = 65536 * PAGE_SIZE; // 2**16 * PAGE_SIZE = 256MB
    constexpr FlatPtr minimum_interpreter_load_offset_randomization_size = 10 * MiB;

    auto random_load_offset_in_range([](auto start, auto size) {
        return PAGE_ROUND_DOWN(start + get_good_random<FlatPtr>() % size);
    });

    if (main_program_header.e_type == ET_DYN) {
        return random_load_offset_in_range(interpreter_load_range_start, interpreter_load_range_size);
    }

    if (main_program_header.e_type != ET_EXEC)
        return -EINVAL;

    auto main_program_load_range_result = get_required_load_range(main_program_description);
    if (main_program_load_range_result.is_error())
        return main_program_load_range_result.error();

    auto main_program_load_range = main_program_load_range_result.value();

    auto interpreter_load_range_result = get_required_load_range(interpreter_description);
    if (interpreter_load_range_result.is_error())
        return interpreter_load_range_result.error();

    auto interpreter_size_in_memory = interpreter_load_range_result.value().end - interpreter_load_range_result.value().start;
    auto interpreter_load_range_end = interpreter_load_range_start + interpreter_load_range_size - interpreter_size_in_memory;

    // No intersection
    if (main_program_load_range.end < interpreter_load_range_start || main_program_load_range.start > interpreter_load_range_end)
        return random_load_offset_in_range(interpreter_load_range_start, interpreter_load_range_size);

    RequiredLoadRange first_available_part = { interpreter_load_range_start, main_program_load_range.start };
    RequiredLoadRange second_available_part = { main_program_load_range.end, interpreter_load_range_end };

    RequiredLoadRange selected_range {};
    // Select larger part
    if (first_available_part.end - first_available_part.start > second_available_part.end - second_available_part.start)
        selected_range = first_available_part;
    else
        selected_range = second_available_part;
    // If the main program is too big and leaves us without enough space for adequate loader randomization
    if (selected_range.end - selected_range.start < minimum_interpreter_load_offset_randomization_size)
        return -E2BIG;

    return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
}
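
// do_exec() is the point of no return. The rough order of operations:
//
//  1. Validate the arguments and switch credentials (set-uid/set-gid), so
//     that ptrace access checks already see the new identity while loading.
//  2. Load the main program and (optionally) the interpreter into a fresh
//     address space, rolling everything back on failure.
//  3. Kill every other thread, reset signal state and promises, drop the
//     veil, and close FD_CLOEXEC descriptors.
//  4. Build the new userspace stack, then enter a critical section and set
//     up the TSS so the thread resumes at the new entry point. From here on
//     a page fault would be fatal, hence the "no page faults below" note.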
int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Vector<String> arguments, Vector<String> environment, RefPtr<FileDescription> interpreter_description, Thread*& new_main_thread, u32& prev_flags, const Elf32_Ehdr& main_program_header)
{
    ASSERT(is_user_process());
    ASSERT(!Processor::current().in_critical());
    auto path = main_program_description->absolute_path();
#if EXEC_DEBUG
    dbgln("do_exec({})", path);
#endif

    // FIXME: How much stack space does process startup need?
    if (!validate_stack_size(arguments, environment))
        return -E2BIG;

    auto parts = path.split('/');
    if (parts.is_empty())
        return -ENOENT;

    // Disable profiling temporarily in case it's running on this process.
    TemporaryChange profiling_disabler(m_profiling, false);

    // Mark this thread as the current thread that does exec
    // No other thread from this process will be scheduled to run
    auto current_thread = Thread::current();
    m_exec_tid = current_thread->tid();

    // NOTE: We switch credentials before altering the memory layout of the process.
    //     This ensures that ptrace access control takes the right credentials into account.
    // FIXME: This still feels rickety. Perhaps it would be better to simply block ptrace
    //     clients until we're ready to be traced? Or reject them with EPERM?

    auto main_program_metadata = main_program_description->metadata();

    auto old_euid = m_euid;
    auto old_suid = m_suid;
    auto old_egid = m_egid;
    auto old_sgid = m_sgid;

    ArmedScopeGuard cred_restore_guard = [&] {
        m_euid = old_euid;
        m_suid = old_suid;
        m_egid = old_egid;
        m_sgid = old_sgid;
    };

    bool executable_is_setid = false;

    if (!(main_program_description->custody()->mount_flags() & MS_NOSUID)) {
        if (main_program_metadata.is_setuid()) {
            executable_is_setid = true;
            m_euid = m_suid = main_program_metadata.uid;
        }
        if (main_program_metadata.is_setgid()) {
            executable_is_setid = true;
            m_egid = m_sgid = main_program_metadata.gid;
        }
    }

    auto load_result_or_error = load(main_program_description, interpreter_description, main_program_header);
    if (load_result_or_error.is_error()) {
        dbgln("do_exec({}): Failed to load main program or interpreter", path);
        return load_result_or_error.error();
    }
    auto& load_result = load_result_or_error.value();

    // We can commit to the new credentials at this point.
    cred_restore_guard.disarm();

    kill_threads_except_self();

#if EXEC_DEBUG
    dbgln("Memory layout after ELF load:");
    dump_regions();
#endif

    m_executable = main_program_description->custody();
    m_arguments = arguments;
    m_environment = environment;

    m_promises = m_execpromises;
    m_has_promises = m_has_execpromises;

    m_execpromises = 0;
    m_has_execpromises = false;

    m_veil_state = VeilState::None;
    m_unveiled_paths.clear();

    m_coredump_metadata.clear();

    current_thread->set_default_signal_dispositions();
    current_thread->clear_signals();

    clear_futex_queues_on_exec();

    m_region_lookup_cache = {};

    set_dumpable(!executable_is_setid);

    for (size_t i = 0; i < m_fds.size(); ++i) {
        auto& description_and_flags = m_fds[i];
        if (description_and_flags.description() && description_and_flags.flags() & FD_CLOEXEC)
            description_and_flags = {};
    }

    int main_program_fd = -1;
    if (interpreter_description) {
        main_program_fd = alloc_fd();
        ASSERT(main_program_fd >= 0);
        main_program_description->seek(0, SEEK_SET);
        main_program_description->set_readable(true);
        m_fds[main_program_fd].set(move(main_program_description), FD_CLOEXEC);
    }

    new_main_thread = nullptr;
    if (&current_thread->process() == this) {
        new_main_thread = current_thread;
    } else {
        for_each_thread([&](auto& thread) {
            new_main_thread = &thread;
            return IterationDecision::Break;
        });
    }
    ASSERT(new_main_thread);

    auto auxv = generate_auxiliary_vector(load_result.load_base, load_result.entry_eip, m_uid, m_euid, m_gid, m_egid, path, main_program_fd);

    // NOTE: We create the new stack before disabling interrupts since it will zero-fault
    //     and we don't want to deal with faults after this point.
    auto make_stack_result = make_userspace_stack_for_main_thread(*load_result.stack_region.unsafe_ptr(), move(arguments), move(environment), move(auxv));
    if (make_stack_result.is_error())
        return make_stack_result.error();
    u32 new_userspace_esp = make_stack_result.value();

    if (wait_for_tracer_at_next_execve())
        Thread::current()->send_urgent_signal_to_self(SIGSTOP);

    // We enter a critical section here because we don't want to get interrupted between do_exec()
    // and Processor::assume_context() or the next context switch.
    // If we used an InterruptDisabler that sti()'d on exit, we might get a timer tick too soon in exec().
    Processor::current().enter_critical(prev_flags);

    // NOTE: Be careful to not trigger any page faults below!

    m_name = parts.take_last();
    new_main_thread->set_name(m_name);

    // FIXME: PID/TID ISSUE
    m_pid = new_main_thread->tid().value();
    auto tsr_result = new_main_thread->make_thread_specific_region({});
    if (tsr_result.is_error())
        return tsr_result.error();
    new_main_thread->reset_fpu_state();

    auto& tss = new_main_thread->m_tss;
    tss.cs = GDT_SELECTOR_CODE3 | 3;
    tss.ds = GDT_SELECTOR_DATA3 | 3;
    tss.es = GDT_SELECTOR_DATA3 | 3;
    tss.ss = GDT_SELECTOR_DATA3 | 3;
    tss.fs = GDT_SELECTOR_DATA3 | 3;
    tss.gs = GDT_SELECTOR_TLS | 3;
    tss.eip = load_result.entry_eip;
    tss.esp = new_userspace_esp;
    tss.cr3 = m_page_directory->cr3();
    tss.ss2 = m_pid.value();

    // Throw away any recorded performance events in this process.
    if (m_perf_event_buffer)
        m_perf_event_buffer->clear();

    {
        ScopedSpinLock lock(g_scheduler_lock);
        new_main_thread->set_state(Thread::State::Runnable);
    }

    u32 lock_count_to_restore;
    [[maybe_unused]] auto rc = big_lock().force_unlock_if_locked(lock_count_to_restore);
    ASSERT_INTERRUPTS_DISABLED();
    ASSERT(Processor::current().in_critical());
    return 0;
}
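
// The auxiliary vector follows the usual SysV ELF convention: a list of
// (type, value) pairs that make_userspace_stack_for_main_thread() pushes onto
// the initial stack, terminated by an AT_NULL entry. The dynamic loader uses
// it to find the program's base address, entry point, credentials, and the
// 16 kernel-supplied random bytes (typically used to seed stack protection).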
static Vector<ELF::AuxiliaryValue> generate_auxiliary_vector(FlatPtr load_base, FlatPtr entry_eip, uid_t uid, uid_t euid, gid_t gid, gid_t egid, String executable_path, int main_program_fd)
{
    Vector<ELF::AuxiliaryValue> auxv;
    // PHDR/EXECFD
    // PH*
    auxv.append({ ELF::AuxiliaryValue::PageSize, PAGE_SIZE });
    auxv.append({ ELF::AuxiliaryValue::BaseAddress, (void*)load_base });

    auxv.append({ ELF::AuxiliaryValue::Entry, (void*)entry_eip });
    // NOTELF
    auxv.append({ ELF::AuxiliaryValue::Uid, (long)uid });
    auxv.append({ ELF::AuxiliaryValue::EUid, (long)euid });
    auxv.append({ ELF::AuxiliaryValue::Gid, (long)gid });
    auxv.append({ ELF::AuxiliaryValue::EGid, (long)egid });

    // FIXME: Don't hardcode this? We might support other platforms later (e.g. x86_64).
    auxv.append({ ELF::AuxiliaryValue::Platform, "i386" });
    // FIXME: This is platform specific
    auxv.append({ ELF::AuxiliaryValue::HwCap, (long)CPUID(1).edx() });

    auxv.append({ ELF::AuxiliaryValue::ClockTick, (long)TimeManagement::the().ticks_per_second() });

    // FIXME: Also take into account things like extended filesystem permissions? That's what Linux does...
    auxv.append({ ELF::AuxiliaryValue::Secure, ((uid != euid) || (gid != egid)) ? 1 : 0 });

    char random_bytes[16] {};
    get_fast_random_bytes((u8*)random_bytes, sizeof(random_bytes));

    auxv.append({ ELF::AuxiliaryValue::Random, String(random_bytes, sizeof(random_bytes)) });

    auxv.append({ ELF::AuxiliaryValue::ExecFilename, executable_path });

    auxv.append({ ELF::AuxiliaryValue::ExecFileDescriptor, main_program_fd });

    auxv.append({ ELF::AuxiliaryValue::Null, 0L });
    return auxv;
}
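
// Splits a "#!" line into interpreter words on single spaces; empty words are
// skipped. For example, a script starting with "#!/bin/sh -e" yields
// { "/bin/sh", "-e" }, and everything after the first newline is ignored.
// Returns ENOEXEC if the file doesn't start with a shebang.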
static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
{
    int word_start = 2;
    int word_length = 0;
    if (nread > 2 && first_page[0] == '#' && first_page[1] == '!') {
        Vector<String> interpreter_words;

        for (int i = 2; i < nread; ++i) {
            if (first_page[i] == '\n') {
                break;
            }

            if (first_page[i] != ' ') {
                ++word_length;
            }

            if (first_page[i] == ' ') {
                if (word_length > 0) {
                    interpreter_words.append(String(&first_page[word_start], word_length));
                }
                word_length = 0;
                word_start = i + 1;
            }
        }

        if (word_length > 0)
            interpreter_words.append(String(&first_page[word_start], word_length));

        if (!interpreter_words.is_empty())
            return interpreter_words;
    }

    return ENOEXEC;
}
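
// Resolves the PT_INTERP interpreter for an ELF image, if any. The
// interpreter itself must be a valid ELF binary without its own PT_INTERP
// (otherwise we could recurse forever), so requesting one is rejected with
// ELOOP. A null descriptor result means "ET_DYN with no interpreter" (i.e.
// Loader.so itself); a bare KSuccess means a static ET_EXEC image.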
KResultOr<RefPtr<FileDescription>> Process::find_elf_interpreter_for_executable(const String& path, const Elf32_Ehdr& main_program_header, int nread, size_t file_size)
{
    // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
    String interpreter_path;
    if (!ELF::validate_program_headers(main_program_header, file_size, (const u8*)&main_program_header, nread, &interpreter_path)) {
        dbgln("exec({}): File has invalid ELF Program headers", path);
        return ENOEXEC;
    }

    if (!interpreter_path.is_empty()) {
#if EXEC_DEBUG
        dbgln("exec({}): Using program interpreter {}", path, interpreter_path);
#endif
        auto interp_result = VFS::the().open(interpreter_path, O_EXEC, 0, current_directory());
        if (interp_result.is_error()) {
            dbgln("exec({}): Unable to open program interpreter {}", path, interpreter_path);
            return interp_result.error();
        }
        auto interpreter_description = interp_result.value();
        auto interp_metadata = interpreter_description->metadata();

        ASSERT(interpreter_description->inode());

        // Validate the program interpreter as a valid elf binary.
        // If your program interpreter is a #! file or something, it's time to stop playing games :)
        if (interp_metadata.size < (int)sizeof(Elf32_Ehdr))
            return ENOEXEC;

        char first_page[PAGE_SIZE] = {};
        auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
        auto nread_or_error = interpreter_description->read(first_page_buffer, sizeof(first_page));
        if (nread_or_error.is_error())
            return ENOEXEC;
        nread = nread_or_error.value();

        if (nread < (int)sizeof(Elf32_Ehdr))
            return ENOEXEC;

        auto elf_header = (Elf32_Ehdr*)first_page;
        if (!ELF::validate_elf_header(*elf_header, interp_metadata.size)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF header", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        // Not using KResultOr here because we'll want to do the same thing in userspace in the RTLD
        String interpreter_interpreter_path;
        if (!ELF::validate_program_headers(*elf_header, interp_metadata.size, (u8*)first_page, nread, &interpreter_interpreter_path)) {
            dbgln("exec({}): Interpreter ({}) has invalid ELF Program headers", path, interpreter_description->absolute_path());
            return ENOEXEC;
        }

        if (!interpreter_interpreter_path.is_empty()) {
            dbgln("exec({}): Interpreter ({}) has its own interpreter ({})! No thank you!", path, interpreter_description->absolute_path(), interpreter_interpreter_path);
            return ELOOP;
        }

        return interpreter_description;
    }

    if (main_program_header.e_type == ET_REL) {
        // We can't exec an ET_REL, that's just an object file from the compiler
        return ENOEXEC;
    }

    if (main_program_header.e_type == ET_DYN) {
        // If it's ET_DYN with no PT_INTERP, then it's a dynamic executable responsible
        // for its own relocation (i.e. it's /usr/lib/Loader.so)
        if (path != "/usr/lib/Loader.so")
            dbgln("exec({}): WARNING - Dynamic ELF executable without a PT_INTERP header, and isn't /usr/lib/Loader.so", path);
        return nullptr;
    }
    // No interpreter, but the path refers to a valid ELF image
    return KResult(KSuccess);
}
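
// exec() classifies the binary format and recurses once per "#!" indirection,
// rewriting the argument vector as it goes. For example:
//
//     exec("/bin/script", { "/bin/script", "arg" }, ...)
//
// where /bin/script starts with "#!/bin/sh" becomes:
//
//     exec("/bin/sh", { "/bin/sh", "/bin/script", "arg" }, ...)
//
// The recursion depth is capped, so chains of more than two levels of #!
// indirection fail with ELOOP.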
int Process::exec(String path, Vector<String> arguments, Vector<String> environment, int recursion_depth)
{
    if (recursion_depth > 2) {
        dbgln("exec({}): SHENANIGANS! recursed too far trying to find #! interpreter", path);
        return -ELOOP;
    }

    // Open the file to check what kind of binary format it is
    // Currently supported formats:
    //     - #! interpreted file
    //     - ELF32
    //         * ET_EXEC binary that just gets loaded
    //         * ET_DYN binary that requires a program interpreter
    //
    auto result = VFS::the().open(path, O_EXEC, 0, current_directory());
    if (result.is_error())
        return result.error();
    auto description = result.release_value();
    auto metadata = description->metadata();
    // Always gonna need at least 3 bytes. These are for "#!X".
    if (metadata.size < 3)
        return -ENOEXEC;

    ASSERT(description->inode());

    // Read the first page of the program into memory so we can validate the binfmt of it
    char first_page[PAGE_SIZE];
    auto first_page_buffer = UserOrKernelBuffer::for_kernel_buffer((u8*)&first_page);
    auto nread_or_error = description->read(first_page_buffer, sizeof(first_page));
    if (nread_or_error.is_error())
        return -ENOEXEC;

    // 1) #! interpreted file
    auto shebang_result = find_shebang_interpreter_for_executable(first_page, nread_or_error.value());
    if (!shebang_result.is_error()) {
        Vector<String> new_arguments(shebang_result.value());

        new_arguments.append(path);

        arguments.remove(0);
        new_arguments.append(move(arguments));

        return exec(shebang_result.value().first(), move(new_arguments), move(environment), ++recursion_depth);
    }

    // #2) ELF32 for i386
    if (nread_or_error.value() < (int)sizeof(Elf32_Ehdr))
        return -ENOEXEC;
    auto main_program_header = (Elf32_Ehdr*)first_page;
    if (!ELF::validate_elf_header(*main_program_header, metadata.size)) {
        dbgln("exec({}): File has invalid ELF header", path);
        return -ENOEXEC;
    }

    auto elf_result = find_elf_interpreter_for_executable(path, *main_program_header, nread_or_error.value(), metadata.size);
    // Assume a static ELF executable by default
    RefPtr<FileDescription> interpreter_description;
    // We're getting either an interpreter, an error, or KSuccess (i.e. no interpreter but file checks out)
    if (!elf_result.is_error()) {
        // It's a dynamic ELF executable, with or without an interpreter. Do not allocate TLS
        interpreter_description = elf_result.value();
    } else if (elf_result.error().is_error())
        return elf_result.error();

    // The bulk of exec() is done by do_exec(), which ensures that all locals
    // are cleaned up by the time we yield-teleport below.
    Thread* new_main_thread = nullptr;
    u32 prev_flags = 0;
    int rc = do_exec(move(description), move(arguments), move(environment), move(interpreter_description), new_main_thread, prev_flags, *main_program_header);

    m_exec_tid = 0;

    if (rc < 0)
        return rc;

    ASSERT_INTERRUPTS_DISABLED();
    ASSERT(Processor::current().in_critical());

    auto current_thread = Thread::current();
    if (current_thread == new_main_thread) {
        // We need to enter the scheduler lock before changing the state
        // and it will be released after the context switch into that
        // thread. We should also still be in our critical section
        ASSERT(!g_scheduler_lock.own_lock());
        ASSERT(Processor::current().in_critical() == 1);
        g_scheduler_lock.lock();
        current_thread->set_state(Thread::State::Running);
        Processor::assume_context(*current_thread, prev_flags);
        ASSERT_NOT_REACHED();
    }

    Processor::current().leave_critical(prev_flags);
    return 0;
}
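
// The syscall entry point: everything from userspace (the path and both
// string lists) is copied into kernel memory up front, with Checked
// arithmetic guarding the size computations, before exec() is allowed to
// start tearing the old process image down.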
int Process::sys$execve(Userspace<const Syscall::SC_execve_params*> user_params)
{
    REQUIRE_PROMISE(exec);

    // NOTE: Be extremely careful with allocating any kernel memory in exec().
    //     On success, the kernel stack will be lost.
    Syscall::SC_execve_params params;
    if (!copy_from_user(&params, user_params))
        return -EFAULT;

    if (params.arguments.length > ARG_MAX || params.environment.length > ARG_MAX)
        return -E2BIG;

    String path;
    {
        auto path_arg = get_syscall_path_argument(params.path);
        if (path_arg.is_error())
            return path_arg.error();
        path = path_arg.value();
    }

    auto copy_user_strings = [](const auto& list, auto& output) {
        if (!list.length)
            return true;
        // Overflow-check the total size of the string list before copying it in.
        // Each entry is a full Syscall::StringArgument, not just a pointer.
        Checked size = sizeof(Syscall::StringArgument);
        size *= list.length;
        if (size.has_overflow())
            return false;
        Vector<Syscall::StringArgument, 32> strings;
        strings.resize(list.length);
        if (!copy_from_user(strings.data(), list.strings, list.length * sizeof(Syscall::StringArgument)))
            return false;
        for (size_t i = 0; i < list.length; ++i) {
            auto string = copy_string_from_user(strings[i]);
            if (string.is_null())
                return false;
            output.append(move(string));
        }
        return true;
    };

    Vector<String> arguments;
    if (!copy_user_strings(params.arguments, arguments))
        return -EFAULT;

    Vector<String> environment;
    if (!copy_user_strings(params.environment, environment))
        return -EFAULT;

    int rc = exec(move(path), move(arguments), move(environment));
    ASSERT(rc < 0); // We should never continue after a successful exec!
    return rc;
}

}