Jelajahi Sumber

Kernel+LibELF: Expose ELF Auxiliary Vector to Userspace

The AT_* entries are placed after the environment variables, so that
they can be found by iterating until the end of the envp array, and then
going even further beyond :^)
Andrew Kaster 5 tahun lalu
induk
melakukan
f96b827990
5 mengubah file dengan 200 tambahan dan 28 penghapusan
  1. 50 12
      Kernel/Process.cpp
  2. 6 3
      Kernel/Process.h
  3. 23 9
      Kernel/Thread.cpp
  4. 5 4
      Kernel/Thread.h
  5. 116 0
      Libraries/LibELF/AuxiliaryVector.h

+ 50 - 12
Kernel/Process.cpp

@@ -33,6 +33,7 @@
 #include <AK/Time.h>
 #include <AK/Types.h>
 #include <Kernel/ACPI/Parser.h>
+#include <Kernel/API/Syscall.h>
 #include <Kernel/Arch/i386/CPU.h>
 #include <Kernel/Console.h>
 #include <Kernel/Devices/BlockDevice.h>
@@ -67,7 +68,6 @@
 #include <Kernel/Scheduler.h>
 #include <Kernel/SharedBuffer.h>
 #include <Kernel/StdLib.h>
-#include <Kernel/API/Syscall.h>
 #include <Kernel/TTY/MasterPTY.h>
 #include <Kernel/TTY/TTY.h>
 #include <Kernel/Thread.h>
@@ -888,7 +888,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
     //      It also happens to be the static Virtual Address offset every static executable gets :)
     //      Without this, some assumptions by the ELF loading hooks below are severely broken.
     //      0x08000000 is a verified random number chosen by random dice roll https://xkcd.com/221/
-    u32 totally_random_offset = interpreter_description ? 0x08000000 : 0;
+    m_load_offset = interpreter_description ? 0x08000000 : 0;
 
     // FIXME: We should be able to load both the PT_INTERP interpreter and the main program... once the RTLD is smart enough
     if (interpreter_description) {
@@ -906,7 +906,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
     Region* master_tls_region { nullptr };
     size_t master_tls_size = 0;
     size_t master_tls_alignment = 0;
-    u32 entry_eip = 0;
+    m_entry_eip = 0;
 
     MM.enter_process_paging_scope(*this);
     RefPtr<ELF::Loader> loader;
@@ -935,7 +935,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
                 prot |= PROT_WRITE;
             if (is_executable)
                 prot |= PROT_EXEC;
-            if (auto* region = allocate_region_with_vmobject(vaddr.offset(totally_random_offset), size, *vmobject, offset_in_image, String(name), prot)) {
+            if (auto* region = allocate_region_with_vmobject(vaddr.offset(m_load_offset), size, *vmobject, offset_in_image, String(name), prot)) {
                 region->set_shared(true);
                 return region->vaddr().as_ptr();
             }
@@ -949,7 +949,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
                 prot |= PROT_READ;
             if (is_writable)
                 prot |= PROT_WRITE;
-            if (auto* region = allocate_region(vaddr.offset(totally_random_offset), size, String(name), prot))
+            if (auto* region = allocate_region(vaddr.offset(m_load_offset), size, String(name), prot))
                 return region->vaddr().as_ptr();
             return nullptr;
         };
@@ -976,15 +976,15 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
         // FIXME: Validate that this virtual address is within executable region,
         //     instead of just non-null. You could totally have a DSO with entry point of
         //     the beginning of the text segment.
-        if (!loader->entry().offset(totally_random_offset).get()) {
-            klog() << "do_exec: Failure loading " << path.characters() << ", entry pointer is invalid! (" << loader->entry().offset(totally_random_offset) << ")";
+        if (!loader->entry().offset(m_load_offset).get()) {
+            klog() << "do_exec: Failure loading " << path.characters() << ", entry pointer is invalid! (" << loader->entry().offset(m_load_offset) << ")";
             return -ENOEXEC;
         }
 
         rollback_regions_guard.disarm();
 
         // NOTE: At this point, we've committed to the new executable.
-        entry_eip = loader->entry().offset(totally_random_offset).get();
+        m_entry_eip = loader->entry().offset(m_load_offset).get();
 
         kill_threads_except_self();
 
@@ -1002,6 +1002,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
     m_unveiled_paths.clear();
 
     // Copy of the master TLS region that we will clone for new threads
+    // FIXME: Handle this in userspace
     m_master_tls_region = master_tls_region->make_weak_ptr();
 
     auto main_program_metadata = main_program_description->metadata();
@@ -1042,9 +1043,11 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
     }
     ASSERT(new_main_thread);
 
+    auto auxv = generate_auxiliary_vector();
+
     // NOTE: We create the new stack before disabling interrupts since it will zero-fault
     //       and we don't want to deal with faults after this point.
-    u32 new_userspace_esp = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment));
+    u32 new_userspace_esp = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment), move(auxv));
 
     // We enter a critical section here because we don't want to get interrupted between do_exec()
     // and Processor::assume_context() or the next context switch.
@@ -1070,13 +1073,13 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
     tss.ss = GDT_SELECTOR_DATA3 | 3;
     tss.fs = GDT_SELECTOR_DATA3 | 3;
     tss.gs = GDT_SELECTOR_TLS | 3;
-    tss.eip = entry_eip;
+    tss.eip = m_entry_eip;
     tss.esp = new_userspace_esp;
     tss.cr3 = m_page_directory->cr3();
     tss.ss2 = m_pid;
 
 #ifdef TASK_DEBUG
-    klog() << "Process " << VirtualAddress(this) << " thread " << VirtualAddress(new_main_thread) << " exec'd " << path.characters() << " @ " << String::format("%p", entry_eip);
+    klog() << "Process " << VirtualAddress(this) << " thread " << VirtualAddress(new_main_thread) << " exec'd " << path.characters() << " @ " << String::format("%p", m_entry_eip);
 #endif
 
     if (was_profiling)
@@ -1089,6 +1092,42 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
     return 0;
 }
 
+Vector<AuxiliaryValue> Process::generate_auxiliary_vector() const
+{
+    // Builds the AT_* entries that do_exec() hands to the new main thread's stack setup, ending with AT_NULL.
+    Vector<AuxiliaryValue> entries;
+    // Not reported yet: AT_PHDR/AT_EXECFD, AT_PHENT/AT_PHNUM, AT_FLAGS and AT_NOTELF.
+    entries.append({ AuxiliaryValue::PageSize, PAGE_SIZE });
+    entries.append({ AuxiliaryValue::BaseAddress, reinterpret_cast<void*>(m_load_offset) });
+    entries.append({ AuxiliaryValue::Entry, reinterpret_cast<void*>(m_entry_eip) });
+
+    entries.append({ AuxiliaryValue::Uid, static_cast<long>(m_uid) });
+    entries.append({ AuxiliaryValue::EUid, static_cast<long>(m_euid) });
+    entries.append({ AuxiliaryValue::Gid, static_cast<long>(m_gid) });
+    entries.append({ AuxiliaryValue::EGid, static_cast<long>(m_egid) });
+
+    // FIXME: Don't hard code this? We might support other platforms later.. (e.g. x86_64)
+    entries.append({ AuxiliaryValue::Platform, "i386" });
+    // FIXME: This is platform specific
+    entries.append({ AuxiliaryValue::HwCap, static_cast<long>(CPUID(1).edx()) });
+
+    entries.append({ AuxiliaryValue::ClockTick, static_cast<long>(TimeManagement::the().ticks_per_second()) });
+
+    // FIXME: Also take into account things like extended filesystem permissions? That's what linux does...
+    const bool ids_differ = (m_uid != m_euid) || (m_gid != m_egid);
+    entries.append({ AuxiliaryValue::Secure, ids_differ ? 1 : 0 });
+
+    // AT_RANDOM carries 16 bytes obtained from get_good_random_bytes().
+    char random_payload[16] {};
+    get_good_random_bytes(reinterpret_cast<u8*>(random_payload), sizeof(random_payload));
+    entries.append({ AuxiliaryValue::Random, String(random_payload, sizeof(random_payload)) });
+
+    entries.append({ AuxiliaryValue::ExecFilename, m_executable->absolute_path() });
+
+    entries.append({ AuxiliaryValue::Null, 0L });
+    return entries;
+}
+
 static KResultOr<Vector<String>> find_shebang_interpreter_for_executable(const char first_page[], int nread)
 {
     int word_start = 2;
@@ -5269,5 +5308,4 @@ int Process::sys$recvfd(int sockfd)
     m_fds[new_fd].set(*received_descriptor_or_error.value(), 0);
     return new_fd;
 }
-
 }

+ 6 - 3
Kernel/Process.h

@@ -33,15 +33,16 @@
 #include <AK/NonnullOwnPtrVector.h>
 #include <AK/String.h>
 #include <AK/WeakPtr.h>
+#include <Kernel/API/Syscall.h>
 #include <Kernel/FileSystem/InodeMetadata.h>
 #include <Kernel/Forward.h>
 #include <Kernel/Lock.h>
 #include <Kernel/StdLib.h>
-#include <Kernel/API/Syscall.h>
 #include <Kernel/Thread.h>
 #include <Kernel/UnixTypes.h>
 #include <Kernel/VM/RangeAllocator.h>
 #include <LibC/signal_numbers.h>
+#include <LibELF/AuxiliaryVector.h>
 
 namespace ELF {
 class Loader;
@@ -475,6 +476,7 @@ private:
     ssize_t do_write(FileDescription&, const u8*, int data_size);
 
     KResultOr<NonnullRefPtr<FileDescription>> find_elf_interpreter_for_executable(const String& path, char (&first_page)[PAGE_SIZE], int nread, size_t file_size);
+    Vector<AuxiliaryValue> generate_auxiliary_vector() const;
 
     int alloc_fd(int first_candidate_fd = 0);
     void disown_all_shared_buffers();
@@ -510,6 +512,8 @@ private:
     gid_t m_sgid { 0 };
 
     pid_t m_exec_tid { 0 };
+    FlatPtr m_load_offset { 0U };
+    FlatPtr m_entry_eip { 0U };
 
     static const int m_max_open_file_descriptors { FD_SETSIZE };
 
@@ -661,8 +665,7 @@ inline void Process::for_each_thread(Callback callback) const
     if (my_pid == 0) {
         // NOTE: Special case the colonel process, since its main thread is not in the global thread table.
         Processor::for_each(
-            [&](Processor& proc) -> IterationDecision
-            {
+            [&](Processor& proc) -> IterationDecision {
                 auto idle_thread = proc.idle_thread();
                 if (idle_thread != nullptr)
                     return callback(*idle_thread);

+ 23 - 9
Kernel/Thread.cpp

@@ -185,18 +185,18 @@ void Thread::die_if_needed()
 
     ScopedCritical critical;
     set_state(Thread::State::Dying);
-    
+
     // Flag a context switch. Because we're in a critical section,
     // Scheduler::yield will actually only mark a pending context switch
     // Simply leaving the critical section would not necessarily trigger
     // a switch.
     Scheduler::yield();
-    
+
     // Now leave the critical section so that we can also trigger the
     // actual context switch
     u32 prev_flags;
     Processor::current().clear_critical(prev_flags, false);
-dbg() << "die_if_needed returned form clear_critical!!! in irq: " << Processor::current().in_irq();
+    dbg() << "die_if_needed returned form clear_critical!!! in irq: " << Processor::current().in_irq();
     // We should never get here, but the scoped scheduler lock
     // will be released by Scheduler::context_switch again
     ASSERT_NOT_REACHED();
@@ -640,7 +640,7 @@ RegisterState& Thread::get_register_dump_from_stack()
     return *(RegisterState*)(kernel_stack_top() - sizeof(RegisterState));
 }
 
-u32 Thread::make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment)
+u32 Thread::make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment, Vector<AuxiliaryValue> auxv)
 {
     auto* region = m_process.allocate_region(VirtualAddress(), default_userspace_stack_size, "Stack (Main thread)", PROT_READ | PROT_WRITE, false);
     ASSERT(region);
@@ -654,7 +654,8 @@ u32 Thread::make_userspace_stack_for_main_thread(Vector<String> arguments, Vecto
     int argc = arguments.size();
     char** argv = (char**)stack_base;
     char** env = argv + arguments.size() + 1;
-    char* bufptr = stack_base + (sizeof(char*) * (arguments.size() + 1)) + (sizeof(char*) * (environment.size() + 1));
+    auxv_t* auxvp = (auxv_t*)((char*)(env + environment.size() + 1));
+    char* bufptr = stack_base + (sizeof(char*) * (arguments.size() + 1)) + (sizeof(char*) * (environment.size() + 1) + (sizeof(auxv_t) * auxv.size()));
 
     SmapDisabler disabler;
 
@@ -674,6 +675,17 @@ u32 Thread::make_userspace_stack_for_main_thread(Vector<String> arguments, Vecto
     }
     env[environment.size()] = nullptr;
 
+    for (size_t i = 0; i < auxv.size(); ++i) {
+        *auxvp = auxv[i].auxv;
+        if (!auxv[i].optional_string.is_empty()) {
+            auxvp->a_un.a_ptr = bufptr;
+            memcpy(bufptr, auxv[i].optional_string.characters(), auxv[i].optional_string.length());
+            bufptr += auxv[i].optional_string.length();
+            *(bufptr++) = '\0';
+        }
+        ++auxvp;
+    }
+
     auto push_on_new_stack = [&new_esp](u32 value) {
         new_esp -= 4;
         u32* stack_ptr = (u32*)new_esp;
@@ -685,6 +697,9 @@ u32 Thread::make_userspace_stack_for_main_thread(Vector<String> arguments, Vecto
     push_on_new_stack((FlatPtr)argv);
     push_on_new_stack((FlatPtr)argc);
     push_on_new_stack(0);
+
+    ASSERT((FlatPtr)new_esp % 16 == 0);
+
     return new_esp;
 }
 
@@ -802,7 +817,7 @@ String Thread::backtrace_impl()
                 if (!process.validate_read_from_kernel(VirtualAddress(stack_ptr), sizeof(void*) * 2))
                     break;
                 FlatPtr retaddr;
- 
+
                 if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
                     copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]);
                     recognized_symbols.append({ retaddr, symbolicate_kernel_address(retaddr) });
@@ -890,7 +905,6 @@ Thread::BlockResult Thread::wait_on(WaitQueue& queue, const char* reason, timeva
             set_state(State::Queued);
             m_wait_reason = reason;
 
-    
             if (timeout) {
                 timer_id = TimerQueue::the().add_timer(*timeout, [&]() {
                     ScopedSpinLock sched_lock(g_scheduler_lock);
@@ -928,9 +942,9 @@ Thread::BlockResult Thread::wait_on(WaitQueue& queue, const char* reason, timeva
         // To be able to look at m_wait_queue_node we once again need the
         // scheduler lock, which is held when we insert into the queue
         ScopedSpinLock sched_lock(g_scheduler_lock);
-    
+
         result = m_wait_queue_node.is_in_list() ? BlockResult::InterruptedByTimeout : BlockResult::WokeNormally;
-    
+
         // Make sure we cancel the timer if woke normally.
         if (timeout && result == BlockResult::WokeNormally)
             TimerQueue::the().cancel_timer(timer_id);

+ 5 - 4
Kernel/Thread.h

@@ -39,6 +39,7 @@
 #include <Kernel/ThreadTracer.h>
 #include <Kernel/UnixTypes.h>
 #include <LibC/fd_set.h>
+#include <LibELF/AuxiliaryVector.h>
 
 namespace Kernel {
 
@@ -127,7 +128,7 @@ public:
 
     class Blocker {
     public:
-        virtual ~Blocker() {}
+        virtual ~Blocker() { }
         virtual bool should_unblock(Thread&, time_t now_s, long us) = 0;
         virtual const char* state_string() const = 0;
         virtual bool is_reason_signal() const { return false; }
@@ -371,7 +372,7 @@ public:
 
     ShouldUnblockThread dispatch_one_pending_signal();
     ShouldUnblockThread dispatch_signal(u8 signal);
-    bool has_unmasked_pending_signals() const {  return m_pending_signals & ~m_signal_mask; }
+    bool has_unmasked_pending_signals() const { return m_pending_signals & ~m_signal_mask; }
     void terminate_due_to_signal(u8 signal);
     bool should_ignore_signal(u8 signal) const;
     bool has_signal_handler(u8 signal) const;
@@ -382,7 +383,7 @@ public:
     void set_default_signal_dispositions();
     void push_value_on_stack(FlatPtr);
 
-    u32 make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment);
+    u32 make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment, Vector<AuxiliaryValue>);
 
     void make_thread_specific_region(Badge<Process>);
 
@@ -536,7 +537,7 @@ private:
 
     bool m_dump_backtrace_on_finalization { false };
     bool m_should_die { false };
-    bool m_initialized {false};
+    bool m_initialized { false };
 
     OwnPtr<ThreadTracer> m_tracer;
 

+ 116 - 0
Libraries/LibELF/AuxiliaryVector.h

@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2020, The SerenityOS developers.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+/* Auxiliary Vector types, from Intel386 ABI ver 1.0 section 2.3.3 */
+typedef struct
+{
+    long a_type; /* One of the AT_* values. Note: Extended to long from int, for ease of compatibility w/64 bit */
+    union {
+        long a_val;      /* Payload for entries documented as holding a value */
+        void* a_ptr;     /* Payload for entries documented as holding a pointer */
+        void (*a_fnc)(); /* In spec, not used */
+    } a_un;
+} auxv_t;
+
+#define AT_NULL 0           /* No length, last entry's a_type has this value */
+#define AT_IGNORE 1         /* Entry has no meaning, a_un undefined */
+#define AT_EXECFD 2         /* a_val contains a file descriptor of the main program image */
+#define AT_PHDR 3           /* a_ptr contains pointer to program header table of main program image */
+#define AT_PHENT 4          /* a_val holds size of program header table entries */
+#define AT_PHNUM 5          /* a_val holds number of program header table entries */
+#define AT_PAGESZ 6         /* a_val gives system page size in bytes */
+#define AT_BASE 7           /* a_ptr holds base address that Loader was loaded into memory */
+#define AT_FLAGS 8          /* a_val holds 1 bit flags. Undefined flags are 0 */
+#define AT_ENTRY 9          /* a_ptr holds entry point of application for loader */
+#define AT_NOTELF 10        /* a_val non-zero if the program is not ELF */
+#define AT_UID 11           /* a_val holds real user id of process */
+#define AT_EUID 12          /* a_val holds effective user id of process */
+#define AT_GID 13           /* a_val holds real group id of process */
+#define AT_EGID 14          /* a_val holds effective group id of process */
+#define AT_PLATFORM 15      /* a_ptr points to a string containing platform name */
+#define AT_HWCAP 16         /* a_val contains bitmask of CPU features. Equivalent to CPUID 1.EDX */
+#define AT_CLKTCK 17        /* a_val contains frequency at which times() increments. (Re: Spec. What is times()?) */
+#define AT_SECURE 23        /* a_val holds 1 if program in secure mode (e.g. suid). Otherwise 0 */
+#define AT_BASE_PLATFORM 24 /* a_ptr points to a string identifying base platform name, which might be different from platform (e.g. x86_64 when in i386 compat) */
+#define AT_RANDOM 25        /* a_ptr points to 16 securely generated random bytes */
+#define AT_HWCAP2 26        /* a_val holds extended hw feature mask. Currently 0 */
+#define AT_EXECFN 31        /* a_ptr points to file name of executed program */
+
+#ifdef __cplusplus
+#    include <AK/String.h>
+#    include <AK/Types.h>
+
+struct AuxiliaryValue {
+    enum Type {
+        Null = AT_NULL,
+        Ignore = AT_IGNORE,
+        ExecFileDescriptor = AT_EXECFD,
+        Phdr = AT_PHDR,
+        Phent = AT_PHENT,
+        Phnum = AT_PHNUM,
+        PageSize = AT_PAGESZ,
+        BaseAddress = AT_BASE,
+        Flags = AT_FLAGS,
+        Entry = AT_ENTRY,
+        NotELF = AT_NOTELF,
+        Uid = AT_UID,
+        EUid = AT_EUID,
+        Gid = AT_GID,
+        EGid = AT_EGID,
+        Platform = AT_PLATFORM,
+        HwCap = AT_HWCAP,
+        ClockTick = AT_CLKTCK,
+        Secure = AT_SECURE,
+        BasePlatform = AT_BASE_PLATFORM,
+        Random = AT_RANDOM,
+        HwCap2 = AT_HWCAP2,
+        ExecFilename = AT_EXECFN
+    };
+
+    AuxiliaryValue(Type type, long val)
+    {
+        auxv.a_un.a_val = val;
+        auxv.a_type = type;
+    }
+    AuxiliaryValue(Type type, void* ptr)
+    {
+        auxv.a_un.a_ptr = ptr;
+        auxv.a_type = type;
+    }
+    // String entries start with a null a_ptr; the code laying out the vector points it at a copy of optional_string.
+    AuxiliaryValue(Type type, String string)
+        : AuxiliaryValue(type, static_cast<void*>(nullptr))
+    {
+        optional_string = string;
+    }
+
+    auxv_t auxv {};         // raw entry, copied as-is into the vector
+    String optional_string; // payload of string entries; left default-constructed otherwise
+};
+
+#endif /* __cplusplus */