Forráskód Böngészése

Kernel: Support thread-local storage

This patch adds support for TLS according to the x86 System V ABI.
Each thread gets a thread-specific memory region, and the GS segment
register always points _to a pointer_ to the thread-specific memory.

In other words, to access thread-local variables, userspace programs
start by dereferencing the pointer at [gs:0].

The Process keeps a master copy of the TLS segment that new threads
should use, and when a new thread is created, it gets its own copy.
It's basically whatever the PT_TLS program header in the ELF says.
Andreas Kling 5 éve
szülő
commit
ec6bceaa08
7 módosított fájl, 92 hozzáadás és 5 törlés
  1. 5 0
      AK/ELF/ELFLoader.cpp
  2. 1 0
      AK/ELF/ELFLoader.h
  3. 24 2
      Kernel/Process.cpp
  4. 4 0
      Kernel/Process.h
  5. 6 0
      Kernel/Scheduler.cpp
  6. 40 3
      Kernel/Thread.cpp
  7. 12 0
      Kernel/Thread.h

+ 5 - 0
AK/ELF/ELFLoader.cpp

@@ -35,6 +35,11 @@ bool ELFLoader::layout()
 {
     bool failed = false;
     m_image.for_each_program_header([&](const ELFImage::ProgramHeader& program_header) {
+        if (program_header.type() == PT_TLS) {
+            auto* tls_image = tls_section_hook(program_header.size_in_memory(), program_header.alignment());
+            memcpy(tls_image, program_header.raw_data(), program_header.size_in_image());
+            return;
+        }
         if (program_header.type() != PT_LOAD)
             return;
 #ifdef ELFLOADER_DEBUG

+ 1 - 0
AK/ELF/ELFLoader.h

@@ -19,6 +19,7 @@ public:
     bool load();
 #if defined(KERNEL)
     Function<void*(VirtualAddress, size_t, size_t, bool, bool, const String&)> alloc_section_hook;
+    Function<void*(size_t, size_t)> tls_section_hook;
     Function<void*(VirtualAddress, size_t, size_t, size_t, bool r, bool w, bool x, const String&)> map_section_hook;
     VirtualAddress entry() const { return m_image.entry(); }
 #endif

+ 24 - 2
Kernel/Process.cpp

@@ -310,6 +310,9 @@ Process* Process::fork(RegisterDump& regs)
         auto cloned_region = region.clone();
         child->m_regions.append(move(cloned_region));
         MM.map_region(*child, child->m_regions.last());
+
+        if (&region == m_master_tls_region)
+            child->m_master_tls_region = child->m_regions.last();
     }
 
     for (auto gid : m_gids)
@@ -403,6 +406,10 @@ int Process::do_exec(String path, Vector<String> arguments, Vector<String> envir
     RefPtr<Region> region = allocate_region_with_vmo(VirtualAddress(), metadata.size, vmo, 0, description->absolute_path(), PROT_READ);
     ASSERT(region);
 
+    RefPtr<Region> master_tls_region;
+    size_t master_tls_size = 0;
+    size_t master_tls_alignment = 0;
+
     OwnPtr<ELFLoader> loader;
     {
         // Okay, here comes the sleight of hand, pay close attention..
@@ -433,6 +440,13 @@ int Process::do_exec(String path, Vector<String> arguments, Vector<String> envir
             (void)allocate_region(vaddr, size, String(name), prot);
             return vaddr.as_ptr();
         };
+        loader->tls_section_hook = [&](size_t size, size_t alignment) {
+            ASSERT(size);
+            master_tls_region = allocate_region({}, size, String(), PROT_READ | PROT_WRITE);
+            master_tls_size = size;
+            master_tls_alignment = alignment;
+            return master_tls_region->vaddr().as_ptr();
+        };
         bool success = loader->load();
         if (!success || !loader->entry().get()) {
             m_page_directory = move(old_page_directory);
@@ -451,6 +465,9 @@ int Process::do_exec(String path, Vector<String> arguments, Vector<String> envir
     m_elf_loader = move(loader);
     m_executable = description->custody();
 
+    // Copy of the master TLS region that we will clone for new threads
+    m_master_tls_region = master_tls_region.ptr();
+
     if (metadata.is_setuid())
         m_euid = metadata.uid;
     if (metadata.is_setgid())
@@ -483,6 +500,11 @@ int Process::do_exec(String path, Vector<String> arguments, Vector<String> envir
     // ss0 sp!!!!!!!!!
     u32 old_esp0 = main_thread().m_tss.esp0;
 
+    m_master_tls_size = master_tls_size;
+    m_master_tls_alignment = master_tls_alignment;
+
+    main_thread().make_thread_specific_region({});
+
     memset(&main_thread().m_tss, 0, sizeof(main_thread().m_tss));
     main_thread().m_tss.eflags = 0x0202;
     main_thread().m_tss.eip = entry_eip;
@@ -490,7 +512,7 @@ int Process::do_exec(String path, Vector<String> arguments, Vector<String> envir
     main_thread().m_tss.ds = 0x23;
     main_thread().m_tss.es = 0x23;
     main_thread().m_tss.fs = 0x23;
-    main_thread().m_tss.gs = 0x23;
+    main_thread().m_tss.gs = thread_specific_selector() | 3;
     main_thread().m_tss.ss = 0x23;
     main_thread().m_tss.cr3 = page_directory().cr3();
     main_thread().make_userspace_stack_for_main_thread(move(arguments), move(environment));
@@ -2661,7 +2683,7 @@ int Process::sys$create_thread(int (*entry)(void*), void* argument)
     tss.eflags = 0x0202;
     tss.cr3 = page_directory().cr3();
     thread->make_userspace_stack_for_secondary_thread(argument);
-
+    thread->make_thread_specific_region({});
     thread->set_state(Thread::State::Runnable);
     return thread->tid();
 }

+ 4 - 0
Kernel/Process.h

@@ -369,6 +369,10 @@ private:
     RefPtr<ProcessTracer> m_tracer;
     OwnPtr<ELFLoader> m_elf_loader;
 
+    RefPtr<Region> m_master_tls_region;
+    size_t m_master_tls_size { 0 };
+    size_t m_master_tls_alignment { 0 };
+
     Lock m_big_lock { "Process" };
 
     u64 m_alarm_deadline { 0 };

+ 6 - 0
Kernel/Scheduler.cpp

@@ -460,6 +460,12 @@ bool Scheduler::context_switch(Thread& thread)
         descriptor.descriptor_type = 0;
     }
 
+    if (!thread.thread_specific_data().is_null()) {
+        auto& descriptor = thread_specific_descriptor();
+        descriptor.set_base(thread.thread_specific_data().as_ptr());
+        descriptor.set_limit(sizeof(ThreadSpecificData*));
+    }
+
     auto& descriptor = get_gdt_entry(thread.selector());
     descriptor.type = 11; // Busy TSS
     flush_gdt();

+ 40 - 3
Kernel/Thread.cpp

@@ -9,6 +9,28 @@
 
 //#define SIGNAL_DEBUG
 
+u16 thread_specific_selector() // Returns the GDT selector used for thread-specific data, allocating and initializing its descriptor on the first call.
+{
+    static u16 selector; // Stays zero until the first call, then caches the allocated selector.
+    if (!selector) {
+        selector = gdt_alloc_entry();
+        auto& descriptor = get_gdt_entry(selector);
+        descriptor.dpl = 3; // Callers load this selector with the low bits set to 3 (selector | 3).
+        descriptor.segment_present = 1;
+        descriptor.granularity = 0; // NOTE(review): presumably byte-granular limit, matching the small limit set in Scheduler::context_switch() — confirm.
+        descriptor.zero = 0;
+        descriptor.operation_size = 1;
+        descriptor.descriptor_type = 1;
+        descriptor.type = 2; // NOTE(review): presumably a read/write data segment on x86 — confirm against the Descriptor layout.
+    } // Base and limit are not set here; Scheduler::context_switch() fills them in for the thread being switched to.
+    return selector;
+}
+
+Descriptor& thread_specific_descriptor() // Returns the GDT descriptor that thread_specific_selector() refers to.
+{
+    return get_gdt_entry(thread_specific_selector()); // Allocates the entry first if this is the very first use.
+}
+
 HashTable<Thread*>& thread_table()
 {
     ASSERT_INTERRUPTS_DISABLED();
@@ -32,22 +54,24 @@ Thread::Thread(Process& process)
 
     // Only IF is set when a process boots.
     m_tss.eflags = 0x0202;
-    u16 cs, ds, ss;
+    u16 cs, ds, ss, gs;
 
     if (m_process.is_ring0()) {
         cs = 0x08;
         ds = 0x10;
         ss = 0x10;
+        gs = 0;
     } else {
         cs = 0x1b;
         ds = 0x23;
         ss = 0x23;
+        gs = thread_specific_selector() | 3;
     }
 
     m_tss.ds = ds;
     m_tss.es = ds;
     m_tss.fs = ds;
-    m_tss.gs = ds;
+    m_tss.gs = gs;
     m_tss.ss = ss;
     m_tss.cs = cs;
 
@@ -425,7 +449,7 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
         m_tss.ds = 0x23;
         m_tss.es = 0x23;
         m_tss.fs = 0x23;
-        m_tss.gs = 0x23;
+        m_tss.gs = thread_specific_selector() | 3;
         m_tss.eip = regs.eip;
         m_tss.esp = regs.esp_if_crossRing;
         // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
@@ -525,6 +549,7 @@ Thread* Thread::clone(Process& process)
     clone->m_fpu_state = (FPUState*)kmalloc_aligned(sizeof(FPUState), 16);
     memcpy(clone->m_fpu_state, m_fpu_state, sizeof(FPUState));
     clone->m_has_used_fpu = m_has_used_fpu;
+    clone->m_thread_specific_data = m_thread_specific_data;
     return clone;
 }
 
@@ -602,3 +627,15 @@ String Thread::backtrace_impl() const
     }
     return builder.to_string();
 }
+
+// Allocates this thread's thread-specific region and seeds it from the
+// process's master TLS image.
+//
+// Layout of the region, from low to high addresses:
+//   [ alignment padding | TLS data copied from the master region | ThreadSpecificData ]
+// m_thread_specific_data ends up holding the address of the ThreadSpecificData,
+// whose `self` member points back at the struct itself.
+void Thread::make_thread_specific_region(Badge<Process>)
+{
+    size_t thread_specific_region_alignment = max(process().m_master_tls_alignment, alignof(ThreadSpecificData));
+    size_t thread_specific_region_size = align_up_to(process().m_master_tls_size, thread_specific_region_alignment) + sizeof(ThreadSpecificData);
+    auto* region = process().allocate_region({}, thread_specific_region_size, "Thread-specific", PROT_READ | PROT_WRITE, true);
+    auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(align_up_to(process().m_master_tls_size, thread_specific_region_alignment)).as_ptr();
+    // The TLS block sits directly below the ThreadSpecificData.
+    auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(process().m_master_tls_size, process().m_master_tls_alignment);
+    m_thread_specific_data = VirtualAddress((u32)thread_specific_data);
+    thread_specific_data->self = thread_specific_data;
+
+    // An executable without a PT_TLS program header never triggers the loader's
+    // TLS hook, so the process has no master TLS region. Only copy the initial
+    // TLS image when one exists instead of dereferencing a null region.
+    if (auto* master_tls_region = process().m_master_tls_region.ptr())
+        memcpy(thread_local_storage, master_tls_region->vaddr().as_ptr(), process().m_master_tls_size);
+}

+ 12 - 0
Kernel/Thread.h

@@ -30,6 +30,10 @@ struct SignalActionData {
     int flags { 0 };
 };
 
+struct ThreadSpecificData { // Per-thread block whose address Thread keeps in m_thread_specific_data.
+    ThreadSpecificData* self; // Set to this struct's own address by Thread::make_thread_specific_region().
+};
+
 class Thread {
     friend class Process;
     friend class Scheduler;
@@ -214,6 +218,8 @@ public:
     const char* state_string() const;
     u32 ticks() const { return m_ticks; }
 
+    VirtualAddress thread_specific_data() const { return m_thread_specific_data; }
+
     u64 sleep(u32 ticks);
 
     enum class BlockResult {
@@ -301,6 +307,8 @@ public:
     void make_userspace_stack_for_main_thread(Vector<String> arguments, Vector<String> environment);
     void make_userspace_stack_for_secondary_thread(void* argument);
 
+    void make_thread_specific_region(Badge<Process>);
+
     Thread* clone(Process&);
 
     template<typename Callback>
@@ -336,6 +344,7 @@ private:
     RefPtr<Region> m_userspace_stack_region;
     RefPtr<Region> m_kernel_stack_region;
     RefPtr<Region> m_kernel_stack_for_signal_handler_region;
+    VirtualAddress m_thread_specific_data;
     SignalActionData m_signal_action_data[32];
     Region* m_signal_stack_user_region { nullptr };
     IntrusiveList<Blocker, &Blocker::m_blocker_list_node> m_blockers;
@@ -432,3 +441,6 @@ inline IterationDecision Scheduler::for_each_nonrunnable(Callback callback)
 
     return IterationDecision::Continue;
 }
+
+u16 thread_specific_selector();
+Descriptor& thread_specific_descriptor();