Browse source code

Kernel+LibC+LibELF: Move TLS handling to userspace

This removes the allocate_tls syscall and adds an archctl option to set
the fs_base for the current thread on x86-64, since you can't set that
register from userspace. enter_thread_context loads the fs_base for the
next thread on each context switch.
This also moves tpidr_el0 (the thread pointer register on AArch64) to
the register state, so it gets properly saved/restored on context
switches.

The userspace TLS allocation code is kept pretty similar to the original
kernel TLS code, aside from a couple of style changes.

We also have to add a new argument "tls_pointer" to
SC_create_thread_params, as we otherwise can't prevent race conditions
between setting the thread pointer register and signal handling code
that might be triggered before the thread pointer was set, which could
use TLS.
Sönke Holz 1 year ago
parent
commit
243d7003a2

+ 1 - 1
Kernel/API/Syscall.h

@@ -49,7 +49,6 @@ enum class NeedsBigProcessLock {
     S(accept4, NeedsBigProcessLock::No)                    \
     S(adjtime, NeedsBigProcessLock::No)                    \
     S(alarm, NeedsBigProcessLock::No)                      \
-    S(allocate_tls, NeedsBigProcessLock::No)               \
     S(archctl, NeedsBigProcessLock::No)                    \
     S(anon_create, NeedsBigProcessLock::No)                \
     S(annotate_mapping, NeedsBigProcessLock::No)           \
@@ -375,6 +374,7 @@ struct SC_create_thread_params {
     void* stack_location;                      // nullptr means any, o.w. process virtual address
     void* (*entry)(void*);
     void* entry_argument;
+    void* tls_pointer;
 };
 
 struct SC_realpath_params {

+ 2 - 0
Kernel/API/archctl_numbers.h

@@ -5,3 +5,5 @@
  */
 
 #pragma once
+
+#define ARCHCTL_X86_64_SET_FS_BASE_FOR_CURRENT_THREAD 1

+ 0 - 2
Kernel/Arch/Processor.h

@@ -165,8 +165,6 @@ public:
 
     static void deferred_call_queue(Function<void()> callback);
 
-    static void set_thread_specific_data(VirtualAddress thread_specific_data);
-
     [[noreturn]] void initialize_context_switching(Thread& initial_thread);
     NEVER_INLINE void switch_context(Thread*& from_thread, Thread*& to_thread);
     [[noreturn]] static void assume_context(Thread& thread, InterruptsState new_interrupts_state);

+ 0 - 1
Kernel/Arch/ProcessorFunctions.include

@@ -26,7 +26,6 @@ template bool ProcessorBase<Processor>::are_interrupts_enabled();
 template void ProcessorBase<Processor>::wait_for_interrupt() const;
 template Processor& ProcessorBase<Processor>::by_id(u32 id);
 template StringView ProcessorBase<Processor>::platform_string();
-template void ProcessorBase<Processor>::set_thread_specific_data(VirtualAddress thread_specific_data);
 template void ProcessorBase<Processor>::initialize_context_switching(Thread& initial_thread);
 template void ProcessorBase<Processor>::switch_context(Thread*& from_thread, Thread*& to_thread);
 template void ProcessorBase<Processor>::assume_context(Thread& thread, InterruptsState new_interrupts_state);

+ 1 - 0
Kernel/Arch/aarch64/Interrupts.cpp

@@ -43,6 +43,7 @@ void dump_registers(RegisterState const& regs)
     dbgln("Saved Program Status: (NZCV({:#b}) DAIF({:#b}) M({:#b})) / {:#x}", ((regs.spsr_el1 >> 28) & 0b1111), ((regs.spsr_el1 >> 6) & 0b1111), regs.spsr_el1 & 0b1111, regs.spsr_el1);
     dbgln("Exception Link Register: {:#x}", regs.elr_el1);
     dbgln("Stack Pointer (EL0): {:#x}", regs.sp_el0);
+    dbgln("Software Thread ID Register (EL0): {:#x}", regs.tpidr_el0);
 
     dbgln(" x0={:p}  x1={:p}  x2={:p}  x3={:p}  x4={:p}", regs.x[0], regs.x[1], regs.x[2], regs.x[3], regs.x[4]);
     dbgln(" x5={:p}  x6={:p}  x7={:p}  x8={:p}  x9={:p}", regs.x[5], regs.x[6], regs.x[7], regs.x[8], regs.x[9]);

+ 1 - 8
Kernel/Arch/aarch64/Processor.cpp

@@ -337,6 +337,7 @@ FlatPtr ProcessorBase<T>::init_context(Thread& thread, bool leave_crit)
     }
     eretframe.elr_el1 = thread_regs.elr_el1;
     eretframe.sp_el0 = thread_regs.sp_el0;
+    eretframe.tpidr_el0 = thread_regs.tpidr_el0;
     eretframe.spsr_el1 = thread_regs.spsr_el1;
 
     // Push a TrapFrame onto the stack
@@ -472,8 +473,6 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
 
     to_thread->set_cpu(Processor::current().id());
 
-    Processor::set_thread_specific_data(to_thread->thread_specific_data());
-
     auto in_critical = to_thread->saved_critical();
     VERIFY(in_critical > 0);
     Processor::restore_critical(in_critical);
@@ -487,12 +486,6 @@ StringView ProcessorBase<T>::platform_string()
     return "aarch64"sv;
 }
 
-template<typename T>
-void ProcessorBase<T>::set_thread_specific_data(VirtualAddress thread_specific_data)
-{
-    Aarch64::Asm::set_tpidr_el0(thread_specific_data.get());
-}
-
 template<typename T>
 void ProcessorBase<T>::wait_for_interrupt() const
 {

+ 7 - 6
Kernel/Arch/aarch64/RegisterState.h

@@ -17,11 +17,12 @@ VALIDATE_IS_AARCH64()
 
 namespace Kernel {
 
-struct RegisterState {
-    u64 x[31];    // Saved general purpose registers
-    u64 spsr_el1; // Save Processor Status Register, EL1
-    u64 elr_el1;  // Exception Link Register, EL1
-    u64 sp_el0;   // EL0 stack pointer
+struct alignas(16) RegisterState {
+    u64 x[31];     // Saved general purpose registers
+    u64 spsr_el1;  // Save Processor Status Register, EL1
+    u64 elr_el1;   // Exception Link Register, EL1
+    u64 sp_el0;    // EL0 stack pointer
+    u64 tpidr_el0; // EL0 Software Thread ID Register
 
     FlatPtr userspace_sp() const { return sp_el0; }
     void set_userspace_sp(FlatPtr value)
@@ -51,7 +52,7 @@ struct RegisterState {
     }
 };
 
-#define REGISTER_STATE_SIZE (34 * 8)
+#define REGISTER_STATE_SIZE (36 * 8)
 static_assert(AssertSize<RegisterState, REGISTER_STATE_SIZE>());
 
 inline void copy_kernel_registers_into_ptrace_registers(PtraceRegisters& ptrace_regs, RegisterState const& kernel_regs)

+ 1 - 0
Kernel/Arch/aarch64/ThreadRegisters.h

@@ -17,6 +17,7 @@ struct ThreadRegisters {
     u64 spsr_el1;
     u64 elr_el1;
     u64 sp_el0;
+    u64 tpidr_el0;
     u64 ttbr0_el1;
 
     FlatPtr ip() const { return elr_el1; }

+ 6 - 1
Kernel/Arch/aarch64/vector_table.S

@@ -8,7 +8,7 @@
 
 // NOTE: This size must be a multiple of 16 bytes, to ensure that the stack pointer
 //       stays 16 byte aligned.
-#define REGISTER_STATE_SIZE 272
+#define REGISTER_STATE_SIZE (36 * 8)
 #if REGISTER_STATE_SIZE % 16 != 0
 #    error "REGISTER_STATE_SIZE is not a multiple of 16 bytes!"
 #endif
@@ -16,6 +16,7 @@
 #define SPSR_EL1_SLOT       (31 * 8)
 #define ELR_EL1_SLOT        (32 * 8)
 #define SP_EL0_SLOT         (33 * 8)
+#define TPIDR_EL0_SLOT      (34 * 8)
 
 // Vector Table Entry macro. Each entry is aligned at 128 bytes, meaning we have
 // at most that many instructions.
@@ -65,6 +66,8 @@
     str x0, [sp, #ELR_EL1_SLOT]
     mrs x0, sp_el0
     str x0, [sp, #SP_EL0_SLOT]
+    mrs x0, tpidr_el0
+    str x0, [sp, #TPIDR_EL0_SLOT]
 
     // Set up TrapFrame struct on the stack
     mov x0, sp
@@ -88,6 +91,8 @@
     msr elr_el1, x0
     ldr x0, [sp, #SP_EL0_SLOT]
     msr sp_el0, x0
+    ldr x0, [sp, #TPIDR_EL0_SLOT]
+    msr tpidr_el0, x0
 
     ldp x0, x1,     [sp, #(0 * 0)]
     ldp x2, x3,     [sp, #(2 * 8)]

+ 0 - 8
Kernel/Arch/riscv64/Processor.cpp

@@ -499,8 +499,6 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
 
     to_thread->set_cpu(Processor::current().id());
 
-    Processor::set_thread_specific_data(to_thread->thread_specific_data());
-
     auto in_critical = to_thread->saved_critical();
     VERIFY(in_critical > 0);
     Processor::restore_critical(in_critical);
@@ -546,12 +544,6 @@ StringView ProcessorBase<T>::platform_string()
     return "riscv64"sv;
 }
 
-template<typename T>
-void ProcessorBase<T>::set_thread_specific_data(VirtualAddress)
-{
-    // FIXME: Add support for thread-local storage on RISC-V
-}
-
 template<typename T>
 void ProcessorBase<T>::wait_for_interrupt() const
 {

+ 1 - 0
Kernel/Arch/x86_64/ArchSpecificThreadData.h

@@ -11,6 +11,7 @@
 namespace Kernel {
 
 struct ArchSpecificThreadData {
+    FlatPtr fs_base { 0 };
 };
 
 }

+ 3 - 4
Kernel/Arch/x86_64/Processor.cpp

@@ -1390,7 +1390,7 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
     }
 
     auto& processor = Processor::current();
-    Processor::set_thread_specific_data(to_thread->thread_specific_data());
+    Processor::set_fs_base(to_thread->arch_specific_data().fs_base);
 
     if (from_regs.cr3 != to_regs.cr3)
         write_cr3(to_regs.cr3);
@@ -1717,11 +1717,10 @@ UNMAP_AFTER_INIT void ProcessorBase<T>::initialize_context_switching(Thread& ini
     VERIFY_NOT_REACHED();
 }
 
-template<typename T>
-void ProcessorBase<T>::set_thread_specific_data(VirtualAddress thread_specific_data)
+void Processor::set_fs_base(FlatPtr fs_base)
 {
     MSR fs_base_msr(MSR_FS_BASE);
-    fs_base_msr.set(thread_specific_data.get());
+    fs_base_msr.set(fs_base);
 }
 
 template<typename T>

+ 2 - 0
Kernel/Arch/x86_64/Processor.h

@@ -153,6 +153,8 @@ public:
 
     static void smp_unicast(u32 cpu, Function<void()>, bool async);
     static void smp_broadcast_flush_tlb(Memory::PageDirectory const*, VirtualAddress, size_t);
+
+    static void set_fs_base(FlatPtr);
 };
 
 template<typename T>

+ 11 - 4
Kernel/Arch/x86_64/archctl.cpp

@@ -4,17 +4,24 @@
  * SPDX-License-Identifier: BSD-2-Clause
  */
 
+#include <Kernel/API/archctl_numbers.h>
 #include <Kernel/Tasks/Process.h>
 
 namespace Kernel {
 
 ErrorOr<FlatPtr> Process::sys$archctl(int option, FlatPtr arg1)
 {
-    (void)option;
-    (void)arg1;
-
     VERIFY_NO_PROCESS_BIG_LOCK(this);
-    return ENOSYS;
+    switch (option) {
+    case ARCHCTL_X86_64_SET_FS_BASE_FOR_CURRENT_THREAD: {
+        Thread::current()->arch_specific_data().fs_base = arg1;
+        Processor::set_fs_base(arg1);
+        return 0;
+    }
+
+    default:
+        return EINVAL;
+    }
 }
 
 }

+ 4 - 68
Kernel/Syscalls/execve.cpp

@@ -33,9 +33,6 @@ struct LoadResult {
     FlatPtr load_base { 0 };
     FlatPtr entry_eip { 0 };
     size_t size { 0 };
-    LockWeakPtr<Memory::Region> tls_region;
-    size_t tls_size { 0 };
-    size_t tls_alignment { 0 };
     LockWeakPtr<Memory::Region> stack_region;
 };
 
@@ -258,18 +255,13 @@ static ErrorOr<FlatPtr> get_load_offset(Elf_Ehdr const& main_program_header, Ope
     return random_load_offset_in_range(selected_range.start, selected_range.end - selected_range.start);
 }
 
-enum class ShouldAllocateTls {
-    No,
-    Yes,
-};
-
 enum class ShouldAllowSyscalls {
     No,
     Yes,
 };
 
 static ErrorOr<LoadResult> load_elf_object(Memory::AddressSpace& new_space, OpenFileDescription& object_description,
-    FlatPtr load_offset, ShouldAllocateTls should_allocate_tls, ShouldAllowSyscalls should_allow_syscalls, Optional<size_t> minimum_stack_size = {})
+    FlatPtr load_offset, ShouldAllowSyscalls should_allow_syscalls, Optional<size_t> minimum_stack_size = {})
 {
     auto& inode = *(object_description.inode());
     auto vmobject = TRY(Memory::SharedInodeVMObject::try_create_with_inode(inode));
@@ -288,9 +280,6 @@ static ErrorOr<LoadResult> load_elf_object(Memory::AddressSpace& new_space, Open
     if (!elf_image.is_valid())
         return ENOEXEC;
 
-    Memory::Region* master_tls_region { nullptr };
-    size_t master_tls_size = 0;
-    size_t master_tls_alignment = 0;
     FlatPtr load_base_address = 0;
     size_t stack_size = Thread::default_userspace_stack_size;
 
@@ -302,24 +291,6 @@ static ErrorOr<LoadResult> load_elf_object(Memory::AddressSpace& new_space, Open
 
     Memory::MemoryManager::enter_address_space(new_space);
 
-    auto load_tls_section = [&](auto& program_header) -> ErrorOr<void> {
-        VERIFY(should_allocate_tls == ShouldAllocateTls::Yes);
-        VERIFY(program_header.size_in_memory());
-
-        if (!elf_image.is_within_image(program_header.raw_data(), program_header.size_in_image())) {
-            dbgln("Shenanigans! ELF PT_TLS header sneaks outside of executable.");
-            return ENOEXEC;
-        }
-
-        auto region_name = TRY(KString::formatted("{} (master-tls)", elf_name));
-        master_tls_region = TRY(new_space.allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, program_header.size_in_memory(), PAGE_SIZE, region_name->view(), PROT_READ | PROT_WRITE, AllocationStrategy::Reserve));
-        master_tls_size = program_header.size_in_memory();
-        master_tls_alignment = program_header.alignment();
-
-        TRY(copy_to_user(master_tls_region->vaddr().as_ptr(), program_header.raw_data(), program_header.size_in_image()));
-        return {};
-    };
-
     auto load_writable_section = [&](auto& program_header) -> ErrorOr<void> {
         // Writable section: create a copy in memory.
         VERIFY(program_header.alignment() % PAGE_SIZE == 0);
@@ -385,9 +356,6 @@ static ErrorOr<LoadResult> load_elf_object(Memory::AddressSpace& new_space, Open
     };
 
     auto load_elf_program_header = [&](auto& program_header) -> ErrorOr<void> {
-        if (program_header.type() == PT_TLS)
-            return load_tls_section(program_header);
-
         if (program_header.type() == PT_LOAD)
             return load_section(program_header);
 
@@ -416,9 +384,6 @@ static ErrorOr<LoadResult> load_elf_object(Memory::AddressSpace& new_space, Open
         load_base_address,
         elf_image.entry().offset(load_offset).get(),
         executable_size,
-        TRY(AK::try_make_weak_ptr_if_nonnull(master_tls_region)),
-        master_tls_size,
-        master_tls_alignment,
         TRY(stack_region->try_make_weak_ptr())
     };
 }
@@ -429,24 +394,10 @@ Process::load(Memory::AddressSpace& new_space, NonnullRefPtr<OpenFileDescription
 {
     auto load_offset = TRY(get_load_offset(main_program_header, main_program_description, interpreter_description));
 
-    if (interpreter_description.is_null()) {
-        auto load_result = TRY(load_elf_object(new_space, main_program_description, load_offset, ShouldAllocateTls::Yes, ShouldAllowSyscalls::No, minimum_stack_size));
-        m_master_tls.with([&load_result](auto& master_tls) {
-            master_tls.region = load_result.tls_region;
-            master_tls.size = load_result.tls_size;
-            master_tls.alignment = load_result.tls_alignment;
-        });
-        return load_result;
-    }
-
-    auto interpreter_load_result = TRY(load_elf_object(new_space, *interpreter_description, load_offset, ShouldAllocateTls::No, ShouldAllowSyscalls::Yes, minimum_stack_size));
+    if (interpreter_description.is_null())
+        return TRY(load_elf_object(new_space, main_program_description, load_offset, ShouldAllowSyscalls::No, minimum_stack_size));
 
-    // TLS allocation will be done in userspace by the loader
-    VERIFY(!interpreter_load_result.tls_region);
-    VERIFY(!interpreter_load_result.tls_alignment);
-    VERIFY(!interpreter_load_result.tls_size);
-
-    return interpreter_load_result;
+    return TRY(load_elf_object(new_space, *interpreter_description, load_offset, ShouldAllowSyscalls::Yes, minimum_stack_size));
 }
 
 void Process::clear_signal_handlers_for_exec()
@@ -492,13 +443,6 @@ ErrorOr<void> Process::do_exec(NonnullRefPtr<OpenFileDescription> main_program_d
 
     auto allocated_space = TRY(Memory::AddressSpace::try_create(*this, nullptr));
     OwnPtr<Memory::AddressSpace> old_space;
-    auto old_master_tls = m_master_tls.with([](auto& master_tls) {
-        auto old = master_tls;
-        master_tls.region = nullptr;
-        master_tls.size = 0;
-        master_tls.alignment = 0;
-        return old;
-    });
     auto& new_space = m_space.with([&](auto& space) -> Memory::AddressSpace& {
         old_space = move(space);
         space = move(allocated_space);
@@ -509,9 +453,6 @@ ErrorOr<void> Process::do_exec(NonnullRefPtr<OpenFileDescription> main_program_d
         m_space.with([&](auto& space) {
             space = old_space.release_nonnull();
         });
-        m_master_tls.with([&](auto& master_tls) {
-            master_tls = old_master_tls;
-        });
         Memory::MemoryManager::enter_process_address_space(*this);
     });
 
@@ -703,11 +644,6 @@ ErrorOr<void> Process::do_exec(NonnullRefPtr<OpenFileDescription> main_program_d
         protected_data.pid = new_main_thread->tid().value();
     });
 
-    auto tsr_result = new_main_thread->make_thread_specific_region({});
-    if (tsr_result.is_error()) {
-        // FIXME: We cannot fail this late. Refactor this so the allocation happens before we commit to the new executable.
-        VERIFY_NOT_REACHED();
-    }
     new_main_thread->reset_fpu_state();
 
     auto& regs = new_main_thread->m_regs;

+ 10 - 21
Kernel/Syscalls/fork.cpp

@@ -149,6 +149,7 @@ ErrorOr<FlatPtr> Process::sys$fork(RegisterState& regs)
     child_regs.spsr_el1 = regs.spsr_el1;
     child_regs.elr_el1 = regs.elr_el1;
     child_regs.sp_el0 = regs.sp_el0;
+    child_regs.tpidr_el0 = regs.tpidr_el0;
 #elif ARCH(RISCV64)
     for (size_t i = 0; i < array_size(child_regs.x); ++i)
         child_regs.x[i] = regs.x[i];
@@ -162,27 +163,15 @@ ErrorOr<FlatPtr> Process::sys$fork(RegisterState& regs)
 #endif
 
     TRY(address_space().with([&](auto& parent_space) {
-        return m_master_tls.with([&](auto& parent_master_tls) -> ErrorOr<void> {
-            return child->address_space().with([&](auto& child_space) -> ErrorOr<void> {
-                child_space->set_enforces_syscall_regions(parent_space->enforces_syscall_regions());
-                for (auto& region : parent_space->region_tree().regions()) {
-                    dbgln_if(FORK_DEBUG, "fork: cloning Region '{}' @ {}", region.name(), region.vaddr());
-                    auto region_clone = TRY(region.try_clone());
-                    TRY(region_clone->map(child_space->page_directory(), Memory::ShouldFlushTLB::No));
-                    TRY(child_space->region_tree().place_specifically(*region_clone, region.range()));
-                    auto* child_region = region_clone.leak_ptr();
-
-                    if (&region == parent_master_tls.region.unsafe_ptr()) {
-                        TRY(child->m_master_tls.with([&](auto& child_master_tls) -> ErrorOr<void> {
-                            child_master_tls.region = TRY(child_region->try_make_weak_ptr());
-                            child_master_tls.size = parent_master_tls.size;
-                            child_master_tls.alignment = parent_master_tls.alignment;
-                            return {};
-                        }));
-                    }
-                }
-                return {};
-            });
+        return child->address_space().with([&](auto& child_space) -> ErrorOr<void> {
+            child_space->set_enforces_syscall_regions(parent_space->enforces_syscall_regions());
+            for (auto& region : parent_space->region_tree().regions()) {
+                dbgln_if(FORK_DEBUG, "fork: cloning Region '{}' @ {}", region.name(), region.vaddr());
+                auto region_clone = TRY(region.try_clone());
+                TRY(region_clone->map(child_space->page_directory(), Memory::ShouldFlushTLB::No));
+                TRY(child_space->region_tree().place_specifically(*region_clone, region.range()));
+                (void)region_clone.leak_ptr();
+            }
             return {};
         });
     }));

+ 0 - 51
Kernel/Syscalls/mmap.cpp

@@ -522,57 +522,6 @@ ErrorOr<FlatPtr> Process::sys$mremap(Userspace<Syscall::SC_mremap_params const*>
     });
 }
 
-ErrorOr<FlatPtr> Process::sys$allocate_tls(Userspace<char const*> initial_data, size_t size)
-{
-    VERIFY_NO_PROCESS_BIG_LOCK(this);
-    TRY(require_promise(Pledge::stdio));
-
-    if (!size || size % PAGE_SIZE != 0)
-        return EINVAL;
-
-    return m_master_tls.with([&](auto& master_tls) -> ErrorOr<FlatPtr> {
-        if (!master_tls.region.is_null())
-            return EEXIST;
-
-        if (thread_count() != 1)
-            return EFAULT;
-
-        Thread* main_thread = nullptr;
-        bool multiple_threads = false;
-        for_each_thread([&main_thread, &multiple_threads](auto& thread) {
-            if (main_thread)
-                multiple_threads = true;
-            main_thread = &thread;
-            return IterationDecision::Break;
-        });
-        VERIFY(main_thread);
-
-        if (multiple_threads)
-            return EINVAL;
-
-        return address_space().with([&](auto& space) -> ErrorOr<FlatPtr> {
-            auto* region = TRY(space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, size, PAGE_SIZE, "Master TLS"sv, PROT_READ | PROT_WRITE));
-
-            master_tls.region = TRY(region->try_make_weak_ptr());
-            master_tls.size = size;
-            master_tls.alignment = PAGE_SIZE;
-
-            {
-                Kernel::SmapDisabler disabler;
-                void* fault_at;
-                if (!Kernel::safe_memcpy((char*)master_tls.region.unsafe_ptr()->vaddr().as_ptr(), (char*)initial_data.ptr(), size, fault_at))
-                    return EFAULT;
-            }
-
-            TRY(main_thread->make_thread_specific_region({}));
-
-            Processor::set_thread_specific_data(main_thread->thread_specific_data());
-
-            return master_tls.region.unsafe_ptr()->vaddr().get();
-        });
-    });
-}
-
 ErrorOr<FlatPtr> Process::sys$annotate_mapping(Userspace<void*> address, int flags)
 {
     VERIFY_NO_PROCESS_BIG_LOCK(this);

+ 6 - 2
Kernel/Syscalls/thread.cpp

@@ -68,6 +68,8 @@ ErrorOr<FlatPtr> Process::sys$create_thread(void* (*entry)(void*), Userspace<Sys
     regs.rsi = (FlatPtr)params.entry_argument;
     regs.rdx = (FlatPtr)params.stack_location;
     regs.rcx = (FlatPtr)params.stack_size;
+
+    thread->arch_specific_data().fs_base = bit_cast<FlatPtr>(params.tls_pointer);
 #elif ARCH(AARCH64)
     regs.ttbr0_el1 = address_space().with([](auto& space) { return space->page_directory().ttbr0(); });
 
@@ -76,6 +78,8 @@ ErrorOr<FlatPtr> Process::sys$create_thread(void* (*entry)(void*), Userspace<Sys
     regs.x[1] = (FlatPtr)params.entry_argument;
     regs.x[2] = (FlatPtr)params.stack_location;
     regs.x[3] = (FlatPtr)params.stack_size;
+
+    regs.tpidr_el0 = bit_cast<FlatPtr>(params.tls_pointer);
 #elif ARCH(RISCV64)
     regs.satp = address_space().with([](auto& space) { return space->page_directory().satp(); });
 
@@ -84,12 +88,12 @@ ErrorOr<FlatPtr> Process::sys$create_thread(void* (*entry)(void*), Userspace<Sys
     regs.x[10] = (FlatPtr)params.entry_argument;
     regs.x[11] = (FlatPtr)params.stack_location;
     regs.x[12] = (FlatPtr)params.stack_size;
+
+    regs.x[3] = bit_cast<FlatPtr>(params.tls_pointer);
 #else
 #    error Unknown architecture
 #endif
 
-    TRY(thread->make_thread_specific_region({}));
-
     PerformanceManager::add_thread_created_event(*thread);
 
     SpinlockLocker lock(g_scheduler_lock);

+ 0 - 8
Kernel/Tasks/Process.h

@@ -471,7 +471,6 @@ public:
     ErrorOr<FlatPtr> sys$recvfd(int sockfd, int options);
     ErrorOr<FlatPtr> sys$sysconf(int name);
     ErrorOr<FlatPtr> sys$disown(ProcessID);
-    ErrorOr<FlatPtr> sys$allocate_tls(Userspace<char const*> initial_data, size_t);
     ErrorOr<FlatPtr> sys$prctl(int option, FlatPtr arg1, FlatPtr arg2, FlatPtr arg3);
     ErrorOr<FlatPtr> sys$anon_create(size_t, int options);
     ErrorOr<FlatPtr> sys$statvfs(Userspace<Syscall::SC_statvfs_params const*> user_params);
@@ -955,13 +954,6 @@ private:
     SpinlockProtected<RefPtr<ProcessList>, LockRank::None> m_jail_process_list;
     SpinlockProtected<RefPtr<Jail>, LockRank::Process> m_attached_jail {};
 
-    struct MasterThreadLocalStorage {
-        LockWeakPtr<Memory::Region> region;
-        size_t size { 0 };
-        size_t alignment { 0 };
-    };
-    SpinlockProtected<MasterThreadLocalStorage, LockRank::None> m_master_tls;
-
     Mutex m_big_lock { "Process"sv, Mutex::MutexBehavior::BigLock };
     Mutex m_ptrace_lock { "ptrace"sv };
 

+ 0 - 35
Kernel/Tasks/Thread.cpp

@@ -434,12 +434,6 @@ void Thread::exit(void* exit_value)
     set_should_die();
     u32 unlock_count;
     [[maybe_unused]] auto rc = unlock_process_if_locked(unlock_count);
-    if (m_thread_specific_range.has_value()) {
-        process().address_space().with([&](auto& space) {
-            auto* region = space->find_region_from_range(m_thread_specific_range.value());
-            space->deallocate_region(*region);
-        });
-    }
     die_if_needed();
 }
 
@@ -1209,7 +1203,6 @@ ErrorOr<NonnullRefPtr<Thread>> Thread::clone(NonnullRefPtr<Process> process)
     m_signal_action_masks.span().copy_to(clone->m_signal_action_masks);
     clone->m_signal_mask = m_signal_mask;
     clone->m_fpu_state = m_fpu_state;
-    clone->m_thread_specific_data = m_thread_specific_data;
     clone->m_arch_specific_data = m_arch_specific_data;
     return clone;
 }
@@ -1356,34 +1349,6 @@ void Thread::print_backtrace()
     }
 }
 
-ErrorOr<void> Thread::make_thread_specific_region(Badge<Process>)
-{
-    return process().m_master_tls.with([&](auto& master_tls) -> ErrorOr<void> {
-        // The process may not require a TLS region, or allocate TLS later with sys$allocate_tls (which is what dynamically loaded programs do)
-        if (!master_tls.region)
-            return {};
-
-        return process().address_space().with([&](auto& space) -> ErrorOr<void> {
-            auto region_alignment = max(master_tls.alignment, alignof(ThreadSpecificData));
-            auto region_size = align_up_to(master_tls.size, region_alignment) + sizeof(ThreadSpecificData);
-            auto* region = TRY(space->allocate_region(Memory::RandomizeVirtualAddress::Yes, {}, region_size, PAGE_SIZE, "Thread-specific"sv, PROT_READ | PROT_WRITE));
-
-            m_thread_specific_range = region->range();
-
-            SmapDisabler disabler;
-            auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(align_up_to(master_tls.size, region_alignment)).as_ptr();
-            auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(master_tls.size, master_tls.size);
-            m_thread_specific_data = VirtualAddress(thread_specific_data);
-            thread_specific_data->self = thread_specific_data;
-
-            if (master_tls.size != 0)
-                memcpy(thread_local_storage, master_tls.region.unsafe_ptr()->vaddr().as_ptr(), master_tls.size);
-
-            return {};
-        });
-    });
-}
-
 RefPtr<Thread> Thread::from_tid_in_same_jail(ThreadID tid)
 {
     return Thread::all_instances().with([&](auto& list) -> RefPtr<Thread> {

+ 0 - 5
Kernel/Tasks/Thread.h

@@ -790,7 +790,6 @@ public:
     State state() const { return m_state; }
     StringView state_string() const;
 
-    VirtualAddress thread_specific_data() const { return m_thread_specific_data; }
     ArchSpecificThreadData& arch_specific_data() { return m_arch_specific_data; }
     ArchSpecificThreadData const& arch_specific_data() const { return m_arch_specific_data; }
 
@@ -892,8 +891,6 @@ public:
 
     FPUState& fpu_state() { return m_fpu_state; }
 
-    ErrorOr<void> make_thread_specific_region(Badge<Process>);
-
     unsigned syscall_count() const { return m_syscall_count; }
     void did_syscall() { ++m_syscall_count; }
     unsigned inode_faults() const { return m_inode_faults; }
@@ -1186,8 +1183,6 @@ private:
     FlatPtr m_kernel_stack_base { 0 };
     FlatPtr m_kernel_stack_top { 0 };
     NonnullOwnPtr<Memory::Region> m_kernel_stack_region;
-    VirtualAddress m_thread_specific_data;
-    Optional<Memory::VirtualRange> m_thread_specific_range;
     Array<Optional<u32>, NSIG> m_signal_action_masks;
     Array<ProcessID, NSIG> m_signal_senders;
     Blocker* m_blocker { nullptr };

+ 13 - 0
Userland/Libraries/LibC/pthread.cpp

@@ -55,9 +55,13 @@ static __thread bool pending_cancellation = false;
 
 extern "C" {
 
+[[gnu::weak]] ErrorOr<FlatPtr> (*__create_new_tls_region)();
+[[gnu::weak]] ErrorOr<void> (*__free_tls_region)(FlatPtr thread_pointer);
+
 [[noreturn]] static void exit_thread(void* code, void* stack_location, size_t stack_size)
 {
     __pthread_key_destroy_for_current_thread();
+    MUST(__free_tls_region(bit_cast<FlatPtr>(__builtin_thread_pointer())));
     syscall(SC_exit_thread, code, stack_location, stack_size);
     VERIFY_NOT_REACHED();
 }
@@ -94,6 +98,12 @@ static int create_thread(pthread_t* thread, void* (*entry)(void*), void* argumen
     thread_params->entry = entry;
     thread_params->entry_argument = argument;
 
+    auto maybe_thread_pointer = __create_new_tls_region();
+    if (maybe_thread_pointer.is_error())
+        return maybe_thread_pointer.error().code();
+
+    thread_params->tls_pointer = bit_cast<void*>(maybe_thread_pointer.release_value());
+
     VERIFY((uintptr_t)stack % 16 == 0);
 
     // Push a fake return address
@@ -102,6 +112,9 @@ static int create_thread(pthread_t* thread, void* (*entry)(void*), void* argumen
     int rc = syscall(SC_create_thread, pthread_create_helper, thread_params);
     if (rc >= 0)
         *thread = rc;
+    else
+        MUST(__free_tls_region(bit_cast<FlatPtr>(thread_params->tls_pointer)));
+
     __RETURN_PTHREAD_ERROR(rc);
 }
 

+ 0 - 10
Userland/Libraries/LibC/sys/mman.cpp

@@ -84,16 +84,6 @@ int posix_madvise(void* address, size_t len, int advice)
     return madvise(address, len, advice);
 }
 
-void* allocate_tls(char const* initial_data, size_t size)
-{
-    ptrdiff_t rc = syscall(SC_allocate_tls, initial_data, size);
-    if (rc < 0 && rc > -EMAXERRNO) {
-        errno = -rc;
-        return MAP_FAILED;
-    }
-    return (void*)rc;
-}
-
 // https://pubs.opengroup.org/onlinepubs/9699919799/functions/mlock.html
 int mlock(void const*, size_t)
 {

+ 0 - 1
Userland/Libraries/LibC/sys/mman.h

@@ -20,7 +20,6 @@ int mprotect(void*, size_t, int prot);
 int set_mmap_name(void*, size_t, char const*);
 int madvise(void*, size_t, int advice);
 int posix_madvise(void*, size_t, int advice);
-void* allocate_tls(char const* initial_data, size_t);
 int mlock(void const*, size_t);
 int munlock(void const*, size_t);
 int msync(void*, size_t, int flags);

+ 3 - 2
Userland/Libraries/LibELF/Arch/x86_64/tls.cpp

@@ -10,9 +10,10 @@
 
 namespace ELF {
 
-void set_thread_pointer_register(FlatPtr)
+void set_thread_pointer_register(FlatPtr value)
 {
-    TODO();
+    // TODO: Consider if we want to support the FSGSBASE extension: https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/best-practices/guidance-enabling-fsgsbase.html
+    VERIFY(archctl(ARCHCTL_X86_64_SET_FS_BASE_FOR_CURRENT_THREAD, value) == 0);
 }
 
 }

+ 48 - 11
Userland/Libraries/LibELF/DynamicLinker.cpp

@@ -18,6 +18,7 @@
 #include <AK/Vector.h>
 #include <Kernel/API/VirtualMemoryAnnotations.h>
 #include <Kernel/API/prctl_numbers.h>
+#include <LibELF/Arch/tls.h>
 #include <LibELF/AuxiliaryVector.h>
 #include <LibELF/DynamicLinker.h>
 #include <LibELF/DynamicLoader.h>
@@ -54,7 +55,9 @@ extern "C" [[noreturn]] void _invoke_entry(int argc, char** argv, char** envp, E
 
 struct TLSData {
     size_t total_tls_size { 0 };
+    void* tls_template { nullptr };
     size_t tls_template_size { 0 };
+    size_t alignment { 0 };
 };
 static TLSData s_tls_data;
 
@@ -234,6 +237,40 @@ static Result<void, DlErrorMessage> map_dependencies(ByteString const& path)
     return {};
 }
 
+struct ThreadSpecificData {
+    ThreadSpecificData* self;
+};
+
+static ErrorOr<FlatPtr> __create_new_tls_region()
+{
+    auto static_tls_region_alignment = max(s_tls_data.alignment, alignof(ThreadSpecificData));
+    auto static_tls_region_size = align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment) + sizeof(ThreadSpecificData);
+    void* thread_specific_ptr = serenity_mmap(nullptr, static_tls_region_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, static_tls_region_alignment, "Static TLS Data");
+    if (thread_specific_ptr == MAP_FAILED)
+        return Error::from_syscall("mmap"sv, -errno);
+
+    auto* thread_specific_data = bit_cast<ThreadSpecificData*>(bit_cast<FlatPtr>(thread_specific_ptr) + (align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment)));
+    thread_specific_data->self = thread_specific_data;
+
+    auto* thread_local_storage = bit_cast<u8*>(bit_cast<FlatPtr>(thread_specific_data) - align_up_to(s_tls_data.tls_template_size, s_tls_data.alignment));
+
+    if (s_tls_data.tls_template_size != 0)
+        memcpy(thread_local_storage, s_tls_data.tls_template, s_tls_data.tls_template_size);
+
+    return bit_cast<FlatPtr>(thread_specific_data);
+}
+
+static ErrorOr<void> __free_tls_region(FlatPtr thread_pointer)
+{
+    auto static_tls_region_alignment = max(s_tls_data.alignment, alignof(ThreadSpecificData));
+    auto static_tls_region_size = align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment) + sizeof(ThreadSpecificData);
+
+    if (munmap(bit_cast<void*>(bit_cast<FlatPtr>(thread_pointer) - align_up_to(s_tls_data.tls_template_size, s_tls_data.alignment)), static_tls_region_size) != 0)
+        return Error::from_syscall("munmap"sv, -errno);
+
+    return {};
+}
+
 static void allocate_tls()
 {
     for (auto const& data : s_loaders) {
@@ -244,25 +281,23 @@ static void allocate_tls()
     if (s_tls_data.total_tls_size == 0)
         return;
 
-    auto page_aligned_size = align_up_to(s_tls_data.total_tls_size, PAGE_SIZE);
-    auto initial_tls_data_result = ByteBuffer::create_zeroed(page_aligned_size);
-    if (initial_tls_data_result.is_error()) {
-        dbgln("Failed to allocate initial TLS data");
+    s_tls_data.tls_template_size = align_up_to(s_tls_data.total_tls_size, PAGE_SIZE);
+    s_tls_data.alignment = PAGE_SIZE;
+    s_tls_data.tls_template = mmap_with_name(nullptr, s_tls_data.tls_template_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, "TLS Template");
+
+    if (s_tls_data.tls_template == MAP_FAILED) {
+        dbgln("Failed to allocate memory for the TLS template");
         VERIFY_NOT_REACHED();
     }
 
-    auto& initial_tls_data = initial_tls_data_result.value();
+    auto tls_template = Bytes(s_tls_data.tls_template, s_tls_data.tls_template_size);
 
     // Initialize TLS data
     for (auto const& entry : s_loaders) {
-        entry.value->copy_initial_tls_data_into(initial_tls_data);
+        entry.value->copy_initial_tls_data_into(tls_template);
     }
 
-    void* master_tls = ::allocate_tls((char*)initial_tls_data.data(), initial_tls_data.size());
-    VERIFY(master_tls != (void*)-1);
-    dbgln_if(DYNAMIC_LOAD_DEBUG, "from userspace, master_tls: {:p}", master_tls);
-
-    s_tls_data.tls_template_size = initial_tls_data.size();
+    set_thread_pointer_register(MUST(__create_new_tls_region()));
 }
 
 static int __dl_iterate_phdr(DlIteratePhdrCallbackFunction callback, void* data)
@@ -648,6 +683,8 @@ void ELF::DynamicLinker::linker_main(ByteString&& main_program_path, int main_pr
     s_magic_weak_symbols.set("environ"sv, make_ref_counted<MagicWeakSymbol>(STT_OBJECT, s_envp));
     s_magic_weak_symbols.set("__stack_chk_guard"sv, make_ref_counted<MagicWeakSymbol>(STT_OBJECT, stack_guard));
     s_magic_weak_symbols.set("__call_fini_functions"sv, make_ref_counted<MagicWeakSymbol>(STT_FUNC, __call_fini_functions));
+    s_magic_weak_symbols.set("__create_new_tls_region"sv, make_ref_counted<MagicWeakSymbol>(STT_FUNC, __create_new_tls_region));
+    s_magic_weak_symbols.set("__free_tls_region"sv, make_ref_counted<MagicWeakSymbol>(STT_FUNC, __free_tls_region));
     s_magic_weak_symbols.set("__dl_iterate_phdr"sv, make_ref_counted<MagicWeakSymbol>(STT_FUNC, __dl_iterate_phdr));
     s_magic_weak_symbols.set("__dlclose"sv, make_ref_counted<MagicWeakSymbol>(STT_FUNC, __dlclose));
     s_magic_weak_symbols.set("__dlopen"sv, make_ref_counted<MagicWeakSymbol>(STT_FUNC, __dlopen));