Ver código fonte

Kernel+LibC: Allow clock_gettime() to run without syscalls

This patch adds a vDSO-like mechanism for exposing the current time as
an array of per-clock-source timestamps.

LibC's clock_gettime() calls sys$map_time_page() to map the kernel's
"time page" into the process address space (at a random address, of course).
This is only done on the first call; from then on the timestamps are
fetched directly from the time page.

This first patch only adds support for CLOCK_REALTIME, but eventually
we should be able to support all clock sources this way and get rid of
sys$clock_gettime() in the kernel entirely. :^)

Accesses are synchronized using two atomic integers: one is incremented at
the start of the kernel's time page update cycle, and the other is set to
match it when the cycle finishes.
Andreas Kling 4 anos atrás
pai
commit
fdfc66db61

+ 1 - 0
Kernel/API/Syscall.h

@@ -118,6 +118,7 @@ enum class NeedsBigProcessLock {
     S(listen, NeedsBigProcessLock::Yes)                     \
     S(lseek, NeedsBigProcessLock::Yes)                      \
     S(madvise, NeedsBigProcessLock::Yes)                    \
+    S(map_time_page, NeedsBigProcessLock::Yes)              \
     S(mkdir, NeedsBigProcessLock::Yes)                      \
     S(mknod, NeedsBigProcessLock::Yes)                      \
     S(mmap, NeedsBigProcessLock::Yes)                       \

+ 30 - 0
Kernel/API/TimePage.h

@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2021, Andreas Kling <kling@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Types.h>
+
+#ifdef KERNEL
+#    include <Kernel/UnixTypes.h>
+#else
+#    include <time.h>
+#endif
+
+namespace Kernel {
+
+// Tells whether the time page carries a timestamp for the given clock.
+// Only CLOCK_REALTIME is reported as supported.
+inline bool time_page_supports(clockid_t clock_id)
+{
+    switch (clock_id) {
+    case CLOCK_REALTIME:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// Layout of the kernel "time page" that sys$map_time_page() maps into userspace with PROT_READ.
+// The field order is shared between the kernel writer and the LibC reader.
+struct TimePage {
+    // Incremented by TimeManagement::update_time_page() before it rewrites `clocks`.
+    volatile u32 update1;
+    // One timestamp slot per clock id; update_time_page() only writes the CLOCK_REALTIME slot.
+    struct timespec clocks[CLOCK_ID_COUNT];
+    // Set to the new value of `update1` after `clocks` has been rewritten.
+    volatile u32 update2;
+};
+
+}

+ 1 - 0
Kernel/Process.h

@@ -412,6 +412,7 @@ public:
     KResultOr<FlatPtr> sys$anon_create(size_t, int options);
     KResultOr<FlatPtr> sys$statvfs(Userspace<const Syscall::SC_statvfs_params*> user_params);
     KResultOr<FlatPtr> sys$fstatvfs(int fd, statvfs* buf);
+    KResultOr<FlatPtr> sys$map_time_page();
 
     template<bool sockname, typename Params>
     int get_sock_or_peer_name(const Params&);

+ 18 - 0
Kernel/Syscalls/clock.cpp

@@ -10,6 +10,24 @@
 
 namespace Kernel {
 
+// Maps the kernel time page with PROT_READ at a randomized, page-sized range in
+// the calling process and returns the address of the new region.
+// Requires the "stdio" promise; returns ENOMEM when no range can be allocated.
+KResultOr<FlatPtr> Process::sys$map_time_page()
+{
+    VERIFY_PROCESS_BIG_LOCK_ACQUIRED(this);
+    REQUIRE_PROMISE(stdio);
+
+    auto placement = address_space().page_directory().range_allocator().allocate_randomized(PAGE_SIZE, PAGE_SIZE);
+    if (!placement.has_value())
+        return ENOMEM;
+
+    auto& time_vmobject = TimeManagement::the().time_page_vmobject();
+    auto mapping = address_space().allocate_region_with_vmobject(placement.value(), time_vmobject, 0, "Kernel time page"sv, PROT_READ, true);
+    if (mapping.is_error())
+        return mapping.error();
+
+    return mapping.value()->vaddr().get();
+}
+
 KResultOr<FlatPtr> Process::sys$clock_gettime(clockid_t clock_id, Userspace<timespec*> user_ts)
 {
     VERIFY_NO_PROCESS_BIG_LOCK(this);

+ 26 - 0
Kernel/Time/TimeManagement.cpp

@@ -145,6 +145,9 @@ UNMAP_AFTER_INIT void TimeManagement::initialize(u32 cpu)
             dmesgln("Time: Using APIC timer as system timer");
             s_the->set_system_timer(*apic_timer);
         }
+
+        s_the->m_time_page_region = MM.allocate_kernel_region(PAGE_SIZE, "Time page"sv, Memory::Region::Access::ReadWrite, AllocationStrategy::AllocateNow);
+        VERIFY(s_the->m_time_page_region);
     } else {
         VERIFY(s_the.is_initialized());
         if (auto* apic_timer = APIC::the().get_timer()) {
@@ -359,6 +362,9 @@ void TimeManagement::increment_time_since_boot_hpet()
     m_ticks_this_second = ticks_this_second;
     // TODO: Apply m_remaining_epoch_time_adjustment
     timespec_add(m_epoch_time, { (time_t)(delta_ns / 1000000000), (long)(delta_ns % 1000000000) }, m_epoch_time);
+
+    update_time_page();
+
     m_update2.store(update_iteration + 1, AK::MemoryOrder::memory_order_release);
 }
 
@@ -389,6 +395,8 @@ void TimeManagement::increment_time_since_boot()
         ++m_seconds_since_boot;
         m_ticks_this_second = 0;
     }
+
+    update_time_page();
     m_update2.store(update_iteration + 1, AK::MemoryOrder::memory_order_release);
 }
 
@@ -419,4 +427,22 @@ bool TimeManagement::disable_profile_timer()
     return true;
 }
 
+// Publishes the current m_epoch_time into the shared time page as CLOCK_REALTIME.
+// The statement order below is the synchronization protocol; do not reorder it.
+void TimeManagement::update_time_page()
+{
+    auto* page = time_page();
+    // Bump update1 before touching the timestamp, so the two counters disagree while the write is in flight.
+    u32 update_iteration = AK::atomic_fetch_add(&page->update1, 1u, AK::MemoryOrder::memory_order_acquire);
+    page->clocks[CLOCK_REALTIME] = m_epoch_time;
+    // Store the new iteration into update2 last, so it catches up with update1 only once the write is done.
+    AK::atomic_store(&page->update2, update_iteration + 1u, AK::MemoryOrder::memory_order_release);
+}
+
+// Returns the kernel-side view of the time page, i.e. the start of m_time_page_region.
+TimePage* TimeManagement::time_page()
+{
+    void* region_base = (void*)m_time_page_region->vaddr().as_ptr();
+    return static_cast<TimePage*>(region_base);
+}
+
+// Exposes the VMObject backing the time page region (used by sys$map_time_page()).
+Memory::VMObject& TimeManagement::time_page_vmobject()
+{
+    auto& time_page_region = *m_time_page_region;
+    return time_page_region.vmobject();
+}
+
 }

+ 9 - 0
Kernel/Time/TimeManagement.h

@@ -7,9 +7,11 @@
 #pragma once
 
 #include <AK/NonnullRefPtrVector.h>
+#include <AK/OwnPtr.h>
 #include <AK/RefPtr.h>
 #include <AK/Time.h>
 #include <AK/Types.h>
+#include <Kernel/API/TimePage.h>
 #include <Kernel/Arch/x86/RegisterState.h>
 #include <Kernel/KResult.h>
 #include <Kernel/UnixTypes.h>
@@ -71,7 +73,12 @@ public:
 
     bool can_query_precise_time() const { return m_can_query_precise_time; }
 
+    Memory::VMObject& time_page_vmobject();
+
 private:
+    TimePage* time_page();
+    void update_time_page();
+
     bool probe_and_set_legacy_hardware_timers();
     bool probe_and_set_non_legacy_hardware_timers();
     Vector<HardwareTimerBase*> scan_and_initialize_periodic_timers();
@@ -100,6 +107,8 @@ private:
 
     Atomic<u32> m_profile_enable_count { 0 };
     RefPtr<HardwareTimerBase> m_profile_timer;
+
+    OwnPtr<Memory::Region> m_time_page_region;
 };
 
 }

+ 32 - 0
Userland/Libraries/LibC/time.cpp

@@ -7,6 +7,7 @@
 #include <AK/String.h>
 #include <AK/StringBuilder.h>
 #include <AK/Time.h>
+#include <Kernel/API/TimePage.h>
 #include <assert.h>
 #include <errno.h>
 #include <stdio.h>
@@ -362,8 +363,39 @@ clock_t clock()
     return tms.tms_utime + tms.tms_stime;
 }
 
+// Returns the process-wide mapping of the kernel time page, creating it with
+// sys$map_time_page() on first use. On failure, sets errno and returns nullptr
+// without caching anything, so a later call tries the mapping again.
+static Kernel::TimePage* get_kernel_time_page()
+{
+    // FIXME: Thread safety
+    static Kernel::TimePage* s_kernel_time_page;
+    if (s_kernel_time_page)
+        return s_kernel_time_page;
+
+    auto rc = syscall(SC_map_time_page);
+    bool const mapping_failed = (int)rc < 0 && (int)rc > -EMAXERRNO;
+    if (mapping_failed) {
+        errno = -(int)rc;
+        return nullptr;
+    }
+
+    s_kernel_time_page = (Kernel::TimePage*)rc;
+    return s_kernel_time_page;
+}
+
 int clock_gettime(clockid_t clock_id, struct timespec* ts)
 {
+    if (Kernel::time_page_supports(clock_id)) {
+        if (!ts) {
+            errno = EFAULT;
+            return -1;
+        }
+
+        if (auto* kernel_time_page = get_kernel_time_page()) {
+            u32 update_iteration;
+            do {
+                update_iteration = AK::atomic_load(&kernel_time_page->update1, AK::memory_order_acquire);
+                *ts = kernel_time_page->clocks[clock_id];
+            } while (update_iteration != AK::atomic_load(&kernel_time_page->update2, AK::memory_order_acquire));
+            return 0;
+        }
+    }
+
     int rc = syscall(SC_clock_gettime, clock_id, ts);
     __RETURN_WITH_ERRNO(rc, rc, -1);
 }