Sfoglia il codice sorgente

Kernel+Profiler: Track lost time between profiler timer ticks

We can lose profiling timer events for a few reasons, for example
disabled interrupts or system slowness. This accounts for lost
time between CPU samples by adding a field lost_samples to each
profiling event which tracks how many samples were lost immediately
preceding the event.
Gunnar Beutner 4 anni fa
parent
commit
c41f13f10b

+ 7 - 5
Kernel/PerformanceEventBuffer.cpp

@@ -25,7 +25,7 @@ NEVER_INLINE KResult PerformanceEventBuffer::append(int type, FlatPtr arg1, Flat
     FlatPtr ebp;
     asm volatile("movl %%ebp, %%eax"
                  : "=a"(ebp));
-    return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, arg1, arg2, arg3);
+    return append_with_eip_and_ebp(current_thread->pid(), current_thread->tid(), 0, ebp, type, 0, arg1, arg2, arg3);
 }
 
 static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr ebp, FlatPtr eip)
@@ -55,13 +55,14 @@ static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(Fl
 }
 
 KResult PerformanceEventBuffer::append_with_eip_and_ebp(ProcessID pid, ThreadID tid,
-    u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
+    u32 eip, u32 ebp, int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
 {
     if (count() >= capacity())
         return ENOBUFS;
 
     PerformanceEvent event;
     event.type = type;
+    event.lost_samples = lost_samples;
 
     switch (type) {
     case PERF_EVENT_SAMPLE:
@@ -182,6 +183,7 @@ bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
         event_object.add("pid", event.pid);
         event_object.add("tid", event.tid);
         event_object.add("timestamp", event.timestamp);
+        event_object.add("lost_samples", i != 0 ? event.lost_samples : 0);
         auto stack_array = event_object.add_array("stack");
         for (size_t j = 0; j < event.stack_size; ++j) {
             stack_array.add(event.stack[j]);
@@ -220,17 +222,17 @@ void PerformanceEventBuffer::add_process(const Process& process, ProcessEventTyp
 
     [[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0, 0, 0,
         event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC,
-        process.pid().value(), 0, executable);
+        0, process.pid().value(), 0, executable);
 
     process.for_each_thread([&](auto& thread) {
         [[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), thread.tid().value(),
-            0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, nullptr);
+            0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, 0, nullptr);
         return IterationDecision::Continue;
     });
 
     for (auto& region : process.space().regions()) {
         [[maybe_unused]] auto rc = append_with_eip_and_ebp(process.pid(), 0,
-            0, 0, PERF_EVENT_MMAP, region->range().base().get(), region->range().size(), region->name());
+            0, 0, PERF_EVENT_MMAP, 0, region->range().base().get(), region->range().size(), region->name());
     }
 }
 

+ 2 - 1
Kernel/PerformanceEventBuffer.h

@@ -53,6 +53,7 @@ struct [[gnu::packed]] PerformanceEvent {
     u32 pid { 0 };
     u32 tid { 0 };
     u64 timestamp;
+    u32 lost_samples;
     union {
         MallocPerformanceEvent malloc;
         FreePerformanceEvent free;
@@ -77,7 +78,7 @@ public:
 
     KResult append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3, Thread* current_thread = Thread::current());
     KResult append_with_eip_and_ebp(ProcessID pid, ThreadID tid, u32 eip, u32 ebp,
-        int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3);
+        int type, u32 lost_samples, FlatPtr arg1, FlatPtr arg2, const StringView& arg3);
 
     void clear()
     {

+ 11 - 4
Kernel/PerformanceManager.h

@@ -34,7 +34,7 @@ public:
         if (g_profiling_all_threads) {
             VERIFY(g_global_perf_events);
             [[maybe_unused]] auto rc = g_global_perf_events->append_with_eip_and_ebp(
-                process.pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, nullptr);
+                process.pid(), 0, 0, 0, PERF_EVENT_PROCESS_EXIT, 0, 0, 0, nullptr);
         }
     }
 
@@ -52,7 +52,7 @@ public:
         }
     }
 
-    inline static void add_cpu_sample_event(Thread& current_thread, const RegisterState& regs)
+    inline static void add_cpu_sample_event(Thread& current_thread, const RegisterState& regs, u32 lost_time)
     {
         PerformanceEventBuffer* perf_events = nullptr;
 
@@ -67,7 +67,7 @@ public:
         if (perf_events) {
             [[maybe_unused]] auto rc = perf_events->append_with_eip_and_ebp(
                 current_thread.pid(), current_thread.tid(),
-                regs.eip, regs.ebp, PERF_EVENT_SAMPLE, 0, 0, nullptr);
+                regs.eip, regs.ebp, PERF_EVENT_SAMPLE, lost_time, 0, 0, nullptr);
         }
     }
 
@@ -87,13 +87,20 @@ public:
 
     inline static void timer_tick(RegisterState const& regs)
     {
+        static Time last_wakeup;
+        auto now = kgettimeofday();
+        constexpr auto ideal_interval = Time::from_microseconds(1000'000 / OPTIMAL_PROFILE_TICKS_PER_SECOND_RATE);
+        auto expected_wakeup = last_wakeup + ideal_interval;
+        auto delay = (now > expected_wakeup) ? now - expected_wakeup : Time::from_microseconds(0);
+        last_wakeup = now;
         auto current_thread = Thread::current();
         // FIXME: We currently don't collect samples while idle.
         //        That will be an interesting mode to add in the future. :^)
         if (!current_thread || current_thread == Processor::current().idle_thread())
             return;
 
-        PerformanceManager::add_cpu_sample_event(*current_thread, regs);
+        auto lost_samples = delay.to_microseconds() / ideal_interval.to_microseconds();
+        PerformanceManager::add_cpu_sample_event(*current_thread, regs, lost_samples);
     }
 };
 

+ 1 - 0
Userland/DevTools/Profiler/Profile.cpp

@@ -228,6 +228,7 @@ Result<NonnullOwnPtr<Profile>, String> Profile::load_from_perfcore_file(const St
         Event event;
 
         event.timestamp = perf_event.get("timestamp").to_number<u64>();
+        event.lost_samples = perf_event.get("lost_samples").to_number<u32>();
         event.type = perf_event.get("type").to_string();
         event.pid = perf_event.get("pid").to_i32();
         event.tid = perf_event.get("tid").to_i32();

+ 1 - 0
Userland/DevTools/Profiler/Profile.h

@@ -166,6 +166,7 @@ public:
         String executable;
         int pid { 0 };
         int tid { 0 };
+        u32 lost_samples { 0 };
         bool in_kernel { false };
         Vector<Frame> frames;
     };

+ 6 - 0
Userland/DevTools/Profiler/SamplesModel.cpp

@@ -45,6 +45,8 @@ String SamplesModel::column_name(int column) const
         return "TID";
     case Column::ExecutableName:
         return "Executable";
+    case Column::LostSamples:
+        return "Lost Samples";
     case Column::InnermostStackFrame:
         return "Innermost Frame";
     default:
@@ -81,6 +83,10 @@ GUI::Variant SamplesModel::data(const GUI::ModelIndex& index, GUI::ModelRole rol
             return (u32)event.timestamp;
         }
 
+        if (index.column() == Column::LostSamples) {
+            return event.lost_samples;
+        }
+
         if (index.column() == Column::InnermostStackFrame) {
             return event.frames.last().symbol;
         }

+ 1 - 0
Userland/DevTools/Profiler/SamplesModel.h

@@ -25,6 +25,7 @@ public:
         ProcessID,
         ThreadID,
         ExecutableName,
+        LostSamples,
         InnermostStackFrame,
         __Count
     };

+ 1 - 1
Userland/DevTools/Profiler/TimelineTrack.cpp

@@ -78,7 +78,7 @@ void TimelineTrack::paint_event(GUI::PaintEvent& event)
         }
 
         auto& histogram = event.in_kernel ? kernel_histogram : usermode_histogram;
-        histogram.insert(clamp_timestamp(event.timestamp), 1);
+        histogram.insert(clamp_timestamp(event.timestamp), 1 + event.lost_samples);
     }
 
     decltype(kernel_histogram.at(0)) max_value = 0;