ladybird/Kernel/PerformanceEventBuffer.cpp
Gunnar Beutner eb798d5538 Kernel+Profiler: Improve profiling subsystem
This turns the perfcore format into more of a log than it was before,
which lets us properly log process, thread and region
creation/destruction. This also makes it unnecessary to dump the
process' regions every time it is scheduled like we did before.

Incidentally this also fixes 'profile -c' because we previously ended
up incorrectly dumping the parent's region map into the profile data.

Log-based mmap support enables profiling shared libraries which
are loaded at runtime, e.g. via dlopen().

This enables profiling both the parent and child process for
programs which use execve(). Previously we'd discard the profiling
data for the old process.

The Profiler tool has been updated to not treat thread IDs as
process IDs anymore. This enables support for processes with more
than one thread. Also, there's a new widget to filter which
process should be displayed.
2021-04-26 17:13:55 +02:00

238 lines
8.7 KiB
C++

/*
* Copyright (c) 2020-2021, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/JsonArraySerializer.h>
#include <AK/JsonObject.h>
#include <AK/JsonObjectSerializer.h>
#include <Kernel/Arch/x86/SmapDisabler.h>
#include <Kernel/FileSystem/Custody.h>
#include <Kernel/KBufferBuilder.h>
#include <Kernel/PerformanceEventBuffer.h>
#include <Kernel/Process.h>
namespace Kernel {
PerformanceEventBuffer::PerformanceEventBuffer(NonnullOwnPtr<KBuffer> buffer)
: m_buffer(move(buffer))
{
}
// Records an event of the given type on behalf of the currently running thread.
//
// The current value of the ebp register is read and handed on as the starting
// point for the stack walk; no instruction pointer is supplied (eip is 0).
// Returns whatever append_with_eip_and_ebp() returns.
NEVER_INLINE KResult PerformanceEventBuffer::append(int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
{
    // Read ebp via eax so the value lands in an ordinary local.
    FlatPtr frame_pointer;
    asm volatile("movl %%ebp, %%eax"
                 : "=a"(frame_pointer));

    auto thread = Thread::current();
    auto owner_pid = thread->pid();
    auto owner_tid = thread->tid();
    return append_with_eip_and_ebp(owner_pid, owner_tid, 0, frame_pointer, type, arg1, arg2, arg3);
}
// Collects return addresses by following the chain of saved frame pointers.
//
// - eip: if non-zero, it becomes the first entry of the result.
// - ebp: address of the first frame to inspect; 0 means there is nothing to walk.
//
// Each frame is read as two consecutive FlatPtr values: the next frame pointer
// followed by the return address. The walk ends when a read faults, when a
// return address of 0 is found, when the next frame pointer is 0, or once the
// result holds PerformanceEvent::max_stack_frame_count entries.
static Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> raw_backtrace(FlatPtr ebp, FlatPtr eip)
{
    Vector<FlatPtr, PerformanceEvent::max_stack_frame_count> frames;
    if (eip != 0)
        frames.append(eip);

    // FIXME: Figure out how to remove this SmapDisabler without breaking profile stacks.
    SmapDisabler disabler;

    FlatPtr next_frame_pointer;
    for (FlatPtr frame_pointer = ebp; frame_pointer != 0; frame_pointer = next_frame_pointer) {
        void* fault_at;

        // First word of the frame: the caller's saved frame pointer.
        if (!safe_memcpy(&next_frame_pointer, reinterpret_cast<void*>(frame_pointer), sizeof(FlatPtr), fault_at))
            break;

        // Second word of the frame: the return address.
        FlatPtr return_address;
        if (!safe_memcpy(&return_address, reinterpret_cast<void*>(frame_pointer + sizeof(FlatPtr)), sizeof(FlatPtr), fault_at))
            break;

        if (return_address == 0)
            break;

        frames.append(return_address);
        if (frames.size() == PerformanceEvent::max_stack_frame_count)
            break;
    }

    return frames;
}
// Records one event in the buffer.
//
// - pid, tid: stored in the event as the owning process and thread.
// - eip, ebp: starting point handed to raw_backtrace() to capture the stack.
// - type:     one of the PERF_EVENT_* values handled below.
// - arg1, arg2, arg3: payload whose meaning depends on `type`:
//     MALLOC:         arg1 = size, arg2 = ptr
//     FREE:           arg1 = ptr
//     MMAP:           arg1 = ptr, arg2 = size, arg3 = name
//     MUNMAP:         arg1 = ptr, arg2 = size
//     PROCESS_CREATE: arg1 = parent pid, arg3 = executable
//     PROCESS_EXEC:   arg3 = executable
//     THREAD_CREATE:  arg1 = parent tid
//     SAMPLE, PROCESS_EXIT, THREAD_EXIT: no payload
//
// Returns ENOBUFS when the buffer is already full, EINVAL for an unhandled
// event type, and KSuccess once the event has been stored.
KResult PerformanceEventBuffer::append_with_eip_and_ebp(ProcessID pid, ThreadID tid,
    u32 eip, u32 ebp, int type, FlatPtr arg1, FlatPtr arg2, const StringView& arg3)
{
    if (count() >= capacity())
        return ENOBUFS;

    // Copies arg3 into a fixed-size field: the field is zeroed first and at most
    // (size - 1) bytes are copied, so the result is always NUL-terminated.
    auto copy_arg3_into = [&arg3](void* destination, size_t destination_size) {
        memset(destination, 0, destination_size);
        if (!arg3.is_empty())
            memcpy(destination, arg3.characters_without_null_termination(), min(arg3.length(), destination_size - 1));
    };

    PerformanceEvent event;
    event.type = type;

    switch (type) {
    case PERF_EVENT_SAMPLE:
        break;
    case PERF_EVENT_MALLOC:
        event.data.malloc.size = arg1;
        event.data.malloc.ptr = arg2;
        break;
    case PERF_EVENT_FREE:
        event.data.free.ptr = arg1;
        break;
    case PERF_EVENT_MMAP:
        event.data.mmap.ptr = arg1;
        event.data.mmap.size = arg2;
        copy_arg3_into(event.data.mmap.name, sizeof(event.data.mmap.name));
        break;
    case PERF_EVENT_MUNMAP:
        // Write the munmap member: that is the one to_json_impl() reads back for
        // this event type, so we must not rely on it sharing a layout with mmap.
        event.data.munmap.ptr = arg1;
        event.data.munmap.size = arg2;
        break;
    case PERF_EVENT_PROCESS_CREATE:
        event.data.process_create.parent_pid = arg1;
        copy_arg3_into(event.data.process_create.executable, sizeof(event.data.process_create.executable));
        break;
    case PERF_EVENT_PROCESS_EXEC:
        copy_arg3_into(event.data.process_exec.executable, sizeof(event.data.process_exec.executable));
        break;
    case PERF_EVENT_PROCESS_EXIT:
        break;
    case PERF_EVENT_THREAD_CREATE:
        event.data.thread_create.parent_tid = arg1;
        break;
    case PERF_EVENT_THREAD_EXIT:
        break;
    default:
        return EINVAL;
    }

    // Capture the stack and clamp it to what the event's stack array can hold.
    auto backtrace = raw_backtrace(ebp, eip);
    event.stack_size = min(sizeof(event.stack) / sizeof(FlatPtr), static_cast<size_t>(backtrace.size()));
    memcpy(event.stack, backtrace.data(), event.stack_size * sizeof(FlatPtr));

    event.pid = pid.value();
    event.tid = tid.value();
    event.timestamp = TimeManagement::the().uptime_ms();

    at(m_count++) = event;
    return KSuccess;
}
// Returns the event slot at `index`, treating the backing buffer as an array of
// PerformanceEvent. The index must be below capacity(); this is VERIFY()ed.
PerformanceEvent& PerformanceEventBuffer::at(size_t index)
{
    VERIFY(index < capacity());
    return reinterpret_cast<PerformanceEvent*>(m_buffer->data())[index];
}
template<typename Serializer>
bool PerformanceEventBuffer::to_json_impl(Serializer& object) const
{
auto array = object.add_array("events");
for (size_t i = 0; i < m_count; ++i) {
auto& event = at(i);
auto event_object = array.add_object();
switch (event.type) {
case PERF_EVENT_SAMPLE:
event_object.add("type", "sample");
break;
case PERF_EVENT_MALLOC:
event_object.add("type", "malloc");
event_object.add("ptr", static_cast<u64>(event.data.malloc.ptr));
event_object.add("size", static_cast<u64>(event.data.malloc.size));
break;
case PERF_EVENT_FREE:
event_object.add("type", "free");
event_object.add("ptr", static_cast<u64>(event.data.free.ptr));
break;
case PERF_EVENT_MMAP:
event_object.add("type", "mmap");
event_object.add("ptr", static_cast<u64>(event.data.mmap.ptr));
event_object.add("size", static_cast<u64>(event.data.mmap.size));
event_object.add("name", event.data.mmap.name);
break;
case PERF_EVENT_MUNMAP:
event_object.add("type", "munmap");
event_object.add("ptr", static_cast<u64>(event.data.munmap.ptr));
event_object.add("size", static_cast<u64>(event.data.munmap.size));
break;
case PERF_EVENT_PROCESS_CREATE:
event_object.add("type", "process_create");
event_object.add("parent_pid", static_cast<u64>(event.data.process_create.parent_pid));
event_object.add("executable", event.data.process_create.executable);
break;
case PERF_EVENT_PROCESS_EXEC:
event_object.add("type", "process_exec");
event_object.add("executable", event.data.process_exec.executable);
break;
case PERF_EVENT_PROCESS_EXIT:
event_object.add("type", "process_exit");
break;
case PERF_EVENT_THREAD_CREATE:
event_object.add("type", "thread_create");
event_object.add("parent_tid", static_cast<u64>(event.data.thread_create.parent_tid));
break;
case PERF_EVENT_THREAD_EXIT:
event_object.add("type", "thread_exit");
break;
}
event_object.add("pid", event.pid);
event_object.add("tid", event.tid);
event_object.add("timestamp", event.timestamp);
auto stack_array = event_object.add_array("stack");
for (size_t j = 0; j < event.stack_size; ++j) {
stack_array.add(event.stack[j]);
}
stack_array.finish();
event_object.finish();
}
array.finish();
object.finish();
return true;
}
// Serializes all recorded events as a JSON object into `builder`.
// Returns the result of to_json_impl().
bool PerformanceEventBuffer::to_json(KBufferBuilder& builder) const
{
    JsonObjectSerializer root(builder);
    return to_json_impl(root);
}
// Creates a PerformanceEventBuffer backed by a freshly created, readable and
// writable KBuffer of `buffer_size` bytes named "Performance events".
// Returns an empty OwnPtr if the KBuffer could not be created.
OwnPtr<PerformanceEventBuffer> PerformanceEventBuffer::try_create_with_size(size_t buffer_size)
{
    auto backing_store = KBuffer::try_create_with_size(buffer_size, Region::Access::Read | Region::Access::Write, "Performance events", AllocationStrategy::AllocateNow);
    if (backing_store)
        return adopt_own(*new PerformanceEventBuffer(backing_store.release_nonnull()));
    return {};
}
// Logs a snapshot of `process`: one process event (create or exec, depending on
// `event_type`), then a thread-create event per thread, then an mmap event per
// region of the process's address space.
//
// The process's space lock is held for the whole function. Results of the
// individual appends are deliberately ignored.
void PerformanceEventBuffer::add_process(const Process& process, ProcessEventType event_type)
{
    ScopedSpinLock locker(process.space().get_lock());

    // Use the executable's absolute path when there is one, "<name>" otherwise.
    String executable = process.executable()
        ? process.executable()->absolute_path()
        : String::formatted("<{}>", process.name());

    auto process_event = event_type == ProcessEventType::Create ? PERF_EVENT_PROCESS_CREATE : PERF_EVENT_PROCESS_EXEC;

    // NOTE(review): for PERF_EVENT_PROCESS_CREATE, append_with_eip_and_ebp() stores
    // arg1 as parent_pid, yet the process's own pid is passed here. Confirm intent.
    [[maybe_unused]] auto process_result = append_with_eip_and_ebp(process.pid(), 0, 0, 0,
        process_event, process.pid().value(), 0, executable.characters());

    process.for_each_thread([&](auto& thread) {
        [[maybe_unused]] auto thread_result = append_with_eip_and_ebp(process.pid(), thread.tid().value(),
            0, 0, PERF_EVENT_THREAD_CREATE, 0, 0, nullptr);
        return IterationDecision::Continue;
    });

    for (auto& region : process.space().regions()) {
        auto& range = region->range();
        [[maybe_unused]] auto region_result = append_with_eip_and_ebp(process.pid(), 0,
            0, 0, PERF_EVENT_MMAP, range.base().get(), range.size(), region->name().characters());
    }
}
}