Jelajahi Sumber

LibDebug: Support shared libraries

DebugSession now makes the loader stop after loading the libraries,
and parses the loaded libraries of the program before continuing its
execution.

DebugSession now also supports inserting a breakpoint at a given symbol
or source position.
Additionally, DebugInfo now takes the base address of its object into
consideration.
Itamar 4 tahun lalu
induk
melakukan
4b91e7c821

+ 1 - 1
Libraries/LibDebug/CMakeLists.txt

@@ -11,4 +11,4 @@ set(SOURCES
 )
 
 serenity_lib(LibDebug debug)
-target_link_libraries(LibDebug LibC)
+target_link_libraries(LibDebug LibC LibRegex)

+ 28 - 7
Libraries/LibDebug/DebugInfo.cpp

@@ -25,6 +25,7 @@
  */
 
 #include "DebugInfo.h"
+#include <AK/LexicalPath.h>
 #include <AK/MemoryStream.h>
 #include <AK/QuickSort.h>
 #include <LibDebug/Dwarf/CompilationUnit.h>
@@ -35,8 +36,10 @@
 
 namespace Debug {
 
-DebugInfo::DebugInfo(NonnullOwnPtr<const ELF::Image> elf)
+DebugInfo::DebugInfo(NonnullOwnPtr<const ELF::Image> elf, String source_root, FlatPtr base_address)
     : m_elf(move(elf))
+    , m_source_root(source_root)
+    , m_base_address(base_address)
     , m_dwarf_info(*m_elf)
 {
     prepare_variable_scopes();
@@ -124,6 +127,9 @@ void DebugInfo::prepare_lines()
             auto start_index = file_path.index_of(serenity_slash).value() + serenity_slash.length();
             file_path = file_path.substring(start_index, file_path.length() - start_index);
         }
+        if (file_path.starts_with("./") && !m_source_root.is_null()) {
+            file_path = LexicalPath::canonicalized_path(String::formatted("{}/{}", m_source_root, file_path));
+        }
         m_sorted_lines.append({ line_info.address, file_path, line_info.line });
     }
     quick_sort(m_sorted_lines, [](auto& a, auto& b) {
@@ -147,19 +153,34 @@ Optional<DebugInfo::SourcePosition> DebugInfo::get_source_position(u32 target_ad
     return {};
 }
 
-Optional<u32> DebugInfo::get_instruction_from_source(const String& file, size_t line) const
+Optional<DebugInfo::SourcePositionAndAddress> DebugInfo::get_address_from_source_position(const String& file, size_t line) const
 {
     String file_path = file;
+    if (!file_path.starts_with("/"))
+        file_path = String::format("/%s", file_path.characters());
+
     constexpr char SERENITY_LIBS_PREFIX[] = "/usr/src/serenity";
     if (file.starts_with(SERENITY_LIBS_PREFIX)) {
         file_path = file.substring(sizeof(SERENITY_LIBS_PREFIX), file.length() - sizeof(SERENITY_LIBS_PREFIX));
         file_path = String::format("../%s", file_path.characters());
     }
+
+    Optional<SourcePositionAndAddress> result;
     for (const auto& line_entry : m_sorted_lines) {
-        if (line_entry.file == file_path && line_entry.line == line)
-            return Optional<u32>(line_entry.address);
+        if (!line_entry.file.ends_with(file_path))
+            continue;
+
+        if (line_entry.line > line)
+            continue;
+
+        // We look for the source position that is closest to the desired position, and is not after it.
+        // For example, get_address_from_source_position("main.cpp", 73) could return the address for an instruction whose location is ("main.cpp", 72)
+        // as there might not be an instruction mapped for "main.cpp", 73.
+        if (!result.has_value() || (line_entry.line > result.value().line)) {
+            result = SourcePositionAndAddress { line_entry.file, line_entry.line, line_entry.address };
+        }
     }
-    return {};
+    return result;
 }
 
 NonnullOwnPtrVector<DebugInfo::VariableInfo> DebugInfo::get_variables_in_current_scope(const PtraceRegisters& regs) const
@@ -168,7 +189,7 @@ NonnullOwnPtrVector<DebugInfo::VariableInfo> DebugInfo::get_variables_in_current
 
     // TODO: We can store the scopes in a better data structure
     for (const auto& scope : m_scopes) {
-        if (regs.eip < scope.address_low || regs.eip >= scope.address_high)
+        if (regs.eip - m_base_address < scope.address_low || regs.eip - m_base_address >= scope.address_high)
             continue;
 
         for (const auto& die_entry : scope.dies_of_variables) {
@@ -336,7 +357,7 @@ Vector<DebugInfo::SourcePosition> DebugInfo::source_lines_in_scope(const Variabl
 
 DebugInfo::SourcePosition DebugInfo::SourcePosition::from_line_info(const Dwarf::LineProgram::LineInfo& line)
 {
-    return { line.file, line.line, line.address };
+    return { line.file, line.line, { line.address } };
 }
 
 }

+ 28 - 3
Libraries/LibDebug/DebugInfo.h

@@ -40,14 +40,30 @@ namespace Debug {
 
 class DebugInfo {
 public:
-    explicit DebugInfo(NonnullOwnPtr<const ELF::Image>);
+    explicit DebugInfo(NonnullOwnPtr<const ELF::Image>, String source_root = {}, FlatPtr base_address = 0);
 
     const ELF::Image& elf() const { return *m_elf; }
 
     struct SourcePosition {
         FlyString file_path;
         size_t line_number { 0 };
-        u32 address_of_first_statement { 0 };
+        Optional<u32> address_of_first_statement;
+
+        SourcePosition()
+            : SourcePosition(String::empty(), 0)
+        {
+        }
+        SourcePosition(String file_path, size_t line_number)
+            : file_path(file_path)
+            , line_number(line_number)
+        {
+        }
+        SourcePosition(String file_path, size_t line_number, u32 address_of_first_statement)
+            : file_path(file_path)
+            , line_number(line_number)
+            , address_of_first_statement(address_of_first_statement)
+        {
+        }
 
         bool operator==(const SourcePosition& other) const { return file_path == other.file_path && line_number == other.line_number; }
         bool operator!=(const SourcePosition& other) const { return !(*this == other); }
@@ -93,7 +109,14 @@ public:
     NonnullOwnPtrVector<VariableInfo> get_variables_in_current_scope(const PtraceRegisters&) const;
 
     Optional<SourcePosition> get_source_position(u32 address) const;
-    Optional<u32> get_instruction_from_source(const String& file, size_t line) const;
+
+    struct SourcePositionAndAddress {
+        String file;
+        size_t line;
+        FlatPtr address;
+    };
+
+    Optional<SourcePositionAndAddress> get_address_from_source_position(const String& file, size_t line) const;
 
     template<typename Callback>
     void for_each_source_position(Callback callback) const
@@ -120,6 +143,8 @@ private:
     OwnPtr<VariableInfo> create_variable_info(const Dwarf::DIE& variable_die, const PtraceRegisters&) const;
 
     NonnullOwnPtr<const ELF::Image> m_elf;
+    String m_source_root;
+    FlatPtr m_base_address { 0 };
     Dwarf::DwarfInfo m_dwarf_info;
 
     Vector<VariablesScope> m_scopes;

+ 174 - 6
Libraries/LibDebug/DebugSession.cpp

@@ -25,15 +25,20 @@
  */
 
 #include "DebugSession.h"
+#include <AK/JsonObject.h>
+#include <AK/JsonValue.h>
+#include <AK/LexicalPath.h>
 #include <AK/Optional.h>
+#include <LibCore/File.h>
+#include <LibRegex/Regex.h>
 #include <stdlib.h>
 
 namespace Debug {
 
-DebugSession::DebugSession(pid_t pid)
+DebugSession::DebugSession(pid_t pid, String source_root)
     : m_debuggee_pid(pid)
-    , m_executable(map_executable_for_process(pid))
-    , m_debug_info(make<ELF::Image>(reinterpret_cast<const u8*>(m_executable.data()), m_executable.size()))
+    , m_source_root(source_root)
+
 {
 }
 
@@ -59,7 +64,7 @@ DebugSession::~DebugSession()
     }
 }
 
-OwnPtr<DebugSession> DebugSession::exec_and_attach(const String& command)
+OwnPtr<DebugSession> DebugSession::exec_and_attach(const String& command, String source_root)
 {
     auto pid = fork();
 
@@ -80,7 +85,10 @@ OwnPtr<DebugSession> DebugSession::exec_and_attach(const String& command)
         for (size_t i = 0; i < parts.size(); i++) {
             args[i] = parts[i].characters();
         }
-        int rc = execvp(args[0], const_cast<char**>(args));
+        const char** envp = (const char**)calloc(2, sizeof(const char*));
+        // This causes the loader to stop on a breakpoint before jumping to the entry point of the program.
+        envp[0] = "_LOADER_BREAKPOINT=1";
+        int rc = execvpe(args[0], const_cast<char**>(args), const_cast<char**>(envp));
         if (rc < 0) {
             perror("execvp");
         }
@@ -107,7 +115,19 @@ OwnPtr<DebugSession> DebugSession::exec_and_attach(const String& command)
         return nullptr;
     }
 
-    return adopt_own(*new DebugSession(pid));
+    auto debug_session = adopt_own(*new DebugSession(pid, source_root));
+
+    // Continue until breakpoint before entry point of main program
+    int wstatus = debug_session->continue_debuggee_and_wait();
+    if (WSTOPSIG(wstatus) != SIGTRAP) {
+        dbgln("expected SIGTRAP");
+        return nullptr;
+    }
+
+    // At this point, libraries should have been loaded
+    debug_session->update_loaded_libs();
+
+    return move(debug_session);
 }
 
 bool DebugSession::poke(u32* address, u32 data)
@@ -268,4 +288,152 @@ void DebugSession::detach()
     continue_debuggee();
 }
 
+Optional<DebugSession::InsertBreakpointAtSymbolResult> DebugSession::insert_breakpoint(const String& symbol_name)
+{
+    Optional<InsertBreakpointAtSymbolResult> result;
+    for_each_loaded_library([this, symbol_name, &result](auto& lib) {
+        // The loader contains its own definitions for LibC symbols, so we don't want to include it in the search.
+        if (lib.name == "Loader.so")
+            return IterationDecision::Continue;
+
+        auto symbol = lib.debug_info->elf().find_demangled_function(symbol_name);
+        if (!symbol.has_value())
+            return IterationDecision::Continue;
+
+        auto breakpoint_address = symbol.value().value() + lib.base_address;
+        bool rc = this->insert_breakpoint(reinterpret_cast<void*>(breakpoint_address));
+        if (!rc)
+            return IterationDecision::Break;
+
+        result = InsertBreakpointAtSymbolResult { lib.name, breakpoint_address };
+        return IterationDecision::Break;
+    });
+    return result;
+}
+
+Optional<DebugSession::InsertBreakpointAtSourcePositionResult> DebugSession::insert_breakpoint(const String& file_name, size_t line_number)
+{
+    auto address_and_source_position = get_address_from_source_position(file_name, line_number);
+    if (!address_and_source_position.has_value())
+        return {};
+
+    auto address = address_and_source_position.value().address;
+    bool rc = this->insert_breakpoint(reinterpret_cast<void*>(address));
+    if (!rc)
+        return {};
+
+    auto lib = library_at(address);
+    ASSERT(lib);
+
+    return InsertBreakpointAtSourcePositionResult { lib->name, address_and_source_position.value().file, address_and_source_position.value().line, address };
+}
+
+void DebugSession::update_loaded_libs()
+{
+    auto file = Core::File::construct(String::format("/proc/%u/vm", m_debuggee_pid));
+    bool rc = file->open(Core::IODevice::ReadOnly);
+    ASSERT(rc);
+
+    auto file_contents = file->read_all();
+    auto json = JsonValue::from_string(file_contents);
+    ASSERT(json.has_value());
+
+    auto vm_entries = json.value().as_array();
+    Regex<PosixExtended> re("(.+): \\.text");
+
+    auto get_path_to_object = [&re](const String& vm_name) -> Optional<String> {
+        if (vm_name == "/usr/lib/Loader.so")
+            return vm_name;
+        RegexResult result;
+        auto rc = re.search(vm_name, result);
+        if (!rc)
+            return {};
+        auto lib_name = result.capture_group_matches.at(0).at(0).view.u8view().to_string();
+        if (lib_name.starts_with("/"))
+            return lib_name;
+        return String::format("/usr/lib/%s", lib_name.characters());
+    };
+
+    vm_entries.for_each([&](auto& entry) {
+        // TODO: check that region is executable
+        auto vm_name = entry.as_object().get("name").as_string();
+
+        auto object_path = get_path_to_object(vm_name);
+        if (!object_path.has_value())
+            return IterationDecision::Continue;
+
+        String lib_name = object_path.value();
+        if (lib_name.ends_with(".so"))
+            lib_name = LexicalPath(object_path.value()).basename();
+
+        // FIXME: DebugInfo currently cannot parse the debug information of libgcc_s.so
+        if (lib_name == "libgcc_s.so")
+            return IterationDecision::Continue;
+
+        if (m_loaded_libraries.contains(lib_name))
+            return IterationDecision::Continue;
+
+        MappedFile lib_file(object_path.value());
+        if (!lib_file.is_valid())
+            return IterationDecision::Continue;
+
+        FlatPtr base_address = entry.as_object().get("address").as_u32();
+        auto debug_info = make<DebugInfo>(make<ELF::Image>(reinterpret_cast<const u8*>(lib_file.data()), lib_file.size()), m_source_root, base_address);
+        auto lib = make<LoadedLibrary>(lib_name, move(lib_file), move(debug_info), base_address);
+        m_loaded_libraries.set(lib_name, move(lib));
+
+        return IterationDecision::Continue;
+    });
+}
+
+const DebugSession::LoadedLibrary* DebugSession::library_at(FlatPtr address) const
+{
+    const LoadedLibrary* result = nullptr;
+    for_each_loaded_library([&result, address](const auto& lib) {
+        if (address >= lib.base_address && address < lib.base_address + lib.debug_info->elf().size()) {
+            result = &lib;
+            return IterationDecision::Break;
+        }
+        return IterationDecision::Continue;
+    });
+    return result;
+}
+
+Optional<DebugSession::SymbolicationResult> DebugSession::symbolicate(FlatPtr address) const
+{
+    auto* lib = library_at(address);
+    if (!lib)
+        return {};
+    // FIXME: ELF::Image symbolicate() API should return String::empty() if symbol is not found (It currently returns ??)
+    auto symbol = lib->debug_info->elf().symbolicate(address - lib->base_address);
+    return { { lib->name, symbol } };
+}
+
+Optional<DebugInfo::SourcePositionAndAddress> DebugSession::get_address_from_source_position(const String& file, size_t line) const
+{
+    Optional<DebugInfo::SourcePositionAndAddress> result;
+    for_each_loaded_library([this, file, line, &result](auto& lib) {
+        // The loader contains its own definitions for LibC symbols, so we don't want to include it in the search.
+        if (lib.name == "Loader.so")
+            return IterationDecision::Continue;
+
+        auto source_position_and_address = lib.debug_info->get_address_from_source_position(file, line);
+        if (!source_position_and_address.has_value())
+            return IterationDecision::Continue;
+
+        result = source_position_and_address;
+        result.value().address += lib.base_address;
+        return IterationDecision::Break;
+    });
+    return result;
+}
+
+Optional<DebugInfo::SourcePosition> DebugSession::get_source_position(FlatPtr address) const
+{
+    auto* lib = library_at(address);
+    if (!lib)
+        return {};
+    return lib->debug_info->get_source_position(address - lib->base_address);
+}
+
 }

+ 76 - 17
Libraries/LibDebug/DebugSession.h

@@ -45,7 +45,7 @@ namespace Debug {
 
 class DebugSession {
 public:
-    static OwnPtr<DebugSession> exec_and_attach(const String& command);
+    static OwnPtr<DebugSession> exec_and_attach(const String& command, String source_root = {});
 
     ~DebugSession();
 
@@ -60,11 +60,27 @@ public:
     };
 
     struct BreakPoint {
-        void* address;
-        u32 original_first_word;
-        BreakPointState state;
+        void* address { nullptr };
+        u32 original_first_word { 0 };
+        BreakPointState state { BreakPointState::Disabled };
     };
 
+    struct InsertBreakpointAtSymbolResult {
+        String library_name;
+        FlatPtr address { 0 };
+    };
+
+    Optional<InsertBreakpointAtSymbolResult> insert_breakpoint(const String& symbol_name);
+
+    struct InsertBreakpointAtSourcePositionResult {
+        String library_name;
+        String file_name;
+        size_t line_number { 0 };
+        FlatPtr address { 0 };
+    };
+
+    Optional<InsertBreakpointAtSourcePositionResult> insert_breakpoint(const String& file_name, size_t line_number);
+
     bool insert_breakpoint(void* address);
     bool disable_breakpoint(void* address);
     bool enable_breakpoint(void* address);
@@ -95,12 +111,12 @@ public:
 
     void detach();
 
+    enum DesiredInitialDebugeeState {
+        Running,
+        Stopped
+    };
     template<typename Callback>
-    void run(Callback callback);
-
-    const ELF::Image& elf() const { return m_debug_info.elf(); }
-    const MappedFile& executable() const { return m_executable; }
-    const DebugInfo& debug_info() const { return m_debug_info; }
+    void run(DesiredInitialDebugeeState, Callback);
 
     enum DebugDecision {
         Continue,
@@ -116,38 +132,79 @@ public:
         Exited,
     };
 
+    struct LoadedLibrary {
+        String name;
+        MappedFile file;
+        NonnullOwnPtr<DebugInfo> debug_info;
+        FlatPtr base_address;
+
+        LoadedLibrary(const String& name, MappedFile&& file, NonnullOwnPtr<DebugInfo>&& debug_info, FlatPtr base_address)
+            : name(name)
+            , file(move(file))
+            , debug_info(move(debug_info))
+            , base_address(base_address)
+        {
+        }
+    };
+
+    template<typename Func>
+    void for_each_loaded_library(Func f) const
+    {
+        for (const auto& lib_name : m_loaded_libraries.keys()) {
+            const auto& lib = *m_loaded_libraries.get(lib_name).value();
+            if (f(lib) == IterationDecision::Break)
+                break;
+        }
+    }
+
+    const LoadedLibrary* library_at(FlatPtr address) const;
+
+    struct SymbolicationResult {
+        String library_name;
+        String symbol;
+    };
+    Optional<SymbolicationResult> symbolicate(FlatPtr address) const;
+
+    Optional<DebugInfo::SourcePositionAndAddress> get_address_from_source_position(const String& file, size_t line) const;
+
+    Optional<DebugInfo::SourcePosition> get_source_position(FlatPtr address) const;
+
 private:
-    explicit DebugSession(pid_t);
+    explicit DebugSession(pid_t, String source_root);
 
     // x86 breakpoint instruction "int3"
     static constexpr u8 BREAKPOINT_INSTRUCTION = 0xcc;
 
     static MappedFile map_executable_for_process(pid_t);
 
+    void update_loaded_libs();
+
     int m_debuggee_pid { -1 };
+    String m_source_root;
     bool m_is_debuggee_dead { false };
 
-    MappedFile m_executable;
-    DebugInfo m_debug_info;
-
     HashMap<void*, BreakPoint> m_breakpoints;
+
+    // Maps from base address to loaded library
+    HashMap<String, NonnullOwnPtr<LoadedLibrary>> m_loaded_libraries;
 };
 
 template<typename Callback>
-void DebugSession::run(Callback callback)
+void DebugSession::run(DesiredInitialDebugeeState initial_debugee_state, Callback callback)
 {
 
     enum class State {
+        FirstIteration,
         FreeRun,
         Syscall,
         ConsecutiveBreakpoint,
         SingleStep,
     };
 
-    State state { State::FreeRun };
+    State state { State::FirstIteration };
 
     auto do_continue_and_wait = [&]() {
-        int wstatus = continue_debuggee_and_wait((state == State::FreeRun) ? ContinueType::FreeRun : ContinueType::Syscall);
+        int wstatus = continue_debuggee_and_wait((state == State::Syscall) ? ContinueType::Syscall : ContinueType::FreeRun);
 
         // FIXME: This check actually only checks whether the debuggee
         // stopped because it hit a breakpoint/syscall/is in single stepping mode or not
@@ -160,10 +217,12 @@ void DebugSession::run(Callback callback)
     };
 
     for (;;) {
-        if (state == State::FreeRun || state == State::Syscall) {
+        if ((state == State::FirstIteration && initial_debugee_state == DesiredInitialDebugeeState::Running) || state == State::FreeRun || state == State::Syscall) {
             if (do_continue_and_wait())
                 break;
         }
+        if (state == State::FirstIteration)
+            state = State::FreeRun;
 
         auto regs = get_registers();
         Optional<BreakPoint> current_breakpoint;

+ 22 - 20
Userland/functrace.cpp

@@ -84,25 +84,27 @@ static void print_syscall(PtraceRegisters& regs, size_t depth)
 
 static NonnullOwnPtr<HashMap<void*, X86::Instruction>> instrument_code()
 {
-    [[maybe_unused]] auto r = demangle("foo"); // Required for linked with __cxa_demangle
     auto instrumented = make<HashMap<void*, X86::Instruction>>();
-    g_debug_session->elf().for_each_section_of_type(SHT_PROGBITS, [&](const ELF::Image::Section& section) {
-        if (section.name() != ".text")
-            return IterationDecision::Continue;
-
-        X86::SimpleInstructionStream stream((const u8*)((u32)g_debug_session->executable().data() + section.offset()), section.size());
-        X86::Disassembler disassembler(stream);
-        for (;;) {
-            auto offset = stream.offset();
-            void* instruction_address = (void*)(section.address() + offset);
-            auto insn = disassembler.next();
-            if (!insn.has_value())
-                break;
-            if (insn.value().mnemonic() == "RET" || insn.value().mnemonic() == "CALL") {
-                g_debug_session->insert_breakpoint(instruction_address);
-                instrumented->set(instruction_address, insn.value());
+    g_debug_session->for_each_loaded_library([&](const Debug::DebugSession::LoadedLibrary& lib) {
+        lib.debug_info->elf().for_each_section_of_type(SHT_PROGBITS, [&](const ELF::Image::Section& section) {
+            if (section.name() != ".text")
+                return IterationDecision::Continue;
+
+            X86::SimpleInstructionStream stream((const u8*)((u32)lib.file.data() + section.offset()), section.size());
+            X86::Disassembler disassembler(stream);
+            for (;;) {
+                auto offset = stream.offset();
+                void* instruction_address = (void*)(section.address() + offset + lib.base_address);
+                auto insn = disassembler.next();
+                if (!insn.has_value())
+                    break;
+                if (insn.value().mnemonic() == "RET" || insn.value().mnemonic() == "CALL") {
+                    g_debug_session->insert_breakpoint(instruction_address);
+                    instrumented->set(instruction_address, insn.value());
+                }
             }
-        }
+            return IterationDecision::Continue;
+        });
         return IterationDecision::Continue;
     });
     return instrumented;
@@ -142,7 +144,7 @@ int main(int argc, char** argv)
     size_t depth = 0;
     bool new_function = true;
 
-    g_debug_session->run([&](Debug::DebugSession::DebugBreakReason reason, Optional<PtraceRegisters> regs) {
+    g_debug_session->run(Debug::DebugSession::DesiredInitialDebugeeState::Running, [&](Debug::DebugSession::DebugBreakReason reason, Optional<PtraceRegisters> regs) {
         if (reason == Debug::DebugSession::DebugBreakReason::Exited) {
             outln("Program exited.");
             return Debug::DebugSession::DebugDecision::Detach;
@@ -154,8 +156,8 @@ int main(int argc, char** argv)
         }
 
         if (new_function) {
-            auto function_name = g_debug_session->elf().symbolicate(regs.value().eip);
-            print_function_call(function_name, depth);
+            auto function_name = g_debug_session->symbolicate(regs.value().eip);
+            print_function_call(function_name.value().symbol, depth);
             new_function = false;
             return Debug::DebugSession::ContinueBreakAtSyscall;
         }