Prechádzať zdrojové kódy

LibDebug: Parse line number information from DWARF format

DWARF line number information, if generated, is stored in the
.debug_line section of an object file.

The information is encoded as instructions for a VM that is defined in
the DWARF specification.
By executing these instructions, we can extract the encoded line number
information.
Itamar 5 rokov pred
rodič
commit
8a886e0e96

+ 89 - 0
AK/BufferStream.h

@@ -317,6 +317,95 @@ public:
         return *this;
         return *this;
     }
     }
 
 
+    // Copies `size` bytes from the current offset into `raw_data` and advances
+    // the offset past them.
+    // If fewer than `size` bytes remain, nothing is copied, the offset is left
+    // untouched and the read-failure flag is set.
+    BufferStream& read_raw(u8* raw_data, size_t size)
+    {
+        if (m_offset + size > m_buffer.size()) {
+            m_read_failure = true;
+            return *this;
+        }
+        __builtin_memcpy(raw_data, m_buffer.data() + m_offset, size);
+        m_offset += size;
+        return *this;
+    }
+
+    // Returns the byte at the current offset without consuming it.
+    // When no byte is left, the read-failure flag is set and 0 is returned.
+    u8 peek()
+    {
+        if (m_offset < m_buffer.size())
+            return m_buffer[m_offset];
+
+        m_read_failure = true;
+        return 0;
+    }
+
+    // Reads a NUL-terminated string starting at the current offset into `str`
+    // and moves the offset past the terminator.
+    // The read-failure flag is set when the stream is already exhausted, or
+    // when the buffer ends before a terminator is found. In the latter case
+    // `str` receives the remaining bytes and the offset stops at the end of
+    // the buffer (so at_end() becomes true) instead of running past it.
+    BufferStream& operator>>(String& str)
+    {
+        if (m_offset >= m_buffer.size()) {
+            m_read_failure = true;
+            return *this;
+        }
+        size_t string_size = 0;
+        while (m_offset + string_size < m_buffer.size() && m_buffer[m_offset + string_size])
+            ++string_size;
+
+        str = String(reinterpret_cast<const char*>(&m_buffer[m_offset]), string_size);
+        m_offset += string_size;
+
+        if (m_offset >= m_buffer.size()) {
+            // Ran off the end of the buffer without seeing the terminator.
+            m_read_failure = true;
+            return *this;
+        }
+        ++m_offset; // consume the NUL terminator
+        return *this;
+    }
+
+    // LEB128 is a variable-length encoding for integers
+    //
+    // Decodes an unsigned LEB128 value at the current offset into `result` and
+    // advances the offset past it. Each encoded byte contributes its low 7 bits
+    // (least significant group first); the high bit marks that more bytes follow.
+    // If the buffer ends in the middle of the value, the read-failure flag is
+    // set and `result` holds the bits decoded so far.
+    BufferStream& read_LEB128_unsigned(size_t& result)
+    {
+        result = 0;
+        size_t num_bytes = 0;
+        while (true) {
+            // An offset equal to size() is already out of bounds.
+            if (m_offset >= m_buffer.size()) {
+                m_read_failure = true;
+                break;
+            }
+            const u8 byte = m_buffer[m_offset];
+            // Groups that no longer fit into size_t are dropped: shifting by the
+            // full width (or more) would be undefined behaviour.
+            if (num_bytes * 7 < sizeof(size_t) * 8)
+                result |= static_cast<size_t>(byte & 0x7f) << (num_bytes * 7);
+            ++m_offset;
+            if (!(byte & 0x80))
+                break;
+            ++num_bytes;
+        }
+
+        return *this;
+    }
+
+    // LEB128 is a variable-length encoding for integers
+    //
+    // Decodes a signed LEB128 value at the current offset into `result` and
+    // advances the offset past it. Bit 6 of the final byte is the sign bit; when
+    // it is set, the bits above the decoded groups are filled with ones.
+    // If the buffer ends in the middle of the value, the read-failure flag is
+    // set and `result` holds the bits decoded so far (without sign extension).
+    BufferStream& read_LEB128_signed(ssize_t& result)
+    {
+        constexpr size_t bit_width = sizeof(size_t) * 8;
+
+        result = 0;
+        size_t value = 0;
+        size_t num_bytes = 0;
+        u8 byte = 0;
+        do {
+            // An offset equal to size() is already out of bounds.
+            if (m_offset >= m_buffer.size()) {
+                m_read_failure = true;
+                result = static_cast<ssize_t>(value);
+                return *this;
+            }
+            byte = m_buffer[m_offset];
+            // Groups that no longer fit are dropped: shifting by the full
+            // width (or more) would be undefined behaviour.
+            if (num_bytes * 7 < bit_width)
+                value |= static_cast<size_t>(byte & 0x7f) << (num_bytes * 7);
+            ++m_offset;
+            ++num_bytes;
+        } while (byte & 0x80);
+
+        if (num_bytes * 7 < bit_width && (byte & 0x40)) {
+            // sign extend
+            value |= static_cast<size_t>(-1) << (num_bytes * 7);
+        }
+        result = static_cast<ssize_t>(value);
+        return *this;
+    }
+
+    // Skips `amount` bytes. If that would move the offset beyond the end of
+    // the buffer, the offset is left unchanged and the read-failure flag is set.
+    BufferStream& advance(size_t amount)
+    {
+        if (m_offset + amount <= m_buffer.size())
+            m_offset += amount;
+        else
+            m_read_failure = true;
+        return *this;
+    }
+
     bool at_end() const
     bool at_end() const
     {
     {
         return m_offset == m_buffer.size();
         return m_offset == m_buffer.size();

+ 91 - 0
Libraries/LibDebug/DebugInfo.cpp

@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "DebugInfo.h"
+#include <AK/QuickSort.h>
+
+// Takes shared ownership of the ELF loader and immediately parses its line
+// number information so that lookups can be served afterwards.
+DebugInfo::DebugInfo(NonnullRefPtr<const ELF::Loader> elf)
+    : m_elf(move(elf)) // `elf` is already our own copy; move it instead of copying again
+{
+    prepare_lines();
+}
+
+// Runs every line program stored in the ELF image's .debug_line section and
+// fills m_sorted_lines with the resulting entries, sorted by address.
+// Entries whose path contains "Toolchain/" are dropped; for paths containing
+// "serenity/", everything up to and including that component is stripped.
+void DebugInfo::prepare_lines()
+{
+    auto section = m_elf->image().lookup_section(".debug_line");
+    ASSERT(!section.is_undefined());
+
+    auto buffer = ByteBuffer::wrap(reinterpret_cast<const u8*>(section.raw_data()), section.size());
+    BufferStream stream(buffer);
+
+    // The section is a sequence of units; each LineProgram consumes one unit
+    // from the shared stream.
+    Vector<LineProgram::LineInfo> all_lines;
+    while (!stream.at_end()) {
+        LineProgram program(stream);
+        // lines() returns a const reference, so the entries are copied here
+        // (wrapping it in move() would not change that).
+        all_lines.append(program.lines());
+    }
+
+    const String serenity_prefix("serenity/");
+    for (auto& line_info : all_lines) {
+        String file_path = line_info.file;
+        if (file_path.contains("Toolchain/"))
+            continue;
+        auto prefix_index = file_path.index_of(serenity_prefix);
+        if (prefix_index.has_value()) {
+            auto start_index = prefix_index.value() + serenity_prefix.length();
+            file_path = file_path.substring(start_index, file_path.length() - start_index);
+        }
+        m_sorted_lines.append({ line_info.address, file_path, line_info.line });
+    }
+    quick_sort(m_sorted_lines, [](auto& a, auto& b) {
+        return a.address < b.address;
+    });
+}
+
+// Looks up the source position of the line entry that covers `target_address`:
+// the entry directly preceding the first entry whose address is greater than
+// the target. Returns an empty Optional when there are no entries, when the
+// target lies before the first entry, or when no entry has a greater address.
+Optional<DebugInfo::SourcePosition> DebugInfo::get_source_position(u32 target_address) const
+{
+    if (m_sorted_lines.is_empty() || target_address < m_sorted_lines[0].address)
+        return {};
+
+    // TODO: We can do a binary search here
+    for (size_t next = 1; next < m_sorted_lines.size(); ++next) {
+        if (m_sorted_lines[next].address <= target_address)
+            continue;
+        const auto& entry = m_sorted_lines[next - 1];
+        return Optional<SourcePosition>({ entry.file, entry.line });
+    }
+    return {};
+}
+
+// Returns the address of the first entry (in address order) that belongs to
+// `file` at `line`, or an empty Optional if there is no such entry.
+// `file` has to match the stored path exactly.
+Optional<u32> DebugInfo::get_instruction_from_source(const String& file, size_t line) const
+{
+    for (const auto& line_entry : m_sorted_lines) {
+        if (line_entry.file == file && line_entry.line == line)
+            return Optional<u32>(line_entry.address);
+    }
+    return {};
+}

+ 56 - 0
Libraries/LibDebug/DebugInfo.h

@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <AK/NonnullRefPtr.h>
+#include <AK/Optional.h>
+#include <AK/Vector.h>
+#include <LibELF/Loader.h>
+#include <Libraries/LibDebug/Dwarf/LineProgram.h>
+
+// Maps between instruction addresses and source positions, based on the line
+// entries it collects from an ELF image when constructed.
+class DebugInfo {
+public:
+    explicit DebugInfo(NonnullRefPtr<const ELF::Loader> elf);
+
+    // A place in the source code: a file path and a line number.
+    struct SourcePosition {
+        String file_path;
+        size_t line_number { 0 };
+
+        bool operator==(const SourcePosition& other) const
+        {
+            return line_number == other.line_number && file_path == other.file_path;
+        }
+        bool operator!=(const SourcePosition& other) const { return !operator==(other); }
+    };
+
+    // Address -> source position; empty when the address cannot be resolved.
+    Optional<SourcePosition> get_source_position(u32 address) const;
+    // Source position -> address; empty when no entry matches.
+    Optional<u32> get_instruction_from_source(const String& file, size_t line) const;
+
+private:
+    // Populates m_sorted_lines; called once from the constructor.
+    void prepare_lines();
+
+    NonnullRefPtr<const ELF::Loader> m_elf;
+
+    // Line entries, ordered by ascending address.
+    Vector<LineProgram::LineInfo> m_sorted_lines;
+};

+ 247 - 0
Libraries/LibDebug/Dwarf/LineProgram.cpp

@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "LineProgram.h"
+
+// Parses one line-number unit from `stream`, starting at its current offset.
+// The steps must run in this order because each one consumes the next part of
+// the unit: header, directory table, file table, then the opcode stream.
+LineProgram::LineProgram(BufferStream& stream)
+    : m_stream(stream)
+    , m_unit_offset(stream.offset()) // remember where this unit begins
+{
+    parse_unit_header();
+    parse_source_directories();
+    parse_source_files();
+    run_program();
+}
+
+// Reads the fixed-size unit header straight into m_unit_header and verifies
+// that it has the only version and opcode base this parser supports.
+void LineProgram::parse_unit_header()
+{
+    m_stream.read_raw(reinterpret_cast<u8*>(&m_unit_header), sizeof(m_unit_header));
+
+    ASSERT(m_unit_header.version == DWARF_VERSION);
+    ASSERT(m_unit_header.opcode_base == SPECIAL_OPCODES_BASE);
+
+#ifdef DWARF_DEBUG
+    dbg() << "unit length: " << m_unit_header.length;
+#endif
+}
+
+// Reads the directory table: a list of NUL-terminated strings that ends with
+// an empty string (a lone 0 byte). Index 0 is pre-filled with ".".
+void LineProgram::parse_source_directories()
+{
+    m_source_directories.append(".");
+
+    for (;;) {
+        // A 0 byte where a name would start marks the end of the table.
+        if (m_stream.peek() == 0)
+            break;
+
+        String directory_name;
+        m_stream >> directory_name;
+#ifdef DWARF_DEBUG
+        dbg() << "directory: " << directory_name;
+#endif
+        m_source_directories.append(move(directory_name));
+    }
+
+    // Step over the terminating 0 byte.
+    m_stream.advance(1);
+}
+
+// Reads the file table. Each entry is a NUL-terminated name followed by three
+// unsigned LEB128 numbers: directory index, modification time and file size.
+// The table ends with a lone 0 byte. Index 0 is pre-filled with a "." entry.
+void LineProgram::parse_source_files()
+{
+    m_source_files.append({ ".", 0 });
+
+    for (;;) {
+        // A 0 byte where a name would start marks the end of the table.
+        if (m_stream.peek() == 0)
+            break;
+
+        String file_name;
+        m_stream >> file_name;
+
+        size_t directory_index = 0;
+        m_stream.read_LEB128_unsigned(directory_index);
+
+        // Modification time and file size are parsed only to get past them.
+        size_t ignored = 0;
+        m_stream.read_LEB128_unsigned(ignored);
+        m_stream.read_LEB128_unsigned(ignored);
+#ifdef DWARF_DEBUG
+        dbg() << "file: " << file_name << ", directory index: " << directory_index;
+#endif
+        m_source_files.append({ file_name, directory_index });
+    }
+
+    // Step over the terminating 0 byte, then make sure nothing above failed.
+    m_stream.advance(1);
+    ASSERT(!m_stream.handle_read_failure());
+}
+
+// Emits a line entry for the current register state (address, file, line).
+// Nothing is emitted while the is_statement register is false.
+// The stored path is "<directory>/<file name>" of the current file entry.
+void LineProgram::append_to_line_info()
+{
+#ifdef DWARF_DEBUG
+    dbg() << "appending line info: " << (void*)m_address << ", " << m_source_files[m_file_index].name << ":" << m_line;
+#endif
+    if (!m_is_statement)
+        return;
+
+    const auto& file_entry = m_source_files[m_file_index];
+    String directory = m_source_directories[file_entry.directory_index];
+    String full_path = String::format("%s/%s", directory.characters(), file_entry.name.characters());
+    m_lines.append({ m_address, full_path, m_line });
+}
+
+// Puts the virtual machine registers back into their initial state:
+// address 0, file index 1, line 1, and is_statement as the unit header's
+// default_is_stmt field dictates.
+void LineProgram::reset_registers()
+{
+    const bool statement_by_default = (m_unit_header.default_is_stmt == 1);
+
+    m_is_statement = statement_by_default;
+    m_file_index = 1;
+    m_line = 1;
+    m_address = 0;
+}
+
+// Executes one extended opcode. The leading 0 byte has already been consumed
+// by the caller; what follows is an unsigned LEB128 length, the sub-opcode
+// byte and the sub-opcode's operands.
+// Sub-opcodes without a case below (e.g. DefineFile) hit ASSERT_NOT_REACHED.
+void LineProgram::handle_extended_opcode()
+{
+    size_t length = 0;
+    m_stream.read_LEB128_unsigned(length);
+
+    u8 sub_opcode = 0;
+    m_stream >> sub_opcode;
+
+    switch (sub_opcode) {
+    case ExtendedOpcodes::EndSequence: {
+        append_to_line_info();
+        reset_registers();
+        break;
+    }
+    case ExtendedOpcodes::SetAddress: {
+        // `length` counts the sub-opcode byte plus the address operand, and
+        // the operand is read directly into m_address below.
+        ASSERT(length == sizeof(m_address) + 1);
+        m_stream >> m_address;
+#ifdef DWARF_DEBUG
+        dbg() << "SetAddress: " << (void*)m_address;
+#endif
+        break;
+    }
+    case ExtendedOpcodes::SetDiscriminator: {
+#ifdef DWARF_DEBUG
+        dbg() << "SetDiscriminator";
+#endif
+        // The discriminator is an unsigned LEB128 operand and may span more
+        // than one byte. Its value is not used.
+        size_t discriminator = 0;
+        m_stream.read_LEB128_unsigned(discriminator);
+        break;
+    }
+    default:
+#ifdef DWARF_DEBUG
+        dbg() << "offset: " << (void*)m_stream.offset();
+#endif
+        ASSERT_NOT_REACHED();
+    }
+}
+// Executes one standard opcode (`opcode` has already been read from the
+// stream); operands, if any, are read here.
+// Standard opcodes without a case below hit ASSERT_NOT_REACHED.
+void LineProgram::handle_standard_opcode(u8 opcode)
+{
+    switch (opcode) {
+    case StandardOpcodes::Copy: {
+        append_to_line_info();
+        break;
+    }
+    case StandardOpcodes::AdvancePc: {
+        size_t operand = 0;
+        m_stream.read_LEB128_unsigned(operand);
+        size_t delta = operand * m_unit_header.min_instruction_length;
+#ifdef DWARF_DEBUG
+        dbg() << "AdvnacePC by: " << delta << " to: " << (void*)(m_address + delta);
+#endif
+        m_address += delta;
+        break;
+    }
+    case StandardOpcodes::SetFile: {
+        size_t new_file_index = 0;
+        m_stream.read_LEB128_unsigned(new_file_index);
+#ifdef DWARF_DEBUG
+        dbg() << "SetFile: new file index: " << new_file_index;
+#endif
+        m_file_index = new_file_index;
+        break;
+    }
+    case StandardOpcodes::SetColumn: {
+        // not implemented: the operand is consumed and discarded
+#ifdef DWARF_DEBUG
+        dbg() << "SetColumn";
+#endif
+        size_t new_column = 0;
+        m_stream.read_LEB128_unsigned(new_column);
+
+        break;
+    }
+    case StandardOpcodes::AdvanceLine: {
+        ssize_t line_delta = 0;
+        m_stream.read_LEB128_signed(line_delta);
+        // A negative delta must not take the line register below zero.
+        ASSERT(line_delta >= 0 || m_line >= (size_t)(-line_delta));
+        m_line += line_delta;
+#ifdef DWARF_DEBUG
+        dbg() << "AdvanceLine: " << m_line;
+#endif
+        break;
+    }
+    case StandardOpcodes::NegateStatement: {
+#ifdef DWARF_DEBUG
+        dbg() << "NegateStatement";
+#endif
+        m_is_statement = !m_is_statement;
+        break;
+    }
+    case StandardOpcodes::ConstAddPc: {
+        // Advances the address by the same amount special opcode 255 would,
+        // without touching the line register or emitting an entry.
+        // min_instruction_length is applied exactly once, as for special opcodes.
+        u8 adjusted_opcode = 255 - SPECIAL_OPCODES_BASE;
+        ssize_t address_increment = (adjusted_opcode / m_unit_header.line_range) * m_unit_header.min_instruction_length;
+#ifdef DWARF_DEBUG
+        dbg() << "ConstAddPc: advance pc by: " << address_increment << " to: " << (m_address + address_increment);
+#endif
+        m_address += address_increment;
+        break;
+    }
+    default:
+        ASSERT_NOT_REACHED();
+    }
+}
+// Executes one special opcode: a single byte that encodes both an address
+// advance and a line advance, and always emits a line entry afterwards.
+// With adjusted = opcode - SPECIAL_OPCODES_BASE:
+//   address += (adjusted / line_range) * min_instruction_length
+//   line    += line_base + (adjusted % line_range)
+void LineProgram::handle_sepcial_opcode(u8 opcode)
+{
+    u8 adjusted_opcode = opcode - SPECIAL_OPCODES_BASE;
+    ssize_t address_increment = (adjusted_opcode / m_unit_header.line_range) * m_unit_header.min_instruction_length;
+    ssize_t line_increment = m_unit_header.line_base + (adjusted_opcode % m_unit_header.line_range);
+
+    m_address += address_increment;
+    m_line += line_increment;
+
+#ifdef DWARF_DEBUG
+    dbg() << "Special adjusted_opcode: " << adjusted_opcode << ", delta_address: " << address_increment << ", delta_line: " << line_increment;
+    dbg() << "Address is now:" << (void*)m_address << ", and line is: " << m_source_files[m_file_index].name << ":" << m_line;
+#endif
+
+    append_to_line_info();
+}
+
+// Executes the unit's opcode stream until the end of the unit is reached.
+// The unit ends `length` bytes after its 4-byte length field.
+// Opcode 0 introduces an extended opcode, values below SPECIAL_OPCODES_BASE
+// are standard opcodes, and everything else is a special opcode.
+void LineProgram::run_program()
+{
+    reset_registers();
+
+    const size_t unit_end = m_unit_offset + sizeof(u32) + m_unit_header.length;
+    while ((size_t)m_stream.offset() < unit_end) {
+        u8 opcode = 0;
+        m_stream >> opcode;
+
+#ifdef DWARF_DEBUG
+        dbg() << (void*)(m_stream.offset() - 1) << ": opcode: " << opcode;
+#endif
+
+        if (opcode == 0) {
+            handle_extended_opcode();
+            continue;
+        }
+        if (opcode < SPECIAL_OPCODES_BASE) {
+            handle_standard_opcode(opcode);
+            continue;
+        }
+        handle_sepcial_opcode(opcode);
+    }
+}

+ 113 - 0
Libraries/LibDebug/Dwarf/LineProgram.h

@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+#include <AK/BufferStream.h>
+#include <AK/Vector.h>
+
+// Parses a single line-number unit from a stream and runs its line program,
+// collecting the (address, file, line) entries the program emits.
+// All work happens in the constructor; the results are available via lines().
+class LineProgram {
+public:
+    explicit LineProgram(BufferStream& stream);
+
+    // One emitted entry: the code at `address` belongs to `file` at `line`.
+    struct LineInfo {
+        u32 address { 0 };
+        String file;
+        size_t line { 0 };
+    };
+
+    const Vector<LineInfo>& lines() const { return m_lines; }
+
+private:
+    // Parsing steps, called in this order by the constructor.
+    void parse_unit_header();
+    void parse_source_directories();
+    void parse_source_files();
+    void run_program();
+
+    void append_to_line_info();
+    void reset_registers();
+
+    // Opcode handlers, dispatched from run_program().
+    void handle_extended_opcode();
+    void handle_standard_opcode(u8 opcode);
+    void handle_sepcial_opcode(u8 opcode);
+
+    // Unit header layout; read byte-for-byte from the stream, hence packed.
+    struct [[gnu::packed]] UnitHeader32
+    {
+        u32 length;
+        u16 version;
+        u32 header_length;
+        u8 min_instruction_length;
+        u8 default_is_stmt;
+        i8 line_base;
+        u8 line_range;
+        u8 opcode_base;
+        u8 std_opcode_lengths[12];
+    };
+
+    // Opcode values 1..12.
+    enum StandardOpcodes {
+        Copy = 1,
+        AdvancePc = 2,
+        AdvanceLine = 3,
+        SetFile = 4,
+        SetColumn = 5,
+        NegateStatement = 6,
+        SetBasicBlock = 7,
+        ConstAddPc = 8,
+        FixAdvancePc = 9,
+        SetProlougeEnd = 10,
+        SetEpilogueBegin = 11,
+        SetIsa = 12
+    };
+
+    // Sub-opcodes that follow a 0 opcode byte.
+    enum ExtendedOpcodes {
+        EndSequence = 1,
+        SetAddress = 2,
+        DefineFile = 3,
+        SetDiscriminator = 4,
+    };
+
+    // An entry of the unit's file table.
+    struct FileEntry {
+        String name;
+        size_t directory_index { 0 };
+    };
+
+    // The only header version and opcode base accepted by parse_unit_header().
+    static constexpr u16 DWARF_VERSION = 3;
+    static constexpr u8 SPECIAL_OPCODES_BASE = 13;
+
+    BufferStream& m_stream;
+
+    // Stream offset at which this unit starts.
+    size_t m_unit_offset { 0 };
+    UnitHeader32 m_unit_header {};
+    Vector<String> m_source_directories;
+    Vector<FileEntry> m_source_files;
+
+    // The registers of the "line program" virtual machine
+    u32 m_address { 0 };
+    size_t m_line { 0 };
+    size_t m_file_index { 0 };
+    bool m_is_statement { false };
+
+    // Entries emitted so far.
+    Vector<LineInfo> m_lines;
+};