From 10a1b0de9607bf910faba97c1d09bfc6963500b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= Date: Tue, 17 Oct 2023 19:00:30 +0200 Subject: [PATCH] disasm: Overhaul symbol printing We now split up symbols into zero-sized symbols that label single instructions (no need to separate them by newlines; they are used for jump labels and relocation targets within a larger block of code) and ranged symbols that label functions. Empty symbols are discarded since at least RISC-V ELF files contain quite a few of those. Zero-sized symbols and ranged symbols are handled almost the same, but this way we can make sure that zero-sized symbols don't interfere with ranged symbol's newline separation logic. For zero-sized symbols, the "symbol contains address" logic is updated so they actually contain the one address they're pointing at, fixing the bug with many "dangling" zero-sized symbols after a function that contained them. Zero-sized labels are also no longer printed as a start-end range, since that is unnecessary visual noise. --- Userland/Utilities/disasm.cpp | 125 +++++++++++++++++++++++++--------- 1 file changed, 92 insertions(+), 33 deletions(-) diff --git a/Userland/Utilities/disasm.cpp b/Userland/Utilities/disasm.cpp index 4892e988534..97125cb0063 100644 --- a/Userland/Utilities/disasm.cpp +++ b/Userland/Utilities/disasm.cpp @@ -6,8 +6,11 @@ #include #include +#include #include #include +#include +#include #include #include #include @@ -16,7 +19,24 @@ #include #include #include -#include + +struct Symbol { + size_t value { 0 }; + size_t size { 0 }; + StringView name; + + size_t address() const { return value; } + size_t address_end() const { return value + size; } + + bool contains(size_t virtual_address) { return (address() <= virtual_address && virtual_address < address_end()) || (size == 0 && address() == virtual_address); } + + String format_symbol_address() const + { + if (size > 0) + return MUST(String::formatted("{:p}-{:p}", address(), address_end())); + return MUST(String::formatted("{:p}", address())); + } +}; ErrorOr serenity_main(Main::Arguments args) { @@ -38,20 +58,12 @@ ErrorOr serenity_main(Main::Arguments args) asm_size = MUST(file->size()); } - struct Symbol { - size_t value; - size_t size; - StringView name; - - size_t address() const { return value; } - size_t address_end() const { return value + size; } - - bool contains(size_t virtual_address) { return address() <= virtual_address && virtual_address < address_end(); } - }; - Vector symbols; + // Functions and similar symbols. + Vector ranged_symbols; + // Jump labels, relocation targets, etc. + Vector zero_size_symbols; size_t file_offset = 0; - Vector::Iterator current_symbol = symbols.begin(); OwnPtr symbol_provider; // nullptr for non-ELF disassembly. OwnPtr elf; if (asm_size >= 4 && strncmp(reinterpret_cast(asm_data), "\u007fELF", 4) == 0) { @@ -67,22 +79,36 @@ ErrorOr serenity_main(Main::Arguments args) file_offset = section.address(); return IterationDecision::Break; }); - symbols.ensure_capacity(elf->symbol_count() + 1); - symbols.append({ 0, 0, StringView() }); // Sentinel. + ranged_symbols.ensure_capacity(elf->symbol_count() + 1); + zero_size_symbols.ensure_capacity(elf->symbol_count() + 1); + // Sentinels: + ranged_symbols.append({ 0, 0, StringView() }); + zero_size_symbols.append({ 0, 0, StringView() }); + elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) { - symbols.append({ symbol.value(), symbol.size(), symbol.name() }); + if (symbol.name().is_empty()) + return IterationDecision::Continue; + + if (symbol.size() == 0) + zero_size_symbols.append({ symbol.value(), symbol.size(), symbol.name() }); + else + ranged_symbols.append({ symbol.value(), symbol.size(), symbol.name() }); return IterationDecision::Continue; }); - quick_sort(symbols, [](auto& a, auto& b) { + auto symbol_order = [](auto& a, auto& b) { if (a.value != b.value) return a.value < b.value; if (a.size != b.size) return a.size < b.size; return a.name < b.name; - }); + }; + quick_sort(ranged_symbols, symbol_order); + quick_sort(zero_size_symbols, symbol_order); if constexpr (DISASM_DUMP_DEBUG) { - for (size_t i = 0; i < symbols.size(); ++i) - dbgln("{}: {:p}, {}", symbols[i].name, symbols[i].value, symbols[i].size); + for (size_t i = 0; i < ranged_symbols.size(); ++i) + dbgln("{}: {:p}, {}", ranged_symbols[i].name, ranged_symbols[i].value, ranged_symbols[i].size); + for (size_t i = 0; i < zero_size_symbols.size(); ++i) + dbgln("{}: {:p}", zero_size_symbols[i].name, zero_size_symbols[i].value); } } } @@ -90,6 +116,8 @@ ErrorOr serenity_main(Main::Arguments args) X86::SimpleInstructionStream stream(asm_data, asm_size); X86::Disassembler disassembler(stream); + Vector::Iterator current_ranged_symbol = ranged_symbols.begin(); + Vector::Iterator current_zero_size_symbol = zero_size_symbols.begin(); bool is_first_symbol = true; bool current_instruction_is_in_symbol = false; @@ -99,38 +127,69 @@ ErrorOr serenity_main(Main::Arguments args) if (!insn.has_value()) break; + size_t virtual_offset = file_offset + offset; // Prefix regions of instructions belonging to a symbol with the symbol's name. // Separate regions of instructions belonging to distinct symbols with newlines, // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines. // Interesting cases: // - More than 1 symbol covering a region of instructions (ICF, D1/D2) // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after) - // Invariant: current_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address. - size_t virtual_offset = file_offset + offset; - if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) { + // Invariant: current_ranged_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address. + StringBuilder dangling_symbols; + StringBuilder instruction_symbols; + bool needs_separator = false; + if (current_zero_size_symbol < zero_size_symbols.end()) { + // Print "dangling" symbols preceding the current instruction. + while (current_zero_size_symbol + 1 < zero_size_symbols.end() && !(current_zero_size_symbol + 1)->contains(virtual_offset) && (current_zero_size_symbol + 1)->address() <= virtual_offset) { + ++current_zero_size_symbol; + if (!is_first_symbol) + dangling_symbols.appendff("\n({} ({}))\n", demangle(current_zero_size_symbol->name), current_zero_size_symbol->format_symbol_address()); + } + // Find and print all symbols covering the current instruction. + while (current_zero_size_symbol + 1 < zero_size_symbols.end() && (current_zero_size_symbol + 1)->contains(virtual_offset)) { + if (!is_first_symbol && !current_instruction_is_in_symbol) + needs_separator = true; + ++current_zero_size_symbol; + current_instruction_is_in_symbol = true; + + instruction_symbols.appendff("{} ({}):\n", demangle(current_zero_size_symbol->name), current_zero_size_symbol->format_symbol_address()); + } + } + // Handle ranged symbols separately. + if (current_ranged_symbol < ranged_symbols.end() && !current_ranged_symbol->contains(virtual_offset)) { if (!is_first_symbol && current_instruction_is_in_symbol) { // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline. - outln(); - current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)); + needs_separator = true; + current_instruction_is_in_symbol = (current_ranged_symbol + 1 < ranged_symbols.end() && (current_ranged_symbol + 1)->contains(virtual_offset)); } - // Try to find symbol covering current instruction, if one exists. - while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) { - ++current_symbol; + // Print "dangling" symbols preceding the current instruction. + while (current_ranged_symbol + 1 < ranged_symbols.end() && !(current_ranged_symbol + 1)->contains(virtual_offset) && (current_ranged_symbol + 1)->address() <= virtual_offset) { + ++current_ranged_symbol; if (!is_first_symbol) - outln("\n({} ({:p}-{:p}))\n", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end()); + dangling_symbols.appendff("\n({} ({}))\n", demangle(current_ranged_symbol->name), current_ranged_symbol->format_symbol_address()); } - while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) { + // Find and print all symbols covering the current instruction. + while (current_ranged_symbol + 1 < ranged_symbols.end() && (current_ranged_symbol + 1)->contains(virtual_offset)) { if (!is_first_symbol && !current_instruction_is_in_symbol) - outln(); - ++current_symbol; + needs_separator = true; + ++current_ranged_symbol; current_instruction_is_in_symbol = true; - outln("{} ({:p}-{:p}):", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end()); + + instruction_symbols.appendff("{} ({}):\n", demangle(current_ranged_symbol->name), current_ranged_symbol->format_symbol_address()); } is_first_symbol = false; } + // Insert extra newline after the "dangling" symbols. + if (needs_separator) + outln(); + if (auto dangling_symbols_text = TRY(dangling_symbols.to_string()); !dangling_symbols_text.is_empty()) + outln("{}", dangling_symbols_text); + if (auto instruction_symbols_text = TRY(instruction_symbols.to_string()); !instruction_symbols_text.is_empty()) + out("{}", instruction_symbols_text); + size_t length = insn.value().length(); StringBuilder builder; builder.appendff("{:p} ", virtual_offset);