disasm.cpp 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Debug.h>
  7. #include <AK/Demangle.h>
  8. #include <AK/IterationDecision.h>
  9. #include <AK/OwnPtr.h>
  10. #include <AK/QuickSort.h>
  11. #include <AK/String.h>
  12. #include <AK/StringBuilder.h>
  13. #include <AK/Vector.h>
  14. #include <LibCore/ArgsParser.h>
  15. #include <LibCore/MappedFile.h>
  16. #include <LibCore/System.h>
  17. #include <LibELF/Image.h>
  18. #include <LibMain/Main.h>
  19. #include <LibX86/Disassembler.h>
  20. #include <LibX86/ELFSymbolProvider.h>
  21. struct Symbol {
  22. size_t value { 0 };
  23. size_t size { 0 };
  24. StringView name;
  25. size_t address() const { return value; }
  26. size_t address_end() const { return value + size; }
  27. bool contains(size_t virtual_address) { return (address() <= virtual_address && virtual_address < address_end()) || (size == 0 && address() == virtual_address); }
  28. String format_symbol_address() const
  29. {
  30. if (size > 0)
  31. return MUST(String::formatted("{:p}-{:p}", address(), address_end()));
  32. return MUST(String::formatted("{:p}", address()));
  33. }
  34. };
  35. ErrorOr<int> serenity_main(Main::Arguments args)
  36. {
  37. StringView path {};
  38. Core::ArgsParser args_parser;
  39. args_parser.set_general_help(
  40. "Disassemble an executable, and show human-readable "
  41. "assembly code for each function.");
  42. args_parser.add_positional_argument(path, "Path to i386 binary file", "path");
  43. args_parser.parse(args);
  44. OwnPtr<Core::MappedFile> file;
  45. u8 const* asm_data = nullptr;
  46. size_t asm_size = 0;
  47. if ((TRY(Core::System::stat(path))).st_size > 0) {
  48. file = TRY(Core::MappedFile::map(path));
  49. asm_data = static_cast<u8 const*>(file->data());
  50. asm_size = MUST(file->size());
  51. }
  52. // Functions and similar symbols.
  53. Vector<Symbol> ranged_symbols;
  54. // Jump labels, relocation targets, etc.
  55. Vector<Symbol> zero_size_symbols;
  56. size_t file_offset = 0;
  57. OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly.
  58. OwnPtr<ELF::Image> elf;
  59. if (asm_size >= 4 && strncmp(reinterpret_cast<char const*>(asm_data), "\u007fELF", 4) == 0) {
  60. elf = make<ELF::Image>(asm_data, asm_size);
  61. if (elf->is_valid()) {
  62. symbol_provider = make<X86::ELFSymbolProvider>(*elf);
  63. elf->for_each_section_of_type(SHT_PROGBITS, [&](ELF::Image::Section const& section) {
  64. // FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
  65. if (section.name() != ".text")
  66. return IterationDecision::Continue;
  67. asm_data = reinterpret_cast<u8 const*>(section.raw_data());
  68. asm_size = section.size();
  69. file_offset = section.address();
  70. return IterationDecision::Break;
  71. });
  72. ranged_symbols.ensure_capacity(elf->symbol_count() + 1);
  73. zero_size_symbols.ensure_capacity(elf->symbol_count() + 1);
  74. // Sentinels:
  75. ranged_symbols.append({ 0, 0, StringView() });
  76. zero_size_symbols.append({ 0, 0, StringView() });
  77. elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) {
  78. if (symbol.name().is_empty())
  79. return IterationDecision::Continue;
  80. if (symbol.size() == 0)
  81. zero_size_symbols.append({ symbol.value(), symbol.size(), symbol.name() });
  82. else
  83. ranged_symbols.append({ symbol.value(), symbol.size(), symbol.name() });
  84. return IterationDecision::Continue;
  85. });
  86. auto symbol_order = [](auto& a, auto& b) {
  87. if (a.value != b.value)
  88. return a.value < b.value;
  89. if (a.size != b.size)
  90. return a.size < b.size;
  91. return a.name < b.name;
  92. };
  93. quick_sort(ranged_symbols, symbol_order);
  94. quick_sort(zero_size_symbols, symbol_order);
  95. if constexpr (DISASM_DUMP_DEBUG) {
  96. for (size_t i = 0; i < ranged_symbols.size(); ++i)
  97. dbgln("{}: {:p}, {}", ranged_symbols[i].name, ranged_symbols[i].value, ranged_symbols[i].size);
  98. for (size_t i = 0; i < zero_size_symbols.size(); ++i)
  99. dbgln("{}: {:p}", zero_size_symbols[i].name, zero_size_symbols[i].value);
  100. }
  101. }
  102. }
  103. X86::SimpleInstructionStream stream(asm_data, asm_size);
  104. X86::Disassembler disassembler(stream);
  105. Vector<Symbol>::Iterator current_ranged_symbol = ranged_symbols.begin();
  106. Vector<Symbol>::Iterator current_zero_size_symbol = zero_size_symbols.begin();
  107. bool is_first_symbol = true;
  108. bool current_instruction_is_in_symbol = false;
  109. for (;;) {
  110. auto offset = stream.offset();
  111. auto insn = disassembler.next();
  112. if (!insn.has_value())
  113. break;
  114. size_t virtual_offset = file_offset + offset;
  115. // Prefix regions of instructions belonging to a symbol with the symbol's name.
  116. // Separate regions of instructions belonging to distinct symbols with newlines,
  117. // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
  118. // Interesting cases:
  119. // - More than 1 symbol covering a region of instructions (ICF, D1/D2)
  120. // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
  121. // Invariant: current_ranged_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address.
  122. StringBuilder dangling_symbols;
  123. StringBuilder instruction_symbols;
  124. bool needs_separator = false;
  125. if (current_zero_size_symbol < zero_size_symbols.end()) {
  126. // Print "dangling" symbols preceding the current instruction.
  127. while (current_zero_size_symbol + 1 < zero_size_symbols.end() && !(current_zero_size_symbol + 1)->contains(virtual_offset) && (current_zero_size_symbol + 1)->address() <= virtual_offset) {
  128. ++current_zero_size_symbol;
  129. if (!is_first_symbol)
  130. dangling_symbols.appendff("\n({} ({}))\n", demangle(current_zero_size_symbol->name), current_zero_size_symbol->format_symbol_address());
  131. }
  132. // Find and print all symbols covering the current instruction.
  133. while (current_zero_size_symbol + 1 < zero_size_symbols.end() && (current_zero_size_symbol + 1)->contains(virtual_offset)) {
  134. if (!is_first_symbol && !current_instruction_is_in_symbol)
  135. needs_separator = true;
  136. ++current_zero_size_symbol;
  137. current_instruction_is_in_symbol = true;
  138. instruction_symbols.appendff("{} ({}):\n", demangle(current_zero_size_symbol->name), current_zero_size_symbol->format_symbol_address());
  139. }
  140. }
  141. // Handle ranged symbols separately.
  142. if (current_ranged_symbol < ranged_symbols.end() && !current_ranged_symbol->contains(virtual_offset)) {
  143. if (!is_first_symbol && current_instruction_is_in_symbol) {
  144. // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
  145. needs_separator = true;
  146. current_instruction_is_in_symbol = (current_ranged_symbol + 1 < ranged_symbols.end() && (current_ranged_symbol + 1)->contains(virtual_offset));
  147. }
  148. // Print "dangling" symbols preceding the current instruction.
  149. while (current_ranged_symbol + 1 < ranged_symbols.end() && !(current_ranged_symbol + 1)->contains(virtual_offset) && (current_ranged_symbol + 1)->address() <= virtual_offset) {
  150. ++current_ranged_symbol;
  151. if (!is_first_symbol)
  152. dangling_symbols.appendff("\n({} ({}))\n", demangle(current_ranged_symbol->name), current_ranged_symbol->format_symbol_address());
  153. }
  154. // Find and print all symbols covering the current instruction.
  155. while (current_ranged_symbol + 1 < ranged_symbols.end() && (current_ranged_symbol + 1)->contains(virtual_offset)) {
  156. if (!is_first_symbol && !current_instruction_is_in_symbol)
  157. needs_separator = true;
  158. ++current_ranged_symbol;
  159. current_instruction_is_in_symbol = true;
  160. instruction_symbols.appendff("{} ({}):\n", demangle(current_ranged_symbol->name), current_ranged_symbol->format_symbol_address());
  161. }
  162. is_first_symbol = false;
  163. }
  164. // Insert extra newline after the "dangling" symbols.
  165. if (needs_separator)
  166. outln();
  167. if (auto dangling_symbols_text = TRY(dangling_symbols.to_string()); !dangling_symbols_text.is_empty())
  168. outln("{}", dangling_symbols_text);
  169. if (auto instruction_symbols_text = TRY(instruction_symbols.to_string()); !instruction_symbols_text.is_empty())
  170. out("{}", instruction_symbols_text);
  171. size_t length = insn.value().length();
  172. StringBuilder builder;
  173. builder.appendff("{:p} ", virtual_offset);
  174. for (size_t i = 0; i < 7; i++) {
  175. if (i < length)
  176. builder.appendff("{:02x} ", asm_data[offset + i]);
  177. else
  178. builder.append(" "sv);
  179. }
  180. builder.append(" "sv);
  181. builder.append(insn.value().to_byte_string(virtual_offset, symbol_provider));
  182. outln("{}", builder.string_view());
  183. for (size_t bytes_printed = 7; bytes_printed < length; bytes_printed += 7) {
  184. builder.clear();
  185. builder.appendff("{:p} ", virtual_offset + bytes_printed);
  186. for (size_t i = bytes_printed; i < bytes_printed + 7 && i < length; i++)
  187. builder.appendff(" {:02x}", asm_data[offset + i]);
  188. outln("{}", builder.string_view());
  189. }
  190. }
  191. return 0;
  192. }