/*
 * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
  6. #include <AK/Debug.h>
  7. #include <AK/Demangle.h>
  8. #include <AK/OwnPtr.h>
  9. #include <AK/QuickSort.h>
  10. #include <AK/Vector.h>
  11. #include <LibCore/ArgsParser.h>
  12. #include <LibCore/MappedFile.h>
  13. #include <LibCore/System.h>
  14. #include <LibELF/Image.h>
  15. #include <LibMain/Main.h>
  16. #include <LibX86/Disassembler.h>
  17. #include <LibX86/ELFSymbolProvider.h>
  18. #include <string.h>
  19. ErrorOr<int> serenity_main(Main::Arguments args)
  20. {
  21. StringView path {};
  22. Core::ArgsParser args_parser;
  23. args_parser.set_general_help(
  24. "Disassemble an executable, and show human-readable "
  25. "assembly code for each function.");
  26. args_parser.add_positional_argument(path, "Path to i386 binary file", "path");
  27. args_parser.parse(args);
  28. OwnPtr<Core::MappedFile const> file;
  29. u8 const* asm_data = nullptr;
  30. size_t asm_size = 0;
  31. if ((TRY(Core::System::stat(path))).st_size > 0) {
  32. file = TRY(Core::MappedFile::map(path));
  33. asm_data = static_cast<u8 const*>(file->data());
  34. asm_size = file->size();
  35. }
  36. struct Symbol {
  37. size_t value;
  38. size_t size;
  39. StringView name;
  40. size_t address() const { return value; }
  41. size_t address_end() const { return value + size; }
  42. bool contains(size_t virtual_address) { return address() <= virtual_address && virtual_address < address_end(); }
  43. };
  44. Vector<Symbol> symbols;
  45. size_t file_offset = 0;
  46. Vector<Symbol>::Iterator current_symbol = symbols.begin();
  47. OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly.
  48. OwnPtr<ELF::Image> elf;
  49. if (asm_size >= 4 && strncmp(reinterpret_cast<char const*>(asm_data), "\u007fELF", 4) == 0) {
  50. elf = make<ELF::Image>(asm_data, asm_size);
  51. if (elf->is_valid()) {
  52. symbol_provider = make<X86::ELFSymbolProvider>(*elf);
  53. elf->for_each_section_of_type(SHT_PROGBITS, [&](ELF::Image::Section const& section) {
  54. // FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
  55. if (section.name() != ".text")
  56. return IterationDecision::Continue;
  57. asm_data = reinterpret_cast<u8 const*>(section.raw_data());
  58. asm_size = section.size();
  59. file_offset = section.address();
  60. return IterationDecision::Break;
  61. });
  62. symbols.ensure_capacity(elf->symbol_count() + 1);
  63. symbols.append({ 0, 0, StringView() }); // Sentinel.
  64. elf->for_each_symbol([&](ELF::Image::Symbol const& symbol) {
  65. symbols.append({ symbol.value(), symbol.size(), symbol.name() });
  66. return IterationDecision::Continue;
  67. });
  68. quick_sort(symbols, [](auto& a, auto& b) {
  69. if (a.value != b.value)
  70. return a.value < b.value;
  71. if (a.size != b.size)
  72. return a.size < b.size;
  73. return a.name < b.name;
  74. });
  75. if constexpr (DISASM_DUMP_DEBUG) {
  76. for (size_t i = 0; i < symbols.size(); ++i)
  77. dbgln("{}: {:p}, {}", symbols[i].name, symbols[i].value, symbols[i].size);
  78. }
  79. }
  80. }
  81. X86::SimpleInstructionStream stream(asm_data, asm_size);
  82. X86::Disassembler disassembler(stream);
  83. bool is_first_symbol = true;
  84. bool current_instruction_is_in_symbol = false;
  85. for (;;) {
  86. auto offset = stream.offset();
  87. auto insn = disassembler.next();
  88. if (!insn.has_value())
  89. break;
  90. // Prefix regions of instructions belonging to a symbol with the symbol's name.
  91. // Separate regions of instructions belonging to distinct symbols with newlines,
  92. // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
  93. // Interesting cases:
  94. // - More than 1 symbol covering a region of instructions (ICF, D1/D2)
  95. // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
  96. // Invariant: current_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address.
  97. size_t virtual_offset = file_offset + offset;
  98. if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) {
  99. if (!is_first_symbol && current_instruction_is_in_symbol) {
  100. // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
  101. outln();
  102. current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset));
  103. }
  104. // Try to find symbol covering current instruction, if one exists.
  105. while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) {
  106. ++current_symbol;
  107. if (!is_first_symbol)
  108. outln("\n({} ({:p}-{:p}))\n", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end());
  109. }
  110. while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) {
  111. if (!is_first_symbol && !current_instruction_is_in_symbol)
  112. outln();
  113. ++current_symbol;
  114. current_instruction_is_in_symbol = true;
  115. outln("{} ({:p}-{:p}):", demangle(current_symbol->name), current_symbol->address(), current_symbol->address_end());
  116. }
  117. is_first_symbol = false;
  118. }
  119. size_t length = insn.value().length();
  120. StringBuilder builder;
  121. builder.appendff("{:p} ", virtual_offset);
  122. for (size_t i = 0; i < 7; i++) {
  123. if (i < length)
  124. builder.appendff("{:02x} ", asm_data[offset + i]);
  125. else
  126. builder.append(" "sv);
  127. }
  128. builder.append(" "sv);
  129. builder.append(insn.value().to_deprecated_string(virtual_offset, symbol_provider));
  130. outln("{}", builder.string_view());
  131. for (size_t bytes_printed = 7; bytes_printed < length; bytes_printed += 7) {
  132. builder.clear();
  133. builder.appendff("{:p} ", virtual_offset + bytes_printed);
  134. for (size_t i = bytes_printed; i < bytes_printed + 7 && i < length; i++)
  135. builder.appendff(" {:02x}", asm_data[offset + i]);
  136. outln("{}", builder.string_view());
  137. }
  138. }
  139. return 0;
  140. }