disasm.cpp 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. /*
  2. * Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/LogStream.h>
  27. #include <AK/MappedFile.h>
  28. #include <AK/QuickSort.h>
  29. #include <AK/Vector.h>
  30. #include <LibCore/ArgsParser.h>
  31. #include <LibELF/Loader.h>
  32. #include <LibX86/Disassembler.h>
  33. #include <LibX86/ELFSymbolProvider.h>
  34. #include <stdio.h>
  35. #include <string.h>
  36. //#define DISASM_DUMP
  37. int main(int argc, char** argv)
  38. {
  39. const char* path = nullptr;
  40. Core::ArgsParser args_parser;
  41. args_parser.add_positional_argument(path, "Path to i386 binary file", "path");
  42. args_parser.parse(argc, argv);
  43. MappedFile file(path);
  44. if (!file.is_valid()) {
  45. // Already printed some error message.
  46. return 1;
  47. }
  48. struct Symbol {
  49. size_t value;
  50. size_t size;
  51. StringView name;
  52. size_t address() const { return value; }
  53. size_t address_end() const { return value + size; }
  54. bool contains(size_t virtual_address) { return address() <= virtual_address && virtual_address < address_end(); }
  55. };
  56. Vector<Symbol> symbols;
  57. const u8* asm_data = (const u8*)file.data();
  58. size_t asm_size = file.size();
  59. size_t file_offset = 0;
  60. Vector<Symbol>::Iterator current_symbol = symbols.begin();
  61. RefPtr<ELF::Loader> elf;
  62. OwnPtr<X86::ELFSymbolProvider> symbol_provider; // nullptr for non-ELF disassembly.
  63. if (asm_size >= 4 && strncmp((const char*)asm_data, "\u007fELF", 4) == 0) {
  64. NonnullRefPtr<ELF::Loader> elf_loader = ELF::Loader::create(asm_data, asm_size);
  65. if (elf_loader->image().is_valid()) {
  66. elf = elf_loader;
  67. symbol_provider = make<X86::ELFSymbolProvider>(*elf);
  68. elf->image().for_each_section_of_type(SHT_PROGBITS, [&](const ELF::Image::Section& section) {
  69. // FIXME: Disassemble all SHT_PROGBITS sections, not just .text.
  70. if (section.name() != ".text")
  71. return IterationDecision::Continue;
  72. asm_data = (const u8*)section.raw_data();
  73. asm_size = section.size();
  74. file_offset = section.address();
  75. return IterationDecision::Break;
  76. });
  77. symbols.ensure_capacity(elf->image().symbol_count() + 1);
  78. symbols.append({ 0, 0, StringView() }); // Sentinel.
  79. elf->image().for_each_symbol([&](const ELF::Image::Symbol& symbol) {
  80. symbols.append({ symbol.value(), symbol.size(), symbol.name() });
  81. return IterationDecision::Continue;
  82. });
  83. quick_sort(symbols, [](auto& a, auto& b) {
  84. if (a.value != b.value)
  85. return a.value < b.value;
  86. if (a.size != b.size)
  87. return a.size < b.size;
  88. return a.name < b.name;
  89. });
  90. #ifdef DISASM_DUMP
  91. for (size_t i = 0; i < symbols.size(); ++i)
  92. dbg() << symbols[i].name << ": " << (void*)(uintptr_t)symbols[i].value << ", " << symbols[i].size;
  93. #endif
  94. }
  95. }
  96. X86::SimpleInstructionStream stream(asm_data, asm_size);
  97. X86::Disassembler disassembler(stream);
  98. bool is_first_symbol = true;
  99. bool current_instruction_is_in_symbol = false;
  100. for (;;) {
  101. auto offset = stream.offset();
  102. auto insn = disassembler.next();
  103. if (!insn.has_value())
  104. break;
  105. // Prefix regions of instructions belonging to a symbol with the symbol's name.
  106. // Separate regions of instructions belonging to distinct symbols with newlines,
  107. // and separate regions of instructions not belonging to symbols from regions belonging to symbols with newlines.
  108. // Interesting cases:
  109. // - More than 1 symbol covering a region of instructions (ICF, D1/D2)
  110. // - Symbols of size 0 that don't cover any instructions but are at an address (want to print them, separated from instructions both before and after)
  111. // Invariant: current_symbol is the largest instruction containing insn, or it is the largest instruction that has an address less than the instruction's address.
  112. size_t virtual_offset = file_offset + offset;
  113. if (current_symbol < symbols.end() && !current_symbol->contains(virtual_offset)) {
  114. if (!is_first_symbol && current_instruction_is_in_symbol) {
  115. // The previous instruction was part of a symbol that doesn't cover the current instruction, so separate it from the current instruction with a newline.
  116. out();
  117. current_instruction_is_in_symbol = (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset));
  118. }
  119. // Try to find symbol covering current instruction, if one exists.
  120. while (current_symbol + 1 < symbols.end() && !(current_symbol + 1)->contains(virtual_offset) && (current_symbol + 1)->address() <= virtual_offset) {
  121. ++current_symbol;
  122. if (!is_first_symbol)
  123. out() << "\n(" << current_symbol->name << " (" << String::format("%08x-%08x", current_symbol->address(), current_symbol->address_end()) << "))\n";
  124. }
  125. while (current_symbol + 1 < symbols.end() && (current_symbol + 1)->contains(virtual_offset)) {
  126. if (!is_first_symbol && !current_instruction_is_in_symbol)
  127. out();
  128. ++current_symbol;
  129. current_instruction_is_in_symbol = true;
  130. out() << current_symbol->name << " (" << String::format("%08x-%08x", current_symbol->address(), current_symbol->address_end()) << "):";
  131. }
  132. is_first_symbol = false;
  133. }
  134. out() << String::format("%08x", virtual_offset) << " " << insn.value().to_string(virtual_offset, symbol_provider);
  135. }
  136. return 0;
  137. }