DynamicObject.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517
  1. /*
  2. * Copyright (c) 2019-2020, Andrew Kaster <andrewdkaster@gmail.com>
  3. * Copyright (c) 2020, Itamar S. <itamar8910@gmail.com>
  4. * All rights reserved.
  5. *
  6. * Redistribution and use in source and binary forms, with or without
  7. * modification, are permitted provided that the following conditions are met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright notice, this
  10. * list of conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright notice,
  13. * this list of conditions and the following disclaimer in the documentation
  14. * and/or other materials provided with the distribution.
  15. *
  16. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  17. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  18. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  19. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  20. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  21. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  22. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  23. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  24. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  25. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  26. */
  27. #include <AK/Debug.h>
  28. #include <AK/String.h>
  29. #include <AK/StringBuilder.h>
  30. #include <LibELF/DynamicLinker.h>
  31. #include <LibELF/DynamicObject.h>
  32. #include <LibELF/exec_elf.h>
  33. #include <stdio.h>
  34. #include <string.h>
  35. namespace ELF {
  36. static const char* name_for_dtag(Elf32_Sword d_tag);
  37. DynamicObject::DynamicObject(VirtualAddress base_address, VirtualAddress dynamic_section_addresss)
  38. : m_base_address(base_address)
  39. , m_dynamic_address(dynamic_section_addresss)
  40. {
  41. Elf32_Ehdr* header = (Elf32_Ehdr*)base_address.as_ptr();
  42. Elf32_Phdr* pheader = (Elf32_Phdr*)(base_address.as_ptr() + header->e_phoff);
  43. m_elf_base_address = VirtualAddress(pheader->p_vaddr - pheader->p_offset);
  44. if (header->e_type == ET_DYN)
  45. m_is_elf_dynamic = true;
  46. else
  47. m_is_elf_dynamic = false;
  48. parse();
  49. }
  50. DynamicObject::~DynamicObject()
  51. {
  52. }
  53. void DynamicObject::dump() const
  54. {
  55. StringBuilder builder;
  56. builder.append("\nd_tag tag_name value\n");
  57. size_t num_dynamic_sections = 0;
  58. for_each_dynamic_entry([&](const DynamicObject::DynamicEntry& entry) {
  59. String name_field = String::format("(%s)", name_for_dtag(entry.tag()));
  60. builder.appendf("0x%08X %-17s0x%X\n", entry.tag(), name_field.characters(), entry.val());
  61. num_dynamic_sections++;
  62. return IterationDecision::Continue;
  63. });
  64. if (m_has_soname)
  65. builder.appendf("DT_SONAME: %s\n", soname()); // FIXME: Valdidate that this string is null terminated?
  66. dbgln<DYNAMIC_LOAD_DEBUG>("Dynamic section at address {} contains {} entries:", m_dynamic_address.as_ptr(), num_dynamic_sections);
  67. dbgln<DYNAMIC_LOAD_DEBUG>("{}", builder.string_view());
  68. }
  69. void DynamicObject::parse()
  70. {
  71. for_each_dynamic_entry([&](const DynamicEntry& entry) {
  72. switch (entry.tag()) {
  73. case DT_INIT:
  74. m_init_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  75. break;
  76. case DT_FINI:
  77. m_fini_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  78. break;
  79. case DT_INIT_ARRAY:
  80. m_init_array_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  81. break;
  82. case DT_INIT_ARRAYSZ:
  83. m_init_array_size = entry.val();
  84. break;
  85. case DT_FINI_ARRAY:
  86. m_fini_array_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  87. break;
  88. case DT_FINI_ARRAYSZ:
  89. m_fini_array_size = entry.val();
  90. break;
  91. case DT_HASH:
  92. // Use SYSV hash only if GNU hash is not available
  93. if (m_hash_type == HashType::SYSV) {
  94. m_hash_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  95. }
  96. break;
  97. case DT_GNU_HASH:
  98. m_hash_type = HashType::GNU;
  99. m_hash_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  100. break;
  101. case DT_SYMTAB:
  102. m_symbol_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  103. break;
  104. case DT_STRTAB:
  105. m_string_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  106. break;
  107. case DT_STRSZ:
  108. m_size_of_string_table = entry.val();
  109. break;
  110. case DT_SYMENT:
  111. m_size_of_symbol_table_entry = entry.val();
  112. break;
  113. case DT_PLTGOT:
  114. m_procedure_linkage_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  115. break;
  116. case DT_PLTRELSZ:
  117. m_size_of_plt_relocation_entry_list = entry.val();
  118. break;
  119. case DT_PLTREL:
  120. m_procedure_linkage_table_relocation_type = entry.val();
  121. ASSERT(m_procedure_linkage_table_relocation_type & (DT_REL | DT_RELA));
  122. break;
  123. case DT_JMPREL:
  124. m_plt_relocation_offset_location = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  125. break;
  126. case DT_RELA:
  127. case DT_REL:
  128. m_relocation_table_offset = entry.ptr() - (FlatPtr)m_elf_base_address.as_ptr();
  129. break;
  130. case DT_RELASZ:
  131. case DT_RELSZ:
  132. m_size_of_relocation_table = entry.val();
  133. break;
  134. case DT_RELAENT:
  135. case DT_RELENT:
  136. m_size_of_relocation_entry = entry.val();
  137. break;
  138. case DT_RELACOUNT:
  139. case DT_RELCOUNT:
  140. m_number_of_relocations = entry.val();
  141. break;
  142. case DT_FLAGS:
  143. m_dt_flags = entry.val();
  144. break;
  145. case DT_TEXTREL:
  146. m_dt_flags |= DF_TEXTREL; // This tag seems to exist for legacy reasons only?
  147. break;
  148. case DT_SONAME:
  149. m_soname_index = entry.val();
  150. m_has_soname = true;
  151. break;
  152. case DT_DEBUG:
  153. break;
  154. case DT_FLAGS_1:
  155. break;
  156. case DT_NEEDED:
  157. // We handle these in for_each_needed_library
  158. break;
  159. default:
  160. dbgln("DynamicObject: DYNAMIC tag handling not implemented for DT_{}", name_for_dtag(entry.tag()));
  161. ASSERT_NOT_REACHED(); // FIXME: Maybe just break out here and return false?
  162. break;
  163. }
  164. return IterationDecision::Continue;
  165. });
  166. if (!m_size_of_relocation_entry) {
  167. // TODO: FIXME, this shouldn't be hardcoded
  168. // The reason we need this here is that for some reason, when there only PLT relocations, the compiler
  169. // doesn't insert a 'PLTRELSZ' entry to the dynamic section
  170. m_size_of_relocation_entry = sizeof(Elf32_Rel);
  171. }
  172. auto hash_section_address = hash_section().address().as_ptr();
  173. // TODO: consider base address - it might not be zero
  174. auto num_hash_chains = ((u32*)hash_section_address)[1];
  175. m_symbol_count = num_hash_chains;
  176. }
  177. const DynamicObject::Relocation DynamicObject::RelocationSection::relocation(unsigned index) const
  178. {
  179. ASSERT(index < entry_count());
  180. unsigned offset_in_section = index * entry_size();
  181. auto relocation_address = (Elf32_Rel*)address().offset(offset_in_section).as_ptr();
  182. return Relocation(m_dynamic, *relocation_address, offset_in_section);
  183. }
  184. const DynamicObject::Relocation DynamicObject::RelocationSection::relocation_at_offset(unsigned offset) const
  185. {
  186. ASSERT(offset <= (m_section_size_bytes - m_entry_size));
  187. auto relocation_address = (Elf32_Rel*)address().offset(offset).as_ptr();
  188. return Relocation(m_dynamic, *relocation_address, offset);
  189. }
  190. const DynamicObject::Symbol DynamicObject::symbol(unsigned index) const
  191. {
  192. auto symbol_section = Section(*this, m_symbol_table_offset, (m_symbol_count * m_size_of_symbol_table_entry), m_size_of_symbol_table_entry, "DT_SYMTAB");
  193. auto symbol_entry = (Elf32_Sym*)symbol_section.address().offset(index * symbol_section.entry_size()).as_ptr();
  194. return Symbol(*this, index, *symbol_entry);
  195. }
  196. const DynamicObject::Section DynamicObject::init_section() const
  197. {
  198. return Section(*this, m_init_offset, sizeof(void (*)()), sizeof(void (*)()), "DT_INIT");
  199. }
  200. const DynamicObject::Section DynamicObject::fini_section() const
  201. {
  202. return Section(*this, m_fini_offset, sizeof(void (*)()), sizeof(void (*)()), "DT_FINI");
  203. }
  204. const DynamicObject::Section DynamicObject::init_array_section() const
  205. {
  206. return Section(*this, m_init_array_offset, m_init_array_size, sizeof(void (*)()), "DT_INIT_ARRAY");
  207. }
  208. const DynamicObject::Section DynamicObject::fini_array_section() const
  209. {
  210. return Section(*this, m_fini_array_offset, m_fini_array_size, sizeof(void (*)()), "DT_FINI_ARRAY");
  211. }
  212. const DynamicObject::HashSection DynamicObject::hash_section() const
  213. {
  214. const char* section_name = m_hash_type == HashType::SYSV ? "DT_HASH" : "DT_GNU_HASH";
  215. return HashSection(Section(*this, m_hash_table_offset, 0, 0, section_name), m_hash_type);
  216. }
  217. const DynamicObject::RelocationSection DynamicObject::relocation_section() const
  218. {
  219. return RelocationSection(Section(*this, m_relocation_table_offset, m_size_of_relocation_table, m_size_of_relocation_entry, "DT_REL"));
  220. }
  221. const DynamicObject::RelocationSection DynamicObject::plt_relocation_section() const
  222. {
  223. return RelocationSection(Section(*this, m_plt_relocation_offset_location, m_size_of_plt_relocation_entry_list, m_size_of_relocation_entry, "DT_JMPREL"));
  224. }
  225. u32 DynamicObject::HashSection::calculate_elf_hash(const char* name) const
  226. {
  227. // SYSV ELF hash algorithm
  228. // Note that the GNU HASH algorithm has less collisions
  229. uint32_t hash = 0;
  230. while (*name != '\0') {
  231. hash = hash << 4;
  232. hash += *name;
  233. name++;
  234. const uint32_t top_nibble_of_hash = hash & 0xF0000000U;
  235. hash ^= top_nibble_of_hash >> 24;
  236. hash &= ~top_nibble_of_hash;
  237. }
  238. return hash;
  239. }
  240. u32 DynamicObject::HashSection::calculate_gnu_hash(const char* name) const
  241. {
  242. // GNU ELF hash algorithm
  243. u32 hash = 5381;
  244. for (; *name != '\0'; ++name) {
  245. hash = hash * 33 + *name;
  246. }
  247. return hash;
  248. }
  249. const DynamicObject::Symbol DynamicObject::HashSection::lookup_symbol(const char* name) const
  250. {
  251. return (this->*(m_lookup_function))(name);
  252. }
  253. const DynamicObject::Symbol DynamicObject::HashSection::lookup_elf_symbol(const char* name) const
  254. {
  255. u32 hash_value = calculate_elf_hash(name);
  256. u32* hash_table_begin = (u32*)address().as_ptr();
  257. size_t num_buckets = hash_table_begin[0];
  258. // This is here for completeness, but, since we're using the fact that every chain
  259. // will end at chain 0 (which means 'not found'), we don't need to check num_chains.
  260. // Interestingly, num_chains is required to be num_symbols
  261. // size_t num_chains = hash_table_begin[1];
  262. u32* buckets = &hash_table_begin[2];
  263. u32* chains = &buckets[num_buckets];
  264. for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) {
  265. auto symbol = m_dynamic.symbol(i);
  266. if (strcmp(name, symbol.name()) == 0) {
  267. dbgln<DYNAMIC_LOAD_DEBUG>("Returning SYSV dynamic symbol with index {} for {}: {}", i, symbol.name(), symbol.address().as_ptr());
  268. return symbol;
  269. }
  270. }
  271. return Symbol::create_undefined(m_dynamic);
  272. }
  273. const DynamicObject::Symbol DynamicObject::HashSection::lookup_gnu_symbol(const char* name) const
  274. {
  275. // Algorithm reference: https://ent-voy.blogspot.com/2011/02/
  276. // TODO: Handle 64bit bloomwords for ELF_CLASS64
  277. using BloomWord = u32;
  278. constexpr size_t bloom_word_size = sizeof(BloomWord) * 8;
  279. const u32* hash_table_begin = (u32*)address().as_ptr();
  280. const size_t num_buckets = hash_table_begin[0];
  281. const size_t num_omitted_symbols = hash_table_begin[1];
  282. const u32 num_maskwords = hash_table_begin[2];
  283. // This works because num_maskwords is required to be a power of 2
  284. const u32 num_maskwords_bitmask = num_maskwords - 1;
  285. const u32 shift2 = hash_table_begin[3];
  286. const BloomWord* bloom_words = &hash_table_begin[4];
  287. const u32* const buckets = &bloom_words[num_maskwords];
  288. const u32* const chains = &buckets[num_buckets];
  289. BloomWord hash1 = calculate_gnu_hash(name);
  290. BloomWord hash2 = hash1 >> shift2;
  291. const BloomWord bitmask = (1 << (hash1 % bloom_word_size)) | (1 << (hash2 % bloom_word_size));
  292. if ((bloom_words[(hash1 / bloom_word_size) & num_maskwords_bitmask] & bitmask) != bitmask) {
  293. return Symbol::create_undefined(m_dynamic);
  294. }
  295. size_t current_sym = buckets[hash1 % num_buckets];
  296. if (current_sym == 0) {
  297. return Symbol::create_undefined(m_dynamic);
  298. }
  299. const u32* current_chain = &chains[current_sym - num_omitted_symbols];
  300. for (hash1 &= ~1;; ++current_sym) {
  301. hash2 = *(current_chain++);
  302. const auto symbol = m_dynamic.symbol(current_sym);
  303. if ((hash1 == (hash2 & ~1)) && strcmp(name, symbol.name()) == 0) {
  304. dbgln<DYNAMIC_LOAD_DEBUG>("Returning GNU dynamic symbol with index {} for {}: {}", current_sym, symbol.name(), symbol.address().as_ptr());
  305. return symbol;
  306. }
  307. if (hash2 & 1) {
  308. break;
  309. }
  310. }
  311. return Symbol::create_undefined(m_dynamic);
  312. }
  313. const char* DynamicObject::symbol_string_table_string(Elf32_Word index) const
  314. {
  315. return (const char*)base_address().offset(m_string_table_offset + index).as_ptr();
  316. }
  317. DynamicObject::InitializationFunction DynamicObject::init_section_function() const
  318. {
  319. ASSERT(has_init_section());
  320. return (InitializationFunction)init_section().address().as_ptr();
  321. }
  322. static const char* name_for_dtag(Elf32_Sword d_tag)
  323. {
  324. switch (d_tag) {
  325. case DT_NULL:
  326. return "NULL"; /* marks end of _DYNAMIC array */
  327. case DT_NEEDED:
  328. return "NEEDED"; /* string table offset of needed lib */
  329. case DT_PLTRELSZ:
  330. return "PLTRELSZ"; /* size of relocation entries in PLT */
  331. case DT_PLTGOT:
  332. return "PLTGOT"; /* address PLT/GOT */
  333. case DT_HASH:
  334. return "HASH"; /* address of symbol hash table */
  335. case DT_STRTAB:
  336. return "STRTAB"; /* address of string table */
  337. case DT_SYMTAB:
  338. return "SYMTAB"; /* address of symbol table */
  339. case DT_RELA:
  340. return "RELA"; /* address of relocation table */
  341. case DT_RELASZ:
  342. return "RELASZ"; /* size of relocation table */
  343. case DT_RELAENT:
  344. return "RELAENT"; /* size of relocation entry */
  345. case DT_STRSZ:
  346. return "STRSZ"; /* size of string table */
  347. case DT_SYMENT:
  348. return "SYMENT"; /* size of symbol table entry */
  349. case DT_INIT:
  350. return "INIT"; /* address of initialization func. */
  351. case DT_FINI:
  352. return "FINI"; /* address of termination function */
  353. case DT_SONAME:
  354. return "SONAME"; /* string table offset of shared obj */
  355. case DT_RPATH:
  356. return "RPATH"; /* string table offset of library search path */
  357. case DT_SYMBOLIC:
  358. return "SYMBOLIC"; /* start sym search in shared obj. */
  359. case DT_REL:
  360. return "REL"; /* address of rel. tbl. w addends */
  361. case DT_RELSZ:
  362. return "RELSZ"; /* size of DT_REL relocation table */
  363. case DT_RELENT:
  364. return "RELENT"; /* size of DT_REL relocation entry */
  365. case DT_PLTREL:
  366. return "PLTREL"; /* PLT referenced relocation entry */
  367. case DT_DEBUG:
  368. return "DEBUG"; /* bugger */
  369. case DT_TEXTREL:
  370. return "TEXTREL"; /* Allow rel. mod. to unwritable seg */
  371. case DT_JMPREL:
  372. return "JMPREL"; /* add. of PLT's relocation entries */
  373. case DT_BIND_NOW:
  374. return "BIND_NOW"; /* Bind now regardless of env setting */
  375. case DT_INIT_ARRAY:
  376. return "INIT_ARRAY"; /* address of array of init func */
  377. case DT_FINI_ARRAY:
  378. return "FINI_ARRAY"; /* address of array of term func */
  379. case DT_INIT_ARRAYSZ:
  380. return "INIT_ARRAYSZ"; /* size of array of init func */
  381. case DT_FINI_ARRAYSZ:
  382. return "FINI_ARRAYSZ"; /* size of array of term func */
  383. case DT_RUNPATH:
  384. return "RUNPATH"; /* strtab offset of lib search path */
  385. case DT_FLAGS:
  386. return "FLAGS"; /* Set of DF_* flags */
  387. case DT_ENCODING:
  388. return "ENCODING"; /* further DT_* follow encoding rules */
  389. case DT_PREINIT_ARRAY:
  390. return "PREINIT_ARRAY"; /* address of array of preinit func */
  391. case DT_PREINIT_ARRAYSZ:
  392. return "PREINIT_ARRAYSZ"; /* size of array of preinit func */
  393. case DT_LOOS:
  394. return "LOOS"; /* reserved range for OS */
  395. case DT_HIOS:
  396. return "HIOS"; /* specific dynamic array tags */
  397. case DT_LOPROC:
  398. return "LOPROC"; /* reserved range for processor */
  399. case DT_HIPROC:
  400. return "HIPROC"; /* specific dynamic array tags */
  401. case DT_GNU_HASH:
  402. return "GNU_HASH"; /* address of GNU hash table */
  403. case DT_RELACOUNT:
  404. return "RELACOUNT"; /* if present, number of RELATIVE */
  405. case DT_RELCOUNT:
  406. return "RELCOUNT"; /* relocs, which must come first */
  407. case DT_FLAGS_1:
  408. return "FLAGS_1";
  409. default:
  410. return "??";
  411. }
  412. }
  413. DynamicObject::SymbolLookupResult DynamicObject::lookup_symbol(const char* name) const
  414. {
  415. auto res = hash_section().lookup_symbol(name);
  416. if (res.is_undefined())
  417. return {};
  418. return SymbolLookupResult { true, res.value(), (FlatPtr)res.address().as_ptr(), res.bind(), this };
  419. }
  420. NonnullRefPtr<DynamicObject> DynamicObject::construct(VirtualAddress base_address, VirtualAddress dynamic_section_address)
  421. {
  422. return adopt(*new DynamicObject(base_address, dynamic_section_address));
  423. }
  424. // offset is in PLT relocation table
  425. Elf32_Addr DynamicObject::patch_plt_entry(u32 relocation_offset)
  426. {
  427. auto relocation = plt_relocation_section().relocation_at_offset(relocation_offset);
  428. ASSERT(relocation.type() == R_386_JMP_SLOT);
  429. auto sym = relocation.symbol();
  430. u8* relocation_address = relocation.address().as_ptr();
  431. auto res = lookup_symbol(sym);
  432. if (!res.found) {
  433. dbgln("did not find symbol: {} ", sym.name());
  434. ASSERT_NOT_REACHED();
  435. }
  436. u32 symbol_location = res.address;
  437. dbgln<DYNAMIC_LOAD_DEBUG>("DynamicLoader: Jump slot relocation: putting {} ({:p}) into PLT at {}", sym.name(), symbol_location, (void*)relocation_address);
  438. *(u32*)relocation_address = symbol_location;
  439. return symbol_location;
  440. }
  441. DynamicObject::SymbolLookupResult DynamicObject::lookup_symbol(const ELF::DynamicObject::Symbol& symbol) const
  442. {
  443. dbgln<DYNAMIC_LOAD_DEBUG>("looking up symbol: {}", symbol.name());
  444. if (symbol.is_undefined() || symbol.bind() == STB_WEAK)
  445. return DynamicLinker::lookup_global_symbol(symbol.name());
  446. if (!symbol.is_undefined()) {
  447. dbgln<DYNAMIC_LOAD_DEBUG>("symbol is defined in its object");
  448. return { true, symbol.value(), (FlatPtr)symbol.address().as_ptr(), symbol.bind(), &symbol.object() };
  449. }
  450. return DynamicLinker::lookup_global_symbol(symbol.name());
  451. }
  452. } // end namespace ELF