Browse Source

LibELF: Add ELFDynamicObject to dynamically load libraries

This patch also adds some missing relocation defines to exec_elf.h,
and a few helper classes/methods to ELFImage so that we can use it
for our dynamically loaded libs and not just main program images from
the kernel :)
Andrew Kaster 5 years ago
parent
commit
a18b37880e

+ 608 - 0
Libraries/LibELF/ELFDynamicObject.cpp

@@ -0,0 +1,608 @@
+#include <AK/StringBuilder.h>
+#include <LibELF/ELFDynamicObject.h>
+
+#include <assert.h>
+#include <mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+// Compile-time debug switches for this file:
+//  - DYNAMIC_LOAD_DEBUG enables the dbgprintf() progress output inside the #ifdef blocks below.
+//  - DYNAMIC_LOAD_VERBOSE (commented out) additionally turns the VERBOSE() macro into dbgprintf().
+#define DYNAMIC_LOAD_DEBUG
+//#define DYNAMIC_LOAD_VERBOSE
+
+#ifdef DYNAMIC_LOAD_VERBOSE
+#    define VERBOSE(fmt, ...) dbgprintf(fmt, ##__VA_ARGS__)
+#else
+#    define VERBOSE(fmt, ...) do { } while (0)
+#endif
+
+// When true, do_relocations() resolves every PLT jump slot at load time, regardless of DF_BIND_NOW.
+static bool s_always_bind_now = true;
+
+// Maps a DT_* tag to a printable name; defined later in this file.
+static const char* name_for_dtag(Elf32_Sword tag);
+
+// SYSV ELF hash algorithm, as used by DT_HASH tables.
+// Note that the GNU HASH algorithm has fewer collisions.
+//
+// name: NUL-terminated symbol name.
+// Returns the hash value; its top nibble is always folded back in and cleared.
+static uint32_t calculate_elf_hash(const char* name)
+{
+    uint32_t hash = 0;
+
+    // The reference algorithm consumes the name as unsigned bytes. Reading through
+    // a plain (possibly signed) char would sign-extend bytes >= 0x80 and produce a
+    // hash that does not match the one the linker stored in the hash table.
+    for (const unsigned char* cursor = reinterpret_cast<const unsigned char*>(name); *cursor != '\0'; ++cursor) {
+        hash = (hash << 4) + *cursor;
+
+        // Fold the top nibble into bits 4..7, then clear it.
+        const uint32_t top_nibble_of_hash = hash & 0xF0000000U;
+        if (top_nibble_of_hash != 0)
+            hash ^= top_nibble_of_hash >> 24;
+        hash &= ~top_nibble_of_hash;
+    }
+
+    return hash;
+}
+
+// Factory: heap-allocates an ELFDynamicObject for the given file and returns it wrapped in a NonnullRefPtr.
+NonnullRefPtr<ELFDynamicObject> ELFDynamicObject::construct(const char* filename, int fd, size_t size)
+{
+    auto* object = new ELFDynamicObject(filename, fd, size);
+    return adopt(*object);
+}
+
+// Maps the whole file read-only, parses it as an ELF image and records in m_valid
+// whether it is a well-formed dynamic object with a proper ".dynamic" section.
+// The constructor never fails loudly; callers are expected to check is_valid().
+ELFDynamicObject::ELFDynamicObject(const char* filename, int fd, size_t size)
+    : m_filename(filename)
+    , m_file_size(size)
+    , m_image_fd(fd)
+{
+    const String mapping_name = String::format("ELF_DYN: %s", m_filename.characters());
+    m_file_mapping = mmap_with_name(nullptr, size, PROT_READ, MAP_PRIVATE, m_image_fd, 0, mapping_name.characters());
+
+    // Without a mapping there is nothing to parse.
+    if (m_file_mapping == MAP_FAILED) {
+        m_valid = false;
+        return;
+    }
+
+    m_image = AK::make<ELFImage>((u8*)m_file_mapping);
+
+    // Evaluated left to right with short-circuiting: parse() only runs on a valid image.
+    m_valid = m_image->is_valid() && m_image->parse() && m_image->is_dynamic();
+    if (!m_valid)
+        return;
+
+    // The section the image reports as dynamic must match by both name and type.
+    const ELFImage::DynamicSection candidate = m_image->dynamic_section();
+    if (StringView(".dynamic") != candidate.name() || candidate.type() != SHT_DYNAMIC)
+        m_valid = false;
+}
+
+// Releases the read-only mapping of the file created by the constructor, if that mapping succeeded.
+ELFDynamicObject::~ELFDynamicObject()
+{
+    if (m_file_mapping == MAP_FAILED)
+        return;
+    munmap(m_file_mapping, m_file_size);
+}
+
+// Prints every entry of the dynamic section (tag, tag name, value) to the debug log.
+void ELFDynamicObject::dump()
+{
+    auto dynamic_section = m_image->dynamic_section();
+
+    StringBuilder builder;
+    builder.append("\nd_tag      tag_name         value\n");
+    size_t num_dynamic_sections = 0;
+
+    dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) {
+        String name_field = String::format("(%s)", name_for_dtag(entry.tag()));
+        builder.appendf("0x%08X %-17s0x%X\n", entry.tag(), name_field.characters(), entry.val());
+        num_dynamic_sections++;
+        return IterationDecision::Continue;
+    });
+
+    dbgprintf("Dynamic section at offset 0x%x contains %zu entries:\n", dynamic_section.offset(), num_dynamic_sections);
+    // Never pass generated text as the format string: a stray '%' in it would be
+    // interpreted as a conversion specifier.
+    dbgprintf("%s", builder.to_string().characters());
+}
+
+// Walks the dynamic section and caches the offsets, sizes and flags from its
+// DT_* entries in the corresponding members. Any tag without a case below is
+// treated as a fatal error.
+void ELFDynamicObject::parse_dynamic_section()
+{
+    auto dynamic_section = m_image->dynamic_section();
+    dynamic_section.for_each_dynamic_entry([&](const ELFImage::DynamicSectionEntry& entry) {
+        switch (entry.tag()) {
+        case DT_INIT:
+            m_init_offset = entry.ptr();
+            break;
+        case DT_FINI:
+            m_fini_offset = entry.ptr();
+            break;
+        case DT_INIT_ARRAY:
+            m_init_array_offset = entry.ptr();
+            break;
+        case DT_INIT_ARRAYSZ:
+            m_init_array_size = entry.val();
+            break;
+        case DT_HASH:
+            m_hash_table_offset = entry.ptr();
+            break;
+        case DT_SYMTAB:
+            m_symbol_table_offset = entry.ptr();
+            break;
+        case DT_STRTAB:
+            m_string_table_offset = entry.ptr();
+            break;
+        case DT_STRSZ:
+            m_size_of_string_table = entry.val();
+            break;
+        case DT_SYMENT:
+            m_size_of_symbol_table_entry = entry.val();
+            break;
+        case DT_PLTGOT:
+            m_procedure_linkage_table_offset = entry.ptr();
+            break;
+        case DT_PLTRELSZ:
+            m_size_of_plt_relocation_entry_list = entry.val();
+            break;
+        case DT_PLTREL:
+            m_procedure_linkage_table_relocation_type = entry.val();
+            // DT_PLTREL holds a tag value (DT_REL or DT_RELA), not a bit mask, so it
+            // has to be compared for equality rather than tested with '&'.
+            ASSERT(m_procedure_linkage_table_relocation_type == DT_REL || m_procedure_linkage_table_relocation_type == DT_RELA);
+            break;
+        case DT_JMPREL:
+            m_plt_relocation_offset_location = entry.ptr();
+            break;
+        case DT_RELA:
+        case DT_REL:
+            m_relocation_table_offset = entry.ptr();
+            break;
+        case DT_RELASZ:
+        case DT_RELSZ:
+            m_size_of_relocation_table = entry.val();
+            break;
+        case DT_RELAENT:
+        case DT_RELENT:
+            m_size_of_relocation_entry = entry.val();
+            break;
+        case DT_RELACOUNT:
+        case DT_RELCOUNT:
+            m_number_of_relocations = entry.val();
+            break;
+        case DT_FLAGS:
+            m_must_bind_now = entry.val() & DF_BIND_NOW;
+            m_has_text_relocations = entry.val() & DF_TEXTREL;
+            m_should_process_origin = entry.val() & DF_ORIGIN;
+            m_has_static_thread_local_storage = entry.val() & DF_STATIC_TLS;
+            m_requires_symbolic_symbol_resolution = entry.val() & DF_SYMBOLIC;
+            break;
+        case DT_TEXTREL:
+            m_has_text_relocations = true; // This tag seems to exist for legacy reasons only?
+            break;
+        default:
+            dbgprintf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag()));
+            printf("ELFDynamicObject: DYNAMIC tag handling not implemented for DT_%s\n", name_for_dtag(entry.tag()));
+            ASSERT_NOT_REACHED(); // FIXME: Maybe just break out here and return false?
+            break;
+        }
+        return IterationDecision::Continue;
+    });
+}
+
+typedef void (*InitFunc)();
+
+// Maps the text and data segments into memory, applies relocations, fills in the
+// reserved .got.plt slots and runs the DT_INIT_ARRAY functions.
+//
+// flags: dlopen()-style flags; RTLD_GLOBAL and RTLD_LAZY must both be set.
+// Returns false if a segment could not be mapped or re-protected, true otherwise.
+bool ELFDynamicObject::load(unsigned flags)
+{
+    ASSERT(flags & RTLD_GLOBAL);
+    ASSERT(flags & RTLD_LAZY);
+
+#ifdef DYNAMIC_LOAD_DEBUG
+    dump();
+#endif
+#ifdef DYNAMIC_LOAD_VERBOSE
+    m_image->dump();
+#endif
+
+    parse_dynamic_section();
+
+    // FIXME: be more flexible?
+    size_t total_required_allocation_size = 0;
+
+    // FIXME: Can we re-use ELFLoader? This and what follows looks a lot like what's in there...
+    //     With the exception of using desired_load_address().offset(text_segment_begin)
+    //     It seems kinda gross to expect the program headers to be in a specific order..
+    m_image->for_each_program_header([&](const ELFImage::ProgramHeader& program_header) {
+        ProgramHeaderRegion new_region(program_header.raw_header());
+        if (new_region.is_load())
+            total_required_allocation_size += new_region.required_load_size();
+        m_program_header_regions.append(move(new_region));
+    });
+
+    // Only take pointers into the vector once it has stopped growing: an append()
+    // may reallocate the storage and leave earlier element pointers dangling.
+    for (auto& region : m_program_header_regions) {
+        if (region.is_tls_template())
+            m_tls_region = &region;
+        else if (region.is_load()) {
+            if (region.is_executable())
+                m_text_region = &region;
+            else
+                m_data_region = &region;
+        }
+    }
+
+    ASSERT(m_text_region && m_data_region);
+
+    // Process regions in order: .text, .data, .tls
+    auto* region = m_text_region;
+    size_t text_segment_size = region->required_load_size();
+    void* text_segment_begin = mmap_with_name(nullptr, text_segment_size, region->mmap_prot(), MAP_PRIVATE, m_image_fd, region->offset(), String::format(".text: %s", m_filename.characters()).characters());
+    if (MAP_FAILED == text_segment_begin) {
+        perror("mmap_with_name"); // FIXME: dlerror?
+        return false;
+    }
+    region->set_base_address(VirtualAddress { (u32)text_segment_begin });
+    region->set_load_address(VirtualAddress { (u32)text_segment_begin });
+
+    region = m_data_region;
+    size_t data_segment_size = region->required_load_size();
+    // The data segment is requested directly behind the text segment; the sanity check below verifies it landed there.
+    void* data_segment_begin = mmap_with_name((u8*)text_segment_begin + text_segment_size, data_segment_size, region->mmap_prot(), MAP_ANONYMOUS | MAP_PRIVATE, 0, 0, String::format(".data: %s", m_filename.characters()).characters());
+    if (MAP_FAILED == data_segment_begin) {
+        perror("mmap_with_name"); // FIXME: dlerror?
+        munmap(text_segment_begin, text_segment_size);
+        return false;
+    }
+    VirtualAddress data_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin);
+    region->set_base_address(VirtualAddress { (u32)text_segment_begin });
+    region->set_load_address(data_segment_actual_addr);
+    memcpy(data_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image());
+
+    if (m_tls_region) {
+        // Operate on the TLS region itself (not on the data region a second time).
+        region = m_tls_region;
+        VirtualAddress tls_segment_actual_addr = region->desired_load_address().offset((u32)text_segment_begin);
+        region->set_base_address(VirtualAddress { (u32)text_segment_begin });
+        region->set_load_address(tls_segment_actual_addr);
+        memcpy(tls_segment_actual_addr.as_ptr(), (u8*)m_file_mapping + region->offset(), region->size_in_image());
+    }
+
+    // sanity check
+    u8* end_of_in_memory_image = (u8*)data_segment_begin + data_segment_size;
+    ASSERT((ptrdiff_t)total_required_allocation_size == (ptrdiff_t)(end_of_in_memory_image - (u8*)text_segment_begin));
+
+    // Text relocations patch the text segment, so it must be writable while relocating.
+    if (m_has_text_relocations) {
+        if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_WRITE)) {
+            perror("mprotect"); // FIXME: dlerror?
+            return false;
+        }
+    }
+
+    do_relocations();
+
+#ifdef DYNAMIC_LOAD_DEBUG
+    dbgprintf("Done relocating!\n");
+#endif
+
+    // FIXME: PLT patching doesn't seem to work as expected.
+    //     Need to dig into the spec to see what we're doing wrong
+    //     Hopefully it won't need an assembly entry point... :/
+    ///    For now we can just BIND_NOW every time
+
+    // This should be the address of section ".got.plt"
+    const ELFImage::Section& got_section = m_image->lookup_section(".got.plt");
+    VirtualAddress got_address = m_text_region->load_address().offset(got_section.address());
+
+    // Slot 1 receives this object, slot 2 the lazy-binding callback.
+    u32* got_u32_ptr = reinterpret_cast<u32*>(got_address.as_ptr());
+    got_u32_ptr[1] = (u32)this;
+    got_u32_ptr[2] = (u32)&ELFDynamicObject::patch_plt_entry;
+
+#ifdef DYNAMIC_LOAD_DEBUG
+    dbgprintf("Set GOT PLT entries at %p: [0] = %p [1] = %p, [2] = %p\n", got_u32_ptr, got_u32_ptr[0], got_u32_ptr[1], got_u32_ptr[2]);
+#endif
+
+    // Clean up our setting of .text to PROT_READ | PROT_WRITE
+    if (m_has_text_relocations) {
+        if (0 > mprotect(m_text_region->load_address().as_ptr(), m_text_region->required_load_size(), PROT_READ | PROT_EXEC)) {
+            perror("mprotect"); // FIXME: dlerror?
+            return false;
+        }
+    }
+
+    u8* load_addr = m_text_region->load_address().as_ptr();
+    InitFunc init_function = (InitFunc)(load_addr + m_init_offset);
+
+#ifdef DYNAMIC_LOAD_DEBUG
+    dbgprintf("Calling DT_INIT at %p\n", init_function);
+#endif
+    // FIXME:
+    // Disassembly of section .init:
+    //
+    //  00007e98 <_init>:
+    //        7e98:       55                      push   ebp
+    //
+    // Where da ret at? related to -nostartfiles for sure...
+    //(init_function)();
+
+    InitFunc* init_begin = (InitFunc*)(load_addr + m_init_array_offset);
+    u32 init_end = (u32)((u8*)init_begin + m_init_array_size);
+    // The increment lives in the loop header so that skipped entries still advance
+    // the cursor; a bare 'continue' in a while loop would spin forever on them.
+    for (InitFunc* init_entry = init_begin; (u32)init_entry < init_end; ++init_entry) {
+        // Android sources claim that these can be -1, to be ignored.
+        // 0 definitely shows up. Apparently 0/-1 are valid? Confusing.
+        if (!*init_entry || ((i32)*init_entry == -1))
+            continue;
+#ifdef DYNAMIC_LOAD_DEBUG
+        dbgprintf("Calling DT_INITARRAY entry at %p\n", *init_entry);
+#endif
+        (*init_entry)();
+    }
+
+#ifdef DYNAMIC_LOAD_DEBUG
+    dbgprintf("Loaded %s\n", m_filename.characters());
+#endif
+    // FIXME: return false sometimes? missing symbol etc
+    return true;
+}
+
+// Looks up a symbol by name in the loaded object's DT_HASH table.
+//
+// name: NUL-terminated symbol name.
+// Returns the symbol's address inside the loaded image, or nullptr if the name is
+// null, the object has not been loaded yet, or no such symbol exists.
+void* ELFDynamicObject::symbol_for_name(const char* name)
+{
+    // Nothing can be resolved before load() has mapped the object.
+    if (!name || !m_text_region)
+        return nullptr;
+
+    // FIXME: If we enable gnu hash in the compiler, we should use that here instead
+    //     The algo is way better with less collisions
+    uint32_t hash_value = calculate_elf_hash(name);
+
+    u8* load_addr = m_text_region->load_address().as_ptr();
+
+    // NOTE: We need to use the loaded hash/string/symbol tables here to get the right
+    //    addresses. The ones that are in the ELFImage won't cut it, they aren't relocated
+    u32* hash_table_begin = (u32*)(load_addr + m_hash_table_offset);
+    Elf32_Sym* symtab = (Elf32_Sym*)(load_addr + m_symbol_table_offset);
+    const char* strtab = (const char*)load_addr + m_string_table_offset;
+
+    size_t num_buckets = hash_table_begin[0];
+    // An empty hash table has no buckets; bail out instead of computing 'hash % 0'.
+    if (num_buckets == 0)
+        return nullptr;
+
+    // This is here for completeness, but, since we're using the fact that every chain
+    // will end at chain 0 (which means 'not found'), we don't need to check num_chains.
+    // Interestingly, num_chains is required to be num_symbols
+    //size_t num_chains = hash_table_begin[1];
+
+    u32* buckets = &hash_table_begin[2];
+    u32* chains = &buckets[num_buckets];
+
+    for (u32 i = buckets[hash_value % num_buckets]; i; i = chains[i]) {
+        if (strcmp(name, strtab + symtab[i].st_name) == 0) {
+            void* retval = load_addr + symtab[i].st_value;
+#ifdef DYNAMIC_LOAD_DEBUG
+            dbgprintf("Returning dynamic symbol with index %d for %s: %p\n", i, strtab + symtab[i].st_name, retval);
+#endif
+            return retval;
+        }
+    }
+
+    return nullptr;
+}
+
+// Intended lazy-binding callback: load() stores `this` in .got.plt slot 1 and the
+// address of this function in slot 2.
+// got_offset comes from the PLT entry; dso_got_tag is the ELFDynamicObject pointer.
+// Currently a stub that crashes on entry; everything after the first CRASH() is
+// unreachable work in progress.
+void ELFDynamicObject::patch_plt_entry(u32 got_offset, void* dso_got_tag)
+{
+    // FIXME: This is never called :(
+    CRASH();
+    dbgprintf("------ PATCHING PLT ENTRY -------");
+    // NOTE: We put 'this' into the GOT when we loaded it into memory
+    auto* object = reinterpret_cast<ELFDynamicObject*>(dso_got_tag);
+    auto* text_region = object->m_text_region;
+
+    // FIXME: might actually be a RelA, check m_plt_relocation_type
+    const u32 relocation_index = got_offset / object->m_size_of_relocation_entry;
+    auto relocation = object->m_image->dynamic_relocation_section().relocation(relocation_index);
+
+    ASSERT(relocation.type() == R_386_JMP_SLOT);
+
+    auto symbol = relocation.symbol();
+
+    u8* text_base = text_region->load_address().as_ptr();
+    u8* slot_address = text_base + relocation.offset();
+
+    // Make the text segment writable for the duration of the patch.
+    if (mprotect(text_base, text_region->required_load_size(), PROT_READ | PROT_WRITE) < 0) {
+        ASSERT_NOT_REACHED(); // uh oh, no can do boss
+    }
+
+    dbgprintf("Found relocation address: %p for %s", slot_address, symbol.name());
+
+    *(u32*)slot_address = (u32)(text_base + symbol.value());
+
+    // Restore the executable, non-writable protection.
+    if (mprotect(text_base, text_region->required_load_size(), PROT_READ | PROT_EXEC) < 0) {
+        ASSERT_NOT_REACHED(); // uh oh, no can do boss
+    }
+
+    CRASH();
+    // FIXME: Call the relocated method here?
+}
+
+// Applies every relocation in ".rel.dyn" to the loaded image and, when binding
+// eagerly, resolves all PLT jump slots as well.
+// Reads the region addresses that load() sets, so it must run after the segments are mapped.
+void ELFDynamicObject::do_relocations()
+{
+    auto dyn_relocation_section = m_image->dynamic_relocation_section();
+    if (StringView(".rel.dyn") != dyn_relocation_section.name() || SHT_REL != dyn_relocation_section.type()) {
+        ASSERT_NOT_REACHED();
+    }
+
+    u8* load_base_address = m_text_region->base_address().as_ptr();
+
+    int i = -1;
+
+    // FIXME: We should really bail on undefined symbols here. (but, there's some TLS vars that are currently undef soooo.... :) )
+
+    dyn_relocation_section.for_each_relocation([&](const ELFImage::DynamicRelocation& relocation) {
+        ++i;
+        VERBOSE("====== RELOCATION %d: offset 0x%08X, type %d, symidx %08X\n", i, relocation.offset(), relocation.type(), relocation.symbol_index());
+        u32* patch_ptr = (u32*)(load_base_address + relocation.offset());
+        switch (relocation.type()) {
+        case R_386_NONE:
+            // Apparently most loaders will just skip these?
+            // Seems if the 'link editor' generates one something is funky with your code
+            VERBOSE("None relocation. No symbol, no nothin.\n");
+            break;
+        case R_386_32: {
+            auto symbol = relocation.symbol();
+
+            VERBOSE("Absolute relocation: name: '%s', value: %p\n", symbol.name(), symbol.value());
+            if (symbol.bind() == STB_LOCAL) {
+                u32 symbol_address = symbol.section().address() + symbol.value();
+                *patch_ptr += symbol_address;
+            } else if (symbol.bind() == STB_GLOBAL) {
+                u32 symbol_address = symbol.value() + (u32)load_base_address;
+                *patch_ptr += symbol_address;
+            } else if (symbol.bind() == STB_WEAK) {
+                // FIXME: Handle weak symbols...
+                dbgprintf("ELFDynamicObject: Ignoring weak symbol %s\n", symbol.name());
+            } else {
+                VERBOSE("Found new fun symbol bind value %d\n", symbol.bind());
+                ASSERT_NOT_REACHED();
+            }
+            VERBOSE("   Symbol address: %p\n", *patch_ptr);
+            break;
+        }
+        case R_386_PC32: {
+            auto symbol = relocation.symbol();
+            VERBOSE("PC-relative relocation: '%s', value: %p\n", symbol.name(), symbol.value());
+            u32 relative_offset = (symbol.value() - relocation.offset());
+            *patch_ptr += relative_offset;
+            VERBOSE("   Symbol address: %p\n", *patch_ptr);
+            break;
+        }
+        case R_386_GLOB_DAT: {
+            auto symbol = relocation.symbol();
+            VERBOSE("Global data relocation: '%s', value: %p\n", symbol.name(), symbol.value());
+            u32 symbol_location = (u32)(m_data_region->base_address().as_ptr() + symbol.value());
+            *patch_ptr = symbol_location;
+            VERBOSE("   Symbol address: %p\n", *patch_ptr);
+            break;
+        }
+        case R_386_RELATIVE: {
+            // FIXME: According to the spec, R_386_relative ones must be done first.
+            //     We could explicitly do them first using m_number_of_relocations from DT_RELCOUNT
+            //     However, our compiler is nice enough to put them at the front of the relocations for us :)
+            VERBOSE("Load address relocation at offset %X\n", relocation.offset());
+            VERBOSE("    patch ptr == %p, adding load base address (%p) to it and storing %p\n", *patch_ptr, load_base_address, *patch_ptr + (u32)load_base_address);
+            *patch_ptr += (u32)load_base_address; // + addend for RelA (addend for Rel is stored at addr)
+            break;
+        }
+        case R_386_TLS_TPOFF: {
+            VERBOSE("Relocation type: R_386_TLS_TPOFF at offset %X\n", relocation.offset());
+            // A TLS relocation in an object without a PT_TLS segment is malformed;
+            // fail loudly instead of dereferencing a null region pointer.
+            ASSERT(m_tls_region);
+            // FIXME: this can't be right? I have no idea what "negative offset into TLS storage" means...
+            // FIXME: Check m_has_static_tls and do something different for dynamic TLS
+            VirtualAddress tls_region_location = m_tls_region->desired_load_address();
+            *patch_ptr = relocation.offset() - (u32)tls_region_location.as_ptr() - *patch_ptr;
+            break;
+        }
+        default:
+            // Raise the alarm! Someone needs to implement this relocation type
+            dbgprintf("Found a new exciting relocation type %d\n", relocation.type());
+            printf("ELFDynamicObject: Found unknown relocation type %d\n", relocation.type());
+            ASSERT_NOT_REACHED();
+            break;
+        }
+        return IterationDecision::Continue;
+    });
+
+    // FIXME: Or BIND_NOW flag passed in?
+    if (m_must_bind_now || s_always_bind_now) {
+        // FIXME: Why do we keep jumping to the entry in the GOT without going to our callback first?
+        //     that would make this s_always_bind_now redundant
+
+        // The loop below advances by the entry size from DT_RELENT; if that tag was
+        // absent the stride would be zero and the loop would never terminate.
+        if (m_size_of_plt_relocation_entry_list > 0)
+            ASSERT(m_size_of_relocation_entry > 0);
+
+        for (size_t idx = 0; idx < m_size_of_plt_relocation_entry_list; idx += m_size_of_relocation_entry) {
+            VirtualAddress relocation_vaddr = m_text_region->load_address().offset(m_plt_relocation_offset_location).offset(idx);
+            Elf32_Rel* jump_slot_relocation = (Elf32_Rel*)relocation_vaddr.as_ptr();
+
+            ASSERT(ELF32_R_TYPE(jump_slot_relocation->r_info) == R_386_JMP_SLOT);
+
+            auto sym = m_image->dynamic_symbol(ELF32_R_SYM(jump_slot_relocation->r_info));
+
+            auto* image_base_address = m_text_region->base_address().as_ptr();
+            u8* relocation_address = image_base_address + jump_slot_relocation->r_offset;
+            u32 symbol_location = (u32)(image_base_address + sym.value());
+
+            VERBOSE("ELFDynamicObject: Jump slot relocation: putting %s (%p) into PLT at %p\n", sym.name(), symbol_location, relocation_address);
+
+            *(u32*)relocation_address = symbol_location;
+        }
+    }
+}
+
+// Translates the segment's readable/writable/executable flags into the matching PROT_* bits.
+u32 ELFDynamicObject::ProgramHeaderRegion::mmap_prot() const
+{
+    int prot = 0;
+    if (is_executable())
+        prot |= PROT_EXEC;
+    if (is_readable())
+        prot |= PROT_READ;
+    if (is_writable())
+        prot |= PROT_WRITE;
+    return prot;
+}
+
+// Returns the printable name (without the "DT_" prefix) of a dynamic section tag,
+// or "??" for a tag that is not listed in the table below.
+static const char* name_for_dtag(Elf32_Sword d_tag)
+{
+    struct TagName {
+        Elf32_Sword tag;
+        const char* name;
+    };
+
+    static const TagName s_tag_names[] = {
+        { DT_NULL, "NULL" },                       // marks end of _DYNAMIC array
+        { DT_NEEDED, "NEEDED" },                   // string table offset of needed lib
+        { DT_PLTRELSZ, "PLTRELSZ" },               // size of relocation entries in PLT
+        { DT_PLTGOT, "PLTGOT" },                   // address PLT/GOT
+        { DT_HASH, "HASH" },                       // address of symbol hash table
+        { DT_STRTAB, "STRTAB" },                   // address of string table
+        { DT_SYMTAB, "SYMTAB" },                   // address of symbol table
+        { DT_RELA, "RELA" },                       // address of relocation table with addends
+        { DT_RELASZ, "RELASZ" },                   // size of DT_RELA relocation table
+        { DT_RELAENT, "RELAENT" },                 // size of DT_RELA relocation entry
+        { DT_STRSZ, "STRSZ" },                     // size of string table
+        { DT_SYMENT, "SYMENT" },                   // size of symbol table entry
+        { DT_INIT, "INIT" },                       // address of initialization function
+        { DT_FINI, "FINI" },                       // address of termination function
+        { DT_SONAME, "SONAME" },                   // string table offset of shared object name
+        { DT_RPATH, "RPATH" },                     // string table offset of library search path
+        { DT_SYMBOLIC, "SYMBOLIC" },               // start symbol search in shared object
+        { DT_REL, "REL" },                         // address of relocation table without addends
+        { DT_RELSZ, "RELSZ" },                     // size of DT_REL relocation table
+        { DT_RELENT, "RELENT" },                   // size of DT_REL relocation entry
+        { DT_PLTREL, "PLTREL" },                   // PLT referenced relocation entry
+        { DT_DEBUG, "DEBUG" },                     // debugger
+        { DT_TEXTREL, "TEXTREL" },                 // allow relocations to modify an unwritable segment
+        { DT_JMPREL, "JMPREL" },                   // address of PLT's relocation entries
+        { DT_BIND_NOW, "BIND_NOW" },               // bind now regardless of env setting
+        { DT_INIT_ARRAY, "INIT_ARRAY" },           // address of array of init functions
+        { DT_FINI_ARRAY, "FINI_ARRAY" },           // address of array of term functions
+        { DT_INIT_ARRAYSZ, "INIT_ARRAYSZ" },       // size of array of init functions
+        { DT_FINI_ARRAYSZ, "FINI_ARRAYSZ" },       // size of array of term functions
+        { DT_RUNPATH, "RUNPATH" },                 // strtab offset of lib search path
+        { DT_FLAGS, "FLAGS" },                     // set of DF_* flags
+        { DT_ENCODING, "ENCODING" },               // further DT_* follow encoding rules
+        { DT_PREINIT_ARRAY, "PREINIT_ARRAY" },     // address of array of preinit functions
+        { DT_PREINIT_ARRAYSZ, "PREINIT_ARRAYSZ" }, // size of array of preinit functions
+        { DT_LOOS, "LOOS" },                       // reserved range for OS
+        { DT_HIOS, "HIOS" },                       //  specific dynamic array tags
+        { DT_LOPROC, "LOPROC" },                   // reserved range for processor
+        { DT_HIPROC, "HIPROC" },                   //  specific dynamic array tags
+        { DT_GNU_HASH, "GNU_HASH" },               // address of GNU hash table
+        { DT_RELACOUNT, "RELACOUNT" },             // if present, number of RELATIVE
+        { DT_RELCOUNT, "RELCOUNT" },               // relocs, which must come first
+        { DT_FLAGS_1, "FLAGS_1" },
+    };
+
+    for (const auto& candidate : s_tag_names) {
+        if (candidate.tag == d_tag)
+            return candidate.name;
+    }
+    return "??";
+}

+ 122 - 0
Libraries/LibELF/ELFDynamicObject.h

@@ -0,0 +1,122 @@
+#pragma once
+
+#include <LibELF/ELFImage.h>
+#include <LibELF/exec_elf.h>
+#include <dlfcn.h>
+#include <mman.h>
+
+#include <AK/OwnPtr.h>
+#include <AK/RefCounted.h>
+#include <AK/String.h>
+
+// Rounds x up to the next multiple of align; align must be a non-zero power of two.
+// Every use of a macro argument is parenthesised so that expressions such as
+// `1 << n` expand with the intended precedence.
+#define ALIGN_ROUND_UP(x, align) ((((size_t)(x)) + (align) - 1) & (~((align) - 1)))
+
+// A shared object loaded at runtime: maps the file, parses its dynamic section,
+// maps its segments, applies relocations and resolves symbols by name.
+class ELFDynamicObject : public RefCounted<ELFDynamicObject> {
+public:
+    // Creates an object for the already-opened file `fd` of `file_size` bytes.
+    // Check is_valid() before calling load().
+    static NonnullRefPtr<ELFDynamicObject> construct(const char* filename, int fd, size_t file_size);
+
+    ~ELFDynamicObject();
+
+    bool is_valid() const { return m_valid; }
+
+    // FIXME: How can we resolve all of the symbols without having the original elf image for our process?
+    //     RTLD_LAZY only at first probably... though variables ('objects') need resolved at load time every time
+    bool load(unsigned flags);
+
+    // Intended for use by dlsym or other internal methods
+    void* symbol_for_name(const char*);
+
+    // Prints the dynamic section entries to the debug log.
+    void dump();
+
+private:
+    // One program header together with the addresses it was given at load time.
+    class ProgramHeaderRegion {
+    public:
+        ProgramHeaderRegion(const Elf32_Phdr& header)
+            : m_program_header(header)
+        {
+        }
+
+        VirtualAddress load_address() const { return m_load_address; }
+        VirtualAddress base_address() const { return m_image_base_address; }
+
+        void set_load_address(VirtualAddress addr) { m_load_address = addr; }
+        void set_base_address(VirtualAddress addr) { m_image_base_address = addr; }
+
+        // Information from ELF Program header
+        u32 type() const { return m_program_header.p_type; }
+        u32 flags() const { return m_program_header.p_flags; }
+        u32 offset() const { return m_program_header.p_offset; }
+        VirtualAddress desired_load_address() const { return VirtualAddress(m_program_header.p_vaddr); }
+        u32 size_in_memory() const { return m_program_header.p_memsz; }
+        u32 size_in_image() const { return m_program_header.p_filesz; }
+        u32 alignment() const { return m_program_header.p_align; }
+        u32 mmap_prot() const;
+        bool is_readable() const { return flags() & PF_R; }
+        bool is_writable() const { return flags() & PF_W; }
+        bool is_executable() const { return flags() & PF_X; }
+        bool is_tls_template() const { return type() == PT_TLS; }
+        bool is_load() const { return type() == PT_LOAD; }
+        bool is_dynamic() const { return type() == PT_DYNAMIC; }
+
+        // Size of the segment in memory, rounded up to its alignment.
+        u32 required_load_size() const
+        {
+            // Per the ELF specification a p_align of 0 or 1 means "no alignment
+            // required"; rounding with an alignment of 0 would yield 0.
+            if (m_program_header.p_align <= 1)
+                return m_program_header.p_memsz;
+            return ALIGN_ROUND_UP(m_program_header.p_memsz, m_program_header.p_align);
+        }
+
+    private:
+        Elf32_Phdr m_program_header; // Explicitly a copy of the PHDR in the image
+        VirtualAddress m_load_address { 0 };
+        VirtualAddress m_image_base_address { 0 };
+    };
+
+    explicit ELFDynamicObject(const char* filename, int fd, size_t file_size);
+
+    String m_filename;
+    size_t m_file_size { 0 };
+    int m_image_fd { -1 };
+    void* m_file_mapping { nullptr };
+    bool m_valid { false };
+
+    OwnPtr<ELFImage> m_image;
+
+    void parse_dynamic_section();
+    void do_relocations();
+
+    static void patch_plt_entry(u32 got_offset, void* dso_got_tag);
+
+    // The three pointers below point into m_program_header_regions.
+    Vector<ProgramHeaderRegion> m_program_header_regions;
+    ProgramHeaderRegion* m_text_region { nullptr };
+    ProgramHeaderRegion* m_data_region { nullptr };
+    ProgramHeaderRegion* m_tls_region { nullptr };
+
+    // Begin Section information collected from DT_* entries
+    uintptr_t m_init_offset { 0 };
+    uintptr_t m_fini_offset { 0 };
+
+    uintptr_t m_init_array_offset { 0 };
+    size_t m_init_array_size { 0 };
+
+    uintptr_t m_hash_table_offset { 0 };
+
+    uintptr_t m_string_table_offset { 0 };
+    uintptr_t m_symbol_table_offset { 0 };
+    size_t m_size_of_string_table { 0 };
+    size_t m_size_of_symbol_table_entry { 0 };
+
+    Elf32_Sword m_procedure_linkage_table_relocation_type { -1 };
+    uintptr_t m_plt_relocation_offset_location { 0 }; // offset of PLT relocations, at end of relocations
+    size_t m_size_of_plt_relocation_entry_list { 0 };
+    uintptr_t m_procedure_linkage_table_offset { 0 };
+
+    // NOTE: We'll only ever have either RELA or REL entries, not both (thank god)
+    size_t m_number_of_relocations { 0 };
+    size_t m_size_of_relocation_entry { 0 };
+    size_t m_size_of_relocation_table { 0 };
+    uintptr_t m_relocation_table_offset { 0 };
+
+    // DT_FLAGS
+    bool m_should_process_origin = false;
+    bool m_requires_symbolic_symbol_resolution = false;
+    // Text relocations meaning: we need to edit the .text section which is normally mapped PROT_READ
+    bool m_has_text_relocations = false;
+    bool m_must_bind_now = false; // FIXME: control with an environment var as well?
+    bool m_has_static_thread_local_storage = false;
+    // End Section information from DT_* entries
+};

+ 59 - 3
Libraries/LibELF/ELFImage.cpp

@@ -43,6 +43,11 @@ unsigned ELFImage::symbol_count() const
     return section(m_symbol_table_section_index).entry_count();
 }
 
+// Number of entries in the dynamic symbol table section (the SHT_DYNSYM section found by parse()).
+unsigned ELFImage::dynamic_symbol_count() const
+{
+    const auto dynamic_symbol_table = section(m_dynamic_symbol_table_section_index);
+    return dynamic_symbol_table.entry_count();
+}
+
 void ELFImage::dump() const
 {
     dbgprintf("ELFImage{%p} {\n", this);
@@ -110,8 +115,25 @@ bool ELFImage::parse()
             m_symbol_table_section_index = i;
         }
         if (sh.sh_type == SHT_STRTAB && i != header().e_shstrndx) {
-            ASSERT(!m_string_table_section_index || m_string_table_section_index == i);
-            m_string_table_section_index = i;
+            if (StringView(".strtab") == section_header_table_string(sh.sh_name))
+                m_string_table_section_index = i;
+            else if (StringView(".dynstr") == section_header_table_string(sh.sh_name))
+                m_dynamic_string_table_section_index = i;
+            else
+                ASSERT_NOT_REACHED();
+        }
+        if (sh.sh_type == SHT_DYNAMIC) {
+            ASSERT(!m_dynamic_section_index || m_dynamic_section_index == i);
+            m_dynamic_section_index = i;
+        }
+        if (sh.sh_type == SHT_DYNSYM) {
+            ASSERT(!m_dynamic_symbol_table_section_index || m_dynamic_symbol_table_section_index == i);
+            m_dynamic_symbol_table_section_index = i;
+        }
+        if (sh.sh_type == SHT_REL) {
+            if (StringView(".rel.dyn") == section_header_table_string(sh.sh_name)) {
+                m_dynamic_relocation_section_index = i;
+            }
         }
     }
 
@@ -140,6 +162,14 @@ const char* ELFImage::table_string(unsigned offset) const
     return raw_data(sh.sh_offset + offset);
 }
 
+// Resolves `offset` inside the dynamic string table to a C string pointer.
+// Yields nullptr when the section recorded as the dynamic string table is
+// not of type SHT_STRTAB.
+const char* ELFImage::dynamic_table_string(unsigned offset) const
+{
+    const auto& dynstr_header = section_header(m_dynamic_string_table_section_index);
+    const bool is_string_table = dynstr_header.sh_type == SHT_STRTAB;
+    return is_string_table ? raw_data(dynstr_header.sh_offset + offset) : nullptr;
+}
+
 const char* ELFImage::raw_data(unsigned offset) const
 {
     return reinterpret_cast<const char*>(m_buffer) + offset;
@@ -159,7 +189,7 @@ const Elf32_Phdr& ELFImage::program_header_internal(unsigned index) const
 const Elf32_Shdr& ELFImage::section_header(unsigned index) const
 {
     ASSERT(index < header().e_shnum);
-    return *reinterpret_cast<const Elf32_Shdr*>(raw_data(header().e_shoff + (index * sizeof(Elf32_Shdr))));
+    return *reinterpret_cast<const Elf32_Shdr*>(raw_data(header().e_shoff + (index * header().e_shentsize)));
 }
 
 const ELFImage::Symbol ELFImage::symbol(unsigned index) const
@@ -169,6 +199,13 @@ const ELFImage::Symbol ELFImage::symbol(unsigned index) const
     return Symbol(*this, index, raw_syms[index]);
 }
 
+// Returns a view of entry `index` of the dynamic symbol table.
+// The index is validated against dynamic_symbol_count(): the dynamic symbol
+// table is a separate section from the static symbol table, so the static
+// table's entry count is not a valid bound here.
+const ELFImage::DynamicSymbol ELFImage::dynamic_symbol(unsigned index) const
+{
+    ASSERT(index < dynamic_symbol_count());
+    auto* raw_syms = reinterpret_cast<const Elf32_Sym*>(raw_data(section(m_dynamic_symbol_table_section_index).offset()));
+    return DynamicSymbol(*this, index, raw_syms[index]);
+}
+
 const ELFImage::Section ELFImage::section(unsigned index) const
 {
     ASSERT(index < section_count());
@@ -188,6 +225,13 @@ const ELFImage::Relocation ELFImage::RelocationSection::relocation(unsigned inde
     return Relocation(m_image, rels[index]);
 }
 
+// Returns a view of the `index`-th Elf32_Rel entry stored in this section.
+const ELFImage::DynamicRelocation ELFImage::DynamicRelocationSection::relocation(unsigned index) const
+{
+    ASSERT(index < relocation_count());
+    const char* table_start = m_image.raw_data(offset());
+    const auto& raw_rel = reinterpret_cast<const Elf32_Rel*>(table_start)[index];
+    return DynamicRelocation(m_image, raw_rel);
+}
+
 const ELFImage::RelocationSection ELFImage::Section::relocations() const
 {
     // FIXME: This is ugly.
@@ -213,3 +257,15 @@ const ELFImage::Section ELFImage::lookup_section(const char* name) const
         return section((*it).value);
     return section(0);
 }
+
+// Wraps the section recorded as the dynamic section in a DynamicSection view.
+// Only valid for images whose header type is ET_DYN.
+const ELFImage::DynamicSection ELFImage::dynamic_section() const
+{
+    ASSERT(is_dynamic());
+    const auto& generic_section = section(m_dynamic_section_index);
+    return DynamicSection(generic_section);
+}
+
+// Wraps the section recorded as the dynamic relocation section in a
+// DynamicRelocationSection view. Only valid for images whose header type is ET_DYN.
+const ELFImage::DynamicRelocationSection ELFImage::dynamic_relocation_section() const
+{
+    ASSERT(is_dynamic());
+    const auto& generic_section = section(m_dynamic_relocation_section_index);
+    return DynamicRelocationSection(generic_section);
+}

+ 140 - 0
Libraries/LibELF/ELFImage.h

@@ -16,8 +16,13 @@ public:
 
     class Section;
     class RelocationSection;
+    class DynamicRelocationSection;
     class Symbol;
+    class DynamicSymbol;
     class Relocation;
+    class DynamicRelocation;
+    class DynamicSection;
+    class DynamicSectionEntry;
 
     class Symbol {
     public:
@@ -45,6 +50,32 @@ public:
         const unsigned m_index;
     };
 
+    // Read-only view of one Elf32_Sym entry. Holds references to the owning
+    // image and to the raw entry rather than copying them.
+    class DynamicSymbol {
+    public:
+        DynamicSymbol(const ELFImage& image, unsigned index, const Elf32_Sym& sym)
+            : m_image(image)
+            , m_sym(sym)
+            , m_index(index)
+        {
+        }
+
+        ~DynamicSymbol() {}
+
+        // Index this view was constructed with.
+        unsigned index() const { return m_index; }
+
+        // Name resolved through the image's dynamic string table.
+        const char* name() const { return m_image.dynamic_table_string(m_sym.st_name); }
+
+        // Raw st_value / st_size fields.
+        unsigned value() const { return m_sym.st_value; }
+        unsigned size() const { return m_sym.st_size; }
+
+        // Type and binding, both decoded from st_info.
+        unsigned type() const { return ELF32_ST_TYPE(m_sym.st_info); }
+        unsigned bind() const { return ELF32_ST_BIND(m_sym.st_info); }
+
+        // st_shndx and the Section it designates.
+        unsigned section_index() const { return m_sym.st_shndx; }
+        const Section section() const { return m_image.section(section_index()); }
+
+    private:
+        const ELFImage& m_image;
+        const Elf32_Sym& m_sym;
+        const unsigned m_index;
+    };
+
     class ProgramHeader {
     public:
         ProgramHeader(const ELFImage& image, unsigned program_header_index)
@@ -67,6 +98,7 @@ public:
         bool is_writable() const { return flags() & PF_W; }
         bool is_executable() const { return flags() & PF_X; }
         const char* raw_data() const { return m_image.raw_data(m_program_header.p_offset); }
+        Elf32_Phdr raw_header() const { return m_program_header; }
 
     private:
         const ELFImage& m_image;
@@ -100,6 +132,8 @@ public:
 
     protected:
         friend class RelocationSection;
+        friend class DynamicSection;
+        friend class DynamicRelocationSection;
         const ELFImage& m_image;
         const Elf32_Shdr& m_section_header;
         unsigned m_section_index;
@@ -117,6 +151,38 @@ public:
         void for_each_relocation(F) const;
     };
 
+    // Section view that exposes its entries as DynamicRelocation objects.
+    class DynamicRelocationSection : public Section {
+    public:
+        DynamicRelocationSection(const Section& section)
+            : Section(section.m_image, section.m_section_index)
+        {
+        }
+
+        // Visits relocations in index order; defined out of line.
+        template<typename F>
+        void for_each_relocation(F) const;
+
+        const DynamicRelocation relocation(unsigned index) const;
+
+        // One relocation per section entry.
+        unsigned relocation_count() const { return entry_count(); }
+    };
+
+    // Read-only view of one Elf32_Rel entry. Holds references to the owning
+    // image and to the raw entry rather than copying them.
+    class DynamicRelocation {
+    public:
+        DynamicRelocation(const ELFImage& image, const Elf32_Rel& rel)
+            : m_image(image)
+            , m_rel(rel)
+        {
+        }
+
+        ~DynamicRelocation() {}
+
+        // Relocation type and symbol index, both decoded from r_info.
+        unsigned type() const { return ELF32_R_TYPE(m_rel.r_info); }
+        unsigned symbol_index() const { return ELF32_R_SYM(m_rel.r_info); }
+
+        // Dynamic symbol designated by symbol_index().
+        const DynamicSymbol symbol() const { return m_image.dynamic_symbol(symbol_index()); }
+
+        // Raw r_offset field.
+        unsigned offset() const { return m_rel.r_offset; }
+
+    private:
+        const ELFImage& m_image;
+        const Elf32_Rel& m_rel;
+    };
+
     class Relocation {
     public:
         Relocation(const ELFImage& image, const Elf32_Rel& rel)
@@ -137,13 +203,48 @@ public:
         const Elf32_Rel& m_rel;
     };
 
+    // Section view for a section of type SHT_DYNAMIC; construction asserts the type.
+    class DynamicSection : public Section {
+    public:
+        // Visits the section's entries; defined out of line.
+        template<typename F>
+        void for_each_dynamic_entry(F) const;
+
+        DynamicSection(const Section& section)
+            : Section(section.m_image, section.m_section_index)
+        {
+            ASSERT(type() == SHT_DYNAMIC);
+        }
+    };
+
+    // Read-only view of one Elf32_Dyn entry. Holds references to the owning
+    // image and to the raw entry rather than copying them.
+    class DynamicSectionEntry {
+    public:
+        DynamicSectionEntry(const ELFImage& image, const Elf32_Dyn& dyn)
+            : m_image(image)
+            , m_dyn(dyn)
+        {
+        }
+
+        ~DynamicSectionEntry() {}
+
+        // The entry's d_un union read as an integer value or as an address.
+        Elf32_Word val() const { return m_dyn.d_un.d_val; }
+        Elf32_Addr ptr() const { return m_dyn.d_un.d_ptr; }
+
+        // Raw d_tag field.
+        Elf32_Sword tag() const { return m_dyn.d_tag; }
+
+    private:
+        const ELFImage& m_image;
+        const Elf32_Dyn& m_dyn;
+    };
+
     unsigned symbol_count() const;
+    unsigned dynamic_symbol_count() const;
     unsigned section_count() const;
     unsigned program_header_count() const;
 
     const Symbol symbol(unsigned) const;
+    const DynamicSymbol dynamic_symbol(unsigned) const;
     const Section section(unsigned) const;
     const ProgramHeader program_header(unsigned const) const;
+    const DynamicSection dynamic_section() const;
+    const DynamicRelocationSection dynamic_relocation_section() const;
 
     template<typename F>
     void for_each_section(F) const;
@@ -152,6 +253,8 @@ public:
     template<typename F>
     void for_each_symbol(F) const;
     template<typename F>
+    void for_each_dynamic_symbol(F) const;
+    template<typename F>
     void for_each_program_header(F) const;
 
     // NOTE: Returns section(0) if section with name is not found.
@@ -160,6 +263,7 @@ public:
 
     bool is_executable() const { return header().e_type == ET_EXEC; }
     bool is_relocatable() const { return header().e_type == ET_REL; }
+    bool is_dynamic() const { return header().e_type == ET_DYN; }
 
     VirtualAddress entry() const { return VirtualAddress(header().e_entry); }
 
@@ -172,12 +276,17 @@ private:
     const char* table_string(unsigned offset) const;
     const char* section_header_table_string(unsigned offset) const;
     const char* section_index_to_string(unsigned index) const;
+    const char* dynamic_table_string(unsigned offset) const;
 
     const u8* m_buffer { nullptr };
     HashMap<String, unsigned> m_sections;
     bool m_valid { false };
     unsigned m_symbol_table_section_index { 0 };
     unsigned m_string_table_section_index { 0 };
+    unsigned m_dynamic_symbol_table_section_index { 0 }; // .dynsym
+    unsigned m_dynamic_string_table_section_index { 0 }; // .dynstr
+    unsigned m_dynamic_section_index { 0 };              // .dynamic
+    unsigned m_dynamic_relocation_section_index { 0 };   // .rel.dyn
 };
 
 template<typename F>
@@ -208,6 +317,15 @@ inline void ELFImage::RelocationSection::for_each_relocation(F func) const
     }
 }
 
+// Calls func with each relocation of this section in index order and stops
+// as soon as func returns IterationDecision::Break.
+template<typename F>
+inline void ELFImage::DynamicRelocationSection::for_each_relocation(F func) const
+{
+    unsigned index = 0;
+    while (index < relocation_count()) {
+        if (func(relocation(index)) == IterationDecision::Break)
+            return;
+        ++index;
+    }
+}
+
 template<typename F>
 inline void ELFImage::for_each_symbol(F func) const
 {
@@ -217,9 +335,31 @@ inline void ELFImage::for_each_symbol(F func) const
     }
 }
 
+// Calls func with each entry of the dynamic symbol table in index order and
+// stops as soon as func returns IterationDecision::Break.
+// Entries are fetched with dynamic_symbol(), matching the dynamic_symbol_count()
+// loop bound; symbol() would read the static symbol table instead.
+template<typename F>
+inline void ELFImage::for_each_dynamic_symbol(F func) const
+{
+    for (unsigned i = 0; i < dynamic_symbol_count(); ++i) {
+        if (func(dynamic_symbol(i)) == IterationDecision::Break)
+            break;
+    }
+}
+
 template<typename F>
 inline void ELFImage::for_each_program_header(F func) const
 {
     for (unsigned i = 0; i < program_header_count(); ++i)
         func(program_header(i));
 }
+
+// Walks the Elf32_Dyn array at this section's offset, calling func on each
+// entry. The walk ends at the first entry tagged DT_NULL (which is not passed
+// to func) or when func returns IterationDecision::Break.
+// NOTE(review): the loop has no upper bound of its own; it relies on a DT_NULL
+// entry being present in the data.
+template<typename F>
+inline void ELFImage::DynamicSection::for_each_dynamic_entry(F func) const
+{
+    const auto* cursor = reinterpret_cast<const Elf32_Dyn*>(m_image.raw_data(offset()));
+    while (true) {
+        DynamicSectionEntry entry(m_image, *cursor);
+        if (entry.tag() == DT_NULL)
+            return;
+        if (func(entry) == IterationDecision::Break)
+            return;
+        ++cursor;
+    }
+}

+ 12 - 2
Libraries/LibELF/exec_elf.h

@@ -775,7 +775,17 @@ struct elf_args {
 
 #define ELF_TARG_VER 1 /* The ver for which this code is intended */
 
-#define R_386_32 1
-#define R_386_PC32 2
+/* Relocation types */
+#define R_386_NONE 0
+#define R_386_32 1       /* Symbol + Addend */
+#define R_386_PC32 2     /* Symbol + Addend - Section offset */
+#define R_386_GOT32 3    /* Used by build-time linker to create GOT entry */
+#define R_386_PLT32 4    /* Used by build-time linker to create PLT entry */
+#define R_386_COPY 5     /* https://docs.oracle.com/cd/E23824_01/html/819-0690/chapter4-10454.html#chapter4-84604 */
+#define R_386_GLOB_DAT 6 /* Relation b/w GOT entry and symbol */
+#define R_386_JMP_SLOT 7 /* Fixed up by dynamic loader */
+#define R_386_RELATIVE 8 /* Base address + Addend */
+#define R_386_TLS_TPOFF 14 /* Negative offset into the static TLS storage */
+
 
 #endif /* _SYS_EXEC_ELF_H_ */