LibELF+LibC: Add support for Variant I of the TLS data structures

We previously only supported Variant II, which is used by x86-64.
Variant I is used by both AArch64 (when using the traditional
non-TLSDESC model) and RISC-V, albeit with small differences between
the two.

The TLS layout for Variant I is essentially flipped compared to
Variant II: the static TLS blocks are placed after the thread pointer
on Variant I, while on Variant II they are before it.
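
As an illustration (not part of the commit text), the two layouts look
roughly like this, with tp denoting the thread pointer and the TCB
being the per-thread control block:

    Variant II (x86-64):             Variant I (AArch64, RISC-V):

      [ static TLS blocks ]            tp -> [ TCB / reserved words ]
      tp -> [ TCB ]                          [ static TLS blocks    ]

    (blocks at negative offsets      (blocks at non-negative offsets
     from tp)                         from tp)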

Some code using ELF TLS already worked on AArch64 and RISC-V even
though we only supported Variant II. This is because only the
local-exec model directly uses TLS offsets; the other models go
through relocations or __tls_get_addr().
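
A hand-written illustration of why only local-exec accesses were
layout-sensitive (this snippet is not from the commit):

    // Hand-written illustration; not part of this commit.
    static thread_local int t;

    int read_t()
    {
        // Local-exec model: compiles to roughly *(tp + constant), and the
        // constant's sign depends on the variant (negative on Variant II,
        // non-negative on Variant I), so the runtime layout must match.
        // General-dynamic model: compiles to a __tls_get_addr() call,
        // which hides the layout behind the runtime.
        return t;
    }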
Sönke Holz, 2024-04-17 21:38:49 +02:00; committed by Andrew Kaster
parent 3af793abfd
commit 9437b29b43

4 changed files with 67 additions and 30 deletions

(File 1 of 4)

@@ -5,6 +5,7 @@
  */
 
 #include <AK/Types.h>
+#include <LibELF/Arch/tls.h>
 #include <sys/internals.h>
 
 extern "C" {
@@ -21,6 +22,6 @@ extern "C" {
 // changed if we support dynamically allocated TLS blocks.
 void* __tls_get_addr(__tls_index* index)
 {
-    return reinterpret_cast<void*>(reinterpret_cast<FlatPtr>(__builtin_thread_pointer()) + index->ti_module + index->ti_offset);
+    return reinterpret_cast<void*>(reinterpret_cast<FlatPtr>(__builtin_thread_pointer()) + index->ti_module + index->ti_offset + ELF::TLS_DTV_OFFSET);
 }
 }
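
ELF::TLS_DTV_OFFSET is one of the small per-architecture differences
mentioned in the commit message: the RISC-V psABI biases DTV entries by
0x800 so that the full range of a 12-bit signed immediate can address
the block, while the bias is zero elsewhere. A sketch of what the
constant plausibly looks like (assumed values; the real definition
lives in LibELF/Arch/tls.h, which this excerpt does not show):

    // Sketch only; assumed values based on the RISC-V psABI's DTV bias.
    #if ARCH(RISCV64)
    static constexpr size_t TLS_DTV_OFFSET = 0x800;
    #else
    static constexpr size_t TLS_DTV_OFFSET = 0;
    #endif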

(File 2 of 4)

@@ -60,4 +60,6 @@
 .type __tlsdesc_static,@function
 __tlsdesc_static:
     ldr x0, [x0, #8]
+    // The first static TLS block is 16 bytes after the thread pointer on AArch64.
+    add x0, x0, 16
     ret
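
The 16 bytes come from the AArch64 ABI, which reserves two
pointer-sized words at the thread pointer for the implementation; the
first static TLS block begins after them. A sketch of the matching
constant (assumed shape and values; the real definition is in
LibELF/Arch/tls.h):

    // Sketch only; assumed values.
    #if ARCH(AARCH64)
    static constexpr size_t TLS_TP_STATIC_TLS_BLOCK_OFFSET = 16; // two reserved pointers
    #elif ARCH(RISCV64)
    static constexpr size_t TLS_TP_STATIC_TLS_BLOCK_OFFSET = 0;  // blocks start at tp
    #else
    static constexpr size_t TLS_TP_STATIC_TLS_BLOCK_OFFSET = 0;  // Variant II: unused
    #endif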

(File 3 of 4)

@@ -58,6 +58,8 @@ struct TLSData {
     void* tls_template { nullptr };
     size_t tls_template_size { 0 };
     size_t alignment { 0 };
+    size_t static_tls_region_size { 0 };
+    size_t static_tls_region_alignment { 0 };
 };
 
 static TLSData s_tls_data;
@@ -131,10 +133,18 @@ static Result<NonnullRefPtr<DynamicLoader>, DlErrorMessage> map_library(ByteStri
     static size_t s_current_tls_offset = 0;
 
-    s_current_tls_offset -= loader->tls_size_of_current_object();
-    if (loader->tls_alignment_of_current_object())
-        s_current_tls_offset = align_down_to(s_current_tls_offset, loader->tls_alignment_of_current_object());
-    loader->set_tls_offset(s_current_tls_offset);
+    if constexpr (TLS_VARIANT == 1) {
+        if (loader->tls_alignment_of_current_object() != 0)
+            s_current_tls_offset = align_up_to(s_current_tls_offset, loader->tls_alignment_of_current_object());
+        loader->set_tls_offset(s_current_tls_offset);
+        s_current_tls_offset += loader->tls_size_of_current_object();
+    } else if constexpr (TLS_VARIANT == 2) {
+        s_current_tls_offset -= loader->tls_size_of_current_object();
+        if (loader->tls_alignment_of_current_object() != 0)
+            s_current_tls_offset = align_down_to(s_current_tls_offset, loader->tls_alignment_of_current_object());
+        loader->set_tls_offset(s_current_tls_offset);
+    }
 
     // This actually maps the library at the intended and final place.
     auto main_library_object = loader->map();
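
To make the two branches concrete, a worked example with invented
numbers: objects A and B with TLS sizes 0x40 and 0x20, both 16-byte
aligned.

    Variant 1: A gets offset 0x00 and the cursor advances to 0x40;
               B gets offset 0x40 and the cursor advances to 0x60.
    Variant 2: the cursor drops to -0x40 and A gets offset -0x40;
               it then drops to -0x60 and B gets offset -0x60.

Offsets thus grow away from the thread pointer in both cases: upward
(past the TCB) on Variant I, downward on Variant II.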
@@ -237,35 +247,37 @@ static Result<void, DlErrorMessage> map_dependencies(ByteString const& path)
     return {};
 }
 
-struct ThreadSpecificData {
-    ThreadSpecificData* self;
-};
-
 static ErrorOr<FlatPtr> __create_new_tls_region()
 {
-    auto static_tls_region_alignment = max(s_tls_data.alignment, alignof(ThreadSpecificData));
-    auto static_tls_region_size = align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment) + sizeof(ThreadSpecificData);
-    void* thread_specific_ptr = serenity_mmap(nullptr, static_tls_region_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, static_tls_region_alignment, "Static TLS Data");
-    if (thread_specific_ptr == MAP_FAILED)
+    void* static_tls_region = serenity_mmap(nullptr, s_tls_data.static_tls_region_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, s_tls_data.static_tls_region_alignment, "Static TLS Data");
+    if (static_tls_region == MAP_FAILED)
         return Error::from_syscall("mmap"sv, -errno);
 
-    auto* thread_specific_data = bit_cast<ThreadSpecificData*>(bit_cast<FlatPtr>(thread_specific_ptr) + (align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment)));
-    thread_specific_data->self = thread_specific_data;
+    auto thread_pointer = calculate_tp_value_from_static_tls_region_address(bit_cast<FlatPtr>(static_tls_region), s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
+    VERIFY(thread_pointer % s_tls_data.static_tls_region_alignment == 0);
 
-    auto* thread_local_storage = bit_cast<u8*>(bit_cast<FlatPtr>(thread_specific_data) - align_up_to(s_tls_data.tls_template_size, s_tls_data.alignment));
+    auto* tcb = get_tcb_pointer_from_thread_pointer(thread_pointer);
+
+    // FIXME: Add support for dynamically-allocated TLS blocks.
+    tcb->dynamic_thread_vector = nullptr;
+
+#if ARCH(X86_64)
+    tcb->thread_pointer = bit_cast<void*>(thread_pointer);
+#endif
+
+    auto* static_tls_blocks = get_pointer_to_first_static_tls_block_from_thread_pointer(thread_pointer, s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
 
     if (s_tls_data.tls_template_size != 0)
-        memcpy(thread_local_storage, s_tls_data.tls_template, s_tls_data.tls_template_size);
+        memcpy(static_tls_blocks, s_tls_data.tls_template, s_tls_data.tls_template_size);
 
-    return bit_cast<FlatPtr>(thread_specific_data);
+    return thread_pointer;
 }
 
 static ErrorOr<void> __free_tls_region(FlatPtr thread_pointer)
 {
-    auto static_tls_region_alignment = max(s_tls_data.alignment, alignof(ThreadSpecificData));
-    auto static_tls_region_size = align_up_to(s_tls_data.tls_template_size, static_tls_region_alignment) + sizeof(ThreadSpecificData);
+    auto* static_tls_region = get_pointer_to_static_tls_region_from_thread_pointer(thread_pointer, s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
 
-    if (munmap(bit_cast<void*>(bit_cast<FlatPtr>(thread_pointer) - align_up_to(s_tls_data.tls_template_size, s_tls_data.alignment)), static_tls_region_size) != 0)
+    if (munmap(static_tls_region, s_tls_data.static_tls_region_size) != 0)
         return Error::from_syscall("mmap"sv, -errno);
 
     return {};
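
The helpers used above are introduced in LibELF/Arch/tls.h, which this
excerpt does not show. One plausible shape for two of them, assuming
the TCB sits at the start of the static TLS region on Variant I and
after the blocks on Variant II:

    // Sketch only; assumed implementations consistent with the layouts above.
    inline FlatPtr calculate_tp_value_from_static_tls_region_address(FlatPtr region, size_t tls_template_size, size_t alignment)
    {
        if constexpr (TLS_VARIANT == 1)
            return region; // tp at the TCB, static TLS blocks after it
        else
            return region + align_up_to(tls_template_size, alignment); // blocks first, tp above them
    }

    inline u8* get_pointer_to_first_static_tls_block_from_thread_pointer(FlatPtr thread_pointer, size_t tls_template_size, size_t alignment)
    {
        if constexpr (TLS_VARIANT == 1)
            return bit_cast<u8*>(thread_pointer + TLS_TP_STATIC_TLS_BLOCK_OFFSET);
        else
            return bit_cast<u8*>(thread_pointer - align_up_to(tls_template_size, alignment));
    }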
@@ -273,6 +285,12 @@ static ErrorOr<void> __free_tls_region(FlatPtr thread_pointer)
 
 static void allocate_tls()
 {
+    // FIXME: Use the max p_align of all TLS segments.
+    // We currently pass s_tls_data.static_tls_region_alignment as the alignment to mmap,
+    // so we would have to manually insert padding, as mmap only accepts alignments that
+    // are multiples of PAGE_SIZE. Or instead use aligned_alloc/posix_memalign?
+    s_tls_data.alignment = PAGE_SIZE;
+
     for (auto const& data : s_loaders) {
         dbgln_if(DYNAMIC_LOAD_DEBUG, "{}: TLS Size: {}, TLS Alignment: {}", data.key, data.value->tls_size_of_current_object(), data.value->tls_alignment_of_current_object());
         s_tls_data.total_tls_size += data.value->tls_size_of_current_object() + data.value->tls_alignment_of_current_object();
@@ -282,7 +300,6 @@ static void allocate_tls()
         return;
 
     s_tls_data.tls_template_size = align_up_to(s_tls_data.total_tls_size, PAGE_SIZE);
-    s_tls_data.alignment = PAGE_SIZE;
     s_tls_data.tls_template = mmap_with_name(nullptr, s_tls_data.tls_template_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0, "TLS Template");
 
     if (s_tls_data.tls_template == MAP_FAILED) {
@@ -290,6 +307,9 @@ static void allocate_tls()
         VERIFY_NOT_REACHED();
     }
 
+    s_tls_data.static_tls_region_alignment = max(s_tls_data.alignment, sizeof(ThreadControlBlock));
+    s_tls_data.static_tls_region_size = calculate_static_tls_region_size(s_tls_data.tls_template_size, s_tls_data.static_tls_region_alignment);
+
     auto tls_template = Bytes(s_tls_data.tls_template, s_tls_data.tls_template_size);
 
     // Initialize TLS data
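
The FIXME above exists because mmap can only guarantee page-granular
alignment. The aligned_alloc/posix_memalign route it suggests could
look roughly like this (sketch only; max_p_align is a hypothetical
variable holding the largest p_align of all TLS segments):

    // Sketch of the FIXME's suggested alternative; max_p_align is hypothetical.
    void* static_tls_region = nullptr;
    // Assumes max_p_align is a power of two and a multiple of
    // sizeof(void*), as posix_memalign requires.
    if (posix_memalign(&static_tls_region, max_p_align, s_tls_data.static_tls_region_size) != 0)
        VERIFY_NOT_REACHED();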

(File 4 of 4)

@@ -12,6 +12,7 @@
 #include <AK/QuickSort.h>
 #include <AK/StringBuilder.h>
 #include <LibELF/Arch/GenericDynamicRelocationType.h>
+#include <LibELF/Arch/tls.h>
 #include <LibELF/DynamicLinker.h>
 #include <LibELF/DynamicLoader.h>
 #include <LibELF/Hashes.h>
@@ -654,10 +655,16 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec
         auto [dynamic_object_of_symbol, symbol_value] = maybe_resolution.value();
         size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
 
-        *patch_ptr = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value;
+        *patch_ptr = addend + dynamic_object_of_symbol.tls_offset().value() + symbol_value + TLS_TP_STATIC_TLS_BLOCK_OFFSET;
 
+        if constexpr (TLS_VARIANT == 1) {
+            // Until offset TLS_TP_STATIC_TLS_BLOCK_OFFSET there's the thread's ThreadControlBlock, we don't want to collide with it.
+            VERIFY(static_cast<ssize_t>(*patch_ptr) >= static_cast<ssize_t>(TLS_TP_STATIC_TLS_BLOCK_OFFSET));
+        } else if constexpr (TLS_VARIANT == 2) {
+            // At offset 0 there's the thread's ThreadControlBlock, we don't want to collide with it.
+            VERIFY(static_cast<ssize_t>(*patch_ptr) < 0);
+        }
-        // At offset 0 there's the thread's ThreadSpecificData structure, we don't want to collide with it.
-        VERIFY(static_cast<ssize_t>(*patch_ptr) < 0);
         break;
     }
     case TLS_DTPMOD: {
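
A worked example for the TPREL computation above, with invented
numbers: a symbol at offset 0x8 inside an object whose block was
assigned tls_offset 0x40 (Variant 1) or -0x40 (Variant 2), addend 0,
and a 16-byte TCB offset on AArch64:

    Variant 1: *patch_ptr = 0 + 0x40 + 0x8 + 16   = 0x58, which is >= 16.
    Variant 2: *patch_ptr = 0 + (-0x40) + 0x8 + 0 = -0x38, which is < 0.

Either way the patched offset lands inside the static TLS area, and
the VERIFYs check that it cannot collide with the ThreadControlBlock.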
@@ -676,7 +683,7 @@ DynamicLoader::RelocationResult DynamicLoader::do_direct_relocation(DynamicObjec
             break;
 
         size_t addend = relocation.addend_used() ? relocation.addend() : *patch_ptr;
-        *patch_ptr = addend + maybe_resolution->value;
+        *patch_ptr = addend + maybe_resolution->value - TLS_DTV_OFFSET + TLS_TP_STATIC_TLS_BLOCK_OFFSET;
         break;
     }
 #ifdef HAS_TLSDESC_SUPPORT
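
The DTPOFF adjustment mirrors __tls_get_addr() from the first file:
the runtime adds TLS_DTV_OFFSET back, so it is subtracted here in
advance, and TLS_TP_STATIC_TLS_BLOCK_OFFSET is added so that the
resulting value lands past the TCB on Variant I. With the assumed
RISC-V values (TLS_DTV_OFFSET = 0x800, block offset 0), a symbol at
offset 0x10 stores 0x10 - 0x800 = -0x7f0, and the + 0x800 in
__tls_get_addr() restores the intended 0x10.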
@@ -765,14 +772,21 @@ void DynamicLoader::copy_initial_tls_data_into(Bytes buffer) const
         // only included in the "size in memory" metric, and is expected to not be touched or read from, as
         // it is not present in the image and zeroed out in-memory. We will still check that the buffer has
         // space for both the initialized and the uninitialized data.
-        // Note: The m_tls_offset here is (of course) negative.
         // TODO: Is the initialized data always in the beginning of the TLS segment, or should we walk the
         //       sections to figure that out?
-        size_t tls_start_in_buffer = buffer.size() + m_tls_offset;
         VERIFY(program_header.size_in_image() <= program_header.size_in_memory());
         VERIFY(program_header.size_in_memory() <= m_tls_size_of_current_object);
-        VERIFY(tls_start_in_buffer + program_header.size_in_memory() <= buffer.size());
-        memcpy(buffer.data() + tls_start_in_buffer, static_cast<u8 const*>(m_file_data) + program_header.offset(), program_header.size_in_image());
+
+        if constexpr (TLS_VARIANT == 1) {
+            size_t tls_start_in_buffer = m_tls_offset;
+            VERIFY(tls_start_in_buffer + program_header.size_in_memory() <= buffer.size());
+            memcpy(buffer.data() + tls_start_in_buffer, static_cast<u8 const*>(m_file_data) + program_header.offset(), program_header.size_in_image());
+        } else if constexpr (TLS_VARIANT == 2) {
+            size_t tls_start_in_buffer = buffer.size() + m_tls_offset;
+            VERIFY(tls_start_in_buffer + program_header.size_in_memory() <= buffer.size());
+            memcpy(buffer.data() + tls_start_in_buffer, static_cast<u8 const*>(m_file_data) + program_header.offset(), program_header.size_in_image());
+        }
 
         return IterationDecision::Break;
     });
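
A worked example for the copy above, with invented numbers: given a
0x1000-byte static TLS buffer, an object with m_tls_offset 0x40 on
Variant 1 copies its TLS image to buffer offset 0x40, measured from
the start of the buffer (the first static TLS block). On Variant 2,
m_tls_offset is negative, say -0x40, so the image lands at
0x1000 - 0x40 = 0xfc0, measured back from the end of the buffer, which
sits just below the thread pointer.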