ladybird/Userland/Libraries/LibJS/Heap/Heap.cpp


/*
* Copyright (c) 2020-2022, Andreas Kling <kling@serenityos.org>
* Copyright (c) 2023, Aliaksandr Kalenik <kalenik.aliaksandr@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Badge.h>
#include <AK/Debug.h>
#include <AK/HashTable.h>
#include <AK/JsonArray.h>
#include <AK/JsonObject.h>
#include <AK/StackInfo.h>
#include <AK/TemporaryChange.h>
#include <LibCore/ElapsedTimer.h>
#include <LibJS/Bytecode/Interpreter.h>
#include <LibJS/Heap/CellAllocator.h>
#include <LibJS/Heap/Handle.h>
#include <LibJS/Heap/Heap.h>
#include <LibJS/Heap/HeapBlock.h>
#include <LibJS/Runtime/Object.h>
#include <LibJS/Runtime/WeakContainer.h>
#include <LibJS/SafeFunction.h>
#include <setjmp.h>
#ifdef AK_OS_SERENITY
# include <serenity.h>
#endif
#ifdef HAS_ADDRESS_SANITIZER
# include <sanitizer/asan_interface.h>
#endif
namespace JS {
#ifdef AK_OS_SERENITY
static int gc_perf_string_id;
#endif
// NOTE: We keep a per-thread list of custom ranges. This hinges on the assumption that there is one JS VM per thread.
static __thread HashMap<FlatPtr*, size_t>* s_custom_ranges_for_conservative_scan = nullptr;
static __thread HashMap<FlatPtr*, SourceLocation*>* s_safe_function_locations = nullptr;
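// The constructor registers one CellAllocator per size class (16 through 3072 bytes);
// each allocation is served by the smallest class that fits the requested cell size.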
Heap::Heap(VM& vm)
: HeapBase(vm)
{
#ifdef AK_OS_SERENITY
auto gc_signpost_string = "Garbage collection"sv;
gc_perf_string_id = perf_register_string(gc_signpost_string.characters_without_null_termination(), gc_signpost_string.length());
#endif
if constexpr (HeapBlock::min_possible_cell_size <= 16) {
m_allocators.append(make<CellAllocator>(16));
}
static_assert(HeapBlock::min_possible_cell_size <= 24, "Heap Cell tracking uses too much data!");
m_allocators.append(make<CellAllocator>(32));
m_allocators.append(make<CellAllocator>(64));
m_allocators.append(make<CellAllocator>(96));
m_allocators.append(make<CellAllocator>(128));
m_allocators.append(make<CellAllocator>(256));
m_allocators.append(make<CellAllocator>(512));
m_allocators.append(make<CellAllocator>(1024));
m_allocators.append(make<CellAllocator>(3072));
}
Heap::~Heap()
{
vm().string_cache().clear();
vm().byte_string_cache().clear();
collect_garbage(CollectionType::CollectEverything);
}
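// Called before every cell allocation; triggers a collection once the bytes allocated
// since the last GC exceed the current threshold (or on every allocation in stress mode).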
void Heap::will_allocate(size_t size)
{
if (should_collect_on_every_allocation()) {
m_allocated_bytes_since_last_gc = 0;
collect_garbage();
} else if (m_allocated_bytes_since_last_gc + size > m_gc_bytes_threshold) {
m_allocated_bytes_since_last_gc = 0;
collect_garbage();
}
m_allocated_bytes_since_last_gc += size;
}
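// Interprets one pointer-sized word found during conservative scanning: words carrying a
// cell tag are decoded back to a canonical pointer, and anything outside the known range
// of live heap blocks is discarded before being recorded as a possible pointer.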
static void add_possible_value(HashMap<FlatPtr, HeapRoot>& possible_pointers, FlatPtr data, HeapRoot origin, FlatPtr min_block_address, FlatPtr max_block_address)
{
if constexpr (sizeof(FlatPtr*) == sizeof(Value)) {
// Because Value stores pointers in non-canonical form, we have to check whether the top
// bytes match any pointer-backed tag; if they do, we extract the pointer back into its
// canonical form and add that as a possible pointer.
FlatPtr possible_pointer;
if ((data & SHIFTED_IS_CELL_PATTERN) == SHIFTED_IS_CELL_PATTERN)
possible_pointer = Value::extract_pointer_bits(data);
else
possible_pointer = data;
if (possible_pointer < min_block_address || possible_pointer > max_block_address)
return;
possible_pointers.set(possible_pointer, move(origin));
} else {
static_assert((sizeof(Value) % sizeof(FlatPtr*)) == 0);
if (data < min_block_address || data > max_block_address)
return;
// In the 32-bit case we look at the top and bottom halves of a Value separately, so we
// simply add each pointer-sized chunk as a possible pointer.
possible_pointers.set(data, move(origin));
}
}
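// Computes the address range spanned by all live heap blocks, used to cheaply reject
// values that cannot possibly point into the heap.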
void Heap::find_min_and_max_block_addresses(FlatPtr& min_address, FlatPtr& max_address)
{
min_address = explode_byte(0xff);
max_address = 0;
for_each_block([&](auto& block) {
min_address = min(min_address, reinterpret_cast<FlatPtr>(&block));
max_address = max(max_address, reinterpret_cast<FlatPtr>(&block) + HeapBlockBase::block_size);
return IterationDecision::Continue;
});
}
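// Invokes the callback for every possible pointer that lands inside a live heap block
// and resolves to a cell within that block.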
template<typename Callback>
static void for_each_cell_among_possible_pointers(HashTable<HeapBlock*> const& all_live_heap_blocks, HashMap<FlatPtr, HeapRoot>& possible_pointers, Callback callback)
{
for (auto possible_pointer : possible_pointers.keys()) {
if (!possible_pointer)
continue;
auto* possible_heap_block = HeapBlock::from_cell(reinterpret_cast<Cell const*>(possible_pointer));
if (!all_live_heap_blocks.contains(possible_heap_block))
continue;
if (auto* cell = possible_heap_block->cell_from_possible_pointer(possible_pointer)) {
callback(cell, possible_pointer);
}
}
}
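// Builds an object graph of the heap (cells as nodes, references as edges) for debugging,
// serialized to JSON by Heap::dump_graph().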
class GraphConstructorVisitor final : public Cell::Visitor {
public:
explicit GraphConstructorVisitor(Heap& heap, HashMap<Cell*, HeapRoot> const& roots)
: m_heap(heap)
{
m_heap.find_min_and_max_block_addresses(m_min_block_address, m_max_block_address);
m_heap.for_each_block([&](auto& block) {
m_all_live_heap_blocks.set(&block);
return IterationDecision::Continue;
});
for (auto* root : roots.keys()) {
visit(root);
auto& graph_node = m_graph.ensure(reinterpret_cast<FlatPtr>(root));
graph_node.class_name = root->class_name();
graph_node.root_origin = *roots.get(root);
}
}
virtual void visit_impl(Cell& cell) override
{
if (m_node_being_visited)
m_node_being_visited->edges.set(reinterpret_cast<FlatPtr>(&cell));
if (m_graph.get(reinterpret_cast<FlatPtr>(&cell)).has_value())
return;
m_work_queue.append(cell);
}
virtual void visit_possible_values(ReadonlyBytes bytes) override
{
HashMap<FlatPtr, HeapRoot> possible_pointers;
auto* raw_pointer_sized_values = reinterpret_cast<FlatPtr const*>(bytes.data());
for (size_t i = 0; i < (bytes.size() / sizeof(FlatPtr)); ++i)
add_possible_value(possible_pointers, raw_pointer_sized_values[i], HeapRoot { .type = HeapRoot::Type::HeapFunctionCapturedPointer }, m_min_block_address, m_max_block_address);
for_each_cell_among_possible_pointers(m_all_live_heap_blocks, possible_pointers, [&](Cell* cell, FlatPtr) {
if (m_node_being_visited)
m_node_being_visited->edges.set(reinterpret_cast<FlatPtr>(cell));
if (m_graph.get(reinterpret_cast<FlatPtr>(cell)).has_value())
return;
m_work_queue.append(*cell);
});
}
void visit_all_cells()
{
while (!m_work_queue.is_empty()) {
auto ptr = reinterpret_cast<FlatPtr>(&m_work_queue.last());
m_node_being_visited = &m_graph.ensure(ptr);
m_node_being_visited->class_name = m_work_queue.last().class_name();
m_work_queue.take_last().visit_edges(*this);
m_node_being_visited = nullptr;
}
}
AK::JsonObject dump()
{
auto graph = AK::JsonObject();
for (auto& it : m_graph) {
AK::JsonArray edges;
for (auto const& value : it.value.edges) {
edges.must_append(ByteString::formatted("{}", value));
}
auto node = AK::JsonObject();
if (it.value.root_origin.has_value()) {
auto type = it.value.root_origin->type;
auto location = it.value.root_origin->location;
switch (type) {
case HeapRoot::Type::Handle:
node.set("root"sv, ByteString::formatted("Handle {} {}:{}", location->function_name(), location->filename(), location->line_number()));
break;
case HeapRoot::Type::MarkedVector:
node.set("root"sv, "MarkedVector");
break;
case HeapRoot::Type::RegisterPointer:
node.set("root"sv, "RegisterPointer");
break;
case HeapRoot::Type::StackPointer:
node.set("root"sv, "StackPointer");
break;
case HeapRoot::Type::VM:
node.set("root"sv, "VM");
break;
case HeapRoot::Type::SafeFunction:
node.set("root"sv, ByteString::formatted("SafeFunction {} {}:{}", location->function_name(), location->filename(), location->line_number()));
break;
default:
VERIFY_NOT_REACHED();
}
}
node.set("class_name"sv, it.value.class_name);
node.set("edges"sv, edges);
graph.set(ByteString::number(it.key), node);
}
return graph;
}
private:
struct GraphNode {
Optional<HeapRoot> root_origin;
StringView class_name;
HashTable<FlatPtr> edges {};
};
GraphNode* m_node_being_visited { nullptr };
Vector<Cell&> m_work_queue;
HashMap<FlatPtr, GraphNode> m_graph;
Heap& m_heap;
HashTable<HeapBlock*> m_all_live_heap_blocks;
FlatPtr m_min_block_address;
FlatPtr m_max_block_address;
};
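// Dumps the current heap as a JSON object graph, annotating root nodes with their origin
// (Handle, SafeFunction, stack pointer, etc.).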
AK::JsonObject Heap::dump_graph()
{
HashMap<Cell*, HeapRoot> roots;
gather_roots(roots);
GraphConstructorVisitor visitor(*this, roots);
vm().bytecode_interpreter().visit_edges(visitor);
visitor.visit_all_cells();
return visitor.dump();
}
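// Entry point for a mark-and-sweep collection. A CollectEverything pass skips root
// gathering and marking entirely, so every cell is finalized and swept.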
void Heap::collect_garbage(CollectionType collection_type, bool print_report)
{
VERIFY(!m_collecting_garbage);
TemporaryChange change(m_collecting_garbage, true);
#ifdef AK_OS_SERENITY
static size_t global_gc_counter = 0;
perf_event(PERF_EVENT_SIGNPOST, gc_perf_string_id, global_gc_counter++);
#endif
Core::ElapsedTimer collection_measurement_timer;
if (print_report)
collection_measurement_timer.start();
if (collection_type == CollectionType::CollectGarbage) {
if (m_gc_deferrals) {
m_should_gc_when_deferral_ends = true;
return;
}
HashMap<Cell*, HeapRoot> roots;
gather_roots(roots);
mark_live_cells(roots);
}
finalize_unmarked_cells();
sweep_dead_cells(print_report, collection_measurement_timer);
}
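// Collects the explicit root set: VM roots, conservative stack/register roots, live
// Handles, and the contents of MarkedVectors.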
void Heap::gather_roots(HashMap<Cell*, HeapRoot>& roots)
{
vm().gather_roots(roots);
gather_conservative_roots(roots);
for (auto& handle : m_handles)
roots.set(handle.cell(), HeapRoot { .type = HeapRoot::Type::Handle, .location = &handle.source_location() });
for (auto& vector : m_marked_vectors)
vector.gather_roots(roots);
if constexpr (HEAP_DEBUG) {
dbgln("gather_roots:");
for (auto* root : roots.keys())
dbgln(" + {}", root);
}
}
#ifdef HAS_ADDRESS_SANITIZER
__attribute__((no_sanitize("address"))) void Heap::gather_asan_fake_stack_roots(HashMap<FlatPtr, HeapRoot>& possible_pointers, FlatPtr addr, FlatPtr min_block_address, FlatPtr max_block_address)
{
void* begin = nullptr;
void* end = nullptr;
void* real_stack = __asan_addr_is_in_fake_stack(__asan_get_current_fake_stack(), reinterpret_cast<void*>(addr), &begin, &end);
if (real_stack != nullptr) {
for (auto* real_stack_addr = reinterpret_cast<void const* const*>(begin); real_stack_addr < end; ++real_stack_addr) {
void const* real_address = *real_stack_addr;
if (real_address == nullptr)
continue;
add_possible_value(possible_pointers, reinterpret_cast<FlatPtr>(real_address), HeapRoot { .type = HeapRoot::Type::StackPointer }, min_block_address, max_block_address);
}
}
}
#else
void Heap::gather_asan_fake_stack_roots(HashMap<FlatPtr, HeapRoot>&, FlatPtr, FlatPtr, FlatPtr)
{
}
#endif
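// Conservatively scans CPU registers (spilled via setjmp), the machine stack, and any
// registered custom ranges for values that look like pointers into live heap blocks.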
__attribute__((no_sanitize("address"))) void Heap::gather_conservative_roots(HashMap<Cell*, HeapRoot>& roots)
{
FlatPtr dummy;
dbgln_if(HEAP_DEBUG, "gather_conservative_roots:");
jmp_buf buf;
setjmp(buf);
HashMap<FlatPtr, HeapRoot> possible_pointers;
auto* raw_jmp_buf = reinterpret_cast<FlatPtr const*>(buf);
FlatPtr min_block_address, max_block_address;
find_min_and_max_block_addresses(min_block_address, max_block_address);
for (size_t i = 0; i < ((size_t)sizeof(buf)) / sizeof(FlatPtr); ++i)
add_possible_value(possible_pointers, raw_jmp_buf[i], HeapRoot { .type = HeapRoot::Type::RegisterPointer }, min_block_address, max_block_address);
auto stack_reference = bit_cast<FlatPtr>(&dummy);
auto& stack_info = m_vm.stack_info();
for (FlatPtr stack_address = stack_reference; stack_address < stack_info.top(); stack_address += sizeof(FlatPtr)) {
auto data = *reinterpret_cast<FlatPtr*>(stack_address);
add_possible_value(possible_pointers, data, HeapRoot { .type = HeapRoot::Type::StackPointer }, min_block_address, max_block_address);
gather_asan_fake_stack_roots(possible_pointers, data, min_block_address, max_block_address);
}
// NOTE: If we have any custom ranges registered, scan those as well.
// This is where JS::SafeFunction closures get marked.
if (s_custom_ranges_for_conservative_scan) {
for (auto& custom_range : *s_custom_ranges_for_conservative_scan) {
for (size_t i = 0; i < (custom_range.value / sizeof(FlatPtr)); ++i) {
auto safe_function_location = s_safe_function_locations->get(custom_range.key);
add_possible_value(possible_pointers, custom_range.key[i], HeapRoot { .type = HeapRoot::Type::SafeFunction, .location = *safe_function_location }, min_block_address, max_block_address);
}
}
}
HashTable<HeapBlock*> all_live_heap_blocks;
for_each_block([&](auto& block) {
all_live_heap_blocks.set(&block);
return IterationDecision::Continue;
});
for_each_cell_among_possible_pointers(all_live_heap_blocks, possible_pointers, [&](Cell* cell, FlatPtr possible_pointer) {
if (cell->state() == Cell::State::Live) {
dbgln_if(HEAP_DEBUG, " ?-> {}", (void const*)cell);
roots.set(cell, *possible_pointers.get(possible_pointer));
} else {
dbgln_if(HEAP_DEBUG, " #-> {}", (void const*)cell);
}
});
}
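// Marks every cell reachable from the root set, walking visit_edges() iteratively with an
// explicit work queue instead of recursing.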
class MarkingVisitor final : public Cell::Visitor {
public:
explicit MarkingVisitor(Heap& heap, HashMap<Cell*, HeapRoot> const& roots)
: m_heap(heap)
{
m_heap.find_min_and_max_block_addresses(m_min_block_address, m_max_block_address);
m_heap.for_each_block([&](auto& block) {
m_all_live_heap_blocks.set(&block);
return IterationDecision::Continue;
});
for (auto* root : roots.keys()) {
visit(root);
}
}
virtual void visit_impl(Cell& cell) override
{
if (cell.is_marked())
return;
dbgln_if(HEAP_DEBUG, " ! {}", &cell);
cell.set_marked(true);
m_work_queue.append(cell);
}
virtual void visit_possible_values(ReadonlyBytes bytes) override
{
HashMap<FlatPtr, HeapRoot> possible_pointers;
auto* raw_pointer_sized_values = reinterpret_cast<FlatPtr const*>(bytes.data());
for (size_t i = 0; i < (bytes.size() / sizeof(FlatPtr)); ++i)
add_possible_value(possible_pointers, raw_pointer_sized_values[i], HeapRoot { .type = HeapRoot::Type::HeapFunctionCapturedPointer }, m_min_block_address, m_max_block_address);
for_each_cell_among_possible_pointers(m_all_live_heap_blocks, possible_pointers, [&](Cell* cell, FlatPtr) {
if (cell->is_marked())
return;
if (cell->state() != Cell::State::Live)
return;
cell->set_marked(true);
m_work_queue.append(*cell);
});
}
void mark_all_live_cells()
{
while (!m_work_queue.is_empty()) {
m_work_queue.take_last().visit_edges(*this);
}
}
private:
Heap& m_heap;
Vector<Cell&> m_work_queue;
HashTable<HeapBlock*> m_all_live_heap_blocks;
FlatPtr m_min_block_address;
FlatPtr m_max_block_address;
};
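// Marks all cells transitively reachable from the gathered roots, then clears the mark on
// any cells that were explicitly uprooted so they can be collected.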
void Heap::mark_live_cells(HashMap<Cell*, HeapRoot> const& roots)
{
dbgln_if(HEAP_DEBUG, "mark_live_cells:");
MarkingVisitor visitor(*this, roots);
vm().bytecode_interpreter().visit_edges(visitor);
visitor.mark_all_live_cells();
for (auto& inverse_root : m_uprooted_cells)
inverse_root->set_marked(false);
m_uprooted_cells.clear();
}
bool Heap::cell_must_survive_garbage_collection(Cell const& cell)
{
if (!cell.overrides_must_survive_garbage_collection({}))
return false;
return cell.must_survive_garbage_collection();
}
void Heap::finalize_unmarked_cells()
{
for_each_block([&](auto& block) {
block.template for_each_cell_in_state<Cell::State::Live>([](Cell* cell) {
if (!cell->is_marked() && !cell_must_survive_garbage_collection(*cell))
cell->finalize();
});
return IterationDecision::Continue;
});
}
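// Deallocates unmarked cells, clears marks on survivors, returns empty blocks to their
// allocators, recomputes the GC byte threshold, and optionally prints a collection report.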
void Heap::sweep_dead_cells(bool print_report, Core::ElapsedTimer const& measurement_timer)
{
dbgln_if(HEAP_DEBUG, "sweep_dead_cells:");
Vector<HeapBlock*, 32> empty_blocks;
Vector<HeapBlock*, 32> full_blocks_that_became_usable;
size_t collected_cells = 0;
size_t live_cells = 0;
size_t collected_cell_bytes = 0;
size_t live_cell_bytes = 0;
for_each_block([&](auto& block) {
bool block_has_live_cells = false;
bool block_was_full = block.is_full();
block.template for_each_cell_in_state<Cell::State::Live>([&](Cell* cell) {
if (!cell->is_marked() && !cell_must_survive_garbage_collection(*cell)) {
dbgln_if(HEAP_DEBUG, " ~ {}", cell);
block.deallocate(cell);
++collected_cells;
collected_cell_bytes += block.cell_size();
} else {
cell->set_marked(false);
block_has_live_cells = true;
++live_cells;
live_cell_bytes += block.cell_size();
}
});
if (!block_has_live_cells)
empty_blocks.append(&block);
else if (block_was_full != block.is_full())
full_blocks_that_became_usable.append(&block);
return IterationDecision::Continue;
});
for (auto& weak_container : m_weak_containers)
weak_container.remove_dead_cells({});
for (auto* block : empty_blocks) {
dbgln_if(HEAP_DEBUG, " - HeapBlock empty @ {}: cell_size={}", block, block->cell_size());
allocator_for_size(block->cell_size()).block_did_become_empty({}, *block);
}
for (auto* block : full_blocks_that_became_usable) {
dbgln_if(HEAP_DEBUG, " - HeapBlock usable again @ {}: cell_size={}", block, block->cell_size());
allocator_for_size(block->cell_size()).block_did_become_usable({}, *block);
}
if constexpr (HEAP_DEBUG) {
for_each_block([&](auto& block) {
dbgln(" > Live HeapBlock @ {}: cell_size={}", &block, block.cell_size());
return IterationDecision::Continue;
});
}
m_gc_bytes_threshold = live_cell_bytes > GC_MIN_BYTES_THRESHOLD ? live_cell_bytes : GC_MIN_BYTES_THRESHOLD;
if (print_report) {
Duration const time_spent = measurement_timer.elapsed_time();
size_t live_block_count = 0;
for_each_block([&](auto&) {
++live_block_count;
return IterationDecision::Continue;
});
dbgln("Garbage collection report");
dbgln("=============================================");
dbgln(" Time spent: {} ms", time_spent.to_milliseconds());
dbgln(" Live cells: {} ({} bytes)", live_cells, live_cell_bytes);
dbgln("Collected cells: {} ({} bytes)", collected_cells, collected_cell_bytes);
dbgln(" Live blocks: {} ({} bytes)", live_block_count, live_block_count * HeapBlock::block_size);
dbgln(" Freed blocks: {} ({} bytes)", empty_blocks.size(), empty_blocks.size() * HeapBlock::block_size);
dbgln("=============================================");
}
}
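// GC deferral: while at least one deferral is active, collect_garbage() only records that
// a collection was requested; undefer_gc() runs it once the last deferral ends.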
void Heap::defer_gc()
{
++m_gc_deferrals;
}
void Heap::undefer_gc()
{
VERIFY(m_gc_deferrals > 0);
--m_gc_deferrals;
if (!m_gc_deferrals) {
if (m_should_gc_when_deferral_ends)
collect_garbage();
m_should_gc_when_deferral_ends = false;
}
}
void Heap::uproot_cell(Cell* cell)
{
m_uprooted_cells.append(cell);
}
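// Registers a SafeFunction closure's capture storage as a custom range for conservative
// scanning, so cell pointers captured by the closure keep their cells alive.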
void register_safe_function_closure(void* base, size_t size, SourceLocation* location)
{
if (!s_custom_ranges_for_conservative_scan) {
// FIXME: This per-thread HashMap is currently leaked on thread exit.
s_custom_ranges_for_conservative_scan = new HashMap<FlatPtr*, size_t>;
}
if (!s_safe_function_locations) {
s_safe_function_locations = new HashMap<FlatPtr*, SourceLocation*>;
}
auto result = s_custom_ranges_for_conservative_scan->set(reinterpret_cast<FlatPtr*>(base), size);
VERIFY(result == AK::HashSetResult::InsertedNewEntry);
result = s_safe_function_locations->set(reinterpret_cast<FlatPtr*>(base), location);
VERIFY(result == AK::HashSetResult::InsertedNewEntry);
}
void unregister_safe_function_closure(void* base, size_t, SourceLocation*)
{
VERIFY(s_custom_ranges_for_conservative_scan);
bool did_remove_range = s_custom_ranges_for_conservative_scan->remove(reinterpret_cast<FlatPtr*>(base));
VERIFY(did_remove_range);
bool did_remove_location = s_safe_function_locations->remove(reinterpret_cast<FlatPtr*>(base));
VERIFY(did_remove_location);
}
}