LibC+UE: Keep more unused chunked blocks around

Previously, each malloc size class kept around a limited number of
unused blocks, marked with MADV_SET_VOLATILE, which could then be
reinitialized when additional blocks were needed.

This changes malloc() so that it also keeps around a number of blocks
without marking them with MADV_SET_VOLATILE. I termed these "hot"
blocks, whereas blocks marked with MADV_SET_VOLATILE are called "cold"
blocks because they're more expensive to reinitialize: bringing one
back requires madvise() and mprotect() calls, and the kernel may have
purged its contents in the meantime.

In the worst case this could increase memory usage per process by
1 MiB (16 hot blocks of 64 KiB each) when a program requests a bunch
of memory and then frees all of it, since hot blocks stay committed
rather than being handed back to the kernel.

Also, in order to make more efficient use of these unused blocks,
they're now shared between all size classes instead of being cached
per size class.
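
As a rough standalone model of the behavior described above (the types and names here are simplified stand-ins, not LibC's actual internals, which appear in the malloc.cpp diff below): retiring an empty block prefers the hot list, which needs no syscalls; the cold list models MADV_SET_VOLATILE; and because both lists are shared across size classes, a hot block kept for one class can be handed to another after a cheap in-place reinitialization.

```cpp
// Simplified, hypothetical model of the shared hot/cold empty-block caches.
// Block, retire_block() and acquire_block() are illustrative stand-ins.
#include <cstddef>
#include <cstdio>

constexpr size_t hot_capacity = 16;  // mirrors number_of_hot_chunked_blocks_to_keep_around
constexpr size_t cold_capacity = 16; // mirrors number_of_cold_chunked_blocks_to_keep_around

struct Block {
    size_t chunk_size { 0 };
    bool marked_volatile { false };
};

static Block* s_hot[hot_capacity];
static size_t s_hot_count = 0;
static Block* s_cold[cold_capacity];
static size_t s_cold_count = 0;

// Called when the last chunk in a block is freed: keep it hot if possible
// (no syscalls), else keep it cold (kernel may purge it), else drop it.
void retire_block(Block* block)
{
    if (s_hot_count < hot_capacity) {
        s_hot[s_hot_count++] = block; // stays committed and writable
        return;
    }
    if (s_cold_count < cold_capacity) {
        block->marked_volatile = true; // models mprotect(PROT_NONE) + MADV_SET_VOLATILE
        s_cold[s_cold_count++] = block;
        return;
    }
    delete block; // models munmap()
}

// Called when a size class has no usable block: both caches are shared, so a
// block retired by any size class is a candidate for any other.
Block* acquire_block(size_t good_size)
{
    if (s_hot_count > 0) {
        Block* block = s_hot[--s_hot_count];
        block->chunk_size = good_size; // models placement-new ChunkedBlock(good_size)
        return block;
    }
    if (s_cold_count > 0) {
        Block* block = s_cold[--s_cold_count];
        block->marked_volatile = false; // models MADV_SET_NONVOLATILE + mprotect()
        block->chunk_size = good_size;  // kernel may have purged it, so rebuild unconditionally
        return block;
    }
    return new Block { good_size, false }; // models a fresh mmap()
}

int main()
{
    retire_block(new Block { 32, false });
    auto* block = acquire_block(64); // hot reuse across size classes
    std::printf("reused block, chunk_size=%zu\n", block->chunk_size);
    delete block;
    return 0;
}
```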
Gunnar Beutner, 4 years ago
commit 39f0739381

+ 3 - 0
Userland/DevTools/UserspaceEmulator/Emulator_syscalls.cpp

@@ -1121,6 +1121,9 @@ int Emulator::virt$emuctl(FlatPtr arg1, FlatPtr arg2, FlatPtr arg3)
     case 3:
         tracer->target_did_realloc({}, arg3, arg2);
         return 0;
+    case 4:
+        tracer->target_did_change_chunk_size({}, arg3, arg2);
+        return 0;
     default:
         return -EINVAL;
     }

+ 33 - 17
Userland/DevTools/UserspaceEmulator/MallocTracer.cpp

@@ -36,6 +36,27 @@ inline void MallocTracer::for_each_mallocation(Callback callback) const
     });
 }
 
+void MallocTracer::update_metadata(MmapRegion& mmap_region, size_t chunk_size)
+{
+    mmap_region.set_malloc_metadata({},
+        adopt_own(*new MallocRegionMetadata {
+            .region = mmap_region,
+            .address = mmap_region.base(),
+            .chunk_size = chunk_size,
+            .mallocations = {},
+        }));
+    auto& malloc_data = *mmap_region.malloc_metadata();
+
+    bool is_chunked_block = malloc_data.chunk_size <= size_classes[num_size_classes - 1];
+    if (is_chunked_block)
+        malloc_data.mallocations.resize((ChunkedBlock::block_size - sizeof(ChunkedBlock)) / malloc_data.chunk_size);
+    else
+        malloc_data.mallocations.resize(1);
+
+    // Mark the containing mmap region as a malloc block!
+    mmap_region.set_malloc(true);
+}
+
 void MallocTracer::target_did_malloc(Badge<Emulator>, FlatPtr address, size_t size)
 {
     if (m_emulator.is_in_loader_code())
@@ -59,29 +80,24 @@ void MallocTracer::target_did_malloc(Badge<Emulator>, FlatPtr address, size_t si
 
     if (!mmap_region.is_malloc_block()) {
         auto chunk_size = mmap_region.read32(offsetof(CommonHeader, m_size)).value();
-        mmap_region.set_malloc_metadata({},
-            adopt_own(*new MallocRegionMetadata {
-                .region = mmap_region,
-                .address = mmap_region.base(),
-                .chunk_size = chunk_size,
-                .mallocations = {},
-            }));
-        auto& malloc_data = *mmap_region.malloc_metadata();
-
-        bool is_chunked_block = malloc_data.chunk_size <= size_classes[num_size_classes - 1];
-        if (is_chunked_block)
-            malloc_data.mallocations.resize((ChunkedBlock::block_size - sizeof(ChunkedBlock)) / malloc_data.chunk_size);
-        else
-            malloc_data.mallocations.resize(1);
-
-        // Mark the containing mmap region as a malloc block!
-        mmap_region.set_malloc(true);
+        update_metadata(mmap_region, chunk_size);
     }
     auto* mallocation = mmap_region.malloc_metadata()->mallocation_for_address(address);
     VERIFY(mallocation);
     *mallocation = { address, size, true, false, m_emulator.raw_backtrace(), Vector<FlatPtr>() };
 }
 
+void MallocTracer::target_did_change_chunk_size(Badge<Emulator>, FlatPtr block, size_t chunk_size)
+{
+    if (m_emulator.is_in_loader_code())
+        return;
+    auto* region = m_emulator.mmu().find_region({ 0x23, block });
+    VERIFY(region);
+    VERIFY(is<MmapRegion>(*region));
+    auto& mmap_region = static_cast<MmapRegion&>(*region);
+    update_metadata(mmap_region, chunk_size);
+}
+
 ALWAYS_INLINE Mallocation* MallocRegionMetadata::mallocation_for_address(FlatPtr address) const
 {
     auto index = chunk_index_for_address(address);
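
The new notification exists because the tracer sizes its per-block mallocation table from the chunk geometry: when a hot block is reused for a different size class, both the chunk count and the address-to-index mapping change. A small hypothetical illustration of the failure mode that emuctl command 4 prevents (BlockMeta and the 64 KiB block size are assumptions for the example, not MallocTracer's actual types):

```cpp
// Illustrative only: a stale chunk_size maps an address to the wrong
// mallocation slot until the metadata is rebuilt, as update_metadata() does.
#include <cassert>
#include <cstddef>
#include <vector>

// Assumed payload of one chunked block (block size minus header).
constexpr size_t block_payload = 64 * 1024 - 64;

struct BlockMeta {
    size_t chunk_size { 0 };
    std::vector<int> mallocations; // one slot per chunk

    explicit BlockMeta(size_t size)
        : chunk_size(size)
        , mallocations(block_payload / size)
    {
    }

    size_t index_for_offset(size_t offset) const { return offset / chunk_size; }
};

int main()
{
    BlockMeta stale(32);    // block was last used for 32-byte chunks
    size_t offset = 3 * 64; // ...but now hands out 64-byte chunks
    assert(stale.index_for_offset(offset) == 6);   // wrong slot: 6, not 3
    BlockMeta rebuilt(64);  // what target_did_change_chunk_size() triggers
    assert(rebuilt.index_for_offset(offset) == 3); // correct slot
    return 0;
}
```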

+ 3 - 0
Userland/DevTools/UserspaceEmulator/MallocTracer.h

@@ -61,6 +61,7 @@ public:
     void target_did_malloc(Badge<Emulator>, FlatPtr address, size_t);
     void target_did_free(Badge<Emulator>, FlatPtr address);
     void target_did_realloc(Badge<Emulator>, FlatPtr address, size_t);
+    void target_did_change_chunk_size(Badge<Emulator>, FlatPtr, size_t);
 
     void audit_read(const Region&, FlatPtr address, size_t);
     void audit_write(const Region&, FlatPtr address, size_t);
@@ -79,6 +80,8 @@ private:
     void dump_memory_graph();
     void populate_memory_graph();
 
+    void update_metadata(MmapRegion& mmap_region, size_t chunk_size);
+
     Emulator& m_emulator;
 
     MemoryGraph m_memory_graph {};

+ 54 - 18
Userland/Libraries/LibC/malloc.cpp

@@ -33,7 +33,8 @@ static Threading::Lock& malloc_lock()
     return *reinterpret_cast<Threading::Lock*>(&lock_storage);
 }
 
-constexpr size_t number_of_chunked_blocks_to_keep_around_per_size_class = 4;
+constexpr size_t number_of_hot_chunked_blocks_to_keep_around = 16;
+constexpr size_t number_of_cold_chunked_blocks_to_keep_around = 16;
 constexpr size_t number_of_big_blocks_to_keep_around_per_size_class = 8;
 
 static bool s_log_malloc = false;
@@ -60,6 +61,12 @@ ALWAYS_INLINE static void ue_notify_realloc(const void* ptr, size_t size)
         syscall(SC_emuctl, 3, size, (FlatPtr)ptr);
 }
 
+ALWAYS_INLINE static void ue_notify_chunk_size_changed(const void* block, size_t chunk_size)
+{
+    if (s_in_userspace_emulator)
+        syscall(SC_emuctl, 4, chunk_size, (FlatPtr)block);
+}
+
 struct MallocStats {
     size_t number_of_malloc_calls;
 
@@ -67,8 +74,9 @@ struct MallocStats {
     size_t number_of_big_allocator_purge_hits;
     size_t number_of_big_allocs;
 
-    size_t number_of_empty_block_hits;
-    size_t number_of_empty_block_purge_hits;
+    size_t number_of_hot_empty_block_hits;
+    size_t number_of_cold_empty_block_hits;
+    size_t number_of_cold_empty_block_purge_hits;
     size_t number_of_block_allocs;
     size_t number_of_blocks_full;
 
@@ -78,16 +86,20 @@ struct MallocStats {
     size_t number_of_big_allocator_frees;
 
     size_t number_of_freed_full_blocks;
-    size_t number_of_keeps;
+    size_t number_of_hot_keeps;
+    size_t number_of_cold_keeps;
     size_t number_of_frees;
 };
 static MallocStats g_malloc_stats = {};
 
+static size_t s_hot_empty_block_count { 0 };
+static ChunkedBlock* s_hot_empty_blocks[number_of_hot_chunked_blocks_to_keep_around] { nullptr };
+static size_t s_cold_empty_block_count { 0 };
+static ChunkedBlock* s_cold_empty_blocks[number_of_cold_chunked_blocks_to_keep_around] { nullptr };
+
 struct Allocator {
     size_t size { 0 };
     size_t block_count { 0 };
-    size_t empty_block_count { 0 };
-    ChunkedBlock* empty_blocks[number_of_chunked_blocks_to_keep_around_per_size_class] { nullptr };
     InlineLinkedList<ChunkedBlock> usable_blocks;
     InlineLinkedList<ChunkedBlock> full_blocks;
 };
@@ -215,9 +227,22 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
             break;
     }
 
-    if (!block && allocator->empty_block_count) {
-        g_malloc_stats.number_of_empty_block_hits++;
-        block = allocator->empty_blocks[--allocator->empty_block_count];
+    if (!block && s_hot_empty_block_count) {
+        g_malloc_stats.number_of_hot_empty_block_hits++;
+        block = s_hot_empty_blocks[--s_hot_empty_block_count];
+        if (block->m_size != good_size) {
+            new (block) ChunkedBlock(good_size);
+            ue_notify_chunk_size_changed(block, good_size);
+            char buffer[64];
+            snprintf(buffer, sizeof(buffer), "malloc: ChunkedBlock(%zu)", good_size);
+            set_mmap_name(block, ChunkedBlock::block_size, buffer);
+        }
+        allocator->usable_blocks.append(block);
+    }
+
+    if (!block && s_cold_empty_block_count) {
+        g_malloc_stats.number_of_cold_empty_block_hits++;
+        block = s_cold_empty_blocks[--s_cold_empty_block_count];
         int rc = madvise(block, ChunkedBlock::block_size, MADV_SET_NONVOLATILE);
         bool this_block_was_purged = rc == 1;
         if (rc < 0) {
@@ -229,9 +254,11 @@ static void* malloc_impl(size_t size, CallerWillInitializeMemory caller_will_ini
             perror("mprotect");
             VERIFY_NOT_REACHED();
         }
-        if (this_block_was_purged) {
-            g_malloc_stats.number_of_empty_block_purge_hits++;
+        if (this_block_was_purged || block->m_size != good_size) {
+            if (this_block_was_purged)
+                g_malloc_stats.number_of_cold_empty_block_purge_hits++;
             new (block) ChunkedBlock(good_size);
+            ue_notify_chunk_size_changed(block, good_size);
         }
         allocator->usable_blocks.append(block);
     }
@@ -335,11 +362,18 @@ static void free_impl(void* ptr)
     if (!block->used_chunks()) {
         size_t good_size;
         auto* allocator = allocator_for_size(block->m_size, good_size);
-        if (allocator->block_count < number_of_chunked_blocks_to_keep_around_per_size_class) {
-            dbgln_if(MALLOC_DEBUG, "Keeping block {:p} around for size class {}", block, good_size);
-            g_malloc_stats.number_of_keeps++;
+        if (s_hot_empty_block_count < number_of_hot_chunked_blocks_to_keep_around) {
+            dbgln_if(MALLOC_DEBUG, "Keeping hot block {:p} around", block);
+            g_malloc_stats.number_of_hot_keeps++;
+            allocator->usable_blocks.remove(block);
+            s_hot_empty_blocks[s_hot_empty_block_count++] = block;
+            return;
+        }
+        if (s_cold_empty_block_count < number_of_cold_chunked_blocks_to_keep_around) {
+            dbgln_if(MALLOC_DEBUG, "Keeping cold block {:p} around", block);
+            g_malloc_stats.number_of_cold_keeps++;
             allocator->usable_blocks.remove(block);
-            allocator->empty_blocks[allocator->empty_block_count++] = block;
+            s_cold_empty_blocks[s_cold_empty_block_count++] = block;
             mprotect(block, ChunkedBlock::block_size, PROT_NONE);
             madvise(block, ChunkedBlock::block_size, MADV_SET_VOLATILE);
             return;
@@ -458,8 +492,9 @@ void serenity_dump_malloc_stats()
     dbgln("big alloc hits that were purged: {}", g_malloc_stats.number_of_big_allocator_purge_hits);
     dbgln("big alloc hits that were purged: {}", g_malloc_stats.number_of_big_allocator_purge_hits);
     dbgln("big allocs: {}", g_malloc_stats.number_of_big_allocs);
     dbgln("big allocs: {}", g_malloc_stats.number_of_big_allocs);
     dbgln();
     dbgln();
-    dbgln("empty block hits: {}", g_malloc_stats.number_of_empty_block_hits);
-    dbgln("empty block hits that were purged: {}", g_malloc_stats.number_of_empty_block_purge_hits);
+    dbgln("empty hot block hits: {}", g_malloc_stats.number_of_hot_empty_block_hits);
+    dbgln("empty cold block hits: {}", g_malloc_stats.number_of_cold_empty_block_hits);
+    dbgln("empty cold block hits that were purged: {}", g_malloc_stats.number_of_cold_empty_block_purge_hits);
     dbgln("block allocs: {}", g_malloc_stats.number_of_block_allocs);
     dbgln("block allocs: {}", g_malloc_stats.number_of_block_allocs);
     dbgln("filled blocks: {}", g_malloc_stats.number_of_blocks_full);
     dbgln("filled blocks: {}", g_malloc_stats.number_of_blocks_full);
     dbgln();
     dbgln();
@@ -469,7 +504,8 @@ void serenity_dump_malloc_stats()
     dbgln("big alloc frees: {}", g_malloc_stats.number_of_big_allocator_frees);
     dbgln("big alloc frees: {}", g_malloc_stats.number_of_big_allocator_frees);
     dbgln();
     dbgln();
     dbgln("full block frees: {}", g_malloc_stats.number_of_freed_full_blocks);
     dbgln("full block frees: {}", g_malloc_stats.number_of_freed_full_blocks);
-    dbgln("number of keeps: {}", g_malloc_stats.number_of_keeps);
+    dbgln("number of hot keeps: {}", g_malloc_stats.number_of_hot_keeps);
+    dbgln("number of cold keeps: {}", g_malloc_stats.number_of_cold_keeps);
     dbgln("number of frees: {}", g_malloc_stats.number_of_frees);
     dbgln("number of frees: {}", g_malloc_stats.number_of_frees);
 }
 }
 }
 }
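
A hedged way to exercise the new code paths on a SerenityOS build (the header providing serenity_dump_malloc_stats() is an assumption here): fill and then empty one size class so its blocks land on the hot list, then allocate from another size class and look for hot keeps and hot hits in the debug log instead of fresh block allocs.

```cpp
// Sketch of a manual test; assumes serenity_dump_malloc_stats() is declared
// in <serenity.h> on the target system.
#include <serenity.h>
#include <stdlib.h>

int main()
{
    enum { count = 4096 };
    static void* ptrs[count];

    // Fill several chunked blocks in the 32-byte size class...
    for (int i = 0; i < count; ++i)
        ptrs[i] = malloc(32);
    // ...then free everything so the emptied blocks are kept "hot".
    for (int i = 0; i < count; ++i)
        free(ptrs[i]);

    // A different size class should now reuse those hot blocks
    // (counted as number_of_hot_empty_block_hits) rather than mmap new ones.
    for (int i = 0; i < count; ++i)
        ptrs[i] = malloc(64);
    for (int i = 0; i < count; ++i)
        free(ptrs[i]);

    serenity_dump_malloc_stats(); // counters are printed via dbgln()
    return 0;
}
```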