Explorar o código

Kernel: Start implementing purgeable memory support

It's now possible to get purgeable memory by using mmap(MAP_PURGEABLE).
Purgeable memory has a "volatile" flag that can be set using madvise():

- madvise(..., MADV_SET_VOLATILE)
- madvise(..., MADV_SET_NONVOLATILE)

When in the "volatile" state, the kernel may take away the underlying
physical memory pages at any time, without notifying the owner.
This gives you a guilt discount when caching very large things. :^)

Setting a purgeable region to non-volatile will return whether or not
the memory has been taken away by the kernel while being volatile.
Basically, if madvise(..., MADV_SET_NONVOLATILE) returns 1, that means
the memory was purged while volatile, and whatever was in that piece
of memory needs to be reconstructed before use.
Andreas Kling hai 5 anos
pai
achega
dbb644f20c

+ 8 - 0
Kernel/FileSystem/ProcFS.cpp

@@ -24,6 +24,7 @@
 #include <Kernel/Net/UDPSocket.h>
 #include <Kernel/PCI.h>
 #include <Kernel/VM/MemoryManager.h>
+#include <Kernel/VM/PurgeableVMObject.h>
 #include <LibC/errno_numbers.h>
 
 enum ProcParentDirectory {
@@ -262,6 +263,11 @@ Optional<KBuffer> procfs$pid_vm(InodeIdentifier identifier)
         region_object.add("writable", region.is_writable());
         region_object.add("stack", region.is_stack());
         region_object.add("shared", region.is_shared());
+        // Emit the "purgeable" key once per region; "volatile" is only present for purgeable regions.
+        const bool region_is_purgeable = region.vmobject().is_purgeable();
+        region_object.add("purgeable", region_is_purgeable);
+        if (region_is_purgeable)
+            region_object.add("volatile", static_cast<const PurgeableVMObject&>(region.vmobject()).is_volatile());
         region_object.add("address", region.vaddr().get());
         region_object.add("size", (u32)region.size());
         region_object.add("amount_resident", (u32)region.amount_resident());
@@ -716,6 +722,8 @@ Optional<KBuffer> procfs$all(InodeIdentifier)
         process_object.add("amount_virtual", (u32)process.amount_virtual());
         process_object.add("amount_resident", (u32)process.amount_resident());
         process_object.add("amount_shared", (u32)process.amount_shared());
+        process_object.add("amount_purgeable_volatile", (u32)process.amount_purgeable_volatile());
+        process_object.add("amount_purgeable_nonvolatile", (u32)process.amount_purgeable_nonvolatile());
         process_object.add("icon_id", process.icon_id());
         auto thread_array = process_object.add_array("threads");
         process.for_each_thread([&](const Thread& thread) {

+ 1 - 0
Kernel/Makefile

@@ -93,6 +93,7 @@ CXX_OBJS = \
     VM/PageDirectory.o \
     VM/PhysicalPage.o \
     VM/PhysicalRegion.o \
+    VM/PurgeableVMObject.o \
     VM/RangeAllocator.o \
     VM/Region.o \
     VM/VMObject.o \

+ 81 - 1
Kernel/Process.cpp

@@ -36,6 +36,7 @@
 #include <Kernel/TTY/MasterPTY.h>
 #include <Kernel/Thread.h>
 #include <Kernel/VM/InodeVMObject.h>
+#include <Kernel/VM/PurgeableVMObject.h>
 #include <LibC/errno_numbers.h>
 #include <LibC/signal_numbers.h>
 #include <LibELF/ELFLoader.h>
@@ -224,6 +225,18 @@ void* Process::sys$mmap(const Syscall::SC_mmap_params* params)
         return (void*)-EINVAL;
 
     // FIXME: The rest of this function seems like it could share more code..
+    if (flags & MAP_PURGEABLE) {
+        auto vmobject = PurgeableVMObject::create_with_size(size);
+        auto* region = allocate_region_with_vmo(VirtualAddress((u32)addr), size, vmobject, 0, name ? name : "mmap (purgeable)", prot);
+        if (!region)
+            return (void*)-ENOMEM;
+        if (flags & MAP_SHARED)
+            region->set_shared(true);
+
+        region->set_mmap(true);
+        return region->vaddr().as_ptr();
+    }
+
     if (flags & MAP_ANONYMOUS) {
         auto* region = allocate_region(VirtualAddress((u32)addr), size, name ? name : "mmap", prot, false);
         if (!region)
@@ -312,6 +325,52 @@ int Process::sys$mprotect(void* addr, size_t size, int prot)
     return 0;
 }
 
+// Toggles the "volatile" flag of the purgeable VMObject backing the region
+// that matches the range { address, size }.
+//
+// `advice` must contain exactly one of MADV_SET_VOLATILE / MADV_SET_NONVOLATILE.
+//
+// Returns:
+//   0        after MADV_SET_VOLATILE, or after MADV_SET_NONVOLATILE when the
+//            object's was_purged flag was not set.
+//   1        after MADV_SET_NONVOLATILE when the was_purged flag was set
+//            (the flag is cleared again before returning).
+//   -EINVAL  if no region matches the range, or if `advice` selects both or
+//            neither of the two flags.
+//   -EPERM   if the region is not an mmap region, or its VMObject is not purgeable.
+int Process::sys$madvise(void* address, size_t size, int advice)
+{
+    auto* region = region_from_range({ VirtualAddress((u32)address), size });
+    if (!region)
+        return -EINVAL;
+    if (!region->is_mmap())
+        return -EPERM;
+
+    const bool wants_volatile = (advice & MADV_SET_VOLATILE) != 0;
+    const bool wants_nonvolatile = (advice & MADV_SET_NONVOLATILE) != 0;
+
+    // Both flags set and no flag set are equally invalid requests.
+    if (wants_volatile == wants_nonvolatile)
+        return -EINVAL;
+
+    // The downcast below is only valid once is_purgeable() has been checked.
+    if (!region->vmobject().is_purgeable())
+        return -EPERM;
+    auto& purgeable_object = static_cast<PurgeableVMObject&>(region->vmobject());
+
+    if (wants_volatile) {
+        purgeable_object.set_volatile(true);
+        return 0;
+    }
+
+    // Leaving the volatile state: report (and then reset) the purge flag.
+    purgeable_object.set_volatile(false);
+    const bool purged_while_volatile = purgeable_object.was_purged();
+    purgeable_object.set_was_purged(false);
+    return purged_while_volatile ? 1 : 0;
+}
+
+// Calls purge() on every purgeable VMObject that MM.for_each_vmobject() visits
+// and returns the sum of the page counts those calls report.
+int Process::sys$purge()
+{
+    NonnullRefPtrVector<PurgeableVMObject> purgeable_objects;
+    {
+        // Only the collection pass runs with interrupts disabled; the purge
+        // calls below happen after this scope ends.
+        // NOTE(review): presumably because purge() acquires a lock - confirm.
+        InterruptDisabler disabler;
+        MM.for_each_vmobject([&purgeable_objects](auto& candidate) {
+            if (!candidate.is_purgeable())
+                return IterationDecision::Continue;
+            purgeable_objects.append(static_cast<PurgeableVMObject&>(candidate));
+            return IterationDecision::Continue;
+        });
+    }
+
+    int total_purged_pages = 0;
+    for (auto& object : purgeable_objects)
+        total_purged_pages += object.purge();
+    return total_purged_pages;
+}
+
 int Process::sys$gethostname(char* buffer, ssize_t size)
 {
     if (size < 0)
@@ -842,7 +901,7 @@ void Process::dump_regions()
     kprintf("Process %s(%u) regions:\n", name().characters(), pid());
     kprintf("BEGIN       END         SIZE        ACCESS  NAME\n");
     for (auto& region : m_regions) {
-        kprintf("%08x -- %08x    %08x    %c%c%c%c%c     %s\n",
+        kprintf("%08x -- %08x    %08x    %c%c%c%c%c%c    %s\n",
             region.vaddr().get(),
             region.vaddr().offset(region.size() - 1).get(),
             region.size(),
@@ -851,6 +910,7 @@ void Process::dump_regions()
             region.is_executable() ? 'X' : ' ',
             region.is_shared() ? 'S' : ' ',
             region.is_stack() ? 'T' : ' ',
+            region.vmobject().is_purgeable() ? 'P' : ' ',
             region.name().characters());
     }
 }
@@ -2410,6 +2470,26 @@ size_t Process::amount_shared() const
     return amount;
 }
 
+// Sums amount_resident() over all of this process's regions whose VMObject is
+// purgeable and currently flagged volatile.
+size_t Process::amount_purgeable_volatile() const
+{
+    size_t total = 0;
+    for (auto& region : m_regions) {
+        auto& object = region.vmobject();
+        if (!object.is_purgeable())
+            continue;
+        // Safe downcast: is_purgeable() was checked just above.
+        if (!static_cast<const PurgeableVMObject&>(object).is_volatile())
+            continue;
+        total += region.amount_resident();
+    }
+    return total;
+}
+
+// Sums amount_resident() over all of this process's regions whose VMObject is
+// purgeable and currently NOT flagged volatile.
+size_t Process::amount_purgeable_nonvolatile() const
+{
+    size_t total = 0;
+    for (auto& region : m_regions) {
+        auto& object = region.vmobject();
+        if (!object.is_purgeable())
+            continue;
+        // Safe downcast: is_purgeable() was checked just above.
+        if (static_cast<const PurgeableVMObject&>(object).is_volatile())
+            continue;
+        total += region.amount_resident();
+    }
+    return total;
+}
+
 int Process::sys$socket(int domain, int type, int protocol)
 {
     int fd = alloc_fd();

+ 4 - 0
Kernel/Process.h

@@ -139,6 +139,8 @@ public:
     int sys$munmap(void*, size_t size);
     int sys$set_mmap_name(void*, size_t, const char*);
     int sys$mprotect(void*, size_t, int prot);
+    int sys$madvise(void*, size_t, int advice);
+    int sys$purge();
     int sys$select(const Syscall::SC_select_params*);
     int sys$poll(pollfd*, int nfds, int timeout);
     ssize_t sys$get_dir_entries(int fd, void*, ssize_t);
@@ -266,6 +268,8 @@ public:
     size_t amount_virtual() const;
     size_t amount_resident() const;
     size_t amount_shared() const;
+    size_t amount_purgeable_volatile() const;
+    size_t amount_purgeable_nonvolatile() const;
 
     Process* fork(RegisterDump&);
     int exec(String path, Vector<String> arguments, Vector<String> environment);

+ 3 - 1
Kernel/Syscall.h

@@ -144,7 +144,9 @@ typedef u32 socklen_t;
     __ENUMERATE_SYSCALL(module_unload)          \
     __ENUMERATE_SYSCALL(detach_thread)          \
     __ENUMERATE_SYSCALL(set_thread_name)        \
-    __ENUMERATE_SYSCALL(get_thread_name)
+    __ENUMERATE_SYSCALL(get_thread_name)        \
+    __ENUMERATE_SYSCALL(madvise)                \
+    __ENUMERATE_SYSCALL(purge)
 
 namespace Syscall {
 

+ 4 - 0
Kernel/UnixTypes.h

@@ -27,12 +27,16 @@
 #define MAP_ANONYMOUS 0x20
 #define MAP_ANON MAP_ANONYMOUS
 #define MAP_STACK 0x40
+#define MAP_PURGEABLE 0x80
 
 #define PROT_READ 0x1
 #define PROT_WRITE 0x2
 #define PROT_EXEC 0x4
 #define PROT_NONE 0x0
 
+#define MADV_SET_VOLATILE 0x100
+#define MADV_SET_NONVOLATILE 0x200
+
 #define F_DUPFD 0
 #define F_GETFD 1
 #define F_SETFD 2

+ 4 - 2
Kernel/VM/AnonymousVMObject.h

@@ -3,7 +3,7 @@
 #include <Kernel/VM/PhysicalAddress.h>
 #include <Kernel/VM/VMObject.h>
 
-class AnonymousVMObject final : public VMObject {
+class AnonymousVMObject : public VMObject {
 public:
     virtual ~AnonymousVMObject() override;
 
@@ -11,9 +11,11 @@ public:
     static NonnullRefPtr<AnonymousVMObject> create_for_physical_range(PhysicalAddress, size_t);
     virtual NonnullRefPtr<VMObject> clone() override;
 
-private:
+protected:
     explicit AnonymousVMObject(size_t);
     explicit AnonymousVMObject(const AnonymousVMObject&);
+
+private:
     AnonymousVMObject(PhysicalAddress, size_t);
 
     AnonymousVMObject& operator=(const AnonymousVMObject&) = delete;

+ 41 - 0
Kernel/VM/PurgeableVMObject.cpp

@@ -0,0 +1,41 @@
+#include <Kernel/VM/PurgeableVMObject.h>
+#include <Kernel/VM/PhysicalPage.h>
+
+// Heap-allocates a PurgeableVMObject constructed from `size` and returns it
+// wrapped by adopt(). This is the public way to create one, since the
+// constructors are private.
+NonnullRefPtr<PurgeableVMObject> PurgeableVMObject::create_with_size(size_t size)
+{
+    auto* fresh_object = new PurgeableVMObject(size);
+    return adopt(*fresh_object);
+}
+
+// Forwards `size` to the AnonymousVMObject base; the purgeable-specific flags
+// keep their in-class defaults.
+PurgeableVMObject::PurgeableVMObject(size_t size)
+    : AnonymousVMObject(size)
+{
+}
+
+// Copy-constructs the AnonymousVMObject base from `other`.
+// The purgeable-specific flags are not named in the initializer list, so they
+// are not copied from `other` and take their in-class defaults instead.
+// NOTE(review): confirm that dropping the volatile/was-purged state on copy
+// (and therefore in clone()) is intended.
+PurgeableVMObject::PurgeableVMObject(const PurgeableVMObject& other)
+    : AnonymousVMObject(other)
+{
+}
+
+// Nothing purgeable-specific to release; defined out of line.
+PurgeableVMObject::~PurgeableVMObject()
+{
+}
+
+// Returns a newly allocated PurgeableVMObject that is copy-constructed from
+// this one, wrapped by adopt().
+NonnullRefPtr<VMObject> PurgeableVMObject::clone()
+{
+    auto* duplicate = new PurgeableVMObject(*this);
+    return adopt(*duplicate);
+}
+
+// Drops every physical page reference held by this object, but only while it
+// is flagged volatile.
+//
+// Returns the number of slots that actually held a page before being cleared;
+// returns 0 without touching anything when the object is not volatile.
+// When the object is volatile, the was-purged flag is set even if no slot held
+// a page.
+//
+// NOTE(review): only this object's page slots are cleared here; whether
+// regions that already map these pages need to be invalidated is not visible
+// in this function - confirm.
+int PurgeableVMObject::purge()
+{
+    LOCKER(m_paging_lock);
+    if (!m_volatile)
+        return 0;
+
+    int dropped_page_count = 0;
+    for (size_t index = 0; index < m_physical_pages.size(); ++index) {
+        auto& slot = m_physical_pages[index];
+        if (slot)
+            ++dropped_page_count;
+        slot = nullptr;
+    }
+
+    m_was_purged = true;
+    return dropped_page_count;
+}

+ 32 - 0
Kernel/VM/PurgeableVMObject.h

@@ -0,0 +1,32 @@
+#pragma once
+
+#include <Kernel/VM/AnonymousVMObject.h>
+
+// An AnonymousVMObject that additionally carries a "volatile" flag and a
+// "was purged" flag. purge() only drops pages while the volatile flag is set.
+class PurgeableVMObject final : public AnonymousVMObject {
+public:
+    virtual ~PurgeableVMObject() override;
+
+    // Factory; the constructors are private.
+    static NonnullRefPtr<PurgeableVMObject> create_with_size(size_t);
+    virtual NonnullRefPtr<VMObject> clone() override;
+
+    // Returns the number of pages that were dropped (0 if not volatile).
+    int purge();
+
+    // Set by purge(); callers reset it explicitly via set_was_purged(false).
+    bool was_purged() const { return m_was_purged; }
+    void set_was_purged(bool purged) { m_was_purged = purged; }
+
+    // While true, purge() is allowed to drop this object's pages.
+    bool is_volatile() const { return m_volatile; }
+    void set_volatile(bool is_volatile) { m_volatile = is_volatile; }
+
+private:
+    explicit PurgeableVMObject(size_t);
+    explicit PurgeableVMObject(const PurgeableVMObject&);
+
+    // Not assignable and not movable.
+    PurgeableVMObject& operator=(const PurgeableVMObject&) = delete;
+    PurgeableVMObject& operator=(PurgeableVMObject&&) = delete;
+    PurgeableVMObject(PurgeableVMObject&&) = delete;
+
+    // Identifies this object as purgeable so callers may downcast to PurgeableVMObject.
+    virtual bool is_purgeable() const override { return true; }
+
+    bool m_was_purged { false };
+    bool m_volatile { false };
+};

+ 4 - 3
Kernel/VM/Region.cpp

@@ -299,10 +299,11 @@ PageFaultResponse Region::handle_zero_fault(size_t page_index_in_region)
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT(vmobject().is_anonymous());
 
-    auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];
+    sti();
+    LOCKER(vmobject().m_paging_lock);
+    cli();
 
-    // NOTE: We don't need to acquire the VMObject's lock.
-    //       This function is already exclusive due to interrupts being blocked.
+    auto& vmobject_physical_page_entry = vmobject().physical_pages()[first_page_index() + page_index_in_region];
 
     if (!vmobject_physical_page_entry.is_null()) {
 #ifdef PAGE_FAULT_DEBUG

+ 2 - 2
Kernel/VM/VMObject.h

@@ -22,6 +22,7 @@ public:
     virtual NonnullRefPtr<VMObject> clone() = 0;
 
     virtual bool is_anonymous() const { return false; }
+    virtual bool is_purgeable() const { return false; }
     virtual bool is_inode() const { return false; }
 
     size_t page_count() const { return m_physical_pages.size(); }
@@ -42,11 +43,10 @@ protected:
     void for_each_region(Callback);
 
     FixedArray<RefPtr<PhysicalPage>> m_physical_pages;
+    Lock m_paging_lock { "VMObject" };
 
 private:
     VMObject& operator=(const VMObject&) = delete;
     VMObject& operator=(VMObject&&) = delete;
     VMObject(VMObject&&) = delete;
-
-    Lock m_paging_lock { "VMObject" };
 };

+ 7 - 0
Libraries/LibC/mman.cpp

@@ -56,4 +56,11 @@ int shm_unlink(const char* name)
     int rc = syscall(SC_shm_unlink, name);
     __RETURN_WITH_ERRNO(rc, rc, -1);
 }
+
+// Userspace wrapper: forwards its three arguments to the SC_madvise syscall
+// and hands the raw result to __RETURN_WITH_ERRNO.
+// NOTE(review): presumably a negative result is turned into errno with a -1
+// return; the macro is defined elsewhere - confirm.
+int madvise(void* address, size_t size, int advice)
+{
+    int syscall_result = syscall(SC_madvise, address, size, advice);
+    __RETURN_WITH_ERRNO(syscall_result, syscall_result, -1);
+}
+
 }

+ 5 - 0
Libraries/LibC/mman.h

@@ -9,6 +9,7 @@
 #define MAP_ANONYMOUS 0x20
 #define MAP_ANON MAP_ANONYMOUS
 #define MAP_STACK 0x40
+#define MAP_PURGEABLE 0x80
 
 #define PROT_READ 0x1
 #define PROT_WRITE 0x2
@@ -17,6 +18,9 @@
 
 #define MAP_FAILED ((void*)-1)
 
+#define MADV_SET_VOLATILE 0x100
+#define MADV_SET_NONVOLATILE 0x200
+
 __BEGIN_DECLS
 
 void* mmap(void* addr, size_t, int prot, int flags, int fd, off_t);
@@ -26,5 +30,6 @@ int mprotect(void*, size_t, int prot);
 int set_mmap_name(void*, size_t, const char*);
 int shm_open(const char* name, int flags, mode_t);
 int shm_unlink(const char* name);
+int madvise(void*, size_t, int advice);
 
 __END_DECLS