From 52deb09382880b27ae3fca395cc9340e374f5142 Mon Sep 17 00:00:00 2001
From: Andreas Kling
Date: Wed, 25 Dec 2019 11:22:16 +0100
Subject: [PATCH] Kernel: Enable PAE (Physical Address Extension)

Introduce one more (CPU) indirection layer in the paging code: the page
directory pointer table (PDPT). Each PageDirectory now has 4 separate
PageDirectoryEntry arrays, governing 1 GB of VM each.

A really neat side-effect of this is that we can now share the physical
page containing the >=3GB kernel-only address space metadata between
all processes, instead of lazily cloning it on page faults.

This will give us access to the NX (No eXecute) bit, allowing us to
prevent execution of memory that's not supposed to be executed.
---
 Kernel/Arch/i386/Boot/boot.S |  2 +-
 Kernel/Arch/i386/CPU.h       | 41 ++++++++++++++++------
 Kernel/Thread.cpp            |  1 -
 Kernel/VM/MemoryManager.cpp  | 66 ++++++++++--------------------------
 Kernel/VM/MemoryManager.h    |  5 +--
 Kernel/VM/PageDirectory.cpp  | 41 ++++++++++++++++------
 Kernel/VM/PageDirectory.h    |  9 +++--
 7 files changed, 84 insertions(+), 81 deletions(-)

diff --git a/Kernel/Arch/i386/Boot/boot.S b/Kernel/Arch/i386/Boot/boot.S
index 535f13d8489..5ae068c7717 100644
--- a/Kernel/Arch/i386/Boot/boot.S
+++ b/Kernel/Arch/i386/Boot/boot.S
@@ -34,7 +34,7 @@ stack_top:
 .section .page_tables
 .align 4096
 page_tables_start:
-.skip 4096*3
+.skip 4096*9
 
 .section .text
 
diff --git a/Kernel/Arch/i386/CPU.h b/Kernel/Arch/i386/CPU.h
index 49bf4c9aa36..0f592e90375 100644
--- a/Kernel/Arch/i386/CPU.h
+++ b/Kernel/Arch/i386/CPU.h
@@ -9,6 +9,7 @@
 #define PAGE_MASK 0xfffff000
 
 class MemoryManager;
+class PageDirectory;
 class PageTableEntry;
 
 struct [[gnu::packed]] TSS32
@@ -89,12 +90,12 @@ public:
     PageTableEntry* page_table_base() { return reinterpret_cast<PageTableEntry*>(m_raw & 0xfffff000u); }
     void set_page_table_base(u32 value)
     {
-        m_raw &= 0xfff;
+        m_raw &= 0x8000000000000fffULL;
         m_raw |= value & 0xfffff000;
     }
 
-    u32 raw() const { return m_raw; }
-    void copy_from(Badge<MemoryManager>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }
+    u64 raw() const { return m_raw; }
+    void copy_from(Badge<PageDirectory>, const PageDirectoryEntry& other) { m_raw = other.m_raw; }
 
     enum Flags {
         Present = 1 << 0,
@@ -103,6 +104,7 @@ public:
         WriteThrough = 1 << 3,
         CacheDisabled = 1 << 4,
         Global = 1 << 8,
+        NoExecute = 0x8000000000000000ULL,
     };
 
     bool is_present() const { return raw() & Present; }
@@ -123,7 +125,10 @@ public:
     bool is_global() const { return raw() & Global; }
     void set_global(bool b) { set_bit(Global, b); }
 
-    void set_bit(u32 bit, bool value)
+    bool is_execute_disabled() const { return raw() & NoExecute; }
+    void set_execute_disabled(bool b) { set_bit(NoExecute, b); }
+
+    void set_bit(u64 bit, bool value)
     {
         if (value)
             m_raw |= bit;
@@ -132,7 +137,7 @@ public:
     }
 
 private:
-    u32 m_raw;
+    u64 m_raw;
 };
 
 class PageTableEntry {
@@ -140,11 +145,11 @@ public:
     void* physical_page_base() { return reinterpret_cast<void*>(m_raw & 0xfffff000u); }
     void set_physical_page_base(u32 value)
    {
-        m_raw &= 0xfff;
+        m_raw &= 0x8000000000000fffULL;
         m_raw |= value & 0xfffff000;
     }
 
-    u32 raw() const { return m_raw; }
+    u64 raw() const { return m_raw; }
 
     enum Flags {
         Present = 1 << 0,
@@ -153,6 +158,7 @@ public:
         WriteThrough = 1 << 3,
         CacheDisabled = 1 << 4,
         Global = 1 << 8,
+        NoExecute = 0x8000000000000000ULL,
     };
 
     bool is_present() const { return raw() & Present; }
@@ -173,7 +179,10 @@ public:
     bool is_global() const { return raw() & Global; }
     void set_global(bool b) { set_bit(Global, b); }
 
-    void set_bit(u32 bit, bool value)
+    bool is_execute_disabled() const { return raw() & NoExecute; }
+    void set_execute_disabled(bool b) { set_bit(NoExecute, b); }
+
+    void set_bit(u64 bit, bool value)
    {
         if (value)
             m_raw |= bit;
@@ -182,11 +191,21 @@ public:
     }
 
 private:
-    u32 m_raw;
+    u64 m_raw;
 };
 
-static_assert(sizeof(PageDirectoryEntry) == 4);
-static_assert(sizeof(PageTableEntry) == 4);
+static_assert(sizeof(PageDirectoryEntry) == 8);
+static_assert(sizeof(PageTableEntry) == 8);
+
+class PageDirectoryPointerTable {
+public:
+    PageDirectoryEntry* directory(size_t index)
+    {
+        return (PageDirectoryEntry*)(raw[index] & ~0xfffu);
+    }
+
+    u64 raw[4];
+};
 
 class IRQHandler;
 struct RegisterDump;
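
Why the entry types widen to u64: with PAE, page directory and page table
entries are 8 bytes, and the NX bit is bit 63, so the set_*_base() masks have
to preserve the top bit when the base address is rewritten. A minimal
standalone sketch of that invariant (ordinary user-space C++ with made-up
addresses, not kernel code):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const uint64_t NoExecute = 0x8000000000000000ULL;
        uint64_t raw = 0;

        raw |= 0x00234000u; // hypothetical 4 KB-aligned physical base
        raw |= NoExecute;   // mark the range non-executable

        // set_page_table_base() first masks with 0x8000000000000fffULL,
        // which keeps the low flag bits *and* bit 63, then ORs in a new base:
        raw &= 0x8000000000000fffULL;
        raw |= 0x00567000u & 0xfffff000u;

        assert(raw & NoExecute);                    // NX survived the base update
        assert((raw & 0xfffff000u) == 0x00567000u); // new base is in place
        return 0;
    }

With the old 32-bit m_raw there was simply no bit 63 to store, which is why
the NX work depends on this change.
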
diff --git a/Kernel/Thread.cpp b/Kernel/Thread.cpp
index be1ceeff226..81a6612f560 100644
--- a/Kernel/Thread.cpp
+++ b/Kernel/Thread.cpp
@@ -100,7 +100,6 @@ Thread::Thread(Process& process)
         m_tss.esp0 = m_kernel_stack_top;
         kprintf("Allocated ring0 stack @ %p - %p\n", m_kernel_stack_base, m_kernel_stack_top);
     }
-    m_process.page_directory().update_kernel_mappings();
 
     // HACK: Ring2 SS in the TSS is the current PID.
     m_tss.ss2 = m_process.pid();
diff --git a/Kernel/VM/MemoryManager.cpp b/Kernel/VM/MemoryManager.cpp
index f6861136a51..a3e792d6cb5 100644
--- a/Kernel/VM/MemoryManager.cpp
+++ b/Kernel/VM/MemoryManager.cpp
@@ -23,8 +23,11 @@ MemoryManager& MM
 MemoryManager::MemoryManager(u32 physical_address_for_kernel_page_tables)
 {
     m_kernel_page_directory = PageDirectory::create_at_fixed_address(PhysicalAddress(physical_address_for_kernel_page_tables));
-    m_page_table_zero = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE);
-    m_page_table_one = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE * 2);
+    for (size_t i = 0; i < 4; ++i) {
+        m_low_page_tables[i] = (PageTableEntry*)(physical_address_for_kernel_page_tables + PAGE_SIZE * (5 + i));
+        memset(m_low_page_tables[i], 0, PAGE_SIZE);
+    }
+
     initialize_paging();
 
     kprintf("MM initialized.\n");
@@ -34,21 +37,8 @@ MemoryManager::~MemoryManager()
 {
 }
 
-void MemoryManager::populate_page_directory(PageDirectory& page_directory)
-{
-    page_directory.m_directory_page = allocate_supervisor_physical_page();
-    page_directory.entries()[0].copy_from({}, kernel_page_directory().entries()[0]);
-    page_directory.entries()[1].copy_from({}, kernel_page_directory().entries()[1]);
-    // Defer to the kernel page tables for 0xC0000000-0xFFFFFFFF
-    for (int i = 768; i < 1024; ++i)
-        page_directory.entries()[i].copy_from({}, kernel_page_directory().entries()[i]);
-}
-
 void MemoryManager::initialize_paging()
 {
-    memset(m_page_table_zero, 0, PAGE_SIZE);
-    memset(m_page_table_one, 0, PAGE_SIZE);
-
 #ifdef MM_DEBUG
     dbgprintf("MM: Kernel page directory @ %p\n", kernel_page_directory().cr3());
 #endif
@@ -171,6 +161,12 @@ void MemoryManager::initialize_paging()
         "orl $0x80, %eax\n"
         "mov %eax, %cr4\n");
 
+    // Turn on CR4.PAE
+    asm volatile(
+        "mov %cr4, %eax\n"
+        "orl $0x20, %eax\n"
+        "mov %eax, %cr4\n");
+
     asm volatile("movl %%eax, %%cr3" ::"a"(kernel_page_directory().cr3()));
     asm volatile(
         "movl %%cr0, %%eax\n"
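
The 9 pages now reserved in boot.S line up with the constructors: page 0
becomes the kernel PDPT, pages 1-4 the four page directories, and pages 5-8
the m_low_page_tables[] cleared above, which is where the (5 + i) offset comes
from. A standalone sketch of that layout (the base address is made up; in the
real boot flow it is physical_address_for_kernel_page_tables):

    #include <cstdio>

    int main()
    {
        const unsigned base = 0xb000; // hypothetical physical base
        const unsigned PAGE_SIZE = 4096;

        printf("PDPT:                  %#x\n", base);
        for (unsigned i = 0; i < 4; ++i)
            printf("page directory %u:      %#x\n", i, base + PAGE_SIZE * (1 + i));
        for (unsigned i = 0; i < 4; ++i)
            printf("m_low_page_tables[%u]:  %#x\n", i, base + PAGE_SIZE * (5 + i));
        return 0;
    }

Note also that CR4.PAE is bit 5 (hence the 0x20 mask) and gets set before
paging is enabled through CR0 further down in initialize_paging(), in the
order the CPU requires.
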
@@ -186,30 +182,23 @@ void MemoryManager::initialize_paging()
 PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, VirtualAddress vaddr)
 {
     ASSERT_INTERRUPTS_DISABLED();
-    u32 page_directory_index = (vaddr.get() >> 22) & 0x3ff;
-    u32 page_table_index = (vaddr.get() >> 12) & 0x3ff;
+    u32 page_directory_table_index = (vaddr.get() >> 30) & 0x3;
+    u32 page_directory_index = (vaddr.get() >> 21) & 0x1ff;
+    u32 page_table_index = (vaddr.get() >> 12) & 0x1ff;
 
-    PageDirectoryEntry& pde = page_directory.entries()[page_directory_index];
+    PageDirectoryEntry& pde = page_directory.table().directory(page_directory_table_index)[page_directory_index];
     if (!pde.is_present()) {
 #ifdef MM_DEBUG
         dbgprintf("MM: PDE %u not present (requested for V%p), allocating\n", page_directory_index, vaddr.get());
 #endif
-        if (page_directory_index == 0) {
+        if (page_directory_table_index == 0 && page_directory_index < 4) {
             ASSERT(&page_directory == m_kernel_page_directory);
-            pde.set_page_table_base((u32)m_page_table_zero);
-            pde.set_user_allowed(false);
-            pde.set_present(true);
-            pde.set_writable(true);
-            pde.set_global(true);
-        } else if (page_directory_index == 1) {
-            ASSERT(&page_directory == m_kernel_page_directory);
-            pde.set_page_table_base((u32)m_page_table_one);
+            pde.set_page_table_base((u32)m_low_page_tables[page_directory_index]);
             pde.set_user_allowed(false);
             pde.set_present(true);
             pde.set_writable(true);
             pde.set_global(true);
         } else {
-            //ASSERT(&page_directory != m_kernel_page_directory.ptr());
             auto page_table = allocate_supervisor_physical_page();
 #ifdef MM_DEBUG
             dbgprintf("MM: PD K%p (%s) at P%p allocated page table #%u (for V%p) at P%p\n",
@@ -220,7 +209,6 @@ PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, Virtual
                 vaddr.get(),
                 page_table->paddr().get());
 #endif
-
             pde.set_page_table_base(page_table->paddr().get());
             pde.set_user_allowed(true);
             pde.set_present(true);
@@ -322,21 +310,6 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
     dbgprintf("MM: handle_page_fault(%w) at V%p\n", fault.code(), fault.vaddr().get());
 #endif
     ASSERT(fault.vaddr() != m_quickmap_addr);
-    if (fault.type() == PageFault::Type::PageNotPresent && fault.vaddr().get() >= 0xc0000000) {
-        auto* current_page_directory = reinterpret_cast<PageDirectoryEntry*>(cpu_cr3());
-        u32 page_directory_index = (fault.vaddr().get() >> 22) & 0x3ff;
-        auto& kernel_pde = kernel_page_directory().entries()[page_directory_index];
-        auto& current_pde = current_page_directory[page_directory_index];
-
-        if (kernel_pde.is_present() && !current_pde.is_present()) {
-#ifdef PAGE_FAULT_DEBUG
-            dbg() << "NP(kernel): Copying new kernel mapping for " << fault.vaddr() << " into current page directory";
-#endif
-            current_pde.copy_from({}, kernel_pde);
-            flush_tlb(fault.vaddr().page_base());
-            return PageFaultResponse::Continue;
-        }
-    }
     auto* region = region_from_vaddr(fault.vaddr());
     if (!region) {
         kprintf("NP(error) fault at invalid address V%p\n", fault.vaddr().get());
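
ensure_pte() now decodes a 32-bit virtual address as 2+9+9+12 bits (PDPT slot,
directory, table, offset) instead of the classic 10+10+12. A standalone sketch
with an arbitrary test address:

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint32_t vaddr = 0xc012f456; // arbitrary kernel-space address

        uint32_t pdpt_index = (vaddr >> 30) & 0x3;  // which 1 GB slot
        uint32_t pd_index = (vaddr >> 21) & 0x1ff;  // which 2 MB region
        uint32_t pt_index = (vaddr >> 12) & 0x1ff;  // which 4 KB page
        uint32_t offset = vaddr & 0xfff;

        assert(pdpt_index == 3); // everything >= 0xc0000000 is in the kernel-only slot
        assert(pd_index == 0);
        assert(pt_index == 0x12f);
        assert(offset == 0x456);
        return 0;
    }

This also explains the `page_directory_index < 4` check: with 2 MB per
directory entry, the first four entries of slot 0 cover exactly the low 8 MB
mapped by the boot-time page tables. And the lazy kernel-mapping copy removed
from handle_page_fault() is obsolete for the reason the commit message gives:
the >=3GB slot is now physically shared, so it can never be out of date.
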
@@ -494,11 +467,6 @@ void MemoryManager::enter_process_paging_scope(Process& process)
     ASSERT(current);
     InterruptDisabler disabler;
 
-    // NOTE: To prevent triple-faulting here, we have to ensure that the current stack
-    //       is accessible to the incoming page directory. We achieve this by forcing
-    //       an update of the kernel VM mappings in the entered scope's page directory.
-    process.page_directory().update_kernel_mappings();
-
     current->tss().cr3 = process.page_directory().cr3();
     asm volatile("movl %%eax, %%cr3" ::"a"(process.page_directory().cr3())
                  : "memory");
diff --git a/Kernel/VM/MemoryManager.h b/Kernel/VM/MemoryManager.h
index ce0c52ff731..ef00bee9da7 100644
--- a/Kernel/VM/MemoryManager.h
+++ b/Kernel/VM/MemoryManager.h
@@ -42,8 +42,6 @@ public:
 
     PageFaultResponse handle_page_fault(const PageFault&);
 
-    void populate_page_directory(PageDirectory&);
-
     void enter_process_paging_scope(Process&);
 
     bool validate_user_stack(const Process&, VirtualAddress) const;
@@ -114,8 +112,7 @@ private:
     PageTableEntry& ensure_pte(PageDirectory&, VirtualAddress);
 
     RefPtr<PageDirectory> m_kernel_page_directory;
-    PageTableEntry* m_page_table_zero { nullptr };
-    PageTableEntry* m_page_table_one { nullptr };
+    PageTableEntry* m_low_page_tables[4] { nullptr };
 
     VirtualAddress m_quickmap_addr;
diff --git a/Kernel/VM/PageDirectory.cpp b/Kernel/VM/PageDirectory.cpp
index e25c6e9119b..729cf4c9be4 100644
--- a/Kernel/VM/PageDirectory.cpp
+++ b/Kernel/VM/PageDirectory.cpp
@@ -24,7 +24,17 @@ RefPtr<PageDirectory> PageDirectory::find_by_cr3(u32 cr3)
 PageDirectory::PageDirectory(PhysicalAddress paddr)
     : m_range_allocator(VirtualAddress(0xc0000000), 0x3f000000)
 {
-    m_directory_page = PhysicalPage::create(paddr, true, false);
+    m_directory_table = PhysicalPage::create(paddr, true, false);
+    m_directory_pages[0] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 1), true, false);
+    m_directory_pages[1] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 2), true, false);
+    m_directory_pages[2] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 3), true, false);
+    m_directory_pages[3] = PhysicalPage::create(paddr.offset(PAGE_SIZE * 4), true, false);
+
+    table().raw[0] = (u64)m_directory_pages[0]->paddr().as_ptr() | 1;
+    table().raw[1] = (u64)m_directory_pages[1]->paddr().as_ptr() | 1;
+    table().raw[2] = (u64)m_directory_pages[2]->paddr().as_ptr() | 1;
+    table().raw[3] = (u64)m_directory_pages[3]->paddr().as_ptr() | 1;
+
     InterruptDisabler disabler;
     cr3_map().set(cr3(), this);
 }
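
The `| 1` in the PDPT setup marks each entry Present; the upper bits hold the
4 KB-aligned physical address of a page directory, which is exactly what
PageDirectoryPointerTable::directory() masks back out. A standalone sketch of
that round trip (made-up address):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        uint64_t directory_paddr = 0x00345000; // hypothetical, 4 KB aligned
        uint64_t pdpte = directory_paddr | 1;  // what table().raw[i] = ... | 1 builds

        assert(pdpte & 1);                              // Present
        assert((pdpte & ~0xfffull) == directory_paddr); // what directory(i) recovers
        return 0;
    }

Unlike PDEs and PTEs, PAE PDPT entries carry no writable or user bit, so
Present alone is enough here.
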
@@ -33,7 +43,26 @@ PageDirectory::PageDirectory(Process& process, const RangeAllocator* parent_rang
     : m_process(&process)
     , m_range_allocator(parent_range_allocator ? RangeAllocator(*parent_range_allocator) : RangeAllocator(VirtualAddress(userspace_range_base), kernelspace_range_base - userspace_range_base))
 {
-    MM.populate_page_directory(*this);
+    // Set up a userspace page directory
+
+    m_directory_table = MM.allocate_supervisor_physical_page();
+    m_directory_pages[0] = MM.allocate_supervisor_physical_page();
+    m_directory_pages[1] = MM.allocate_supervisor_physical_page();
+    m_directory_pages[2] = MM.allocate_supervisor_physical_page();
+    // Share the top 1 GB of kernel-only mappings (>=3GB, or >=0xc0000000)
+    m_directory_pages[3] = MM.kernel_page_directory().m_directory_pages[3];
+
+    table().raw[0] = (u64)m_directory_pages[0]->paddr().as_ptr() | 1;
+    table().raw[1] = (u64)m_directory_pages[1]->paddr().as_ptr() | 1;
+    table().raw[2] = (u64)m_directory_pages[2]->paddr().as_ptr() | 1;
+    table().raw[3] = (u64)m_directory_pages[3]->paddr().as_ptr() | 1;
+
+    // Clone bottom 8 MB of mappings from kernel_page_directory
+    table().directory(0)[0].copy_from({}, MM.kernel_page_directory().table().directory(0)[0]);
+    table().directory(0)[1].copy_from({}, MM.kernel_page_directory().table().directory(0)[1]);
+    table().directory(0)[2].copy_from({}, MM.kernel_page_directory().table().directory(0)[2]);
+    table().directory(0)[3].copy_from({}, MM.kernel_page_directory().table().directory(0)[3]);
+
     InterruptDisabler disabler;
     cr3_map().set(cr3(), this);
 }
@@ -57,11 +86,3 @@ void PageDirectory::flush(VirtualAddress vaddr)
     if (this == &MM.kernel_page_directory() || &current->process().page_directory() == this)
         MM.flush_tlb(vaddr);
 }
-
-void PageDirectory::update_kernel_mappings()
-{
-    // This ensures that the kernel virtual address space is up-to-date in this page directory.
-    // This may be necessary to avoid triple faulting when entering a process's paging scope
-    // whose mappings are out-of-date.
-    memcpy(entries() + 768, MM.kernel_page_directory().entries() + 768, sizeof(PageDirectoryEntry) * 256);
-}
diff --git a/Kernel/VM/PageDirectory.h b/Kernel/VM/PageDirectory.h
index 98ac63dcabf..2d70eb0fe1e 100644
--- a/Kernel/VM/PageDirectory.h
+++ b/Kernel/VM/PageDirectory.h
@@ -21,8 +21,8 @@ public:
 
     ~PageDirectory();
 
-    u32 cr3() const { return m_directory_page->paddr().get(); }
-    PageDirectoryEntry* entries() { return reinterpret_cast<PageDirectoryEntry*>(cr3()); }
+    u32 cr3() const { return m_directory_table->paddr().get(); }
+    PageDirectoryPointerTable& table() { return *reinterpret_cast<PageDirectoryPointerTable*>(cr3()); }
 
     void flush(VirtualAddress);
 
@@ -31,14 +31,13 @@ public:
     Process* process() { return m_process; }
     const Process* process() const { return m_process; }
 
-    void update_kernel_mappings();
-
 private:
     PageDirectory(Process&, const RangeAllocator* parent_range_allocator);
     explicit PageDirectory(PhysicalAddress);
 
     Process* m_process { nullptr };
     RangeAllocator m_range_allocator;
-    RefPtr<PhysicalPage> m_directory_page;
+    RefPtr<PhysicalPage> m_directory_table;
+    RefPtr<PhysicalPage> m_directory_pages[4];
     HashMap<unsigned, RefPtr<PhysicalPage>> m_physical_pages;
 };
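
As the commit message notes, NX only becomes usable on top of this: besides
CPU support (CPUID leaf 80000001h, EDX bit 20), EFER.NXE has to be set before
bit 63 in the entries means anything. A hedged sketch of that follow-up step,
which this patch deliberately does not take yet (ring-0-only inline asm;
constants from the Intel/AMD manuals):

    #include <cstdint>

    // Must run in ring 0; rdmsr/wrmsr fault in user mode.
    static inline void enable_nx()
    {
        const uint32_t msr_efer = 0xc0000080; // extended feature enable register
        const uint32_t efer_nxe = 1u << 11;   // no-execute enable
        uint32_t lo, hi;
        asm volatile("rdmsr" : "=a"(lo), "=d"(hi) : "c"(msr_efer));
        lo |= efer_nxe;
        asm volatile("wrmsr" : : "a"(lo), "d"(hi), "c"(msr_efer));
    }

Until that happens, set_execute_disabled(true) must not be called on live
mappings: with NXE clear, bit 63 is a reserved bit and the CPU raises a
reserved-bit page fault when it walks such an entry.
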