From 964f8fbf3a8b1faf73f763d4cb37a0a946c9ef29 Mon Sep 17 00:00:00 2001
From: Jesse Buhagiar
Date: Mon, 25 Apr 2022 23:09:57 +1000
Subject: [PATCH] Kernel: Implement AVX `XSAVE` support

This adds new buffers to the `FPUState` struct so that it contains
enough space for the `xsave` instruction to run. This instruction writes
the upper part of the x86 SIMD registers (YMM0-15) to a separate
256-byte area, as well as an "xsave header" describing the region.

If the underlying processor supports AVX, the `fxsave` instruction is no
longer used, as `xsave` itself implicitly saves all of the SSE and x87
registers.

Co-authored-by: Leon Albrecht
---
 Kernel/Arch/x86/Processor.h          | 11 +++-
 Kernel/Arch/x86/SIMDState.h          | 75 ++++++++++++++++++++++++++++
 Kernel/Arch/x86/common/Processor.cpp | 28 ++++++++---
 3 files changed, 106 insertions(+), 8 deletions(-)
 create mode 100644 Kernel/Arch/x86/SIMDState.h

diff --git a/Kernel/Arch/x86/Processor.h b/Kernel/Arch/x86/Processor.h
index 9bd13613e44..1e0f4cc2ddc 100644
--- a/Kernel/Arch/x86/Processor.h
+++ b/Kernel/Arch/x86/Processor.h
@@ -17,6 +17,7 @@
 #include
 #include
 #include
+#include <Kernel/Arch/x86/SIMDState.h>
 #include
 #include
 #include
@@ -46,9 +47,15 @@ extern "C" void thread_context_first_enter(void);
 extern "C" void exit_kernel_thread(void);
 extern "C" void do_assume_context(Thread* thread, u32 flags);
 
-struct [[gnu::aligned(16)]] FPUState
+struct [[gnu::aligned(64), gnu::packed]] FPUState
 {
-    u8 buffer[512];
+    SIMD::LegacyRegion legacy_region;
+    SIMD::Header xsave_header;
+
+    // FIXME: This should be dynamically allocated! For now, we only save the `YMM` registers here,
+    //        so this will do. The size of the area is queried via CPUID(EAX=0dh, ECX=2):EAX.
+    //        https://www.intel.com/content/dam/develop/external/us/en/documents/36945
+    u8 ext_save_area[256];
 };
 
 class Processor;
diff --git a/Kernel/Arch/x86/SIMDState.h b/Kernel/Arch/x86/SIMDState.h
new file mode 100644
index 00000000000..93eac0b8b7e
--- /dev/null
+++ b/Kernel/Arch/x86/SIMDState.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2022, Leon Albrecht
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/EnumBits.h>
+#include <AK/FPControl.h>
+#include <AK/Platform.h>
+#include <AK/Types.h>
+
+namespace Kernel::SIMD {
+
+// Intel Manual Vol 1 Chp 13.4
+enum StateComponent : u64 {
+    X87 = 1ull << 0ull,
+    SSE = 1ull << 1ull,           // xmm0-xmm7(15)
+    AVX = 1ull << 2ull,           // ymm0-ymm7(15) hi
+    MPX_BNDREGS = 1ull << 3ull,
+    MPX_BNDCSR = 1ull << 4ull,
+    AVX512_opmask = 1ull << 5ull, // k0 - k7
+    AVX512_ZMM_hi = 1ull << 6ull, // 0 - 15
+    AVX512_ZMM = 1ull << 7ull,    // 16 - 31 full
+    PT = 1ull << 8ull,
+    PKRU = 1ull << 9ull,
+
+    CET_U = 1ull << 11ull,
+    CET_S = 1ull << 12ull,
+    HDC = 1ull << 13ull,
+
+    LBR = 1ull << 15ull,
+    HWP = 1ull << 16ull,
+
+    XCOMP_ENABLE = 1ull << 63ull
+};
+AK_ENUM_BITWISE_OPERATORS(StateComponent);
+
+struct [[gnu::packed]] LegacyRegion {
+    AK::X87ControlWord FCW;
+    u16 FSW;
+    u8 FTW;
+    u8 : 8;
+    u16 FOP;
+#if ARCH(I386)
+    // 32-bit version
+    u32 FIP_32;
+    u16 FCS;
+    u16 : 16;
+    u32 FDP_32;
+    u16 FDS;
+    u16 : 16;
+#elif ARCH(X86_64)
+    // 64-bit version
+    u64 FIP_64;
+    u64 FDP_64;
+#endif
+    AK::MXCSR MXCSR;
+    u32 MXCSR_mask;
+    u8 st_mmx[128];
+    u8 xmm[256];
+    u8 available[96]; // Extra available space
+};
+
+static_assert(sizeof(LegacyRegion) == 512);
+
+struct [[gnu::packed]] Header {
+    StateComponent xstate_bv;
+    StateComponent xcomp_bv;
+    u8 reserved[48];
+};
+static_assert(sizeof(Header) == 64);
+
+}
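An aside to the FIXME in `FPUState` above: the hard-coded 256-byte `ext_save_area` matches what CPUID reports for the AVX state component on current hardware. Below is a minimal user-space sketch, not part of this patch, of the CPUID(EAX=0Dh, ECX=2) query the FIXME refers to; `__get_cpuid_count` is the GCC/Clang helper from <cpuid.h>, the rest is purely illustrative.

// Sketch only: query the size and offset of the AVX (YMM-high) state
// component via CPUID leaf 0Dh, sub-leaf 2 (Intel SDM Vol 1, Chp 13.4).
#include <cpuid.h>
#include <cstdio>

int main()
{
    unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
    if (!__get_cpuid_count(0x0d, 2, &eax, &ebx, &ecx, &edx))
        return 1; // Leaf not supported on this CPU.
    // EAX = size in bytes of the AVX component,
    // EBX = its offset from the start of the XSAVE area.
    printf("AVX component: %u bytes at offset %u\n", eax, ebx);
    return 0;
}

On AVX-capable hardware this reports 256 bytes at offset 576, i.e. directly after the 512-byte `LegacyRegion` and the 64-byte `Header` (512 + 64 = 576), which is why the fixed-size area suffices until the allocation is made dynamic.
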
diff --git a/Kernel/Arch/x86/common/Processor.cpp b/Kernel/Arch/x86/common/Processor.cpp
index 6f6cdb43c1d..a9b75cf5684 100644
--- a/Kernel/Arch/x86/common/Processor.cpp
+++ b/Kernel/Arch/x86/common/Processor.cpp
@@ -565,7 +565,7 @@ UNMAP_AFTER_INIT void Processor::cpu_setup()
 
     if (has_feature(CPUFeature::AVX)) {
         // Turn on SSE, AVX and x87 flags
-        write_xcr0(read_xcr0() | 0x7);
+        write_xcr0(read_xcr0() | SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87);
     }
 }
 
@@ -663,12 +663,18 @@ UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
     if (cpu == 0) {
         VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
         asm volatile("fninit");
-        if (has_feature(CPUFeature::FXSR))
+        // Initialize AVX state
+        if (has_feature(CPUFeature::XSAVE) && has_feature(CPUFeature::AVX)) {
+            asm volatile("xsave %0\n"
+                         : "=m"(s_clean_fpu_state)
+                         : "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
+        } else if (has_feature(CPUFeature::FXSR)) {
             asm volatile("fxsave %0"
                          : "=m"(s_clean_fpu_state));
-        else
+        } else {
             asm volatile("fnsave %0"
                          : "=m"(s_clean_fpu_state));
+        }
 
         if (has_feature(CPUFeature::HYPERVISOR))
             detect_hypervisor();
@@ -1563,6 +1569,7 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
     VERIFY(to_thread->state() == Thread::State::Running);
 
     bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
+    bool has_xsave_avx_support = Processor::current().has_feature(CPUFeature::XSAVE) && Processor::current().has_feature(CPUFeature::AVX);
     Processor::set_current_thread(*to_thread);
 
     auto& from_regs = from_thread->regs();
@@ -1572,12 +1579,19 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
     // instead of carrying on with elevated I/O privileges.
     VERIFY(get_iopl_from_eflags(to_regs.flags()) == 0);
 
-    if (has_fxsr)
+    if (has_xsave_avx_support) {
+        // The specific state components saved correspond to the bits set in the requested-feature bitmap (RFBM), which is the logical AND of EDX:EAX and XCR0.
+        // https://www.moritz.systems/blog/how-debuggers-work-getting-and-setting-x86-registers-part-2/
+        asm volatile("xsave %0\n"
+                     : "=m"(from_thread->fpu_state())
+                     : "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
+    } else if (has_fxsr) {
         asm volatile("fxsave %0"
                      : "=m"(from_thread->fpu_state()));
-    else
+    } else {
         asm volatile("fnsave %0"
                      : "=m"(from_thread->fpu_state()));
+    }
 
 #if ARCH(I386)
     from_regs.fs = get_fs();
@@ -1614,7 +1628,9 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
     VERIFY(in_critical > 0);
     Processor::restore_in_critical(in_critical);
 
-    if (has_fxsr)
+    if (has_xsave_avx_support)
+        asm volatile("xrstor %0" ::"m"(to_thread->fpu_state()), "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
+    else if (has_fxsr)
         asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
     else
         asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
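
A note on the RFBM comment in `enter_thread_context`: `xsave` and `xrstor` only transfer the state components whose bits are set in RFBM = (EDX:EAX) & XCR0. Below is a minimal user-space sketch of that derivation, assuming CR4.OSXSAVE is enabled; the `read_xcr0` helper here is an illustrative stand-in for the kernel's own wrapper of the same name, not code from this patch.

#include <cstdint>
#include <cstdio>

// Illustrative stand-in for the kernel's read_xcr0(): `xgetbv` with
// ECX=0 reads XCR0, the OS-controlled state-component enable mask.
static uint64_t read_xcr0()
{
    uint32_t eax, edx;
    asm volatile("xgetbv" : "=a"(eax), "=d"(edx) : "c"(0u));
    return (static_cast<uint64_t>(edx) << 32) | eax;
}

int main()
{
    // The mask this patch loads into EDX:EAX before `xsave`:
    // X87 (bit 0) | SSE (bit 1) | AVX (bit 2).
    uint64_t requested = 0b111;
    // XSAVE only touches components set in RFBM = (EDX:EAX) & XCR0, so
    // components the OS never enabled in XCR0 are silently skipped.
    uint64_t rfbm = requested & read_xcr0();
    printf("RFBM = %#llx\n", static_cast<unsigned long long>(rfbm));
    return 0;
}

Because cpu_setup() enables exactly the X87, SSE and AVX bits in XCR0 on AVX machines, the RFBM computed from this patch's EDX:EAX mask is 0x7, and the xsave/xrstor pair round-trips the full x87/SSE/YMM state across a context switch.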