mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-11-22 15:40:19 +00:00
Kernel: Implement AVX XSAVE
support
This adds some new buffers to the `FPUState` struct, which contain enough space for the `xsave` instruction to run. This instruction writes the upper part of the x86 SIMD registers (YMM0-15) to a separate 256-byte area, as well as an "xsave header" describing the region. If the underlying processor supports AVX, the `fxsave` instruction is no longer used, as `xsave` itself implicitly saves all of the SSE and x87 registers. Co-authored-by: Leon Albrecht <leon.a@serenityos.org>
This commit is contained in:
parent
c00ae53b66
commit
964f8fbf3a
Notes:
sideshowbarker
2024-07-17 10:53:43 +09:00
Author: https://github.com/Quaker762 Commit: https://github.com/SerenityOS/serenity/commit/964f8fbf3a Pull-request: https://github.com/SerenityOS/serenity/pull/13949 Reviewed-by: https://github.com/ADKaster ✅ Reviewed-by: https://github.com/BertalanD ✅ Reviewed-by: https://github.com/Hendiadyoin1 Reviewed-by: https://github.com/IdanHo Reviewed-by: https://github.com/linusg
3 changed files with 106 additions and 8 deletions
|
@ -17,6 +17,7 @@
|
|||
#include <Kernel/Arch/x86/ASM_wrapper.h>
|
||||
#include <Kernel/Arch/x86/CPUID.h>
|
||||
#include <Kernel/Arch/x86/DescriptorTable.h>
|
||||
#include <Kernel/Arch/x86/SIMDState.h>
|
||||
#include <Kernel/Arch/x86/TSS.h>
|
||||
#include <Kernel/Forward.h>
|
||||
#include <Kernel/KString.h>
|
||||
|
@ -46,9 +47,15 @@ extern "C" void thread_context_first_enter(void);
|
|||
extern "C" void exit_kernel_thread(void);
|
||||
extern "C" void do_assume_context(Thread* thread, u32 flags);
|
||||
|
||||
struct [[gnu::aligned(16)]] FPUState
|
||||
struct [[gnu::aligned(64), gnu::packed]] FPUState
|
||||
{
|
||||
u8 buffer[512];
|
||||
SIMD::LegacyRegion legacy_region;
|
||||
SIMD::Header xsave_header;
|
||||
|
||||
// FIXME: This should be dynamically allocated! For now, we only save the `YMM` registers here,
|
||||
// so this will do for now. The size of the area is queried via CPUID(EAX=0dh, ECX=2):EAX.
|
||||
// https://www.intel.com/content/dam/develop/external/us/en/documents/36945
|
||||
u8 ext_save_area[256];
|
||||
};
|
||||
|
||||
class Processor;
|
||||
|
|
75
Kernel/Arch/x86/SIMDState.h
Normal file
75
Kernel/Arch/x86/SIMDState.h
Normal file
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Leon Albrecht <leon.a@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/EnumBits.h>
|
||||
#include <AK/FPControl.h>
|
||||
#include <AK/Platform.h>
|
||||
#include <AK/Types.h>
|
||||
|
||||
namespace Kernel::SIMD {
|
||||
|
||||
// Intel-Manual Vol 1 Chp 13.4
|
||||
// Bit flags naming the processor state components that XSAVE-family
// instructions can manage. Each enumerator is a single bit of a 64-bit mask,
// so values can be OR-ed together (bitwise operators are enabled for this enum
// right after its definition). Elsewhere in this change the X87 | SSE | AVX
// combination is OR-ed into XCR0 and passed as the low half of the
// EDX:EAX mask given to `xsave` / `xrstor`.
// Bits 10 and 14 are not named here.
enum StateComponent : u64 {
|
||||
X87 = 1ull << 0ull,
|
||||
SSE = 1ull << 1ull, // xmm0-xmm7(15)
|
||||
AVX = 1ull << 2ull, // ymm0-ymm7(15) hi
|
||||
MPX_BNDREGS = 1ull << 3ull,
|
||||
MPX_BNDCSR = 1ull << 4ull,
|
||||
AVX512_opmask = 1ull << 5ull, // k0 - k7
|
||||
AVX512_ZMM_hi = 1ull << 6ull, // zmm0 - zmm15 hi
|
||||
AVX512_ZMM = 1ull << 7ull, // zmm16 - zmm31 full
|
||||
PT = 1ull << 8ull,
|
||||
PKRU = 1ull << 9ull,
|
||||
|
||||
CET_U = 1ull << 11ull,
|
||||
CET_S = 1ull << 12ull,
|
||||
HDC = 1ull << 13ull,
|
||||
|
||||
LBR = 1ull << 15ull,
|
||||
HWP = 1ull << 16ull,
|
||||
|
||||
// Top bit of the mask; used in Header::xcomp_bv rather than as a
// state component of its own.
XCOMP_ENABLE = 1ull << 63ull
|
||||
};
|
||||
AK_ENUM_BITWISE_OPERATORS(StateComponent);
|
||||
|
||||
// In-memory image of the first 512 bytes of an FPU/SIMD save area, i.e. the
// part shared by `fxsave` and `xsave`. The struct is packed and its size is
// pinned to 512 bytes by the static_assert that follows it, so fields must not
// be added, removed or reordered without keeping the total unchanged.
// Field names follow the Intel SDM description of the FXSAVE area.
struct [[gnu::packed]] LegacyRegion {
|
||||
AK::X87ControlWord FCW; // x87 control word
|
||||
u16 FSW; // x87 status word
|
||||
u8 FTW; // abridged x87 tag word
|
||||
u8 : 8; // unnamed padding byte
|
||||
u16 FOP; // last x87 opcode
|
||||
#if ARCH(I386)
|
||||
// 32-bit version
|
||||
// Instruction/data pointers are stored as 32-bit offset + 16-bit selector,
|
||||
// each followed by 16 bits of padding (16 bytes in total).
|
||||
u32 FIP_32;
|
||||
u16 FCS;
|
||||
u16 : 16;
|
||||
// NOTE(review): "FPD" looks like a transposition of "FDP" (compare FDP_64
|
||||
// below) - confirm before renaming, as callers may reference this name.
|
||||
u32 FPD_32;
|
||||
u16 FDS;
|
||||
u16 : 16;
|
||||
#elif ARCH(X86_64)
|
||||
// 64-bit version
|
||||
// Instruction/data pointers are stored as full 64-bit values
|
||||
// (16 bytes in total, same as the 32-bit variant).
|
||||
u64 FIP_64;
|
||||
u64 FDP_64;
|
||||
#endif
|
||||
AK::MXCSR MXCSR;
|
||||
u32 MXCSR_mask;
|
||||
u8 st_mmx[128]; // 8 x87/MMX register slots, 16 bytes each
|
||||
u8 xmm[256]; // room for 16 XMM registers, 16 bytes each
|
||||
u8 available[96]; // Extra available space
|
||||
};
|
||||
|
||||
static_assert(sizeof(LegacyRegion) == 512);
|
||||
|
||||
// The 64-byte XSAVE header. In FPUState it sits directly after the 512-byte
// LegacyRegion and before the extended save area. The struct is packed and its
// size is pinned to 64 bytes by the static_assert that follows it.
struct [[gnu::packed]] Header {
|
||||
// Bitmap of state components (XSTATE_BV in the Intel SDM).
|
||||
StateComponent xstate_bv;
|
||||
// Compaction bitmap (XCOMP_BV in the Intel SDM); StateComponent::XCOMP_ENABLE
|
||||
// is the top bit of this field.
|
||||
StateComponent xcomp_bv;
|
||||
// Remaining 48 bytes of the header; not interpreted by this code.
|
||||
u8 reserved[48];
|
||||
};
|
||||
static_assert(sizeof(Header) == 64);
|
||||
|
||||
}
|
|
@ -565,7 +565,7 @@ UNMAP_AFTER_INIT void Processor::cpu_setup()
|
|||
|
||||
if (has_feature(CPUFeature::AVX)) {
|
||||
// Turn on SSE, AVX and x87 flags
|
||||
write_xcr0(read_xcr0() | 0x7);
|
||||
write_xcr0(read_xcr0() | SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -663,12 +663,18 @@ UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
|
|||
if (cpu == 0) {
|
||||
VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
|
||||
asm volatile("fninit");
|
||||
if (has_feature(CPUFeature::FXSR))
|
||||
// Initialize AVX state
|
||||
if (has_feature(CPUFeature::XSAVE | CPUFeature::AVX)) {
|
||||
asm volatile("xsave %0\n"
|
||||
: "=m"(s_clean_fpu_state)
|
||||
: "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
|
||||
} else if (has_feature(CPUFeature::FXSR)) {
|
||||
asm volatile("fxsave %0"
|
||||
: "=m"(s_clean_fpu_state));
|
||||
else
|
||||
} else {
|
||||
asm volatile("fnsave %0"
|
||||
: "=m"(s_clean_fpu_state));
|
||||
}
|
||||
|
||||
if (has_feature(CPUFeature::HYPERVISOR))
|
||||
detect_hypervisor();
|
||||
|
@ -1563,6 +1569,7 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
|
|||
VERIFY(to_thread->state() == Thread::State::Running);
|
||||
|
||||
bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
|
||||
bool has_xsave_avx_support = Processor::current().has_feature(CPUFeature::XSAVE) && Processor::current().has_feature(CPUFeature::AVX);
|
||||
Processor::set_current_thread(*to_thread);
|
||||
|
||||
auto& from_regs = from_thread->regs();
|
||||
|
@ -1572,12 +1579,19 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
|
|||
// instead of carrying on with elevated I/O privileges.
|
||||
VERIFY(get_iopl_from_eflags(to_regs.flags()) == 0);
|
||||
|
||||
if (has_fxsr)
|
||||
if (has_xsave_avx_support) {
|
||||
// The specific state components saved correspond to the bits set in the requested-feature bitmap (RFBM), which is the logical-AND of EDX:EAX and XCR0.
|
||||
// https://www.moritz.systems/blog/how-debuggers-work-getting-and-setting-x86-registers-part-2/
|
||||
asm volatile("xsave %0\n"
|
||||
: "=m"(from_thread->fpu_state())
|
||||
: "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
|
||||
} else if (has_fxsr) {
|
||||
asm volatile("fxsave %0"
|
||||
: "=m"(from_thread->fpu_state()));
|
||||
else
|
||||
} else {
|
||||
asm volatile("fnsave %0"
|
||||
: "=m"(from_thread->fpu_state()));
|
||||
}
|
||||
|
||||
#if ARCH(I386)
|
||||
from_regs.fs = get_fs();
|
||||
|
@ -1614,7 +1628,9 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
|
|||
VERIFY(in_critical > 0);
|
||||
Processor::restore_in_critical(in_critical);
|
||||
|
||||
if (has_fxsr)
|
||||
if (has_xsave_avx_support)
|
||||
asm volatile("xrstor %0" ::"m"(to_thread->fpu_state()), "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
|
||||
else if (has_fxsr)
|
||||
asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
|
||||
else
|
||||
asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
|
||||
|
|
Loading…
Reference in a new issue