Kernel/aarch64: Explicitly allow float instrs in {load,store}_fpu_state

LLVM 18 otherwise throws errors, as we use '-mgeneral-regs-only' in the kernel. The functions had to be moved into a .S file, as there is no '-mno-general-regs-only' option and also no nice way to remove '-mgeneral-regs-only' for a single .cpp file.
Author: https://github.com/spholz Commit: https://github.com/SerenityOS/serenity/commit/b363abb082 Pull-request: https://github.com/SerenityOS/serenity/pull/24113 Reviewed-by: https://github.com/ADKaster ✅
2024-11-25 09:00:22 +00:00 · 2024-04-25 21:46:01 +02:00 · 2024-04-25 21:46:01 +02:00 · b363abb082 · 2024-07-16 22:26:05 +09:00
commit b363abb082
parent 476b3703fd
4 changed files with 55 additions and 46 deletions
--- a/Kernel/Arch/aarch64/FPUState.S
+++ b/Kernel/Arch/aarch64/FPUState.S
@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2022, Timon Kruiper <timonkruiper@gmail.com>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+// The kernel is compiled with -mgeneral-regs-only on AArch64,
+// so we have to explicitly allow the use of floating-point instructions here.
+.arch_extension fp
+
+.global store_fpu_state // Exported symbol; FPUState.h declares it as void store_fpu_state(FPUState*).
+.type store_fpu_state, @function
+store_fpu_state: // Writes q0-q31 to the buffer addressed by x0 (first argument register under AAPCS64).
+    stp q0, q1, [x0, #(0 * 16)] // Each stp stores two 16-byte Q registers; byte offset = first register index * 16.
+    stp q2, q3, [x0, #(2 * 16)]
+    stp q4, q5, [x0, #(4 * 16)]
+    stp q6, q7, [x0, #(6 * 16)]
+    stp q8, q9, [x0, #(8 * 16)]
+    stp q10, q11, [x0, #(10 * 16)]
+    stp q12, q13, [x0, #(12 * 16)]
+    stp q14, q15, [x0, #(14 * 16)]
+    stp q16, q17, [x0, #(16 * 16)]
+    stp q18, q19, [x0, #(18 * 16)]
+    stp q20, q21, [x0, #(20 * 16)]
+    stp q22, q23, [x0, #(22 * 16)]
+    stp q24, q25, [x0, #(24 * 16)]
+    stp q26, q27, [x0, #(26 * 16)]
+    stp q28, q29, [x0, #(28 * 16)]
+    stp q30, q31, [x0, #(30 * 16)] // Last pair ends at byte 512, matching the 512-byte FPUState buffer.
+    ret // Only q0-q31 are stored by this routine; x0 is left unmodified.
+
+.global load_fpu_state // Exported symbol; FPUState.h declares it as void load_fpu_state(FPUState*).
+.type load_fpu_state, @function
+load_fpu_state: // Reads q0-q31 back from the buffer addressed by x0 (first argument register under AAPCS64).
+    ldp q0, q1, [x0, #(0 * 16)] // Each ldp loads two 16-byte Q registers; byte offset = first register index * 16.
+    ldp q2, q3, [x0, #(2 * 16)]
+    ldp q4, q5, [x0, #(4 * 16)]
+    ldp q6, q7, [x0, #(6 * 16)]
+    ldp q8, q9, [x0, #(8 * 16)]
+    ldp q10, q11, [x0, #(10 * 16)]
+    ldp q12, q13, [x0, #(12 * 16)]
+    ldp q14, q15, [x0, #(14 * 16)]
+    ldp q16, q17, [x0, #(16 * 16)]
+    ldp q18, q19, [x0, #(18 * 16)]
+    ldp q20, q21, [x0, #(20 * 16)]
+    ldp q22, q23, [x0, #(22 * 16)]
+    ldp q24, q25, [x0, #(24 * 16)]
+    ldp q26, q27, [x0, #(26 * 16)]
+    ldp q28, q29, [x0, #(28 * 16)]
+    ldp q30, q31, [x0, #(30 * 16)] // Last pair is read from bytes 480-511 of the 512-byte FPUState buffer.
+    ret // Same layout as store_fpu_state, so a stored buffer round-trips; memory is only read here.
--- a/Kernel/Arch/aarch64/FPUState.h
+++ b/Kernel/Arch/aarch64/FPUState.h
@ -16,4 +16,7 @@ struct [[gnu::aligned(16)]] FPUState {
    u8 buffer[512];
 };

+extern "C" void store_fpu_state(FPUState* fpu_state);
+extern "C" void load_fpu_state(FPUState* fpu_state);
+
 }
--- a/Kernel/Arch/aarch64/Processor.cpp
+++ b/Kernel/Arch/aarch64/Processor.cpp
@ -27,52 +27,6 @@ extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __a

 Processor* g_current_processor;

-static void store_fpu_state(FPUState* fpu_state)
-{
-    asm volatile(
-        "mov x0, %[fpu_state]\n"
-        "stp q0, q1, [x0, #(0 * 16)]\n"
-        "stp q2, q3, [x0, #(2 * 16)]\n"
-        "stp q4, q5, [x0, #(4 * 16)]\n"
-        "stp q6, q7, [x0, #(6 * 16)]\n"
-        "stp q8, q9, [x0, #(8 * 16)]\n"
-        "stp q10, q11, [x0, #(10 * 16)]\n"
-        "stp q12, q13, [x0, #(12 * 16)]\n"
-        "stp q14, q15, [x0, #(14 * 16)]\n"
-        "stp q16, q17, [x0, #(16 * 16)]\n"
-        "stp q18, q19, [x0, #(18 * 16)]\n"
-        "stp q20, q21, [x0, #(20 * 16)]\n"
-        "stp q22, q23, [x0, #(22 * 16)]\n"
-        "stp q24, q25, [x0, #(24 * 16)]\n"
-        "stp q26, q27, [x0, #(26 * 16)]\n"
-        "stp q28, q29, [x0, #(28 * 16)]\n"
-        "stp q30, q31, [x0, #(30 * 16)]\n"
-        "\n" ::[fpu_state] "r"(fpu_state));
-}
-
-static void load_fpu_state(FPUState* fpu_state)
-{
-    asm volatile(
-        "mov x0, %[fpu_state]\n"
-        "ldp q0, q1, [x0, #(0 * 16)]\n"
-        "ldp q2, q3, [x0, #(2 * 16)]\n"
-        "ldp q4, q5, [x0, #(4 * 16)]\n"
-        "ldp q6, q7, [x0, #(6 * 16)]\n"
-        "ldp q8, q9, [x0, #(8 * 16)]\n"
-        "ldp q10, q11, [x0, #(10 * 16)]\n"
-        "ldp q12, q13, [x0, #(12 * 16)]\n"
-        "ldp q14, q15, [x0, #(14 * 16)]\n"
-        "ldp q16, q17, [x0, #(16 * 16)]\n"
-        "ldp q18, q19, [x0, #(18 * 16)]\n"
-        "ldp q20, q21, [x0, #(20 * 16)]\n"
-        "ldp q22, q23, [x0, #(22 * 16)]\n"
-        "ldp q24, q25, [x0, #(24 * 16)]\n"
-        "ldp q26, q27, [x0, #(26 * 16)]\n"
-        "ldp q28, q29, [x0, #(28 * 16)]\n"
-        "ldp q30, q31, [x0, #(30 * 16)]\n"
-        "\n" ::[fpu_state] "r"(fpu_state));
-}
-
 template<typename T>
 void ProcessorBase<T>::early_initialize(u32 cpu)
 {
--- a/Kernel/CMakeLists.txt
+++ b/Kernel/CMakeLists.txt
@ -493,6 +493,7 @@ elseif("${SERENITY_ARCH}" STREQUAL "aarch64")
        Arch/aarch64/CPUID.cpp
        Arch/aarch64/CurrentTime.cpp
        Arch/aarch64/Dummy.cpp
+        Arch/aarch64/FPUState.S
        Arch/aarch64/InterruptManagement.cpp
        Arch/aarch64/Interrupts.cpp
        Arch/aarch64/kprintf.cpp