Kernel: Implement software context switching and Processor structure

Moving certain globals into a new Processor structure for
each CPU allows us to eventually run an instance of the
scheduler on each CPU.
Tom, 2020-06-27 13:42:28 -06:00, committed by Andreas Kling
parent 10407061d2
commit fb41d89384
22 changed files with 1002 additions and 513 deletions
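The pattern in miniature: each CPU's Processor object stores a pointer to itself as its first member, and a dedicated %fs segment (GDT_SELECTOR_PROC below) is based at that object, so any code can reach its own per-CPU state with a single segment-relative load. A stripped-down sketch of what the diff implements:

class Processor {
    Processor* m_self; // must be first: %fs:0 then points back at this instance
    u32 m_cpu;         // this CPU's id
    u32 m_in_irq;      // IRQ nesting depth on this CPU
public:
    static Processor& current()
    {
        // read_fs_u32(0) (added in this diff) loads %fs:0, i.e. m_self
        return *reinterpret_cast<Processor*>(read_fs_u32(0));
    }
};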

View file

@@ -252,15 +252,6 @@ apic_ap_start:
mov %cs, %ax
mov %ax, %ds
/* Generate a new processor id. This is not the APIC id. We just
need a way to find ourselves a stack without stomping on other
APs that may be doing this concurrently. */
xor %ax, %ax
mov %ax, %bp
inc %ax
lock; xaddw %ax, %ds:(ap_cpu_id - apic_ap_start)(%bp) /* avoid relocation entries */
mov %ax, %bx
xor %ax, %ax
mov %ax, %sp
@@ -281,14 +272,18 @@ apic_ap_start32:
mov %ax, %es
mov %ax, %fs
mov %ax, %gs
movl $0x8000, %ebp
/* generate a unique ap cpu id (0 means 1st ap, not bsp!) */
xorl %eax, %eax
incl %eax
lock; xaddl %eax, (ap_cpu_id - apic_ap_start)(%ebp) /* avoid relocation entries */
movl %eax, %esi
/* find our allocated stack based on the generated id */
andl 0x0000FFFF, %ebx
movl %ebx, %esi
movl (ap_cpu_init_stacks - apic_ap_start)(%ebp, %ebx, 4), %esp
movl (ap_cpu_init_stacks - apic_ap_start)(%ebp, %eax, 4), %esp
/* check if we support NX and enable it if we do */
movl $0x80000001, %eax
cpuid
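The `lock; xadd` above is the classic atomic-counter idiom: each AP atomically adds 1 to a shared word and receives the previous value as its unique id, so concurrently booting APs can never collide. The C++ equivalent, purely for illustration (the trampoline must do this in assembly, since no runtime exists yet):

#include <atomic>
#include <cstdint>

static std::atomic<uint32_t> ap_cpu_id { 0 };

uint32_t allocate_ap_id()
{
    // fetch_add returns the old value: the first AP gets 0, the next 1, ...
    // (as the comment above notes, 0 means the first AP, not the BSP)
    return ap_cpu_id.fetch_add(1, std::memory_order_relaxed);
}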
@@ -319,8 +314,8 @@ apic_ap_start32:
lgdt (ap_cpu_gdtr_initial2 - apic_ap_start + 0xc0008000)
/* jump above 3GB into our identity mapped area now */
ljmp $8, $(1f - apic_ap_start + 0xc0008000)
1:
ljmp $8, $(apic_ap_start32_2 - apic_ap_start + 0xc0008000)
apic_ap_start32_2:
/* flush the TLB */
movl %cr3, %eax
movl %eax, %cr3
@@ -338,13 +333,20 @@ apic_ap_start32:
movl %eax, %cr0
movl (ap_cpu_init_cr4 - apic_ap_start)(%ebp), %eax
movl %eax, %cr4
/* push the Processor pointer this CPU is going to use */
movl (ap_cpu_init_processor_info_array - apic_ap_start)(%ebp), %eax
addl $0xc0000000, %eax
movl 0(%eax, %esi, 4), %eax
push %eax
/* push the cpu id, 0 representing the bsp and call into c++ */
incl %esi
push %esi
xor %ebp, %ebp
cld
/* push the arbitrary cpu id, 0 representing the bsp and call into c++ */
inc %esi
push %esi
/* We are in identity mapped P0x8000 and the BSP will unload this code
once all APs are initialized, so call init_ap but return to our
infinite loop */
@@ -356,7 +358,7 @@ apic_ap_start32:
apic_ap_start_size:
.2byte end_apic_ap_start - apic_ap_start
ap_cpu_id:
.2byte 0x0
.4byte 0x0
ap_cpu_gdt:
/* null */
.8byte 0x0
@@ -388,6 +390,9 @@ ap_cpu_init_cr3:
.global ap_cpu_init_cr4
ap_cpu_init_cr4:
.4byte 0x0 /* will be set at runtime */
.global ap_cpu_init_processor_info_array
ap_cpu_init_processor_info_array:
.4byte 0x0 /* will be set at runtime */
.global ap_cpu_init_stacks
ap_cpu_init_stacks:
/* array of allocated stack pointers */
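These `.4byte` variables live inside the trampoline blob itself, so after the BSP copies the blob to P0x8000 it has to patch them through that copy. The `APIC_INIT_VAR_PTR` macro used in the APIC changes below resolves a variable's address inside the copy; the arithmetic amounts to this sketch (not the macro's literal definition):

extern "C" void apic_ap_start(void); // the trampoline's link-time address

// Rebase a trampoline variable onto the relocated copy: its offset from
// apic_ap_start is identical in both images.
template<typename T>
T* trampoline_var_ptr(u8* copy_base, void* var)
{
    ptrdiff_t offset = reinterpret_cast<u8*>(var) - reinterpret_cast<u8*>(&apic_ap_start);
    return reinterpret_cast<T*>(copy_base + offset);
}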

View file

@@ -38,42 +38,28 @@
#include <Kernel/Interrupts/UnhandledInterruptHandler.h>
#include <Kernel/KSyms.h>
#include <Kernel/Process.h>
#include <Kernel/SpinLock.h>
#include <Kernel/Thread.h>
#include <Kernel/VM/MemoryManager.h>
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/IO.h>
#include <LibC/mallocdefs.h>
//#define PAGE_FAULT_DEBUG
//#define CONTEXT_SWITCH_DEBUG
namespace Kernel {
static DescriptorTablePointer s_idtr;
static DescriptorTablePointer s_gdtr;
static Descriptor s_idt[256];
static Descriptor s_gdt[256];
static GenericInterruptHandler* s_interrupt_handler[GENERIC_INTERRUPT_HANDLERS_COUNT];
static Vector<u16>* s_gdt_freelist;
static u16 s_gdt_length;
u16 gdt_alloc_entry()
{
ASSERT(s_gdt_freelist);
ASSERT(!s_gdt_freelist->is_empty());
return s_gdt_freelist->take_last();
}
void gdt_free_entry(u16 entry)
{
s_gdt_freelist->append(entry);
}
extern "C" void handle_interrupt(RegisterState);
extern "C" void handle_interrupt(TrapFrame*);
#define EH_ENTRY(ec, title) \
extern "C" void title##_asm_entry(); \
extern "C" void title##_handler(RegisterState); \
extern "C" void title##_handler(TrapFrame*); \
asm( \
".globl " #title "_asm_entry\n" \
"" #title "_asm_entry: \n" \
@@ -83,22 +69,21 @@ extern "C" void handle_interrupt(RegisterState);
" pushl %fs\n" \
" pushl %gs\n" \
" pushl %ss\n" \
" mov $0x10, %ax\n" \
" mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \
" mov %ax, %ds\n" \
" mov %ax, %es\n" \
" mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \
" mov %ax, %fs\n" \
" pushl %esp \n" /* set TrapFrame::regs */ \
" subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \
" pushl %esp \n" \
" cld\n" \
" call enter_trap_no_irq \n" \
" call " #title "_handler\n" \
" add $0x4, %esp \n" \
" popl %gs\n" \
" popl %fs\n" \
" popl %es\n" \
" popl %ds\n" \
" popa\n" \
" add $0x4, %esp\n" \
" iret\n");
" jmp common_trap_exit \n");
#define EH_ENTRY_NO_CODE(ec, title) \
extern "C" void title##_handler(RegisterState); \
extern "C" void title##_handler(TrapFrame*); \
extern "C" void title##_asm_entry(); \
asm( \
".globl " #title "_asm_entry\n" \
@@ -110,19 +95,18 @@ extern "C" void handle_interrupt(RegisterState);
" pushl %fs\n" \
" pushl %gs\n" \
" pushl %ss\n" \
" mov $0x10, %ax\n" \
" mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n" \
" mov %ax, %ds\n" \
" mov %ax, %es\n" \
" mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n" \
" mov %ax, %fs\n" \
" pushl %esp \n" /* set TrapFrame::regs */ \
" subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n" \
" pushl %esp \n" \
" cld\n" \
" call enter_trap_no_irq \n" \
" call " #title "_handler\n" \
" add $0x4, %esp\n" \
" popl %gs\n" \
" popl %fs\n" \
" popl %es\n" \
" popl %ds\n" \
" popa\n" \
" add $0x4, %esp\n" \
" iret\n");
" jmp common_trap_exit \n");
static void dump(const RegisterState& regs)
{
@@ -172,7 +156,7 @@ void handle_crash(RegisterState& regs, const char* description, int signal, bool
// make sure we switch back to the right page tables.
MM.enter_process_paging_scope(*Process::current);
klog() << "CRASH: " << description << ". Ring " << (Process::current->is_ring0() ? 0 : 3) << ".";
klog() << "CRASH: CPU #" << Processor::current().id() << " " << description << ". Ring " << (Process::current->is_ring0() ? 0 : 3) << ".";
dump(regs);
if (Process::current->is_ring0()) {
@@ -186,29 +170,29 @@ void handle_crash(RegisterState& regs, const char* description, int signal, bool
}
EH_ENTRY_NO_CODE(6, illegal_instruction);
void illegal_instruction_handler(RegisterState regs)
void illegal_instruction_handler(TrapFrame* trap)
{
clac();
handle_crash(regs, "Illegal instruction", SIGILL);
handle_crash(*trap->regs, "Illegal instruction", SIGILL);
}
EH_ENTRY_NO_CODE(0, divide_error);
void divide_error_handler(RegisterState regs)
void divide_error_handler(TrapFrame* trap)
{
clac();
handle_crash(regs, "Divide error", SIGFPE);
handle_crash(*trap->regs, "Divide error", SIGFPE);
}
EH_ENTRY(13, general_protection_fault);
void general_protection_fault_handler(RegisterState regs)
void general_protection_fault_handler(TrapFrame* trap)
{
clac();
handle_crash(regs, "General protection fault", SIGSEGV);
handle_crash(*trap->regs, "General protection fault", SIGSEGV);
}
// 7: FPU not available exception
EH_ENTRY_NO_CODE(7, fpu_exception);
void fpu_exception_handler(RegisterState)
void fpu_exception_handler(TrapFrame*)
{
// Just clear the TS flag. We've already restored the FPU state eagerly.
// FIXME: It would be nice if we didn't have to do this at all.
@@ -217,10 +201,11 @@ void fpu_exception_handler(RegisterState)
// 14: Page Fault
EH_ENTRY(14, page_fault);
void page_fault_handler(RegisterState regs)
void page_fault_handler(TrapFrame* trap)
{
clac();
auto& regs = *trap->regs;
u32 fault_address;
asm("movl %%cr2, %%eax"
: "=a"(fault_address));
@@ -294,9 +279,10 @@ void page_fault_handler(RegisterState regs)
}
EH_ENTRY_NO_CODE(1, debug);
void debug_handler(RegisterState regs)
void debug_handler(TrapFrame* trap)
{
clac();
auto& regs = *trap->regs;
if (!Process::current || (regs.cs & 3) == 0) {
klog() << "Debug Exception in Ring0";
hang();
@@ -314,9 +300,10 @@ void debug_handler(RegisterState regs)
}
EH_ENTRY_NO_CODE(3, breakpoint);
void breakpoint_handler(RegisterState regs)
void breakpoint_handler(TrapFrame* trap)
{
clac();
auto& regs = *trap->regs;
if (!Process::current || (regs.cs & 3) == 0) {
klog() << "Breakpoint Trap in Ring0";
hang();
@@ -356,80 +343,11 @@ EH(12, "Stack exception")
EH(15, "Unknown error")
EH(16, "Coprocessor error")
static void write_raw_gdt_entry(u16 selector, u32 low, u32 high)
{
u16 i = (selector & 0xfffc) >> 3;
s_gdt[i].low = low;
s_gdt[i].high = high;
if (i > s_gdt_length)
s_gdtr.limit = (s_gdt_length + 1) * 8 - 1;
}
void write_gdt_entry(u16 selector, Descriptor& descriptor)
{
write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
}
Descriptor& get_gdt_entry(u16 selector)
{
u16 i = (selector & 0xfffc) >> 3;
return *(Descriptor*)(&s_gdt[i]);
}
void flush_gdt()
{
s_gdtr.address = s_gdt;
s_gdtr.limit = (s_gdt_length * 8) - 1;
asm("lgdt %0" ::"m"(s_gdtr)
: "memory");
}
const DescriptorTablePointer& get_gdtr()
{
return s_gdtr;
}
const DescriptorTablePointer& get_idtr()
{
return s_idtr;
}
void gdt_init()
{
s_gdt_length = 5;
s_gdt_freelist = new Vector<u16>();
s_gdt_freelist->ensure_capacity(256);
for (size_t i = s_gdt_length; i < 256; ++i)
s_gdt_freelist->append(i * 8);
s_gdt_length = 256;
s_gdtr.address = s_gdt;
s_gdtr.limit = (s_gdt_length * 8) - 1;
write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
write_raw_gdt_entry(0x0008, 0x0000ffff, 0x00cf9a00);
write_raw_gdt_entry(0x0010, 0x0000ffff, 0x00cf9200);
write_raw_gdt_entry(0x0018, 0x0000ffff, 0x00cffa00);
write_raw_gdt_entry(0x0020, 0x0000ffff, 0x00cff200);
flush_gdt();
asm volatile(
"mov %%ax, %%ds\n"
"mov %%ax, %%es\n"
"mov %%ax, %%fs\n"
"mov %%ax, %%gs\n"
"mov %%ax, %%ss\n" ::"a"(0x10)
: "memory");
// Make sure CS points to the kernel code descriptor.
asm volatile(
"ljmpl $0x8, $sanity\n"
"sanity:\n");
}
static void unimp_trap()
{
klog() << "Unhandled IRQ.";
@@ -514,7 +432,7 @@ void flush_idt()
asm("lidt %0" ::"m"(s_idtr));
}
void idt_init()
static void idt_init()
{
s_idtr.address = s_idt;
s_idtr.limit = 0x100 * 8 - 1;
@@ -683,21 +601,32 @@ void load_task_register(u16 selector)
asm("ltr %0" ::"r"(selector));
}
u32 g_in_irq;
void handle_interrupt(RegisterState regs)
void handle_interrupt(TrapFrame* trap)
{
clac();
++g_in_irq;
auto& regs = *trap->regs;
ASSERT(regs.isr_number >= IRQ_VECTOR_BASE && regs.isr_number <= (IRQ_VECTOR_BASE + GENERIC_INTERRUPT_HANDLERS_COUNT));
u8 irq = (u8)(regs.isr_number - 0x50);
ASSERT(s_interrupt_handler[irq]);
s_interrupt_handler[irq]->handle_interrupt(regs);
s_interrupt_handler[irq]->increment_invoking_counter();
--g_in_irq;
s_interrupt_handler[irq]->eoi();
}
void enter_trap_no_irq(TrapFrame* trap)
{
Processor::current().enter_trap(*trap, false);
}
void enter_trap(TrapFrame* trap)
{
Processor::current().enter_trap(*trap, true);
}
void exit_trap(TrapFrame* trap)
{
return Processor::current().exit_trap(*trap);
}
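These C-linkage shims exist so the assembly stubs can call straight into the current CPU's Processor. Together with invoke_scheduler_async() they give IRQ handlers a safe way to request a context switch, the pattern Scheduler::yield() adopts later in this diff (sketch):

if (Processor::current().in_irq()) {
    // switching contexts inside an IRQ is unsafe; exit_trap() will
    // invoke the scheduler once the outermost trap unwinds
    Processor::current().invoke_scheduler_async();
} else {
    Scheduler::pick_next();
}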
void sse_init()
{
asm volatile(
@@ -740,9 +669,10 @@ void cpu_detect()
g_cpu_supports_rdseed = (extended_features.ebx() & (1 << 18));
}
void cpu_setup()
void cpu_setup(u32 cpu)
{
cpu_detect();
if (cpu == 0)
cpu_detect();
if (g_cpu_supports_sse) {
sse_init();
@@ -863,6 +793,424 @@ u32 read_dr6()
return dr6;
}
FPUState Processor::s_clean_fpu_state;
void Processor::initialize(u32 cpu)
{
m_self = this;
m_cpu = cpu;
m_in_irq = 0;
gdt_init();
if (cpu == 0)
idt_init();
else
flush_idt();
ASSERT(&current() == this); // sanity check
if (cpu == 0) {
ASSERT((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
asm volatile("fninit");
asm volatile("fxsave %0"
: "=m"(s_clean_fpu_state));
}
klog() << "CPU #" << cpu << " using Processor at " << VirtualAddress(FlatPtr(this));
}
void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
{
u16 i = (selector & 0xfffc) >> 3;
u32 prev_gdt_length = m_gdt_length;
if (i > m_gdt_length) {
m_gdt_length = i + 1;
ASSERT(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
}
m_gdt[i].low = low;
m_gdt[i].high = high;
// clear selectors we may have skipped
while (i < prev_gdt_length) {
m_gdt[i].low = 0;
m_gdt[i].high = 0;
i++;
}
}
void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
{
write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
}
Descriptor& Processor::get_gdt_entry(u16 selector)
{
u16 i = (selector & 0xfffc) >> 3;
return *(Descriptor*)(&m_gdt[i]);
}
void Processor::flush_gdt()
{
m_gdtr.address = m_gdt;
m_gdtr.limit = (m_gdt_length * 8) - 1;
asm volatile("lgdt %0" ::"m"(m_gdtr)
: "memory");
}
const DescriptorTablePointer& Processor::get_gdtr()
{
return m_gdtr;
}
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
{
ASSERT(from_thread == to_thread || from_thread->state() != Thread::Running);
ASSERT(to_thread->state() == Thread::Running);
auto& from_tss = from_thread->tss();
auto& to_tss = to_thread->tss();
asm volatile("fxsave %0"
: "=m"(from_thread->fpu_state()));
from_tss.fs = get_fs();
from_tss.gs = get_gs();
set_fs(to_tss.fs);
set_gs(to_tss.gs);
auto& tls_descriptor = Processor::current().get_gdt_entry(GDT_SELECTOR_TLS);
tls_descriptor.set_base(to_thread->thread_specific_data().as_ptr());
tls_descriptor.set_limit(to_thread->thread_specific_region_size());
if (from_tss.cr3 != to_tss.cr3)
write_cr3(to_tss.cr3);
asm volatile("fxrstor %0"
::"m"(to_thread->fpu_state()));
// TODO: debug registers
// TODO: ioperm?
}
#define ENTER_THREAD_CONTEXT_ARGS_SIZE (2 * 4) // to_thread, from_thread
void Processor::switch_context(Thread* from_thread, Thread* to_thread)
{
ASSERT(!in_irq());
ASSERT(is_kernel_mode());
#ifdef CONTEXT_SWITCH_DEBUG
dbg() << "switch_context --> switching out of: " << *from_thread;
#endif
// Switch to new thread context, passing from_thread and to_thread
// through to the new context using registers edx and eax
asm volatile(
// NOTE: changing how much we push to the stack affects
// SWITCH_CONTEXT_TO_STACK_SIZE and thread_context_first_enter()!
"pushfl \n"
"pushl %%ebx \n"
"pushl %%esi \n"
"pushl %%edi \n"
"pushl %%ebp \n"
"movl %%esp, %[from_esp] \n"
"movl $1f, %[from_eip] \n"
"movl %[to_esp0], %%ebx \n"
"movl %%ebx, %[tss_esp0] \n"
"movl %[to_esp], %%esp \n"
"pushl %[to_thread] \n"
"pushl %[from_thread] \n"
"pushl %[to_eip] \n"
"cld \n"
"jmp enter_thread_context \n"
"1: \n"
"popl %%edx \n"
"popl %%eax \n"
"popl %%ebp \n"
"popl %%edi \n"
"popl %%esi \n"
"popl %%ebx \n"
"popfl \n"
: [from_esp] "=m" (from_thread->tss().esp),
[from_eip] "=m" (from_thread->tss().eip),
[tss_esp0] "=m" (m_tss.esp0),
"=d" (from_thread), // needed so that from_thread retains the correct value
"=a" (to_thread) // needed so that to_thread retains the correct value
: [to_esp] "g" (to_thread->tss().esp),
[to_esp0] "g" (to_thread->tss().esp0),
[to_eip] "c" (to_thread->tss().eip),
[from_thread] "d" (from_thread),
[to_thread] "a" (to_thread)
);
#ifdef CONTEXT_SWITCH_DEBUG
dbg() << "switch_context <-- from " << *from_thread << " to " << *to_thread;
#endif
}
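The handoff works because both stacks agree on a small contract: the outgoing thread parks eflags plus ebx/esi/edi/ebp on its own stack and will resume at label 1, while the incoming stack already holds the three words below, so enter_thread_context() "returns" into the incoming thread with from_thread/to_thread ready to pop. A sketch of the incoming stack at the jmp, top of stack first:

//   to_eip        <- where enter_thread_context's ret lands: label 1 for a
//                    previously switched-out thread, thread_context_first_enter
//                    for a brand-new one
//   from_thread   <- popped into %edx at label 1
//   to_thread     <- popped into %eax at label 1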
extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap)
{
ASSERT(!are_interrupts_enabled());
ASSERT(is_kernel_mode());
(void)from_thread;
(void)to_thread;
(void)trap;
#ifdef CONTEXT_SWITCH_DEBUG
dbg() << "switch_context <-- from " << *from_thread << " to " << *to_thread << " (context_first_init)";
#endif
}
extern "C" void thread_context_first_enter(void);
asm(
// enter_thread_context returns to here first time a thread is executing
".globl thread_context_first_enter \n"
"thread_context_first_enter: \n"
// switch_context will have pushed from_thread and to_thread to our new
// stack prior to thread_context_first_enter() being called, and the
// pointer to TrapFrame was the top of the stack before that
" movl 8(%esp), %ebx \n" // save pointer to TrapFrame
" cld \n"
" call context_first_init \n"
" addl $" __STRINGIFY(ENTER_THREAD_CONTEXT_ARGS_SIZE) ", %esp \n"
" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
" jmp common_trap_exit \n"
);
u32 Processor::init_context(Thread& thread)
{
ASSERT(is_kernel_mode());
const u32 kernel_stack_top = thread.kernel_stack_top();
u32 stack_top = kernel_stack_top;
// TODO: handle NT?
ASSERT((cpu_flags() & 0x24000) == 0); // Assume !(NT | VM)
auto& tss = thread.tss();
bool return_to_user = (tss.cs & 3) != 0;
// make room for an interrupt frame
if (!return_to_user) {
// userspace_esp and userspace_ss are not popped off by iret
// unless we're switching back to user mode
stack_top -= sizeof(RegisterState) - 2 * sizeof(u32);
} else {
stack_top -= sizeof(RegisterState);
}
// we want to end up 16-byte aligned, %esp + 4 should be aligned
stack_top -= sizeof(u32);
*reinterpret_cast<u32*>(kernel_stack_top - 4) = 0;
// set up the stack so that after returning from thread_context_first_enter()
// we will end up either in kernel mode or user mode, depending on how the thread is set up
// However, the first step is to always start in kernel mode with thread_context_first_enter
RegisterState& iretframe = *reinterpret_cast<RegisterState*>(stack_top);
iretframe.ss = tss.ss;
iretframe.gs = tss.gs;
iretframe.fs = tss.fs;
iretframe.es = tss.es;
iretframe.ds = tss.ds;
iretframe.edi = tss.edi;
iretframe.esi = tss.esi;
iretframe.ebp = tss.ebp;
iretframe.esp = 0;
iretframe.ebx = tss.ebx;
iretframe.edx = tss.edx;
iretframe.ecx = tss.ecx;
iretframe.eax = tss.eax;
iretframe.eflags = tss.eflags;
iretframe.eip = tss.eip;
iretframe.cs = tss.cs;
if (return_to_user) {
iretframe.userspace_esp = tss.esp;
iretframe.userspace_ss = tss.ss;
}
// make space for a trap frame
stack_top -= sizeof(TrapFrame);
TrapFrame& trap = *reinterpret_cast<TrapFrame*>(stack_top);
trap.regs = &iretframe;
trap.prev_irq_level = 0;
stack_top -= sizeof(u32); // pointer to TrapFrame
*reinterpret_cast<u32*>(stack_top) = stack_top + 4;
#ifdef CONTEXT_SWITCH_DEBUG
dbg() << "init_context " << thread << " set up to execute at eip: " << VirtualAddress(tss.eip) << " esp: " << VirtualAddress(tss.esp) << " stack top: " << VirtualAddress(stack_top);
#endif
// make switch_context() always first return to thread_context_first_enter()
// in kernel mode, so set up these values so that we end up popping iretframe
// off the stack right after the context switch completed, at which point
// control is transferred to what iretframe is pointing to.
tss.eip = FlatPtr(&thread_context_first_enter);
tss.esp0 = kernel_stack_top;
tss.esp = stack_top;
tss.cs = GDT_SELECTOR_CODE0;
tss.ds = GDT_SELECTOR_DATA0;
tss.es = GDT_SELECTOR_DATA0;
tss.gs = GDT_SELECTOR_DATA0;
tss.ss = GDT_SELECTOR_DATA0;
tss.fs = GDT_SELECTOR_PROC;
return stack_top;
}
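So init_context() fabricates, by hand, exactly the stack that common_trap_exit expects; the thread's very first "return from a trap" then pops straight into its real entry point. From kernel_stack_top downwards (a sketch):

// kernel_stack_top
//   u32 0                     // sentinel; keeps %esp + 4 16-byte aligned
//   RegisterState iretframe   // userspace_esp/ss present only for ring 3
//   TrapFrame { prev_irq_level = 0, regs = &iretframe }
//   u32 = &TrapFrame          // what thread_context_first_enter reads at 8(%esp)
// tss.esp <- the returned stack_top points here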
extern "C" u32 do_init_context(Thread* thread)
{
return Processor::init_context(*thread);
}
extern "C" void do_assume_context(Thread* thread);
asm(
".global do_assume_context \n"
"do_assume_context: \n"
" movl 4(%esp), %ebx \n"
// We're going to call Processor::init_context, so just make sure
// we have enough stack space so we don't stomp over it
" subl $(" __STRINGIFY(4 + REGISTER_STATE_SIZE + TRAP_FRAME_SIZE + 4) "), %esp \n"
" pushl %ebx \n"
" cld \n"
" call do_init_context \n"
" addl $4, %esp \n"
" movl %eax, %esp \n" // move stack pointer to what Processor::init_context set up for us
" pushl %ebx \n" // push to_thread
" pushl %ebx \n" // push from_thread
" pushl $thread_context_first_enter \n" // should be same as tss.eip
" jmp enter_thread_context \n"
);
void Processor::assume_context(Thread& thread)
{
do_assume_context(&thread);
ASSERT_NOT_REACHED();
}
void Processor::initialize_context_switching(Thread& initial_thread)
{
ASSERT(initial_thread.process().is_ring0());
auto& tss = initial_thread.tss();
m_tss = tss;
m_tss.esp0 = tss.esp0;
m_tss.ss0 = GDT_SELECTOR_DATA0;
// user mode needs to be able to switch to kernel mode:
m_tss.cs = m_tss.ds = m_tss.es = m_tss.gs = m_tss.ss = GDT_SELECTOR_CODE0 | 3;
m_tss.fs = GDT_SELECTOR_PROC | 3;
asm volatile(
"movl %[new_esp], %%esp \n" // swich to new stack
"pushl %[from_to_thread] \n" // to_thread
"pushl %[from_to_thread] \n" // from_thread
"pushl $" __STRINGIFY(GDT_SELECTOR_CODE0) " \n"
"pushl %[new_eip] \n" // save the entry eip to the stack
"movl %%esp, %%ebx \n"
"addl $20, %%ebx \n" // calculate pointer to TrapFrame
"pushl %%ebx \n"
"cld \n"
"call enter_trap_no_irq \n"
"addl $4, %%esp \n"
"lret \n"
:: [new_esp] "g" (tss.esp),
[new_eip] "a" (tss.eip),
[from_to_thread] "b" (&initial_thread)
);
ASSERT_NOT_REACHED();
}
void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
{
InterruptDisabler disabler;
trap.prev_irq_level = m_in_irq;
if (raise_irq)
m_in_irq++;
}
void Processor::exit_trap(TrapFrame& trap)
{
InterruptDisabler disabler;
ASSERT(m_in_irq >= trap.prev_irq_level);
m_in_irq = trap.prev_irq_level;
if (m_invoke_scheduler_async && !m_in_irq) {
m_invoke_scheduler_async = false;
Scheduler::invoke_async();
}
}
void Processor::gdt_init()
{
m_gdt_length = 0;
m_gdtr.address = nullptr;
m_gdtr.limit = 0;
write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
Descriptor tls_descriptor;
tls_descriptor.low = tls_descriptor.high = 0;
tls_descriptor.dpl = 3;
tls_descriptor.segment_present = 1;
tls_descriptor.granularity = 0;
tls_descriptor.zero = 0;
tls_descriptor.operation_size = 1;
tls_descriptor.descriptor_type = 1;
tls_descriptor.type = 2;
write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3
Descriptor fs_descriptor;
fs_descriptor.set_base(this);
fs_descriptor.set_limit(sizeof(Processor));
fs_descriptor.dpl = 0;
fs_descriptor.segment_present = 1;
fs_descriptor.granularity = 0;
fs_descriptor.zero = 0;
fs_descriptor.operation_size = 1;
fs_descriptor.descriptor_type = 1;
fs_descriptor.type = 2;
write_gdt_entry(GDT_SELECTOR_PROC, fs_descriptor); // fs0
Descriptor tss_descriptor;
tss_descriptor.set_base(&m_tss);
tss_descriptor.set_limit(sizeof(TSS32));
tss_descriptor.dpl = 0;
tss_descriptor.segment_present = 1;
tss_descriptor.granularity = 0;
tss_descriptor.zero = 0;
tss_descriptor.operation_size = 1;
tss_descriptor.descriptor_type = 0;
tss_descriptor.type = 9;
write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss
flush_gdt();
load_task_register(GDT_SELECTOR_TSS);
asm volatile(
"mov %%ax, %%ds\n"
"mov %%ax, %%es\n"
"mov %%ax, %%gs\n"
"mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
: "memory");
set_fs(GDT_SELECTOR_PROC);
// Make sure CS points to the kernel code descriptor.
asm volatile(
"ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
"sanity:\n");
}
void Processor::set_thread_specific(u8* data, size_t len)
{
auto& descriptor = get_gdt_entry(GDT_SELECTOR_TLS);
descriptor.set_base(data);
descriptor.set_limit(len);
}
}
#ifdef DEBUG

View file

@@ -106,6 +106,14 @@ union [[gnu::packed]] Descriptor
TrapGate_32bit = 0xf,
};
void* get_base() const
{
u32 b = base_lo;
b |= base_hi << 16;
b |= base_hi2 << 24;
return reinterpret_cast<void*>(b);
}
void set_base(void* b)
{
base_lo = (u32)(b)&0xffff;
@@ -256,8 +264,6 @@ struct RegisterState;
const DescriptorTablePointer& get_gdtr();
const DescriptorTablePointer& get_idtr();
void gdt_init();
void idt_init();
void sse_init();
void register_interrupt_handler(u8 number, void (*f)());
void register_user_callable_interrupt_handler(u8 number, void (*f)());
@@ -267,12 +273,7 @@ void replace_single_handler_with_shared(GenericInterruptHandler&);
void replace_shared_handler_with_single(GenericInterruptHandler&);
void unregister_generic_interrupt_handler(u8 number, GenericInterruptHandler&);
void flush_idt();
void flush_gdt();
void load_task_register(u16 selector);
u16 gdt_alloc_entry();
void gdt_free_entry(u16);
Descriptor& get_gdt_entry(u16 selector);
void write_gdt_entry(u16 selector, Descriptor&);
void handle_crash(RegisterState&, const char* description, int signal, bool out_of_memory = false);
[[noreturn]] static inline void hang()
@@ -303,6 +304,39 @@ inline u32 cpu_flags()
return flags;
}
inline void set_fs(u32 segment)
{
asm volatile(
"movl %%eax, %%fs" :: "a"(segment)
: "memory"
);
}
inline void set_gs(u32 segment)
{
asm volatile(
"movl %%eax, %%gs" :: "a"(segment)
: "memory"
);
}
inline u32 get_fs()
{
u32 fs;
asm("mov %%fs, %%eax"
: "=a"(fs));
return fs;
}
inline u32 get_gs()
{
u32 gs;
asm("mov %%gs, %%eax"
: "=a"(gs));
return gs;
}
inline u32 read_fs_u32(u32 offset)
{
u32 val;
@@ -460,6 +494,9 @@ struct [[gnu::packed]] RegisterState
u32 userspace_ss;
};
#define REGISTER_STATE_SIZE (19 * 4)
static_assert(REGISTER_STATE_SIZE == sizeof(RegisterState));
struct [[gnu::aligned(16)]] FPUState
{
u8 buffer[512];
@@ -492,6 +529,15 @@ u32 read_cr4();
u32 read_dr6();
static inline bool is_kernel_mode()
{
u32 cs;
asm volatile (
"movl %%cs, %[cs] \n"
: [cs] "=g" (cs));
return (cs & 3) == 0;
}
class CPUID {
public:
CPUID(u32 function) { asm volatile("cpuid"
@@ -552,6 +598,94 @@ private:
SplitQword m_start;
};
class Thread;
struct TrapFrame;
#define GDT_SELECTOR_CODE0 0x08
#define GDT_SELECTOR_DATA0 0x10
#define GDT_SELECTOR_CODE3 0x18
#define GDT_SELECTOR_DATA3 0x20
#define GDT_SELECTOR_TLS 0x28
#define GDT_SELECTOR_PROC 0x30
#define GDT_SELECTOR_TSS 0x38
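Bits 0-1 of a selector are the requested privilege level and bit 2 the table indicator, so the descriptor slot is simply selector >> 3; that is the same arithmetic Processor::write_raw_gdt_entry() uses. A quick sanity check:

constexpr u16 gdt_index(u16 selector) { return (selector & 0xfffc) >> 3; }
static_assert(gdt_index(GDT_SELECTOR_DATA3 | 3) == 4); // 0x23 -> slot 4, RPL 3
static_assert(gdt_index(GDT_SELECTOR_TSS) == 7);       // 0x38 -> slot 7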
class Processor {
Processor* m_self; // must be first field (%fs offset 0x0)
DescriptorTablePointer m_gdtr;
Descriptor m_gdt[256];
u32 m_gdt_length;
u32 m_cpu;
u32 m_in_irq;
TSS32 m_tss;
static FPUState s_clean_fpu_state;
bool m_invoke_scheduler_async;
void gdt_init();
void write_raw_gdt_entry(u16 selector, u32 low, u32 high);
void write_gdt_entry(u16 selector, Descriptor& descriptor);
public:
void initialize(u32 cpu);
Descriptor& get_gdt_entry(u16 selector);
void flush_gdt();
const DescriptorTablePointer& get_gdtr();
ALWAYS_INLINE static Processor& current()
{
return *(Processor*)read_fs_u32(0);
}
ALWAYS_INLINE static u32 id()
{
return current().m_cpu;
}
ALWAYS_INLINE u32& in_irq()
{
return m_in_irq;
}
ALWAYS_INLINE const FPUState& clean_fpu_state() const
{
return s_clean_fpu_state;
}
void invoke_scheduler_async() { m_invoke_scheduler_async = true; }
void enter_trap(TrapFrame& trap, bool raise_irq);
void exit_trap(TrapFrame& trap);
[[noreturn]] void initialize_context_switching(Thread& initial_thread);
void switch_context(Thread* from_thread, Thread* to_thread);
[[noreturn]] static void assume_context(Thread& thread);
static u32 init_context(Thread& thread);
void set_thread_specific(u8* data, size_t len);
};
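With the segment set up, reaching per-CPU state costs one %fs-relative load and needs no locking; a usage sketch:

auto& proc = Processor::current();
klog() << "CPU #" << Processor::id() << " irq nesting level: " << proc.in_irq();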
struct TrapFrame {
u32 prev_irq_level;
RegisterState* regs; // must be last
TrapFrame() = delete;
TrapFrame(const TrapFrame&) = delete;
TrapFrame(TrapFrame&&) = delete;
TrapFrame& operator=(const TrapFrame&) = delete;
TrapFrame& operator=(TrapFrame&&) = delete;
};
#define TRAP_FRAME_SIZE (2 * 4)
static_assert(TRAP_FRAME_SIZE == sizeof(TrapFrame));
extern "C" void enter_trap_no_irq(TrapFrame*);
extern "C" void enter_trap(TrapFrame*);
extern "C" void exit_trap(TrapFrame*);
class MSR {
uint32_t m_msr;
@@ -583,7 +717,8 @@ public:
}
};
void cpu_setup();
void cpu_setup(u32 cpu);
extern bool g_cpu_supports_nx;
extern bool g_cpu_supports_pae;
extern bool g_cpu_supports_pge;
@@ -629,6 +764,4 @@ private:
u32 m_flags;
};
extern u32 g_in_irq;
}

View file

@@ -27,6 +27,8 @@
#pragma once
#include <AK/Types.h>
#include <AK/Assertions.h>
#include <Kernel/Arch/i386/CPU.h>
extern "C" void interrupt_common_asm_entry();
@@ -47,16 +49,35 @@ asm(
" pushl %fs\n"
" pushl %gs\n"
" pushl %ss\n"
" mov $0x10, %ax\n"
" mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n"
" mov %ax, %ds\n"
" mov %ax, %es\n"
" mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n"
" mov %ax, %fs\n"
" pushl %esp \n" // set TrapFrame::regs
" subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n"
" movl %esp, %ebx \n" // save pointer to TrapFrame
" pushl %ebx \n"
" cld\n"
" call enter_trap \n"
" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
" call handle_interrupt\n"
" add $0x4, %esp\n" // "popl %ss"
" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
".globl common_trap_exit \n"
"common_trap_exit: \n"
// another thread may have handled this trap at this point, so don't
// make assumptions about the stack other than there's a TrapFrame
// and a pointer to it.
" call exit_trap \n"
" addl $" __STRINGIFY(TRAP_FRAME_SIZE + 4) ", %esp\n" // pop TrapFrame and pointer to it
".globl interrupt_common_asm_exit \n"
"interrupt_common_asm_exit: \n"
" addl $4, %esp\n" // pop %ss
" popl %gs\n"
" popl %fs\n"
" popl %es\n"
" popl %ds\n"
" popa\n"
" add $0x4, %esp\n"
" iret\n");
" addl $0x4, %esp\n" // skip exception_code, isr_number
" iret\n"
);

View file

@@ -28,6 +28,9 @@
#include <Kernel/Arch/i386/CPU.h>
#define __STRINGIFY_HELPER(x) #x
#define __STRINGIFY(x) __STRINGIFY_HELPER(x)
#ifdef DEBUG
[[noreturn]] void __assertion_failed(const char* msg, const char* file, unsigned line, const char* func);
# define ASSERT(expr) (static_cast<bool>(expr) ? (void)0 : __assertion_failed(# expr, __FILE__, __LINE__, __PRETTY_FUNCTION__))

View file

@@ -59,6 +59,7 @@ class Scheduler;
class SharedBuffer;
class Socket;
template <typename BaseType> class SpinLock;
class RecursiveSpinLock;
template <typename BaseType, typename LockType> class ScopedSpinLock;
class TCPSocket;
class TTY;

View file

@@ -38,6 +38,8 @@
#include <Kernel/VM/PageDirectory.h>
#include <Kernel/VM/TypedMapping.h>
//#define APIC_DEBUG
#define IRQ_APIC_SPURIOUS 0x7f
#define APIC_BASE_MSR 0x1b
@@ -118,6 +120,7 @@ void APIC::write_icr(const ICRReg& icr)
extern "C" void apic_ap_start(void);
extern "C" u16 apic_ap_start_size;
extern "C" u32 ap_cpu_init_stacks;
extern "C" u32 ap_cpu_init_processor_info_array;
extern "C" u32 ap_cpu_init_cr0;
extern "C" u32 ap_cpu_init_cr3;
extern "C" u32 ap_cpu_init_cr4;
@@ -151,7 +154,9 @@ bool APIC::init_bsp()
return false;
PhysicalAddress apic_base = get_base();
#ifdef APIC_DEBUG
klog() << "Initializing APIC, base: " << apic_base;
#endif
set_base(apic_base);
m_apic_base = MM.allocate_kernel_region(apic_base.page_base(), PAGE_ROUND_UP(1), {}, Region::Access::Read | Region::Access::Write);
@@ -177,8 +182,10 @@ bool APIC::init_bsp()
size_t entry_length = madt_entry->length;
if (madt_entry->type == (u8)ACPI::Structures::MADTEntryType::LocalAPIC) {
auto* plapic_entry = (const ACPI::Structures::MADTEntries::ProcessorLocalAPIC*)madt_entry;
#ifdef APIC_DEBUG
klog() << "APIC: AP found @ MADT entry " << entry_index << ", Processor Id: " << String::format("%02x", plapic_entry->acpi_processor_id)
<< " APIC Id: " << String::format("%02x", plapic_entry->apic_id) << " Flags: " << String::format("%08x", plapic_entry->flags);
#endif
processor_cnt++;
if ((plapic_entry->flags & 0x1) != 0)
processor_enabled_cnt++;
@@ -201,7 +208,10 @@ bool APIC::init_bsp()
u32 aps_to_enable = processor_enabled_cnt - 1;
// Copy the APIC startup code and variables to P0x00008000
auto apic_startup_region = MM.allocate_kernel_region_identity(PhysicalAddress(0x8000), PAGE_ROUND_UP(apic_ap_start_size), {}, Region::Access::Read | Region::Access::Write | Region::Access::Execute);
// Also account for the data appended to it:
// * aps_to_enable u32 values for ap_cpu_init_stacks
// * aps_to_enable u32 values for ap_cpu_init_processor_info_array
auto apic_startup_region = MM.allocate_kernel_region_identity(PhysicalAddress(0x8000), PAGE_ROUND_UP(apic_ap_start_size + (2 * aps_to_enable * sizeof(u32))), {}, Region::Access::Read | Region::Access::Write | Region::Access::Execute);
memcpy(apic_startup_region->vaddr().as_ptr(), reinterpret_cast<const void*>(apic_ap_start), apic_ap_start_size);
// Allocate enough stacks for all APs
@@ -212,20 +222,35 @@ bool APIC::init_bsp()
return false;
}
stack_region->set_stack(true);
klog() << "APIC: Allocated AP #" << i << " stack at " << stack_region->vaddr();
m_apic_ap_stacks.append(stack_region.release_nonnull());
}
// Store pointers to all stacks for the APs to use
auto ap_stack_array = APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_init_stacks);
for (size_t i = 0; i < m_apic_ap_stacks.size(); i++)
ASSERT(aps_to_enable == m_apic_ap_stacks.size());
for (size_t i = 0; i < aps_to_enable; i++) {
ap_stack_array[i] = m_apic_ap_stacks[i].vaddr().get() + Thread::default_kernel_stack_size;
#ifdef APIC_DEBUG
klog() << "APIC: CPU[" << (i + 1) << "] stack at " << VirtualAddress(ap_stack_array[i]);
#endif
}
// Allocate Processor structures for all APs and store the pointer to the data
m_ap_processor_info.resize(aps_to_enable);
auto ap_processor_info_array = &ap_stack_array[aps_to_enable];
for (size_t i = 0; i < aps_to_enable; i++) {
ap_processor_info_array[i] = FlatPtr(&m_ap_processor_info.at(i));
#ifdef APIC_DEBUG
klog() << "APIC: CPU[" << (i + 1) << "] Processor at " << VirtualAddress(ap_processor_info_array[i]);
#endif
}
*APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_init_processor_info_array) = FlatPtr(&ap_processor_info_array[0]);
// Store the BSP's CR3 value for the APs to use
*APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_init_cr3) = MM.kernel_page_directory().cr3();
// Store the BSP's GDT and IDT for the APs to use
const auto& gdtr = get_gdtr();
const auto& gdtr = Processor::current().get_gdtr();
*APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_gdtr) = FlatPtr(&gdtr);
const auto& idtr = get_idtr();
*APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_idtr) = FlatPtr(&idtr);
@@ -233,8 +258,10 @@ bool APIC::init_bsp()
// Store the BSP's CR0 and CR4 values for the APs to use
*APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_init_cr0) = read_cr0();
*APIC_INIT_VAR_PTR(u32, apic_startup_region->vaddr().as_ptr(), ap_cpu_init_cr4) = read_cr4();
#ifdef APIC_DEBUG
klog() << "APIC: Starting " << aps_to_enable << " AP(s)";
#endif
// INIT
write_icr(ICRReg(0, ICRReg::INIT, ICRReg::Physical, ICRReg::Assert, ICRReg::TriggerMode::Edge, ICRReg::AllExcludingSelf));
@@ -250,14 +277,18 @@ bool APIC::init_bsp()
// Now wait until the ap_cpu_init_pending variable dropped to 0, which means all APs are initialized and no longer need these special mappings
if (m_apic_ap_count.load(AK::MemoryOrder::memory_order_consume) != aps_to_enable) {
#ifdef APIC_DEBUG
klog() << "APIC: Waiting for " << aps_to_enable << " AP(s) to finish initialization...";
#endif
do {
// Wait a little bit
IO::delay(200);
} while (m_apic_ap_count.load(AK::MemoryOrder::memory_order_consume) != aps_to_enable);
}
#ifdef APIC_DEBUG
klog() << "APIC: " << processor_enabled_cnt << " processors are initialized and running";
#endif
}
return true;
}
@@ -270,8 +301,9 @@ void APIC::enable_bsp()
void APIC::enable(u32 cpu)
{
if (cpu == 0)// FIXME: once memory management can deal with it, re-enable for all
klog() << "Enabling local APIC for cpu #" << cpu;
#ifdef APIC_DEBUG
klog() << "Enabling local APIC for cpu #" << cpu;
#endif
if (cpu == 0) {
// dummy read, apparently to avoid a bug in old CPUs.

View file

@@ -87,6 +87,7 @@ private:
OwnPtr<Region> m_apic_base;
NonnullOwnPtrVector<Region> m_apic_ap_stacks;
Vector<Processor> m_ap_processor_info;
AK::Atomic<u32> m_apic_ap_count{0};
static PhysicalAddress get_base();

View file

@@ -44,7 +44,6 @@ static bool modes_conflict(Lock::Mode mode1, Lock::Mode mode2)
void Lock::lock(Mode mode)
{
ASSERT(mode != Mode::Unlocked);
ASSERT(!Scheduler::is_active());
if (!are_interrupts_enabled()) {
klog() << "Interrupts disabled when trying to take Lock{" << m_name << "}";
dump_backtrace();

View file

@@ -859,9 +859,17 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
// No other thread from this process will be scheduled to run
m_exec_tid = Thread::current->tid();
auto old_page_directory = move(m_page_directory);
auto old_regions = move(m_regions);
m_page_directory = PageDirectory::create_for_userspace(*this);
RefPtr<PageDirectory> old_page_directory;
NonnullOwnPtrVector<Region> old_regions;
{
// Need to make sure we don't swap contexts in the middle
InterruptDisabler disabler;
old_page_directory = move(m_page_directory);
old_regions = move(m_regions);
m_page_directory = PageDirectory::create_for_userspace(*this);
}
#ifdef MM_DEBUG
dbg() << "Process " << pid() << " exec: PD=" << m_page_directory.ptr() << " created";
#endif
@@ -898,6 +906,8 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
{
ArmedScopeGuard rollback_regions_guard([&]() {
ASSERT(Process::current == this);
// Need to make sure we don't swap contexts in the middle
InterruptDisabler disabler;
m_page_directory = move(old_page_directory);
m_regions = move(old_regions);
MM.enter_process_paging_scope(*this);
@@ -1028,7 +1038,7 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
// and we don't want to deal with faults after this point.
u32 new_userspace_esp = new_main_thread->make_userspace_stack_for_main_thread(move(arguments), move(environment));
// We cli() manually here because we don't want to get interrupted between do_exec() and Scheduler::yield().
// We cli() manually here because we don't want to get interrupted between do_exec() and Processor::assume_context().
// The reason is that the task redirection we've set up above will be clobbered by the timer IRQ.
// If we used an InterruptDisabler that sti()'d on exit, we might get timer ticked too soon in exec().
if (Process::current == this)
@@ -1036,15 +1046,9 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
// NOTE: Be careful to not trigger any page faults below!
Scheduler::prepare_to_modify_tss(*new_main_thread);
m_name = parts.take_last();
new_main_thread->set_name(m_name);
auto& tss = new_main_thread->m_tss;
u32 old_esp0 = tss.esp0;
m_master_tls_size = master_tls_size;
m_master_tls_alignment = master_tls_alignment;
@@ -1052,25 +1056,21 @@ int Process::do_exec(NonnullRefPtr<FileDescription> main_program_description, Ve
new_main_thread->make_thread_specific_region({});
new_main_thread->reset_fpu_state();
memset(&tss, 0, sizeof(TSS32));
tss.iomapbase = sizeof(TSS32);
tss.eflags = 0x0202;
// NOTE: if a context switch were to happen, tss.eip and tss.esp would get overwritten!!!
auto& tss = new_main_thread->m_tss;
tss.cs = GDT_SELECTOR_CODE3 | 3;
tss.ds = GDT_SELECTOR_DATA3 | 3;
tss.es = GDT_SELECTOR_DATA3 | 3;
tss.ss = GDT_SELECTOR_DATA3 | 3;
tss.fs = GDT_SELECTOR_DATA3 | 3;
tss.gs = GDT_SELECTOR_TLS | 3;
tss.eip = entry_eip;
tss.cs = 0x1b;
tss.ds = 0x23;
tss.es = 0x23;
tss.fs = 0x23;
tss.gs = thread_specific_selector() | 3;
tss.ss = 0x23;
tss.cr3 = page_directory().cr3();
tss.esp = new_userspace_esp;
tss.ss0 = 0x10;
tss.esp0 = old_esp0;
tss.cr3 = m_page_directory->cr3();
tss.ss2 = m_pid;
#ifdef TASK_DEBUG
klog() << "Process exec'd " << path.characters() << " @ " << String::format("%p", tss.eip);
klog() << "Process exec'd " << path.characters() << " @ " << String::format("%p", entry_eip);
#endif
if (was_profiling)
@@ -1261,7 +1261,8 @@ int Process::exec(String path, Vector<String> arguments, Vector<String> environm
}
if (Process::current == this) {
Scheduler::yield();
Thread::current->set_state(Thread::State::Running);
Processor::assume_context(*Thread::current);
ASSERT_NOT_REACHED();
}
return 0;

View file

@@ -76,22 +76,10 @@ timeval Scheduler::time_since_boot()
Thread* g_finalizer;
Thread* g_colonel;
WaitQueue* g_finalizer_wait_queue;
bool g_finalizer_has_work;
Atomic<bool> g_finalizer_has_work{false};
static Process* s_colonel_process;
u64 g_uptime;
struct TaskRedirectionData {
u16 selector;
TSS32 tss;
};
static TaskRedirectionData s_redirection;
static bool s_active;
bool Scheduler::is_active()
{
return s_active;
}
Thread::JoinBlocker::JoinBlocker(Thread& joinee, void*& joinee_exit_value)
: m_joinee(joinee)
, m_joinee_exit_value(joinee_exit_value)
@@ -280,6 +268,7 @@ bool Thread::WaitBlocker::should_unblock(Thread& thread, time_t, long)
return IterationDecision::Continue;
m_waitee_pid = child.pid();
dbg() << "Unblocking thread " << thread << " process " << thread.process() << " child exited: " << m_waitee_pid;
should_unblock = true;
return IterationDecision::Break;
});
@@ -325,21 +314,26 @@ void Thread::consider_unblock(time_t now_sec, long now_usec)
}
}
void Scheduler::start()
{
ASSERT_INTERRUPTS_DISABLED();
ASSERT(!Thread::current);
Thread::current = g_colonel;
Process::current = &g_colonel->process();
g_colonel->set_ticks_left(time_slice_for(*g_colonel));
g_colonel->did_schedule();
g_colonel->set_initialized(true);
Processor::init_context(*g_colonel);
g_colonel->set_state(Thread::Running);
Processor::current().initialize_context_switching(*g_colonel);
ASSERT_NOT_REACHED();
}
bool Scheduler::pick_next()
{
ASSERT_INTERRUPTS_DISABLED();
ASSERT(!s_active);
TemporaryChange<bool> change(s_active, true);
ASSERT(s_active);
if (!Thread::current) {
// XXX: The first ever context_switch() goes to the idle process.
// This to setup a reliable place we can return to.
return context_switch(*g_colonel);
}
ASSERT(Thread::current);
auto now = time_since_boot();
auto now_sec = now.tv_sec;
@@ -448,52 +442,48 @@ bool Scheduler::pick_next()
return context_switch(*thread_to_schedule);
}
bool Scheduler::yield()
{
//#ifdef SCHEDULER_DEBUG
#if 0
dbg() << "Scheduler: yielding thread " << *Thread::current << " in_trap: " << Processor::current().in_trap() << " in_irq: " << Processor::current().in_irq();
#endif
InterruptDisabler disabler;
ASSERT(Thread::current);
if (Processor::current().in_irq()) {
// If we're handling an IRQ we can't switch context, delay until
// exiting the trap
Processor::current().invoke_scheduler_async();
} else if (!Scheduler::pick_next())
return false;
//#ifdef SCHEDULER_DEBUG
#if 0
dbg() << "Scheduler: yield returns to thread " << *Thread::current << " in_trap: " << Processor::current().in_trap() << " in_irq: " << Processor::current().in_irq();
#endif
return true;
}
bool Scheduler::donate_to(Thread* beneficiary, const char* reason)
{
InterruptDisabler disabler;
ASSERT(!Processor::current().in_irq());
if (!Thread::is_thread(beneficiary))
return false;
(void)reason;
unsigned ticks_left = Thread::current->ticks_left();
if (!beneficiary || beneficiary->state() != Thread::Runnable || ticks_left <= 1)
return yield();
return Scheduler::yield();
unsigned ticks_to_donate = min(ticks_left - 1, time_slice_for(*beneficiary));
#ifdef SCHEDULER_DEBUG
dbg() << "Scheduler: Donating " << ticks_to_donate << " ticks to " << *beneficiary << ", reason=" << reason;
#endif
context_switch(*beneficiary);
beneficiary->set_ticks_left(ticks_to_donate);
switch_now();
Scheduler::context_switch(*beneficiary);
return false;
}
bool Scheduler::yield()
{
InterruptDisabler disabler;
ASSERT(Thread::current);
if (!pick_next())
return false;
switch_now();
return true;
}
void Scheduler::pick_next_and_switch_now()
{
bool someone_wants_to_run = pick_next();
ASSERT(someone_wants_to_run);
switch_now();
}
void Scheduler::switch_now()
{
Descriptor& descriptor = get_gdt_entry(Thread::current->selector());
descriptor.type = 9;
asm("sti\n"
"ljmp *(%%eax)\n" ::"a"(&Thread::current->far_ptr()));
}
bool Scheduler::context_switch(Thread& thread)
{
thread.set_ticks_left(time_slice_for(thread));
@@ -508,96 +498,47 @@ bool Scheduler::context_switch(Thread& thread)
if (Thread::current->state() == Thread::Running)
Thread::current->set_state(Thread::Runnable);
asm volatile("fxsave %0"
: "=m"(Thread::current->fpu_state()));
#ifdef LOG_EVERY_CONTEXT_SWITCH
dbg() << "Scheduler: " << *Thread::current << " -> " << thread << " [" << thread.priority() << "] " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip);
#endif
}
Thread* from = Thread::current;
Thread::current = &thread;
Process::current = &thread.process();
if (!thread.is_initialized()) {
Processor::init_context(thread);
thread.set_initialized(true);
}
thread.set_state(Thread::Running);
asm volatile("fxrstor %0" ::"m"(Thread::current->fpu_state()));
if (!thread.selector()) {
thread.set_selector(gdt_alloc_entry());
auto& descriptor = get_gdt_entry(thread.selector());
descriptor.set_base(&thread.tss());
descriptor.set_limit(sizeof(TSS32));
descriptor.dpl = 0;
descriptor.segment_present = 1;
descriptor.granularity = 0;
descriptor.zero = 0;
descriptor.operation_size = 1;
descriptor.descriptor_type = 0;
}
if (!thread.thread_specific_data().is_null()) {
auto& descriptor = thread_specific_descriptor();
descriptor.set_base(thread.thread_specific_data().as_ptr());
descriptor.set_limit(sizeof(ThreadSpecificData*));
}
auto& descriptor = get_gdt_entry(thread.selector());
descriptor.type = 11; // Busy TSS
Processor::current().switch_context(from, &thread);
return true;
}
static void initialize_redirection()
{
auto& descriptor = get_gdt_entry(s_redirection.selector);
descriptor.set_base(&s_redirection.tss);
descriptor.set_limit(sizeof(TSS32));
descriptor.dpl = 0;
descriptor.segment_present = 1;
descriptor.granularity = 0;
descriptor.zero = 0;
descriptor.operation_size = 1;
descriptor.descriptor_type = 0;
descriptor.type = 9;
flush_gdt();
}
void Scheduler::prepare_for_iret_to_new_process()
{
auto& descriptor = get_gdt_entry(s_redirection.selector);
descriptor.type = 9;
s_redirection.tss.backlink = Thread::current->selector();
load_task_register(s_redirection.selector);
}
void Scheduler::prepare_to_modify_tss(Thread& thread)
{
// This ensures that a currently running process modifying its own TSS
// in order to yield() and end up somewhere else doesn't just end up
// right after the yield().
if (Thread::current == &thread)
load_task_register(s_redirection.selector);
}
Process* Scheduler::colonel()
{
return s_colonel_process;
}
void Scheduler::initialize()
void Scheduler::initialize(u32 cpu)
{
ASSERT(&Processor::current() != nullptr); // sanity check
g_scheduler_data = new SchedulerData;
g_finalizer_wait_queue = new WaitQueue;
g_finalizer_has_work = false;
s_redirection.selector = gdt_alloc_entry();
initialize_redirection();
s_colonel_process = Process::create_kernel_process(g_colonel, "colonel", nullptr);
g_colonel->set_priority(THREAD_PRIORITY_MIN);
load_task_register(s_redirection.selector);
if (cpu == 0) {
g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
s_colonel_process = Process::create_kernel_process(g_colonel, "colonel", idle_loop);
g_colonel->set_priority(THREAD_PRIORITY_MIN);
}
}
void Scheduler::timer_tick(const RegisterState& regs)
{
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Processor::current().in_irq());
if (!Thread::current)
return;
@@ -622,62 +563,25 @@ void Scheduler::timer_tick(const RegisterState& regs)
if (Thread::current->tick())
return;
auto& outgoing_tss = Thread::current->tss();
if (!pick_next())
return;
outgoing_tss.gs = regs.gs;
outgoing_tss.fs = regs.fs;
outgoing_tss.es = regs.es;
outgoing_tss.ds = regs.ds;
outgoing_tss.edi = regs.edi;
outgoing_tss.esi = regs.esi;
outgoing_tss.ebp = regs.ebp;
outgoing_tss.ebx = regs.ebx;
outgoing_tss.edx = regs.edx;
outgoing_tss.ecx = regs.ecx;
outgoing_tss.eax = regs.eax;
outgoing_tss.eip = regs.eip;
outgoing_tss.cs = regs.cs;
outgoing_tss.eflags = regs.eflags;
// Compute process stack pointer.
// Add 16 for CS, EIP, EFLAGS, exception code (interrupt mechanic)
outgoing_tss.esp = regs.esp + 16;
outgoing_tss.ss = regs.ss;
if ((outgoing_tss.cs & 3) != 0) {
outgoing_tss.ss = regs.userspace_ss;
outgoing_tss.esp = regs.userspace_esp;
}
prepare_for_iret_to_new_process();
// Set the NT (nested task) flag.
asm(
"pushf\n"
"orl $0x00004000, (%esp)\n"
"popf\n");
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Processor::current().in_irq());
Processor::current().invoke_scheduler_async();
}
static bool s_should_stop_idling = false;
void Scheduler::stop_idling()
void Scheduler::invoke_async()
{
if (Thread::current != g_colonel)
return;
s_should_stop_idling = true;
ASSERT_INTERRUPTS_DISABLED();
ASSERT(!Processor::current().in_irq());
pick_next();
}
void Scheduler::idle_loop()
{
dbg() << "Scheduler: idle loop on CPU #" << Processor::current().id();
ASSERT(are_interrupts_enabled());
for (;;) {
asm("hlt");
if (s_should_stop_idling) {
s_should_stop_idling = false;
yield();
}
yield();
}
}

View file

@@ -43,28 +43,25 @@ struct SchedulerData;
extern Thread* g_finalizer;
extern Thread* g_colonel;
extern WaitQueue* g_finalizer_wait_queue;
extern bool g_finalizer_has_work;
extern Atomic<bool> g_finalizer_has_work;
extern u64 g_uptime;
extern SchedulerData* g_scheduler_data;
extern timeval g_timeofday;
class Scheduler {
public:
static void initialize();
static void initialize(u32 cpu);
static void timer_tick(const RegisterState&);
[[noreturn]] static void start();
static bool pick_next();
static timeval time_since_boot();
static void pick_next_and_switch_now();
static void switch_now();
static bool yield();
static bool donate_to(Thread*, const char* reason);
static bool context_switch(Thread&);
static void prepare_to_modify_tss(Thread&);
static Process* colonel();
static bool is_active();
static void beep();
static void idle_loop();
static void stop_idling();
static void invoke_async();
template<typename Callback>
static inline IterationDecision for_each_runnable(Callback);
@@ -74,9 +71,6 @@ public:
static void init_thread(Thread& thread);
static void update_state_for_thread(Thread& thread);
private:
static void prepare_for_iret_to_new_process();
};
}

View file

@@ -69,6 +69,42 @@ public:
}
};
class RecursiveSpinLock
{
AK::Atomic<FlatPtr> m_lock{0};
u32 m_recursions{0};
public:
RecursiveSpinLock() = default;
RecursiveSpinLock(const RecursiveSpinLock&) = delete;
RecursiveSpinLock(RecursiveSpinLock&&) = delete;
ALWAYS_INLINE void lock()
{
FlatPtr cpu = FlatPtr(&Processor::current());
FlatPtr expected = 0;
while (!m_lock.compare_exchange_strong(expected, cpu, AK::memory_order_acq_rel)) {
if (expected == cpu)
break;
expected = 0;
}
m_recursions++;
}
ALWAYS_INLINE void unlock()
{
ASSERT(m_recursions > 0);
ASSERT(m_lock.load(AK::memory_order_consume) == FlatPtr(&Processor::current()));
if (--m_recursions == 0)
m_lock.store(0, AK::memory_order_release);
}
ALWAYS_INLINE bool is_locked() const
{
return m_lock.load(AK::memory_order_consume) != 0;
}
};
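Unlike a plain spinlock, RecursiveSpinLock keys ownership on the current Processor's address, so the same CPU may re-enter lock() without deadlocking; only the outermost unlock() releases it. A hypothetical usage sketch (s_example_lock and both functions are illustrative, not from this commit):

static RecursiveSpinLock s_example_lock;

void inner()
{
    s_example_lock.lock();   // same CPU as outer(): just bumps m_recursions
    // ... touch shared state ...
    s_example_lock.unlock();
}

void outer()
{
    s_example_lock.lock();
    inner();                 // re-entry is fine on this CPU
    s_example_lock.unlock(); // m_recursions hits 0 here; lock is released
}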
template <typename BaseType = u32, typename LockType = SpinLock<BaseType>>
class ScopedSpinLock
{

View file

@@ -33,7 +33,7 @@
namespace Kernel {
extern "C" void syscall_handler(RegisterState&);
extern "C" void syscall_handler(TrapFrame*);
extern "C" void syscall_asm_entry();
asm(
@@ -46,22 +46,23 @@ asm(
" pushl %fs\n"
" pushl %gs\n"
" pushl %ss\n"
" mov $0x10, %ax\n"
" mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n"
" mov %ax, %ds\n"
" mov %ax, %es\n"
" mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n"
" mov %ax, %fs\n"
" cld\n"
" xor %esi, %esi\n"
" xor %edi, %edi\n"
" push %esp\n"
" call syscall_handler\n"
" add $0x8, %esp\n"
" popl %gs\n"
" popl %fs\n"
" popl %es\n"
" popl %ds\n"
" popa\n"
" add $0x4, %esp\n"
" iret\n");
" pushl %esp \n" // set TrapFrame::regs
" subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n"
" movl %esp, %ebx \n"
" pushl %ebx \n" // push pointer to TrapFrame
" call enter_trap_no_irq \n"
" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
" call syscall_handler \n"
" movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
" jmp common_trap_exit \n");
namespace Syscall {
@@ -120,8 +121,9 @@ int handle(RegisterState& regs, u32 function, u32 arg1, u32 arg2, u32 arg3)
}
void syscall_handler(RegisterState& regs)
void syscall_handler(TrapFrame* trap)
{
auto& regs = *trap->regs;
// Special handling of the "gettid" syscall since it's extremely hot.
// FIXME: Remove this hack once userspace locks stop calling it so damn much.
if (regs.eax == SC_gettid) {

View file

@@ -34,14 +34,12 @@ void FinalizerTask::spawn()
Process::create_kernel_process(g_finalizer, "FinalizerTask", [] {
Thread::current->set_priority(THREAD_PRIORITY_LOW);
for (;;) {
{
InterruptDisabler disabler;
if (!g_finalizer_has_work)
Thread::current->wait_on(*g_finalizer_wait_queue);
ASSERT(g_finalizer_has_work);
g_finalizer_has_work = false;
}
Thread::finalize_dying_threads();
dbg() << "Finalizer task is running";
Thread::current->wait_on(*g_finalizer_wait_queue);
bool expected = true;
if (g_finalizer_has_work.compare_exchange_strong(expected, false, AK::MemoryOrder::memory_order_acq_rel))
Thread::finalize_dying_threads();
}
});
}

View file

@@ -35,6 +35,7 @@ void SyncTask::spawn()
{
Thread* syncd_thread = nullptr;
Process::create_kernel_process(syncd_thread, "SyncTask", [] {
dbg() << "SyncTask is running";
for (;;) {
VFS::the().sync();
Thread::current->sleep(1 * TimeManagement::the().ticks_per_second());

View file

@@ -48,30 +48,6 @@ namespace Kernel {
Thread* Thread::current;
static FPUState s_clean_fpu_state;
u16 thread_specific_selector()
{
static u16 selector;
if (!selector) {
selector = gdt_alloc_entry();
auto& descriptor = get_gdt_entry(selector);
descriptor.dpl = 3;
descriptor.segment_present = 1;
descriptor.granularity = 0;
descriptor.zero = 0;
descriptor.operation_size = 1;
descriptor.descriptor_type = 1;
descriptor.type = 2;
}
return selector;
}
Descriptor& thread_specific_descriptor()
{
return get_gdt_entry(thread_specific_selector());
}
HashTable<Thread*>& thread_table()
{
ASSERT_INTERRUPTS_DISABLED();
@@ -103,27 +79,23 @@ Thread::Thread(Process& process)
// Only IF is set when a process boots.
m_tss.eflags = 0x0202;
u16 cs, ds, ss, gs;
if (m_process.is_ring0()) {
cs = 0x08;
ds = 0x10;
ss = 0x10;
gs = 0;
m_tss.cs = GDT_SELECTOR_CODE0;
m_tss.ds = GDT_SELECTOR_DATA0;
m_tss.es = GDT_SELECTOR_DATA0;
m_tss.fs = GDT_SELECTOR_PROC;
m_tss.ss = GDT_SELECTOR_DATA0;
m_tss.gs = 0;
} else {
cs = 0x1b;
ds = 0x23;
ss = 0x23;
gs = thread_specific_selector() | 3;
m_tss.cs = GDT_SELECTOR_CODE3 | 3;
m_tss.ds = GDT_SELECTOR_DATA3 | 3;
m_tss.es = GDT_SELECTOR_DATA3 | 3;
m_tss.fs = GDT_SELECTOR_DATA3 | 3;
m_tss.ss = GDT_SELECTOR_DATA3 | 3;
m_tss.gs = GDT_SELECTOR_TLS | 3;
}
m_tss.ds = ds;
m_tss.es = ds;
m_tss.fs = ds;
m_tss.gs = gs;
m_tss.ss = ss;
m_tss.cs = cs;
m_tss.cr3 = m_process.page_directory().cr3();
m_kernel_stack_region = MM.allocate_kernel_region(default_kernel_stack_size, String::format("Kernel Stack (Thread %d)", m_tid), Region::Access::Read | Region::Access::Write, false, true);
@@ -132,11 +104,11 @@ Thread::Thread(Process& process)
m_kernel_stack_top = m_kernel_stack_region->vaddr().offset(default_kernel_stack_size).get() & 0xfffffff8u;
if (m_process.is_ring0()) {
m_tss.esp = m_kernel_stack_top;
m_tss.esp = m_tss.esp0 = m_kernel_stack_top;
} else {
// Ring 3 processes get a separate stack for ring 0.
// The ring 3 stack will be assigned by exec().
m_tss.ss0 = 0x10;
m_tss.ss0 = GDT_SELECTOR_DATA0;
m_tss.esp0 = m_kernel_stack_top;
}
@@ -155,9 +127,6 @@ Thread::~Thread()
thread_table().remove(this);
}
if (selector())
gdt_free_entry(selector());
ASSERT(m_process.m_thread_count);
m_process.m_thread_count--;
}
@@ -219,9 +188,7 @@ void Thread::die_if_needed()
InterruptDisabler disabler;
set_state(Thread::State::Dying);
if (!Scheduler::is_active())
Scheduler::pick_next_and_switch_now();
Scheduler::yield();
}
void Thread::yield_without_holding_big_lock()
@@ -613,12 +580,11 @@ ShouldUnblockThread Thread::dispatch_signal(u8 signal)
u32* stack = &m_tss.esp;
setup_stack(m_tss, stack);
Scheduler::prepare_to_modify_tss(*this);
m_tss.cs = 0x1b;
m_tss.ds = 0x23;
m_tss.es = 0x23;
m_tss.fs = 0x23;
m_tss.gs = thread_specific_selector() | 3;
m_tss.cs = GDT_SELECTOR_CODE3 | 3;
m_tss.ds = GDT_SELECTOR_DATA3 | 3;
m_tss.es = GDT_SELECTOR_DATA3 | 3;
m_tss.fs = GDT_SELECTOR_DATA3 | 3;
m_tss.gs = GDT_SELECTOR_TLS | 3;
m_tss.eip = g_return_to_ring3_from_signal_trampoline.get();
// FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
set_state(Skip1SchedulerPass);
@@ -713,17 +679,10 @@ Thread* Thread::clone(Process& process)
clone->m_signal_mask = m_signal_mask;
memcpy(clone->m_fpu_state, m_fpu_state, sizeof(FPUState));
clone->m_thread_specific_data = m_thread_specific_data;
clone->m_thread_specific_region_size = m_thread_specific_region_size;
return clone;
}
void Thread::initialize()
{
Scheduler::initialize();
asm volatile("fninit");
asm volatile("fxsave %0"
: "=m"(s_clean_fpu_state));
}
Vector<Thread*> Thread::all_threads()
{
Vector<Thread*> threads;
@@ -760,10 +719,14 @@ void Thread::set_state(State new_state)
Scheduler::update_state_for_thread(*this);
}
if (new_state == Dying) {
g_finalizer_has_work = true;
g_finalizer_wait_queue->wake_all();
}
if (new_state == Dying)
notify_finalizer();
}
void Thread::notify_finalizer()
{
g_finalizer_has_work.store(true, AK::MemoryOrder::memory_order_release);
g_finalizer_wait_queue->wake_all();
}
String Thread::backtrace(ProcessInspectionHandle&) const
@@ -786,7 +749,7 @@ static bool symbolicate(const RecognizedSymbol& symbol, const Process& process,
if (!is_user_address(VirtualAddress(symbol.address))) {
builder.append("0xdeadc0de\n");
} else {
if (!Scheduler::is_active() && elf_bundle && elf_bundle->elf_loader->has_symbols())
if (elf_bundle && elf_bundle->elf_loader->has_symbols())
builder.appendf("%p %s\n", symbol.address, elf_bundle->elf_loader->symbolicate(symbol.address).characters());
else
builder.appendf("%p\n", symbol.address);
@@ -863,8 +826,8 @@ Vector<FlatPtr> Thread::raw_backtrace(FlatPtr ebp, FlatPtr eip) const
void Thread::make_thread_specific_region(Badge<Process>)
{
size_t thread_specific_region_alignment = max(process().m_master_tls_alignment, alignof(ThreadSpecificData));
size_t thread_specific_region_size = align_up_to(process().m_master_tls_size, thread_specific_region_alignment) + sizeof(ThreadSpecificData);
auto* region = process().allocate_region({}, thread_specific_region_size, "Thread-specific", PROT_READ | PROT_WRITE, true);
m_thread_specific_region_size = align_up_to(process().m_master_tls_size, thread_specific_region_alignment) + sizeof(ThreadSpecificData);
auto* region = process().allocate_region({}, m_thread_specific_region_size, "Thread-specific", PROT_READ | PROT_WRITE, true);
SmapDisabler disabler;
auto* thread_specific_data = (ThreadSpecificData*)region->vaddr().offset(align_up_to(process().m_master_tls_size, thread_specific_region_alignment)).as_ptr();
auto* thread_local_storage = (u8*)((u8*)thread_specific_data) - align_up_to(process().m_master_tls_size, process().m_master_tls_alignment);
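
The region layout above rounds the master TLS size up to the stricter of the TLS alignment and alignof(ThreadSpecificData), then appends the per-thread header; the total is what now gets cached in m_thread_specific_region_size. A worked sketch of the rounding, assuming align_up_to() is the usual power-of-two round-up (sizes are made up):

#include <cassert>
#include <cstddef>

constexpr size_t align_up_to(size_t value, size_t alignment)
{
    return (value + alignment - 1) & ~(alignment - 1); // alignment must be a power of two
}

int main()
{
    size_t master_tls_size = 36; // hypothetical
    size_t alignment = 16;       // max(master TLS alignment, alignof(ThreadSpecificData))
    assert(align_up_to(master_tls_size, alignment) == 48);

    size_t header_size = 8; // stand-in for sizeof(ThreadSpecificData)
    size_t region_size = align_up_to(master_tls_size, alignment) + header_size;
    assert(region_size == 56); // what m_thread_specific_region_size would record
    return 0;
}
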
@ -881,25 +844,34 @@ const LogStream& operator<<(const LogStream& stream, const Thread& value)
Thread::BlockResult Thread::wait_on(WaitQueue& queue, timeval* timeout, Atomic<bool>* lock, Thread* beneficiary, const char* reason)
{
cli();
bool did_unlock = unlock_process_if_locked();
if (lock)
*lock = false;
set_state(State::Queued);
queue.enqueue(*current);
TimerId timer_id {};
if (timeout) {
timer_id = TimerQueue::the().add_timer(*timeout, [&]() {
wake_from_queue();
});
bool did_unlock;
{
InterruptDisabler disable;
did_unlock = unlock_process_if_locked();
if (lock)
*lock = false;
set_state(State::Queued);
queue.enqueue(*current);
if (timeout) {
timer_id = TimerQueue::the().add_timer(*timeout, [&]() {
wake_from_queue();
});
}
// Yield and wait for the queue to wake us up again.
if (beneficiary)
Scheduler::donate_to(beneficiary, reason);
else
Scheduler::yield();
}
// Yield and wait for the queue to wake us up again.
if (beneficiary)
Scheduler::donate_to(beneficiary, reason);
else
Scheduler::yield();
if (!are_interrupts_enabled())
sti();
// We've unblocked, relock the process if needed and carry on.
if (did_unlock)
relock_process();
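
The rewrite narrows interrupt masking to the block that manipulates scheduler state: unlocking the process, enqueuing, arming the timer, and yielding all happen with interrupts disabled, and the RAII guard restores them before relock_process() runs. A stand-alone sketch of the scoping pattern (a plain flag substitutes for real cli/sti):

struct InterruptDisabler {
    bool& flag;
    bool was_enabled;
    explicit InterruptDisabler(bool& f)
        : flag(f)
        , was_enabled(f)
    {
        flag = false; // kernel: cli()
    }
    ~InterruptDisabler() { flag = was_enabled; } // restore on scope exit
};

bool interrupts_enabled = true;

int main()
{
    {
        InterruptDisabler disable(interrupts_enabled);
        // ... enqueue on the wait queue, arm the timeout, yield ...
    } // interrupts restored here, before the process lock is retaken
    // ... relock_process() and carry on with interrupts enabled ...
    return interrupts_enabled ? 0 : 1;
}
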
@ -916,7 +888,10 @@ Thread::BlockResult Thread::wait_on(WaitQueue& queue, timeval* timeout, Atomic<b
void Thread::wake_from_queue()
{
ASSERT(state() == State::Queued);
set_state(State::Runnable);
if (this != Thread::current)
set_state(State::Runnable);
else
set_state(State::Running);
}
Thread* Thread::from_tid(int tid)
@ -935,7 +910,7 @@ Thread* Thread::from_tid(int tid)
void Thread::reset_fpu_state()
{
memcpy(m_fpu_state, &s_clean_fpu_state, sizeof(FPUState));
memcpy(m_fpu_state, &Processor::current().clean_fpu_state(), sizeof(FPUState));
}
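
The clean FPU snapshot moves from a file-scope static into the per-CPU Processor structure, but it is still produced the same way: fninit to reach a known state, then fxsave into an aligned 512-byte buffer. An x86-specific sketch (GCC/Clang inline assembly; names are illustrative):

#include <cstring>

struct alignas(16) FPUState {    // fxsave requires 16-byte alignment
    unsigned char buffer[512] {};
};

static FPUState clean_fpu_state; // kernel: one of these per Processor

void capture_clean_fpu_state()
{
    asm volatile("fninit");                            // reset x87 state
    asm volatile("fxsave %0" : "=m"(clean_fpu_state)); // snapshot it
}

void reset_thread_fpu_state(FPUState& thread_state)
{
    // Every new or reset thread starts from the same pristine snapshot.
    memcpy(&thread_state, &clean_fpu_state, sizeof(FPUState));
}

int main()
{
    capture_clean_fpu_state();
    FPUState thread_state;
    reset_thread_fpu_state(thread_state);
    return 0;
}
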
void Thread::start_tracing_from(pid_t tracer)


@ -77,7 +77,6 @@ public:
~Thread();
static Thread* from_tid(int);
static void initialize();
static void finalize_dying_threads();
static Vector<Thread*> all_threads();
@ -287,6 +286,7 @@ public:
u32 ticks() const { return m_ticks; }
VirtualAddress thread_specific_data() const { return m_thread_specific_data; }
size_t thread_specific_region_size() const { return m_thread_specific_region_size; }
u64 sleep(u32 ticks);
u64 sleep_until(u64 wakeup_time);
@ -354,6 +354,9 @@ public:
void set_selector(u16 s) { m_far_ptr.selector = s; }
void set_state(State);
bool is_initialized() const { return m_initialized; }
void set_initialized(bool initialized) { m_initialized = initialized; }
void send_urgent_signal_to_self(u8 signal);
void send_signal(u8 signal, Process* sender);
void consider_unblock(time_t now_sec, long now_usec);
@ -472,6 +475,7 @@ private:
u32 m_kernel_stack_top { 0 };
OwnPtr<Region> m_kernel_stack_region;
VirtualAddress m_thread_specific_data;
size_t m_thread_specific_region_size { 0 };
SignalActionData m_signal_action_data[32];
Blocker* m_blocker { nullptr };
@ -506,9 +510,11 @@ private:
bool m_dump_backtrace_on_finalization { false };
bool m_should_die { false };
bool m_initialized { false };
OwnPtr<ThreadTracer> m_tracer;
void notify_finalizer();
void yield_without_holding_big_lock();
};
@ -595,7 +601,4 @@ inline IterationDecision Scheduler::for_each_nonrunnable(Callback callback)
return IterationDecision::Continue;
}
u16 thread_specific_selector();
Descriptor& thread_specific_descriptor();
}


@ -51,6 +51,7 @@ extern FlatPtr end_of_kernel_bss;
namespace Kernel {
static MemoryManager* s_the;
RecursiveSpinLock MemoryManager::s_lock;
MemoryManager& MM
{
@ -164,6 +165,7 @@ void MemoryManager::parse_memory_map()
const PageTableEntry* MemoryManager::pte(const PageDirectory& page_directory, VirtualAddress vaddr)
{
ASSERT_INTERRUPTS_DISABLED();
ScopedSpinLock lock(s_lock);
u32 page_directory_table_index = (vaddr.get() >> 30) & 0x3;
u32 page_directory_index = (vaddr.get() >> 21) & 0x1ff;
u32 page_table_index = (vaddr.get() >> 12) & 0x1ff;
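
MemoryManager state is now guarded by a RecursiveSpinLock taken through ScopedSpinLock RAII guards, so a CPU that already holds the lock (for instance ensure_pte() calling back into pte()) can re-enter without deadlocking. A minimal userspace sketch of such a lock, keyed on std::thread::id where the kernel keys on the owning Processor (and assuming, for simplicity, that the hashed id is never zero):

#include <atomic>
#include <functional>
#include <thread>

class RecursiveSpinLock {
    std::atomic<size_t> m_owner { 0 }; // 0 means unlocked
    unsigned m_recursion_count { 0 };

    static size_t self()
    {
        return std::hash<std::thread::id> {}(std::this_thread::get_id());
    }

public:
    void lock()
    {
        size_t me = self();
        if (m_owner.load(std::memory_order_acquire) == me) {
            ++m_recursion_count; // already ours, just nest deeper
            return;
        }
        size_t expected = 0;
        while (!m_owner.compare_exchange_weak(expected, me, std::memory_order_acquire)) {
            expected = 0;
            std::this_thread::yield(); // kernel: spin with pause
        }
        m_recursion_count = 1;
    }

    void unlock()
    {
        if (--m_recursion_count == 0)
            m_owner.store(0, std::memory_order_release);
    }
};

class ScopedSpinLock {
    RecursiveSpinLock& m_lock;

public:
    explicit ScopedSpinLock(RecursiveSpinLock& lock)
        : m_lock(lock)
    {
        m_lock.lock();
    }
    ~ScopedSpinLock() { m_lock.unlock(); }
};

int main()
{
    RecursiveSpinLock lock;
    ScopedSpinLock outer(lock);
    ScopedSpinLock inner(lock); // same-thread re-entry is allowed
    return 0;
}
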
@ -179,6 +181,7 @@ const PageTableEntry* MemoryManager::pte(const PageDirectory& page_directory, Vi
PageTableEntry& MemoryManager::ensure_pte(PageDirectory& page_directory, VirtualAddress vaddr)
{
ASSERT_INTERRUPTS_DISABLED();
ScopedSpinLock lock(s_lock);
u32 page_directory_table_index = (vaddr.get() >> 30) & 0x3;
u32 page_directory_index = (vaddr.get() >> 21) & 0x1ff;
u32 page_table_index = (vaddr.get() >> 12) & 0x1ff;
@ -211,6 +214,7 @@ void MemoryManager::initialize()
Region* MemoryManager::kernel_region_from_vaddr(VirtualAddress vaddr)
{
ScopedSpinLock lock(s_lock);
for (auto& region : MM.m_kernel_regions) {
if (region.contains(vaddr))
return &region;
@ -220,6 +224,7 @@ Region* MemoryManager::kernel_region_from_vaddr(VirtualAddress vaddr)
Region* MemoryManager::user_region_from_vaddr(Process& process, VirtualAddress vaddr)
{
ScopedSpinLock lock(s_lock);
// FIXME: Use a binary search tree (maybe red/black?) or some other more appropriate data structure!
for (auto& region : process.m_regions) {
if (region.contains(vaddr))
@ -233,6 +238,7 @@ Region* MemoryManager::user_region_from_vaddr(Process& process, VirtualAddress v
Region* MemoryManager::region_from_vaddr(Process& process, VirtualAddress vaddr)
{
ScopedSpinLock lock(s_lock);
if (auto* region = user_region_from_vaddr(process, vaddr))
return region;
return kernel_region_from_vaddr(vaddr);
@ -240,6 +246,7 @@ Region* MemoryManager::region_from_vaddr(Process& process, VirtualAddress vaddr)
const Region* MemoryManager::region_from_vaddr(const Process& process, VirtualAddress vaddr)
{
ScopedSpinLock lock(s_lock);
if (auto* region = user_region_from_vaddr(const_cast<Process&>(process), vaddr))
return region;
return kernel_region_from_vaddr(vaddr);
@ -247,6 +254,7 @@ const Region* MemoryManager::region_from_vaddr(const Process& process, VirtualAd
Region* MemoryManager::region_from_vaddr(VirtualAddress vaddr)
{
ScopedSpinLock lock(s_lock);
if (auto* region = kernel_region_from_vaddr(vaddr))
return region;
auto page_directory = PageDirectory::find_by_cr3(read_cr3());
@ -260,16 +268,18 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
{
ASSERT_INTERRUPTS_DISABLED();
ASSERT(Thread::current);
if (g_in_irq) {
dbg() << "BUG! Page fault while handling IRQ! code=" << fault.code() << ", vaddr=" << fault.vaddr();
ScopedSpinLock lock(s_lock);
if (Processor::current().in_irq()) {
dbg() << "CPU[" << Processor::id() << "] BUG! Page fault while handling IRQ! code=" << fault.code() << ", vaddr=" << fault.vaddr() << ", irq level: " << Processor::current().in_irq();
dump_kernel_regions();
return PageFaultResponse::ShouldCrash;
}
#ifdef PAGE_FAULT_DEBUG
dbg() << "MM: handle_page_fault(" << String::format("%w", fault.code()) << ") at " << fault.vaddr();
dbg() << "MM: CPU[" << Processor::id() << "] handle_page_fault(" << String::format("%w", fault.code()) << ") at " << fault.vaddr();
#endif
auto* region = region_from_vaddr(fault.vaddr());
if (!region) {
klog() << "NP(error) fault at invalid address " << fault.vaddr();
klog() << "CPU[" << Processor::id() << "] NP(error) fault at invalid address " << fault.vaddr();
return PageFaultResponse::ShouldCrash;
}
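
The old global g_in_irq becomes a nesting counter inside the per-CPU Processor structure, so each core independently knows whether it is inside an interrupt handler when a page fault arrives. A sketch of the counter, with thread_local approximating the kernel's per-processor storage (all names are stand-ins):

#include <cstdint>

struct Processor {
    uint32_t m_in_irq { 0 };

    uint32_t in_irq() const { return m_in_irq; }
    void enter_irq() { ++m_in_irq; } // called on interrupt entry
    void leave_irq() { --m_in_irq; } // called on interrupt exit

    static Processor& current()
    {
        thread_local Processor proc; // kernel: one instance per CPU, via GS
        return proc;
    }
};

bool page_fault_while_in_irq()
{
    // Faulting with any IRQ handler on the stack is a kernel bug.
    return Processor::current().in_irq() != 0;
}

int main()
{
    Processor::current().enter_irq();
    bool bug = page_fault_while_in_irq(); // true: the kernel would dump regions and crash
    Processor::current().leave_irq();
    return bug ? 0 : 1;
}
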
@ -279,6 +289,7 @@ PageFaultResponse MemoryManager::handle_page_fault(const PageFault& fault)
OwnPtr<Region> MemoryManager::allocate_contiguous_kernel_region(size_t size, const StringView& name, u8 access, bool user_accessible, bool cacheable)
{
ASSERT(!(size % PAGE_SIZE));
ScopedSpinLock lock(s_lock);
auto range = kernel_page_directory().range_allocator().allocate_anywhere(size);
if (!range.is_valid())
return nullptr;
@ -292,6 +303,7 @@ OwnPtr<Region> MemoryManager::allocate_contiguous_kernel_region(size_t size, con
OwnPtr<Region> MemoryManager::allocate_kernel_region(size_t size, const StringView& name, u8 access, bool user_accessible, bool should_commit, bool cacheable)
{
ASSERT(!(size % PAGE_SIZE));
ScopedSpinLock lock(s_lock);
auto range = kernel_page_directory().range_allocator().allocate_anywhere(size);
if (!range.is_valid())
return nullptr;
@ -307,6 +319,7 @@ OwnPtr<Region> MemoryManager::allocate_kernel_region(size_t size, const StringVi
OwnPtr<Region> MemoryManager::allocate_kernel_region(PhysicalAddress paddr, size_t size, const StringView& name, u8 access, bool user_accessible, bool cacheable)
{
ASSERT(!(size % PAGE_SIZE));
ScopedSpinLock lock(s_lock);
auto range = kernel_page_directory().range_allocator().allocate_anywhere(size);
if (!range.is_valid())
return nullptr;
@ -319,6 +332,7 @@ OwnPtr<Region> MemoryManager::allocate_kernel_region(PhysicalAddress paddr, size
OwnPtr<Region> MemoryManager::allocate_kernel_region_identity(PhysicalAddress paddr, size_t size, const StringView& name, u8 access, bool user_accessible, bool cacheable)
{
ASSERT(!(size % PAGE_SIZE));
ScopedSpinLock lock(s_lock);
auto range = kernel_page_directory().identity_range_allocator().allocate_specific(VirtualAddress(paddr.get()), size);
if (!range.is_valid())
return nullptr;
@ -335,7 +349,7 @@ OwnPtr<Region> MemoryManager::allocate_user_accessible_kernel_region(size_t size
OwnPtr<Region> MemoryManager::allocate_kernel_region_with_vmobject(const Range& range, VMObject& vmobject, const StringView& name, u8 access, bool user_accessible, bool cacheable)
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
OwnPtr<Region> region;
if (user_accessible)
region = Region::create_user_accessible(range, vmobject, 0, name, access, cacheable);
@ -349,6 +363,7 @@ OwnPtr<Region> MemoryManager::allocate_kernel_region_with_vmobject(const Range&
OwnPtr<Region> MemoryManager::allocate_kernel_region_with_vmobject(VMObject& vmobject, size_t size, const StringView& name, u8 access, bool user_accessible, bool cacheable)
{
ASSERT(!(size % PAGE_SIZE));
ScopedSpinLock lock(s_lock);
auto range = kernel_page_directory().range_allocator().allocate_anywhere(size);
if (!range.is_valid())
return nullptr;
@ -357,6 +372,7 @@ OwnPtr<Region> MemoryManager::allocate_kernel_region_with_vmobject(VMObject& vmo
void MemoryManager::deallocate_user_physical_page(PhysicalPage&& page)
{
ScopedSpinLock lock(s_lock);
for (auto& region : m_user_physical_regions) {
if (!region.contains(page)) {
klog() << "MM: deallocate_user_physical_page: " << page.paddr() << " not in " << region.lower() << " -> " << region.upper();
@ -375,6 +391,7 @@ void MemoryManager::deallocate_user_physical_page(PhysicalPage&& page)
RefPtr<PhysicalPage> MemoryManager::find_free_user_physical_page()
{
ASSERT(s_lock.is_locked());
RefPtr<PhysicalPage> page;
for (auto& region : m_user_physical_regions) {
page = region.take_free_page(false);
@ -386,7 +403,7 @@ RefPtr<PhysicalPage> MemoryManager::find_free_user_physical_page()
RefPtr<PhysicalPage> MemoryManager::allocate_user_physical_page(ShouldZeroFill should_zero_fill)
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
auto page = find_free_user_physical_page();
if (!page) {
@ -425,6 +442,7 @@ RefPtr<PhysicalPage> MemoryManager::allocate_user_physical_page(ShouldZeroFill s
void MemoryManager::deallocate_supervisor_physical_page(PhysicalPage&& page)
{
ASSERT(s_lock.is_locked());
for (auto& region : m_super_physical_regions) {
if (!region.contains(page)) {
klog() << "MM: deallocate_supervisor_physical_page: " << page.paddr() << " not in " << region.lower() << " -> " << region.upper();
@ -443,7 +461,7 @@ void MemoryManager::deallocate_supervisor_physical_page(PhysicalPage&& page)
NonnullRefPtrVector<PhysicalPage> MemoryManager::allocate_contiguous_supervisor_physical_pages(size_t size)
{
ASSERT(!(size % PAGE_SIZE));
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
size_t count = ceil_div(size, PAGE_SIZE);
NonnullRefPtrVector<PhysicalPage> physical_pages;
@ -471,7 +489,7 @@ NonnullRefPtrVector<PhysicalPage> MemoryManager::allocate_contiguous_supervisor_
RefPtr<PhysicalPage> MemoryManager::allocate_supervisor_physical_page()
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
RefPtr<PhysicalPage> page;
for (auto& region : m_super_physical_regions) {
@ -502,7 +520,7 @@ RefPtr<PhysicalPage> MemoryManager::allocate_supervisor_physical_page()
void MemoryManager::enter_process_paging_scope(Process& process)
{
ASSERT(Thread::current);
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
Thread::current->tss().cr3 = process.page_directory().cr3();
write_cr3(process.page_directory().cr3());
@ -528,6 +546,7 @@ extern "C" PageTableEntry boot_pd3_pt1023[1024];
PageDirectoryEntry* MemoryManager::quickmap_pd(PageDirectory& directory, size_t pdpt_index)
{
ScopedSpinLock lock(s_lock);
auto& pte = boot_pd3_pt1023[4];
auto pd_paddr = directory.m_directory_pages[pdpt_index]->paddr();
if (pte.physical_page_base() != pd_paddr.as_ptr()) {
@ -545,6 +564,7 @@ PageDirectoryEntry* MemoryManager::quickmap_pd(PageDirectory& directory, size_t
PageTableEntry* MemoryManager::quickmap_pt(PhysicalAddress pt_paddr)
{
ScopedSpinLock lock(s_lock);
auto& pte = boot_pd3_pt1023[8];
if (pte.physical_page_base() != pt_paddr.as_ptr()) {
#ifdef MM_DEBUG
@ -562,6 +582,7 @@ PageTableEntry* MemoryManager::quickmap_pt(PhysicalAddress pt_paddr)
u8* MemoryManager::quickmap_page(PhysicalPage& physical_page)
{
ASSERT_INTERRUPTS_DISABLED();
ScopedSpinLock lock(s_lock);
ASSERT(!m_quickmap_in_use);
m_quickmap_in_use = true;
@ -582,6 +603,7 @@ u8* MemoryManager::quickmap_page(PhysicalPage& physical_page)
void MemoryManager::unquickmap_page()
{
ASSERT_INTERRUPTS_DISABLED();
ScopedSpinLock lock(s_lock);
ASSERT(m_quickmap_in_use);
auto& pte = boot_pd3_pt1023[0];
pte.clear();
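
quickmap_pd, quickmap_pt and quickmap_page all retarget a reserved PTE slot in boot_pd3_pt1023 at whatever physical page needs touching, which is exactly why they now take the lock: the slot is shared machine state. A simplified sketch of the idea (types and addresses are illustrative stubs; dereferencing the returned pointer only makes sense inside the kernel):

#include <cstdint>

struct PageTableEntry {
    uint64_t raw { 0 };
    void set_physical_base(uint64_t paddr) { raw = paddr | 0x3; } // present | writable
    uint64_t physical_base() const { return raw & ~0xFFFull; }
};

static PageTableEntry reserved_slot;                     // kernel: a boot_pd3_pt1023 entry
static void flush_tlb_single(uintptr_t) { /* invlpg */ } // stub for the sketch

uint8_t* quickmap(uint64_t paddr, uintptr_t reserved_vaddr)
{
    if (reserved_slot.physical_base() != paddr) {
        reserved_slot.set_physical_base(paddr);
        flush_tlb_single(reserved_vaddr); // the old translation must not linger
    }
    return reinterpret_cast<uint8_t*>(reserved_vaddr);
}

int main()
{
    quickmap(0x200000, 0xFFC00000); // made-up physical and virtual addresses
    return 0;
}
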
@ -592,6 +614,7 @@ void MemoryManager::unquickmap_page()
template<MemoryManager::AccessSpace space, MemoryManager::AccessType access_type>
bool MemoryManager::validate_range(const Process& process, VirtualAddress base_vaddr, size_t size) const
{
ASSERT(s_lock.is_locked());
ASSERT(size);
if (base_vaddr > base_vaddr.offset(size)) {
dbg() << "Shenanigans! Asked to validate wrappy " << base_vaddr << " size=" << size;
@ -627,12 +650,14 @@ bool MemoryManager::validate_user_stack(const Process& process, VirtualAddress v
{
if (!is_user_address(vaddr))
return false;
ScopedSpinLock lock(s_lock);
auto* region = user_region_from_vaddr(const_cast<Process&>(process), vaddr);
return region && region->is_user_accessible() && region->is_stack();
}
bool MemoryManager::validate_kernel_read(const Process& process, VirtualAddress vaddr, size_t size) const
{
ScopedSpinLock lock(s_lock);
return validate_range<AccessSpace::Kernel, AccessType::Read>(process, vaddr, size);
}
@ -640,6 +665,7 @@ bool MemoryManager::can_read_without_faulting(const Process& process, VirtualAdd
{
// FIXME: Use the size argument!
UNUSED_PARAM(size);
ScopedSpinLock lock(s_lock);
auto* pte = const_cast<MemoryManager*>(this)->pte(process.page_directory(), vaddr);
if (!pte)
return false;
@ -650,6 +676,7 @@ bool MemoryManager::validate_user_read(const Process& process, VirtualAddress va
{
if (!is_user_address(vaddr))
return false;
ScopedSpinLock lock(s_lock);
return validate_range<AccessSpace::User, AccessType::Read>(process, vaddr, size);
}
@ -657,24 +684,25 @@ bool MemoryManager::validate_user_write(const Process& process, VirtualAddress v
{
if (!is_user_address(vaddr))
return false;
ScopedSpinLock lock(s_lock);
return validate_range<AccessSpace::User, AccessType::Write>(process, vaddr, size);
}
void MemoryManager::register_vmobject(VMObject& vmobject)
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
m_vmobjects.append(&vmobject);
}
void MemoryManager::unregister_vmobject(VMObject& vmobject)
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
m_vmobjects.remove(&vmobject);
}
void MemoryManager::register_region(Region& region)
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
if (region.is_kernel())
m_kernel_regions.append(&region);
else
@ -683,7 +711,7 @@ void MemoryManager::register_region(Region& region)
void MemoryManager::unregister_region(Region& region)
{
InterruptDisabler disabler;
ScopedSpinLock lock(s_lock);
if (region.is_kernel())
m_kernel_regions.remove(&region);
else


@ -31,6 +31,7 @@
#include <AK/String.h>
#include <Kernel/Arch/i386/CPU.h>
#include <Kernel/Forward.h>
#include <Kernel/SpinLock.h>
#include <Kernel/VM/PhysicalPage.h>
#include <Kernel/VM/Region.h>
#include <Kernel/VM/VMObject.h>
@ -201,6 +202,8 @@ private:
InlineLinkedList<VMObject> m_vmobjects;
static RecursiveSpinLock s_lock;
bool m_quickmap_in_use { false };
RefPtr<PhysicalPage> m_low_pseudo_identity_mapping_pages[4];


@ -52,7 +52,7 @@ void WaitQueue::wake_one(Atomic<bool>* lock)
return;
if (auto* thread = m_threads.take_first())
thread->wake_from_queue();
Scheduler::stop_idling();
Scheduler::yield();
}
void WaitQueue::wake_n(i32 wake_count)
@ -67,7 +67,7 @@ void WaitQueue::wake_n(i32 wake_count)
break;
thread->wake_from_queue();
}
Scheduler::stop_idling();
Scheduler::yield();
}
void WaitQueue::wake_all()
@ -77,7 +77,7 @@ void WaitQueue::wake_all()
return;
while (!m_threads.is_empty())
m_threads.take_first()->wake_from_queue();
Scheduler::stop_idling();
Scheduler::yield();
}
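
All three wake paths now end in Scheduler::yield() instead of stop_idling(), so a freshly woken thread gets a chance to run immediately. A simplified sketch of wake_n() with standard containers (Thread and Scheduler are stand-ins; the kernel uses an intrusive list):

#include <list>

struct Thread {
    void wake_from_queue() { /* kernel: set state to Runnable */ }
};

namespace Scheduler {
inline void yield() { /* kernel: reschedule this CPU */ }
}

void wake_n(std::list<Thread*>& threads, int wake_count)
{
    for (int i = 0; i < wake_count; ++i) {
        if (threads.empty())
            break;
        Thread* thread = threads.front();
        threads.pop_front();
        thread->wake_from_queue();
    }
    Scheduler::yield(); // was stop_idling(); now actively gives up the CPU
}

int main()
{
    Thread a, b;
    std::list<Thread*> queue { &a, &b };
    wake_n(queue, 1); // wakes a, leaves b queued
    return 0;
}
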
void WaitQueue::clear()


@ -103,18 +103,19 @@ extern "C" [[noreturn]] void init()
{
setup_serial_debug();
cpu_setup();
cpu_setup(0);
kmalloc_init();
slab_alloc_init();
{
static Processor s_bsp_processor_info; // global but let's keep it "private"
s_bsp_processor_info.initialize(0);
}
CommandLine::initialize(reinterpret_cast<const char*>(low_physical_to_virtual(multiboot_info_ptr->cmdline)));
MemoryManager::initialize();
gdt_init();
idt_init();
// Invoke all static global constructors in the kernel.
// Note that we want to do this as early as possible.
for (ctor_func_t* ctor = &start_ctors; ctor < &end_ctors; ctor++)
@ -148,16 +149,12 @@ extern "C" [[noreturn]] void init()
VirtualConsole::switch_to(0);
Process::initialize();
Thread::initialize();
Scheduler::initialize(0);
Thread* init_stage2_thread = nullptr;
Process::create_kernel_process(init_stage2_thread, "init_stage2", init_stage2);
Scheduler::pick_next();
sti();
Scheduler::idle_loop();
Scheduler::start();
ASSERT_NOT_REACHED();
}
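
The BSP's Processor instance is a function-local static so it exists before any allocator does, and initialize(0) registers it as CPU 0. A sketch of per-CPU registration by small integer id (MAX_CPUS and the registry shape are assumptions; the real initialize() also sets up the GDT so Processor::current() can find the instance through a segment register):

#include <array>
#include <cstdint>

constexpr size_t MAX_CPUS = 8; // assumption for the sketch

struct Processor {
    uint32_t m_id { 0 };
    static inline std::array<Processor*, MAX_CPUS> s_processors {};

    void initialize(uint32_t cpu)
    {
        m_id = cpu;
        s_processors[cpu] = this; // publish so code on other CPUs can find us
    }
};

int main()
{
    static Processor bsp; // like s_bsp_processor_info above
    bsp.initialize(0);    // CPU 0 is the bootstrap processor
    return 0;
}
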
@ -166,8 +163,12 @@ extern "C" [[noreturn]] void init()
//
// The purpose of init_ap() is to initialize APs for multi-tasking.
//
extern "C" [[noreturn]] void init_ap(u32 cpu)
extern "C" [[noreturn]] void init_ap(u32 cpu, Processor* processor_info)
{
klog() << "CPU #" << cpu << " processor_info at " << VirtualAddress(FlatPtr(processor_info));
cpu_setup(cpu);
processor_info->initialize(cpu);
APIC::the().enable(cpu);
#if 0