Browse Source

Move the scheduler code to its own class.

This is very mechanical.
Andreas Kling 6 years ago
parent
commit
39d2fcbbee
8 changed files with 336 additions and 318 deletions
  1. 2 1
      Kernel/Makefile
  2. 30 300
      Kernel/Process.cpp
  3. 16 13
      Kernel/Process.h
  4. 261 0
      Kernel/Scheduler.cpp
  5. 20 0
      Kernel/Scheduler.h
  6. 2 1
      Kernel/Syscall.cpp
  7. 3 2
      Kernel/i8253.cpp
  8. 2 1
      Kernel/init.cpp

+ 2 - 1
Kernel/Makefile

@@ -20,7 +20,8 @@ KERNEL_OBJS = \
        ProcFileSystem.o \
        ProcFileSystem.o \
        RTC.o \
        RTC.o \
        TTY.o \
        TTY.o \
-       VirtualConsole.o
+       VirtualConsole.o \
+       Scheduler.o
 
 
 VFS_OBJS = \
 VFS_OBJS = \
     ../VirtualFileSystem/DiskDevice.o \
     ../VirtualFileSystem/DiskDevice.o \

+ 30 - 300
Kernel/Process.cpp

@@ -16,23 +16,13 @@
 #include <AK/StdLib.h>
 #include <AK/StdLib.h>
 #include <LibC/signal_numbers.h>
 #include <LibC/signal_numbers.h>
 #include "Syscall.h"
 #include "Syscall.h"
+#include "Scheduler.h"
 
 
 //#define DEBUG_IO
 //#define DEBUG_IO
 //#define TASK_DEBUG
 //#define TASK_DEBUG
 //#define FORK_DEBUG
 //#define FORK_DEBUG
-//#define SCHEDULER_DEBUG
-#define COOL_GLOBALS
 #define MAX_PROCESS_GIDS 32
 #define MAX_PROCESS_GIDS 32
 
 
-static const dword scheduler_time_slice = 5; // *10 = 50ms
-
-#ifdef COOL_GLOBALS
-struct CoolGlobals {
-    dword current_pid;
-};
-CoolGlobals* g_cool_globals;
-#endif
-
 // FIXME: Only do a single validation for accesses that don't span multiple pages.
 // FIXME: Only do a single validation for accesses that don't span multiple pages.
 // FIXME: Some places pass strlen(arg1) as arg2. This doesn't seem entirely perfect..
 // FIXME: Some places pass strlen(arg1) as arg2. This doesn't seem entirely perfect..
 #define VALIDATE_USER_READ(b, s) \
 #define VALIDATE_USER_READ(b, s) \
@@ -51,12 +41,9 @@ CoolGlobals* g_cool_globals;
 
 
 static const DWORD defaultStackSize = 16384;
 static const DWORD defaultStackSize = 16384;
 
 
-Process* current;
-Process* s_colonel_process;
-
 static pid_t next_pid;
 static pid_t next_pid;
-static InlineLinkedList<Process>* s_processes;
-static InlineLinkedList<Process>* s_dead_processes;
+InlineLinkedList<Process>* g_processes;
+InlineLinkedList<Process>* g_dead_processes;
 static String* s_hostname;
 static String* s_hostname;
 
 
 static String& hostnameStorage(InterruptDisabler&)
 static String& hostnameStorage(InterruptDisabler&)
@@ -71,63 +58,26 @@ static String getHostname()
     return hostnameStorage(disabler).isolatedCopy();
     return hostnameStorage(disabler).isolatedCopy();
 }
 }
 
 
-static bool contextSwitch(Process*);
-
-static void redo_colonel_process_tss()
-{
-    if (!s_colonel_process->selector())
-        s_colonel_process->setSelector(gdt_alloc_entry());
-
-    auto& tssDescriptor = getGDTEntry(s_colonel_process->selector());
-
-    tssDescriptor.setBase(&s_colonel_process->tss());
-    tssDescriptor.setLimit(0xffff);
-    tssDescriptor.dpl = 0;
-    tssDescriptor.segment_present = 1;
-    tssDescriptor.granularity = 1;
-    tssDescriptor.zero = 0;
-    tssDescriptor.operation_size = 1;
-    tssDescriptor.descriptor_type = 0;
-    tssDescriptor.type = 9;
-
-    flushGDT();
-}
-
-void Process::prepare_for_iret_to_new_process()
-{
-    redo_colonel_process_tss();
-    s_colonel_process->tss().backlink = current->selector();
-    load_task_register(s_colonel_process->selector());
-}
-
-static void hlt_loop()
-{
-    for (;;) {
-        asm volatile("hlt");
-    }
-}
+CoolGlobals* g_cool_globals;
 
 
 void Process::initialize()
 void Process::initialize()
 {
 {
 #ifdef COOL_GLOBALS
 #ifdef COOL_GLOBALS
-    g_cool_globals = (CoolGlobals*)0x1000;
+    g_cool_globals = reinterpret_cast<CoolGlobals*>(0x1000);
 #endif
 #endif
-    current = nullptr;
     next_pid = 0;
     next_pid = 0;
-    s_processes = new InlineLinkedList<Process>;
-    s_dead_processes = new InlineLinkedList<Process>;
-    s_colonel_process = Process::create_kernel_process(hlt_loop, "colonel");
+    g_processes = new InlineLinkedList<Process>;
+    g_dead_processes = new InlineLinkedList<Process>;
     s_hostname = new String("birx");
     s_hostname = new String("birx");
-    redo_colonel_process_tss();
-    load_task_register(s_colonel_process->selector());
+    Scheduler::initialize();
 }
 }
 
 
 Vector<Process*> Process::allProcesses()
 Vector<Process*> Process::allProcesses()
 {
 {
     InterruptDisabler disabler;
     InterruptDisabler disabler;
     Vector<Process*> processes;
     Vector<Process*> processes;
-    processes.ensureCapacity(s_processes->sizeSlow());
-    for (auto* process = s_processes->head(); process; process = process->next())
+    processes.ensureCapacity(g_processes->sizeSlow());
+    for (auto* process = g_processes->head(); process; process = process->next())
         processes.append(process);
         processes.append(process);
     return processes;
     return processes;
 }
 }
@@ -261,7 +211,7 @@ Process* Process::fork(RegisterDump& regs)
 
 
     ProcFileSystem::the().addProcess(*child);
     ProcFileSystem::the().addProcess(*child);
 
 
-    s_processes->prepend(child);
+    g_processes->prepend(child);
     system.nprocess++;
     system.nprocess++;
 #ifdef TASK_DEBUG
 #ifdef TASK_DEBUG
     kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
     kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
@@ -337,7 +287,7 @@ int Process::exec(const String& path, Vector<String>&& arguments, Vector<String>
 
 
     InterruptDisabler disabler;
     InterruptDisabler disabler;
     if (current == this)
     if (current == this)
-        load_task_register(s_colonel_process->selector());
+        Scheduler::prepare_to_modify_own_tss();
 
 
     m_name = parts.takeLast();
     m_name = parts.takeLast();
 
 
@@ -368,11 +318,11 @@ int Process::exec(const String& path, Vector<String>&& arguments, Vector<String>
     m_initialEnvironment = move(environment);
     m_initialEnvironment = move(environment);
 
 
 #ifdef TASK_DEBUG
 #ifdef TASK_DEBUG
-    kprintf("Process %u (%s) exec'd %s @ %p\n", pid(), name().characters(), filename, m_tss.eip);
+    kprintf("Process %u (%s) exec'd %s @ %p\n", pid(), name().characters(), path.characters(), m_tss.eip);
 #endif
 #endif
 
 
     if (current == this)
     if (current == this)
-        sched_yield();
+        Scheduler::yield();
 
 
     return 0;
     return 0;
 }
 }
@@ -479,7 +429,7 @@ Process* Process::create_user_process(const String& path, uid_t uid, gid_t gid,
 
 
     ProcFileSystem::the().addProcess(*process);
     ProcFileSystem::the().addProcess(*process);
 
 
-    s_processes->prepend(process);
+    g_processes->prepend(process);
     system.nprocess++;
     system.nprocess++;
 #ifdef TASK_DEBUG
 #ifdef TASK_DEBUG
     kprintf("Process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), process->m_tss.eip);
     kprintf("Process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), process->m_tss.eip);
@@ -533,7 +483,7 @@ Process* Process::create_kernel_process(void (*e)(), String&& name)
 
 
     if (process->pid() != 0) {
     if (process->pid() != 0) {
         InterruptDisabler disabler;
         InterruptDisabler disabler;
-        s_processes->prepend(process);
+        g_processes->prepend(process);
         system.nprocess++;
         system.nprocess++;
         ProcFileSystem::the().addProcess(*process);
         ProcFileSystem::the().addProcess(*process);
 #ifdef TASK_DEBUG
 #ifdef TASK_DEBUG
@@ -698,11 +648,8 @@ void Process::sys$exit(int status)
     m_termination_status = status;
     m_termination_status = status;
     m_termination_signal = 0;
     m_termination_signal = 0;
 
 
-    if (!scheduleNewProcess()) {
-        kprintf("Process::sys$exit: Failed to schedule a new process :(\n");
-        HANG;
-    }
-    switchNow();
+    Scheduler::pick_next_and_switch_now();
+    ASSERT_NOT_REACHED();
 }
 }
 
 
 void Process::terminate_due_to_signal(byte signal)
 void Process::terminate_due_to_signal(byte signal)
@@ -865,13 +812,13 @@ void Process::dispatch_signal(byte signal)
 void Process::sys$sigreturn()
 void Process::sys$sigreturn()
 {
 {
     InterruptDisabler disabler;
     InterruptDisabler disabler;
+    Scheduler::prepare_to_modify_own_tss();
     m_tss = m_tss_to_resume_kernel;
     m_tss = m_tss_to_resume_kernel;
 #ifdef SIGNAL_DEBUG
 #ifdef SIGNAL_DEBUG
     dbgprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
     dbgprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
     dbgprintf(" -> resuming execution at %w:%x\n", m_tss.cs, m_tss.eip);
     dbgprintf(" -> resuming execution at %w:%x\n", m_tss.cs, m_tss.eip);
 #endif
 #endif
-    load_task_register(s_colonel_process->selector());
-    sched_yield();
+    Scheduler::yield();
     kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
     kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
     ASSERT_NOT_REACHED();
     ASSERT_NOT_REACHED();
 }
 }
@@ -887,64 +834,30 @@ void Process::crash()
 {
 {
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT(state() != Dead);
     ASSERT(state() != Dead);
-
     m_termination_signal = SIGSEGV;
     m_termination_signal = SIGSEGV;
     set_state(Dead);
     set_state(Dead);
     dumpRegions();
     dumpRegions();
-
-    if (!scheduleNewProcess()) {
-        kprintf("Process::crash: Failed to schedule a new process :(\n");
-        HANG;
-    }
-    switchNow();
+    Scheduler::pick_next_and_switch_now();
+    ASSERT_NOT_REACHED();
 }
 }
 
 
 void Process::doHouseKeeping()
 void Process::doHouseKeeping()
 {
 {
-    if (s_dead_processes->isEmpty())
+    if (g_dead_processes->isEmpty())
         return;
         return;
     InterruptDisabler disabler;
     InterruptDisabler disabler;
     Process* next = nullptr;
     Process* next = nullptr;
-    for (auto* deadProcess = s_dead_processes->head(); deadProcess; deadProcess = next) {
+    for (auto* deadProcess = g_dead_processes->head(); deadProcess; deadProcess = next) {
         next = deadProcess->next();
         next = deadProcess->next();
         delete deadProcess;
         delete deadProcess;
     }
     }
-    s_dead_processes->clear();
-}
-
-int sched_yield()
-{
-    if (!current) {
-        kprintf("PANIC: sched_yield() with !current");
-        HANG;
-    }
-
-    //kprintf("%s<%u> yield()\n", current->name().characters(), current->pid());
-
-    InterruptDisabler disabler;
-    if (!scheduleNewProcess())
-        return 1;
-
-    //kprintf("yield() jumping to new process: %x (%s)\n", current->farPtr().selector, current->name().characters());
-    switchNow();
-    return 0;
-}
-
-void switchNow()
-{
-    Descriptor& descriptor = getGDTEntry(current->selector());
-    descriptor.type = 9;
-    flushGDT();
-    asm("sti\n"
-        "ljmp *(%%eax)\n"
-        ::"a"(&current->farPtr())
-    );
+    g_dead_processes->clear();
 }
 }
 
 
 void Process::for_each(Function<bool(Process&)> callback)
 void Process::for_each(Function<bool(Process&)> callback)
 {
 {
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT_INTERRUPTS_DISABLED();
-    for (auto* process = s_processes->head(); process; process = process->next()) {
+    for (auto* process = g_processes->head(); process; process = process->next()) {
         if (!callback(*process))
         if (!callback(*process))
             break;
             break;
     }
     }
@@ -953,7 +866,7 @@ void Process::for_each(Function<bool(Process&)> callback)
 void Process::for_each_in_pgrp(pid_t pgid, Function<bool(Process&)> callback)
 void Process::for_each_in_pgrp(pid_t pgid, Function<bool(Process&)> callback)
 {
 {
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT_INTERRUPTS_DISABLED();
-    for (auto* process = s_processes->head(); process; process = process->next()) {
+    for (auto* process = g_processes->head(); process; process = process->next()) {
         if (process->pgid() == pgid) {
         if (process->pgid() == pgid) {
             if (!callback(*process))
             if (!callback(*process))
                 break;
                 break;
@@ -964,7 +877,7 @@ void Process::for_each_in_pgrp(pid_t pgid, Function<bool(Process&)> callback)
 void Process::for_each_in_state(State state, Function<bool(Process&)> callback)
 void Process::for_each_in_state(State state, Function<bool(Process&)> callback)
 {
 {
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT_INTERRUPTS_DISABLED();
-    for (auto* process = s_processes->head(); process;) {
+    for (auto* process = g_processes->head(); process;) {
         auto* next_process = process->next();
         auto* next_process = process->next();
         if (process->state() == state)
         if (process->state() == state)
             callback(*process);
             callback(*process);
@@ -975,7 +888,7 @@ void Process::for_each_in_state(State state, Function<bool(Process&)> callback)
 void Process::for_each_not_in_state(State state, Function<bool(Process&)> callback)
 void Process::for_each_not_in_state(State state, Function<bool(Process&)> callback)
 {
 {
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT_INTERRUPTS_DISABLED();
-    for (auto* process = s_processes->head(); process;) {
+    for (auto* process = g_processes->head(); process;) {
         auto* next_process = process->next();
         auto* next_process = process->next();
         if (process->state() != state)
         if (process->state() != state)
             callback(*process);
             callback(*process);
@@ -983,187 +896,10 @@ void Process::for_each_not_in_state(State state, Function<bool(Process&)> callba
     }
     }
 }
 }
 
 
-bool scheduleNewProcess()
-{
-    ASSERT_INTERRUPTS_DISABLED();
-
-    if (!current) {
-        // XXX: The first ever context_switch() goes to the idle process.
-        //      This to setup a reliable place we can return to.
-        return contextSwitch(Process::colonel_process());
-    }
-
-    // Check and unblock processes whose wait conditions have been met.
-    Process::for_each([] (auto& process) {
-        if (process.state() == Process::BlockedSleep) {
-            if (process.wakeupTime() <= system.uptime)
-                process.unblock();
-            return true;
-        }
-
-        if (process.state() == Process::BlockedWait) {
-            auto* waitee = Process::from_pid(process.waitee());
-            if (!waitee) {
-                kprintf("waitee %u of %s(%u) reaped before I could wait?\n", process.waitee(), process.name().characters(), process.pid());
-                ASSERT_NOT_REACHED();
-            }
-            if (waitee->state() == Process::Dead) {
-                process.m_waitee_status = (waitee->m_termination_status << 8) | waitee->m_termination_signal;
-                process.unblock();
-                waitee->set_state(Process::Forgiven);
-            }
-            return true;
-        }
-
-        if (process.state() == Process::BlockedRead) {
-            ASSERT(process.m_fdBlockedOnRead != -1);
-            // FIXME: Block until the amount of data wanted is available.
-            if (process.m_file_descriptors[process.m_fdBlockedOnRead]->hasDataAvailableForRead())
-                process.unblock();
-            return true;
-        }
-        return true;
-    });
-
-    // Forgive dead orphans.
-    // FIXME: Does this really make sense?
-    Process::for_each_in_state(Process::Dead, [] (auto& process) {
-        if (!Process::from_pid(process.ppid()))
-            process.set_state(Process::Forgiven);
-        return true;
-    });
-
-    // Clean up forgiven processes.
-    // FIXME: Do we really need this to be a separate pass over the process list?
-    Process::for_each_in_state(Process::Forgiven, [] (auto& process) {
-        s_processes->remove(&process);
-        s_dead_processes->append(&process);
-        return true;
-    });
-
-    // Dispatch any pending signals.
-    // FIXME: Do we really need this to be a separate pass over the process list?
-    Process::for_each_not_in_state(Process::Dead, [] (auto& process) {
-        if (!process.has_unmasked_pending_signals())
-            return true;
-        // We know how to interrupt blocked processes, but if they are just executing
-        // at some random point in the kernel, let them continue. They'll be in userspace
-        // sooner or later and we can deliver the signal then.
-        // FIXME: Maybe we could check when returning from a syscall if there's a pending
-        //        signal and dispatch it then and there? Would that be doable without the
-        //        syscall effectively being "interrupted" despite having completed?
-        if (process.in_kernel() && !process.is_blocked())
-            return true;
-        process.dispatch_one_pending_signal();
-        if (process.is_blocked()) {
-            process.m_was_interrupted_while_blocked = true;
-            process.unblock();
-        }
-        return true;
-    });
-
-#ifdef SCHEDULER_DEBUG
-    dbgprintf("Scheduler choices:\n");
-    for (auto* process = s_processes->head(); process; process = process->next()) {
-        //if (process->state() == Process::BlockedWait || process->state() == Process::BlockedSleep)
-//            continue;
-        dbgprintf("% 12s %s(%u) @ %w:%x\n", toString(process->state()), process->name().characters(), process->pid(), process->tss().cs, process->tss().eip);
-    }
-#endif
-
-    auto* prevHead = s_processes->head();
-    for (;;) {
-        // Move head to tail.
-        s_processes->append(s_processes->removeHead());
-        auto* process = s_processes->head();
-
-        if (process->state() == Process::Runnable || process->state() == Process::Running) {
-#ifdef SCHEDULER_DEBUG
-            dbgprintf("switch to %s(%u)\n", process->name().characters(), process->pid());
-#endif
-            return contextSwitch(process);
-        }
-
-        if (process == prevHead) {
-            // Back at process_head, nothing wants to run.
-            kprintf("Nothing wants to run!\n");
-            kprintf("PID    OWNER      STATE  NSCHED  NAME\n");
-            for (auto* process = s_processes->head(); process; process = process->next()) {
-                kprintf("%w   %w:%w  %b     %w    %s\n",
-                    process->pid(),
-                    process->uid(),
-                    process->gid(),
-                    process->state(),
-                    process->timesScheduled(),
-                    process->name().characters());
-            }
-            kprintf("Switch to kernel process @ %w:%x\n", s_colonel_process->tss().cs, s_colonel_process->tss().eip);
-            return contextSwitch(Process::colonel_process());
-        }
-    }
-}
-
-static bool contextSwitch(Process* t)
-{
-    t->setTicksLeft(scheduler_time_slice);
-    t->didSchedule();
-
-    if (current == t)
-        return false;
-
-#ifdef SCHEDULER_DEBUG
-    // Some sanity checking to force a crash earlier.
-    auto csRPL = t->tss().cs & 3;
-    auto ssRPL = t->tss().ss & 3;
-
-    if (csRPL != ssRPL) {
-        kprintf("Fuckup! Switching from %s(%u) to %s(%u) has RPL mismatch\n",
-                current->name().characters(), current->pid(),
-                t->name().characters(), t->pid()
-                );
-        kprintf("code: %w:%x\n", t->tss().cs, t->tss().eip);
-        kprintf(" stk: %w:%x\n", t->tss().ss, t->tss().esp);
-        ASSERT(csRPL == ssRPL);
-    }
-#endif
-
-    if (current) {
-        // If the last process hasn't blocked (still marked as running),
-        // mark it as runnable for the next round.
-        if (current->state() == Process::Running)
-            current->set_state(Process::Runnable);
-    }
-
-    current = t;
-    t->set_state(Process::Running);
-
-#ifdef COOL_GLOBALS
-    g_cool_globals->current_pid = t->pid();
-#endif
-
-    if (!t->selector()) {
-        t->setSelector(gdt_alloc_entry());
-        auto& descriptor = getGDTEntry(t->selector());
-        descriptor.setBase(&t->tss());
-        descriptor.setLimit(0xffff);
-        descriptor.dpl = 0;
-        descriptor.segment_present = 1;
-        descriptor.granularity = 1;
-        descriptor.zero = 0;
-        descriptor.operation_size = 1;
-        descriptor.descriptor_type = 0;
-    }
-
-    auto& descriptor = getGDTEntry(t->selector());
-    descriptor.type = 11; // Busy TSS
-    flushGDT();
-    return true;
-}
-
 Process* Process::from_pid(pid_t pid)
 Process* Process::from_pid(pid_t pid)
 {
 {
     ASSERT_INTERRUPTS_DISABLED();
     ASSERT_INTERRUPTS_DISABLED();
-    for (auto* process = s_processes->head(); process; process = process->next()) {
+    for (auto* process = g_processes->head(); process; process = process->next()) {
         if (process->pid() == pid)
         if (process->pid() == pid)
             return process;
             return process;
     }
     }
@@ -1529,12 +1265,6 @@ void sleep(DWORD ticks)
     sched_yield();
     sched_yield();
 }
 }
 
 
-Process* Process::colonel_process()
-{
-    ASSERT(s_colonel_process);
-    return s_colonel_process;
-}
-
 bool Process::isValidAddressForKernel(LinearAddress laddr) const
 bool Process::isValidAddressForKernel(LinearAddress laddr) const
 {
 {
     // We check extra carefully here since the first 4MB of the address space is identity-mapped.
     // We check extra carefully here since the first 4MB of the address space is identity-mapped.

+ 16 - 13
Kernel/Process.h

@@ -15,6 +15,14 @@ class PageDirectory;
 class Region;
 class Region;
 class Zone;
 class Zone;
 
 
+#define COOL_GLOBALS
+#ifdef COOL_GLOBALS
+// Small record of scheduler state kept at a fixed address: Process::initialize()
+// points g_cool_globals at 0x1000 and Scheduler::context_switch() stores the pid
+// of the process it switches to in current_pid.
+// NOTE(review): presumably a debugging aid for inspecting memory -- confirm.
+struct CoolGlobals {
+    pid_t current_pid;
+};
+extern CoolGlobals* g_cool_globals;
+#endif
+
 struct SignalActionData {
 struct SignalActionData {
     LinearAddress handler_or_sigaction;
     LinearAddress handler_or_sigaction;
     dword mask { 0 };
     dword mask { 0 };
@@ -59,7 +67,6 @@ public:
     bool in_kernel() const { return (m_tss.cs & 0x03) == 0; }
     bool in_kernel() const { return (m_tss.cs & 0x03) == 0; }
 
 
     static Process* from_pid(pid_t);
     static Process* from_pid(pid_t);
-    static Process* colonel_process();
 
 
     const String& name() const { return m_name; }
     const String& name() const { return m_name; }
     pid_t pid() const { return m_pid; }
     pid_t pid() const { return m_pid; }
@@ -93,10 +100,8 @@ public:
     static void for_each_in_state(State, Function<bool(Process&)>);
     static void for_each_in_state(State, Function<bool(Process&)>);
     static void for_each_not_in_state(State, Function<bool(Process&)>);
     static void for_each_not_in_state(State, Function<bool(Process&)>);
 
 
-    static void prepare_for_iret_to_new_process();
-
     bool tick() { ++m_ticks; return --m_ticksLeft; }
     bool tick() { ++m_ticks; return --m_ticksLeft; }
-    void setTicksLeft(DWORD t) { m_ticksLeft = t; }
+    void set_ticks_left(dword t) { m_ticksLeft = t; }
 
 
     void setSelector(WORD s) { m_farPtr.selector = s; }
     void setSelector(WORD s) { m_farPtr.selector = s; }
     void set_state(State s) { m_state = s; }
     void set_state(State s) { m_state = s; }
@@ -124,8 +129,8 @@ public:
     int sys$lseek(int fd, off_t, int whence);
     int sys$lseek(int fd, off_t, int whence);
     int sys$kill(pid_t pid, int sig);
     int sys$kill(pid_t pid, int sig);
     int sys$geterror() { return m_error; }
     int sys$geterror() { return m_error; }
-    void sys$exit(int status);
-    void sys$sigreturn();
+    void sys$exit(int status) NORETURN;
+    void sys$sigreturn() NORETURN;
     pid_t sys$spawn(const char* path, const char** args, const char** envp);
     pid_t sys$spawn(const char* path, const char** args, const char** envp);
     pid_t sys$waitpid(pid_t, int* wstatus, int options);
     pid_t sys$waitpid(pid_t, int* wstatus, int options);
     void* sys$mmap(void*, size_t size);
     void* sys$mmap(void*, size_t size);
@@ -155,7 +160,7 @@ public:
 
 
     static void initialize();
     static void initialize();
 
 
-    void crash();
+    void crash() NORETURN;
 
 
     const TTY* tty() const { return m_tty; }
     const TTY* tty() const { return m_tty; }
 
 
@@ -163,7 +168,7 @@ public:
     const Vector<RetainPtr<Region>>& regions() const { return m_regions; }
     const Vector<RetainPtr<Region>>& regions() const { return m_regions; }
     void dumpRegions();
     void dumpRegions();
 
 
-    void didSchedule() { ++m_timesScheduled; }
+    void did_schedule() { ++m_timesScheduled; }
     dword timesScheduled() const { return m_timesScheduled; }
     dword timesScheduled() const { return m_timesScheduled; }
 
 
     pid_t waitee() const { return m_waitee; }
     pid_t waitee() const { return m_waitee; }
@@ -195,7 +200,7 @@ public:
 
 
 private:
 private:
     friend class MemoryManager;
     friend class MemoryManager;
-    friend bool scheduleNewProcess();
+    friend class Scheduler;
 
 
     Process(String&& name, uid_t, gid_t, pid_t ppid, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr);
     Process(String&& name, uid_t, gid_t, pid_t ppid, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr);
 
 
@@ -309,10 +314,8 @@ static inline const char* toString(Process::State state)
     return nullptr;
     return nullptr;
 }
 }
 
 
-extern int sched_yield();
-extern bool scheduleNewProcess();
-extern void switchNow();
 extern void block(Process::State);
 extern void block(Process::State);
 extern void sleep(DWORD ticks);
 extern void sleep(DWORD ticks);
 
 
-extern Process* current;
+extern InlineLinkedList<Process>* g_processes;
+extern InlineLinkedList<Process>* g_dead_processes;

+ 261 - 0
Kernel/Scheduler.cpp

@@ -0,0 +1,261 @@
+#include "Scheduler.h"
+#include "Process.h"
+#include "system.h"
+
+//#define SCHEDULER_DEBUG
+
+static const dword time_slice = 5; // *10 = 50ms
+
+Process* current;
+static Process* s_colonel_process;
+
+// Runs the scheduler's bookkeeping passes over g_processes and then chooses the
+// process to run next, recording the choice through context_switch().
+//
+// Must be called with interrupts disabled.
+//
+// Returns whatever context_switch() returns: true if `current` now points at a
+// different process, false if the chosen process was already `current`.
+// This function only selects; the actual jump is done by switch_now() or by the
+// caller's own return path.
+bool Scheduler::pick_next()
+{
+    ASSERT_INTERRUPTS_DISABLED();
+
+    if (!current) {
+        // XXX: The first ever context_switch() goes to the idle process.
+        //      This to setup a reliable place we can return to.
+        return context_switch(*s_colonel_process);
+    }
+
+    // Check and unblock processes whose wait conditions have been met.
+    Process::for_each([] (auto& process) {
+        if (process.state() == Process::BlockedSleep) {
+            if (process.wakeupTime() <= system.uptime)
+                process.unblock();
+            return true;
+        }
+
+        if (process.state() == Process::BlockedWait) {
+            auto* waitee = Process::from_pid(process.waitee());
+            if (!waitee) {
+                kprintf("waitee %u of %s(%u) reaped before I could wait?\n", process.waitee(), process.name().characters(), process.pid());
+                ASSERT_NOT_REACHED();
+            }
+            if (waitee->state() == Process::Dead) {
+                // Hand the waiter the termination status (shifted left by 8) OR'ed with
+                // the termination signal, then mark the waitee as reaped (Forgiven).
+                process.m_waitee_status = (waitee->m_termination_status << 8) | waitee->m_termination_signal;
+                process.unblock();
+                waitee->set_state(Process::Forgiven);
+            }
+            return true;
+        }
+
+        if (process.state() == Process::BlockedRead) {
+            ASSERT(process.m_fdBlockedOnRead != -1);
+            // FIXME: Block until the amount of data wanted is available.
+            if (process.m_file_descriptors[process.m_fdBlockedOnRead]->hasDataAvailableForRead())
+                process.unblock();
+            return true;
+        }
+        return true;
+    });
+
+    // Forgive dead orphans.
+    // FIXME: Does this really make sense?
+    Process::for_each_in_state(Process::Dead, [] (auto& process) {
+        if (!Process::from_pid(process.ppid()))
+            process.set_state(Process::Forgiven);
+        return true;
+    });
+
+    // Clean up forgiven processes.
+    // They are moved from g_processes to g_dead_processes here; the objects themselves
+    // are deleted later by Process::doHouseKeeping().
+    // FIXME: Do we really need this to be a separate pass over the process list?
+    Process::for_each_in_state(Process::Forgiven, [] (auto& process) {
+        g_processes->remove(&process);
+        g_dead_processes->append(&process);
+        return true;
+    });
+
+    // Dispatch any pending signals.
+    // FIXME: Do we really need this to be a separate pass over the process list?
+    Process::for_each_not_in_state(Process::Dead, [] (auto& process) {
+        if (!process.has_unmasked_pending_signals())
+            return true;
+        // We know how to interrupt blocked processes, but if they are just executing
+        // at some random point in the kernel, let them continue. They'll be in userspace
+        // sooner or later and we can deliver the signal then.
+        // FIXME: Maybe we could check when returning from a syscall if there's a pending
+        //        signal and dispatch it then and there? Would that be doable without the
+        //        syscall effectively being "interrupted" despite having completed?
+        if (process.in_kernel() && !process.is_blocked())
+            return true;
+        process.dispatch_one_pending_signal();
+        if (process.is_blocked()) {
+            process.m_was_interrupted_while_blocked = true;
+            process.unblock();
+        }
+        return true;
+    });
+
+#ifdef SCHEDULER_DEBUG
+    dbgprintf("Scheduler choices:\n");
+    for (auto* process = g_processes->head(); process; process = process->next()) {
+        //if (process->state() == Process::BlockedWait || process->state() == Process::BlockedSleep)
+//            continue;
+        dbgprintf("% 12s %s(%u) @ %w:%x\n", toString(process->state()), process->name().characters(), process->pid(), process->tss().cs, process->tss().eip);
+    }
+#endif
+
+    // Round-robin selection: rotate the list (old head goes to the tail) and look at
+    // the new head each time, until a Runnable/Running process shows up or the
+    // rotation has brought the original head back to the front.
+    auto* prevHead = g_processes->head();
+    for (;;) {
+        // Move head to tail.
+        g_processes->append(g_processes->removeHead());
+        auto* process = g_processes->head();
+
+        if (process->state() == Process::Runnable || process->state() == Process::Running) {
+#ifdef SCHEDULER_DEBUG
+            dbgprintf("switch to %s(%u)\n", process->name().characters(), process->pid());
+#endif
+            return context_switch(*process);
+        }
+
+        if (process == prevHead) {
+            // Back at process_head, nothing wants to run.
+            // Dump the process table and fall back to the colonel (idle) process.
+            kprintf("Nothing wants to run!\n");
+            kprintf("PID    OWNER      STATE  NSCHED  NAME\n");
+            for (auto* process = g_processes->head(); process; process = process->next()) {
+                kprintf("%w   %w:%w  %b     %w    %s\n",
+                    process->pid(),
+                    process->uid(),
+                    process->gid(),
+                    process->state(),
+                    process->timesScheduled(),
+                    process->name().characters());
+            }
+            kprintf("Switch to kernel process @ %w:%x\n", s_colonel_process->tss().cs, s_colonel_process->tss().eip);
+            return context_switch(*s_colonel_process);
+        }
+    }
+}
+
+// Voluntarily gives up the CPU: picks the next process and, if that is a
+// different one, switches to it immediately.
+//
+// Return value (note the polarity):
+//   true  - pick_next() left `current` unchanged, so no switch was made.
+//   false - a switch was made and control came back here afterwards
+//           (presumably when this process was scheduled again -- confirm).
+// sched_yield() forwards this value as its 1/0 int result, so the polarity
+// must not be flipped.
+//
+// Hangs the kernel if there is no current process.
+bool Scheduler::yield()
+{
+    if (!current) {
+        // Name the function that actually detected the problem and terminate the line.
+        kprintf("PANIC: Scheduler::yield() with !current\n");
+        HANG;
+    }
+
+    //dbgprintf("%s<%u> yield()\n", current->name().characters(), current->pid());
+
+    // pick_next() asserts that interrupts are disabled.
+    InterruptDisabler disabler;
+    if (!pick_next())
+        return true;
+
+    //dbgprintf("yield() jumping to new process: %x (%s)\n", current->farPtr().selector, current->name().characters());
+    switch_now();
+    return false;
+}
+
+// Picks the next process and jumps to it straight away.
+// pick_next() is required to select a process other than `current`; this is asserted.
+// The callers in Process.cpp (sys$exit, crash) treat a return from here as
+// unreachable and follow the call with ASSERT_NOT_REACHED().
+void Scheduler::pick_next_and_switch_now()
+{
+    // Keep the call outside ASSERT() so it still happens if assertions compile to nothing.
+    const bool did_pick_other_process = pick_next();
+    ASSERT(did_pick_other_process);
+    switch_now();
+}
+
+// Performs the actual jump to `current`, i.e. to the process that
+// pick_next()/context_switch() selected beforehand.
+void Scheduler::switch_now()
+{
+    // context_switch() set this descriptor's type to 11 ("Busy TSS"); put it back to 9
+    // before jumping.
+    // NOTE(review): 9 is presumably the "available 32-bit TSS" type that a far jump
+    // to a task requires -- confirm against the x86 descriptor type encoding.
+    Descriptor& descriptor = getGDTEntry(current->selector());
+    descriptor.type = 9;
+    flushGDT();
+    // Re-enable interrupts, then far-jump through the far pointer stored in
+    // current->farPtr() (its address is passed in EAX).
+    asm("sti\n"
+        "ljmp *(%%eax)\n"
+        ::"a"(&current->farPtr())
+    );
+}
+
+// Makes `process` the current process and prepares its TSS descriptor in the GDT.
+//
+// The time slice and the schedule counter of `process` are refreshed in every case.
+// Returns false if `process` is already `current` (nothing else is touched),
+// true once `current` has been changed to `process`.
+bool Scheduler::context_switch(Process& process)
+{
+    process.set_ticks_left(time_slice);
+    process.did_schedule();
+
+    if (&process == current)
+        return false;
+
+    // An outgoing process that is still marked Running did not block;
+    // make it Runnable so it can be picked again in a later round.
+    if (current && current->state() == Process::Running)
+        current->set_state(Process::Runnable);
+
+    current = &process;
+    process.set_state(Process::Running);
+
+#ifdef COOL_GLOBALS
+    g_cool_globals->current_pid = process.pid();
+#endif
+
+    // A process without a selector has no GDT entry yet: allocate one and
+    // describe the process's TSS in it.
+    if (!process.selector()) {
+        process.setSelector(gdt_alloc_entry());
+        auto& fresh_descriptor = getGDTEntry(process.selector());
+        fresh_descriptor.setBase(&process.tss());
+        fresh_descriptor.setLimit(0xffff);
+        fresh_descriptor.dpl = 0;
+        fresh_descriptor.segment_present = 1;
+        fresh_descriptor.granularity = 1;
+        fresh_descriptor.zero = 0;
+        fresh_descriptor.operation_size = 1;
+        fresh_descriptor.descriptor_type = 0;
+    }
+
+    getGDTEntry(process.selector()).type = 11; // Busy TSS
+    flushGDT();
+    return true;
+}
+
+// Free-function entry point that forwards to Scheduler::yield().
+// Returns 1 when no switch was made and 0 after a switch, mirroring the
+// bool that Scheduler::yield() returns.
+int sched_yield()
+{
+    const bool did_not_switch = Scheduler::yield();
+    return did_not_switch ? 1 : 0;
+}
+
+// (Re)writes the GDT entry that describes the colonel process's TSS and
+// flushes the GDT. A selector is allocated first if the colonel has none yet.
+// The descriptor type is set to 9 here, whereas context_switch() uses 11 ("Busy TSS").
+static void redo_colonel_process_tss()
+{
+    auto& colonel = *s_colonel_process;
+    if (!colonel.selector())
+        colonel.setSelector(gdt_alloc_entry());
+
+    auto& descriptor = getGDTEntry(colonel.selector());
+    descriptor.setBase(&colonel.tss());
+    descriptor.setLimit(0xffff);
+    descriptor.dpl = 0;
+    descriptor.segment_present = 1;
+    descriptor.granularity = 1;
+    descriptor.zero = 0;
+    descriptor.operation_size = 1;
+    descriptor.descriptor_type = 0;
+    descriptor.type = 9;
+
+    flushGDT();
+}
+
+// Called by the timer handler (clock_handle) after pick_next() selected a new process.
+// Resets the colonel's TSS descriptor, stores the new current process's selector in
+// the colonel TSS backlink, and loads the colonel's selector into the task register.
+void Scheduler::prepare_for_iret_to_new_process()
+{
+    redo_colonel_process_tss();
+
+    auto& colonel = *s_colonel_process;
+    colonel.tss().backlink = current->selector();
+    load_task_register(colonel.selector());
+}
+
+// To be called by a process that is about to overwrite its own TSS and then yield().
+void Scheduler::prepare_to_modify_own_tss()
+{
+    // Load the colonel's selector into the task register. Without this, a process that
+    // rewrites its own TSS to continue somewhere else would simply end up right
+    // after its yield() call instead.
+    const auto colonel_selector = s_colonel_process->selector();
+    load_task_register(colonel_selector);
+}
+
+// Entry point of the colonel (idle) process: executes `hlt` forever and never returns.
+static void hlt_loop()
+{
+    while (true)
+        asm volatile("hlt");
+}
+
+// One-time scheduler setup, called from Process::initialize():
+// creates the colonel (idle) kernel process running hlt_loop(), writes its TSS
+// descriptor and loads its selector into the task register.
+void Scheduler::initialize()
+{
+    // Clear `current` before any process is created, so nothing that runs during
+    // process creation can observe a stale pointer. This is the order the code
+    // used while it still lived in Process::initialize().
+    current = nullptr;
+    s_colonel_process = Process::create_kernel_process(hlt_loop, "colonel");
+    redo_colonel_process_tss();
+    load_task_register(s_colonel_process->selector());
+}

+ 20 - 0
Kernel/Scheduler.h

@@ -0,0 +1,20 @@
+#pragma once
+
+#include <AK/Assertions.h>
+class Process;
+
+extern Process* current;
+
+class Scheduler { // All-static interface to the scheduler code moved out of Process.cpp; declares no data members.
+public:
+    static void initialize(); // Creates the "colonel" kernel process and loads its selector into the task register.
+    static bool pick_next(); // Replaces scheduleNewProcess() at its call sites; clock_handle() returns early when this is false.
+    static void pick_next_and_switch_now(); // NOTE(review): definition is not in this part of the diff -- document from Scheduler.cpp.
+    static void switch_now(); // NOTE(review): definition is not in this part of the diff -- document from Scheduler.cpp.
+    static bool yield(); // Invoked for Syscall::SC_yield and wrapped by the free function sched_yield().
+    static bool context_switch(Process&); // Its visible tail marks the process's TSS descriptor as type 11 ("Busy TSS"), flushes the GDT and returns true.
+    static void prepare_for_iret_to_new_process(); // Links the colonel's TSS back to `current` and loads the colonel's selector into the task register.
+    static void prepare_to_modify_own_tss(); // Loads the colonel's selector so a process can edit its own TSS before yielding.
+};
+
+int sched_yield();

+ 2 - 1
Kernel/Syscall.cpp

@@ -2,6 +2,7 @@
 #include "Process.h"
 #include "Process.h"
 #include "Syscall.h"
 #include "Syscall.h"
 #include "Console.h"
 #include "Console.h"
+#include "Scheduler.h"
 
 
 extern "C" void syscall_entry(RegisterDump&);
 extern "C" void syscall_entry(RegisterDump&);
 extern "C" void syscall_ISR();
 extern "C" void syscall_ISR();
@@ -48,7 +49,7 @@ static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2,
     ASSERT_INTERRUPTS_ENABLED();
     ASSERT_INTERRUPTS_ENABLED();
     switch (function) {
     switch (function) {
     case Syscall::SC_yield:
     case Syscall::SC_yield:
-        sched_yield();
+        Scheduler::yield();
         break;
         break;
     case Syscall::SC_putch:
     case Syscall::SC_putch:
         Console::the().putChar(arg1 & 0xff);
         Console::the().putChar(arg1 & 0xff);

+ 3 - 2
Kernel/i8253.cpp

@@ -5,6 +5,7 @@
 #include "Process.h"
 #include "Process.h"
 #include "system.h"
 #include "system.h"
 #include "PIC.h"
 #include "PIC.h"
+#include "Scheduler.h"
 
 
 #define IRQ_TIMER                0
 #define IRQ_TIMER                0
 
 
@@ -103,9 +104,9 @@ void clock_handle()
         current->tss().esp = regs.esp_if_crossRing;
         current->tss().esp = regs.esp_if_crossRing;
     }
     }
 
 
-    if (!scheduleNewProcess())
+    if (!Scheduler::pick_next())
         return;
         return;
-    Process::prepare_for_iret_to_new_process();
+    Scheduler::prepare_for_iret_to_new_process();
 
 
     // Set the NT (nested task) flag.
     // Set the NT (nested task) flag.
     asm(
     asm(

+ 2 - 1
Kernel/init.cpp

@@ -26,6 +26,7 @@
 #include "ProcFileSystem.h"
 #include "ProcFileSystem.h"
 #include "RTC.h"
 #include "RTC.h"
 #include "VirtualConsole.h"
 #include "VirtualConsole.h"
+#include "Scheduler.h"
 
 
 #define TEST_VFS
 #define TEST_VFS
 #define KSYMS
 #define KSYMS
@@ -311,7 +312,7 @@ void init()
     Process::create_kernel_process(undertaker_main, "undertaker");
     Process::create_kernel_process(undertaker_main, "undertaker");
     Process::create_kernel_process(init_stage2, "init");
     Process::create_kernel_process(init_stage2, "init");
 
 
-    scheduleNewProcess();
+    Scheduler::pick_next();
 
 
     sti();
     sti();