ソースを参照

Implement sending signals to blocked-in-kernel processes.

This is dirty but pretty cool! If we have a pending, unmasked signal for
a process that's blocked inside the kernel, we set up alternate stacks
for that process and unblock it to execute the signal handler.

A slightly different return trampoline is used here: since we need to
get back into the kernel, a dedicated syscall is used (sys$sigreturn.)

This restores the TSS contents of the process to the state it was in
while we were originally blocking in the kernel.

NOTE: There's currently only one "kernel resume TSS" so signal nesting
definitely won't work.
Andreas Kling 6 年 前
コミット
03a8357e84

+ 114 - 14
Kernel/Process.cpp

@@ -15,6 +15,7 @@
 #include "ProcFileSystem.h"
 #include <AK/StdLib.h>
 #include <LibC/signal_numbers.h>
+#include "Syscall.h"
 
 //#define DEBUG_IO
 //#define TASK_DEBUG
@@ -371,9 +372,9 @@ int Process::exec(const String& path, Vector<String>&& arguments, Vector<String>
     m_tss.gs = 0x23;
     m_tss.ss = 0x23;
     m_tss.cr3 = (dword)m_page_directory;
-    auto* stack_region = allocate_region(LinearAddress(), defaultStackSize, "stack");
-    ASSERT(stack_region);
-    m_stackTop3 = stack_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
+    m_stack_region = allocate_region(LinearAddress(), defaultStackSize, "stack");
+    ASSERT(m_stack_region);
+    m_stackTop3 = m_stack_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
     m_tss.esp = m_stackTop3;
     m_tss.ss0 = 0x10;
     m_tss.esp0 = old_esp0;
@@ -783,20 +784,49 @@ void Process::dispatch_signal(byte signal)
         return terminate_due_to_signal(signal);
     }
 
+    m_tss_to_resume_kernel = m_tss;
+#ifdef SIGNAL_DEBUG
+    kprintf("resume tss pc: %w:%x\n", m_tss_to_resume_kernel.cs, m_tss_to_resume_kernel.eip);
+#endif
+
+    word ret_ss = m_tss.ss;
+    dword ret_esp = m_tss.esp;
     word ret_cs = m_tss.cs;
     dword ret_eip = m_tss.eip;
     dword ret_eflags = m_tss.eflags;
 
+    bool interrupting_in_kernel = (ret_cs & 3) == 0;
+
     if ((ret_cs & 3) == 0) {
         // FIXME: Only processes blocked in the kernel can be interrupted here; handle processes that are actively running kernel code too.
-        kprintf("Boo! dispatch_signal in %s(%u) with return to %w:%x\n", name().characters(), pid(), ret_cs, ret_eip);
-        ASSERT_NOT_REACHED();
+        dbgprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", name().characters(), pid(), toString(state()), ret_cs, ret_eip);
+        ASSERT(is_blocked());
     }
 
     ProcessPagingScope pagingScope(*this);
+
+    if (interrupting_in_kernel) {
+        if (!m_signal_stack_user_region) {
+            // Lazily allocate the two stacks used while the handler runs on behalf of a process
+            // that was interrupted in the kernel: one for ring 3 (ss/esp) and one for ring 0 (ss0/esp0).
+            m_signal_stack_user_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (user)");
+            ASSERT(m_signal_stack_user_region);
+            m_signal_stack_kernel_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (kernel)");
+            // Verify the region that was just allocated; it is dereferenced right after this block to compute esp0.
+            ASSERT(m_signal_stack_kernel_region);
+        }
+        m_tss.ss = 0x23;
+        m_tss.esp = m_signal_stack_user_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
+        m_tss.ss0 = 0x10;
+        m_tss.esp0 = m_signal_stack_kernel_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
+        push_value_on_stack(ret_eflags);
+        push_value_on_stack(ret_cs);
+        push_value_on_stack(ret_eip);
+    } else {
+        push_value_on_stack(ret_cs);
+        push_value_on_stack(ret_eip);
+        push_value_on_stack(ret_eflags);
+    }
+
+    // PUSHA
     dword old_esp = m_tss.esp;
-    push_value_on_stack(ret_eip);
-    push_value_on_stack(ret_eflags);
     push_value_on_stack(m_tss.eax);
     push_value_on_stack(m_tss.ecx);
     push_value_on_stack(m_tss.edx);
@@ -805,31 +835,66 @@ void Process::dispatch_signal(byte signal)
     push_value_on_stack(m_tss.ebp);
     push_value_on_stack(m_tss.esi);
     push_value_on_stack(m_tss.edi);
+
     m_tss.eax = (dword)signal;
     m_tss.cs = 0x1b;
+    m_tss.ds = 0x23;
+    m_tss.es = 0x23;
+    m_tss.fs = 0x23;
+    m_tss.gs = 0x23;
     m_tss.eip = handler_laddr.get();
 
-    if (m_return_from_signal_trampoline.is_null()) {
+    if (m_return_to_ring3_from_signal_trampoline.is_null()) {
         // FIXME: This should be a global trampoline shared by all processes, not one created per process!
         // FIXME: Remap as read-only after setup.
         auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "signal_trampoline", true, true);
-        m_return_from_signal_trampoline = region->linearAddress;
-        byte* code_ptr = m_return_from_signal_trampoline.asPtr();
+        m_return_to_ring3_from_signal_trampoline = region->linearAddress;
+        byte* code_ptr = m_return_to_ring3_from_signal_trampoline.asPtr();
         *code_ptr++ = 0x61; // popa
         *code_ptr++ = 0x9d; // popf
         *code_ptr++ = 0xc3; // ret
         *code_ptr++ = 0x0f; // ud2
         *code_ptr++ = 0x0b;
+
+        m_return_to_ring0_from_signal_trampoline = LinearAddress((dword)code_ptr);
+        *code_ptr++ = 0x61; // popa
+        *code_ptr++ = 0xb8; // mov eax, <dword>
+        *(dword*)code_ptr = Syscall::SC_sigreturn;
+        code_ptr += sizeof(dword);
+        *code_ptr++ = 0xcd; // int 0x80
+        *code_ptr++ = 0x80;
+        *code_ptr++ = 0x0f; // ud2
+        *code_ptr++ = 0x0b;
+
         // FIXME: For !SA_NODEFER, maybe we could do something like emitting an int 0x80 syscall here that
         //        unmasks the signal so it can be received again? I guess then I would need one trampoline
         //        per signal number if it's hard-coded, but it's just a few bytes per each.
     }
 
-    push_value_on_stack(m_return_from_signal_trampoline.get());
+    if (interrupting_in_kernel)
+        push_value_on_stack(m_return_to_ring0_from_signal_trampoline.get());
+    else
+        push_value_on_stack(m_return_to_ring3_from_signal_trampoline.get());
 
     m_pending_signals &= ~(1 << signal);
 
+#ifdef SIGNAL_DEBUG
     dbgprintf("signal: Okay, %s(%u) has been primed\n", name().characters(), pid());
+#endif
+}
+
+void Process::sys$sigreturn() // Syscall issued by the ring-0 signal return trampoline; puts back the TSS that dispatch_signal() saved.
+{
+    InterruptDisabler disabler;
+    m_tss = m_tss_to_resume_kernel; // Restore the register state captured before the handler was set up.
+#ifdef SIGNAL_DEBUG
+    dbgprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
+    dbgprintf(" -> resuming execution at %w:%x\n", m_tss.cs, m_tss.eip);
+#endif
+    loadTaskRegister(s_kernelProcess->selector()); // NOTE(review): presumably keeps the upcoming task switch from saving over the restored m_tss -- confirm.
+    sched_yield(); // Not expected to come back here; reaching the lines below is treated as a failure.
+    kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
+    ASSERT_NOT_REACHED();
 }
 
 void Process::push_value_on_stack(dword value)
@@ -871,7 +936,7 @@ void Process::doHouseKeeping()
 int sched_yield()
 {
     if (!current) {
-        kprintf( "PANIC: yield() with !current" );
+        kprintf("PANIC: sched_yield() with !current");
         HANG;
     }
 
@@ -921,6 +986,18 @@ static void for_each_process_not_in_state(Process::State state, Callback callbac
     }
 }
 
+// Invokes callback(process) for every process in s_processes for which is_blocked() is true.
+// Must be called with interrupts disabled.
+template<typename Callback>
+static void for_each_blocked_process(Callback callback)
+{
+    ASSERT_INTERRUPTS_DISABLED();
+    auto* candidate = s_processes->head();
+    while (candidate) {
+        // Fetch the successor up front, before the callback gets a chance to touch this process.
+        auto* successor = candidate->next();
+        if (candidate->is_blocked())
+            callback(*candidate);
+        candidate = successor;
+    }
+}
+
 bool scheduleNewProcess()
 {
     ASSERT_INTERRUPTS_DISABLED();
@@ -955,6 +1032,7 @@ bool scheduleNewProcess()
 
         if (process->state() == Process::BlockedRead) {
             ASSERT(process->m_fdBlockedOnRead != -1);
+            // FIXME: Block until the amount of data wanted is available.
             if (process->m_file_descriptors[process->m_fdBlockedOnRead]->hasDataAvailableForRead())
                 process->unblock();
             continue;
@@ -980,7 +1058,19 @@ bool scheduleNewProcess()
     for_each_process_not_in_state(Process::Dead, [] (auto& process) {
         if (!process.has_unmasked_pending_signals())
             return;
+        // We know how to interrupt blocked processes, but if they are just executing
+        // at some random point in the kernel, let them continue. They'll be in userspace
+        // sooner or later and we can deliver the signal then.
+        // FIXME: Maybe we could check when returning from a syscall if there's a pending
+        //        signal and dispatch it then and there? Would that be doable without the
+        //        syscall effectively being "interrupted" despite having completed?
+        if (process.in_kernel() && !process.is_blocked())
+            return;
         process.dispatch_one_pending_signal();
+        if (process.is_blocked()) {
+            process.m_was_interrupted_while_blocked = true;
+            process.unblock();
+        }
     });
 
 #ifdef SCHEDULER_DEBUG
@@ -1000,7 +1090,7 @@ bool scheduleNewProcess()
 
         if (process->state() == Process::Runnable || process->state() == Process::Running) {
 #ifdef SCHEDULER_DEBUG
-            dbgprintf("switch to %s(%u) (%p vs %p)\n", process->name().characters(), process->pid(), process, current);
+            dbgprintf("switch to %s(%u)\n", process->name().characters(), process->pid());
 #endif
             return contextSwitch(process);
         }
@@ -1177,6 +1267,8 @@ ssize_t Process::sys$read(int fd, void* outbuf, size_t nread)
             m_fdBlockedOnRead = fd;
             block(BlockedRead);
             sched_yield();
+            if (m_was_interrupted_while_blocked)
+                return -EINTR;
         }
     }
     nread = descriptor->read((byte*)outbuf, nread);
@@ -1345,6 +1437,11 @@ int Process::sys$sleep(unsigned seconds)
     if (!seconds)
         return 0;
     sleep(seconds * TICKS_PER_SECOND);
+    if (m_wakeupTime > system.uptime) {
+        ASSERT(m_was_interrupted_while_blocked);
+        dword ticks_left_until_original_wakeup_time = m_wakeupTime - system.uptime;
+        return ticks_left_until_original_wakeup_time / TICKS_PER_SECOND;
+    }
     return 0;
 }
 
@@ -1407,6 +1504,8 @@ pid_t Process::sys$waitpid(pid_t waitee, int* wstatus, int options)
     m_waitee_status = 0;
     block(BlockedWait);
     sched_yield();
+    if (m_was_interrupted_while_blocked)
+        return -EINTR;
     if (wstatus)
         *wstatus = m_waitee_status;
     return m_waitee;
@@ -1423,7 +1522,8 @@ void Process::block(Process::State state)
 {
     ASSERT(current->state() == Process::Running);
     system.nblocked++;
-    current->set_state(state);
+    m_was_interrupted_while_blocked = false;
+    set_state(state);
 }
 
 void block(Process::State state)

+ 17 - 1
Kernel/Process.h

@@ -51,6 +51,13 @@ public:
     bool isRing0() const { return m_ring == Ring0; }
     bool isRing3() const { return m_ring == Ring3; }
 
+    // True while the process is in one of the blocked states: BlockedSleep, BlockedWait or BlockedRead.
+    bool is_blocked() const
+    {
+        switch (m_state) {
+        case BlockedSleep:
+        case BlockedWait:
+        case BlockedRead:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    // True when the low two bits of the saved CS selector are zero, i.e. the saved context is
+    // ring 0 code (the same test dispatch_signal() applies to the return CS).
+    bool in_kernel() const
+    {
+        return !(m_tss.cs & 0x03);
+    }
+
     static Process* fromPID(pid_t);
     static Process* kernelProcess();
 
@@ -115,6 +122,7 @@ public:
     int sys$kill(pid_t pid, int sig);
     int sys$geterror() { return m_error; }
     void sys$exit(int status);
+    void sys$sigreturn();
     pid_t sys$spawn(const char* path, const char** args, const char** envp);
     pid_t sys$waitpid(pid_t, int* wstatus, int options);
     void* sys$mmap(void*, size_t size);
@@ -212,6 +220,7 @@ private:
     State m_state { Invalid };
     DWORD m_wakeupTime { 0 };
     TSS32 m_tss;
+    TSS32 m_tss_to_resume_kernel;
     Vector<RetainPtr<FileDescriptor>> m_file_descriptors;
     RingLevel m_ring { Ring0 };
     int m_error { 0 };
@@ -243,16 +252,23 @@ private:
     // FIXME: Implement some kind of ASLR?
     LinearAddress m_nextRegion;
 
-    LinearAddress m_return_from_signal_trampoline;
+    LinearAddress m_return_to_ring3_from_signal_trampoline;
+    LinearAddress m_return_to_ring0_from_signal_trampoline;
 
     pid_t m_ppid { 0 };
     mode_t m_umask { 022 };
 
+    bool m_was_interrupted_while_blocked { false };
+
     static void notify_waiters(pid_t waitee, int exit_status, int signal);
 
     Vector<String> m_arguments;
     Vector<String> m_initialEnvironment;
     HashTable<gid_t> m_gids;
+
+    Region* m_stack_region { nullptr };
+    Region* m_signal_stack_user_region { nullptr };
+    Region* m_signal_stack_kernel_region { nullptr };
 };
 
 class ProcessInspectionScope {

+ 5 - 1
Kernel/Syscall.cpp

@@ -54,7 +54,7 @@ static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2,
         Console::the().putChar(arg1 & 0xff);
         break;
     case Syscall::SC_sleep:
-        return current->sys$sleep(arg1);
+        return current->sys$sleep((unsigned)arg1);
     case Syscall::SC_gettimeofday:
         return current->sys$gettimeofday((timeval*)arg1);
     case Syscall::SC_spawn:
@@ -156,6 +156,10 @@ static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2,
         return current->sys$getgroups((int)arg1, (gid_t*)arg2);
     case Syscall::SC_setgroups:
         return current->sys$setgroups((size_t)arg1, (const gid_t*)arg2);
+    case Syscall::SC_sigreturn:
+        current->sys$sigreturn();
+        ASSERT_NOT_REACHED();
+        return 0;
     default:
         kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3);
         break;

+ 2 - 0
Kernel/Syscall.h

@@ -54,6 +54,7 @@
     __ENUMERATE_SYSCALL(umask) \
     __ENUMERATE_SYSCALL(getgroups) \
     __ENUMERATE_SYSCALL(setgroups) \
+    __ENUMERATE_SYSCALL(sigreturn) \
 
 
 #define DO_SYSCALL_A0(function) Syscall::invoke((dword)(function))
@@ -78,6 +79,7 @@ inline constexpr const char* toString(Function function)
     ENUMERATE_SYSCALLS
 #undef __ENUMERATE_SYSCALL
     }
+    return "Unknown";
 }
 
 void initialize();

+ 6 - 2
Userland/sh.cpp

@@ -331,8 +331,12 @@ int main(int, char**)
         char keybuf[16];
         ssize_t nread = read(0, keybuf, sizeof(keybuf));
         if (nread < 0) {
-            printf("failed to read :(\n");
-            return 2;
+            if (errno == EINTR) {
+                // Ignore. :^)
+            } else {
+                perror("read failed");
+                return 2;
+            }
         }
         for (ssize_t i = 0; i < nread; ++i) {
             putchar(keybuf[i]);

+ 42 - 5
Userland/sleep.cpp

@@ -1,10 +1,47 @@
-#include <LibC/unistd.h>
-#include <LibC/stdio.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+#include <AK/String.h>
 
-int main(int c, char** v)
+// Parses str as a non-negative decimal integer.
+// On success sets ok to true and returns the value. On failure (empty string, any character
+// other than '0'..'9', or a value that does not fit in an unsigned) sets ok to false and returns 0.
+static unsigned parseUInt(const String& str, bool& ok)
 {
-    unsigned secs = 10;
-    sleep(secs);
+    ok = false;
+    // An empty string contains no digits and is not a number.
+    if (str.length() == 0)
+        return 0;
+    const unsigned max_value = ~0u;
+    unsigned value = 0;
+    for (size_t i = 0; i < str.length(); ++i) {
+        if (str[i] < '0' || str[i] > '9')
+            return 0;
+        unsigned digit = str[i] - '0';
+        // Reject the input instead of silently wrapping around on overflow.
+        if (value > (max_value - digit) / 10)
+            return 0;
+        value = value * 10 + digit;
+    }
+    ok = true;
+    return value;
+}
+
+void handle_sigint(int) // Deliberately empty SIGINT handler; main() registers it via sigaction().
+{
+}
+
+// sleep <seconds>: sleeps for the given number of seconds and reports how many
+// seconds were left if sleep() returns a non-zero value.
+int main(int argc, char** argv)
+{
+    // Exactly one operand is expected: the number of seconds to sleep.
+    if (argc != 2) {
+        printf("usage: sleep <seconds>\n");
+        return 1;
+    }
+
+    bool parsed_ok;
+    unsigned seconds = parseUInt(argv[1], parsed_ok);
+    if (!parsed_ok) {
+        fprintf(stderr, "Not a valid number of seconds: \"%s\"\n", argv[1]);
+        return 1;
+    }
+
+    // Register the SIGINT handler before going to sleep.
+    struct sigaction action;
+    memset(&action, 0, sizeof(action));
+    action.sa_handler = handle_sigint;
+    sigaction(SIGINT, &action, nullptr);
+
+    unsigned seconds_left = sleep(seconds);
+    if (seconds_left != 0)
+        printf("Sleep interrupted with %u seconds remaining.\n", seconds_left);
     return 0;
 }
 

+ 1 - 1
VirtualFileSystem/Ext2FileSystem.cpp

@@ -7,7 +7,7 @@
 #include <AK/ktime.h>
 #include <AK/kstdio.h>
 #include <AK/BufferStream.h>
-#include "sys-errno.h"
+#include <LibC/errno_numbers.h>
 
 //#define EXT2_DEBUG
 

+ 1 - 1
VirtualFileSystem/FileDescriptor.cpp

@@ -1,7 +1,7 @@
 #include "FileDescriptor.h"
 #include "FileSystem.h"
 #include "CharacterDevice.h"
-#include "sys-errno.h"
+#include <LibC/errno_numbers.h>
 #include "UnixTypes.h"
 #include <AK/BufferStream.h>
 

+ 1 - 1
VirtualFileSystem/FullDevice.cpp

@@ -1,6 +1,6 @@
 #include "FullDevice.h"
 #include "Limits.h"
-#include "sys-errno.h"
+#include <LibC/errno_numbers.h>
 #include <AK/StdLib.h>
 #include <AK/kstdio.h>
 

+ 1 - 1
VirtualFileSystem/VirtualFileSystem.cpp

@@ -6,7 +6,7 @@
 #include <AK/kstdio.h>
 #include <AK/ktime.h>
 #include "CharacterDevice.h"
-#include "sys-errno.h"
+#include <LibC/errno_numbers.h>
 
 //#define VFS_DEBUG