mirror of
https://github.com/LadybirdBrowser/ladybird.git
synced 2024-12-04 05:20:30 +00:00
Implement sending signals to blocked-in-kernel processes.
This is dirty but pretty cool! If we have a pending, unmasked signal for a process that's blocked inside the kernel, we set up alternate stacks for that process and unblock it to execute the signal handler. A slightly different return trampoline is used here: since we need to get back into the kernel, a dedicated syscall is used (sys$sigreturn). This restores the TSS contents of the process to the state it was in while we were originally blocking in the kernel. NOTE: There's currently only one "kernel resume TSS" so signal nesting definitely won't work.
This commit is contained in:
parent
c8b308910e
commit
03a8357e84
Notes:
sideshowbarker
2024-07-19 18:32:19 +09:00
Author: https://github.com/awesomekling Commit: https://github.com/SerenityOS/serenity/commit/03a8357e84a
10 changed files with 190 additions and 27 deletions
|
@ -15,6 +15,7 @@
|
|||
#include "ProcFileSystem.h"
|
||||
#include <AK/StdLib.h>
|
||||
#include <LibC/signal_numbers.h>
|
||||
#include "Syscall.h"
|
||||
|
||||
//#define DEBUG_IO
|
||||
//#define TASK_DEBUG
|
||||
|
@ -371,9 +372,9 @@ int Process::exec(const String& path, Vector<String>&& arguments, Vector<String>
|
|||
m_tss.gs = 0x23;
|
||||
m_tss.ss = 0x23;
|
||||
m_tss.cr3 = (dword)m_page_directory;
|
||||
auto* stack_region = allocate_region(LinearAddress(), defaultStackSize, "stack");
|
||||
ASSERT(stack_region);
|
||||
m_stackTop3 = stack_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
|
||||
m_stack_region = allocate_region(LinearAddress(), defaultStackSize, "stack");
|
||||
ASSERT(m_stack_region);
|
||||
m_stackTop3 = m_stack_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
|
||||
m_tss.esp = m_stackTop3;
|
||||
m_tss.ss0 = 0x10;
|
||||
m_tss.esp0 = old_esp0;
|
||||
|
@ -783,20 +784,49 @@ void Process::dispatch_signal(byte signal)
|
|||
return terminate_due_to_signal(signal);
|
||||
}
|
||||
|
||||
m_tss_to_resume_kernel = m_tss;
|
||||
#ifdef SIGNAL_DEBUG
|
||||
kprintf("resume tss pc: %w:%x\n", m_tss_to_resume_kernel.cs, m_tss_to_resume_kernel.eip);
|
||||
#endif
|
||||
|
||||
word ret_ss = m_tss.ss;
|
||||
dword ret_esp = m_tss.esp;
|
||||
word ret_cs = m_tss.cs;
|
||||
dword ret_eip = m_tss.eip;
|
||||
dword ret_eflags = m_tss.eflags;
|
||||
|
||||
bool interrupting_in_kernel = (ret_cs & 3) == 0;
|
||||
|
||||
if ((ret_cs & 3) == 0) {
|
||||
// FIXME: Handle send_signal to process currently in kernel code.
|
||||
kprintf("Boo! dispatch_signal in %s(%u) with return to %w:%x\n", name().characters(), pid(), ret_cs, ret_eip);
|
||||
ASSERT_NOT_REACHED();
|
||||
dbgprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", name().characters(), pid(), toString(state()), ret_cs, ret_eip);
|
||||
ASSERT(is_blocked());
|
||||
}
|
||||
|
||||
ProcessPagingScope pagingScope(*this);
|
||||
|
||||
if (interrupting_in_kernel) {
|
||||
if (!m_signal_stack_user_region) {
|
||||
m_signal_stack_user_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (user)");
|
||||
ASSERT(m_signal_stack_user_region);
|
||||
m_signal_stack_kernel_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (kernel)");
|
||||
ASSERT(m_signal_stack_user_region);
|
||||
}
|
||||
m_tss.ss = 0x23;
|
||||
m_tss.esp = m_signal_stack_user_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
|
||||
m_tss.ss0 = 0x10;
|
||||
m_tss.esp0 = m_signal_stack_kernel_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
|
||||
push_value_on_stack(ret_eflags);
|
||||
push_value_on_stack(ret_cs);
|
||||
push_value_on_stack(ret_eip);
|
||||
} else {
|
||||
push_value_on_stack(ret_cs);
|
||||
push_value_on_stack(ret_eip);
|
||||
push_value_on_stack(ret_eflags);
|
||||
}
|
||||
|
||||
// PUSHA
|
||||
dword old_esp = m_tss.esp;
|
||||
push_value_on_stack(ret_eip);
|
||||
push_value_on_stack(ret_eflags);
|
||||
push_value_on_stack(m_tss.eax);
|
||||
push_value_on_stack(m_tss.ecx);
|
||||
push_value_on_stack(m_tss.edx);
|
||||
|
@ -805,31 +835,66 @@ void Process::dispatch_signal(byte signal)
|
|||
push_value_on_stack(m_tss.ebp);
|
||||
push_value_on_stack(m_tss.esi);
|
||||
push_value_on_stack(m_tss.edi);
|
||||
|
||||
m_tss.eax = (dword)signal;
|
||||
m_tss.cs = 0x1b;
|
||||
m_tss.ds = 0x23;
|
||||
m_tss.es = 0x23;
|
||||
m_tss.fs = 0x23;
|
||||
m_tss.gs = 0x23;
|
||||
m_tss.eip = handler_laddr.get();
|
||||
|
||||
if (m_return_from_signal_trampoline.is_null()) {
|
||||
if (m_return_to_ring3_from_signal_trampoline.is_null()) {
|
||||
// FIXME: This should be a global trampoline shared by all processes, not one created per process!
|
||||
// FIXME: Remap as read-only after setup.
|
||||
auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "signal_trampoline", true, true);
|
||||
m_return_from_signal_trampoline = region->linearAddress;
|
||||
byte* code_ptr = m_return_from_signal_trampoline.asPtr();
|
||||
m_return_to_ring3_from_signal_trampoline = region->linearAddress;
|
||||
byte* code_ptr = m_return_to_ring3_from_signal_trampoline.asPtr();
|
||||
*code_ptr++ = 0x61; // popa
|
||||
*code_ptr++ = 0x9d; // popf
|
||||
*code_ptr++ = 0xc3; // ret
|
||||
*code_ptr++ = 0x0f; // ud2
|
||||
*code_ptr++ = 0x0b;
|
||||
|
||||
m_return_to_ring0_from_signal_trampoline = LinearAddress((dword)code_ptr);
|
||||
*code_ptr++ = 0x61; // popa
|
||||
*code_ptr++ = 0xb8; // mov eax, <dword>
|
||||
*(dword*)code_ptr = Syscall::SC_sigreturn;
|
||||
code_ptr += sizeof(dword);
|
||||
*code_ptr++ = 0xcd; // int 0x80
|
||||
*code_ptr++ = 0x80;
|
||||
*code_ptr++ = 0x0f; // ud2
|
||||
*code_ptr++ = 0x0b;
|
||||
|
||||
// FIXME: For !SA_NODEFER, maybe we could do something like emitting an int 0x80 syscall here that
|
||||
// unmasks the signal so it can be received again? I guess then I would need one trampoline
|
||||
// per signal number if it's hard-coded, but it's just a few bytes per each.
|
||||
}
|
||||
|
||||
push_value_on_stack(m_return_from_signal_trampoline.get());
|
||||
if (interrupting_in_kernel)
|
||||
push_value_on_stack(m_return_to_ring0_from_signal_trampoline.get());
|
||||
else
|
||||
push_value_on_stack(m_return_to_ring3_from_signal_trampoline.get());
|
||||
|
||||
m_pending_signals &= ~(1 << signal);
|
||||
|
||||
#ifdef SIGNAL_DEBUG
|
||||
dbgprintf("signal: Okay, %s(%u) has been primed\n", name().characters(), pid());
|
||||
#endif
|
||||
}
|
||||
|
||||
// Syscall issued by the ring-0 signal trampoline (mov eax, SC_sigreturn; int 0x80) after a
// signal handler that interrupted a blocked-in-kernel process has returned.
//
// Copies m_tss_to_resume_kernel -- the snapshot dispatch_signal() takes of m_tss before it
// primes the handler -- back into m_tss, then yields. The call is not expected to return:
// if sched_yield() comes back, a failure message is printed and the kernel asserts.
//
// There is a single snapshot per process, so a nested signal would overwrite it.
void Process::sys$sigreturn()
|
||||
{
|
||||
// NOTE(review): presumably an RAII guard that keeps interrupts off for the rest of this
// function -- confirm against InterruptDisabler.
InterruptDisabler disabler;
|
||||
// Put back the register state saved when the signal was dispatched.
m_tss = m_tss_to_resume_kernel;
|
||||
#ifdef SIGNAL_DEBUG
|
||||
dbgprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
|
||||
dbgprintf(" -> resuming execution at %w:%x\n", m_tss.cs, m_tss.eip);
|
||||
#endif
|
||||
// NOTE(review): looks like this points the task register at the kernel process so that the
// yield below does not save the current CPU state over the m_tss just restored -- confirm.
loadTaskRegister(s_kernelProcess->selector());
|
||||
sched_yield();
|
||||
// Only reached if the yield returned to this context, which is treated as fatal.
kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void Process::push_value_on_stack(dword value)
|
||||
|
@ -871,7 +936,7 @@ void Process::doHouseKeeping()
|
|||
int sched_yield()
|
||||
{
|
||||
if (!current) {
|
||||
kprintf( "PANIC: yield() with !current" );
|
||||
kprintf("PANIC: sched_yield() with !current");
|
||||
HANG;
|
||||
}
|
||||
|
||||
|
@ -921,6 +986,18 @@ static void for_each_process_not_in_state(Process::State state, Callback callbac
|
|||
}
|
||||
}
|
||||
|
||||
template<typename Callback>
|
||||
static void for_each_blocked_process(Callback callback)
|
||||
{
|
||||
ASSERT_INTERRUPTS_DISABLED();
|
||||
for (auto* process = s_processes->head(); process;) {
|
||||
auto* next_process = process->next();
|
||||
if (process->is_blocked())
|
||||
callback(*process);
|
||||
process = next_process;
|
||||
}
|
||||
}
|
||||
|
||||
bool scheduleNewProcess()
|
||||
{
|
||||
ASSERT_INTERRUPTS_DISABLED();
|
||||
|
@ -955,6 +1032,7 @@ bool scheduleNewProcess()
|
|||
|
||||
if (process->state() == Process::BlockedRead) {
|
||||
ASSERT(process->m_fdBlockedOnRead != -1);
|
||||
// FIXME: Block until the amount of data wanted is available.
|
||||
if (process->m_file_descriptors[process->m_fdBlockedOnRead]->hasDataAvailableForRead())
|
||||
process->unblock();
|
||||
continue;
|
||||
|
@ -980,7 +1058,19 @@ bool scheduleNewProcess()
|
|||
for_each_process_not_in_state(Process::Dead, [] (auto& process) {
|
||||
if (!process.has_unmasked_pending_signals())
|
||||
return;
|
||||
// We know how to interrupt blocked processes, but if they are just executing
|
||||
// at some random point in the kernel, let them continue. They'll be in userspace
|
||||
// sooner or later and we can deliver the signal then.
|
||||
// FIXME: Maybe we could check when returning from a syscall if there's a pending
|
||||
// signal and dispatch it then and there? Would that be doable without the
|
||||
// syscall effectively being "interrupted" despite having completed?
|
||||
if (process.in_kernel() && !process.is_blocked())
|
||||
return;
|
||||
process.dispatch_one_pending_signal();
|
||||
if (process.is_blocked()) {
|
||||
process.m_was_interrupted_while_blocked = true;
|
||||
process.unblock();
|
||||
}
|
||||
});
|
||||
|
||||
#ifdef SCHEDULER_DEBUG
|
||||
|
@ -1000,7 +1090,7 @@ bool scheduleNewProcess()
|
|||
|
||||
if (process->state() == Process::Runnable || process->state() == Process::Running) {
|
||||
#ifdef SCHEDULER_DEBUG
|
||||
dbgprintf("switch to %s(%u) (%p vs %p)\n", process->name().characters(), process->pid(), process, current);
|
||||
dbgprintf("switch to %s(%u)\n", process->name().characters(), process->pid());
|
||||
#endif
|
||||
return contextSwitch(process);
|
||||
}
|
||||
|
@ -1177,6 +1267,8 @@ ssize_t Process::sys$read(int fd, void* outbuf, size_t nread)
|
|||
m_fdBlockedOnRead = fd;
|
||||
block(BlockedRead);
|
||||
sched_yield();
|
||||
if (m_was_interrupted_while_blocked)
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
nread = descriptor->read((byte*)outbuf, nread);
|
||||
|
@ -1345,6 +1437,11 @@ int Process::sys$sleep(unsigned seconds)
|
|||
if (!seconds)
|
||||
return 0;
|
||||
sleep(seconds * TICKS_PER_SECOND);
|
||||
if (m_wakeupTime > system.uptime) {
|
||||
ASSERT(m_was_interrupted_while_blocked);
|
||||
dword ticks_left_until_original_wakeup_time = m_wakeupTime - system.uptime;
|
||||
return ticks_left_until_original_wakeup_time / TICKS_PER_SECOND;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -1407,6 +1504,8 @@ pid_t Process::sys$waitpid(pid_t waitee, int* wstatus, int options)
|
|||
m_waitee_status = 0;
|
||||
block(BlockedWait);
|
||||
sched_yield();
|
||||
if (m_was_interrupted_while_blocked)
|
||||
return -EINTR;
|
||||
if (wstatus)
|
||||
*wstatus = m_waitee_status;
|
||||
return m_waitee;
|
||||
|
@ -1423,7 +1522,8 @@ void Process::block(Process::State state)
|
|||
{
|
||||
ASSERT(current->state() == Process::Running);
|
||||
system.nblocked++;
|
||||
current->set_state(state);
|
||||
m_was_interrupted_while_blocked = false;
|
||||
set_state(state);
|
||||
}
|
||||
|
||||
void block(Process::State state)
|
||||
|
|
|
@ -51,6 +51,13 @@ public:
|
|||
bool isRing0() const { return m_ring == Ring0; }
|
||||
bool isRing3() const { return m_ring == Ring3; }
|
||||
|
||||
bool is_blocked() const
|
||||
{
|
||||
return m_state == BlockedSleep || m_state == BlockedWait || m_state == BlockedRead;
|
||||
}
|
||||
|
||||
// Returns true when the low two bits of the saved code-segment selector in m_tss are zero,
// i.e. the saved context would resume in kernel code (dispatch_signal() applies the same
// test to its return CS to detect an in-kernel interruption).
bool in_kernel() const
{
    const auto privilege_bits = m_tss.cs & 0x03;
    return privilege_bits == 0;
}
|
||||
|
||||
static Process* fromPID(pid_t);
|
||||
static Process* kernelProcess();
|
||||
|
||||
|
@ -115,6 +122,7 @@ public:
|
|||
int sys$kill(pid_t pid, int sig);
|
||||
int sys$geterror() { return m_error; }
|
||||
void sys$exit(int status);
|
||||
void sys$sigreturn();
|
||||
pid_t sys$spawn(const char* path, const char** args, const char** envp);
|
||||
pid_t sys$waitpid(pid_t, int* wstatus, int options);
|
||||
void* sys$mmap(void*, size_t size);
|
||||
|
@ -212,6 +220,7 @@ private:
|
|||
State m_state { Invalid };
|
||||
DWORD m_wakeupTime { 0 };
|
||||
TSS32 m_tss;
|
||||
TSS32 m_tss_to_resume_kernel;
|
||||
Vector<RetainPtr<FileDescriptor>> m_file_descriptors;
|
||||
RingLevel m_ring { Ring0 };
|
||||
int m_error { 0 };
|
||||
|
@ -243,16 +252,23 @@ private:
|
|||
// FIXME: Implement some kind of ASLR?
|
||||
LinearAddress m_nextRegion;
|
||||
|
||||
LinearAddress m_return_from_signal_trampoline;
|
||||
LinearAddress m_return_to_ring3_from_signal_trampoline;
|
||||
LinearAddress m_return_to_ring0_from_signal_trampoline;
|
||||
|
||||
pid_t m_ppid { 0 };
|
||||
mode_t m_umask { 022 };
|
||||
|
||||
bool m_was_interrupted_while_blocked { false };
|
||||
|
||||
static void notify_waiters(pid_t waitee, int exit_status, int signal);
|
||||
|
||||
Vector<String> m_arguments;
|
||||
Vector<String> m_initialEnvironment;
|
||||
HashTable<gid_t> m_gids;
|
||||
|
||||
Region* m_stack_region { nullptr };
|
||||
Region* m_signal_stack_user_region { nullptr };
|
||||
Region* m_signal_stack_kernel_region { nullptr };
|
||||
};
|
||||
|
||||
class ProcessInspectionScope {
|
||||
|
|
|
@ -54,7 +54,7 @@ static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2,
|
|||
Console::the().putChar(arg1 & 0xff);
|
||||
break;
|
||||
case Syscall::SC_sleep:
|
||||
return current->sys$sleep(arg1);
|
||||
return current->sys$sleep((unsigned)arg1);
|
||||
case Syscall::SC_gettimeofday:
|
||||
return current->sys$gettimeofday((timeval*)arg1);
|
||||
case Syscall::SC_spawn:
|
||||
|
@ -156,6 +156,10 @@ static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2,
|
|||
return current->sys$getgroups((int)arg1, (gid_t*)arg2);
|
||||
case Syscall::SC_setgroups:
|
||||
return current->sys$setgroups((size_t)arg1, (const gid_t*)arg2);
|
||||
case Syscall::SC_sigreturn:
|
||||
current->sys$sigreturn();
|
||||
ASSERT_NOT_REACHED();
|
||||
return 0;
|
||||
default:
|
||||
kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3);
|
||||
break;
|
||||
|
|
|
@ -54,6 +54,7 @@
|
|||
__ENUMERATE_SYSCALL(umask) \
|
||||
__ENUMERATE_SYSCALL(getgroups) \
|
||||
__ENUMERATE_SYSCALL(setgroups) \
|
||||
__ENUMERATE_SYSCALL(sigreturn) \
|
||||
|
||||
|
||||
#define DO_SYSCALL_A0(function) Syscall::invoke((dword)(function))
|
||||
|
@ -78,6 +79,7 @@ inline constexpr const char* toString(Function function)
|
|||
ENUMERATE_SYSCALLS
|
||||
#undef __ENUMERATE_SYSCALL
|
||||
}
|
||||
return "Unknown";
|
||||
}
|
||||
|
||||
void initialize();
|
||||
|
|
|
@ -331,8 +331,12 @@ int main(int, char**)
|
|||
char keybuf[16];
|
||||
ssize_t nread = read(0, keybuf, sizeof(keybuf));
|
||||
if (nread < 0) {
|
||||
printf("failed to read :(\n");
|
||||
return 2;
|
||||
if (errno == EINTR) {
|
||||
// Ignore. :^)
|
||||
} else {
|
||||
perror("read failed");
|
||||
return 2;
|
||||
}
|
||||
}
|
||||
for (ssize_t i = 0; i < nread; ++i) {
|
||||
putchar(keybuf[i]);
|
||||
|
|
|
@ -1,10 +1,47 @@
|
|||
#include <LibC/unistd.h>
|
||||
#include <LibC/stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <signal.h>
|
||||
#include <AK/String.h>
|
||||
|
||||
int main(int c, char** v)
|
||||
static unsigned parseUInt(const String& str, bool& ok)
|
||||
{
|
||||
unsigned secs = 10;
|
||||
sleep(secs);
|
||||
unsigned value = 0;
|
||||
for (size_t i = 0; i < str.length(); ++i) {
|
||||
if (str[i] < '0' || str[i] > '9') {
|
||||
ok = false;
|
||||
return 0;
|
||||
}
|
||||
value = value * 10;
|
||||
value += str[i] - '0';
|
||||
}
|
||||
ok = true;
|
||||
return value;
|
||||
}
|
||||
|
||||
// SIGINT handler installed by main(). Deliberately does nothing: having a handler at all is
// what matters here -- presumably so that SIGINT interrupts sleep() instead of triggering the
// default action; confirm against the kernel's signal dispatch.
void handle_sigint(int) { }
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if (argc != 2) {
|
||||
printf("usage: sleep <seconds>\n");
|
||||
return 1;
|
||||
}
|
||||
bool ok;
|
||||
unsigned secs = parseUInt(argv[1], ok);
|
||||
if (!ok) {
|
||||
fprintf(stderr, "Not a valid number of seconds: \"%s\"\n", argv[1]);
|
||||
return 1;
|
||||
}
|
||||
struct sigaction sa;
|
||||
memset(&sa, 0, sizeof(struct sigaction));
|
||||
sa.sa_handler = handle_sigint;
|
||||
sigaction(SIGINT, &sa, nullptr);
|
||||
unsigned remaining = sleep(secs);
|
||||
if (remaining) {
|
||||
printf("Sleep interrupted with %u seconds remaining.\n", remaining);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
|
@ -7,7 +7,7 @@
|
|||
#include <AK/ktime.h>
|
||||
#include <AK/kstdio.h>
|
||||
#include <AK/BufferStream.h>
|
||||
#include "sys-errno.h"
|
||||
#include <LibC/errno_numbers.h>
|
||||
|
||||
//#define EXT2_DEBUG
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#include "FileDescriptor.h"
|
||||
#include "FileSystem.h"
|
||||
#include "CharacterDevice.h"
|
||||
#include "sys-errno.h"
|
||||
#include <LibC/errno_numbers.h>
|
||||
#include "UnixTypes.h"
|
||||
#include <AK/BufferStream.h>
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
#include "FullDevice.h"
|
||||
#include "Limits.h"
|
||||
#include "sys-errno.h"
|
||||
#include <LibC/errno_numbers.h>
|
||||
#include <AK/StdLib.h>
|
||||
#include <AK/kstdio.h>
|
||||
|
||||
|
|
|
@ -6,7 +6,7 @@
|
|||
#include <AK/kstdio.h>
|
||||
#include <AK/ktime.h>
|
||||
#include "CharacterDevice.h"
|
||||
#include "sys-errno.h"
|
||||
#include <LibC/errno_numbers.h>
|
||||
|
||||
//#define VFS_DEBUG
|
||||
|
||||
|
|
Loading…
Reference in a new issue