ソースを参照

Implement fork()!

This is quite cool! The syscall entry point plumbs the register dump
down to sys$fork(), which uses it to set up the child process's TSS
in order to resume execution right after the int 0x80 fork() call. :^)

This works pretty well, although there is some problem with the kernel
alias mappings used to clone the parent process's regions. If I disable
the MM::release_page_directory() code, there's no problem. Probably there's
a premature freeing of a physical page somehow.
Andreas Kling 6 年 前
コミット
8accc92c3c

+ 20 - 0
Kernel/MemoryManager.cpp

@@ -407,3 +407,23 @@ bool MemoryManager::validate_user_write(const Process& process, LinearAddress la
         return false;
     return true;
 }
+
+RetainPtr<Region> Region::clone()
+{
+    // Builds a new Region with the same linear address, size and name as this
+    // one, backed by a freshly created zone of equal size, and copies `size`
+    // bytes of this region's contents into it through temporary kernel aliases.
+    // Both guard objects below stay alive until the function returns.
+    InterruptDisabler interrupt_guard;
+    KernelPagingScope paging_guard;
+
+    // FIXME: Implement COW regions.
+    auto fresh_zone = MM.createZone(zone->size());
+    auto duplicate = adopt(*new Region(linearAddress, size, move(fresh_zone), String(name)));
+
+    // FIXME: It would be cool to make the source alias a read-only mapping.
+    byte* from = MM.create_kernel_alias_for_region(*this);
+    byte* to = MM.create_kernel_alias_for_region(*duplicate);
+
+    memcpy(to, from, size);
+
+    // The aliases are released in the reverse order of their creation.
+    MM.remove_kernel_alias_for_region(*duplicate, to);
+    MM.remove_kernel_alias_for_region(*this, from);
+    return duplicate;
+}

+ 2 - 0
Kernel/MemoryManager.h

@@ -40,6 +40,8 @@ private:
 struct Region : public Retainable<Region> {
     Region(LinearAddress, size_t, RetainPtr<Zone>&&, String&&);
     ~Region();
+
+    RetainPtr<Region> clone();
     LinearAddress linearAddress;
     size_t size { 0 };
     RetainPtr<Zone> zone;

+ 130 - 71
Kernel/Process.cpp

@@ -105,30 +105,6 @@ void Process::initialize()
     loadTaskRegister(s_kernelProcess->selector());
 }
 
-void Process::allocateLDT() // Removed by this commit: allocated a GDT entry plus a Descriptor table and stored the selector in m_tss.ldt.
-{
-    ASSERT(!m_tss.ldt); // Must only run while no LDT selector has been recorded in the TSS yet.
-    static const WORD numLDTEntries = 4;
-    m_ldt_selector = gdt_alloc_entry();
-    m_ldtEntries = new Descriptor[numLDTEntries];
-#if 0
-    kprintf("new ldt selector = %x\n", m_ldt_selector);
-    kprintf("new ldt table at = %p\n", m_ldtEntries);
-    kprintf("new ldt table size = %u\n", (numLDTEntries * 8) - 1);
-#endif
-    Descriptor& ldt = getGDTEntry(m_ldt_selector); // The GDT descriptor that gets filled in to describe the new table.
-    ldt.setBase(m_ldtEntries);
-    ldt.setLimit(numLDTEntries * 8 - 1); // Same value the disabled debug print above calls the "table size".
-    ldt.dpl = 0;
-    ldt.segment_present = 1;
-    ldt.granularity = 0;
-    ldt.zero = 0;
-    ldt.operation_size = 1;
-    ldt.descriptor_type = 0;
-    ldt.type = Descriptor::LDT;
-    m_tss.ldt = m_ldt_selector; // Record the selector in the TSS; this is the field the ASSERT at the top checks.
-}
-
 template<typename Callback>
 static void forEachProcess(Callback callback)
 {
@@ -235,6 +211,77 @@ int Process::sys$gethostname(char* buffer, size_t size)
     return 0;
 }
 
+Process* Process::fork(RegisterDump& regs)
+{
+    // Creates a child of this process. The child is constructed with this
+    // process's name, uid, gid, ring, cwd, executable and tty (and with `this`
+    // as its fork parent), receives a clone of every region together with the
+    // subregions that refer to it, and has its TSS filled in from `regs` with
+    // eax forced to 0. It is then added to ProcFileSystem and prepended to
+    // s_processes before being returned.
+    auto* child = new Process(String(m_name), m_uid, m_gid, m_pid, m_ring, m_cwd.copyRef(), m_executable.copyRef(), m_tty, this);
+#ifdef FORK_DEBUG
+    dbgprintf("fork: child=%p\n", child);
+#endif
+
+#if 0
+    // FIXME: An honest fork() would copy these. Needs a Vector copy ctor.
+    child->m_arguments = m_arguments;
+    child->m_initialEnvironment = m_initialEnvironment;
+#endif
+
+    // Clone each region, then re-create in the child every subregion that pointed at it.
+    for (auto& parent_region : m_regions) {
+#ifdef FORK_DEBUG
+        dbgprintf("fork: cloning Region{%p}\n", parent_region.ptr());
+#endif
+        auto region_copy = parent_region->clone();
+        // FIXME: Move subregions into Region?
+        for (auto& parent_subregion : m_subregions) {
+            const bool belongs_to_region = parent_subregion->region.ptr() == parent_region.ptr();
+            if (!belongs_to_region)
+                continue;
+#ifdef FORK_DEBUG
+            dbgprintf("fork: cloning Subregion{%p}\n", parent_subregion.ptr());
+#endif
+            auto subregion_copy = make<Subregion>(*region_copy, parent_subregion->offset, parent_subregion->size, parent_subregion->linearAddress, String(parent_subregion->name));
+            child->m_subregions.append(move(subregion_copy));
+            MM.mapSubregion(*child, *child->m_subregions.last());
+        }
+        child->m_regions.append(move(region_copy));
+        MM.mapRegion(*child, *child->m_regions.last());
+    }
+
+    auto& tss = child->m_tss;
+
+    // General-purpose registers come straight from the register dump, except eax.
+    tss.eax = 0; // fork() returns 0 in the child :^)
+    tss.ebx = regs.ebx;
+    tss.ecx = regs.ecx;
+    tss.edx = regs.edx;
+    tss.esi = regs.esi;
+    tss.edi = regs.edi;
+    tss.ebp = regs.ebp;
+
+    // Stack pointer and stack segment are taken from the *_if_crossRing fields of the dump.
+    tss.esp = regs.esp_if_crossRing;
+    tss.ss = regs.ss_if_crossRing;
+
+    // Flags, instruction pointer and the remaining segment registers.
+    tss.eflags = regs.eflags;
+    tss.eip = regs.eip;
+    tss.cs = regs.cs;
+    tss.ds = regs.ds;
+    tss.es = regs.es;
+    tss.fs = regs.fs;
+    tss.gs = regs.gs;
+
+#ifdef FORK_DEBUG
+    dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x\n", child->m_tss.cs, child->m_tss.eip, child->m_tss.ss, child->m_tss.esp);
+#endif
+
+    ProcFileSystem::the().addProcess(*child);
+
+    s_processes->prepend(child);
+    system.nprocess++;
+#ifdef TASK_DEBUG
+    kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
+#endif
+    return child;
+}
+
+pid_t Process::sys$fork(RegisterDump& regs)
+{
+    // Syscall-facing wrapper around fork(): asserts that a child was produced
+    // and returns that child's pid.
+    Process* forked = fork(regs);
+    ASSERT(forked);
+    return forked->pid();
+}
+
 int Process::sys$spawn(const char* path, const char** args)
 {
     if (args) {
@@ -413,9 +460,9 @@ Process* Process::createKernelProcess(void (*e)(), String&& name)
     return process;
 }
 
-Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty)
+Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty, Process* fork_parent)
     : m_name(move(name))
-    , m_pid(next_pid++)
+    , m_pid(next_pid++) // FIXME: RACE: This variable looks racy!
     , m_uid(uid)
     , m_gid(gid)
     , m_state(Runnable)
@@ -425,57 +472,71 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel
     , m_tty(tty)
     , m_parentPID(parentPID)
 {
-    {
+    if (fork_parent) {
+        m_sid = fork_parent->m_sid;
+        m_pgid = fork_parent->m_pgid;
+    } else {
         // FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though..
         InterruptDisabler disabler;
         if (auto* parent = Process::fromPID(m_parentPID)) {
             m_sid = parent->m_sid;
             m_pgid = parent->m_pgid;
         }
-
     }
 
     m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory));
     MM.populate_page_directory(*this);
 
-    m_file_descriptors.resize(m_max_open_file_descriptors);
-
-    if (tty) {
-        m_file_descriptors[0] = tty->open(O_RDONLY);
-        m_file_descriptors[1] = tty->open(O_WRONLY);
-        m_file_descriptors[2] = tty->open(O_WRONLY);
-    }
-
-    m_nextRegion = LinearAddress(0x10000000);
-
-    memset(&m_tss, 0, sizeof(m_tss));
-
-    if (isRing3()) {
-        memset(&m_ldtEntries, 0, sizeof(m_ldtEntries));
-        allocateLDT();
+    if (fork_parent) {
+        m_file_descriptors.resize(fork_parent->m_file_descriptors.size());
+        for (size_t i = 0; i < fork_parent->m_file_descriptors.size(); ++i) {
+            if (!fork_parent->m_file_descriptors[i])
+                continue;
+#ifdef FORK_DEBUG
+            dbgprintf("fork: cloning fd %u... (%p) istty? %um\n", i, fork_parent->m_file_descriptors[i].ptr(), fork_parent->m_file_descriptors[i]->isTTY());
+#endif
+            m_file_descriptors[i] = fork_parent->m_file_descriptors[i]->clone();
+        }
+    } else {
+        m_file_descriptors.resize(m_max_open_file_descriptors);
+        if (tty) {
+            m_file_descriptors[0] = tty->open(O_RDONLY);
+            m_file_descriptors[1] = tty->open(O_WRONLY);
+            m_file_descriptors[2] = tty->open(O_WRONLY);
+        }
     }
 
-    // Only IF is set when a process boots.
-    m_tss.eflags = 0x0202;
+    if (fork_parent)
+        m_nextRegion = fork_parent->m_nextRegion;
+    else
+        m_nextRegion = LinearAddress(0x10000000);
 
-    word cs, ds, ss;
-
-    if (isRing0()) {
-        cs = 0x08;
-        ds = 0x10;
-        ss = 0x10;
+    if (fork_parent) {
+        memcpy(&m_tss, &fork_parent->m_tss, sizeof(m_tss));
     } else {
-        cs = 0x1b;
-        ds = 0x23;
-        ss = 0x23;
-    }
+        memset(&m_tss, 0, sizeof(m_tss));
+
+        // Only IF is set when a process boots.
+        m_tss.eflags = 0x0202;
+        word cs, ds, ss;
+
+        if (isRing0()) {
+            cs = 0x08;
+            ds = 0x10;
+            ss = 0x10;
+        } else {
+            cs = 0x1b;
+            ds = 0x23;
+            ss = 0x23;
+        }
 
-    m_tss.ds = ds;
-    m_tss.es = ds;
-    m_tss.fs = ds;
-    m_tss.gs = ds;
-    m_tss.ss = ss;
-    m_tss.cs = cs;
+        m_tss.ds = ds;
+        m_tss.es = ds;
+        m_tss.fs = ds;
+        m_tss.gs = ds;
+        m_tss.ss = ss;
+        m_tss.cs = cs;
+    }
 
     m_tss.cr3 = (dword)m_page_directory;
 
@@ -486,10 +547,14 @@ Process::Process(String&& name, uid_t uid, gid_t gid, pid_t parentPID, RingLevel
         m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8;
         m_tss.esp = m_stackTop0;
     } else {
-        auto* region = allocateRegion(defaultStackSize, "stack");
-        ASSERT(region);
-        m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
-        m_tss.esp = m_stackTop3;
+        if (fork_parent) {
+            m_stackTop3 = fork_parent->m_stackTop3;
+        } else {
+            auto* region = allocateRegion(defaultStackSize, "stack");
+            ASSERT(region);
+            m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
+            m_tss.esp = m_stackTop3;
+        }
     }
 
     if (isRing3()) {
@@ -511,12 +576,6 @@ Process::~Process()
     ProcFileSystem::the().removeProcess(*this);
     system.nprocess--;
 
-    if (isRing3()) {
-        delete [] m_ldtEntries;
-        m_ldtEntries = nullptr;
-        gdt_free_entry(m_ldt_selector);
-    }
-
     gdt_free_entry(selector());
 
     if (m_kernelStack) {

+ 4 - 3
Kernel/Process.h

@@ -119,6 +119,7 @@ public:
     int sys$uname(utsname*);
     int sys$readlink(const char*, char*, size_t);
     int sys$ttyname_r(int fd, char*, size_t);
+    pid_t sys$fork(RegisterDump&);
 
     static void initialize();
 
@@ -155,11 +156,13 @@ public:
 
     void send_signal(int signal, Process* sender);
 
+    Process* fork(RegisterDump&);
+
 private:
     friend class MemoryManager;
     friend bool scheduleNewProcess();
 
-    Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr);
+    Process(String&& name, uid_t, gid_t, pid_t parentPID, RingLevel, RetainPtr<VirtualFileSystem::Node>&& cwd = nullptr, RetainPtr<VirtualFileSystem::Node>&& executable = nullptr, TTY* = nullptr, Process* fork_parent = nullptr);
 
     void allocateLDT();
 
@@ -183,8 +186,6 @@ private:
     State m_state { Invalid };
     DWORD m_wakeupTime { 0 };
     TSS32 m_tss;
-    word m_ldt_selector { 0 };
-    Descriptor* m_ldtEntries { nullptr };
     Vector<OwnPtr<FileHandle>> m_file_descriptors;
     RingLevel m_ring { Ring0 };
     int m_error { 0 };

+ 5 - 2
Kernel/Syscall.cpp

@@ -43,7 +43,7 @@ void initialize()
     kprintf("syscall: int 0x80 handler installed\n");
 }
 
-DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
+static DWORD handle(RegisterDump& regs, DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
 {
     ASSERT_INTERRUPTS_ENABLED();
     switch (function) {
@@ -128,6 +128,8 @@ DWORD handle(DWORD function, DWORD arg1, DWORD arg2, DWORD arg3)
         return current->sys$tcgetpgrp((int)arg1);
     case Syscall::PosixTcsetpgrp:
         return current->sys$tcsetpgrp((int)arg1, (pid_t)arg2);
+    case Syscall::PosixFork:
+        return current->sys$fork(regs);
     default:
         kprintf("<%u> int0x80: Unknown function %x requested {%x, %x, %x}\n", current->pid(), function, arg1, arg2, arg3);
         break;
@@ -143,5 +145,6 @@ void syscall_entry(RegisterDump& regs)
     DWORD arg1 = regs.edx;
     DWORD arg2 = regs.ecx;
     DWORD arg3 = regs.ebx;
-    regs.eax = Syscall::handle(function, arg1, arg2, arg3);
+    regs.eax = Syscall::handle(regs, function, arg1, arg2, arg3);
 }
+

+ 1 - 0
Kernel/Syscall.h

@@ -47,6 +47,7 @@ enum Function {
     PosixGetpgrp = 0x2015,
     PosixTcsetpgrp = 0x2016,
     PosixTcgetpgrp = 0x2017,
+    PosixFork = 0x2018,
 };
 
 void initialize();

+ 10 - 0
Kernel/kmalloc.cpp

@@ -26,6 +26,8 @@ typedef struct
 #define ETERNAL_BASE_PHYSICAL 0x200000
 #define BASE_PHYS   0x100000
 
+#define RANGE_SIZE 0x100000
+
 PRIVATE BYTE alloc_map[POOL_SIZE / CHUNK_SIZE / 8];
 
 volatile DWORD sum_alloc = 0;
@@ -36,6 +38,9 @@ volatile size_t kmalloc_sum_page_aligned = 0;
 static byte* s_next_eternal_ptr;
 static byte* s_next_page_aligned_ptr;
 
+static byte* s_end_of_eternal_range;
+static byte* s_end_of_page_aligned_range;
+
 bool is_kmalloc_address(void* ptr)
 {
     if (ptr >= (byte*)ETERNAL_BASE_PHYSICAL && ptr < s_next_eternal_ptr)
@@ -58,12 +63,16 @@ kmalloc_init()
 
     s_next_eternal_ptr = (byte*)ETERNAL_BASE_PHYSICAL;
     s_next_page_aligned_ptr = (byte*)PAGE_ALIGNED_BASE_PHYSICAL;
+
+    s_end_of_eternal_range = s_next_eternal_ptr + RANGE_SIZE;
+    s_end_of_page_aligned_range = s_next_page_aligned_ptr + RANGE_SIZE;
 }
 
 void* kmalloc_eternal(size_t size)
 {
     void* ptr = s_next_eternal_ptr;
     s_next_eternal_ptr += size;
+    ASSERT(s_next_eternal_ptr < s_end_of_eternal_range);
     kmalloc_sum_eternal += size;
     return ptr;
 }
@@ -73,6 +82,7 @@ void* kmalloc_page_aligned(size_t size)
     ASSERT((size % 4096) == 0);
     void* ptr = s_next_page_aligned_ptr;
     s_next_page_aligned_ptr += size;
+    ASSERT(s_next_page_aligned_ptr < s_end_of_page_aligned_range);
     kmalloc_sum_page_aligned += size;
     return ptr;
 }

+ 1 - 0
Kernel/sync.sh

@@ -18,6 +18,7 @@ cp ../Userland/cat mnt/bin/cat
 cp ../Userland/uname mnt/bin/uname
 cp ../Userland/clear mnt/bin/clear
 cp ../Userland/tst mnt/bin/tst
+cp ../Userland/ft mnt/bin/ft
 cp ../Userland/mm mnt/bin/mm
 cp ../Userland/kill mnt/bin/kill
 cp ../Userland/tty mnt/bin/tty

+ 5 - 0
LibC/unistd.cpp

@@ -5,6 +5,11 @@
 
 extern "C" {
 
+pid_t fork()
+{
+    // Issues the PosixFork syscall and returns its result as a pid_t.
+    pid_t rc = Syscall::invoke(Syscall::PosixFork);
+    return rc;
+}
+
 uid_t getuid()
 {
     return Syscall::invoke(Syscall::PosixGetuid);

+ 1 - 0
LibC/unistd.h

@@ -8,6 +8,7 @@ __BEGIN_DECLS
 extern char** environ;
 
 inline int getpagesize() { return 4096; }
+pid_t fork();
 pid_t getsid(pid_t);
 pid_t setsid();
 int setpgid(pid_t pid, pid_t pgid);

+ 1 - 0
Userland/.gitignore

@@ -16,3 +16,4 @@ tst
 mm
 kill
 tty
+ft

+ 5 - 0
Userland/Makefile

@@ -14,6 +14,7 @@ OBJS = \
        tst.o \
        mm.o \
        kill.o \
+       ft.o \
        tty.o
 
 APPS = \
@@ -32,6 +33,7 @@ APPS = \
        tst \
        mm \
        kill \
+       ft \
        tty
 
 ARCH_FLAGS =
@@ -91,6 +93,9 @@ clear: clear.o
 tst: tst.o
 	$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
 
+ft: ft.o
+	$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
+
 mm: mm.o
 	$(LD) -o $@ $(LDFLAGS) $< ../LibC/LibC.a
 

+ 14 - 0
Userland/ft.cpp

@@ -0,0 +1,14 @@
+#include <stdio.h>
+#include <unistd.h>
+
+int main(int argc, char** argv)
+{
+    // Small fork() smoke test: prints one line from the child (fork() == 0)
+    // and one from the parent (fork() == child's pid).
+    (void)argc;
+    (void)argv;
+
+    printf("Testing fork()...\n");
+    pid_t pid = fork();
+    if (pid < 0) {
+        // A negative result means no child was created; don't report it as a child pid.
+        printf("fork() failed\n");
+        return 1;
+    }
+    if (!pid) {
+        printf("child, pid=%d\n", getpid());
+    } else {
+        printf("parent, child pid=%d\n", pid);
+    }
+    return 0;
+}

+ 12 - 0
Userland/sh.cpp

@@ -32,6 +32,13 @@ static int sh_pwd(int, const char**)
     return 0;
 }
 
+static int sh_fork(int, const char**)
+{
+    // Calls fork() and prints the calling process's pid next to fork()'s
+    // return value. Always reports success (0) to the shell.
+    const pid_t fork_result = fork();
+    printf("getpid()=%d, fork()=%d\n", getpid(), fork_result);
+    return 0;
+}
+
 static int sh_exit(int, const char**)
 {
     printf("Good-bye!\n");
@@ -94,6 +101,11 @@ static bool handle_builtin(int argc, const char** argv, int& retval)
         retval = sh_exit(argc, argv);
         return true;
     }
+
+    if (!strcmp(argv[0], "fork")) {
+        retval = sh_fork(argc, argv);
+        return true;
+    }
     return false;
 }
 

+ 13 - 0
VirtualFileSystem/FileHandle.cpp

@@ -15,6 +15,19 @@ FileHandle::~FileHandle()
 {
 }
 
+OwnPtr<FileHandle> FileHandle::clone()
+{
+    // Returns a new handle on a copied reference to the same vnode, carrying
+    // over this handle's current offset (and, in SERENITY builds, its fd and
+    // blocking flag). Yields nullptr if the new handle could not be made.
+    auto duplicate = make<FileHandle>(m_vnode.copyRef());
+    if (!duplicate)
+        return nullptr;
+
+    duplicate->m_currentOffset = m_currentOffset;
+#ifdef SERENITY
+    duplicate->m_fd = m_fd;
+    duplicate->m_isBlocking = m_isBlocking;
+#endif
+    return duplicate;
+}
+
 #ifndef SERENITY
 bool additionWouldOverflow(Unix::off_t a, Unix::off_t b)
 {

+ 2 - 0
VirtualFileSystem/FileHandle.h

@@ -11,6 +11,8 @@ public:
     explicit FileHandle(RetainPtr<VirtualFileSystem::Node>&&);
     ~FileHandle();
 
+    OwnPtr<FileHandle> clone();
+
     int close();
 
     Unix::off_t seek(Unix::off_t, int whence);