Syscall.cpp 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ScopeGuard.h>
  7. #include <Kernel/API/Syscall.h>
  8. #include <Kernel/Arch/x86/Interrupts.h>
  9. #include <Kernel/Arch/x86/TrapFrame.h>
  10. #include <Kernel/Panic.h>
  11. #include <Kernel/Process.h>
  12. #include <Kernel/Sections.h>
  13. #include <Kernel/ThreadTracer.h>
  14. #include <Kernel/VM/MemoryManager.h>
  15. namespace Kernel {
  16. extern "C" void syscall_handler(TrapFrame*) __attribute__((used));
  17. extern "C" void syscall_asm_entry();
  18. static void syscall_asm_entry_dummy() __attribute__((used));
  19. NEVER_INLINE void syscall_asm_entry_dummy()
  20. {
  21. // clang-format off
  22. #if ARCH(I386)
  23. asm(
  24. ".globl syscall_asm_entry\n"
  25. "syscall_asm_entry:\n"
  26. " pushl $0x0\n"
  27. " pusha\n"
  28. " pushl %ds\n"
  29. " pushl %es\n"
  30. " pushl %fs\n"
  31. " pushl %gs\n"
  32. " pushl %ss\n"
  33. " mov $" __STRINGIFY(GDT_SELECTOR_DATA0) ", %ax\n"
  34. " mov %ax, %ds\n"
  35. " mov %ax, %es\n"
  36. " mov $" __STRINGIFY(GDT_SELECTOR_PROC) ", %ax\n"
  37. " mov %ax, %fs\n"
  38. " cld\n"
  39. " xor %esi, %esi\n"
  40. " xor %edi, %edi\n"
  41. " pushl %esp \n" // set TrapFrame::regs
  42. " subl $" __STRINGIFY(TRAP_FRAME_SIZE - 4) ", %esp \n"
  43. " movl %esp, %ebx \n"
  44. " pushl %ebx \n" // push pointer to TrapFrame
  45. " call enter_trap_no_irq \n"
  46. " movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
  47. " call syscall_handler \n"
  48. " movl %ebx, 0(%esp) \n" // push pointer to TrapFrame
  49. " jmp common_trap_exit \n");
  50. #elif ARCH(X86_64)
  51. asm(
  52. ".globl syscall_asm_entry\n"
  53. "syscall_asm_entry:\n"
  54. " pushq $0x0\n"
  55. " pushq %r15\n"
  56. " pushq %r14\n"
  57. " pushq %r13\n"
  58. " pushq %r12\n"
  59. " pushq %r11\n"
  60. " pushq %r10\n"
  61. " pushq %r9\n"
  62. " pushq %r8\n"
  63. " pushq %rax\n"
  64. " pushq %rcx\n"
  65. " pushq %rdx\n"
  66. " pushq %rbx\n"
  67. " pushq %rsp\n"
  68. " pushq %rbp\n"
  69. " pushq %rsi\n"
  70. " pushq %rdi\n"
  71. " pushq %rsp \n" /* set TrapFrame::regs */
  72. " subq $" __STRINGIFY(TRAP_FRAME_SIZE - 8) ", %rsp \n"
  73. " subq $0x8, %rsp\n" /* align stack */
  74. " lea 0x8(%rsp), %rdi \n"
  75. " cld\n"
  76. " call enter_trap_no_irq \n"
  77. " lea 0x8(%rsp), %rdi \n"
  78. " call syscall_handler\n"
  79. " addq $0x8, %rsp\n" /* undo alignment */
  80. " jmp common_trap_exit \n");
  81. #endif
  82. // clang-format on
  83. }
  84. namespace Syscall {
  85. static KResultOr<FlatPtr> handle(RegisterState&, FlatPtr function, FlatPtr arg1, FlatPtr arg2, FlatPtr arg3);
  86. UNMAP_AFTER_INIT void initialize()
  87. {
  88. register_user_callable_interrupt_handler(syscall_vector, syscall_asm_entry);
  89. }
  90. #pragma GCC diagnostic ignored "-Wcast-function-type"
  91. typedef KResultOr<FlatPtr> (Process::*Handler)(FlatPtr, FlatPtr, FlatPtr);
  92. typedef KResultOr<FlatPtr> (Process::*HandlerWithRegisterState)(RegisterState&);
  93. #define __ENUMERATE_SYSCALL(x) reinterpret_cast<Handler>(&Process::sys$##x),
  94. static const Handler s_syscall_table[] = {
  95. ENUMERATE_SYSCALLS(__ENUMERATE_SYSCALL)
  96. };
  97. #undef __ENUMERATE_SYSCALL
  98. KResultOr<FlatPtr> handle(RegisterState& regs, FlatPtr function, FlatPtr arg1, FlatPtr arg2, FlatPtr arg3)
  99. {
  100. VERIFY_INTERRUPTS_ENABLED();
  101. auto current_thread = Thread::current();
  102. auto& process = current_thread->process();
  103. current_thread->did_syscall();
  104. if (function == SC_exit || function == SC_exit_thread) {
  105. // These syscalls need special handling since they never return to the caller.
  106. if (auto* tracer = process.tracer(); tracer && tracer->is_tracing_syscalls()) {
  107. #if ARCH(I386)
  108. regs.eax = 0;
  109. #else
  110. regs.rax = 0;
  111. #endif
  112. tracer->set_trace_syscalls(false);
  113. process.tracer_trap(*current_thread, regs); // this triggers SIGTRAP and stops the thread!
  114. }
  115. switch (function) {
  116. case SC_exit:
  117. process.sys$exit(arg1);
  118. break;
  119. case SC_exit_thread:
  120. process.sys$exit_thread(arg1, arg2, arg3);
  121. break;
  122. default:
  123. VERIFY_NOT_REACHED();
  124. }
  125. }
  126. if (function == SC_fork || function == SC_sigreturn) {
  127. // These syscalls want the RegisterState& rather than individual parameters.
  128. auto handler = (HandlerWithRegisterState)s_syscall_table[function];
  129. return (process.*(handler))(regs);
  130. }
  131. if (function >= Function::__Count) {
  132. dbgln("Unknown syscall {} requested ({:08x}, {:08x}, {:08x})", function, arg1, arg2, arg3);
  133. return ENOSYS;
  134. }
  135. if (s_syscall_table[function] == nullptr) {
  136. dbgln("Null syscall {} requested, you probably need to rebuild this program!", function);
  137. return ENOSYS;
  138. }
  139. return (process.*(s_syscall_table[function]))(arg1, arg2, arg3);
  140. }
  141. }
  142. NEVER_INLINE void syscall_handler(TrapFrame* trap)
  143. {
  144. auto& regs = *trap->regs;
  145. auto current_thread = Thread::current();
  146. {
  147. ScopedSpinLock lock(g_scheduler_lock);
  148. current_thread->set_may_die_immediately(false);
  149. }
  150. ScopeGuard reset_may_die_immediately = [&current_thread] {
  151. ScopedSpinLock lock(g_scheduler_lock);
  152. current_thread->set_may_die_immediately(true);
  153. };
  154. VERIFY(current_thread->previous_mode() == Thread::PreviousMode::UserMode);
  155. auto& process = current_thread->process();
  156. if (auto tracer = process.tracer(); tracer && tracer->is_tracing_syscalls()) {
  157. tracer->set_trace_syscalls(false);
  158. process.tracer_trap(*current_thread, regs); // this triggers SIGTRAP and stops the thread!
  159. }
  160. current_thread->yield_if_stopped();
  161. // Make sure SMAP protection is enabled on syscall entry.
  162. clac();
  163. // Apply a random offset in the range 0-255 to the stack pointer,
  164. // to make kernel stacks a bit less deterministic.
  165. u32 lsw;
  166. u32 msw;
  167. read_tsc(lsw, msw);
  168. auto* ptr = (char*)__builtin_alloca(lsw & 0xff);
  169. asm volatile(""
  170. : "=m"(*ptr));
  171. static constexpr FlatPtr iopl_mask = 3u << 12;
  172. FlatPtr flags;
  173. #if ARCH(I386)
  174. flags = regs.eflags;
  175. #else
  176. flags = regs.rflags;
  177. #endif
  178. if ((flags & (iopl_mask)) != 0) {
  179. PANIC("Syscall from process with IOPL != 0");
  180. }
  181. // NOTE: We take the big process lock before inspecting memory regions.
  182. process.big_lock().lock();
  183. VirtualAddress userspace_sp;
  184. #if ARCH(I386)
  185. userspace_sp = VirtualAddress { regs.userspace_esp };
  186. #else
  187. userspace_sp = VirtualAddress { regs.userspace_rsp };
  188. #endif
  189. if (!MM.validate_user_stack(process, userspace_sp)) {
  190. dbgln("Invalid stack pointer: {:p}", userspace_sp);
  191. handle_crash(regs, "Bad stack on syscall entry", SIGSTKFLT);
  192. }
  193. VirtualAddress ip;
  194. #if ARCH(I386)
  195. ip = VirtualAddress { regs.eip };
  196. #else
  197. ip = VirtualAddress { regs.rip };
  198. #endif
  199. auto* calling_region = MM.find_user_region_from_vaddr(process.space(), ip);
  200. if (!calling_region) {
  201. dbgln("Syscall from {:p} which has no associated region", ip);
  202. handle_crash(regs, "Syscall from unknown region", SIGSEGV);
  203. }
  204. if (calling_region->is_writable()) {
  205. dbgln("Syscall from writable memory at {:p}", ip);
  206. handle_crash(regs, "Syscall from writable memory", SIGSEGV);
  207. }
  208. if (process.space().enforces_syscall_regions() && !calling_region->is_syscall_region()) {
  209. dbgln("Syscall from non-syscall region");
  210. handle_crash(regs, "Syscall from non-syscall region", SIGSEGV);
  211. }
  212. #if ARCH(I386)
  213. auto function = regs.eax;
  214. auto arg1 = regs.edx;
  215. auto arg2 = regs.ecx;
  216. auto arg3 = regs.ebx;
  217. #else
  218. auto function = regs.rax;
  219. auto arg1 = regs.rdx;
  220. auto arg2 = regs.rcx;
  221. auto arg3 = regs.rbx;
  222. #endif
  223. auto result = Syscall::handle(regs, function, arg1, arg2, arg3);
  224. if (result.is_error()) {
  225. #if ARCH(I386)
  226. regs.eax = result.error();
  227. #else
  228. regs.rax = result.error();
  229. #endif
  230. } else {
  231. #if ARCH(I386)
  232. regs.eax = result.value();
  233. #else
  234. regs.rax = result.value();
  235. #endif
  236. }
  237. process.big_lock().unlock();
  238. if (auto tracer = process.tracer(); tracer && tracer->is_tracing_syscalls()) {
  239. tracer->set_trace_syscalls(false);
  240. process.tracer_trap(*current_thread, regs); // this triggers SIGTRAP and stops the thread!
  241. }
  242. current_thread->yield_if_stopped();
  243. current_thread->check_dispatch_pending_signal();
  244. // If the previous mode somehow changed something is seriously messed up...
  245. VERIFY(current_thread->previous_mode() == Thread::PreviousMode::UserMode);
  246. // Check if we're supposed to return to userspace or just die.
  247. current_thread->die_if_needed();
  248. VERIFY(!g_scheduler_lock.own_lock());
  249. }
  250. }