Process.cpp 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521
  1. #include "types.h"
  2. #include "Process.h"
  3. #include "kmalloc.h"
  4. #include "StdLib.h"
  5. #include "i386.h"
  6. #include "system.h"
  7. #include <VirtualFileSystem/FileDescriptor.h>
  8. #include <VirtualFileSystem/VirtualFileSystem.h>
  9. #include <ELFLoader/ELFLoader.h>
  10. #include "MemoryManager.h"
  11. #include "errno.h"
  12. #include "i8253.h"
  13. #include "RTC.h"
  14. #include "ProcFileSystem.h"
  15. #include <AK/StdLib.h>
  16. #include <LibC/signal_numbers.h>
  17. #include "Syscall.h"
  18. #include "Scheduler.h"
  19. //#define DEBUG_IO
  20. //#define TASK_DEBUG
  21. //#define FORK_DEBUG
  22. #define SIGNAL_DEBUG
  23. #define MAX_PROCESS_GIDS 32
  24. // FIXME: Only do a single validation for accesses that don't span multiple pages.
  25. // FIXME: Some places pass strlen(arg1) as arg2. This doesn't seem entirely perfect..
  // Checks that a syscall may read the user range starting at `b` and spanning
  // `s` bytes. On failure it logs and makes the *enclosing function* return
  // (ret_type)-EFAULT.
  //
  // Only the first and the last byte of the range are checked (see the FIXME
  // above about multi-page accesses).
  //
  // `s` is evaluated exactly once, and only after the base address has been
  // validated. Callers commonly pass strlen(b) as `s`, so evaluating it for the
  // debug message on a bad base would dereference the very pointer that was
  // just rejected. A size of zero validates the base address only instead of
  // probing the byte *before* the buffer.
  #define VALIDATE_USER_READ_WITH_RETURN_TYPE(b, s, ret_type) \
      do { \
          LinearAddress laddr(reinterpret_cast<dword>(b)); \
          size_t validated_size_ = 0; \
          bool range_ok_ = validate_user_read(laddr); \
          if (range_ok_) { \
              validated_size_ = (s); \
              if (validated_size_) \
                  range_ok_ = validate_user_read(laddr.offset(validated_size_ - 1)); \
          } \
          if (!range_ok_) { \
              dbgprintf("Bad read address passed to syscall: %p +%u\n", laddr.get(), (unsigned)validated_size_); \
              return (ret_type)-EFAULT; \
          } \
      } while(0)
  // Convenience form for syscalls that return int.
  #define VALIDATE_USER_READ(b, s) VALIDATE_USER_READ_WITH_RETURN_TYPE(b, s, int)
  // Checks that a syscall may write the user range starting at `b` and spanning
  // `s` bytes. On failure it logs and makes the *enclosing function* return
  // -EFAULT.
  //
  // Only the first and the last byte of the range are checked. `s` is evaluated
  // exactly once, and only after the base address has been validated, so a size
  // expression that touches user memory never runs against a rejected pointer.
  // A size of zero validates the base address only instead of probing the byte
  // *before* the buffer.
  #define VALIDATE_USER_WRITE(b, s) \
      do { \
          LinearAddress laddr(reinterpret_cast<dword>(b)); \
          size_t validated_size_ = 0; \
          bool range_ok_ = validate_user_write(laddr); \
          if (range_ok_) { \
              validated_size_ = (s); \
              if (validated_size_) \
                  range_ok_ = validate_user_write(laddr.offset(validated_size_ - 1)); \
          } \
          if (!range_ok_) { \
              dbgprintf("Bad write address passed to syscall: %p +%u\n", laddr.get(), (unsigned)validated_size_); \
              return -EFAULT; \
          } \
      } while(0)
  43. static const DWORD defaultStackSize = 16384;
  44. static pid_t next_pid;
  45. InlineLinkedList<Process>* g_processes;
  46. static String* s_hostname;
  47. static String& hostnameStorage(InterruptDisabler&)
  48. {
  49. ASSERT(s_hostname);
  50. return *s_hostname;
  51. }
  52. static String getHostname()
  53. {
  54. InterruptDisabler disabler;
  55. return hostnameStorage(disabler).isolatedCopy();
  56. }
  57. CoolGlobals* g_cool_globals;
  58. void Process::initialize()
  59. {
  60. #ifdef COOL_GLOBALS
  61. g_cool_globals = reinterpret_cast<CoolGlobals*>(0x1000);
  62. #endif
  63. next_pid = 0;
  64. g_processes = new InlineLinkedList<Process>;
  65. s_hostname = new String("birx");
  66. Scheduler::initialize();
  67. }
  68. Vector<Process*> Process::allProcesses()
  69. {
  70. InterruptDisabler disabler;
  71. Vector<Process*> processes;
  72. processes.ensureCapacity(g_processes->sizeSlow());
  73. for (auto* process = g_processes->head(); process; process = process->next())
  74. processes.append(process);
  75. return processes;
  76. }
  77. Region* Process::allocate_region(LinearAddress laddr, size_t size, String&& name, bool is_readable, bool is_writable)
  78. {
  79. // FIXME: This needs sanity checks. What if this overlaps existing regions?
  80. if (laddr.is_null()) {
  81. laddr = m_nextRegion;
  82. m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
  83. }
  84. laddr.mask(0xfffff000);
  85. m_regions.append(adopt(*new Region(laddr, size, move(name), is_readable, is_writable)));
  86. m_regions.last()->commit(*this);
  87. MM.mapRegion(*this, *m_regions.last());
  88. return m_regions.last().ptr();
  89. }
  90. Region* Process::allocate_file_backed_region(LinearAddress laddr, size_t size, RetainPtr<VirtualFileSystem::Node>&& vnode, String&& name, bool is_readable, bool is_writable)
  91. {
  92. ASSERT(!vnode->isCharacterDevice());
  93. // FIXME: This needs sanity checks. What if this overlaps existing regions?
  94. if (laddr.is_null()) {
  95. laddr = m_nextRegion;
  96. m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
  97. }
  98. laddr.mask(0xfffff000);
  99. m_regions.append(adopt(*new Region(laddr, size, move(vnode), move(name), is_readable, is_writable)));
  100. MM.mapRegion(*this, *m_regions.last());
  101. return m_regions.last().ptr();
  102. }
  103. Region* Process::allocate_region_with_vmo(LinearAddress laddr, size_t size, RetainPtr<VMObject>&& vmo, size_t offset_in_vmo, String&& name, bool is_readable, bool is_writable)
  104. {
  105. ASSERT(vmo);
  106. // FIXME: This needs sanity checks. What if this overlaps existing regions?
  107. if (laddr.is_null()) {
  108. laddr = m_nextRegion;
  109. m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
  110. }
  111. laddr.mask(0xfffff000);
  112. offset_in_vmo &= PAGE_MASK;
  113. size = ceilDiv(size, PAGE_SIZE) * PAGE_SIZE;
  114. m_regions.append(adopt(*new Region(laddr, size, move(vmo), offset_in_vmo, move(name), is_readable, is_writable)));
  115. MM.mapRegion(*this, *m_regions.last());
  116. return m_regions.last().ptr();
  117. }
  118. bool Process::deallocate_region(Region& region)
  119. {
  120. InterruptDisabler disabler;
  121. for (size_t i = 0; i < m_regions.size(); ++i) {
  122. if (m_regions[i].ptr() == &region) {
  123. MM.unmapRegion(*this, region);
  124. m_regions.remove(i);
  125. return true;
  126. }
  127. }
  128. return false;
  129. }
  130. Region* Process::regionFromRange(LinearAddress laddr, size_t size)
  131. {
  132. for (auto& region : m_regions) {
  133. if (region->linearAddress == laddr && region->size == size)
  134. return region.ptr();
  135. }
  136. return nullptr;
  137. }
  138. int Process::sys$set_mmap_name(void* addr, size_t size, const char* name)
  139. {
  140. VALIDATE_USER_READ(name, strlen(name));
  141. auto* region = regionFromRange(LinearAddress((dword)addr), size);
  142. if (!region)
  143. return -EINVAL;
  144. region->name = name;
  145. return 0;
  146. }
  147. void* Process::sys$mmap(const Syscall::SC_mmap_params* params)
  148. {
  149. VALIDATE_USER_READ_WITH_RETURN_TYPE(params, sizeof(Syscall::SC_mmap_params), void*);
  150. void* addr = (void*)params->addr;
  151. size_t size = params->size;
  152. int prot = params->prot;
  153. int flags = params->flags;
  154. int fd = params->fd;
  155. Unix::off_t offset = params->offset;
  156. if (size == 0)
  157. return (void*)-EINVAL;
  158. if ((dword)addr & ~PAGE_MASK || size & ~PAGE_MASK)
  159. return (void*)-EINVAL;
  160. if (flags & MAP_ANONYMOUS) {
  161. InterruptDisabler disabler;
  162. // FIXME: Implement mapping at a client-specified address. Most of the support is already in plcae.
  163. ASSERT(addr == nullptr);
  164. auto* region = allocate_region(LinearAddress(), size, "mmap", prot & PROT_READ, prot & PROT_WRITE);
  165. if (!region)
  166. return (void*)-ENOMEM;
  167. return region->linearAddress.asPtr();
  168. }
  169. if (offset & ~PAGE_MASK)
  170. return (void*)-EINVAL;
  171. auto* descriptor = file_descriptor(fd);
  172. if (!descriptor)
  173. return (void*)-EBADF;
  174. if (descriptor->vnode()->isCharacterDevice())
  175. return (void*)-ENODEV;
  176. // FIXME: If PROT_EXEC, check that the underlying file system isn't mounted noexec.
  177. auto region_name = descriptor->absolute_path();
  178. InterruptDisabler disabler;
  179. // FIXME: Implement mapping at a client-specified address. Most of the support is already in plcae.
  180. ASSERT(addr == nullptr);
  181. auto* region = allocate_file_backed_region(LinearAddress(), size, descriptor->vnode(), move(region_name), prot & PROT_READ, prot & PROT_WRITE);
  182. if (!region)
  183. return (void*)-ENOMEM;
  184. return region->linearAddress.asPtr();
  185. }
  186. int Process::sys$munmap(void* addr, size_t size)
  187. {
  188. InterruptDisabler disabler;
  189. auto* region = regionFromRange(LinearAddress((dword)addr), size);
  190. if (!region)
  191. return -1;
  192. if (!deallocate_region(*region))
  193. return -1;
  194. return 0;
  195. }
  196. int Process::sys$gethostname(char* buffer, size_t size)
  197. {
  198. VALIDATE_USER_WRITE(buffer, size);
  199. auto hostname = getHostname();
  200. if (size < (hostname.length() + 1))
  201. return -ENAMETOOLONG;
  202. memcpy(buffer, hostname.characters(), size);
  203. return 0;
  204. }
  205. Process* Process::fork(RegisterDump& regs)
  206. {
  207. auto* child = new Process(String(m_name), m_uid, m_gid, m_pid, m_ring, m_cwd.copyRef(), m_executable.copyRef(), m_tty, this);
  208. #ifdef FORK_DEBUG
  209. dbgprintf("fork: child=%p\n", child);
  210. #endif
  211. #if 0
  212. // FIXME: An honest fork() would copy these. Needs a Vector copy ctor.
  213. child->m_arguments = m_arguments;
  214. child->m_initialEnvironment = m_initialEnvironment;
  215. #endif
  216. for (auto& region : m_regions) {
  217. #ifdef FORK_DEBUG
  218. dbgprintf("fork: cloning Region{%p}\n", region.ptr());
  219. #endif
  220. auto cloned_region = region->clone();
  221. child->m_regions.append(move(cloned_region));
  222. MM.mapRegion(*child, *child->m_regions.last());
  223. }
  224. child->m_tss.eax = 0; // fork() returns 0 in the child :^)
  225. child->m_tss.ebx = regs.ebx;
  226. child->m_tss.ecx = regs.ecx;
  227. child->m_tss.edx = regs.edx;
  228. child->m_tss.ebp = regs.ebp;
  229. child->m_tss.esp = regs.esp_if_crossRing;
  230. child->m_tss.esi = regs.esi;
  231. child->m_tss.edi = regs.edi;
  232. child->m_tss.eflags = regs.eflags;
  233. child->m_tss.eip = regs.eip;
  234. child->m_tss.cs = regs.cs;
  235. child->m_tss.ds = regs.ds;
  236. child->m_tss.es = regs.es;
  237. child->m_tss.fs = regs.fs;
  238. child->m_tss.gs = regs.gs;
  239. child->m_tss.ss = regs.ss_if_crossRing;
  240. #ifdef FORK_DEBUG
  241. dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x\n", child->m_tss.cs, child->m_tss.eip, child->m_tss.ss, child->m_tss.esp);
  242. #endif
  243. ProcFileSystem::the().addProcess(*child);
  244. {
  245. InterruptDisabler disabler;
  246. g_processes->prepend(child);
  247. system.nprocess++;
  248. }
  249. #ifdef TASK_DEBUG
  250. kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
  251. #endif
  252. return child;
  253. }
  254. pid_t Process::sys$fork(RegisterDump& regs)
  255. {
  256. auto* child = fork(regs);
  257. ASSERT(child);
  258. return child->pid();
  259. }
  260. int Process::do_exec(const String& path, Vector<String>&& arguments, Vector<String>&& environment)
  261. {
  262. auto parts = path.split('/');
  263. if (parts.isEmpty())
  264. return -ENOENT;
  265. int error;
  266. auto descriptor = VirtualFileSystem::the().open(path, error, 0, m_cwd ? m_cwd->inode : InodeIdentifier());
  267. if (!descriptor) {
  268. ASSERT(error != 0);
  269. return error;
  270. }
  271. if (!descriptor->metadata().mayExecute(m_euid, m_gids))
  272. return -EACCES;
  273. if (!descriptor->metadata().size) {
  274. kprintf("exec() of 0-length binaries not supported\n");
  275. return -ENOTIMPL;
  276. }
  277. auto vmo = VMObject::create_file_backed(descriptor->vnode(), descriptor->metadata().size);
  278. vmo->set_name(descriptor->absolute_path());
  279. auto* region = allocate_region_with_vmo(LinearAddress(), descriptor->metadata().size, vmo.copyRef(), 0, "helper", true, false);
  280. dword entry_eip = 0;
  281. PageDirectory* old_page_directory = m_page_directory;
  282. PageDirectory* new_page_directory = reinterpret_cast<PageDirectory*>(kmalloc_page_aligned(sizeof(PageDirectory)));
  283. #ifdef MM_DEBUG
  284. dbgprintf("Process %u exec: PD=%x created\n", pid(), new_page_directory);
  285. #endif
  286. MM.populate_page_directory(*new_page_directory);
  287. m_page_directory = new_page_directory;
  288. ProcessPagingScope paging_scope(*this);
  289. // FIXME: Should we consider doing on-demand paging here? Is it actually useful?
  290. bool success = region->page_in(*new_page_directory);
  291. ASSERT(success);
  292. {
  293. InterruptDisabler disabler;
  294. // Okay, here comes the sleight of hand, pay close attention..
  295. auto old_regions = move(m_regions);
  296. ELFLoader loader(region->linearAddress.asPtr());
  297. loader.map_section_hook = [&] (LinearAddress laddr, size_t size, size_t alignment, size_t offset_in_image, bool is_readable, bool is_writable, const String& name) {
  298. ASSERT(size);
  299. ASSERT(alignment == PAGE_SIZE);
  300. size = ((size / 4096) + 1) * 4096; // FIXME: Use ceil_div?
  301. (void) allocate_region_with_vmo(laddr, size, vmo.copyRef(), offset_in_image, String(name), is_readable, is_writable);
  302. return laddr.asPtr();
  303. };
  304. loader.alloc_section_hook = [&] (LinearAddress laddr, size_t size, size_t alignment, bool is_readable, bool is_writable, const String& name) {
  305. ASSERT(size);
  306. ASSERT(alignment == PAGE_SIZE);
  307. size = ((size / 4096) + 1) * 4096; // FIXME: Use ceil_div?
  308. (void) allocate_region(laddr, size, String(name), is_readable, is_writable);
  309. return laddr.asPtr();
  310. };
  311. bool success = loader.load();
  312. if (!success) {
  313. m_page_directory = old_page_directory;
  314. MM.enter_process_paging_scope(*this);
  315. MM.release_page_directory(*new_page_directory);
  316. m_regions = move(old_regions);
  317. kprintf("sys$execve: Failure loading %s\n", path.characters());
  318. return -ENOEXEC;
  319. }
  320. entry_eip = (dword)loader.symbol_ptr("_start");
  321. if (!entry_eip) {
  322. m_page_directory = old_page_directory;
  323. MM.enter_process_paging_scope(*this);
  324. MM.release_page_directory(*new_page_directory);
  325. m_regions = move(old_regions);
  326. return -ENOEXEC;
  327. }
  328. }
  329. InterruptDisabler disabler;
  330. Scheduler::prepare_to_modify_tss(*this);
  331. m_name = parts.takeLast();
  332. dword old_esp0 = m_tss.esp0;
  333. memset(&m_tss, 0, sizeof(m_tss));
  334. m_tss.eflags = 0x0202;
  335. m_tss.eip = entry_eip;
  336. m_tss.cs = 0x1b;
  337. m_tss.ds = 0x23;
  338. m_tss.es = 0x23;
  339. m_tss.fs = 0x23;
  340. m_tss.gs = 0x23;
  341. m_tss.ss = 0x23;
  342. m_tss.cr3 = (dword)m_page_directory;
  343. m_stack_region = allocate_region(LinearAddress(), defaultStackSize, "stack");
  344. ASSERT(m_stack_region);
  345. m_stackTop3 = m_stack_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  346. m_tss.esp = m_stackTop3;
  347. m_tss.ss0 = 0x10;
  348. m_tss.esp0 = old_esp0;
  349. m_tss.ss2 = m_pid;
  350. MM.release_page_directory(*old_page_directory);
  351. m_executable = descriptor->vnode();
  352. m_arguments = move(arguments);
  353. m_initialEnvironment = move(environment);
  354. #ifdef TASK_DEBUG
  355. kprintf("Process %u (%s) exec'd %s @ %p\n", pid(), name().characters(), path.characters(), m_tss.eip);
  356. #endif
  357. set_state(Skip1SchedulerPass);
  358. return 0;
  359. }
  360. int Process::exec(const String& path, Vector<String>&& arguments, Vector<String>&& environment)
  361. {
  362. // The bulk of exec() is done by do_exec(), which ensures that all locals
  363. // are cleaned up by the time we yield-teleport below.
  364. int rc = do_exec(path, move(arguments), move(environment));
  365. if (rc < 0)
  366. return rc;
  367. if (current == this) {
  368. Scheduler::yield();
  369. ASSERT_NOT_REACHED();
  370. }
  371. return 0;
  372. }
  373. int Process::sys$execve(const char* filename, const char** argv, const char** envp)
  374. {
  375. VALIDATE_USER_READ(filename, strlen(filename));
  376. if (argv) {
  377. for (size_t i = 0; argv[i]; ++i) {
  378. VALIDATE_USER_READ(argv[i], strlen(argv[i]));
  379. }
  380. }
  381. if (envp) {
  382. for (size_t i = 0; envp[i]; ++i) {
  383. VALIDATE_USER_READ(envp[i], strlen(envp[i]));
  384. }
  385. }
  386. String path(filename);
  387. auto parts = path.split('/');
  388. Vector<String> arguments;
  389. if (argv) {
  390. for (size_t i = 0; argv[i]; ++i) {
  391. arguments.append(argv[i]);
  392. }
  393. } else {
  394. arguments.append(parts.last());
  395. }
  396. Vector<String> environment;
  397. if (envp) {
  398. for (size_t i = 0; envp[i]; ++i) {
  399. environment.append(envp[i]);
  400. }
  401. }
  402. int rc = exec(path, move(arguments), move(environment));
  403. ASSERT(rc < 0); // We should never continue after a successful exec!
  404. return rc;
  405. }
  406. Process* Process::create_user_process(const String& path, uid_t uid, gid_t gid, pid_t parent_pid, int& error, Vector<String>&& arguments, Vector<String>&& environment, TTY* tty)
  407. {
  408. // FIXME: Don't split() the path twice (sys$spawn also does it...)
  409. auto parts = path.split('/');
  410. if (arguments.isEmpty()) {
  411. arguments.append(parts.last());
  412. }
  413. RetainPtr<VirtualFileSystem::Node> cwd;
  414. {
  415. InterruptDisabler disabler;
  416. if (auto* parent = Process::from_pid(parent_pid))
  417. cwd = parent->m_cwd.copyRef();
  418. }
  419. if (!cwd)
  420. cwd = VirtualFileSystem::the().root();
  421. auto* process = new Process(parts.takeLast(), uid, gid, parent_pid, Ring3, move(cwd), nullptr, tty);
  422. error = process->exec(path, move(arguments), move(environment));
  423. if (error != 0)
  424. return nullptr;
  425. ProcFileSystem::the().addProcess(*process);
  426. {
  427. InterruptDisabler disabler;
  428. g_processes->prepend(process);
  429. system.nprocess++;
  430. }
  431. #ifdef TASK_DEBUG
  432. kprintf("Process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), process->m_tss.eip);
  433. #endif
  434. error = 0;
  435. return process;
  436. }
  437. int Process::sys$get_environment(char*** environ)
  438. {
  439. auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "environ");
  440. if (!region)
  441. return -ENOMEM;
  442. MM.mapRegion(*this, *region);
  443. char* envpage = (char*)region->linearAddress.get();
  444. *environ = (char**)envpage;
  445. char* bufptr = envpage + (sizeof(char*) * (m_initialEnvironment.size() + 1));
  446. for (size_t i = 0; i < m_initialEnvironment.size(); ++i) {
  447. (*environ)[i] = bufptr;
  448. memcpy(bufptr, m_initialEnvironment[i].characters(), m_initialEnvironment[i].length());
  449. bufptr += m_initialEnvironment[i].length();
  450. *(bufptr++) = '\0';
  451. }
  452. (*environ)[m_initialEnvironment.size()] = nullptr;
  453. return 0;
  454. }
  455. int Process::sys$get_arguments(int* argc, char*** argv)
  456. {
  457. auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "argv");
  458. if (!region)
  459. return -ENOMEM;
  460. MM.mapRegion(*this, *region);
  461. char* argpage = (char*)region->linearAddress.get();
  462. *argc = m_arguments.size();
  463. *argv = (char**)argpage;
  464. char* bufptr = argpage + (sizeof(char*) * m_arguments.size());
  465. for (size_t i = 0; i < m_arguments.size(); ++i) {
  466. (*argv)[i] = bufptr;
  467. memcpy(bufptr, m_arguments[i].characters(), m_arguments[i].length());
  468. bufptr += m_arguments[i].length();
  469. *(bufptr++) = '\0';
  470. }
  471. return 0;
  472. }
  473. Process* Process::create_kernel_process(void (*e)(), String&& name)
  474. {
  475. auto* process = new Process(move(name), (uid_t)0, (gid_t)0, (pid_t)0, Ring0);
  476. process->m_tss.eip = (dword)e;
  477. if (process->pid() != 0) {
  478. {
  479. InterruptDisabler disabler;
  480. g_processes->prepend(process);
  481. system.nprocess++;
  482. }
  483. ProcFileSystem::the().addProcess(*process);
  484. #ifdef TASK_DEBUG
  485. kprintf("Kernel process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), process->m_tss.eip);
  486. #endif
  487. }
  488. return process;
  489. }
  490. Process::Process(String&& name, uid_t uid, gid_t gid, pid_t ppid, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty, Process* fork_parent)
  491. : m_name(move(name))
  492. , m_pid(next_pid++) // FIXME: RACE: This variable looks racy!
  493. , m_uid(uid)
  494. , m_gid(gid)
  495. , m_euid(uid)
  496. , m_egid(gid)
  497. , m_state(Runnable)
  498. , m_ring(ring)
  499. , m_cwd(move(cwd))
  500. , m_executable(move(executable))
  501. , m_tty(tty)
  502. , m_ppid(ppid)
  503. {
  504. m_gids.set(m_gid);
  505. if (fork_parent) {
  506. m_sid = fork_parent->m_sid;
  507. m_pgid = fork_parent->m_pgid;
  508. } else {
  509. // FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though..
  510. InterruptDisabler disabler;
  511. if (auto* parent = Process::from_pid(m_ppid)) {
  512. m_sid = parent->m_sid;
  513. m_pgid = parent->m_pgid;
  514. }
  515. }
  516. m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory));
  517. #ifdef MM_DEBUG
  518. dbgprintf("Process %u ctor: PD=%x created\n", pid(), m_page_directory);
  519. #endif
  520. MM.populate_page_directory(*m_page_directory);
  521. if (fork_parent) {
  522. m_file_descriptors.resize(fork_parent->m_file_descriptors.size());
  523. for (size_t i = 0; i < fork_parent->m_file_descriptors.size(); ++i) {
  524. if (!fork_parent->m_file_descriptors[i])
  525. continue;
  526. #ifdef FORK_DEBUG
  527. dbgprintf("fork: cloning fd %u... (%p) istty? %u\n", i, fork_parent->m_file_descriptors[i].ptr(), fork_parent->m_file_descriptors[i]->isTTY());
  528. #endif
  529. m_file_descriptors[i] = fork_parent->m_file_descriptors[i]->clone();
  530. }
  531. } else {
  532. m_file_descriptors.resize(m_max_open_file_descriptors);
  533. if (tty) {
  534. m_file_descriptors[0] = tty->open(O_RDONLY);
  535. m_file_descriptors[1] = tty->open(O_WRONLY);
  536. m_file_descriptors[2] = tty->open(O_WRONLY);
  537. }
  538. }
  539. if (fork_parent)
  540. m_nextRegion = fork_parent->m_nextRegion;
  541. else
  542. m_nextRegion = LinearAddress(0x10000000);
  543. if (fork_parent) {
  544. memcpy(&m_tss, &fork_parent->m_tss, sizeof(m_tss));
  545. } else {
  546. memset(&m_tss, 0, sizeof(m_tss));
  547. // Only IF is set when a process boots.
  548. m_tss.eflags = 0x0202;
  549. word cs, ds, ss;
  550. if (isRing0()) {
  551. cs = 0x08;
  552. ds = 0x10;
  553. ss = 0x10;
  554. } else {
  555. cs = 0x1b;
  556. ds = 0x23;
  557. ss = 0x23;
  558. }
  559. m_tss.ds = ds;
  560. m_tss.es = ds;
  561. m_tss.fs = ds;
  562. m_tss.gs = ds;
  563. m_tss.ss = ss;
  564. m_tss.cs = cs;
  565. }
  566. m_tss.cr3 = (dword)m_page_directory;
  567. if (isRing0()) {
  568. // FIXME: This memory is leaked.
  569. // But uh, there's also no kernel process termination, so I guess it's not technically leaked...
  570. dword stackBottom = (dword)kmalloc_eternal(defaultStackSize);
  571. m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8;
  572. m_tss.esp = m_stackTop0;
  573. } else {
  574. if (fork_parent) {
  575. m_stackTop3 = fork_parent->m_stackTop3;
  576. } else {
  577. auto* region = allocate_region(LinearAddress(), defaultStackSize, "stack");
  578. ASSERT(region);
  579. m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  580. m_tss.esp = m_stackTop3;
  581. }
  582. }
  583. if (isRing3()) {
  584. // Ring3 processes need a separate stack for Ring0.
  585. m_kernelStack = kmalloc(defaultStackSize);
  586. m_stackTop0 = ((DWORD)m_kernelStack + defaultStackSize) & 0xffffff8;
  587. m_tss.ss0 = 0x10;
  588. m_tss.esp0 = m_stackTop0;
  589. }
  590. // HACK: Ring2 SS in the TSS is the current PID.
  591. m_tss.ss2 = m_pid;
  592. m_farPtr.offset = 0x98765432;
  593. }
  594. Process::~Process()
  595. {
  596. InterruptDisabler disabler;
  597. ProcFileSystem::the().removeProcess(*this);
  598. system.nprocess--;
  599. gdt_free_entry(selector());
  600. if (m_kernelStack) {
  601. kfree(m_kernelStack);
  602. m_kernelStack = nullptr;
  603. }
  604. MM.release_page_directory(*m_page_directory);
  605. }
  606. void Process::dumpRegions()
  607. {
  608. kprintf("Process %s(%u) regions:\n", name().characters(), pid());
  609. kprintf("BEGIN END SIZE NAME\n");
  610. for (auto& region : m_regions) {
  611. kprintf("%x -- %x %x %s\n",
  612. region->linearAddress.get(),
  613. region->linearAddress.offset(region->size - 1).get(),
  614. region->size,
  615. region->name.characters());
  616. }
  617. }
  618. void Process::sys$exit(int status)
  619. {
  620. cli();
  621. #ifdef TASK_DEBUG
  622. kprintf("sys$exit: %s(%u) exit with status %d\n", name().characters(), pid(), status);
  623. #endif
  624. set_state(Dead);
  625. m_termination_status = status;
  626. m_termination_signal = 0;
  627. Scheduler::pick_next_and_switch_now();
  628. ASSERT_NOT_REACHED();
  629. }
  630. void Process::terminate_due_to_signal(byte signal)
  631. {
  632. ASSERT_INTERRUPTS_DISABLED();
  633. ASSERT(signal < 32);
  634. dbgprintf("terminate_due_to_signal %s(%u) <- %u\n", name().characters(), pid(), signal);
  635. m_termination_status = 0;
  636. m_termination_signal = signal;
  637. set_state(Dead);
  638. }
  639. void Process::send_signal(byte signal, Process* sender)
  640. {
  641. ASSERT_INTERRUPTS_DISABLED();
  642. ASSERT(signal < 32);
  643. m_pending_signals |= 1 << signal;
  644. if (sender)
  645. dbgprintf("signal: %s(%u) sent %d to %s(%u)\n", sender->name().characters(), sender->pid(), signal, name().characters(), pid());
  646. else
  647. dbgprintf("signal: kernel sent %d to %s(%u)\n", signal, name().characters(), pid());
  648. }
  649. bool Process::has_unmasked_pending_signals() const
  650. {
  651. return m_pending_signals & ~m_signal_mask;
  652. }
  653. void Process::dispatch_one_pending_signal()
  654. {
  655. ASSERT_INTERRUPTS_DISABLED();
  656. dword signal_candidates = m_pending_signals & ~m_signal_mask;
  657. ASSERT(signal_candidates);
  658. byte signal = 0;
  659. for (; signal < 32; ++signal) {
  660. if (signal_candidates & (1 << signal)) {
  661. break;
  662. }
  663. }
  664. dispatch_signal(signal);
  665. }
  666. void Process::dispatch_signal(byte signal)
  667. {
  668. ASSERT_INTERRUPTS_DISABLED();
  669. ASSERT(signal < 32);
  670. dbgprintf("dispatch_signal %s(%u) <- %u\n", name().characters(), pid(), signal);
  671. auto& action = m_signal_action_data[signal];
  672. // FIXME: Implement SA_SIGINFO signal handlers.
  673. ASSERT(!(action.flags & SA_SIGINFO));
  674. auto handler_laddr = action.handler_or_sigaction;
  675. if (handler_laddr.is_null()) {
  676. // FIXME: Is termination really always the appropriate action?
  677. return terminate_due_to_signal(signal);
  678. }
  679. Scheduler::prepare_to_modify_tss(*this);
  680. word ret_cs = m_tss.cs;
  681. dword ret_eip = m_tss.eip;
  682. dword ret_eflags = m_tss.eflags;
  683. bool interrupting_in_kernel = (ret_cs & 3) == 0;
  684. if (interrupting_in_kernel) {
  685. dbgprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", name().characters(), pid(), toString(state()), ret_cs, ret_eip);
  686. ASSERT(is_blocked());
  687. m_tss_to_resume_kernel = m_tss;
  688. #ifdef SIGNAL_DEBUG
  689. dbgprintf("resume tss pc: %w:%x\n", m_tss_to_resume_kernel.cs, m_tss_to_resume_kernel.eip);
  690. #endif
  691. }
  692. ProcessPagingScope pagingScope(*this);
  693. if (interrupting_in_kernel) {
  694. if (!m_signal_stack_user_region) {
  695. m_signal_stack_user_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (user)");
  696. ASSERT(m_signal_stack_user_region);
  697. m_signal_stack_kernel_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (kernel)");
  698. ASSERT(m_signal_stack_user_region);
  699. }
  700. m_tss.ss = 0x23;
  701. m_tss.esp = m_signal_stack_user_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  702. m_tss.ss0 = 0x10;
  703. m_tss.esp0 = m_signal_stack_kernel_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  704. push_value_on_stack(ret_eflags);
  705. push_value_on_stack(ret_cs);
  706. push_value_on_stack(ret_eip);
  707. } else {
  708. push_value_on_stack(ret_cs);
  709. push_value_on_stack(ret_eip);
  710. push_value_on_stack(ret_eflags);
  711. }
  712. // PUSHA
  713. dword old_esp = m_tss.esp;
  714. push_value_on_stack(m_tss.eax);
  715. push_value_on_stack(m_tss.ecx);
  716. push_value_on_stack(m_tss.edx);
  717. push_value_on_stack(m_tss.ebx);
  718. push_value_on_stack(old_esp);
  719. push_value_on_stack(m_tss.ebp);
  720. push_value_on_stack(m_tss.esi);
  721. push_value_on_stack(m_tss.edi);
  722. m_tss.eax = (dword)signal;
  723. m_tss.cs = 0x1b;
  724. m_tss.ds = 0x23;
  725. m_tss.es = 0x23;
  726. m_tss.fs = 0x23;
  727. m_tss.gs = 0x23;
  728. m_tss.eip = handler_laddr.get();
  729. if (m_return_to_ring3_from_signal_trampoline.is_null()) {
  730. // FIXME: This should be a global trampoline shared by all processes, not one created per process!
  731. // FIXME: Remap as read-only after setup.
  732. auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "signal_trampoline", true, true);
  733. m_return_to_ring3_from_signal_trampoline = region->linearAddress;
  734. byte* code_ptr = m_return_to_ring3_from_signal_trampoline.asPtr();
  735. *code_ptr++ = 0x61; // popa
  736. *code_ptr++ = 0x9d; // popf
  737. *code_ptr++ = 0xc3; // ret
  738. *code_ptr++ = 0x0f; // ud2
  739. *code_ptr++ = 0x0b;
  740. m_return_to_ring0_from_signal_trampoline = LinearAddress((dword)code_ptr);
  741. *code_ptr++ = 0x61; // popa
  742. *code_ptr++ = 0xb8; // mov eax, <dword>
  743. *(dword*)code_ptr = Syscall::SC_sigreturn;
  744. code_ptr += sizeof(dword);
  745. *code_ptr++ = 0xcd; // int 0x80
  746. *code_ptr++ = 0x80;
  747. *code_ptr++ = 0x0f; // ud2
  748. *code_ptr++ = 0x0b;
  749. // FIXME: For !SA_NODEFER, maybe we could do something like emitting an int 0x80 syscall here that
  750. // unmasks the signal so it can be received again? I guess then I would need one trampoline
  751. // per signal number if it's hard-coded, but it's just a few bytes per each.
  752. }
  753. if (interrupting_in_kernel)
  754. push_value_on_stack(m_return_to_ring0_from_signal_trampoline.get());
  755. else
  756. push_value_on_stack(m_return_to_ring3_from_signal_trampoline.get());
  757. m_pending_signals &= ~(1 << signal);
  758. // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
  759. set_state(Skip1SchedulerPass);
  760. #ifdef SIGNAL_DEBUG
  761. dbgprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", name().characters(), pid(), toString(state()), m_tss.cs, m_tss.eip);
  762. #endif
  763. }
  764. void Process::sys$sigreturn()
  765. {
  766. InterruptDisabler disabler;
  767. Scheduler::prepare_to_modify_tss(*this);
  768. m_tss = m_tss_to_resume_kernel;
  769. #ifdef SIGNAL_DEBUG
  770. dbgprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
  771. dbgprintf(" -> resuming execution at %w:%x\n", m_tss.cs, m_tss.eip);
  772. #endif
  773. set_state(Skip1SchedulerPass);
  774. Scheduler::yield();
  775. kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
  776. ASSERT_NOT_REACHED();
  777. }
  778. void Process::push_value_on_stack(dword value)
  779. {
  780. m_tss.esp -= 4;
  781. dword* stack_ptr = (dword*)m_tss.esp;
  782. *stack_ptr = value;
  783. }
  784. void Process::crash()
  785. {
  786. ASSERT_INTERRUPTS_DISABLED();
  787. ASSERT(state() != Dead);
  788. m_termination_signal = SIGSEGV;
  789. set_state(Dead);
  790. dumpRegions();
  791. Scheduler::pick_next_and_switch_now();
  792. ASSERT_NOT_REACHED();
  793. }
  794. Process* Process::from_pid(pid_t pid)
  795. {
  796. ASSERT_INTERRUPTS_DISABLED();
  797. for (auto* process = g_processes->head(); process; process = process->next()) {
  798. if (process->pid() == pid)
  799. return process;
  800. }
  801. return nullptr;
  802. }
  803. FileDescriptor* Process::file_descriptor(int fd)
  804. {
  805. if (fd < 0)
  806. return nullptr;
  807. if ((size_t)fd < m_file_descriptors.size())
  808. return m_file_descriptors[fd].ptr();
  809. return nullptr;
  810. }
  811. const FileDescriptor* Process::file_descriptor(int fd) const
  812. {
  813. if (fd < 0)
  814. return nullptr;
  815. if ((size_t)fd < m_file_descriptors.size())
  816. return m_file_descriptors[fd].ptr();
  817. return nullptr;
  818. }
  819. ssize_t Process::sys$get_dir_entries(int fd, void* buffer, size_t size)
  820. {
  821. VALIDATE_USER_WRITE(buffer, size);
  822. auto* descriptor = file_descriptor(fd);
  823. if (!descriptor)
  824. return -EBADF;
  825. return descriptor->get_dir_entries((byte*)buffer, size);
  826. }
  827. int Process::sys$lseek(int fd, off_t offset, int whence)
  828. {
  829. auto* descriptor = file_descriptor(fd);
  830. if (!descriptor)
  831. return -EBADF;
  832. return descriptor->seek(offset, whence);
  833. }
  834. int Process::sys$ttyname_r(int fd, char* buffer, size_t size)
  835. {
  836. VALIDATE_USER_WRITE(buffer, size);
  837. auto* descriptor = file_descriptor(fd);
  838. if (!descriptor)
  839. return -EBADF;
  840. if (!descriptor->isTTY())
  841. return -ENOTTY;
  842. auto ttyName = descriptor->tty()->ttyName();
  843. if (size < ttyName.length() + 1)
  844. return -ERANGE;
  845. strcpy(buffer, ttyName.characters());
  846. return 0;
  847. }
  848. ssize_t Process::sys$write(int fd, const void* data, size_t size)
  849. {
  850. VALIDATE_USER_READ(data, size);
  851. #ifdef DEBUG_IO
  852. kprintf("Process::sys$write: called(%d, %p, %u)\n", fd, data, size);
  853. #endif
  854. auto* descriptor = file_descriptor(fd);
  855. #ifdef DEBUG_IO
  856. kprintf("Process::sys$write: handle=%p\n", descriptor);
  857. #endif
  858. if (!descriptor)
  859. return -EBADF;
  860. auto nwritten = descriptor->write((const byte*)data, size);
  861. if (has_unmasked_pending_signals()) {
  862. block(BlockedSignal);
  863. Scheduler::yield();
  864. if (nwritten == 0)
  865. return -EINTR;
  866. }
  867. #ifdef DEBUG_IO
  868. kprintf("Process::sys$write: nwritten=%u\n", nwritten);
  869. #endif
  870. return nwritten;
  871. }
  872. ssize_t Process::sys$read(int fd, void* outbuf, size_t nread)
  873. {
  874. VALIDATE_USER_WRITE(outbuf, nread);
  875. #ifdef DEBUG_IO
  876. kprintf("Process::sys$read: called(%d, %p, %u)\n", fd, outbuf, nread);
  877. #endif
  878. auto* descriptor = file_descriptor(fd);
  879. #ifdef DEBUG_IO
  880. kprintf("Process::sys$read: handle=%p\n", descriptor);
  881. #endif
  882. if (!descriptor)
  883. return -EBADF;
  884. if (descriptor->isBlocking()) {
  885. if (!descriptor->hasDataAvailableForRead()) {
  886. m_fdBlockedOnRead = fd;
  887. block(BlockedRead);
  888. sched_yield();
  889. if (m_was_interrupted_while_blocked)
  890. return -EINTR;
  891. }
  892. }
  893. nread = descriptor->read((byte*)outbuf, nread);
  894. #ifdef DEBUG_IO
  895. kprintf("Process::sys$read: nread=%u\n", nread);
  896. #endif
  897. return nread;
  898. }
  899. int Process::sys$close(int fd)
  900. {
  901. auto* descriptor = file_descriptor(fd);
  902. if (!descriptor)
  903. return -EBADF;
  904. int rc = descriptor->close();
  905. m_file_descriptors[fd] = nullptr;
  906. return rc;
  907. }
  908. int Process::sys$lstat(const char* path, Unix::stat* statbuf)
  909. {
  910. VALIDATE_USER_WRITE(statbuf, sizeof(Unix::stat));
  911. int error;
  912. auto descriptor = VirtualFileSystem::the().open(move(path), error, O_NOFOLLOW_NOERROR, cwdInode());
  913. if (!descriptor)
  914. return error;
  915. descriptor->stat(statbuf);
  916. return 0;
  917. }
  918. int Process::sys$stat(const char* path, Unix::stat* statbuf)
  919. {
  920. VALIDATE_USER_WRITE(statbuf, sizeof(Unix::stat));
  921. int error;
  922. auto descriptor = VirtualFileSystem::the().open(move(path), error, 0, cwdInode());
  923. if (!descriptor)
  924. return error;
  925. descriptor->stat(statbuf);
  926. return 0;
  927. }
  928. int Process::sys$readlink(const char* path, char* buffer, size_t size)
  929. {
  930. VALIDATE_USER_READ(path, strlen(path));
  931. VALIDATE_USER_WRITE(buffer, size);
  932. int error;
  933. auto descriptor = VirtualFileSystem::the().open(path, error, O_RDONLY | O_NOFOLLOW_NOERROR, cwdInode());
  934. if (!descriptor)
  935. return error;
  936. if (!descriptor->metadata().isSymbolicLink())
  937. return -EINVAL;
  938. auto contents = descriptor->readEntireFile();
  939. if (!contents)
  940. return -EIO; // FIXME: Get a more detailed error from VFS.
  941. memcpy(buffer, contents.pointer(), min(size, contents.size()));
  942. if (contents.size() + 1 < size)
  943. buffer[contents.size()] = '\0';
  944. return 0;
  945. }
  946. int Process::sys$chdir(const char* path)
  947. {
  948. VALIDATE_USER_READ(path, strlen(path));
  949. int error;
  950. auto descriptor = VirtualFileSystem::the().open(path, error, 0, cwdInode());
  951. if (!descriptor)
  952. return error;
  953. if (!descriptor->isDirectory())
  954. return -ENOTDIR;
  955. m_cwd = descriptor->vnode();
  956. return 0;
  957. }
  958. int Process::sys$getcwd(char* buffer, size_t size)
  959. {
  960. VALIDATE_USER_WRITE(buffer, size);
  961. auto path = VirtualFileSystem::the().absolutePath(cwdInode());
  962. if (path.isNull())
  963. return -EINVAL;
  964. if (size < path.length() + 1)
  965. return -ERANGE;
  966. strcpy(buffer, path.characters());
  967. return -ENOTIMPL;
  968. }
  969. size_t Process::number_of_open_file_descriptors() const
  970. {
  971. size_t count = 0;
  972. for (auto& descriptor : m_file_descriptors) {
  973. if (descriptor)
  974. ++count;
  975. }
  976. return count;
  977. }
  978. int Process::sys$open(const char* path, int options)
  979. {
  980. #ifdef DEBUG_IO
  981. kprintf("Process::sys$open(): PID=%u, path=%s {%u}\n", m_pid, path, pathLength);
  982. #endif
  983. VALIDATE_USER_READ(path, strlen(path));
  984. if (number_of_open_file_descriptors() >= m_max_open_file_descriptors)
  985. return -EMFILE;
  986. int error;
  987. auto descriptor = VirtualFileSystem::the().open(path, error, options, cwdInode());
  988. if (!descriptor)
  989. return error;
  990. if (options & O_DIRECTORY && !descriptor->isDirectory())
  991. return -ENOTDIR; // FIXME: This should be handled by VFS::open.
  992. int fd = 0;
  993. for (; fd < (int)m_max_open_file_descriptors; ++fd) {
  994. if (!m_file_descriptors[fd])
  995. break;
  996. }
  997. m_file_descriptors[fd] = move(descriptor);
  998. return fd;
  999. }
  1000. int Process::sys$uname(utsname* buf)
  1001. {
  1002. VALIDATE_USER_WRITE(buf, sizeof(utsname));
  1003. strcpy(buf->sysname, "Serenity");
  1004. strcpy(buf->release, "1.0-dev");
  1005. strcpy(buf->version, "FIXME");
  1006. strcpy(buf->machine, "i386");
  1007. strcpy(buf->nodename, getHostname().characters());
  1008. return 0;
  1009. }
  1010. int Process::sys$isatty(int fd)
  1011. {
  1012. auto* descriptor = file_descriptor(fd);
  1013. if (!descriptor)
  1014. return -EBADF;
  1015. if (!descriptor->isTTY())
  1016. return -ENOTTY;
  1017. return 1;
  1018. }
  1019. int Process::sys$kill(pid_t pid, int signal)
  1020. {
  1021. if (pid == 0) {
  1022. // FIXME: Send to same-group processes.
  1023. ASSERT(pid != 0);
  1024. }
  1025. if (pid == -1) {
  1026. // FIXME: Send to all processes.
  1027. ASSERT(pid != -1);
  1028. }
  1029. ASSERT(pid != current->pid()); // FIXME: Support this scenario.
  1030. InterruptDisabler disabler;
  1031. auto* peer = Process::from_pid(pid);
  1032. if (!peer)
  1033. return -ESRCH;
  1034. peer->send_signal(signal, this);
  1035. return 0;
  1036. }
  1037. int Process::sys$sleep(unsigned seconds)
  1038. {
  1039. if (!seconds)
  1040. return 0;
  1041. sleep(seconds * TICKS_PER_SECOND);
  1042. if (m_wakeupTime > system.uptime) {
  1043. ASSERT(m_was_interrupted_while_blocked);
  1044. dword ticks_left_until_original_wakeup_time = m_wakeupTime - system.uptime;
  1045. return ticks_left_until_original_wakeup_time / TICKS_PER_SECOND;
  1046. }
  1047. return 0;
  1048. }
  1049. int Process::sys$gettimeofday(timeval* tv)
  1050. {
  1051. VALIDATE_USER_WRITE(tv, sizeof(tv));
  1052. InterruptDisabler disabler;
  1053. auto now = RTC::now();
  1054. tv->tv_sec = now;
  1055. tv->tv_usec = 0;
  1056. return 0;
  1057. }
  1058. uid_t Process::sys$getuid()
  1059. {
  1060. return m_uid;
  1061. }
  1062. gid_t Process::sys$getgid()
  1063. {
  1064. return m_gid;
  1065. }
  1066. uid_t Process::sys$geteuid()
  1067. {
  1068. return m_euid;
  1069. }
  1070. gid_t Process::sys$getegid()
  1071. {
  1072. return m_egid;
  1073. }
  1074. pid_t Process::sys$getpid()
  1075. {
  1076. return m_pid;
  1077. }
  1078. pid_t Process::sys$getppid()
  1079. {
  1080. return m_ppid;
  1081. }
  1082. mode_t Process::sys$umask(mode_t mask)
  1083. {
  1084. auto old_mask = m_umask;
  1085. m_umask = mask;
  1086. return old_mask;
  1087. }
  1088. void Process::reap(Process& process)
  1089. {
  1090. InterruptDisabler disabler;
  1091. dbgprintf("reap: %s(%u) {%s}\n", process.name().characters(), process.pid(), toString(process.state()));
  1092. ASSERT(process.state() == Dead);
  1093. g_processes->remove(&process);
  1094. delete &process;
  1095. }
  1096. pid_t Process::sys$waitpid(pid_t waitee, int* wstatus, int options)
  1097. {
  1098. // FIXME: Respect options
  1099. (void) options;
  1100. if (wstatus)
  1101. VALIDATE_USER_WRITE(wstatus, sizeof(int));
  1102. {
  1103. InterruptDisabler disabler;
  1104. if (!Process::from_pid(waitee))
  1105. return -ECHILD;
  1106. }
  1107. m_waitee = waitee;
  1108. m_waitee_status = 0;
  1109. block(BlockedWait);
  1110. sched_yield();
  1111. if (m_was_interrupted_while_blocked)
  1112. return -EINTR;
  1113. Process* waitee_process;
  1114. {
  1115. InterruptDisabler disabler;
  1116. waitee_process = Process::from_pid(waitee);
  1117. }
  1118. ASSERT(waitee_process);
  1119. reap(*waitee_process);
  1120. if (wstatus)
  1121. *wstatus = m_waitee_status;
  1122. return m_waitee;
  1123. }
  1124. void Process::unblock()
  1125. {
  1126. ASSERT(m_state != Process::Runnable && m_state != Process::Running);
  1127. system.nblocked--;
  1128. m_state = Process::Runnable;
  1129. }
  1130. void Process::block(Process::State state)
  1131. {
  1132. ASSERT(current->state() == Process::Running);
  1133. system.nblocked++;
  1134. m_was_interrupted_while_blocked = false;
  1135. set_state(state);
  1136. }
  1137. void block(Process::State state)
  1138. {
  1139. current->block(state);
  1140. sched_yield();
  1141. }
  1142. void sleep(DWORD ticks)
  1143. {
  1144. ASSERT(current->state() == Process::Running);
  1145. current->setWakeupTime(system.uptime + ticks);
  1146. current->block(Process::BlockedSleep);
  1147. sched_yield();
  1148. }
  1149. bool Process::isValidAddressForKernel(LinearAddress laddr) const
  1150. {
  1151. // We check extra carefully here since the first 4MB of the address space is identity-mapped.
  1152. // This code allows access outside of the known used address ranges to get caught.
  1153. InterruptDisabler disabler;
  1154. if (laddr.get() >= ksyms().first().address && laddr.get() <= ksyms().last().address)
  1155. return true;
  1156. if (is_kmalloc_address((void*)laddr.get()))
  1157. return true;
  1158. return validate_user_read(laddr);
  1159. }
  1160. bool Process::validate_user_read(LinearAddress laddr) const
  1161. {
  1162. InterruptDisabler disabler;
  1163. return MM.validate_user_read(*this, laddr);
  1164. }
  1165. bool Process::validate_user_write(LinearAddress laddr) const
  1166. {
  1167. InterruptDisabler disabler;
  1168. return MM.validate_user_write(*this, laddr);
  1169. }
  1170. pid_t Process::sys$getsid(pid_t pid)
  1171. {
  1172. if (pid == 0)
  1173. return m_sid;
  1174. InterruptDisabler disabler;
  1175. auto* process = Process::from_pid(pid);
  1176. if (!process)
  1177. return -ESRCH;
  1178. if (m_sid != process->m_sid)
  1179. return -EPERM;
  1180. return process->m_sid;
  1181. }
  1182. pid_t Process::sys$setsid()
  1183. {
  1184. InterruptDisabler disabler;
  1185. bool found_process_with_same_pgid_as_my_pid = false;
  1186. Process::for_each_in_pgrp(pid(), [&] (auto&) {
  1187. found_process_with_same_pgid_as_my_pid = true;
  1188. return false;
  1189. });
  1190. if (found_process_with_same_pgid_as_my_pid)
  1191. return -EPERM;
  1192. m_sid = m_pid;
  1193. m_pgid = m_pid;
  1194. return m_sid;
  1195. }
  1196. pid_t Process::sys$getpgid(pid_t pid)
  1197. {
  1198. if (pid == 0)
  1199. return m_pgid;
  1200. InterruptDisabler disabler; // FIXME: Use a ProcessHandle
  1201. auto* process = Process::from_pid(pid);
  1202. if (!process)
  1203. return -ESRCH;
  1204. return process->m_pgid;
  1205. }
  1206. pid_t Process::sys$getpgrp()
  1207. {
  1208. return m_pgid;
  1209. }
  1210. static pid_t get_sid_from_pgid(pid_t pgid)
  1211. {
  1212. InterruptDisabler disabler;
  1213. auto* group_leader = Process::from_pid(pgid);
  1214. if (!group_leader)
  1215. return -1;
  1216. return group_leader->sid();
  1217. }
  1218. int Process::sys$setpgid(pid_t specified_pid, pid_t specified_pgid)
  1219. {
  1220. InterruptDisabler disabler; // FIXME: Use a ProcessHandle
  1221. pid_t pid = specified_pid ? specified_pid : m_pid;
  1222. if (specified_pgid < 0)
  1223. return -EINVAL;
  1224. auto* process = Process::from_pid(pid);
  1225. if (!process)
  1226. return -ESRCH;
  1227. pid_t new_pgid = specified_pgid ? specified_pgid : process->m_pid;
  1228. pid_t current_sid = get_sid_from_pgid(process->m_pgid);
  1229. pid_t new_sid = get_sid_from_pgid(new_pgid);
  1230. if (current_sid != new_sid) {
  1231. // Can't move a process between sessions.
  1232. return -EPERM;
  1233. }
  1234. // FIXME: There are more EPERM conditions to check for here..
  1235. process->m_pgid = new_pgid;
  1236. return 0;
  1237. }
  1238. pid_t Process::sys$tcgetpgrp(int fd)
  1239. {
  1240. auto* descriptor = file_descriptor(fd);
  1241. if (!descriptor)
  1242. return -EBADF;
  1243. if (!descriptor->isTTY())
  1244. return -ENOTTY;
  1245. auto& tty = *descriptor->tty();
  1246. if (&tty != m_tty)
  1247. return -ENOTTY;
  1248. return tty.pgid();
  1249. }
  1250. int Process::sys$tcsetpgrp(int fd, pid_t pgid)
  1251. {
  1252. if (pgid < 0)
  1253. return -EINVAL;
  1254. if (get_sid_from_pgid(pgid) != m_sid)
  1255. return -EINVAL;
  1256. auto* descriptor = file_descriptor(fd);
  1257. if (!descriptor)
  1258. return -EBADF;
  1259. if (!descriptor->isTTY())
  1260. return -ENOTTY;
  1261. auto& tty = *descriptor->tty();
  1262. if (&tty != m_tty)
  1263. return -ENOTTY;
  1264. tty.set_pgid(pgid);
  1265. return 0;
  1266. }
  1267. int Process::sys$getdtablesize()
  1268. {
  1269. return m_max_open_file_descriptors;
  1270. }
  1271. int Process::sys$dup(int old_fd)
  1272. {
  1273. auto* descriptor = file_descriptor(old_fd);
  1274. if (!descriptor)
  1275. return -EBADF;
  1276. if (number_of_open_file_descriptors() == m_max_open_file_descriptors)
  1277. return -EMFILE;
  1278. int new_fd = 0;
  1279. for (; new_fd < (int)m_max_open_file_descriptors; ++new_fd) {
  1280. if (!m_file_descriptors[new_fd])
  1281. break;
  1282. }
  1283. m_file_descriptors[new_fd] = descriptor;
  1284. return new_fd;
  1285. }
  1286. int Process::sys$dup2(int old_fd, int new_fd)
  1287. {
  1288. auto* descriptor = file_descriptor(old_fd);
  1289. if (!descriptor)
  1290. return -EBADF;
  1291. if (number_of_open_file_descriptors() == m_max_open_file_descriptors)
  1292. return -EMFILE;
  1293. m_file_descriptors[new_fd] = descriptor;
  1294. return new_fd;
  1295. }
  1296. Unix::sighandler_t Process::sys$signal(int signum, Unix::sighandler_t handler)
  1297. {
  1298. // FIXME: Fail with -EINVAL if attepmting to catch or ignore SIGKILL or SIGSTOP.
  1299. if (signum >= 32)
  1300. return (Unix::sighandler_t)-EINVAL;
  1301. dbgprintf("sys$signal: %d => L%x\n", signum, handler);
  1302. return nullptr;
  1303. }
  1304. int Process::sys$sigaction(int signum, const Unix::sigaction* act, Unix::sigaction* old_act)
  1305. {
  1306. // FIXME: Fail with -EINVAL if attepmting to change action for SIGKILL or SIGSTOP.
  1307. if (signum >= 32)
  1308. return -EINVAL;
  1309. VALIDATE_USER_READ(act, sizeof(Unix::sigaction));
  1310. InterruptDisabler disabler; // FIXME: This should use a narrower lock.
  1311. auto& action = m_signal_action_data[signum];
  1312. if (old_act) {
  1313. VALIDATE_USER_WRITE(old_act, sizeof(Unix::sigaction));
  1314. old_act->sa_flags = action.flags;
  1315. old_act->sa_restorer = (decltype(old_act->sa_restorer))action.restorer.get();
  1316. old_act->sa_sigaction = (decltype(old_act->sa_sigaction))action.handler_or_sigaction.get();
  1317. }
  1318. action.restorer = LinearAddress((dword)act->sa_restorer);
  1319. action.flags = act->sa_flags;
  1320. action.handler_or_sigaction = LinearAddress((dword)act->sa_sigaction);
  1321. return 0;
  1322. }
  1323. int Process::sys$getgroups(int count, gid_t* gids)
  1324. {
  1325. if (count < 0)
  1326. return -EINVAL;
  1327. ASSERT(m_gids.size() < MAX_PROCESS_GIDS);
  1328. if (!count)
  1329. return m_gids.size();
  1330. if (count != (int)m_gids.size())
  1331. return -EINVAL;
  1332. VALIDATE_USER_WRITE(gids, sizeof(gid_t) * count);
  1333. size_t i = 0;
  1334. for (auto gid : m_gids)
  1335. gids[i++] = gid;
  1336. return 0;
  1337. }
  1338. int Process::sys$setgroups(size_t count, const gid_t* gids)
  1339. {
  1340. if (!is_root())
  1341. return -EPERM;
  1342. if (count >= MAX_PROCESS_GIDS)
  1343. return -EINVAL;
  1344. VALIDATE_USER_READ(gids, sizeof(gid_t) * count);
  1345. m_gids.clear();
  1346. m_gids.set(m_gid);
  1347. for (size_t i = 0; i < count; ++i)
  1348. m_gids.set(gids[i]);
  1349. return 0;
  1350. }