Process.cpp 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662
  1. #include "types.h"
  2. #include "Process.h"
  3. #include "kmalloc.h"
  4. #include "StdLib.h"
  5. #include "i386.h"
  6. #include "system.h"
  7. #include <VirtualFileSystem/FileDescriptor.h>
  8. #include <VirtualFileSystem/VirtualFileSystem.h>
  9. #include <ELFLoader/ELFLoader.h>
  10. #include "MemoryManager.h"
  11. #include "errno.h"
  12. #include "i8253.h"
  13. #include "RTC.h"
  14. #include "ProcFileSystem.h"
  15. #include <AK/StdLib.h>
  16. #include <LibC/signal_numbers.h>
  17. #include "Syscall.h"
  18. #include "Scheduler.h"
  19. //#define DEBUG_IO
  20. //#define TASK_DEBUG
  21. //#define FORK_DEBUG
  22. #define SIGNAL_DEBUG
  23. #define MAX_PROCESS_GIDS 32
  // User-pointer validation helpers for syscall entry points.
  //
  // Each macro checks the first and the last byte of the range [b, b + s) and
  // makes the *enclosing function* return -EFAULT when either check fails.
  // The size expression is evaluated exactly once. A zero-length range only
  // validates the base address: the earlier `(s) - 1` form wrapped around for
  // s == 0 and probed the byte in front of the buffer instead.
  //
  // FIXME: Only do a single validation for accesses that don't span multiple pages.
  // FIXME: Bytes strictly between the first and the last one are not checked.
  // FIXME: Some places pass strlen(arg1) as arg2. This doesn't seem entirely perfect..
  #define VALIDATE_USER_READ_WITH_RETURN_TYPE(b, s, ret_type) \
      do { \
          LinearAddress validated_laddr_(reinterpret_cast<dword>(b)); \
          const size_t validated_size_ = (s); \
          if (!validate_user_read(validated_laddr_) \
              || (validated_size_ > 1 && !validate_user_read(validated_laddr_.offset(validated_size_ - 1)))) { \
              dbgprintf("Bad read address passed to syscall: %p +%u\n", validated_laddr_.get(), validated_size_); \
              return (ret_type)-EFAULT; \
          } \
      } while(0)

  // Read validation for syscalls that return int.
  #define VALIDATE_USER_READ(b, s) VALIDATE_USER_READ_WITH_RETURN_TYPE(b, s, int)

  // Write validation; the enclosing function must return something -EFAULT converts to.
  #define VALIDATE_USER_WRITE(b, s) \
      do { \
          LinearAddress validated_laddr_(reinterpret_cast<dword>(b)); \
          const size_t validated_size_ = (s); \
          if (!validate_user_write(validated_laddr_) \
              || (validated_size_ > 1 && !validate_user_write(validated_laddr_.offset(validated_size_ - 1)))) { \
              dbgprintf("Bad write address passed to syscall: %p +%u\n", validated_laddr_.get(), validated_size_); \
              return -EFAULT; \
          } \
      } while(0)
  43. static const DWORD defaultStackSize = 16384;
  44. static pid_t next_pid;
  45. InlineLinkedList<Process>* g_processes;
  46. static String* s_hostname;
  47. static String& hostnameStorage(InterruptDisabler&)
  48. {
  49. ASSERT(s_hostname);
  50. return *s_hostname;
  51. }
  52. static String getHostname()
  53. {
  54. InterruptDisabler disabler;
  55. return hostnameStorage(disabler).isolatedCopy();
  56. }
  57. CoolGlobals* g_cool_globals;
  58. void Process::initialize()
  59. {
  60. #ifdef COOL_GLOBALS
  61. g_cool_globals = reinterpret_cast<CoolGlobals*>(0x1000);
  62. #endif
  63. next_pid = 0;
  64. g_processes = new InlineLinkedList<Process>;
  65. s_hostname = new String("birx");
  66. Scheduler::initialize();
  67. }
  68. Vector<Process*> Process::allProcesses()
  69. {
  70. InterruptDisabler disabler;
  71. Vector<Process*> processes;
  72. processes.ensureCapacity(g_processes->sizeSlow());
  73. for (auto* process = g_processes->head(); process; process = process->next())
  74. processes.append(process);
  75. return processes;
  76. }
  77. Region* Process::allocate_region(LinearAddress laddr, size_t size, String&& name, bool is_readable, bool is_writable)
  78. {
  79. // FIXME: This needs sanity checks. What if this overlaps existing regions?
  80. if (laddr.is_null()) {
  81. laddr = m_nextRegion;
  82. m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
  83. }
  84. laddr.mask(0xfffff000);
  85. m_regions.append(adopt(*new Region(laddr, size, move(name), is_readable, is_writable)));
  86. m_regions.last()->commit(*this);
  87. MM.mapRegion(*this, *m_regions.last());
  88. return m_regions.last().ptr();
  89. }
  90. Region* Process::allocate_file_backed_region(LinearAddress laddr, size_t size, RetainPtr<VirtualFileSystem::Node>&& vnode, String&& name, bool is_readable, bool is_writable)
  91. {
  92. ASSERT(!vnode->isCharacterDevice());
  93. // FIXME: This needs sanity checks. What if this overlaps existing regions?
  94. if (laddr.is_null()) {
  95. laddr = m_nextRegion;
  96. m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
  97. }
  98. laddr.mask(0xfffff000);
  99. m_regions.append(adopt(*new Region(laddr, size, move(vnode), move(name), is_readable, is_writable)));
  100. MM.mapRegion(*this, *m_regions.last());
  101. return m_regions.last().ptr();
  102. }
  103. Region* Process::allocate_region_with_vmo(LinearAddress laddr, size_t size, RetainPtr<VMObject>&& vmo, size_t offset_in_vmo, String&& name, bool is_readable, bool is_writable)
  104. {
  105. ASSERT(vmo);
  106. // FIXME: This needs sanity checks. What if this overlaps existing regions?
  107. if (laddr.is_null()) {
  108. laddr = m_nextRegion;
  109. m_nextRegion = m_nextRegion.offset(size).offset(PAGE_SIZE);
  110. }
  111. laddr.mask(0xfffff000);
  112. offset_in_vmo &= PAGE_MASK;
  113. size = ceilDiv(size, PAGE_SIZE) * PAGE_SIZE;
  114. m_regions.append(adopt(*new Region(laddr, size, move(vmo), offset_in_vmo, move(name), is_readable, is_writable)));
  115. MM.mapRegion(*this, *m_regions.last());
  116. return m_regions.last().ptr();
  117. }
  118. bool Process::deallocate_region(Region& region)
  119. {
  120. InterruptDisabler disabler;
  121. for (size_t i = 0; i < m_regions.size(); ++i) {
  122. if (m_regions[i].ptr() == &region) {
  123. MM.unmapRegion(*this, region);
  124. m_regions.remove(i);
  125. return true;
  126. }
  127. }
  128. return false;
  129. }
  130. Region* Process::regionFromRange(LinearAddress laddr, size_t size)
  131. {
  132. for (auto& region : m_regions) {
  133. if (region->linearAddress == laddr && region->size == size)
  134. return region.ptr();
  135. }
  136. return nullptr;
  137. }
  138. int Process::sys$set_mmap_name(void* addr, size_t size, const char* name)
  139. {
  140. VALIDATE_USER_READ(name, strlen(name));
  141. auto* region = regionFromRange(LinearAddress((dword)addr), size);
  142. if (!region)
  143. return -EINVAL;
  144. region->name = name;
  145. return 0;
  146. }
  147. void* Process::sys$mmap(const Syscall::SC_mmap_params* params)
  148. {
  149. VALIDATE_USER_READ_WITH_RETURN_TYPE(params, sizeof(Syscall::SC_mmap_params), void*);
  150. void* addr = (void*)params->addr;
  151. size_t size = params->size;
  152. int prot = params->prot;
  153. int flags = params->flags;
  154. int fd = params->fd;
  155. Unix::off_t offset = params->offset;
  156. if (size == 0)
  157. return (void*)-EINVAL;
  158. if ((dword)addr & ~PAGE_MASK || size & ~PAGE_MASK)
  159. return (void*)-EINVAL;
  160. if (flags & MAP_ANONYMOUS) {
  161. InterruptDisabler disabler;
  162. // FIXME: Implement mapping at a client-specified address. Most of the support is already in plcae.
  163. ASSERT(addr == nullptr);
  164. auto* region = allocate_region(LinearAddress(), size, "mmap", prot & PROT_READ, prot & PROT_WRITE);
  165. if (!region)
  166. return (void*)-ENOMEM;
  167. return region->linearAddress.asPtr();
  168. }
  169. if (offset & ~PAGE_MASK)
  170. return (void*)-EINVAL;
  171. auto* descriptor = file_descriptor(fd);
  172. if (!descriptor)
  173. return (void*)-EBADF;
  174. if (descriptor->vnode()->isCharacterDevice())
  175. return (void*)-ENODEV;
  176. // FIXME: If PROT_EXEC, check that the underlying file system isn't mounted noexec.
  177. auto region_name = descriptor->absolute_path();
  178. InterruptDisabler disabler;
  179. // FIXME: Implement mapping at a client-specified address. Most of the support is already in plcae.
  180. ASSERT(addr == nullptr);
  181. auto* region = allocate_file_backed_region(LinearAddress(), size, descriptor->vnode(), move(region_name), prot & PROT_READ, prot & PROT_WRITE);
  182. if (!region)
  183. return (void*)-ENOMEM;
  184. return region->linearAddress.asPtr();
  185. }
  186. int Process::sys$munmap(void* addr, size_t size)
  187. {
  188. InterruptDisabler disabler;
  189. auto* region = regionFromRange(LinearAddress((dword)addr), size);
  190. if (!region)
  191. return -1;
  192. if (!deallocate_region(*region))
  193. return -1;
  194. return 0;
  195. }
  196. int Process::sys$gethostname(char* buffer, size_t size)
  197. {
  198. VALIDATE_USER_WRITE(buffer, size);
  199. auto hostname = getHostname();
  200. if (size < (hostname.length() + 1))
  201. return -ENAMETOOLONG;
  202. memcpy(buffer, hostname.characters(), size);
  203. return 0;
  204. }
  205. Process* Process::fork(RegisterDump& regs)
  206. {
  207. auto* child = new Process(String(m_name), m_uid, m_gid, m_pid, m_ring, m_cwd.copyRef(), m_executable.copyRef(), m_tty, this);
  208. if (!child)
  209. return nullptr;
  210. memcpy(child->m_signal_action_data, m_signal_action_data, sizeof(m_signal_action_data));
  211. child->m_signal_mask = m_signal_mask;
  212. #ifdef FORK_DEBUG
  213. dbgprintf("fork: child=%p\n", child);
  214. #endif
  215. #if 0
  216. // FIXME: An honest fork() would copy these. Needs a Vector copy ctor.
  217. child->m_arguments = m_arguments;
  218. child->m_initialEnvironment = m_initialEnvironment;
  219. #endif
  220. for (auto& region : m_regions) {
  221. #ifdef FORK_DEBUG
  222. dbgprintf("fork: cloning Region{%p}\n", region.ptr());
  223. #endif
  224. auto cloned_region = region->clone();
  225. child->m_regions.append(move(cloned_region));
  226. MM.mapRegion(*child, *child->m_regions.last());
  227. }
  228. child->m_tss.eax = 0; // fork() returns 0 in the child :^)
  229. child->m_tss.ebx = regs.ebx;
  230. child->m_tss.ecx = regs.ecx;
  231. child->m_tss.edx = regs.edx;
  232. child->m_tss.ebp = regs.ebp;
  233. child->m_tss.esp = regs.esp_if_crossRing;
  234. child->m_tss.esi = regs.esi;
  235. child->m_tss.edi = regs.edi;
  236. child->m_tss.eflags = regs.eflags;
  237. child->m_tss.eip = regs.eip;
  238. child->m_tss.cs = regs.cs;
  239. child->m_tss.ds = regs.ds;
  240. child->m_tss.es = regs.es;
  241. child->m_tss.fs = regs.fs;
  242. child->m_tss.gs = regs.gs;
  243. child->m_tss.ss = regs.ss_if_crossRing;
  244. #ifdef FORK_DEBUG
  245. dbgprintf("fork: child will begin executing at %w:%x with stack %w:%x\n", child->m_tss.cs, child->m_tss.eip, child->m_tss.ss, child->m_tss.esp);
  246. #endif
  247. ProcFileSystem::the().addProcess(*child);
  248. {
  249. InterruptDisabler disabler;
  250. g_processes->prepend(child);
  251. system.nprocess++;
  252. }
  253. #ifdef TASK_DEBUG
  254. kprintf("Process %u (%s) forked from %u @ %p\n", child->pid(), child->name().characters(), m_pid, child->m_tss.eip);
  255. #endif
  256. return child;
  257. }
  258. pid_t Process::sys$fork(RegisterDump& regs)
  259. {
  260. auto* child = fork(regs);
  261. ASSERT(child);
  262. return child->pid();
  263. }
  264. int Process::do_exec(const String& path, Vector<String>&& arguments, Vector<String>&& environment)
  265. {
  266. auto parts = path.split('/');
  267. if (parts.isEmpty())
  268. return -ENOENT;
  269. int error;
  270. auto descriptor = VirtualFileSystem::the().open(path, error, 0, m_cwd ? m_cwd->inode : InodeIdentifier());
  271. if (!descriptor) {
  272. ASSERT(error != 0);
  273. return error;
  274. }
  275. if (!descriptor->metadata().mayExecute(m_euid, m_gids))
  276. return -EACCES;
  277. if (!descriptor->metadata().size) {
  278. kprintf("exec() of 0-length binaries not supported\n");
  279. return -ENOTIMPL;
  280. }
  281. auto vmo = VMObject::create_file_backed(descriptor->vnode(), descriptor->metadata().size);
  282. vmo->set_name(descriptor->absolute_path());
  283. auto* region = allocate_region_with_vmo(LinearAddress(), descriptor->metadata().size, vmo.copyRef(), 0, "helper", true, false);
  284. dword entry_eip = 0;
  285. PageDirectory* old_page_directory = m_page_directory;
  286. PageDirectory* new_page_directory = reinterpret_cast<PageDirectory*>(kmalloc_page_aligned(sizeof(PageDirectory)));
  287. #ifdef MM_DEBUG
  288. dbgprintf("Process %u exec: PD=%x created\n", pid(), new_page_directory);
  289. #endif
  290. MM.populate_page_directory(*new_page_directory);
  291. m_page_directory = new_page_directory;
  292. ProcessPagingScope paging_scope(*this);
  293. // FIXME: Should we consider doing on-demand paging here? Is it actually useful?
  294. bool success = region->page_in(*new_page_directory);
  295. ASSERT(success);
  296. {
  297. InterruptDisabler disabler;
  298. // Okay, here comes the sleight of hand, pay close attention..
  299. auto old_regions = move(m_regions);
  300. ELFLoader loader(region->linearAddress.asPtr());
  301. loader.map_section_hook = [&] (LinearAddress laddr, size_t size, size_t alignment, size_t offset_in_image, bool is_readable, bool is_writable, const String& name) {
  302. ASSERT(size);
  303. ASSERT(alignment == PAGE_SIZE);
  304. size = ((size / 4096) + 1) * 4096; // FIXME: Use ceil_div?
  305. (void) allocate_region_with_vmo(laddr, size, vmo.copyRef(), offset_in_image, String(name), is_readable, is_writable);
  306. return laddr.asPtr();
  307. };
  308. loader.alloc_section_hook = [&] (LinearAddress laddr, size_t size, size_t alignment, bool is_readable, bool is_writable, const String& name) {
  309. ASSERT(size);
  310. ASSERT(alignment == PAGE_SIZE);
  311. size = ((size / 4096) + 1) * 4096; // FIXME: Use ceil_div?
  312. (void) allocate_region(laddr, size, String(name), is_readable, is_writable);
  313. return laddr.asPtr();
  314. };
  315. bool success = loader.load();
  316. if (!success) {
  317. m_page_directory = old_page_directory;
  318. MM.enter_process_paging_scope(*this);
  319. MM.release_page_directory(*new_page_directory);
  320. m_regions = move(old_regions);
  321. kprintf("sys$execve: Failure loading %s\n", path.characters());
  322. return -ENOEXEC;
  323. }
  324. entry_eip = (dword)loader.symbol_ptr("_start");
  325. if (!entry_eip) {
  326. m_page_directory = old_page_directory;
  327. MM.enter_process_paging_scope(*this);
  328. MM.release_page_directory(*new_page_directory);
  329. m_regions = move(old_regions);
  330. return -ENOEXEC;
  331. }
  332. }
  333. memset(m_signal_action_data, 0, sizeof(m_signal_action_data));
  334. m_signal_mask = 0xffffffff;
  335. InterruptDisabler disabler;
  336. Scheduler::prepare_to_modify_tss(*this);
  337. m_name = parts.takeLast();
  338. dword old_esp0 = m_tss.esp0;
  339. memset(&m_tss, 0, sizeof(m_tss));
  340. m_tss.eflags = 0x0202;
  341. m_tss.eip = entry_eip;
  342. m_tss.cs = 0x1b;
  343. m_tss.ds = 0x23;
  344. m_tss.es = 0x23;
  345. m_tss.fs = 0x23;
  346. m_tss.gs = 0x23;
  347. m_tss.ss = 0x23;
  348. m_tss.cr3 = (dword)m_page_directory;
  349. m_stack_region = allocate_region(LinearAddress(), defaultStackSize, "stack");
  350. ASSERT(m_stack_region);
  351. m_stackTop3 = m_stack_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  352. m_tss.esp = m_stackTop3;
  353. m_tss.ss0 = 0x10;
  354. m_tss.esp0 = old_esp0;
  355. m_tss.ss2 = m_pid;
  356. MM.release_page_directory(*old_page_directory);
  357. m_executable = descriptor->vnode();
  358. m_arguments = move(arguments);
  359. m_initialEnvironment = move(environment);
  360. #ifdef TASK_DEBUG
  361. kprintf("Process %u (%s) exec'd %s @ %p\n", pid(), name().characters(), path.characters(), m_tss.eip);
  362. #endif
  363. set_state(Skip1SchedulerPass);
  364. return 0;
  365. }
  366. int Process::exec(const String& path, Vector<String>&& arguments, Vector<String>&& environment)
  367. {
  368. // The bulk of exec() is done by do_exec(), which ensures that all locals
  369. // are cleaned up by the time we yield-teleport below.
  370. int rc = do_exec(path, move(arguments), move(environment));
  371. if (rc < 0)
  372. return rc;
  373. if (current == this) {
  374. Scheduler::yield();
  375. ASSERT_NOT_REACHED();
  376. }
  377. return 0;
  378. }
  379. int Process::sys$execve(const char* filename, const char** argv, const char** envp)
  380. {
  381. VALIDATE_USER_READ(filename, strlen(filename));
  382. if (argv) {
  383. VALIDATE_USER_READ(argv, sizeof(const char**));
  384. for (size_t i = 0; argv[i]; ++i) {
  385. VALIDATE_USER_READ(argv[i], strlen(argv[i]));
  386. }
  387. }
  388. if (envp) {
  389. VALIDATE_USER_READ(envp, sizeof(const char**));
  390. for (size_t i = 0; envp[i]; ++i) {
  391. VALIDATE_USER_READ(envp[i], strlen(envp[i]));
  392. }
  393. }
  394. String path(filename);
  395. auto parts = path.split('/');
  396. Vector<String> arguments;
  397. if (argv) {
  398. for (size_t i = 0; argv[i]; ++i) {
  399. arguments.append(argv[i]);
  400. }
  401. } else {
  402. arguments.append(parts.last());
  403. }
  404. Vector<String> environment;
  405. if (envp) {
  406. for (size_t i = 0; envp[i]; ++i)
  407. environment.append(envp[i]);
  408. }
  409. int rc = exec(path, move(arguments), move(environment));
  410. ASSERT(rc < 0); // We should never continue after a successful exec!
  411. return rc;
  412. }
  413. Process* Process::create_user_process(const String& path, uid_t uid, gid_t gid, pid_t parent_pid, int& error, Vector<String>&& arguments, Vector<String>&& environment, TTY* tty)
  414. {
  415. // FIXME: Don't split() the path twice (sys$spawn also does it...)
  416. auto parts = path.split('/');
  417. if (arguments.isEmpty()) {
  418. arguments.append(parts.last());
  419. }
  420. RetainPtr<VirtualFileSystem::Node> cwd;
  421. {
  422. InterruptDisabler disabler;
  423. if (auto* parent = Process::from_pid(parent_pid))
  424. cwd = parent->m_cwd.copyRef();
  425. }
  426. if (!cwd)
  427. cwd = VirtualFileSystem::the().root();
  428. auto* process = new Process(parts.takeLast(), uid, gid, parent_pid, Ring3, move(cwd), nullptr, tty);
  429. error = process->exec(path, move(arguments), move(environment));
  430. if (error != 0)
  431. return nullptr;
  432. ProcFileSystem::the().addProcess(*process);
  433. {
  434. InterruptDisabler disabler;
  435. g_processes->prepend(process);
  436. system.nprocess++;
  437. }
  438. #ifdef TASK_DEBUG
  439. kprintf("Process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), process->m_tss.eip);
  440. #endif
  441. error = 0;
  442. return process;
  443. }
  444. int Process::sys$get_environment(char*** environ)
  445. {
  446. auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "environ");
  447. if (!region)
  448. return -ENOMEM;
  449. MM.mapRegion(*this, *region);
  450. char* envpage = (char*)region->linearAddress.get();
  451. *environ = (char**)envpage;
  452. char* bufptr = envpage + (sizeof(char*) * (m_initialEnvironment.size() + 1));
  453. for (size_t i = 0; i < m_initialEnvironment.size(); ++i) {
  454. (*environ)[i] = bufptr;
  455. memcpy(bufptr, m_initialEnvironment[i].characters(), m_initialEnvironment[i].length());
  456. bufptr += m_initialEnvironment[i].length();
  457. *(bufptr++) = '\0';
  458. }
  459. (*environ)[m_initialEnvironment.size()] = nullptr;
  460. return 0;
  461. }
  462. int Process::sys$get_arguments(int* argc, char*** argv)
  463. {
  464. auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "argv");
  465. if (!region)
  466. return -ENOMEM;
  467. MM.mapRegion(*this, *region);
  468. char* argpage = (char*)region->linearAddress.get();
  469. *argc = m_arguments.size();
  470. *argv = (char**)argpage;
  471. char* bufptr = argpage + (sizeof(char*) * m_arguments.size());
  472. for (size_t i = 0; i < m_arguments.size(); ++i) {
  473. (*argv)[i] = bufptr;
  474. memcpy(bufptr, m_arguments[i].characters(), m_arguments[i].length());
  475. bufptr += m_arguments[i].length();
  476. *(bufptr++) = '\0';
  477. }
  478. return 0;
  479. }
  480. Process* Process::create_kernel_process(void (*e)(), String&& name)
  481. {
  482. auto* process = new Process(move(name), (uid_t)0, (gid_t)0, (pid_t)0, Ring0);
  483. process->m_tss.eip = (dword)e;
  484. if (process->pid() != 0) {
  485. {
  486. InterruptDisabler disabler;
  487. g_processes->prepend(process);
  488. system.nprocess++;
  489. }
  490. ProcFileSystem::the().addProcess(*process);
  491. #ifdef TASK_DEBUG
  492. kprintf("Kernel process %u (%s) spawned @ %p\n", process->pid(), process->name().characters(), process->m_tss.eip);
  493. #endif
  494. }
  495. return process;
  496. }
  497. Process::Process(String&& name, uid_t uid, gid_t gid, pid_t ppid, RingLevel ring, RetainPtr<VirtualFileSystem::Node>&& cwd, RetainPtr<VirtualFileSystem::Node>&& executable, TTY* tty, Process* fork_parent)
  498. : m_name(move(name))
  499. , m_pid(next_pid++) // FIXME: RACE: This variable looks racy!
  500. , m_uid(uid)
  501. , m_gid(gid)
  502. , m_euid(uid)
  503. , m_egid(gid)
  504. , m_state(Runnable)
  505. , m_ring(ring)
  506. , m_cwd(move(cwd))
  507. , m_executable(move(executable))
  508. , m_tty(tty)
  509. , m_ppid(ppid)
  510. {
  511. m_gids.set(m_gid);
  512. if (fork_parent) {
  513. m_sid = fork_parent->m_sid;
  514. m_pgid = fork_parent->m_pgid;
  515. } else {
  516. // FIXME: Use a ProcessHandle? Presumably we're executing *IN* the parent right now though..
  517. InterruptDisabler disabler;
  518. if (auto* parent = Process::from_pid(m_ppid)) {
  519. m_sid = parent->m_sid;
  520. m_pgid = parent->m_pgid;
  521. }
  522. }
  523. m_page_directory = (PageDirectory*)kmalloc_page_aligned(sizeof(PageDirectory));
  524. #ifdef MM_DEBUG
  525. dbgprintf("Process %u ctor: PD=%x created\n", pid(), m_page_directory);
  526. #endif
  527. MM.populate_page_directory(*m_page_directory);
  528. if (fork_parent) {
  529. m_file_descriptors.resize(fork_parent->m_file_descriptors.size());
  530. for (size_t i = 0; i < fork_parent->m_file_descriptors.size(); ++i) {
  531. if (!fork_parent->m_file_descriptors[i])
  532. continue;
  533. #ifdef FORK_DEBUG
  534. dbgprintf("fork: cloning fd %u... (%p) istty? %u\n", i, fork_parent->m_file_descriptors[i].ptr(), fork_parent->m_file_descriptors[i]->isTTY());
  535. #endif
  536. m_file_descriptors[i] = fork_parent->m_file_descriptors[i]->clone();
  537. }
  538. } else {
  539. m_file_descriptors.resize(m_max_open_file_descriptors);
  540. if (tty) {
  541. m_file_descriptors[0] = tty->open(O_RDONLY);
  542. m_file_descriptors[1] = tty->open(O_WRONLY);
  543. m_file_descriptors[2] = tty->open(O_WRONLY);
  544. }
  545. }
  546. if (fork_parent)
  547. m_nextRegion = fork_parent->m_nextRegion;
  548. else
  549. m_nextRegion = LinearAddress(0x10000000);
  550. if (fork_parent) {
  551. memcpy(&m_tss, &fork_parent->m_tss, sizeof(m_tss));
  552. } else {
  553. memset(&m_tss, 0, sizeof(m_tss));
  554. // Only IF is set when a process boots.
  555. m_tss.eflags = 0x0202;
  556. word cs, ds, ss;
  557. if (isRing0()) {
  558. cs = 0x08;
  559. ds = 0x10;
  560. ss = 0x10;
  561. } else {
  562. cs = 0x1b;
  563. ds = 0x23;
  564. ss = 0x23;
  565. }
  566. m_tss.ds = ds;
  567. m_tss.es = ds;
  568. m_tss.fs = ds;
  569. m_tss.gs = ds;
  570. m_tss.ss = ss;
  571. m_tss.cs = cs;
  572. }
  573. m_tss.cr3 = (dword)m_page_directory;
  574. if (isRing0()) {
  575. // FIXME: This memory is leaked.
  576. // But uh, there's also no kernel process termination, so I guess it's not technically leaked...
  577. dword stackBottom = (dword)kmalloc_eternal(defaultStackSize);
  578. m_stackTop0 = (stackBottom + defaultStackSize) & 0xffffff8;
  579. m_tss.esp = m_stackTop0;
  580. } else {
  581. if (fork_parent) {
  582. m_stackTop3 = fork_parent->m_stackTop3;
  583. } else {
  584. auto* region = allocate_region(LinearAddress(), defaultStackSize, "stack");
  585. ASSERT(region);
  586. m_stackTop3 = region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  587. m_tss.esp = m_stackTop3;
  588. }
  589. }
  590. if (isRing3()) {
  591. // Ring3 processes need a separate stack for Ring0.
  592. m_kernelStack = kmalloc(defaultStackSize);
  593. m_stackTop0 = ((DWORD)m_kernelStack + defaultStackSize) & 0xffffff8;
  594. m_tss.ss0 = 0x10;
  595. m_tss.esp0 = m_stackTop0;
  596. }
  597. // HACK: Ring2 SS in the TSS is the current PID.
  598. m_tss.ss2 = m_pid;
  599. m_farPtr.offset = 0x98765432;
  600. }
  601. Process::~Process()
  602. {
  603. InterruptDisabler disabler;
  604. ProcFileSystem::the().removeProcess(*this);
  605. system.nprocess--;
  606. gdt_free_entry(selector());
  607. if (m_kernelStack) {
  608. kfree(m_kernelStack);
  609. m_kernelStack = nullptr;
  610. }
  611. MM.release_page_directory(*m_page_directory);
  612. }
  613. void Process::dumpRegions()
  614. {
  615. kprintf("Process %s(%u) regions:\n", name().characters(), pid());
  616. kprintf("BEGIN END SIZE NAME\n");
  617. for (auto& region : m_regions) {
  618. kprintf("%x -- %x %x %s\n",
  619. region->linearAddress.get(),
  620. region->linearAddress.offset(region->size - 1).get(),
  621. region->size,
  622. region->name.characters());
  623. }
  624. }
  625. void Process::sys$exit(int status)
  626. {
  627. cli();
  628. #ifdef TASK_DEBUG
  629. kprintf("sys$exit: %s(%u) exit with status %d\n", name().characters(), pid(), status);
  630. #endif
  631. set_state(Dead);
  632. m_termination_status = status;
  633. m_termination_signal = 0;
  634. Scheduler::pick_next_and_switch_now();
  635. ASSERT_NOT_REACHED();
  636. }
  637. void Process::terminate_due_to_signal(byte signal)
  638. {
  639. ASSERT_INTERRUPTS_DISABLED();
  640. ASSERT(signal < 32);
  641. dbgprintf("terminate_due_to_signal %s(%u) <- %u\n", name().characters(), pid(), signal);
  642. m_termination_status = 0;
  643. m_termination_signal = signal;
  644. set_state(Dead);
  645. }
  646. void Process::send_signal(byte signal, Process* sender)
  647. {
  648. ASSERT_INTERRUPTS_DISABLED();
  649. ASSERT(signal < 32);
  650. m_pending_signals |= 1 << signal;
  651. if (sender)
  652. dbgprintf("signal: %s(%u) sent %d to %s(%u)\n", sender->name().characters(), sender->pid(), signal, name().characters(), pid());
  653. else
  654. dbgprintf("signal: kernel sent %d to %s(%u)\n", signal, name().characters(), pid());
  655. }
  656. bool Process::has_unmasked_pending_signals() const
  657. {
  658. return m_pending_signals & m_signal_mask;
  659. }
  660. void Process::dispatch_one_pending_signal()
  661. {
  662. ASSERT_INTERRUPTS_DISABLED();
  663. dword signal_candidates = m_pending_signals & m_signal_mask;
  664. ASSERT(signal_candidates);
  665. byte signal = 0;
  666. for (; signal < 32; ++signal) {
  667. if (signal_candidates & (1 << signal)) {
  668. break;
  669. }
  670. }
  671. dispatch_signal(signal);
  672. }
  673. void Process::dispatch_signal(byte signal)
  674. {
  675. ASSERT_INTERRUPTS_DISABLED();
  676. ASSERT(signal < 32);
  677. dbgprintf("dispatch_signal %s(%u) <- %u\n", name().characters(), pid(), signal);
  678. auto& action = m_signal_action_data[signal];
  679. // FIXME: Implement SA_SIGINFO signal handlers.
  680. ASSERT(!(action.flags & SA_SIGINFO));
  681. auto handler_laddr = action.handler_or_sigaction;
  682. if (handler_laddr.is_null()) {
  683. // FIXME: Is termination really always the appropriate action?
  684. return terminate_due_to_signal(signal);
  685. }
  686. Scheduler::prepare_to_modify_tss(*this);
  687. word ret_cs = m_tss.cs;
  688. dword ret_eip = m_tss.eip;
  689. dword ret_eflags = m_tss.eflags;
  690. bool interrupting_in_kernel = (ret_cs & 3) == 0;
  691. if (interrupting_in_kernel) {
  692. dbgprintf("dispatch_signal to %s(%u) in state=%s with return to %w:%x\n", name().characters(), pid(), toString(state()), ret_cs, ret_eip);
  693. ASSERT(is_blocked());
  694. m_tss_to_resume_kernel = m_tss;
  695. #ifdef SIGNAL_DEBUG
  696. dbgprintf("resume tss pc: %w:%x\n", m_tss_to_resume_kernel.cs, m_tss_to_resume_kernel.eip);
  697. #endif
  698. }
  699. ProcessPagingScope pagingScope(*this);
  700. if (interrupting_in_kernel) {
  701. if (!m_signal_stack_user_region) {
  702. m_signal_stack_user_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (user)");
  703. ASSERT(m_signal_stack_user_region);
  704. m_signal_stack_kernel_region = allocate_region(LinearAddress(), defaultStackSize, "signal stack (kernel)");
  705. ASSERT(m_signal_stack_user_region);
  706. }
  707. m_tss.ss = 0x23;
  708. m_tss.esp = m_signal_stack_user_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  709. m_tss.ss0 = 0x10;
  710. m_tss.esp0 = m_signal_stack_kernel_region->linearAddress.offset(defaultStackSize).get() & 0xfffffff8;
  711. push_value_on_stack(ret_eflags);
  712. push_value_on_stack(ret_cs);
  713. push_value_on_stack(ret_eip);
  714. } else {
  715. push_value_on_stack(ret_cs);
  716. push_value_on_stack(ret_eip);
  717. push_value_on_stack(ret_eflags);
  718. }
  719. // PUSHA
  720. dword old_esp = m_tss.esp;
  721. push_value_on_stack(m_tss.eax);
  722. push_value_on_stack(m_tss.ecx);
  723. push_value_on_stack(m_tss.edx);
  724. push_value_on_stack(m_tss.ebx);
  725. push_value_on_stack(old_esp);
  726. push_value_on_stack(m_tss.ebp);
  727. push_value_on_stack(m_tss.esi);
  728. push_value_on_stack(m_tss.edi);
  729. m_tss.eax = (dword)signal;
  730. m_tss.cs = 0x1b;
  731. m_tss.ds = 0x23;
  732. m_tss.es = 0x23;
  733. m_tss.fs = 0x23;
  734. m_tss.gs = 0x23;
  735. m_tss.eip = handler_laddr.get();
  736. if (m_return_to_ring3_from_signal_trampoline.is_null()) {
  737. // FIXME: This should be a global trampoline shared by all processes, not one created per process!
  738. // FIXME: Remap as read-only after setup.
  739. auto* region = allocate_region(LinearAddress(), PAGE_SIZE, "signal_trampoline", true, true);
  740. m_return_to_ring3_from_signal_trampoline = region->linearAddress;
  741. byte* code_ptr = m_return_to_ring3_from_signal_trampoline.asPtr();
  742. *code_ptr++ = 0x61; // popa
  743. *code_ptr++ = 0x9d; // popf
  744. *code_ptr++ = 0xc3; // ret
  745. *code_ptr++ = 0x0f; // ud2
  746. *code_ptr++ = 0x0b;
  747. m_return_to_ring0_from_signal_trampoline = LinearAddress((dword)code_ptr);
  748. *code_ptr++ = 0x61; // popa
  749. *code_ptr++ = 0xb8; // mov eax, <dword>
  750. *(dword*)code_ptr = Syscall::SC_sigreturn;
  751. code_ptr += sizeof(dword);
  752. *code_ptr++ = 0xcd; // int 0x80
  753. *code_ptr++ = 0x80;
  754. *code_ptr++ = 0x0f; // ud2
  755. *code_ptr++ = 0x0b;
  756. // FIXME: For !SA_NODEFER, maybe we could do something like emitting an int 0x80 syscall here that
  757. // unmasks the signal so it can be received again? I guess then I would need one trampoline
  758. // per signal number if it's hard-coded, but it's just a few bytes per each.
  759. }
  760. if (interrupting_in_kernel)
  761. push_value_on_stack(m_return_to_ring0_from_signal_trampoline.get());
  762. else
  763. push_value_on_stack(m_return_to_ring3_from_signal_trampoline.get());
  764. m_pending_signals &= ~(1 << signal);
  765. // FIXME: This state is such a hack. It avoids trouble if 'current' is the process receiving a signal.
  766. set_state(Skip1SchedulerPass);
  767. #ifdef SIGNAL_DEBUG
  768. dbgprintf("signal: Okay, %s(%u) {%s} has been primed with signal handler %w:%x\n", name().characters(), pid(), toString(state()), m_tss.cs, m_tss.eip);
  769. #endif
  770. }
  771. void Process::sys$sigreturn()
  772. {
  773. InterruptDisabler disabler;
  774. Scheduler::prepare_to_modify_tss(*this);
  775. m_tss = m_tss_to_resume_kernel;
  776. #ifdef SIGNAL_DEBUG
  777. dbgprintf("sys$sigreturn in %s(%u)\n", name().characters(), pid());
  778. dbgprintf(" -> resuming execution at %w:%x\n", m_tss.cs, m_tss.eip);
  779. #endif
  780. set_state(Skip1SchedulerPass);
  781. Scheduler::yield();
  782. kprintf("sys$sigreturn failed in %s(%u)\n", name().characters(), pid());
  783. ASSERT_NOT_REACHED();
  784. }
  785. void Process::push_value_on_stack(dword value)
  786. {
  787. m_tss.esp -= 4;
  788. dword* stack_ptr = (dword*)m_tss.esp;
  789. *stack_ptr = value;
  790. }
  791. void Process::crash()
  792. {
  793. ASSERT_INTERRUPTS_DISABLED();
  794. ASSERT(state() != Dead);
  795. m_termination_signal = SIGSEGV;
  796. set_state(Dead);
  797. dumpRegions();
  798. Scheduler::pick_next_and_switch_now();
  799. ASSERT_NOT_REACHED();
  800. }
  801. Process* Process::from_pid(pid_t pid)
  802. {
  803. ASSERT_INTERRUPTS_DISABLED();
  804. for (auto* process = g_processes->head(); process; process = process->next()) {
  805. if (process->pid() == pid)
  806. return process;
  807. }
  808. return nullptr;
  809. }
  810. FileDescriptor* Process::file_descriptor(int fd)
  811. {
  812. if (fd < 0)
  813. return nullptr;
  814. if ((size_t)fd < m_file_descriptors.size())
  815. return m_file_descriptors[fd].ptr();
  816. return nullptr;
  817. }
  818. const FileDescriptor* Process::file_descriptor(int fd) const
  819. {
  820. if (fd < 0)
  821. return nullptr;
  822. if ((size_t)fd < m_file_descriptors.size())
  823. return m_file_descriptors[fd].ptr();
  824. return nullptr;
  825. }
  826. ssize_t Process::sys$get_dir_entries(int fd, void* buffer, size_t size)
  827. {
  828. VALIDATE_USER_WRITE(buffer, size);
  829. auto* descriptor = file_descriptor(fd);
  830. if (!descriptor)
  831. return -EBADF;
  832. return descriptor->get_dir_entries((byte*)buffer, size);
  833. }
  834. int Process::sys$lseek(int fd, off_t offset, int whence)
  835. {
  836. auto* descriptor = file_descriptor(fd);
  837. if (!descriptor)
  838. return -EBADF;
  839. return descriptor->seek(offset, whence);
  840. }
  841. int Process::sys$ttyname_r(int fd, char* buffer, size_t size)
  842. {
  843. VALIDATE_USER_WRITE(buffer, size);
  844. auto* descriptor = file_descriptor(fd);
  845. if (!descriptor)
  846. return -EBADF;
  847. if (!descriptor->isTTY())
  848. return -ENOTTY;
  849. auto ttyName = descriptor->tty()->ttyName();
  850. if (size < ttyName.length() + 1)
  851. return -ERANGE;
  852. strcpy(buffer, ttyName.characters());
  853. return 0;
  854. }
  855. ssize_t Process::sys$write(int fd, const void* data, size_t size)
  856. {
  857. VALIDATE_USER_READ(data, size);
  858. #ifdef DEBUG_IO
  859. dbgprintf("%s(%u): sys$write(%d, %p, %u)\n", name().characters(), pid(), fd, data, size);
  860. #endif
  861. auto* descriptor = file_descriptor(fd);
  862. if (!descriptor)
  863. return -EBADF;
  864. auto nwritten = descriptor->write((const byte*)data, size);
  865. if (has_unmasked_pending_signals()) {
  866. block(BlockedSignal);
  867. Scheduler::yield();
  868. if (nwritten == 0)
  869. return -EINTR;
  870. }
  871. #ifdef DEBUG_IO
  872. dbgprintf("%s(%u) sys$write: nwritten=%u\n", name().characters(), pid(), nwritten);
  873. #endif
  874. return nwritten;
  875. }
  876. ssize_t Process::sys$read(int fd, void* outbuf, size_t nread)
  877. {
  878. VALIDATE_USER_WRITE(outbuf, nread);
  879. #ifdef DEBUG_IO
  880. dbgprintf("%s(%u) sys$read(%d, %p, %u)\n", name().characters(), pid(), fd, outbuf, nread);
  881. #endif
  882. auto* descriptor = file_descriptor(fd);
  883. if (!descriptor)
  884. return -EBADF;
  885. if (descriptor->isBlocking()) {
  886. if (!descriptor->hasDataAvailableForRead()) {
  887. m_fdBlockedOnRead = fd;
  888. block(BlockedRead);
  889. sched_yield();
  890. if (m_was_interrupted_while_blocked)
  891. return -EINTR;
  892. }
  893. }
  894. nread = descriptor->read((byte*)outbuf, nread);
  895. #ifdef DEBUG_IO
  896. dbgprintf("%s(%u) Process::sys$read: nread=%u\n", name().characters(), pid(), nread);
  897. #endif
  898. return nread;
  899. }
  900. int Process::sys$close(int fd)
  901. {
  902. auto* descriptor = file_descriptor(fd);
  903. if (!descriptor)
  904. return -EBADF;
  905. int rc = descriptor->close();
  906. m_file_descriptors[fd] = nullptr;
  907. return rc;
  908. }
  909. int Process::sys$access(const char* pathname, int mode)
  910. {
  911. (void) mode;
  912. VALIDATE_USER_READ(pathname, strlen(pathname));
  913. ASSERT_NOT_REACHED();
  914. }
  915. int Process::sys$fcntl(int fd, int cmd, dword arg)
  916. {
  917. (void) cmd;
  918. (void) arg;
  919. dbgprintf("sys$fcntl: fd=%d, cmd=%d, arg=%u\n", fd, cmd, arg);
  920. auto* descriptor = file_descriptor(fd);
  921. if (!descriptor)
  922. return -EBADF;
  923. switch (cmd) {
  924. case F_GETFD:
  925. return descriptor->fd_flags();
  926. case F_SETFD:
  927. return descriptor->set_fd_flags(arg);
  928. case F_GETFL:
  929. return descriptor->file_flags();
  930. case F_SETFL:
  931. return descriptor->set_file_flags(arg);
  932. default:
  933. ASSERT_NOT_REACHED();
  934. }
  935. return 0;
  936. }
  937. int Process::sys$fstat(int fd, Unix::stat* statbuf)
  938. {
  939. VALIDATE_USER_WRITE(statbuf, sizeof(Unix::stat));
  940. auto* descriptor = file_descriptor(fd);
  941. if (!descriptor)
  942. return -EBADF;
  943. descriptor->stat(statbuf);
  944. return 0;
  945. }
  946. int Process::sys$lstat(const char* path, Unix::stat* statbuf)
  947. {
  948. VALIDATE_USER_WRITE(statbuf, sizeof(Unix::stat));
  949. int error;
  950. auto descriptor = VirtualFileSystem::the().open(move(path), error, O_NOFOLLOW_NOERROR, cwdInode());
  951. if (!descriptor)
  952. return error;
  953. descriptor->stat(statbuf);
  954. return 0;
  955. }
  956. int Process::sys$stat(const char* path, Unix::stat* statbuf)
  957. {
  958. VALIDATE_USER_WRITE(statbuf, sizeof(Unix::stat));
  959. int error;
  960. auto descriptor = VirtualFileSystem::the().open(move(path), error, 0, cwdInode());
  961. if (!descriptor)
  962. return error;
  963. descriptor->stat(statbuf);
  964. return 0;
  965. }
  966. int Process::sys$readlink(const char* path, char* buffer, size_t size)
  967. {
  968. VALIDATE_USER_READ(path, strlen(path));
  969. VALIDATE_USER_WRITE(buffer, size);
  970. int error;
  971. auto descriptor = VirtualFileSystem::the().open(path, error, O_RDONLY | O_NOFOLLOW_NOERROR, cwdInode());
  972. if (!descriptor)
  973. return error;
  974. if (!descriptor->metadata().isSymbolicLink())
  975. return -EINVAL;
  976. auto contents = descriptor->readEntireFile();
  977. if (!contents)
  978. return -EIO; // FIXME: Get a more detailed error from VFS.
  979. memcpy(buffer, contents.pointer(), min(size, contents.size()));
  980. if (contents.size() + 1 < size)
  981. buffer[contents.size()] = '\0';
  982. return 0;
  983. }
  984. int Process::sys$chdir(const char* path)
  985. {
  986. VALIDATE_USER_READ(path, strlen(path));
  987. int error;
  988. auto descriptor = VirtualFileSystem::the().open(path, error, 0, cwdInode());
  989. if (!descriptor)
  990. return error;
  991. if (!descriptor->isDirectory())
  992. return -ENOTDIR;
  993. m_cwd = descriptor->vnode();
  994. return 0;
  995. }
  996. int Process::sys$getcwd(char* buffer, size_t size)
  997. {
  998. VALIDATE_USER_WRITE(buffer, size);
  999. auto path = VirtualFileSystem::the().absolutePath(cwdInode());
  1000. if (path.isNull())
  1001. return -EINVAL;
  1002. if (size < path.length() + 1)
  1003. return -ERANGE;
  1004. strcpy(buffer, path.characters());
  1005. return 0;
  1006. }
  1007. size_t Process::number_of_open_file_descriptors() const
  1008. {
  1009. size_t count = 0;
  1010. for (auto& descriptor : m_file_descriptors) {
  1011. if (descriptor)
  1012. ++count;
  1013. }
  1014. return count;
  1015. }
  1016. int Process::sys$open(const char* path, int options)
  1017. {
  1018. #ifdef DEBUG_IO
  1019. dbgprintf("%s(%u) sys$open(\"%s\")\n", name().characters(), pid(), path);
  1020. #endif
  1021. VALIDATE_USER_READ(path, strlen(path));
  1022. if (number_of_open_file_descriptors() >= m_max_open_file_descriptors)
  1023. return -EMFILE;
  1024. int error;
  1025. auto descriptor = VirtualFileSystem::the().open(path, error, options, cwdInode());
  1026. if (!descriptor)
  1027. return error;
  1028. if (options & O_DIRECTORY && !descriptor->isDirectory())
  1029. return -ENOTDIR; // FIXME: This should be handled by VFS::open.
  1030. int fd = 0;
  1031. for (; fd < (int)m_max_open_file_descriptors; ++fd) {
  1032. if (!m_file_descriptors[fd])
  1033. break;
  1034. }
  1035. m_file_descriptors[fd] = move(descriptor);
  1036. return fd;
  1037. }
  1038. int Process::sys$pipe(int* pipefd)
  1039. {
  1040. VALIDATE_USER_WRITE(pipefd, sizeof(int) * 2);
  1041. ASSERT_NOT_REACHED();
  1042. }
  1043. int Process::sys$killpg(int pgrp, int signum)
  1044. {
  1045. if (signum < 1 || signum >= 32)
  1046. return -EINVAL;
  1047. (void) pgrp;
  1048. ASSERT_NOT_REACHED();
  1049. }
  1050. int Process::sys$setuid(uid_t)
  1051. {
  1052. ASSERT_NOT_REACHED();
  1053. }
  1054. int Process::sys$setgid(gid_t)
  1055. {
  1056. ASSERT_NOT_REACHED();
  1057. }
  1058. unsigned Process::sys$alarm(unsigned seconds)
  1059. {
  1060. (void) seconds;
  1061. ASSERT_NOT_REACHED();
  1062. }
  1063. int Process::sys$uname(utsname* buf)
  1064. {
  1065. VALIDATE_USER_WRITE(buf, sizeof(utsname));
  1066. strcpy(buf->sysname, "Serenity");
  1067. strcpy(buf->release, "1.0-dev");
  1068. strcpy(buf->version, "FIXME");
  1069. strcpy(buf->machine, "i386");
  1070. strcpy(buf->nodename, getHostname().characters());
  1071. return 0;
  1072. }
  1073. int Process::sys$isatty(int fd)
  1074. {
  1075. auto* descriptor = file_descriptor(fd);
  1076. if (!descriptor)
  1077. return -EBADF;
  1078. if (!descriptor->isTTY())
  1079. return -ENOTTY;
  1080. return 1;
  1081. }
  1082. int Process::sys$kill(pid_t pid, int signal)
  1083. {
  1084. if (pid == 0) {
  1085. // FIXME: Send to same-group processes.
  1086. ASSERT(pid != 0);
  1087. }
  1088. if (pid == -1) {
  1089. // FIXME: Send to all processes.
  1090. ASSERT(pid != -1);
  1091. }
  1092. ASSERT(pid != current->pid()); // FIXME: Support this scenario.
  1093. InterruptDisabler disabler;
  1094. auto* peer = Process::from_pid(pid);
  1095. if (!peer)
  1096. return -ESRCH;
  1097. peer->send_signal(signal, this);
  1098. return 0;
  1099. }
  1100. int Process::sys$sleep(unsigned seconds)
  1101. {
  1102. if (!seconds)
  1103. return 0;
  1104. sleep(seconds * TICKS_PER_SECOND);
  1105. if (m_wakeupTime > system.uptime) {
  1106. ASSERT(m_was_interrupted_while_blocked);
  1107. dword ticks_left_until_original_wakeup_time = m_wakeupTime - system.uptime;
  1108. return ticks_left_until_original_wakeup_time / TICKS_PER_SECOND;
  1109. }
  1110. return 0;
  1111. }
  1112. int Process::sys$gettimeofday(timeval* tv)
  1113. {
  1114. VALIDATE_USER_WRITE(tv, sizeof(tv));
  1115. InterruptDisabler disabler;
  1116. auto now = RTC::now();
  1117. tv->tv_sec = now;
  1118. tv->tv_usec = 0;
  1119. return 0;
  1120. }
  1121. uid_t Process::sys$getuid()
  1122. {
  1123. return m_uid;
  1124. }
  1125. gid_t Process::sys$getgid()
  1126. {
  1127. return m_gid;
  1128. }
  1129. uid_t Process::sys$geteuid()
  1130. {
  1131. return m_euid;
  1132. }
  1133. gid_t Process::sys$getegid()
  1134. {
  1135. return m_egid;
  1136. }
  1137. pid_t Process::sys$getpid()
  1138. {
  1139. return m_pid;
  1140. }
  1141. pid_t Process::sys$getppid()
  1142. {
  1143. return m_ppid;
  1144. }
  1145. mode_t Process::sys$umask(mode_t mask)
  1146. {
  1147. auto old_mask = m_umask;
  1148. m_umask = mask;
  1149. return old_mask;
  1150. }
  1151. void Process::reap(Process& process)
  1152. {
  1153. InterruptDisabler disabler;
  1154. dbgprintf("reap: %s(%u) {%s}\n", process.name().characters(), process.pid(), toString(process.state()));
  1155. ASSERT(process.state() == Dead);
  1156. g_processes->remove(&process);
  1157. delete &process;
  1158. }
  1159. pid_t Process::sys$waitpid(pid_t waitee, int* wstatus, int options)
  1160. {
  1161. //kprintf("sys$waitpid(%d, %p, %d)\n", waitee, wstatus, options);
  1162. // FIXME: Respect options
  1163. (void) options;
  1164. if (wstatus)
  1165. VALIDATE_USER_WRITE(wstatus, sizeof(int));
  1166. {
  1167. InterruptDisabler disabler;
  1168. if (waitee != -1 && !Process::from_pid(waitee))
  1169. return -ECHILD;
  1170. }
  1171. m_waitee = waitee;
  1172. m_waitee_status = 0;
  1173. block(BlockedWait);
  1174. sched_yield();
  1175. if (m_was_interrupted_while_blocked)
  1176. return -EINTR;
  1177. Process* waitee_process;
  1178. {
  1179. InterruptDisabler disabler;
  1180. // NOTE: If waitee was -1, m_waitee will have been filled in by the scheduler.
  1181. waitee_process = Process::from_pid(m_waitee);
  1182. }
  1183. ASSERT(waitee_process);
  1184. reap(*waitee_process);
  1185. if (wstatus)
  1186. *wstatus = m_waitee_status;
  1187. return m_waitee;
  1188. }
  1189. void Process::unblock()
  1190. {
  1191. ASSERT(m_state != Process::Runnable && m_state != Process::Running);
  1192. system.nblocked--;
  1193. m_state = Process::Runnable;
  1194. }
  1195. void Process::block(Process::State state)
  1196. {
  1197. ASSERT(current->state() == Process::Running);
  1198. system.nblocked++;
  1199. m_was_interrupted_while_blocked = false;
  1200. set_state(state);
  1201. }
  1202. void block(Process::State state)
  1203. {
  1204. current->block(state);
  1205. sched_yield();
  1206. }
  1207. void sleep(DWORD ticks)
  1208. {
  1209. ASSERT(current->state() == Process::Running);
  1210. current->setWakeupTime(system.uptime + ticks);
  1211. current->block(Process::BlockedSleep);
  1212. sched_yield();
  1213. }
  1214. bool Process::isValidAddressForKernel(LinearAddress laddr) const
  1215. {
  1216. // We check extra carefully here since the first 4MB of the address space is identity-mapped.
  1217. // This code allows access outside of the known used address ranges to get caught.
  1218. InterruptDisabler disabler;
  1219. if (laddr.get() >= ksyms().first().address && laddr.get() <= ksyms().last().address)
  1220. return true;
  1221. if (is_kmalloc_address((void*)laddr.get()))
  1222. return true;
  1223. return validate_user_read(laddr);
  1224. }
  1225. bool Process::validate_user_read(LinearAddress laddr) const
  1226. {
  1227. InterruptDisabler disabler;
  1228. return MM.validate_user_read(*this, laddr);
  1229. }
  1230. bool Process::validate_user_write(LinearAddress laddr) const
  1231. {
  1232. InterruptDisabler disabler;
  1233. return MM.validate_user_write(*this, laddr);
  1234. }
  1235. pid_t Process::sys$getsid(pid_t pid)
  1236. {
  1237. if (pid == 0)
  1238. return m_sid;
  1239. InterruptDisabler disabler;
  1240. auto* process = Process::from_pid(pid);
  1241. if (!process)
  1242. return -ESRCH;
  1243. if (m_sid != process->m_sid)
  1244. return -EPERM;
  1245. return process->m_sid;
  1246. }
  1247. pid_t Process::sys$setsid()
  1248. {
  1249. InterruptDisabler disabler;
  1250. bool found_process_with_same_pgid_as_my_pid = false;
  1251. Process::for_each_in_pgrp(pid(), [&] (auto&) {
  1252. found_process_with_same_pgid_as_my_pid = true;
  1253. return false;
  1254. });
  1255. if (found_process_with_same_pgid_as_my_pid)
  1256. return -EPERM;
  1257. m_sid = m_pid;
  1258. m_pgid = m_pid;
  1259. return m_sid;
  1260. }
  1261. pid_t Process::sys$getpgid(pid_t pid)
  1262. {
  1263. if (pid == 0)
  1264. return m_pgid;
  1265. InterruptDisabler disabler; // FIXME: Use a ProcessHandle
  1266. auto* process = Process::from_pid(pid);
  1267. if (!process)
  1268. return -ESRCH;
  1269. return process->m_pgid;
  1270. }
  1271. pid_t Process::sys$getpgrp()
  1272. {
  1273. return m_pgid;
  1274. }
  1275. static pid_t get_sid_from_pgid(pid_t pgid)
  1276. {
  1277. InterruptDisabler disabler;
  1278. auto* group_leader = Process::from_pid(pgid);
  1279. if (!group_leader)
  1280. return -1;
  1281. return group_leader->sid();
  1282. }
  1283. int Process::sys$setpgid(pid_t specified_pid, pid_t specified_pgid)
  1284. {
  1285. InterruptDisabler disabler; // FIXME: Use a ProcessHandle
  1286. pid_t pid = specified_pid ? specified_pid : m_pid;
  1287. if (specified_pgid < 0)
  1288. return -EINVAL;
  1289. auto* process = Process::from_pid(pid);
  1290. if (!process)
  1291. return -ESRCH;
  1292. pid_t new_pgid = specified_pgid ? specified_pgid : process->m_pid;
  1293. pid_t current_sid = get_sid_from_pgid(process->m_pgid);
  1294. pid_t new_sid = get_sid_from_pgid(new_pgid);
  1295. if (current_sid != new_sid) {
  1296. // Can't move a process between sessions.
  1297. return -EPERM;
  1298. }
  1299. // FIXME: There are more EPERM conditions to check for here..
  1300. process->m_pgid = new_pgid;
  1301. return 0;
  1302. }
  1303. int Process::sys$tcgetattr(int fd, Unix::termios* tp)
  1304. {
  1305. VALIDATE_USER_WRITE(tp, sizeof(Unix::termios));
  1306. auto* descriptor = file_descriptor(fd);
  1307. if (!descriptor)
  1308. return -EBADF;
  1309. if (!descriptor->isTTY())
  1310. return -ENOTTY;
  1311. auto& tty = *descriptor->tty();
  1312. #ifdef TERMIOS_DEBUG
  1313. kprintf("sys$tcgetattr(fd=%d, tp=%p)\n", fd, tp);
  1314. #endif
  1315. memcpy(tp, &tty.termios(), sizeof(Unix::termios));
  1316. return 0;
  1317. }
  1318. int Process::sys$tcsetattr(int fd, int optional_actions, const Unix::termios* tp)
  1319. {
  1320. (void) optional_actions;
  1321. VALIDATE_USER_READ(tp, sizeof(Unix::termios));
  1322. auto* descriptor = file_descriptor(fd);
  1323. if (!descriptor)
  1324. return -EBADF;
  1325. if (!descriptor->isTTY())
  1326. return -ENOTTY;
  1327. #ifdef TERMIOS_DEBUG
  1328. kprintf("sys$tcsetattr(fd=%d, tp=%p)\n", fd, tp);
  1329. #endif
  1330. auto& tty = *descriptor->tty();
  1331. memcpy(&tty.termios(), tp, sizeof(Unix::termios));
  1332. return 0;
  1333. }
  1334. pid_t Process::sys$tcgetpgrp(int fd)
  1335. {
  1336. auto* descriptor = file_descriptor(fd);
  1337. if (!descriptor)
  1338. return -EBADF;
  1339. if (!descriptor->isTTY())
  1340. return -ENOTTY;
  1341. auto& tty = *descriptor->tty();
  1342. if (&tty != m_tty)
  1343. return -ENOTTY;
  1344. return tty.pgid();
  1345. }
  1346. int Process::sys$tcsetpgrp(int fd, pid_t pgid)
  1347. {
  1348. if (pgid < 0)
  1349. return -EINVAL;
  1350. if (get_sid_from_pgid(pgid) != m_sid)
  1351. return -EINVAL;
  1352. auto* descriptor = file_descriptor(fd);
  1353. if (!descriptor)
  1354. return -EBADF;
  1355. if (!descriptor->isTTY())
  1356. return -ENOTTY;
  1357. auto& tty = *descriptor->tty();
  1358. if (&tty != m_tty)
  1359. return -ENOTTY;
  1360. tty.set_pgid(pgid);
  1361. return 0;
  1362. }
  1363. int Process::sys$getdtablesize()
  1364. {
  1365. return m_max_open_file_descriptors;
  1366. }
  1367. int Process::sys$dup(int old_fd)
  1368. {
  1369. auto* descriptor = file_descriptor(old_fd);
  1370. if (!descriptor)
  1371. return -EBADF;
  1372. if (number_of_open_file_descriptors() == m_max_open_file_descriptors)
  1373. return -EMFILE;
  1374. int new_fd = 0;
  1375. for (; new_fd < (int)m_max_open_file_descriptors; ++new_fd) {
  1376. if (!m_file_descriptors[new_fd])
  1377. break;
  1378. }
  1379. m_file_descriptors[new_fd] = descriptor;
  1380. return new_fd;
  1381. }
  1382. int Process::sys$dup2(int old_fd, int new_fd)
  1383. {
  1384. auto* descriptor = file_descriptor(old_fd);
  1385. if (!descriptor)
  1386. return -EBADF;
  1387. if (number_of_open_file_descriptors() == m_max_open_file_descriptors)
  1388. return -EMFILE;
  1389. m_file_descriptors[new_fd] = descriptor;
  1390. return new_fd;
  1391. }
  1392. Unix::sighandler_t Process::sys$signal(int signum, Unix::sighandler_t handler)
  1393. {
  1394. // FIXME: Fail with -EINVAL if attepmting to catch or ignore SIGKILL or SIGSTOP.
  1395. if (signum < 1 || signum >= 32)
  1396. return (Unix::sighandler_t)-EINVAL;
  1397. dbgprintf("sys$signal: %d => L%x\n", signum, handler);
  1398. return nullptr;
  1399. }
  1400. int Process::sys$sigprocmask(int how, const Unix::sigset_t* set, Unix::sigset_t* old_set)
  1401. {
  1402. if (old_set) {
  1403. VALIDATE_USER_READ(old_set, sizeof(Unix::sigset_t));
  1404. *old_set = m_signal_mask;
  1405. }
  1406. if (set) {
  1407. VALIDATE_USER_READ(set, sizeof(Unix::sigset_t));
  1408. switch (how) {
  1409. case SIG_BLOCK:
  1410. m_signal_mask &= ~(*set);
  1411. break;
  1412. case SIG_UNBLOCK:
  1413. m_signal_mask |= *set;
  1414. break;
  1415. case SIG_SETMASK:
  1416. m_signal_mask = *set;
  1417. break;
  1418. default:
  1419. return -EINVAL;
  1420. }
  1421. }
  1422. return 0;
  1423. }
  1424. int Process::sys$sigpending(Unix::sigset_t* set)
  1425. {
  1426. VALIDATE_USER_READ(set, sizeof(Unix::sigset_t));
  1427. *set = m_pending_signals;
  1428. return 0;
  1429. }
  1430. int Process::sys$sigaction(int signum, const Unix::sigaction* act, Unix::sigaction* old_act)
  1431. {
  1432. // FIXME: Fail with -EINVAL if attepmting to change action for SIGKILL or SIGSTOP.
  1433. if (signum < 1 || signum >= 32)
  1434. return -EINVAL;
  1435. VALIDATE_USER_READ(act, sizeof(Unix::sigaction));
  1436. InterruptDisabler disabler; // FIXME: This should use a narrower lock.
  1437. auto& action = m_signal_action_data[signum];
  1438. if (old_act) {
  1439. VALIDATE_USER_WRITE(old_act, sizeof(Unix::sigaction));
  1440. old_act->sa_flags = action.flags;
  1441. old_act->sa_restorer = (decltype(old_act->sa_restorer))action.restorer.get();
  1442. old_act->sa_sigaction = (decltype(old_act->sa_sigaction))action.handler_or_sigaction.get();
  1443. }
  1444. action.restorer = LinearAddress((dword)act->sa_restorer);
  1445. action.flags = act->sa_flags;
  1446. action.handler_or_sigaction = LinearAddress((dword)act->sa_sigaction);
  1447. return 0;
  1448. }
  1449. int Process::sys$getgroups(int count, gid_t* gids)
  1450. {
  1451. if (count < 0)
  1452. return -EINVAL;
  1453. ASSERT(m_gids.size() < MAX_PROCESS_GIDS);
  1454. if (!count)
  1455. return m_gids.size();
  1456. if (count != (int)m_gids.size())
  1457. return -EINVAL;
  1458. VALIDATE_USER_WRITE(gids, sizeof(gid_t) * count);
  1459. size_t i = 0;
  1460. for (auto gid : m_gids)
  1461. gids[i++] = gid;
  1462. return 0;
  1463. }
  1464. int Process::sys$setgroups(size_t count, const gid_t* gids)
  1465. {
  1466. if (!is_root())
  1467. return -EPERM;
  1468. if (count >= MAX_PROCESS_GIDS)
  1469. return -EINVAL;
  1470. VALIDATE_USER_READ(gids, sizeof(gid_t) * count);
  1471. m_gids.clear();
  1472. m_gids.set(m_gid);
  1473. for (size_t i = 0; i < count; ++i)
  1474. m_gids.set(gids[i]);
  1475. return 0;
  1476. }