Processor.cpp 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Format.h>
  7. #include <AK/StdLibExtras.h>
  8. #include <AK/String.h>
  9. #include <AK/Types.h>
  10. #include <Kernel/Interrupts/APIC.h>
  11. #include <Kernel/Process.h>
  12. #include <Kernel/Random.h>
  13. #include <Kernel/Sections.h>
  14. #include <Kernel/StdLib.h>
  15. #include <Kernel/Thread.h>
  16. #include <Kernel/VM/ProcessPagingScope.h>
  17. #include <Kernel/Arch/x86/CPUID.h>
  18. #include <Kernel/Arch/x86/Interrupts.h>
  19. #include <Kernel/Arch/x86/MSR.h>
  20. #include <Kernel/Arch/x86/Processor.h>
  21. #include <Kernel/Arch/x86/ProcessorInfo.h>
  22. #include <Kernel/Arch/x86/SafeMem.h>
  23. #include <Kernel/Arch/x86/ScopedCritical.h>
  24. #include <Kernel/Arch/x86/TrapFrame.h>
  25. namespace Kernel {
  26. READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;
  27. READONLY_AFTER_INIT static ProcessorContainer s_processors {};
  28. READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
  29. static volatile bool s_smp_enabled;
  30. static Atomic<ProcessorMessage*> s_message_pool;
  31. Atomic<u32> Processor::s_idle_cpu_mask { 0 };
  32. extern "C" void thread_context_first_enter(void);
  33. extern "C" void exit_kernel_thread(void);
  34. // The compiler can't see the calls to these functions inside assembly.
  35. // Declare them, to avoid dead code warnings.
  36. extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
  37. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
  38. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
  39. UNMAP_AFTER_INIT static void sse_init()
  40. {
  41. write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
  42. write_cr4(read_cr4() | 0x600);
  43. }
  44. void exit_kernel_thread(void)
  45. {
  46. Thread::current()->exit();
  47. }
  48. UNMAP_AFTER_INIT void Processor::cpu_detect()
  49. {
  50. // NOTE: This is called during Processor::early_initialize, we cannot
  51. // safely log at this point because we don't have kmalloc
  52. // initialized yet!
  53. auto set_feature =
  54. [&](CPUFeature f) {
  55. m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f));
  56. };
  57. m_features = static_cast<CPUFeature>(0);
  58. CPUID processor_info(0x1);
  59. if (processor_info.edx() & (1 << 4))
  60. set_feature(CPUFeature::TSC);
  61. if (processor_info.edx() & (1 << 6))
  62. set_feature(CPUFeature::PAE);
  63. if (processor_info.edx() & (1 << 13))
  64. set_feature(CPUFeature::PGE);
  65. if (processor_info.edx() & (1 << 23))
  66. set_feature(CPUFeature::MMX);
  67. if (processor_info.edx() & (1 << 24))
  68. set_feature(CPUFeature::FXSR);
  69. if (processor_info.edx() & (1 << 25))
  70. set_feature(CPUFeature::SSE);
  71. if (processor_info.edx() & (1 << 26))
  72. set_feature(CPUFeature::SSE2);
  73. if (processor_info.ecx() & (1 << 0))
  74. set_feature(CPUFeature::SSE3);
  75. if (processor_info.ecx() & (1 << 9))
  76. set_feature(CPUFeature::SSSE3);
  77. if (processor_info.ecx() & (1 << 19))
  78. set_feature(CPUFeature::SSE4_1);
  79. if (processor_info.ecx() & (1 << 20))
  80. set_feature(CPUFeature::SSE4_2);
  81. if (processor_info.ecx() & (1 << 26))
  82. set_feature(CPUFeature::XSAVE);
  83. if (processor_info.ecx() & (1 << 28))
  84. set_feature(CPUFeature::AVX);
  85. if (processor_info.ecx() & (1 << 30))
  86. set_feature(CPUFeature::RDRAND);
  87. if (processor_info.edx() & (1 << 11)) {
  88. u32 stepping = processor_info.eax() & 0xf;
  89. u32 model = (processor_info.eax() >> 4) & 0xf;
  90. u32 family = (processor_info.eax() >> 8) & 0xf;
  91. if (!(family == 6 && model < 3 && stepping < 3))
  92. set_feature(CPUFeature::SEP);
  93. if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
  94. set_feature(CPUFeature::CONSTANT_TSC);
  95. }
  96. u32 max_extended_leaf = CPUID(0x80000000).eax();
  97. if (max_extended_leaf >= 0x80000001) {
  98. CPUID extended_processor_info(0x80000001);
  99. if (extended_processor_info.edx() & (1 << 20))
  100. set_feature(CPUFeature::NX);
  101. if (extended_processor_info.edx() & (1 << 27))
  102. set_feature(CPUFeature::RDTSCP);
  103. if (extended_processor_info.edx() & (1 << 29))
  104. set_feature(CPUFeature::LM);
  105. if (extended_processor_info.edx() & (1 << 11)) {
  106. // Only available in 64 bit mode
  107. set_feature(CPUFeature::SYSCALL);
  108. }
  109. }
  110. if (max_extended_leaf >= 0x80000007) {
  111. CPUID cpuid(0x80000007);
  112. if (cpuid.edx() & (1 << 8)) {
  113. set_feature(CPUFeature::CONSTANT_TSC);
  114. set_feature(CPUFeature::NONSTOP_TSC);
  115. }
  116. }
  117. if (max_extended_leaf >= 0x80000008) {
  118. // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
  119. CPUID cpuid(0x80000008);
  120. m_physical_address_bit_width = cpuid.eax() & 0xff;
  121. } else {
  122. // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
  123. m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
  124. }
  125. CPUID extended_features(0x7);
  126. if (extended_features.ebx() & (1 << 20))
  127. set_feature(CPUFeature::SMAP);
  128. if (extended_features.ebx() & (1 << 7))
  129. set_feature(CPUFeature::SMEP);
  130. if (extended_features.ecx() & (1 << 2))
  131. set_feature(CPUFeature::UMIP);
  132. if (extended_features.ebx() & (1 << 18))
  133. set_feature(CPUFeature::RDSEED);
  134. }
  135. UNMAP_AFTER_INIT void Processor::cpu_setup()
  136. {
  137. // NOTE: This is called during Processor::early_initialize, we cannot
  138. // safely log at this point because we don't have kmalloc
  139. // initialized yet!
  140. cpu_detect();
  141. if (has_feature(CPUFeature::SSE)) {
  142. // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
  143. // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
  144. VERIFY(has_feature(CPUFeature::FXSR));
  145. sse_init();
  146. }
  147. write_cr0(read_cr0() | 0x00010000);
  148. if (has_feature(CPUFeature::PGE)) {
  149. // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
  150. write_cr4(read_cr4() | 0x80);
  151. }
  152. if (has_feature(CPUFeature::NX)) {
  153. // Turn on IA32_EFER.NXE
  154. asm volatile(
  155. "movl $0xc0000080, %ecx\n"
  156. "rdmsr\n"
  157. "orl $0x800, %eax\n"
  158. "wrmsr\n");
  159. }
  160. if (has_feature(CPUFeature::SMEP)) {
  161. // Turn on CR4.SMEP
  162. write_cr4(read_cr4() | 0x100000);
  163. }
  164. if (has_feature(CPUFeature::SMAP)) {
  165. // Turn on CR4.SMAP
  166. write_cr4(read_cr4() | 0x200000);
  167. }
  168. if (has_feature(CPUFeature::UMIP)) {
  169. write_cr4(read_cr4() | 0x800);
  170. }
  171. if (has_feature(CPUFeature::TSC)) {
  172. write_cr4(read_cr4() | 0x4);
  173. }
  174. if (has_feature(CPUFeature::XSAVE)) {
  175. // Turn on CR4.OSXSAVE
  176. write_cr4(read_cr4() | 0x40000);
  177. // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
  178. // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
  179. write_xcr0(0x1);
  180. if (has_feature(CPUFeature::AVX)) {
  181. // Turn on SSE, AVX and x87 flags
  182. write_xcr0(read_xcr0() | 0x7);
  183. }
  184. }
  185. }
  186. String Processor::features_string() const
  187. {
  188. StringBuilder builder;
  189. auto feature_to_str =
  190. [](CPUFeature f) -> const char* {
  191. switch (f) {
  192. case CPUFeature::NX:
  193. return "nx";
  194. case CPUFeature::PAE:
  195. return "pae";
  196. case CPUFeature::PGE:
  197. return "pge";
  198. case CPUFeature::RDRAND:
  199. return "rdrand";
  200. case CPUFeature::RDSEED:
  201. return "rdseed";
  202. case CPUFeature::SMAP:
  203. return "smap";
  204. case CPUFeature::SMEP:
  205. return "smep";
  206. case CPUFeature::SSE:
  207. return "sse";
  208. case CPUFeature::TSC:
  209. return "tsc";
  210. case CPUFeature::RDTSCP:
  211. return "rdtscp";
  212. case CPUFeature::CONSTANT_TSC:
  213. return "constant_tsc";
  214. case CPUFeature::NONSTOP_TSC:
  215. return "nonstop_tsc";
  216. case CPUFeature::UMIP:
  217. return "umip";
  218. case CPUFeature::SEP:
  219. return "sep";
  220. case CPUFeature::SYSCALL:
  221. return "syscall";
  222. case CPUFeature::MMX:
  223. return "mmx";
  224. case CPUFeature::FXSR:
  225. return "fxsr";
  226. case CPUFeature::SSE2:
  227. return "sse2";
  228. case CPUFeature::SSE3:
  229. return "sse3";
  230. case CPUFeature::SSSE3:
  231. return "ssse3";
  232. case CPUFeature::SSE4_1:
  233. return "sse4.1";
  234. case CPUFeature::SSE4_2:
  235. return "sse4.2";
  236. case CPUFeature::XSAVE:
  237. return "xsave";
  238. case CPUFeature::AVX:
  239. return "avx";
  240. case CPUFeature::LM:
  241. return "lm";
  242. // no default statement here intentionally so that we get
  243. // a warning if a new feature is forgotten to be added here
  244. }
  245. // Shouldn't ever happen
  246. return "???";
  247. };
  248. bool first = true;
  249. for (u32 flag = 1; flag != 0; flag <<= 1) {
  250. if ((static_cast<u32>(m_features) & flag) != 0) {
  251. if (first)
  252. first = false;
  253. else
  254. builder.append(' ');
  255. auto str = feature_to_str(static_cast<CPUFeature>(flag));
  256. builder.append(str, strlen(str));
  257. }
  258. }
  259. return builder.build();
  260. }
  261. UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
  262. {
  263. m_self = this;
  264. m_cpu = cpu;
  265. m_in_irq = 0;
  266. m_in_critical = 0;
  267. m_invoke_scheduler_async = false;
  268. m_scheduler_initialized = false;
  269. m_message_queue = nullptr;
  270. m_idle_thread = nullptr;
  271. m_current_thread = nullptr;
  272. m_scheduler_data = nullptr;
  273. m_mm_data = nullptr;
  274. m_info = nullptr;
  275. m_halt_requested = false;
  276. if (cpu == 0) {
  277. s_smp_enabled = false;
  278. g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
  279. } else {
  280. g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
  281. }
  282. deferred_call_pool_init();
  283. cpu_setup();
  284. gdt_init();
  285. VERIFY(is_initialized()); // sanity check
  286. VERIFY(&current() == this); // sanity check
  287. }
  288. UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
  289. {
  290. VERIFY(m_self == this);
  291. VERIFY(&current() == this); // sanity check
  292. dmesgln("CPU[{}]: Supported features: {}", id(), features_string());
  293. if (!has_feature(CPUFeature::RDRAND))
  294. dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", id());
  295. dmesgln("CPU[{}]: Physical address bit width: {}", id(), m_physical_address_bit_width);
  296. if (cpu == 0)
  297. idt_init();
  298. else
  299. flush_idt();
  300. if (cpu == 0) {
  301. VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
  302. asm volatile("fninit");
  303. if (has_feature(CPUFeature::FXSR))
  304. asm volatile("fxsave %0"
  305. : "=m"(s_clean_fpu_state));
  306. else
  307. asm volatile("fnsave %0"
  308. : "=m"(s_clean_fpu_state));
  309. }
  310. m_info = new ProcessorInfo(*this);
  311. {
  312. // We need to prevent races between APs starting up at the same time
  313. VERIFY(cpu < s_processors.size());
  314. s_processors[cpu] = this;
  315. }
  316. }
  317. void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
  318. {
  319. u16 i = (selector & 0xfffc) >> 3;
  320. u32 prev_gdt_length = m_gdt_length;
  321. if (i >= m_gdt_length) {
  322. m_gdt_length = i + 1;
  323. VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
  324. m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
  325. }
  326. m_gdt[i].low = low;
  327. m_gdt[i].high = high;
  328. // clear selectors we may have skipped
  329. while (i < prev_gdt_length) {
  330. m_gdt[i].low = 0;
  331. m_gdt[i].high = 0;
  332. i++;
  333. }
  334. }
  335. void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
  336. {
  337. write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
  338. }
  339. Descriptor& Processor::get_gdt_entry(u16 selector)
  340. {
  341. u16 i = (selector & 0xfffc) >> 3;
  342. return *(Descriptor*)(&m_gdt[i]);
  343. }
  344. void Processor::flush_gdt()
  345. {
  346. m_gdtr.address = m_gdt;
  347. m_gdtr.limit = (m_gdt_length * 8) - 1;
  348. asm volatile("lgdt %0" ::"m"(m_gdtr)
  349. : "memory");
  350. }
  351. const DescriptorTablePointer& Processor::get_gdtr()
  352. {
  353. return m_gdtr;
  354. }
  355. Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
  356. {
  357. FlatPtr frame_ptr = 0, eip = 0;
  358. Vector<FlatPtr, 32> stack_trace;
  359. auto walk_stack = [&](FlatPtr stack_ptr) {
  360. static constexpr size_t max_stack_frames = 4096;
  361. stack_trace.append(eip);
  362. size_t count = 1;
  363. while (stack_ptr && stack_trace.size() < max_stack_frames) {
  364. FlatPtr retaddr;
  365. count++;
  366. if (max_frames != 0 && count > max_frames)
  367. break;
  368. if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
  369. if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr)
  370. break;
  371. stack_trace.append(retaddr);
  372. if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr))
  373. break;
  374. } else {
  375. void* fault_at;
  376. if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
  377. break;
  378. stack_trace.append(retaddr);
  379. if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
  380. break;
  381. }
  382. }
  383. };
  384. auto capture_current_thread = [&]() {
  385. frame_ptr = (FlatPtr)__builtin_frame_address(0);
  386. eip = (FlatPtr)__builtin_return_address(0);
  387. walk_stack(frame_ptr);
  388. };
  389. // Since the thread may be running on another processor, there
  390. // is a chance a context switch may happen while we're trying
  391. // to get it. It also won't be entirely accurate and merely
  392. // reflect the status at the last context switch.
  393. ScopedSpinLock lock(g_scheduler_lock);
  394. if (&thread == Processor::current_thread()) {
  395. VERIFY(thread.state() == Thread::Running);
  396. // Leave the scheduler lock. If we trigger page faults we may
  397. // need to be preempted. Since this is our own thread it won't
  398. // cause any problems as the stack won't change below this frame.
  399. lock.unlock();
  400. capture_current_thread();
  401. } else if (thread.is_active()) {
  402. VERIFY(thread.cpu() != Processor::id());
  403. // If this is the case, the thread is currently running
  404. // on another processor. We can't trust the kernel stack as
  405. // it may be changing at any time. We need to probably send
  406. // an IPI to that processor, have it walk the stack and wait
  407. // until it returns the data back to us
  408. auto& proc = Processor::current();
  409. smp_unicast(
  410. thread.cpu(),
  411. [&]() {
  412. dbgln("CPU[{}] getting stack for cpu #{}", Processor::id(), proc.get_id());
  413. ProcessPagingScope paging_scope(thread.process());
  414. VERIFY(&Processor::current() != &proc);
  415. VERIFY(&thread == Processor::current_thread());
  416. // NOTE: Because the other processor is still holding the
  417. // scheduler lock while waiting for this callback to finish,
  418. // the current thread on the target processor cannot change
  419. // TODO: What to do about page faults here? We might deadlock
  420. // because the other processor is still holding the
  421. // scheduler lock...
  422. capture_current_thread();
  423. },
  424. false);
  425. } else {
  426. switch (thread.state()) {
  427. case Thread::Running:
  428. VERIFY_NOT_REACHED(); // should have been handled above
  429. case Thread::Runnable:
  430. case Thread::Stopped:
  431. case Thread::Blocked:
  432. case Thread::Dying:
  433. case Thread::Dead: {
  434. // We need to retrieve ebp from what was last pushed to the kernel
  435. // stack. Before switching out of that thread, it switch_context
  436. // pushed the callee-saved registers, and the last of them happens
  437. // to be ebp.
  438. ProcessPagingScope paging_scope(thread.process());
  439. #if ARCH(I386)
  440. auto& regs = thread.regs();
  441. u32* stack_top;
  442. stack_top = reinterpret_cast<u32*>(regs.esp);
  443. if (is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
  444. if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]))
  445. frame_ptr = 0;
  446. } else {
  447. void* fault_at;
  448. if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
  449. frame_ptr = 0;
  450. }
  451. eip = regs.eip;
  452. #else
  453. TODO();
  454. #endif
  455. // TODO: We need to leave the scheduler lock here, but we also
  456. // need to prevent the target thread from being run while
  457. // we walk the stack
  458. lock.unlock();
  459. walk_stack(frame_ptr);
  460. break;
  461. }
  462. default:
  463. dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
  464. break;
  465. }
  466. }
  467. return stack_trace;
  468. }
  469. ProcessorContainer& Processor::processors()
  470. {
  471. return s_processors;
  472. }
  473. Processor& Processor::by_id(u32 cpu)
  474. {
  475. // s_processors does not need to be protected by a lock of any kind.
  476. // It is populated early in the boot process, and the BSP is waiting
  477. // for all APs to finish, after which this array never gets modified
  478. // again, so it's safe to not protect access to it here
  479. auto& procs = processors();
  480. VERIFY(procs[cpu] != nullptr);
  481. VERIFY(procs.size() > cpu);
  482. return *procs[cpu];
  483. }
  484. void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
  485. {
  486. VERIFY_INTERRUPTS_DISABLED();
  487. VERIFY(&Processor::current() == this);
  488. trap.prev_irq_level = m_in_irq;
  489. if (raise_irq)
  490. m_in_irq++;
  491. auto* current_thread = Processor::current_thread();
  492. if (current_thread) {
  493. auto& current_trap = current_thread->current_trap();
  494. trap.next_trap = current_trap;
  495. current_trap = &trap;
  496. // The cs register of this trap tells us where we will return back to
  497. current_thread->set_previous_mode(((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode);
  498. } else {
  499. trap.next_trap = nullptr;
  500. }
  501. }
  502. void Processor::exit_trap(TrapFrame& trap)
  503. {
  504. VERIFY_INTERRUPTS_DISABLED();
  505. VERIFY(&Processor::current() == this);
  506. VERIFY(m_in_irq >= trap.prev_irq_level);
  507. m_in_irq = trap.prev_irq_level;
  508. smp_process_pending_messages();
  509. if (!m_in_irq && !m_in_critical)
  510. check_invoke_scheduler();
  511. auto* current_thread = Processor::current_thread();
  512. if (current_thread) {
  513. auto& current_trap = current_thread->current_trap();
  514. current_trap = trap.next_trap;
  515. if (current_trap) {
  516. VERIFY(current_trap->regs);
  517. // If we have another higher level trap then we probably returned
  518. // from an interrupt or irq handler. The cs register of the
  519. // new/higher level trap tells us what the mode prior to it was
  520. current_thread->set_previous_mode(((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode);
  521. } else {
  522. // If we don't have a higher level trap then we're back in user mode.
  523. // Unless we're a kernel process, in which case we're always in kernel mode
  524. current_thread->set_previous_mode(current_thread->process().is_kernel_process() ? Thread::PreviousMode::KernelMode : Thread::PreviousMode::UserMode);
  525. }
  526. }
  527. }
  528. void Processor::check_invoke_scheduler()
  529. {
  530. VERIFY(!m_in_irq);
  531. VERIFY(!m_in_critical);
  532. if (m_invoke_scheduler_async && m_scheduler_initialized) {
  533. m_invoke_scheduler_async = false;
  534. Scheduler::invoke_async();
  535. }
  536. }
  537. void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
  538. {
  539. auto ptr = vaddr.as_ptr();
  540. while (page_count > 0) {
  541. // clang-format off
  542. asm volatile("invlpg %0"
  543. :
  544. : "m"(*ptr)
  545. : "memory");
  546. // clang-format on
  547. ptr += PAGE_SIZE;
  548. page_count--;
  549. }
  550. }
  551. void Processor::flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count)
  552. {
  553. if (s_smp_enabled && (!is_user_address(vaddr) || Process::current()->thread_count() > 1))
  554. smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
  555. else
  556. flush_tlb_local(vaddr, page_count);
  557. }
  558. void Processor::smp_return_to_pool(ProcessorMessage& msg)
  559. {
  560. ProcessorMessage* next = nullptr;
  561. do {
  562. msg.next = next;
  563. } while (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel));
  564. }
  565. ProcessorMessage& Processor::smp_get_from_pool()
  566. {
  567. ProcessorMessage* msg;
  568. // The assumption is that messages are never removed from the pool!
  569. for (;;) {
  570. msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
  571. if (!msg) {
  572. if (!Processor::current().smp_process_pending_messages()) {
  573. // TODO: pause for a bit?
  574. }
  575. continue;
  576. }
  577. // If another processor were to use this message in the meanwhile,
  578. // "msg" is still valid (because it never gets freed). We'd detect
  579. // this because the expected value "msg" and pool would
  580. // no longer match, and the compare_exchange will fail. But accessing
  581. // "msg->next" is always safe here.
  582. if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
  583. // We successfully "popped" this available message
  584. break;
  585. }
  586. }
  587. VERIFY(msg != nullptr);
  588. return *msg;
  589. }
  590. u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
  591. {
  592. VERIFY(Processor::current().in_critical());
  593. VERIFY(wake_count > 0);
  594. if (!s_smp_enabled)
  595. return 0;
  596. // Wake at most N - 1 processors
  597. if (wake_count >= Processor::count()) {
  598. wake_count = Processor::count() - 1;
  599. VERIFY(wake_count > 0);
  600. }
  601. u32 current_id = Processor::current().id();
  602. u32 did_wake_count = 0;
  603. auto& apic = APIC::the();
  604. while (did_wake_count < wake_count) {
  605. // Try to get a set of idle CPUs and flip them to busy
  606. u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
  607. u32 idle_count = __builtin_popcountl(idle_mask);
  608. if (idle_count == 0)
  609. break; // No (more) idle processor available
  610. u32 found_mask = 0;
  611. for (u32 i = 0; i < idle_count; i++) {
  612. u32 cpu = __builtin_ffsl(idle_mask) - 1;
  613. idle_mask &= ~(1u << cpu);
  614. found_mask |= 1u << cpu;
  615. }
  616. idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
  617. if (idle_mask == 0)
  618. continue; // All of them were flipped to busy, try again
  619. idle_count = __builtin_popcountl(idle_mask);
  620. for (u32 i = 0; i < idle_count; i++) {
  621. u32 cpu = __builtin_ffsl(idle_mask) - 1;
  622. idle_mask &= ~(1u << cpu);
  623. // Send an IPI to that CPU to wake it up. There is a possibility
  624. // someone else woke it up as well, or that it woke up due to
  625. // a timer interrupt. But we tried hard to avoid this...
  626. apic.send_ipi(cpu);
  627. did_wake_count++;
  628. }
  629. }
  630. return did_wake_count;
  631. }
  632. UNMAP_AFTER_INIT void Processor::smp_enable()
  633. {
  634. size_t msg_pool_size = Processor::count() * 100u;
  635. size_t msg_entries_cnt = Processor::count();
  636. auto msgs = new ProcessorMessage[msg_pool_size];
  637. auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
  638. size_t msg_entry_i = 0;
  639. for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
  640. auto& msg = msgs[i];
  641. msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
  642. msg.per_proc_entries = &msg_entries[msg_entry_i];
  643. for (size_t k = 0; k < msg_entries_cnt; k++)
  644. msg_entries[msg_entry_i + k].msg = &msg;
  645. }
  646. s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);
  647. // Start sending IPI messages
  648. s_smp_enabled = true;
  649. }
  650. void Processor::smp_cleanup_message(ProcessorMessage& msg)
  651. {
  652. switch (msg.type) {
  653. case ProcessorMessage::Callback:
  654. msg.callback_value().~Function();
  655. break;
  656. default:
  657. break;
  658. }
  659. }
  660. bool Processor::smp_process_pending_messages()
  661. {
  662. bool did_process = false;
  663. u32 prev_flags;
  664. enter_critical(prev_flags);
  665. if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
  666. // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
  667. auto reverse_list =
  668. [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
  669. ProcessorMessageEntry* rev_list = nullptr;
  670. while (list) {
  671. auto next = list->next;
  672. list->next = rev_list;
  673. rev_list = list;
  674. list = next;
  675. }
  676. return rev_list;
  677. };
  678. pending_msgs = reverse_list(pending_msgs);
  679. // now process in the right order
  680. ProcessorMessageEntry* next_msg;
  681. for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
  682. next_msg = cur_msg->next;
  683. auto msg = cur_msg->msg;
  684. dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", id(), VirtualAddress(msg));
  685. switch (msg->type) {
  686. case ProcessorMessage::Callback:
  687. msg->invoke_callback();
  688. break;
  689. case ProcessorMessage::FlushTlb:
  690. if (is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
  691. // We assume that we don't cross into kernel land!
  692. VERIFY(is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
  693. if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
  694. // This processor isn't using this page directory right now, we can ignore this request
  695. dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
  696. break;
  697. }
  698. }
  699. flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
  700. break;
  701. }
  702. bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
  703. auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
  704. VERIFY(prev_refs != 0);
  705. if (prev_refs == 1) {
  706. // All processors handled this. If this is an async message,
  707. // we need to clean it up and return it to the pool
  708. if (is_async) {
  709. smp_cleanup_message(*msg);
  710. smp_return_to_pool(*msg);
  711. }
  712. }
  713. if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
  714. halt_this();
  715. }
  716. did_process = true;
  717. } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
  718. halt_this();
  719. }
  720. leave_critical(prev_flags);
  721. return did_process;
  722. }
  723. bool Processor::smp_queue_message(ProcessorMessage& msg)
  724. {
  725. // Note that it's quite possible that the other processor may pop
  726. // the queue at any given time. We rely on the fact that the messages
  727. // are pooled and never get freed!
  728. auto& msg_entry = msg.per_proc_entries[id()];
  729. VERIFY(msg_entry.msg == &msg);
  730. ProcessorMessageEntry* next = nullptr;
  731. do {
  732. msg_entry.next = next;
  733. } while (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel));
  734. return next == nullptr;
  735. }
  736. void Processor::smp_broadcast_message(ProcessorMessage& msg)
  737. {
  738. auto& cur_proc = Processor::current();
  739. dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), count(), VirtualAddress(&cur_proc));
  740. msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
  741. VERIFY(msg.refs > 0);
  742. bool need_broadcast = false;
  743. for_each(
  744. [&](Processor& proc) {
  745. if (&proc != &cur_proc) {
  746. if (proc.smp_queue_message(msg))
  747. need_broadcast = true;
  748. }
  749. });
  750. // Now trigger an IPI on all other APs (unless all targets already had messages queued)
  751. if (need_broadcast)
  752. APIC::the().broadcast_ipi();
  753. }
  754. void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
  755. {
  756. auto& cur_proc = Processor::current();
  757. VERIFY(!msg.async);
  758. // If synchronous then we must cleanup and return the message back
  759. // to the pool. Otherwise, the last processor to complete it will return it
  760. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  761. // TODO: pause for a bit?
  762. // We need to process any messages that may have been sent to
  763. // us while we're waiting. This also checks if another processor
  764. // may have requested us to halt.
  765. cur_proc.smp_process_pending_messages();
  766. }
  767. smp_cleanup_message(msg);
  768. smp_return_to_pool(msg);
  769. }
  770. void Processor::smp_broadcast(Function<void()> callback, bool async)
  771. {
  772. auto& msg = smp_get_from_pool();
  773. msg.async = async;
  774. msg.type = ProcessorMessage::Callback;
  775. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  776. smp_broadcast_message(msg);
  777. if (!async)
  778. smp_broadcast_wait_sync(msg);
  779. }
  780. void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
  781. {
  782. auto& cur_proc = Processor::current();
  783. VERIFY(cpu != cur_proc.get_id());
  784. auto& target_proc = processors()[cpu];
  785. msg.async = async;
  786. dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_proc));
  787. msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
  788. if (target_proc->smp_queue_message(msg)) {
  789. APIC::the().send_ipi(cpu);
  790. }
  791. if (!async) {
  792. // If synchronous then we must cleanup and return the message back
  793. // to the pool. Otherwise, the last processor to complete it will return it
  794. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  795. // TODO: pause for a bit?
  796. // We need to process any messages that may have been sent to
  797. // us while we're waiting. This also checks if another processor
  798. // may have requested us to halt.
  799. cur_proc.smp_process_pending_messages();
  800. }
  801. smp_cleanup_message(msg);
  802. smp_return_to_pool(msg);
  803. }
  804. }
  805. void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
  806. {
  807. auto& msg = smp_get_from_pool();
  808. msg.type = ProcessorMessage::Callback;
  809. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  810. smp_unicast_message(cpu, msg, async);
  811. }
  812. void Processor::smp_broadcast_flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count)
  813. {
  814. auto& msg = smp_get_from_pool();
  815. msg.async = false;
  816. msg.type = ProcessorMessage::FlushTlb;
  817. msg.flush_tlb.page_directory = page_directory;
  818. msg.flush_tlb.ptr = vaddr.as_ptr();
  819. msg.flush_tlb.page_count = page_count;
  820. smp_broadcast_message(msg);
  821. // While the other processors handle this request, we'll flush ours
  822. flush_tlb_local(vaddr, page_count);
  823. // Now wait until everybody is done as well
  824. smp_broadcast_wait_sync(msg);
  825. }
  826. void Processor::smp_broadcast_halt()
  827. {
  828. // We don't want to use a message, because this could have been triggered
  829. // by being out of memory and we might not be able to get a message
  830. for_each(
  831. [&](Processor& proc) {
  832. proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
  833. });
  834. // Now trigger an IPI on all other APs
  835. APIC::the().broadcast_ipi();
  836. }
  837. void Processor::Processor::halt()
  838. {
  839. if (s_smp_enabled)
  840. smp_broadcast_halt();
  841. halt_this();
  842. }
  843. UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
  844. {
  845. size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
  846. for (size_t i = 0; i < pool_count; i++) {
  847. auto& entry = m_deferred_call_pool[i];
  848. entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
  849. new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
  850. entry.was_allocated = false;
  851. }
  852. m_pending_deferred_calls = nullptr;
  853. m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
  854. }
  855. void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
  856. {
  857. VERIFY(m_in_critical);
  858. VERIFY(!entry->was_allocated);
  859. entry->handler_value() = {};
  860. entry->next = m_free_deferred_call_pool_entry;
  861. m_free_deferred_call_pool_entry = entry;
  862. }
  863. DeferredCallEntry* Processor::deferred_call_get_free()
  864. {
  865. VERIFY(m_in_critical);
  866. if (m_free_deferred_call_pool_entry) {
  867. // Fast path, we have an entry in our pool
  868. auto* entry = m_free_deferred_call_pool_entry;
  869. m_free_deferred_call_pool_entry = entry->next;
  870. VERIFY(!entry->was_allocated);
  871. return entry;
  872. }
  873. auto* entry = new DeferredCallEntry;
  874. new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
  875. entry->was_allocated = true;
  876. return entry;
  877. }
  878. void Processor::deferred_call_execute_pending()
  879. {
  880. VERIFY(m_in_critical);
  881. if (!m_pending_deferred_calls)
  882. return;
  883. auto* pending_list = m_pending_deferred_calls;
  884. m_pending_deferred_calls = nullptr;
  885. // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
  886. auto reverse_list =
  887. [](DeferredCallEntry* list) -> DeferredCallEntry* {
  888. DeferredCallEntry* rev_list = nullptr;
  889. while (list) {
  890. auto next = list->next;
  891. list->next = rev_list;
  892. rev_list = list;
  893. list = next;
  894. }
  895. return rev_list;
  896. };
  897. pending_list = reverse_list(pending_list);
  898. do {
  899. pending_list->invoke_handler();
  900. // Return the entry back to the pool, or free it
  901. auto* next = pending_list->next;
  902. if (pending_list->was_allocated) {
  903. pending_list->handler_value().~Function();
  904. delete pending_list;
  905. } else
  906. deferred_call_return_to_pool(pending_list);
  907. pending_list = next;
  908. } while (pending_list);
  909. }
  910. void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
  911. {
  912. VERIFY(m_in_critical);
  913. entry->next = m_pending_deferred_calls;
  914. m_pending_deferred_calls = entry;
  915. }
  916. void Processor::deferred_call_queue(Function<void()> callback)
  917. {
  918. // NOTE: If we are called outside of a critical section and outside
  919. // of an irq handler, the function will be executed before we return!
  920. ScopedCritical critical;
  921. auto& cur_proc = Processor::current();
  922. auto* entry = cur_proc.deferred_call_get_free();
  923. entry->handler_value() = move(callback);
  924. cur_proc.deferred_call_queue_entry(entry);
  925. }
  926. UNMAP_AFTER_INIT void Processor::gdt_init()
  927. {
  928. m_gdt_length = 0;
  929. m_gdtr.address = nullptr;
  930. m_gdtr.limit = 0;
  931. write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
  932. #if ARCH(I386)
  933. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
  934. write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
  935. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
  936. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
  937. #else
  938. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
  939. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
  940. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
  941. #endif
  942. #if ARCH(I386)
  943. Descriptor tls_descriptor {};
  944. tls_descriptor.low = tls_descriptor.high = 0;
  945. tls_descriptor.dpl = 3;
  946. tls_descriptor.segment_present = 1;
  947. tls_descriptor.granularity = 0;
  948. tls_descriptor.operation_size64 = 0;
  949. tls_descriptor.operation_size32 = 1;
  950. tls_descriptor.descriptor_type = 1;
  951. tls_descriptor.type = 2;
  952. write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3
  953. Descriptor fs_descriptor {};
  954. fs_descriptor.set_base(VirtualAddress { this });
  955. fs_descriptor.set_limit(sizeof(Processor) - 1);
  956. fs_descriptor.dpl = 0;
  957. fs_descriptor.segment_present = 1;
  958. fs_descriptor.granularity = 0;
  959. fs_descriptor.operation_size64 = 0;
  960. fs_descriptor.operation_size32 = 1;
  961. fs_descriptor.descriptor_type = 1;
  962. fs_descriptor.type = 2;
  963. write_gdt_entry(GDT_SELECTOR_PROC, fs_descriptor); // fs0
  964. #endif
  965. Descriptor tss_descriptor {};
  966. tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
  967. tss_descriptor.set_limit(sizeof(TSS) - 1);
  968. tss_descriptor.dpl = 0;
  969. tss_descriptor.segment_present = 1;
  970. tss_descriptor.granularity = 0;
  971. tss_descriptor.operation_size64 = 0;
  972. tss_descriptor.operation_size32 = 1;
  973. tss_descriptor.descriptor_type = 0;
  974. tss_descriptor.type = 9;
  975. write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss
  976. #if ARCH(X86_64)
  977. Descriptor tss_descriptor_part2 {};
  978. tss_descriptor_part2.low = (size_t)&m_tss >> 32;
  979. write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
  980. #endif
  981. flush_gdt();
  982. load_task_register(GDT_SELECTOR_TSS);
  983. #if ARCH(X86_64)
  984. MSR fs_base(MSR_FS_BASE);
  985. fs_base.set((size_t)this & 0xffffffff, (size_t)this >> 32);
  986. #else
  987. asm volatile(
  988. "mov %%ax, %%ds\n"
  989. "mov %%ax, %%es\n"
  990. "mov %%ax, %%gs\n"
  991. "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
  992. : "memory");
  993. set_fs(GDT_SELECTOR_PROC);
  994. #endif
  995. #if ARCH(I386)
  996. // Make sure CS points to the kernel code descriptor.
  997. // clang-format off
  998. asm volatile(
  999. "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
  1000. "sanity:\n");
  1001. // clang-format on
  1002. #endif
  1003. }
  1004. extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
  1005. {
  1006. VERIFY(!are_interrupts_enabled());
  1007. VERIFY(is_kernel_mode());
  1008. dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
  1009. VERIFY(to_thread == Thread::current());
  1010. Scheduler::enter_current(*from_thread, true);
  1011. // Since we got here and don't have Scheduler::context_switch in the
  1012. // call stack (because this is the first time we switched into this
  1013. // context), we need to notify the scheduler so that it can release
  1014. // the scheduler lock. We don't want to enable interrupts at this point
  1015. // as we're still in the middle of a context switch. Doing so could
  1016. // trigger a context switch within a context switch, leading to a crash.
  1017. FlatPtr flags;
  1018. #if ARCH(I386)
  1019. flags = trap->regs->eflags;
  1020. #else
  1021. flags = trap->regs->rflags;
  1022. #endif
  1023. Scheduler::leave_on_first_switch(flags & ~0x200);
  1024. }
  1025. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
  1026. {
  1027. VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running);
  1028. VERIFY(to_thread->state() == Thread::Running);
  1029. bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
  1030. Processor::set_current_thread(*to_thread);
  1031. auto& from_regs = from_thread->regs();
  1032. auto& to_regs = to_thread->regs();
  1033. if (has_fxsr)
  1034. asm volatile("fxsave %0"
  1035. : "=m"(from_thread->fpu_state()));
  1036. else
  1037. asm volatile("fnsave %0"
  1038. : "=m"(from_thread->fpu_state()));
  1039. #if ARCH(I386)
  1040. from_regs.fs = get_fs();
  1041. from_regs.gs = get_gs();
  1042. set_fs(to_regs.fs);
  1043. set_gs(to_regs.gs);
  1044. #endif
  1045. if (from_thread->process().is_traced())
  1046. read_debug_registers_into(from_thread->debug_register_state());
  1047. if (to_thread->process().is_traced()) {
  1048. write_debug_registers_from(to_thread->debug_register_state());
  1049. } else {
  1050. clear_debug_registers();
  1051. }
  1052. auto& processor = Processor::current();
  1053. #if ARCH(I386)
  1054. auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
  1055. tls_descriptor.set_base(to_thread->thread_specific_data());
  1056. tls_descriptor.set_limit(to_thread->thread_specific_region_size());
  1057. #endif
  1058. if (from_regs.cr3 != to_regs.cr3)
  1059. write_cr3(to_regs.cr3);
  1060. to_thread->set_cpu(processor.get_id());
  1061. processor.restore_in_critical(to_thread->saved_critical());
  1062. if (has_fxsr)
  1063. asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
  1064. else
  1065. asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
  1066. // TODO: ioperm?
  1067. }
  1068. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
  1069. {
  1070. VERIFY_INTERRUPTS_DISABLED();
  1071. #if ARCH(I386)
  1072. thread->regs().eflags = flags;
  1073. #else
  1074. thread->regs().rflags = flags;
  1075. #endif
  1076. return Processor::current().init_context(*thread, true);
  1077. }
  1078. }