Processor.cpp 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Format.h>
  7. #include <AK/StdLibExtras.h>
  8. #include <AK/String.h>
  9. #include <AK/Types.h>
  10. #include <Kernel/Interrupts/APIC.h>
  11. #include <Kernel/Process.h>
  12. #include <Kernel/Random.h>
  13. #include <Kernel/Sections.h>
  14. #include <Kernel/StdLib.h>
  15. #include <Kernel/Thread.h>
  16. #include <Kernel/VM/ProcessPagingScope.h>
  17. #include <Kernel/Arch/x86/CPUID.h>
  18. #include <Kernel/Arch/x86/Interrupts.h>
  19. #include <Kernel/Arch/x86/MSR.h>
  20. #include <Kernel/Arch/x86/Processor.h>
  21. #include <Kernel/Arch/x86/ProcessorInfo.h>
  22. #include <Kernel/Arch/x86/SafeMem.h>
  23. #include <Kernel/Arch/x86/ScopedCritical.h>
  24. #include <Kernel/Arch/x86/TrapFrame.h>
  25. namespace Kernel {
  // FPU state captured on CPU 0 in Processor::initialize() right after "fninit".
  READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;
  // Per-CPU Processor pointers, indexed by CPU id; each slot is filled in Processor::initialize().
  READONLY_AFTER_INIT static ProcessorContainer s_processors {};
  // Number of processors that have run early_initialize() (set to 1 by CPU 0, incremented by every other CPU).
  READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
  // Set to true by Processor::smp_enable(); reset to false when CPU 0 runs early_initialize().
  static volatile bool s_smp_enabled;
  // Head of the lock-free list of currently unused ProcessorMessage objects (populated in smp_enable()).
  static Atomic<ProcessorMessage*> s_message_pool;
  // Bitmask with one bit per CPU id; consumed by smp_wake_n_idle_processors() to find idle CPUs.
  Atomic<u32> Processor::s_idle_cpu_mask { 0 };
  extern "C" void thread_context_first_enter(void);
  extern "C" void exit_kernel_thread(void);
  extern "C" void do_assume_context(Thread* thread, u32 flags);
  // The compiler can't see the calls to these functions inside assembly.
  // Declare them, to avoid dead code warnings.
  extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
  extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
  extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
  40. UNMAP_AFTER_INIT static void sse_init()
  41. {
  42. write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
  43. write_cr4(read_cr4() | 0x600);
  44. }
  45. void exit_kernel_thread(void)
  46. {
  47. Thread::current()->exit();
  48. }
  49. UNMAP_AFTER_INIT void Processor::cpu_detect()
  50. {
  51. // NOTE: This is called during Processor::early_initialize, we cannot
  52. // safely log at this point because we don't have kmalloc
  53. // initialized yet!
  54. auto set_feature =
  55. [&](CPUFeature f) {
  56. m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f));
  57. };
  58. m_features = static_cast<CPUFeature>(0);
  59. CPUID processor_info(0x1);
  60. if (processor_info.edx() & (1 << 4))
  61. set_feature(CPUFeature::TSC);
  62. if (processor_info.edx() & (1 << 6))
  63. set_feature(CPUFeature::PAE);
  64. if (processor_info.edx() & (1 << 13))
  65. set_feature(CPUFeature::PGE);
  66. if (processor_info.edx() & (1 << 23))
  67. set_feature(CPUFeature::MMX);
  68. if (processor_info.edx() & (1 << 24))
  69. set_feature(CPUFeature::FXSR);
  70. if (processor_info.edx() & (1 << 25))
  71. set_feature(CPUFeature::SSE);
  72. if (processor_info.edx() & (1 << 26))
  73. set_feature(CPUFeature::SSE2);
  74. if (processor_info.ecx() & (1 << 0))
  75. set_feature(CPUFeature::SSE3);
  76. if (processor_info.ecx() & (1 << 9))
  77. set_feature(CPUFeature::SSSE3);
  78. if (processor_info.ecx() & (1 << 19))
  79. set_feature(CPUFeature::SSE4_1);
  80. if (processor_info.ecx() & (1 << 20))
  81. set_feature(CPUFeature::SSE4_2);
  82. if (processor_info.ecx() & (1 << 26))
  83. set_feature(CPUFeature::XSAVE);
  84. if (processor_info.ecx() & (1 << 28))
  85. set_feature(CPUFeature::AVX);
  86. if (processor_info.ecx() & (1 << 30))
  87. set_feature(CPUFeature::RDRAND);
  88. if (processor_info.edx() & (1 << 11)) {
  89. u32 stepping = processor_info.eax() & 0xf;
  90. u32 model = (processor_info.eax() >> 4) & 0xf;
  91. u32 family = (processor_info.eax() >> 8) & 0xf;
  92. if (!(family == 6 && model < 3 && stepping < 3))
  93. set_feature(CPUFeature::SEP);
  94. if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
  95. set_feature(CPUFeature::CONSTANT_TSC);
  96. }
  97. u32 max_extended_leaf = CPUID(0x80000000).eax();
  98. if (max_extended_leaf >= 0x80000001) {
  99. CPUID extended_processor_info(0x80000001);
  100. if (extended_processor_info.edx() & (1 << 20))
  101. set_feature(CPUFeature::NX);
  102. if (extended_processor_info.edx() & (1 << 27))
  103. set_feature(CPUFeature::RDTSCP);
  104. if (extended_processor_info.edx() & (1 << 29))
  105. set_feature(CPUFeature::LM);
  106. if (extended_processor_info.edx() & (1 << 11)) {
  107. // Only available in 64 bit mode
  108. set_feature(CPUFeature::SYSCALL);
  109. }
  110. }
  111. if (max_extended_leaf >= 0x80000007) {
  112. CPUID cpuid(0x80000007);
  113. if (cpuid.edx() & (1 << 8)) {
  114. set_feature(CPUFeature::CONSTANT_TSC);
  115. set_feature(CPUFeature::NONSTOP_TSC);
  116. }
  117. }
  118. if (max_extended_leaf >= 0x80000008) {
  119. // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
  120. CPUID cpuid(0x80000008);
  121. m_physical_address_bit_width = cpuid.eax() & 0xff;
  122. } else {
  123. // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
  124. m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
  125. }
  126. CPUID extended_features(0x7);
  127. if (extended_features.ebx() & (1 << 20))
  128. set_feature(CPUFeature::SMAP);
  129. if (extended_features.ebx() & (1 << 7))
  130. set_feature(CPUFeature::SMEP);
  131. if (extended_features.ecx() & (1 << 2))
  132. set_feature(CPUFeature::UMIP);
  133. if (extended_features.ebx() & (1 << 18))
  134. set_feature(CPUFeature::RDSEED);
  135. }
  136. UNMAP_AFTER_INIT void Processor::cpu_setup()
  137. {
  138. // NOTE: This is called during Processor::early_initialize, we cannot
  139. // safely log at this point because we don't have kmalloc
  140. // initialized yet!
  141. cpu_detect();
  142. if (has_feature(CPUFeature::SSE)) {
  143. // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
  144. // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
  145. VERIFY(has_feature(CPUFeature::FXSR));
  146. sse_init();
  147. }
  148. write_cr0(read_cr0() | 0x00010000);
  149. if (has_feature(CPUFeature::PGE)) {
  150. // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
  151. write_cr4(read_cr4() | 0x80);
  152. }
  153. if (has_feature(CPUFeature::NX)) {
  154. // Turn on IA32_EFER.NXE
  155. asm volatile(
  156. "movl $0xc0000080, %ecx\n"
  157. "rdmsr\n"
  158. "orl $0x800, %eax\n"
  159. "wrmsr\n");
  160. }
  161. if (has_feature(CPUFeature::SMEP)) {
  162. // Turn on CR4.SMEP
  163. write_cr4(read_cr4() | 0x100000);
  164. }
  165. if (has_feature(CPUFeature::SMAP)) {
  166. // Turn on CR4.SMAP
  167. write_cr4(read_cr4() | 0x200000);
  168. }
  169. if (has_feature(CPUFeature::UMIP)) {
  170. write_cr4(read_cr4() | 0x800);
  171. }
  172. if (has_feature(CPUFeature::TSC)) {
  173. write_cr4(read_cr4() | 0x4);
  174. }
  175. if (has_feature(CPUFeature::XSAVE)) {
  176. // Turn on CR4.OSXSAVE
  177. write_cr4(read_cr4() | 0x40000);
  178. // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
  179. // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
  180. write_xcr0(0x1);
  181. if (has_feature(CPUFeature::AVX)) {
  182. // Turn on SSE, AVX and x87 flags
  183. write_xcr0(read_xcr0() | 0x7);
  184. }
  185. }
  186. }
  187. String Processor::features_string() const
  188. {
  189. StringBuilder builder;
  190. auto feature_to_str =
  191. [](CPUFeature f) -> const char* {
  192. switch (f) {
  193. case CPUFeature::NX:
  194. return "nx";
  195. case CPUFeature::PAE:
  196. return "pae";
  197. case CPUFeature::PGE:
  198. return "pge";
  199. case CPUFeature::RDRAND:
  200. return "rdrand";
  201. case CPUFeature::RDSEED:
  202. return "rdseed";
  203. case CPUFeature::SMAP:
  204. return "smap";
  205. case CPUFeature::SMEP:
  206. return "smep";
  207. case CPUFeature::SSE:
  208. return "sse";
  209. case CPUFeature::TSC:
  210. return "tsc";
  211. case CPUFeature::RDTSCP:
  212. return "rdtscp";
  213. case CPUFeature::CONSTANT_TSC:
  214. return "constant_tsc";
  215. case CPUFeature::NONSTOP_TSC:
  216. return "nonstop_tsc";
  217. case CPUFeature::UMIP:
  218. return "umip";
  219. case CPUFeature::SEP:
  220. return "sep";
  221. case CPUFeature::SYSCALL:
  222. return "syscall";
  223. case CPUFeature::MMX:
  224. return "mmx";
  225. case CPUFeature::FXSR:
  226. return "fxsr";
  227. case CPUFeature::SSE2:
  228. return "sse2";
  229. case CPUFeature::SSE3:
  230. return "sse3";
  231. case CPUFeature::SSSE3:
  232. return "ssse3";
  233. case CPUFeature::SSE4_1:
  234. return "sse4.1";
  235. case CPUFeature::SSE4_2:
  236. return "sse4.2";
  237. case CPUFeature::XSAVE:
  238. return "xsave";
  239. case CPUFeature::AVX:
  240. return "avx";
  241. case CPUFeature::LM:
  242. return "lm";
  243. // no default statement here intentionally so that we get
  244. // a warning if a new feature is forgotten to be added here
  245. }
  246. // Shouldn't ever happen
  247. return "???";
  248. };
  249. bool first = true;
  250. for (u32 flag = 1; flag != 0; flag <<= 1) {
  251. if ((static_cast<u32>(m_features) & flag) != 0) {
  252. if (first)
  253. first = false;
  254. else
  255. builder.append(' ');
  256. auto str = feature_to_str(static_cast<CPUFeature>(flag));
  257. builder.append(str, strlen(str));
  258. }
  259. }
  260. return builder.build();
  261. }
  262. UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
  263. {
  264. m_self = this;
  265. m_cpu = cpu;
  266. m_in_irq = 0;
  267. m_in_critical = 0;
  268. m_invoke_scheduler_async = false;
  269. m_scheduler_initialized = false;
  270. m_message_queue = nullptr;
  271. m_idle_thread = nullptr;
  272. m_current_thread = nullptr;
  273. m_scheduler_data = nullptr;
  274. m_mm_data = nullptr;
  275. m_info = nullptr;
  276. m_halt_requested = false;
  277. if (cpu == 0) {
  278. s_smp_enabled = false;
  279. g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
  280. } else {
  281. g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
  282. }
  283. deferred_call_pool_init();
  284. cpu_setup();
  285. gdt_init();
  286. VERIFY(is_initialized()); // sanity check
  287. VERIFY(&current() == this); // sanity check
  288. }
  289. UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
  290. {
  291. VERIFY(m_self == this);
  292. VERIFY(&current() == this); // sanity check
  293. dmesgln("CPU[{}]: Supported features: {}", id(), features_string());
  294. if (!has_feature(CPUFeature::RDRAND))
  295. dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", id());
  296. dmesgln("CPU[{}]: Physical address bit width: {}", id(), m_physical_address_bit_width);
  297. if (cpu == 0)
  298. idt_init();
  299. else
  300. flush_idt();
  301. if (cpu == 0) {
  302. VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
  303. asm volatile("fninit");
  304. if (has_feature(CPUFeature::FXSR))
  305. asm volatile("fxsave %0"
  306. : "=m"(s_clean_fpu_state));
  307. else
  308. asm volatile("fnsave %0"
  309. : "=m"(s_clean_fpu_state));
  310. }
  311. m_info = new ProcessorInfo(*this);
  312. {
  313. // We need to prevent races between APs starting up at the same time
  314. VERIFY(cpu < s_processors.size());
  315. s_processors[cpu] = this;
  316. }
  317. }
  318. void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
  319. {
  320. u16 i = (selector & 0xfffc) >> 3;
  321. u32 prev_gdt_length = m_gdt_length;
  322. if (i >= m_gdt_length) {
  323. m_gdt_length = i + 1;
  324. VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
  325. m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
  326. }
  327. m_gdt[i].low = low;
  328. m_gdt[i].high = high;
  329. // clear selectors we may have skipped
  330. while (i < prev_gdt_length) {
  331. m_gdt[i].low = 0;
  332. m_gdt[i].high = 0;
  333. i++;
  334. }
  335. }
  336. void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
  337. {
  338. write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
  339. }
  340. Descriptor& Processor::get_gdt_entry(u16 selector)
  341. {
  342. u16 i = (selector & 0xfffc) >> 3;
  343. return *(Descriptor*)(&m_gdt[i]);
  344. }
  345. void Processor::flush_gdt()
  346. {
  347. m_gdtr.address = m_gdt;
  348. m_gdtr.limit = (m_gdt_length * 8) - 1;
  349. asm volatile("lgdt %0" ::"m"(m_gdtr)
  350. : "memory");
  351. }
  // Returns this processor's GDT pointer structure (m_gdtr), as last updated
  // by write_raw_gdt_entry() / flush_gdt().
  const DescriptorTablePointer& Processor::get_gdtr()
  {
      return m_gdtr;
  }
  // Captures a stack trace of `thread` by following the chain of saved frame
  // pointers. The first entry of the result is the instruction pointer, the
  // remaining entries are the return addresses found on the stack.
  //
  // max_frames limits the number of entries; 0 means "no explicit limit"
  // (the walk still stops after 4096 entries). The walk also ends at the
  // first null frame pointer, null return address or failed memory read.
  //
  // Three cases are handled: the calling thread itself, a thread that is
  // active on another processor (walked over there via smp_unicast), and a
  // thread that is not currently running (walked from its saved registers).
  Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
  {
      FlatPtr frame_ptr = 0, ip = 0;
      Vector<FlatPtr, 32> stack_trace;

      // Walks the frame chain starting at stack_ptr. Each frame is read as
      // two FlatPtr slots: [0] the previous frame pointer, [1] the return address.
      auto walk_stack = [&](FlatPtr stack_ptr) {
          static constexpr size_t max_stack_frames = 4096;
          stack_trace.append(ip);
          size_t count = 1;
          while (stack_ptr && stack_trace.size() < max_stack_frames) {
              FlatPtr retaddr;

              count++;
              if (max_frames != 0 && count > max_frames)
                  break;

              // Userspace memory is read with copy_from_user(), anything else
              // with safe_memcpy(); a failed read simply ends the walk.
              if (is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
                  if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr)
                      break;
                  stack_trace.append(retaddr);
                  if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr))
                      break;
              } else {
                  void* fault_at;
                  if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
                      break;
                  stack_trace.append(retaddr);
                  if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
                      break;
              }
          }
      };
      // Walks the stack of whichever thread executes this lambda, starting
      // at the lambda's own frame.
      auto capture_current_thread = [&]() {
          frame_ptr = (FlatPtr)__builtin_frame_address(0);
          ip = (FlatPtr)__builtin_return_address(0);

          walk_stack(frame_ptr);
      };

      // Since the thread may be running on another processor, there
      // is a chance a context switch may happen while we're trying
      // to get it. It also won't be entirely accurate and merely
      // reflect the status at the last context switch.
      ScopedSpinLock lock(g_scheduler_lock);
      if (&thread == Processor::current_thread()) {
          VERIFY(thread.state() == Thread::Running);
          // Leave the scheduler lock. If we trigger page faults we may
          // need to be preempted. Since this is our own thread it won't
          // cause any problems as the stack won't change below this frame.
          lock.unlock();
          capture_current_thread();
      } else if (thread.is_active()) {
          VERIFY(thread.cpu() != Processor::id());
          // If this is the case, the thread is currently running
          // on another processor. We can't trust the kernel stack as
          // it may be changing at any time. We need to probably send
          // an IPI to that processor, have it walk the stack and wait
          // until it returns the data back to us
          auto& proc = Processor::current();
          // The trailing `false` is smp_unicast's last argument.
          // NOTE(review): presumably this selects a synchronous (non-async)
          //               call; the by-reference captures rely on that - confirm.
          smp_unicast(
              thread.cpu(),
              [&]() {
                  dbgln("CPU[{}] getting stack for cpu #{}", Processor::id(), proc.get_id());
                  ProcessPagingScope paging_scope(thread.process());
                  VERIFY(&Processor::current() != &proc);
                  VERIFY(&thread == Processor::current_thread());
                  // NOTE: Because the other processor is still holding the
                  // scheduler lock while waiting for this callback to finish,
                  // the current thread on the target processor cannot change

                  // TODO: What to do about page faults here? We might deadlock
                  //       because the other processor is still holding the
                  //       scheduler lock...
                  capture_current_thread();
              },
              false);
      } else {
          switch (thread.state()) {
          case Thread::Running:
              VERIFY_NOT_REACHED(); // should have been handled above
          case Thread::Runnable:
          case Thread::Stopped:
          case Thread::Blocked:
          case Thread::Dying:
          case Thread::Dead: {
              // We need to retrieve ebp from what was last pushed to the kernel
              // stack. Before switching out of that thread, switch_context
              // pushed the callee-saved registers, and the last of them happens
              // to be ebp.
              ProcessPagingScope paging_scope(thread.process());
              auto& regs = thread.regs();
              FlatPtr* stack_top;
              FlatPtr sp;
  #if ARCH(I386)
              sp = regs.esp;
  #else
              sp = regs.rsp;
  #endif
              stack_top = reinterpret_cast<FlatPtr*>(sp);
              // Read the saved frame pointer from the top of the stack; if
              // that fails, fall back to 0 so only `ip` ends up in the trace.
              if (is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
                  if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]))
                      frame_ptr = 0;
              } else {
                  void* fault_at;
                  if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
                      frame_ptr = 0;
              }
  #if ARCH(I386)
              ip = regs.eip;
  #else
              ip = regs.rip;
  #endif
              // TODO: We need to leave the scheduler lock here, but we also
              //       need to prevent the target thread from being run while
              //       we walk the stack
              lock.unlock();
              walk_stack(frame_ptr);
              break;
          }
          default:
              dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
              break;
          }
      }
      return stack_trace;
  }
  // Returns the file-local container of per-CPU Processor pointers
  // (indexed by CPU id; slots are filled in Processor::initialize()).
  ProcessorContainer& Processor::processors()
  {
      return s_processors;
  }
  480. Processor& Processor::by_id(u32 cpu)
  481. {
  482. // s_processors does not need to be protected by a lock of any kind.
  483. // It is populated early in the boot process, and the BSP is waiting
  484. // for all APs to finish, after which this array never gets modified
  485. // again, so it's safe to not protect access to it here
  486. auto& procs = processors();
  487. VERIFY(procs[cpu] != nullptr);
  488. VERIFY(procs.size() > cpu);
  489. return *procs[cpu];
  490. }
  491. void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
  492. {
  493. VERIFY_INTERRUPTS_DISABLED();
  494. VERIFY(&Processor::current() == this);
  495. trap.prev_irq_level = m_in_irq;
  496. if (raise_irq)
  497. m_in_irq++;
  498. auto* current_thread = Processor::current_thread();
  499. if (current_thread) {
  500. auto& current_trap = current_thread->current_trap();
  501. trap.next_trap = current_trap;
  502. current_trap = &trap;
  503. // The cs register of this trap tells us where we will return back to
  504. current_thread->set_previous_mode(((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode);
  505. } else {
  506. trap.next_trap = nullptr;
  507. }
  508. }
  509. void Processor::exit_trap(TrapFrame& trap)
  510. {
  511. VERIFY_INTERRUPTS_DISABLED();
  512. VERIFY(&Processor::current() == this);
  513. VERIFY(m_in_irq >= trap.prev_irq_level);
  514. m_in_irq = trap.prev_irq_level;
  515. smp_process_pending_messages();
  516. if (!m_in_irq && !m_in_critical)
  517. check_invoke_scheduler();
  518. auto* current_thread = Processor::current_thread();
  519. if (current_thread) {
  520. auto& current_trap = current_thread->current_trap();
  521. current_trap = trap.next_trap;
  522. if (current_trap) {
  523. VERIFY(current_trap->regs);
  524. // If we have another higher level trap then we probably returned
  525. // from an interrupt or irq handler. The cs register of the
  526. // new/higher level trap tells us what the mode prior to it was
  527. current_thread->set_previous_mode(((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode);
  528. } else {
  529. // If we don't have a higher level trap then we're back in user mode.
  530. // Unless we're a kernel process, in which case we're always in kernel mode
  531. current_thread->set_previous_mode(current_thread->process().is_kernel_process() ? Thread::PreviousMode::KernelMode : Thread::PreviousMode::UserMode);
  532. }
  533. }
  534. }
  535. void Processor::check_invoke_scheduler()
  536. {
  537. VERIFY(!m_in_irq);
  538. VERIFY(!m_in_critical);
  539. if (m_invoke_scheduler_async && m_scheduler_initialized) {
  540. m_invoke_scheduler_async = false;
  541. Scheduler::invoke_async();
  542. }
  543. }
  544. void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
  545. {
  546. auto ptr = vaddr.as_ptr();
  547. while (page_count > 0) {
  548. // clang-format off
  549. asm volatile("invlpg %0"
  550. :
  551. : "m"(*ptr)
  552. : "memory");
  553. // clang-format on
  554. ptr += PAGE_SIZE;
  555. page_count--;
  556. }
  557. }
  558. void Processor::flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count)
  559. {
  560. if (s_smp_enabled && (!is_user_address(vaddr) || Process::current()->thread_count() > 1))
  561. smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
  562. else
  563. flush_tlb_local(vaddr, page_count);
  564. }
  565. void Processor::smp_return_to_pool(ProcessorMessage& msg)
  566. {
  567. ProcessorMessage* next = nullptr;
  568. do {
  569. msg.next = next;
  570. } while (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel));
  571. }
  572. ProcessorMessage& Processor::smp_get_from_pool()
  573. {
  574. ProcessorMessage* msg;
  575. // The assumption is that messages are never removed from the pool!
  576. for (;;) {
  577. msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
  578. if (!msg) {
  579. if (!Processor::current().smp_process_pending_messages()) {
  580. // TODO: pause for a bit?
  581. }
  582. continue;
  583. }
  584. // If another processor were to use this message in the meanwhile,
  585. // "msg" is still valid (because it never gets freed). We'd detect
  586. // this because the expected value "msg" and pool would
  587. // no longer match, and the compare_exchange will fail. But accessing
  588. // "msg->next" is always safe here.
  589. if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
  590. // We successfully "popped" this available message
  591. break;
  592. }
  593. }
  594. VERIFY(msg != nullptr);
  595. return *msg;
  596. }
  597. u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
  598. {
  599. VERIFY(Processor::current().in_critical());
  600. VERIFY(wake_count > 0);
  601. if (!s_smp_enabled)
  602. return 0;
  603. // Wake at most N - 1 processors
  604. if (wake_count >= Processor::count()) {
  605. wake_count = Processor::count() - 1;
  606. VERIFY(wake_count > 0);
  607. }
  608. u32 current_id = Processor::current().id();
  609. u32 did_wake_count = 0;
  610. auto& apic = APIC::the();
  611. while (did_wake_count < wake_count) {
  612. // Try to get a set of idle CPUs and flip them to busy
  613. u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
  614. u32 idle_count = __builtin_popcountl(idle_mask);
  615. if (idle_count == 0)
  616. break; // No (more) idle processor available
  617. u32 found_mask = 0;
  618. for (u32 i = 0; i < idle_count; i++) {
  619. u32 cpu = __builtin_ffsl(idle_mask) - 1;
  620. idle_mask &= ~(1u << cpu);
  621. found_mask |= 1u << cpu;
  622. }
  623. idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
  624. if (idle_mask == 0)
  625. continue; // All of them were flipped to busy, try again
  626. idle_count = __builtin_popcountl(idle_mask);
  627. for (u32 i = 0; i < idle_count; i++) {
  628. u32 cpu = __builtin_ffsl(idle_mask) - 1;
  629. idle_mask &= ~(1u << cpu);
  630. // Send an IPI to that CPU to wake it up. There is a possibility
  631. // someone else woke it up as well, or that it woke up due to
  632. // a timer interrupt. But we tried hard to avoid this...
  633. apic.send_ipi(cpu);
  634. did_wake_count++;
  635. }
  636. }
  637. return did_wake_count;
  638. }
  639. UNMAP_AFTER_INIT void Processor::smp_enable()
  640. {
  641. size_t msg_pool_size = Processor::count() * 100u;
  642. size_t msg_entries_cnt = Processor::count();
  643. auto msgs = new ProcessorMessage[msg_pool_size];
  644. auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
  645. size_t msg_entry_i = 0;
  646. for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
  647. auto& msg = msgs[i];
  648. msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
  649. msg.per_proc_entries = &msg_entries[msg_entry_i];
  650. for (size_t k = 0; k < msg_entries_cnt; k++)
  651. msg_entries[msg_entry_i + k].msg = &msg;
  652. }
  653. s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);
  654. // Start sending IPI messages
  655. s_smp_enabled = true;
  656. }
  657. void Processor::smp_cleanup_message(ProcessorMessage& msg)
  658. {
  659. switch (msg.type) {
  660. case ProcessorMessage::Callback:
  661. msg.callback_value().~Function();
  662. break;
  663. default:
  664. break;
  665. }
  666. }
  // Drains this processor's message queue and handles every message in the
  // order it was queued. Also halts this processor if a halt was requested.
  // Runs inside a critical section.
  // Returns true if the queue held at least one message, false otherwise.
  bool Processor::smp_process_pending_messages()
  {
      bool did_process = false;
      u32 prev_flags;
      enter_critical(prev_flags);

      // Atomically take the whole queue, leaving it empty for new senders.
      if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
          // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
          auto reverse_list =
              [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
                  ProcessorMessageEntry* rev_list = nullptr;
                  while (list) {
                      auto next = list->next;
                      list->next = rev_list;
                      rev_list = list;
                      list = next;
                  }
                  return rev_list;
              };

          pending_msgs = reverse_list(pending_msgs);

          // now process in the right order
          ProcessorMessageEntry* next_msg;
          for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
              // Fetch the successor before handling the message, as the
              // message may be returned to the pool below.
              next_msg = cur_msg->next;
              auto msg = cur_msg->msg;

              dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", id(), VirtualAddress(msg));

              switch (msg->type) {
              case ProcessorMessage::Callback:
                  msg->invoke_callback();
                  break;
              case ProcessorMessage::FlushTlb:
                  if (is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
                      // We assume that we don't cross into kernel land!
                      VERIFY(is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
                      if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
                          // This processor isn't using this page directory right now, we can ignore this request
                          dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
                          break;
                      }
                  }
                  flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
                  break;
              }

              bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
              auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
              VERIFY(prev_refs != 0);
              if (prev_refs == 1) {
                  // All processors handled this. If this is an async message,
                  // we need to clean it up and return it to the pool
                  if (is_async) {
                      smp_cleanup_message(*msg);
                      smp_return_to_pool(*msg);
                  }
              }

              // A halt request is honoured after each handled message.
              if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
                  halt_this();
          }
          did_process = true;
      } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
          halt_this();
      }

      leave_critical(prev_flags);
      return did_process;
  }
  730. bool Processor::smp_queue_message(ProcessorMessage& msg)
  731. {
  732. // Note that it's quite possible that the other processor may pop
  733. // the queue at any given time. We rely on the fact that the messages
  734. // are pooled and never get freed!
  735. auto& msg_entry = msg.per_proc_entries[id()];
  736. VERIFY(msg_entry.msg == &msg);
  737. ProcessorMessageEntry* next = nullptr;
  738. do {
  739. msg_entry.next = next;
  740. } while (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel));
  741. return next == nullptr;
  742. }
  743. void Processor::smp_broadcast_message(ProcessorMessage& msg)
  744. {
  745. auto& cur_proc = Processor::current();
  746. dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), count(), VirtualAddress(&cur_proc));
  747. msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
  748. VERIFY(msg.refs > 0);
  749. bool need_broadcast = false;
  750. for_each(
  751. [&](Processor& proc) {
  752. if (&proc != &cur_proc) {
  753. if (proc.smp_queue_message(msg))
  754. need_broadcast = true;
  755. }
  756. });
  757. // Now trigger an IPI on all other APs (unless all targets already had messages queued)
  758. if (need_broadcast)
  759. APIC::the().broadcast_ipi();
  760. }
  761. void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
  762. {
  763. auto& cur_proc = Processor::current();
  764. VERIFY(!msg.async);
  765. // If synchronous then we must cleanup and return the message back
  766. // to the pool. Otherwise, the last processor to complete it will return it
  767. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  768. // TODO: pause for a bit?
  769. // We need to process any messages that may have been sent to
  770. // us while we're waiting. This also checks if another processor
  771. // may have requested us to halt.
  772. cur_proc.smp_process_pending_messages();
  773. }
  774. smp_cleanup_message(msg);
  775. smp_return_to_pool(msg);
  776. }
  777. void Processor::smp_broadcast(Function<void()> callback, bool async)
  778. {
  779. auto& msg = smp_get_from_pool();
  780. msg.async = async;
  781. msg.type = ProcessorMessage::Callback;
  782. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  783. smp_broadcast_message(msg);
  784. if (!async)
  785. smp_broadcast_wait_sync(msg);
  786. }
  787. void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
  788. {
  789. auto& cur_proc = Processor::current();
  790. VERIFY(cpu != cur_proc.get_id());
  791. auto& target_proc = processors()[cpu];
  792. msg.async = async;
  793. dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} proc: {}", cur_proc.get_id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_proc));
  794. msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
  795. if (target_proc->smp_queue_message(msg)) {
  796. APIC::the().send_ipi(cpu);
  797. }
  798. if (!async) {
  799. // If synchronous then we must cleanup and return the message back
  800. // to the pool. Otherwise, the last processor to complete it will return it
  801. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  802. // TODO: pause for a bit?
  803. // We need to process any messages that may have been sent to
  804. // us while we're waiting. This also checks if another processor
  805. // may have requested us to halt.
  806. cur_proc.smp_process_pending_messages();
  807. }
  808. smp_cleanup_message(msg);
  809. smp_return_to_pool(msg);
  810. }
  811. }
  812. void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
  813. {
  814. auto& msg = smp_get_from_pool();
  815. msg.type = ProcessorMessage::Callback;
  816. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  817. smp_unicast_message(cpu, msg, async);
  818. }
  819. void Processor::smp_broadcast_flush_tlb(const PageDirectory* page_directory, VirtualAddress vaddr, size_t page_count)
  820. {
  821. auto& msg = smp_get_from_pool();
  822. msg.async = false;
  823. msg.type = ProcessorMessage::FlushTlb;
  824. msg.flush_tlb.page_directory = page_directory;
  825. msg.flush_tlb.ptr = vaddr.as_ptr();
  826. msg.flush_tlb.page_count = page_count;
  827. smp_broadcast_message(msg);
  828. // While the other processors handle this request, we'll flush ours
  829. flush_tlb_local(vaddr, page_count);
  830. // Now wait until everybody is done as well
  831. smp_broadcast_wait_sync(msg);
  832. }
  833. void Processor::smp_broadcast_halt()
  834. {
  835. // We don't want to use a message, because this could have been triggered
  836. // by being out of memory and we might not be able to get a message
  837. for_each(
  838. [&](Processor& proc) {
  839. proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
  840. });
  841. // Now trigger an IPI on all other APs
  842. APIC::the().broadcast_ipi();
  843. }
  844. void Processor::Processor::halt()
  845. {
  846. if (s_smp_enabled)
  847. smp_broadcast_halt();
  848. halt_this();
  849. }
  850. UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
  851. {
  852. size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
  853. for (size_t i = 0; i < pool_count; i++) {
  854. auto& entry = m_deferred_call_pool[i];
  855. entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
  856. new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
  857. entry.was_allocated = false;
  858. }
  859. m_pending_deferred_calls = nullptr;
  860. m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
  861. }
  862. void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
  863. {
  864. VERIFY(m_in_critical);
  865. VERIFY(!entry->was_allocated);
  866. entry->handler_value() = {};
  867. entry->next = m_free_deferred_call_pool_entry;
  868. m_free_deferred_call_pool_entry = entry;
  869. }
  870. DeferredCallEntry* Processor::deferred_call_get_free()
  871. {
  872. VERIFY(m_in_critical);
  873. if (m_free_deferred_call_pool_entry) {
  874. // Fast path, we have an entry in our pool
  875. auto* entry = m_free_deferred_call_pool_entry;
  876. m_free_deferred_call_pool_entry = entry->next;
  877. VERIFY(!entry->was_allocated);
  878. return entry;
  879. }
  880. auto* entry = new DeferredCallEntry;
  881. new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
  882. entry->was_allocated = true;
  883. return entry;
  884. }
  885. void Processor::deferred_call_execute_pending()
  886. {
  887. VERIFY(m_in_critical);
  888. if (!m_pending_deferred_calls)
  889. return;
  890. auto* pending_list = m_pending_deferred_calls;
  891. m_pending_deferred_calls = nullptr;
  892. // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
  893. auto reverse_list =
  894. [](DeferredCallEntry* list) -> DeferredCallEntry* {
  895. DeferredCallEntry* rev_list = nullptr;
  896. while (list) {
  897. auto next = list->next;
  898. list->next = rev_list;
  899. rev_list = list;
  900. list = next;
  901. }
  902. return rev_list;
  903. };
  904. pending_list = reverse_list(pending_list);
  905. do {
  906. pending_list->invoke_handler();
  907. // Return the entry back to the pool, or free it
  908. auto* next = pending_list->next;
  909. if (pending_list->was_allocated) {
  910. pending_list->handler_value().~Function();
  911. delete pending_list;
  912. } else
  913. deferred_call_return_to_pool(pending_list);
  914. pending_list = next;
  915. } while (pending_list);
  916. }
  917. void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
  918. {
  919. VERIFY(m_in_critical);
  920. entry->next = m_pending_deferred_calls;
  921. m_pending_deferred_calls = entry;
  922. }
  923. void Processor::deferred_call_queue(Function<void()> callback)
  924. {
  925. // NOTE: If we are called outside of a critical section and outside
  926. // of an irq handler, the function will be executed before we return!
  927. ScopedCritical critical;
  928. auto& cur_proc = Processor::current();
  929. auto* entry = cur_proc.deferred_call_get_free();
  930. entry->handler_value() = move(callback);
  931. cur_proc.deferred_call_queue_entry(entry);
  932. }
  933. UNMAP_AFTER_INIT void Processor::gdt_init()
  934. {
  935. m_gdt_length = 0;
  936. m_gdtr.address = nullptr;
  937. m_gdtr.limit = 0;
  938. write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
  939. #if ARCH(I386)
  940. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
  941. write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
  942. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
  943. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
  944. #else
  945. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
  946. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
  947. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
  948. #endif
  949. #if ARCH(I386)
  950. Descriptor tls_descriptor {};
  951. tls_descriptor.low = tls_descriptor.high = 0;
  952. tls_descriptor.dpl = 3;
  953. tls_descriptor.segment_present = 1;
  954. tls_descriptor.granularity = 0;
  955. tls_descriptor.operation_size64 = 0;
  956. tls_descriptor.operation_size32 = 1;
  957. tls_descriptor.descriptor_type = 1;
  958. tls_descriptor.type = 2;
  959. write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3
  960. Descriptor gs_descriptor {};
  961. gs_descriptor.set_base(VirtualAddress { this });
  962. gs_descriptor.set_limit(sizeof(Processor) - 1);
  963. gs_descriptor.dpl = 0;
  964. gs_descriptor.segment_present = 1;
  965. gs_descriptor.granularity = 0;
  966. gs_descriptor.operation_size64 = 0;
  967. gs_descriptor.operation_size32 = 1;
  968. gs_descriptor.descriptor_type = 1;
  969. gs_descriptor.type = 2;
  970. write_gdt_entry(GDT_SELECTOR_PROC, gs_descriptor); // gs0
  971. #endif
  972. Descriptor tss_descriptor {};
  973. tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
  974. tss_descriptor.set_limit(sizeof(TSS) - 1);
  975. tss_descriptor.dpl = 0;
  976. tss_descriptor.segment_present = 1;
  977. tss_descriptor.granularity = 0;
  978. tss_descriptor.operation_size64 = 0;
  979. tss_descriptor.operation_size32 = 1;
  980. tss_descriptor.descriptor_type = 0;
  981. tss_descriptor.type = 9;
  982. write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss
  983. #if ARCH(X86_64)
  984. Descriptor tss_descriptor_part2 {};
  985. tss_descriptor_part2.low = (size_t)&m_tss >> 32;
  986. write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
  987. #endif
  988. flush_gdt();
  989. load_task_register(GDT_SELECTOR_TSS);
  990. #if ARCH(X86_64)
  991. MSR gs_base(MSR_GS_BASE);
  992. gs_base.set((u64)this);
  993. #else
  994. asm volatile(
  995. "mov %%ax, %%ds\n"
  996. "mov %%ax, %%es\n"
  997. "mov %%ax, %%fs\n"
  998. "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
  999. : "memory");
  1000. set_gs(GDT_SELECTOR_PROC);
  1001. #endif
  1002. #if ARCH(I386)
  1003. // Make sure CS points to the kernel code descriptor.
  1004. // clang-format off
  1005. asm volatile(
  1006. "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
  1007. "sanity:\n");
  1008. // clang-format on
  1009. #endif
  1010. }
  1011. extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
  1012. {
  1013. VERIFY(!are_interrupts_enabled());
  1014. VERIFY(is_kernel_mode());
  1015. dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
  1016. VERIFY(to_thread == Thread::current());
  1017. Scheduler::enter_current(*from_thread, true);
  1018. // Since we got here and don't have Scheduler::context_switch in the
  1019. // call stack (because this is the first time we switched into this
  1020. // context), we need to notify the scheduler so that it can release
  1021. // the scheduler lock. We don't want to enable interrupts at this point
  1022. // as we're still in the middle of a context switch. Doing so could
  1023. // trigger a context switch within a context switch, leading to a crash.
  1024. FlatPtr flags;
  1025. #if ARCH(I386)
  1026. flags = trap->regs->eflags;
  1027. #else
  1028. flags = trap->regs->rflags;
  1029. #endif
  1030. Scheduler::leave_on_first_switch(flags & ~0x200);
  1031. }
  1032. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
  1033. {
  1034. VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running);
  1035. VERIFY(to_thread->state() == Thread::Running);
  1036. bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
  1037. Processor::set_current_thread(*to_thread);
  1038. auto& from_regs = from_thread->regs();
  1039. auto& to_regs = to_thread->regs();
  1040. if (has_fxsr)
  1041. asm volatile("fxsave %0"
  1042. : "=m"(from_thread->fpu_state()));
  1043. else
  1044. asm volatile("fnsave %0"
  1045. : "=m"(from_thread->fpu_state()));
  1046. #if ARCH(I386)
  1047. from_regs.fs = get_fs();
  1048. from_regs.gs = get_gs();
  1049. set_fs(to_regs.fs);
  1050. set_gs(to_regs.gs);
  1051. #endif
  1052. if (from_thread->process().is_traced())
  1053. read_debug_registers_into(from_thread->debug_register_state());
  1054. if (to_thread->process().is_traced()) {
  1055. write_debug_registers_from(to_thread->debug_register_state());
  1056. } else {
  1057. clear_debug_registers();
  1058. }
  1059. auto& processor = Processor::current();
  1060. #if ARCH(I386)
  1061. auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
  1062. tls_descriptor.set_base(to_thread->thread_specific_data());
  1063. tls_descriptor.set_limit(to_thread->thread_specific_region_size());
  1064. #endif
  1065. if (from_regs.cr3 != to_regs.cr3)
  1066. write_cr3(to_regs.cr3);
  1067. to_thread->set_cpu(processor.get_id());
  1068. processor.restore_in_critical(to_thread->saved_critical());
  1069. if (has_fxsr)
  1070. asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
  1071. else
  1072. asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
  1073. // TODO: ioperm?
  1074. }
  1075. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
  1076. {
  1077. VERIFY_INTERRUPTS_DISABLED();
  1078. #if ARCH(I386)
  1079. thread->regs().eflags = flags;
  1080. #else
  1081. thread->regs().rflags = flags;
  1082. #endif
  1083. return Processor::current().init_context(*thread, true);
  1084. }
  1085. void Processor::assume_context(Thread& thread, FlatPtr flags)
  1086. {
  1087. dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);
  1088. VERIFY_INTERRUPTS_DISABLED();
  1089. Scheduler::prepare_after_exec();
  1090. // in_critical() should be 2 here. The critical section in Process::exec
  1091. // and then the scheduler lock
  1092. VERIFY(Processor::current().in_critical() == 2);
  1093. do_assume_context(&thread, flags);
  1094. VERIFY_NOT_REACHED();
  1095. }
  1096. }