// Processor.cpp

/*
 * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/Format.h>
#include <AK/StdLibExtras.h>
#include <AK/String.h>
#include <AK/Types.h>

#include <Kernel/Interrupts/APIC.h>
#include <Kernel/Memory/ScopedAddressSpaceSwitcher.h>
#include <Kernel/Process.h>
#include <Kernel/Sections.h>
#include <Kernel/StdLib.h>
#include <Kernel/Thread.h>

#include <Kernel/Arch/x86/CPUID.h>
#include <Kernel/Arch/x86/Interrupts.h>
#include <Kernel/Arch/x86/MSR.h>
#include <Kernel/Arch/x86/Processor.h>
#include <Kernel/Arch/x86/ProcessorInfo.h>
#include <Kernel/Arch/x86/SafeMem.h>
#include <Kernel/Arch/x86/ScopedCritical.h>
#include <Kernel/Arch/x86/TrapFrame.h>

namespace Kernel {
READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;

READONLY_AFTER_INIT static ProcessorContainer s_processors {};
READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
READONLY_AFTER_INIT static volatile bool s_smp_enabled;

static Atomic<ProcessorMessage*> s_message_pool;
Atomic<u32> Processor::s_idle_cpu_mask { 0 };

// The compiler can't see the calls to these functions inside assembly.
// Declare them, to avoid dead code warnings.
extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));

bool Processor::is_smp_enabled()
{
    return s_smp_enabled;
}
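
// sse_init() clears CR0.EM (bit 2) and sets CR0.MP (bit 1) so SSE instructions
// don't fault, then sets CR4.OSFXSR (bit 9) and CR4.OSXMMEXCPT (bit 10) to
// enable FXSAVE/FXRSTOR and unmasked SIMD floating-point exceptions.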
UNMAP_AFTER_INIT static void sse_init()
{
    write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
    write_cr4(read_cr4() | 0x600);
}

void exit_kernel_thread(void)
{
    Thread::current()->exit();
}

UNMAP_AFTER_INIT void Processor::cpu_detect()
{
    // NOTE: This is called during Processor::early_initialize, so we cannot
    //       safely log at this point because we don't have kmalloc
    //       initialized yet!
    auto set_feature =
        [&](CPUFeature f) {
            m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f));
        };
    m_features = static_cast<CPUFeature>(0);

    CPUID processor_info(0x1);
    if (processor_info.edx() & (1 << 4))
        set_feature(CPUFeature::TSC);
    if (processor_info.edx() & (1 << 6))
        set_feature(CPUFeature::PAE);
    if (processor_info.edx() & (1 << 13))
        set_feature(CPUFeature::PGE);
    if (processor_info.edx() & (1 << 23))
        set_feature(CPUFeature::MMX);
    if (processor_info.edx() & (1 << 24))
        set_feature(CPUFeature::FXSR);
    if (processor_info.edx() & (1 << 25))
        set_feature(CPUFeature::SSE);
    if (processor_info.edx() & (1 << 26))
        set_feature(CPUFeature::SSE2);
    if (processor_info.ecx() & (1 << 0))
        set_feature(CPUFeature::SSE3);
    if (processor_info.ecx() & (1 << 9))
        set_feature(CPUFeature::SSSE3);
    if (processor_info.ecx() & (1 << 19))
        set_feature(CPUFeature::SSE4_1);
    if (processor_info.ecx() & (1 << 20))
        set_feature(CPUFeature::SSE4_2);
    if (processor_info.ecx() & (1 << 26))
        set_feature(CPUFeature::XSAVE);
    if (processor_info.ecx() & (1 << 28))
        set_feature(CPUFeature::AVX);
    if (processor_info.ecx() & (1 << 30))
        set_feature(CPUFeature::RDRAND);
    if (processor_info.ecx() & (1u << 31))
        set_feature(CPUFeature::HYPERVISOR);

    if (processor_info.edx() & (1 << 11)) {
        u32 stepping = processor_info.eax() & 0xf;
        u32 model = (processor_info.eax() >> 4) & 0xf;
        u32 family = (processor_info.eax() >> 8) & 0xf;
        if (!(family == 6 && model < 3 && stepping < 3))
            set_feature(CPUFeature::SEP);
        if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
            set_feature(CPUFeature::CONSTANT_TSC);
    }

    u32 max_extended_leaf = CPUID(0x80000000).eax();

    if (max_extended_leaf >= 0x80000001) {
        CPUID extended_processor_info(0x80000001);
        if (extended_processor_info.edx() & (1 << 20))
            set_feature(CPUFeature::NX);
        if (extended_processor_info.edx() & (1 << 27))
            set_feature(CPUFeature::RDTSCP);
        if (extended_processor_info.edx() & (1 << 29))
            set_feature(CPUFeature::LM);
        if (extended_processor_info.edx() & (1 << 11)) {
            // Only available in 64 bit mode
            set_feature(CPUFeature::SYSCALL);
        }
    }

    if (max_extended_leaf >= 0x80000007) {
        CPUID cpuid(0x80000007);
        if (cpuid.edx() & (1 << 8)) {
            set_feature(CPUFeature::CONSTANT_TSC);
            set_feature(CPUFeature::NONSTOP_TSC);
        }
    }

    if (max_extended_leaf >= 0x80000008) {
        // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
        CPUID cpuid(0x80000008);
        m_physical_address_bit_width = cpuid.eax() & 0xff;
        // CPUID.80000008H:EAX[15:8] reports the linear-address width supported by the processor.
        m_virtual_address_bit_width = (cpuid.eax() >> 8) & 0xff;
    } else {
        // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
        m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
        // Processors that do not support CPUID function 80000008H support a linear-address width of 32.
        m_virtual_address_bit_width = 32;
    }

    CPUID extended_features(0x7);
    if (extended_features.ebx() & (1 << 20))
        set_feature(CPUFeature::SMAP);
    if (extended_features.ebx() & (1 << 7))
        set_feature(CPUFeature::SMEP);
    if (extended_features.ecx() & (1 << 2))
        set_feature(CPUFeature::UMIP);
    if (extended_features.ebx() & (1 << 18))
        set_feature(CPUFeature::RDSEED);
}
UNMAP_AFTER_INIT void Processor::cpu_setup()
{
    // NOTE: This is called during Processor::early_initialize, so we cannot
    //       safely log at this point because we don't have kmalloc
    //       initialized yet!
    cpu_detect();

    if (has_feature(CPUFeature::SSE)) {
        // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
        // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
        VERIFY(has_feature(CPUFeature::FXSR));
        sse_init();
    }

    // Turn on CR0.WP so the kernel respects write-protected pages in ring 0.
    write_cr0(read_cr0() | 0x00010000);

    if (has_feature(CPUFeature::PGE)) {
        // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
        write_cr4(read_cr4() | 0x80);
    }

    if (has_feature(CPUFeature::NX)) {
        // Turn on IA32_EFER.NXE
        MSR ia32_efer(MSR_IA32_EFER);
        ia32_efer.set(ia32_efer.get() | 0x800);
    }

    if (has_feature(CPUFeature::SMEP)) {
        // Turn on CR4.SMEP
        write_cr4(read_cr4() | 0x100000);
    }

    if (has_feature(CPUFeature::SMAP)) {
        // Turn on CR4.SMAP
        write_cr4(read_cr4() | 0x200000);
    }

    if (has_feature(CPUFeature::UMIP)) {
        // Turn on CR4.UMIP
        write_cr4(read_cr4() | 0x800);
    }

    if (has_feature(CPUFeature::TSC)) {
        // Turn on CR4.TSD, making RDTSC a privileged instruction.
        write_cr4(read_cr4() | 0x4);
    }

    if (has_feature(CPUFeature::XSAVE)) {
        // Turn on CR4.OSXSAVE
        write_cr4(read_cr4() | 0x40000);

        // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
        // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
        write_xcr0(0x1);

        if (has_feature(CPUFeature::AVX)) {
            // Turn on SSE, AVX and x87 flags
            write_xcr0(read_xcr0() | 0x7);
        }
    }
}
String Processor::features_string() const
{
    StringBuilder builder;
    auto feature_to_str =
        [](CPUFeature f) -> const char* {
            switch (f) {
            case CPUFeature::NX:
                return "nx";
            case CPUFeature::PAE:
                return "pae";
            case CPUFeature::PGE:
                return "pge";
            case CPUFeature::RDRAND:
                return "rdrand";
            case CPUFeature::RDSEED:
                return "rdseed";
            case CPUFeature::SMAP:
                return "smap";
            case CPUFeature::SMEP:
                return "smep";
            case CPUFeature::SSE:
                return "sse";
            case CPUFeature::TSC:
                return "tsc";
            case CPUFeature::RDTSCP:
                return "rdtscp";
            case CPUFeature::CONSTANT_TSC:
                return "constant_tsc";
            case CPUFeature::NONSTOP_TSC:
                return "nonstop_tsc";
            case CPUFeature::UMIP:
                return "umip";
            case CPUFeature::SEP:
                return "sep";
            case CPUFeature::SYSCALL:
                return "syscall";
            case CPUFeature::MMX:
                return "mmx";
            case CPUFeature::FXSR:
                return "fxsr";
            case CPUFeature::SSE2:
                return "sse2";
            case CPUFeature::SSE3:
                return "sse3";
            case CPUFeature::SSSE3:
                return "ssse3";
            case CPUFeature::SSE4_1:
                return "sse4.1";
            case CPUFeature::SSE4_2:
                return "sse4.2";
            case CPUFeature::XSAVE:
                return "xsave";
            case CPUFeature::AVX:
                return "avx";
            case CPUFeature::LM:
                return "lm";
            case CPUFeature::HYPERVISOR:
                return "hypervisor";
                // No default statement here intentionally, so that we get a
                // compiler warning if a new feature is added but not handled here.
            }
            // Shouldn't ever happen
            return "???";
        };
    bool first = true;
    for (u32 flag = 1; flag != 0; flag <<= 1) {
        if ((static_cast<u32>(m_features) & flag) != 0) {
            if (first)
                first = false;
            else
                builder.append(' ');
            auto str = feature_to_str(static_cast<CPUFeature>(flag));
            builder.append(str, strlen(str));
        }
    }
    return builder.build();
}
UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
{
    m_self = this;

    m_cpu = cpu;
    m_in_irq = 0;
    m_in_critical = 0;

    m_invoke_scheduler_async = false;
    m_scheduler_initialized = false;
    m_in_scheduler = true;

    m_message_queue = nullptr;
    m_idle_thread = nullptr;
    m_current_thread = nullptr;
    m_info = nullptr;

    m_halt_requested = false;
    if (cpu == 0) {
        s_smp_enabled = false;
        g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
    } else {
        g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
    }

    deferred_call_pool_init();

    cpu_setup();
    gdt_init();

    VERIFY(is_initialized());   // sanity check
    VERIFY(&current() == this); // sanity check
}
UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
{
    VERIFY(m_self == this);
    VERIFY(&current() == this); // sanity check

    dmesgln("CPU[{}]: Supported features: {}", current_id(), features_string());
    if (!has_feature(CPUFeature::RDRAND))
        dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", current_id());
    dmesgln("CPU[{}]: Physical address bit width: {}", current_id(), m_physical_address_bit_width);
    dmesgln("CPU[{}]: Virtual address bit width: {}", current_id(), m_virtual_address_bit_width);

    if (cpu == 0)
        idt_init();
    else
        flush_idt();

    if (cpu == 0) {
        VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
        asm volatile("fninit");
        if (has_feature(CPUFeature::FXSR))
            asm volatile("fxsave %0"
                         : "=m"(s_clean_fpu_state));
        else
            asm volatile("fnsave %0"
                         : "=m"(s_clean_fpu_state));

        if (has_feature(CPUFeature::HYPERVISOR))
            detect_hypervisor();
    }

    m_info = new ProcessorInfo(*this);

    {
        // We need to prevent races between APs starting up at the same time
        VERIFY(cpu < s_processors.size());
        s_processors[cpu] = this;
    }
}
UNMAP_AFTER_INIT void Processor::detect_hypervisor()
{
    CPUID hypervisor_leaf_range(0x40000000);

    // Get signature of hypervisor.
    alignas(sizeof(u32)) char hypervisor_signature_buffer[13];
    *reinterpret_cast<u32*>(hypervisor_signature_buffer) = hypervisor_leaf_range.ebx();
    *reinterpret_cast<u32*>(hypervisor_signature_buffer + 4) = hypervisor_leaf_range.ecx();
    *reinterpret_cast<u32*>(hypervisor_signature_buffer + 8) = hypervisor_leaf_range.edx();
    hypervisor_signature_buffer[12] = '\0';
    StringView hypervisor_signature(hypervisor_signature_buffer);

    dmesgln("CPU[{}]: CPUID hypervisor signature '{}' ({:#x} {:#x} {:#x}), max leaf {:#x}", current_id(), hypervisor_signature, hypervisor_leaf_range.ebx(), hypervisor_leaf_range.ecx(), hypervisor_leaf_range.edx(), hypervisor_leaf_range.eax());

    if (hypervisor_signature == "Microsoft Hv"sv)
        detect_hypervisor_hyperv(hypervisor_leaf_range);
}
UNMAP_AFTER_INIT void Processor::detect_hypervisor_hyperv(CPUID const& hypervisor_leaf_range)
{
    if (hypervisor_leaf_range.eax() < 0x40000001)
        return;

    CPUID hypervisor_interface(0x40000001);

    // Get signature of hypervisor interface.
    alignas(sizeof(u32)) char interface_signature_buffer[5];
    *reinterpret_cast<u32*>(interface_signature_buffer) = hypervisor_interface.eax();
    interface_signature_buffer[4] = '\0';
    StringView hyperv_interface_signature(interface_signature_buffer);

    dmesgln("CPU[{}]: Hyper-V interface signature '{}' ({:#x})", current_id(), hyperv_interface_signature, hypervisor_interface.eax());

    if (hypervisor_leaf_range.eax() < 0x40000002)
        return;

    CPUID hypervisor_sysid(0x40000002);
    dmesgln("CPU[{}]: Hyper-V system identity {}.{}, build number {}", current_id(), hypervisor_sysid.ebx() >> 16, hypervisor_sysid.ebx() & 0xFFFF, hypervisor_sysid.eax());

    if (hypervisor_leaf_range.eax() < 0x40000005 || hyperv_interface_signature != "Hv#1"sv)
        return;

    dmesgln("CPU[{}]: Hyper-V hypervisor detected", current_id());

    // TODO: Actually do something with Hyper-V.
}
void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
{
    u16 i = (selector & 0xfffc) >> 3;
    u32 prev_gdt_length = m_gdt_length;

    if (i >= m_gdt_length) {
        m_gdt_length = i + 1;
        VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
        m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
    }
    m_gdt[i].low = low;
    m_gdt[i].high = high;

    // clear selectors we may have skipped
    while (i < prev_gdt_length) {
        m_gdt[i].low = 0;
        m_gdt[i].high = 0;
        i++;
    }
}

void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
{
    write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
}

Descriptor& Processor::get_gdt_entry(u16 selector)
{
    u16 i = (selector & 0xfffc) >> 3;
    return *(Descriptor*)(&m_gdt[i]);
}

void Processor::flush_gdt()
{
    m_gdtr.address = m_gdt;
    m_gdtr.limit = (m_gdt_length * 8) - 1;
    asm volatile("lgdt %0" ::"m"(m_gdtr)
                 : "memory");
}

const DescriptorTablePointer& Processor::get_gdtr()
{
    return m_gdtr;
}
Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
{
    FlatPtr frame_ptr = 0, ip = 0;
    Vector<FlatPtr, 32> stack_trace;

    auto walk_stack = [&](FlatPtr stack_ptr) {
        static constexpr size_t max_stack_frames = 4096;
        stack_trace.append(ip);
        size_t count = 1;
        while (stack_ptr && stack_trace.size() < max_stack_frames) {
            FlatPtr retaddr;

            count++;
            if (max_frames != 0 && count > max_frames)
                break;

            if (Memory::is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
                if (copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]).is_error() || !retaddr)
                    break;
                stack_trace.append(retaddr);
                if (copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr).is_error())
                    break;
            } else {
                void* fault_at;
                if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
                    break;
                stack_trace.append(retaddr);
                if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
                    break;
            }
        }
    };
    auto capture_current_thread = [&]() {
        frame_ptr = (FlatPtr)__builtin_frame_address(0);
        ip = (FlatPtr)__builtin_return_address(0);

        walk_stack(frame_ptr);
    };

    // Since the thread may be running on another processor, there
    // is a chance a context switch may happen while we're trying
    // to get it. It also won't be entirely accurate and will merely
    // reflect the state at the last context switch.
    SpinlockLocker lock(g_scheduler_lock);
    if (&thread == Processor::current_thread()) {
        VERIFY(thread.state() == Thread::Running);
        // Leave the scheduler lock. If we trigger page faults we may
        // need to be preempted. Since this is our own thread it won't
        // cause any problems as the stack won't change below this frame.
        lock.unlock();
        capture_current_thread();
    } else if (thread.is_active()) {
        VERIFY(thread.cpu() != Processor::current_id());
        // If this is the case, the thread is currently running
        // on another processor. We can't trust the kernel stack as
        // it may be changing at any time. We probably need to send
        // an IPI to that processor, have it walk the stack and wait
        // until it returns the data back to us.
        auto& proc = Processor::current();
        smp_unicast(
            thread.cpu(),
            [&]() {
                dbgln("CPU[{}] getting stack for cpu #{}", Processor::current_id(), proc.id());
                ScopedAddressSpaceSwitcher switcher(thread.process());
                VERIFY(&Processor::current() != &proc);
                VERIFY(&thread == Processor::current_thread());
                // NOTE: Because the other processor is still holding the
                //       scheduler lock while waiting for this callback to finish,
                //       the current thread on the target processor cannot change.
                // TODO: What to do about page faults here? We might deadlock
                //       because the other processor is still holding the
                //       scheduler lock...
                capture_current_thread();
            },
            false);
    } else {
        switch (thread.state()) {
        case Thread::Running:
            VERIFY_NOT_REACHED(); // should have been handled above
        case Thread::Runnable:
        case Thread::Stopped:
        case Thread::Blocked:
        case Thread::Dying:
        case Thread::Dead: {
            // We need to retrieve ebp from what was last pushed to the kernel
            // stack. Before switching out of that thread, switch_context
            // pushed the callee-saved registers, and the last of them happens
            // to be ebp.
            ScopedAddressSpaceSwitcher switcher(thread.process());
            auto& regs = thread.regs();
            auto* stack_top = reinterpret_cast<FlatPtr*>(regs.sp());
            if (Memory::is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
                if (copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]).is_error())
                    frame_ptr = 0;
            } else {
                void* fault_at;
                if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
                    frame_ptr = 0;
            }

            ip = regs.ip();

            // TODO: We need to leave the scheduler lock here, but we also
            //       need to prevent the target thread from being run while
            //       we walk the stack.
            lock.unlock();
            walk_stack(frame_ptr);
            break;
        }
        default:
            dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
            break;
        }
    }
    return stack_trace;
}
ProcessorContainer& Processor::processors()
{
    return s_processors;
}

Processor& Processor::by_id(u32 id)
{
    return *s_processors[id];
}

void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(&Processor::current() == this);
    trap.prev_irq_level = m_in_irq;
    if (raise_irq)
        m_in_irq++;
    auto* current_thread = Processor::current_thread();
    if (current_thread) {
        auto& current_trap = current_thread->current_trap();
        trap.next_trap = current_trap;
        current_trap = &trap;
        // The cs register of this trap tells us where we will return back to
        auto new_previous_mode = ((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
        if (current_thread->set_previous_mode(new_previous_mode) && trap.prev_irq_level == 0) {
            current_thread->update_time_scheduled(Scheduler::current_time(), new_previous_mode == Thread::PreviousMode::KernelMode, false);
        }
    } else {
        trap.next_trap = nullptr;
    }
}
void Processor::exit_trap(TrapFrame& trap)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(&Processor::current() == this);

    // Temporarily enter a critical section. This is to prevent critical
    // sections entered and left within e.g. smp_process_pending_messages
    // from triggering a context switch while we're executing this function.
    // See the comment at the end of the function for why we don't use
    // ScopedCritical here.
    m_in_critical = m_in_critical + 1;

    VERIFY(m_in_irq >= trap.prev_irq_level);
    m_in_irq = trap.prev_irq_level;

    if (s_smp_enabled)
        smp_process_pending_messages();

    // Process the deferred call queue. Among other things, this ensures
    // that any pending thread unblocks happen before we enter the scheduler.
    deferred_call_execute_pending();

    auto* current_thread = Processor::current_thread();
    if (current_thread) {
        auto& current_trap = current_thread->current_trap();
        current_trap = trap.next_trap;
        Thread::PreviousMode new_previous_mode;
        if (current_trap) {
            VERIFY(current_trap->regs);
            // If we have another higher level trap then we probably returned
            // from an interrupt or irq handler. The cs register of the
            // new/higher level trap tells us what the mode prior to it was.
            new_previous_mode = ((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
        } else {
            // If we don't have a higher level trap then we're returning back to
            // user mode, which means the mode prior to it was kernel mode.
            new_previous_mode = Thread::PreviousMode::KernelMode;
        }

        if (current_thread->set_previous_mode(new_previous_mode))
            current_thread->update_time_scheduled(Scheduler::current_time(), true, false);
    }

    VERIFY_INTERRUPTS_DISABLED();

    // Leave the critical section without actually enabling interrupts.
    // We don't want context switches to happen until we're explicitly
    // triggering a switch in check_invoke_scheduler.
    m_in_critical = m_in_critical - 1;
    if (!m_in_irq && !m_in_critical)
        check_invoke_scheduler();
}
void Processor::check_invoke_scheduler()
{
    InterruptDisabler disabler;
    VERIFY(!m_in_irq);
    VERIFY(!m_in_critical);
    VERIFY(&Processor::current() == this);
    if (m_invoke_scheduler_async && m_scheduler_initialized) {
        m_invoke_scheduler_async = false;
        Scheduler::invoke_async();
    }
}

void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
{
    auto ptr = vaddr.as_ptr();
    while (page_count > 0) {
        // clang-format off
        asm volatile("invlpg %0"
             :
             : "m"(*ptr)
             : "memory");
        // clang-format on
        ptr += PAGE_SIZE;
        page_count--;
    }
}

void Processor::flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
{
    if (s_smp_enabled && (!Memory::is_user_address(vaddr) || Process::current().thread_count() > 1))
        smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
    else
        flush_tlb_local(vaddr, page_count);
}
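
// The message pool is a lock-free singly-linked free list shared by all
// processors: smp_return_to_pool() pushes a message back with a CAS loop,
// and smp_get_from_pool() pops one the same way. Messages are never freed,
// which is what makes touching msg->next safe even after losing a race.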
void Processor::smp_return_to_pool(ProcessorMessage& msg)
{
    ProcessorMessage* next = nullptr;
    for (;;) {
        msg.next = next;
        if (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel))
            break;
        Processor::pause();
    }
}

ProcessorMessage& Processor::smp_get_from_pool()
{
    ProcessorMessage* msg;

    // The assumption is that messages are never removed from the pool!
    for (;;) {
        msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
        if (!msg) {
            if (!Processor::current().smp_process_pending_messages()) {
                Processor::pause();
            }
            continue;
        }
        // If another processor were to use this message in the meanwhile,
        // "msg" is still valid (because it never gets freed). We'd detect
        // this because the expected value "msg" and the pool head would
        // no longer match, and the compare_exchange will fail. But accessing
        // "msg->next" is always safe here.
        if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
            // We successfully "popped" this available message
            break;
        }
    }

    VERIFY(msg != nullptr);
    return *msg;
}
u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(wake_count > 0);
    if (!s_smp_enabled)
        return 0;

    // Wake at most N - 1 processors
    if (wake_count >= Processor::count()) {
        wake_count = Processor::count() - 1;
        VERIFY(wake_count > 0);
    }

    u32 current_id = Processor::current_id();

    u32 did_wake_count = 0;
    auto& apic = APIC::the();
    while (did_wake_count < wake_count) {
        // Try to get a set of idle CPUs and flip them to busy
        u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
        u32 idle_count = __builtin_popcountl(idle_mask);
        if (idle_count == 0)
            break; // No (more) idle processor available

        u32 found_mask = 0;
        for (u32 i = 0; i < idle_count; i++) {
            u32 cpu = __builtin_ffsl(idle_mask) - 1;
            idle_mask &= ~(1u << cpu);
            found_mask |= 1u << cpu;
        }

        idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
        if (idle_mask == 0)
            continue; // All of them were flipped to busy, try again
        idle_count = __builtin_popcountl(idle_mask);
        for (u32 i = 0; i < idle_count; i++) {
            u32 cpu = __builtin_ffsl(idle_mask) - 1;
            idle_mask &= ~(1u << cpu);

            // Send an IPI to that CPU to wake it up. There is a possibility
            // someone else woke it up as well, or that it woke up due to
            // a timer interrupt. But we tried hard to avoid this...
            apic.send_ipi(cpu);
            did_wake_count++;
        }
    }
    return did_wake_count;
}
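
// Each ProcessorMessage carries one ProcessorMessageEntry per processor, so a
// single message can sit on several per-CPU queues at once. The pool below is
// sized at 100 messages per processor, each with Processor::count() entries.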
UNMAP_AFTER_INIT void Processor::smp_enable()
{
    size_t msg_pool_size = Processor::count() * 100u;
    size_t msg_entries_cnt = Processor::count();

    auto msgs = new ProcessorMessage[msg_pool_size];
    auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];

    size_t msg_entry_i = 0;
    for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
        auto& msg = msgs[i];
        msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
        msg.per_proc_entries = &msg_entries[msg_entry_i];
        for (size_t k = 0; k < msg_entries_cnt; k++)
            msg_entries[msg_entry_i + k].msg = &msg;
    }

    s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);

    // Start sending IPI messages
    s_smp_enabled = true;
}
void Processor::smp_cleanup_message(ProcessorMessage& msg)
{
    switch (msg.type) {
    case ProcessorMessage::Callback:
        msg.callback_value().~Function();
        break;
    default:
        break;
    }
}
bool Processor::smp_process_pending_messages()
{
    VERIFY(s_smp_enabled);

    bool did_process = false;
    enter_critical();

    if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
        // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
        auto reverse_list =
            [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
                ProcessorMessageEntry* rev_list = nullptr;
                while (list) {
                    auto next = list->next;
                    list->next = rev_list;
                    rev_list = list;
                    list = next;
                }
                return rev_list;
            };

        pending_msgs = reverse_list(pending_msgs);

        // now process in the right order
        ProcessorMessageEntry* next_msg;
        for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
            next_msg = cur_msg->next;
            auto msg = cur_msg->msg;

            dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", current_id(), VirtualAddress(msg));

            switch (msg->type) {
            case ProcessorMessage::Callback:
                msg->invoke_callback();
                break;
            case ProcessorMessage::FlushTlb:
                if (Memory::is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
                    // We assume that we don't cross into kernel land!
                    VERIFY(Memory::is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
                    if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
                        // This processor isn't using this page directory right now, we can ignore this request
                        dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", current_id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
                        break;
                    }
                }
                flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
                break;
            }

            bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
            auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
            VERIFY(prev_refs != 0);
            if (prev_refs == 1) {
                // All processors handled this. If this is an async message,
                // we need to clean it up and return it to the pool
                if (is_async) {
                    smp_cleanup_message(*msg);
                    smp_return_to_pool(*msg);
                }
            }

            if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
                halt_this();
        }
        did_process = true;
    } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
        halt_this();
    }

    leave_critical();
    return did_process;
}
bool Processor::smp_enqueue_message(ProcessorMessage& msg)
{
    // Note that it's quite possible that the other processor may pop
    // the queue at any given time. We rely on the fact that the messages
    // are pooled and never get freed!
    auto& msg_entry = msg.per_proc_entries[id()];
    VERIFY(msg_entry.msg == &msg);
    ProcessorMessageEntry* next = nullptr;
    for (;;) {
        msg_entry.next = next;
        if (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel))
            break;
        Processor::pause();
    }

    // If the enqueued message was the only message in the queue when posted,
    // we return true. This is used by callers when deciding whether to generate an IPI.
    return next == nullptr;
}
void Processor::smp_broadcast_message(ProcessorMessage& msg)
{
    auto& current_processor = Processor::current();

    dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} processor: {}", current_processor.id(), VirtualAddress(&msg), count(), VirtualAddress(&current_processor));

    msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
    VERIFY(msg.refs > 0);
    bool need_broadcast = false;
    for_each(
        [&](Processor& proc) {
            if (&proc != &current_processor) {
                if (proc.smp_enqueue_message(msg))
                    need_broadcast = true;
            }
        });

    // Now trigger an IPI on all other APs (unless all targets already had messages queued)
    if (need_broadcast)
        APIC::the().broadcast_ipi();
}

void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
{
    auto& cur_proc = Processor::current();
    VERIFY(!msg.async);
    // If synchronous then we must cleanup and return the message back
    // to the pool. Otherwise, the last processor to complete it will return it
    while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
        Processor::pause();

        // We need to process any messages that may have been sent to
        // us while we're waiting. This also checks if another processor
        // may have requested us to halt.
        cur_proc.smp_process_pending_messages();
    }

    smp_cleanup_message(msg);
    smp_return_to_pool(msg);
}
void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
{
    auto& current_processor = Processor::current();
    VERIFY(cpu != current_processor.id());
    auto& target_processor = processors()[cpu];
    msg.async = async;

    dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} processor: {}", current_processor.id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_processor));

    msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
    if (target_processor->smp_enqueue_message(msg)) {
        APIC::the().send_ipi(cpu);
    }

    if (!async) {
        // If synchronous then we must cleanup and return the message back
        // to the pool. Otherwise, the last processor to complete it will return it
        while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
            Processor::pause();

            // We need to process any messages that may have been sent to
            // us while we're waiting. This also checks if another processor
            // may have requested us to halt.
            current_processor.smp_process_pending_messages();
        }

        smp_cleanup_message(msg);
        smp_return_to_pool(msg);
    }
}
void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
{
    auto& msg = smp_get_from_pool();
    msg.type = ProcessorMessage::Callback;
    new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
    smp_unicast_message(cpu, msg, async);
}

void Processor::smp_broadcast_flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
{
    auto& msg = smp_get_from_pool();
    msg.async = false;
    msg.type = ProcessorMessage::FlushTlb;
    msg.flush_tlb.page_directory = page_directory;
    msg.flush_tlb.ptr = vaddr.as_ptr();
    msg.flush_tlb.page_count = page_count;

    smp_broadcast_message(msg);

    // While the other processors handle this request, we'll flush ours
    flush_tlb_local(vaddr, page_count);

    // Now wait until everybody is done as well
    smp_broadcast_wait_sync(msg);
}

void Processor::smp_broadcast_halt()
{
    // We don't want to use a message, because this could have been triggered
    // by being out of memory and we might not be able to get a message
    for_each(
        [&](Processor& proc) {
            proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
        });

    // Now trigger an IPI on all other APs
    APIC::the().broadcast_ipi();
}

void Processor::Processor::halt()
{
    if (s_smp_enabled)
        smp_broadcast_halt();

    halt_this();
}
UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
{
    size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
    for (size_t i = 0; i < pool_count; i++) {
        auto& entry = m_deferred_call_pool[i];
        entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
        new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
        entry.was_allocated = false;
    }
    m_pending_deferred_calls = nullptr;
    m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
}

void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
{
    VERIFY(m_in_critical);
    VERIFY(!entry->was_allocated);

    entry->handler_value() = {};

    entry->next = m_free_deferred_call_pool_entry;
    m_free_deferred_call_pool_entry = entry;
}

DeferredCallEntry* Processor::deferred_call_get_free()
{
    VERIFY(m_in_critical);

    if (m_free_deferred_call_pool_entry) {
        // Fast path, we have an entry in our pool
        auto* entry = m_free_deferred_call_pool_entry;
        m_free_deferred_call_pool_entry = entry->next;
        VERIFY(!entry->was_allocated);
        return entry;
    }

    auto* entry = new DeferredCallEntry;
    new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
    entry->was_allocated = true;

    return entry;
}
void Processor::deferred_call_execute_pending()
{
    VERIFY(m_in_critical);

    if (!m_pending_deferred_calls)
        return;
    auto* pending_list = m_pending_deferred_calls;
    m_pending_deferred_calls = nullptr;

    // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
    auto reverse_list =
        [](DeferredCallEntry* list) -> DeferredCallEntry* {
            DeferredCallEntry* rev_list = nullptr;
            while (list) {
                auto next = list->next;
                list->next = rev_list;
                rev_list = list;
                list = next;
            }
            return rev_list;
        };
    pending_list = reverse_list(pending_list);

    do {
        pending_list->invoke_handler();

        // Return the entry back to the pool, or free it
        auto* next = pending_list->next;
        if (pending_list->was_allocated) {
            pending_list->handler_value().~Function();
            delete pending_list;
        } else
            deferred_call_return_to_pool(pending_list);

        pending_list = next;
    } while (pending_list);
}

void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
{
    VERIFY(m_in_critical);
    entry->next = m_pending_deferred_calls;
    m_pending_deferred_calls = entry;
}

void Processor::deferred_call_queue(Function<void()> callback)
{
    // NOTE: If we are called outside of a critical section and outside
    // of an irq handler, the function will be executed before we return!
    ScopedCritical critical;
    auto& cur_proc = Processor::current();

    auto* entry = cur_proc.deferred_call_get_free();
    entry->handler_value() = move(callback);

    cur_proc.deferred_call_queue_entry(entry);
}
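
// The raw GDT entries below follow the usual x86 descriptor layout: the low
// dword holds limit[15:0] and base[15:0], the high dword holds the remaining
// base bits, the access byte and the flags nibble. For example, 0x00cf9a00
// over a 0x0000ffff low dword describes a flat 4 GiB ring-0 32-bit code
// segment, while 0x00af9a00 sets the L bit for a 64-bit code segment instead.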
UNMAP_AFTER_INIT void Processor::gdt_init()
{
    m_gdt_length = 0;
    m_gdtr.address = nullptr;
    m_gdtr.limit = 0;

    write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
#if ARCH(I386)
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
#else
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
#endif

#if ARCH(I386)
    Descriptor tls_descriptor {};
    tls_descriptor.low = tls_descriptor.high = 0;
    tls_descriptor.dpl = 3;
    tls_descriptor.segment_present = 1;
    tls_descriptor.granularity = 0;
    tls_descriptor.operation_size64 = 0;
    tls_descriptor.operation_size32 = 1;
    tls_descriptor.descriptor_type = 1;
    tls_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3

    Descriptor gs_descriptor {};
    gs_descriptor.set_base(VirtualAddress { this });
    gs_descriptor.set_limit(sizeof(Processor) - 1);
    gs_descriptor.dpl = 0;
    gs_descriptor.segment_present = 1;
    gs_descriptor.granularity = 0;
    gs_descriptor.operation_size64 = 0;
    gs_descriptor.operation_size32 = 1;
    gs_descriptor.descriptor_type = 1;
    gs_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_PROC, gs_descriptor); // gs0
#endif

    Descriptor tss_descriptor {};
    tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
    tss_descriptor.set_limit(sizeof(TSS) - 1);
    tss_descriptor.dpl = 0;
    tss_descriptor.segment_present = 1;
    tss_descriptor.granularity = 0;
    tss_descriptor.operation_size64 = 0;
    tss_descriptor.operation_size32 = 1;
    tss_descriptor.descriptor_type = 0;
    tss_descriptor.type = 9;
    write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss

#if ARCH(X86_64)
    Descriptor tss_descriptor_part2 {};
    tss_descriptor_part2.low = (size_t)&m_tss >> 32;
    write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
#endif

    flush_gdt();
    load_task_register(GDT_SELECTOR_TSS);

#if ARCH(X86_64)
    MSR gs_base(MSR_GS_BASE);
    gs_base.set((u64)this);
#else
    asm volatile(
        "mov %%ax, %%ds\n"
        "mov %%ax, %%es\n"
        "mov %%ax, %%fs\n"
        "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
        : "memory");
    set_gs(GDT_SELECTOR_PROC);
#endif

#if ARCH(I386)
    // Make sure CS points to the kernel code descriptor.
    // clang-format off
    asm volatile(
        "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
        "sanity:\n");
    // clang-format on
#endif
}
extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
{
    VERIFY(!are_interrupts_enabled());
    VERIFY(is_kernel_mode());

    dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);

    VERIFY(to_thread == Thread::current());

    Scheduler::enter_current(*from_thread, true);

    auto in_critical = to_thread->saved_critical();
    VERIFY(in_critical > 0);
    Processor::restore_in_critical(in_critical);

    // Since we got here and don't have Scheduler::context_switch in the
    // call stack (because this is the first time we switched into this
    // context), we need to notify the scheduler so that it can release
    // the scheduler lock. We don't want to enable interrupts at this point
    // as we're still in the middle of a context switch. Doing so could
    // trigger a context switch within a context switch, leading to a crash.
    FlatPtr flags = trap->regs->flags();
    Scheduler::leave_on_first_switch(flags & ~0x200); // mask out EFLAGS.IF (bit 9) to keep interrupts disabled
}
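
// enter_thread_context() performs the per-thread part of a context switch:
// it saves the outgoing thread's FPU and (when traced) debug-register state,
// switches the TLS base (GDT entry on i686, FS_BASE MSR on x86_64), changes
// CR3 if the address space differs, and restores the incoming thread's FPU
// state and saved critical-section depth.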
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
{
    VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running);
    VERIFY(to_thread->state() == Thread::Running);

    bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
    Processor::set_current_thread(*to_thread);

    auto& from_regs = from_thread->regs();
    auto& to_regs = to_thread->regs();

    if (has_fxsr)
        asm volatile("fxsave %0"
                     : "=m"(from_thread->fpu_state()));
    else
        asm volatile("fnsave %0"
                     : "=m"(from_thread->fpu_state()));

#if ARCH(I386)
    from_regs.fs = get_fs();
    from_regs.gs = get_gs();
    set_fs(to_regs.fs);
    set_gs(to_regs.gs);
#endif

    if (from_thread->process().is_traced())
        read_debug_registers_into(from_thread->debug_register_state());

    if (to_thread->process().is_traced()) {
        write_debug_registers_from(to_thread->debug_register_state());
    } else {
        clear_debug_registers();
    }

    auto& processor = Processor::current();
#if ARCH(I386)
    auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
    tls_descriptor.set_base(to_thread->thread_specific_data());
    tls_descriptor.set_limit(to_thread->thread_specific_region_size());
#else
    MSR fs_base_msr(MSR_FS_BASE);
    fs_base_msr.set(to_thread->thread_specific_data().get());
#endif

    if (from_regs.cr3 != to_regs.cr3)
        write_cr3(to_regs.cr3);

    to_thread->set_cpu(processor.id());

    auto in_critical = to_thread->saved_critical();
    VERIFY(in_critical > 0);
    Processor::restore_in_critical(in_critical);

    if (has_fxsr)
        asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
    else
        asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));

    // TODO: ioperm?
}
extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
{
    VERIFY_INTERRUPTS_DISABLED();
    thread->regs().set_flags(flags);
    return Processor::current().init_context(*thread, true);
}

void Processor::assume_context(Thread& thread, FlatPtr flags)
{
    dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);

    VERIFY_INTERRUPTS_DISABLED();
    Scheduler::prepare_after_exec();
    // in_critical() should be 2 here. The critical section in Process::exec
    // and then the scheduler lock
    VERIFY(Processor::in_critical() == 2);

    do_assume_context(&thread, flags);

    VERIFY_NOT_REACHED();
}

u64 Processor::time_spent_idle() const
{
    return m_idle_thread->time_in_user() + m_idle_thread->time_in_kernel();
}

}