Processor.cpp 45 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/Format.h>
  7. #include <AK/StdLibExtras.h>
  8. #include <AK/String.h>
  9. #include <AK/Types.h>
  10. #include <Kernel/Interrupts/APIC.h>
  11. #include <Kernel/Memory/ProcessPagingScope.h>
  12. #include <Kernel/Process.h>
  13. #include <Kernel/Sections.h>
  14. #include <Kernel/StdLib.h>
  15. #include <Kernel/Thread.h>
  16. #include <Kernel/Arch/x86/CPUID.h>
  17. #include <Kernel/Arch/x86/Interrupts.h>
  18. #include <Kernel/Arch/x86/MSR.h>
  19. #include <Kernel/Arch/x86/Processor.h>
  20. #include <Kernel/Arch/x86/ProcessorInfo.h>
  21. #include <Kernel/Arch/x86/SafeMem.h>
  22. #include <Kernel/Arch/x86/ScopedCritical.h>
  23. #include <Kernel/Arch/x86/TrapFrame.h>
  24. namespace Kernel {
  // FPU state snapshot taken by initialize() on CPU 0 (fninit followed by fxsave/fnsave).
  25. READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;
  // Per-CPU Processor pointers indexed by CPU id; each slot is written in initialize()
  // and read back through processors() / by_id().
  26. READONLY_AFTER_INIT static ProcessorContainer s_processors {};
  // Processor counter maintained by early_initialize(): CPU 0 stores 1, every other CPU adds 1.
  27. READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
  // Cleared by CPU 0 in early_initialize() and set at the end of smp_enable().
  28. READONLY_AFTER_INIT static volatile bool s_smp_enabled;
  // Head of the free list of ProcessorMessage objects; populated by smp_enable() and
  // updated with compare-exchange in smp_get_from_pool() / smp_return_to_pool().
  29. static Atomic<ProcessorMessage*> s_message_pool;
  // Bitmask with one bit per CPU id; smp_wake_n_idle_processors() clears the bits of the CPUs it wakes.
  // NOTE(review): the code that sets bits is not in this part of the file - confirm where CPUs mark themselves idle.
  30. Atomic<u32> Processor::s_idle_cpu_mask { 0 };
  31. // The compiler can't see the calls to these functions inside assembly.
  32. // Declare them, to avoid dead code warnings.
  33. extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
  34. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
  35. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
  36. bool Processor::is_smp_enabled()
  37. {
  38. return s_smp_enabled;
  39. }
  40. UNMAP_AFTER_INIT static void sse_init()
  41. {
  42. write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
  43. write_cr4(read_cr4() | 0x600);
  44. }
  45. void exit_kernel_thread(void)
  46. {
  47. Thread::current()->exit();
  48. }
  49. UNMAP_AFTER_INIT void Processor::cpu_detect()
  50. {
  51. // NOTE: This is called during Processor::early_initialize, we cannot
  52. // safely log at this point because we don't have kmalloc
  53. // initialized yet!
  54. auto set_feature =
  55. [&](CPUFeature f) {
  56. m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f));
  57. };
  58. m_features = static_cast<CPUFeature>(0);
  59. CPUID processor_info(0x1);
  60. if (processor_info.edx() & (1 << 4))
  61. set_feature(CPUFeature::TSC);
  62. if (processor_info.edx() & (1 << 6))
  63. set_feature(CPUFeature::PAE);
  64. if (processor_info.edx() & (1 << 13))
  65. set_feature(CPUFeature::PGE);
  66. if (processor_info.edx() & (1 << 23))
  67. set_feature(CPUFeature::MMX);
  68. if (processor_info.edx() & (1 << 24))
  69. set_feature(CPUFeature::FXSR);
  70. if (processor_info.edx() & (1 << 25))
  71. set_feature(CPUFeature::SSE);
  72. if (processor_info.edx() & (1 << 26))
  73. set_feature(CPUFeature::SSE2);
  74. if (processor_info.ecx() & (1 << 0))
  75. set_feature(CPUFeature::SSE3);
  76. if (processor_info.ecx() & (1 << 9))
  77. set_feature(CPUFeature::SSSE3);
  78. if (processor_info.ecx() & (1 << 19))
  79. set_feature(CPUFeature::SSE4_1);
  80. if (processor_info.ecx() & (1 << 20))
  81. set_feature(CPUFeature::SSE4_2);
  82. if (processor_info.ecx() & (1 << 26))
  83. set_feature(CPUFeature::XSAVE);
  84. if (processor_info.ecx() & (1 << 28))
  85. set_feature(CPUFeature::AVX);
  86. if (processor_info.ecx() & (1 << 30))
  87. set_feature(CPUFeature::RDRAND);
  88. if (processor_info.ecx() & (1u << 31))
  89. set_feature(CPUFeature::HYPERVISOR);
  90. if (processor_info.edx() & (1 << 11)) {
  91. u32 stepping = processor_info.eax() & 0xf;
  92. u32 model = (processor_info.eax() >> 4) & 0xf;
  93. u32 family = (processor_info.eax() >> 8) & 0xf;
  94. if (!(family == 6 && model < 3 && stepping < 3))
  95. set_feature(CPUFeature::SEP);
  96. if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
  97. set_feature(CPUFeature::CONSTANT_TSC);
  98. }
  99. u32 max_extended_leaf = CPUID(0x80000000).eax();
  100. if (max_extended_leaf >= 0x80000001) {
  101. CPUID extended_processor_info(0x80000001);
  102. if (extended_processor_info.edx() & (1 << 20))
  103. set_feature(CPUFeature::NX);
  104. if (extended_processor_info.edx() & (1 << 27))
  105. set_feature(CPUFeature::RDTSCP);
  106. if (extended_processor_info.edx() & (1 << 29))
  107. set_feature(CPUFeature::LM);
  108. if (extended_processor_info.edx() & (1 << 11)) {
  109. // Only available in 64 bit mode
  110. set_feature(CPUFeature::SYSCALL);
  111. }
  112. }
  113. if (max_extended_leaf >= 0x80000007) {
  114. CPUID cpuid(0x80000007);
  115. if (cpuid.edx() & (1 << 8)) {
  116. set_feature(CPUFeature::CONSTANT_TSC);
  117. set_feature(CPUFeature::NONSTOP_TSC);
  118. }
  119. }
  120. if (max_extended_leaf >= 0x80000008) {
  121. // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
  122. CPUID cpuid(0x80000008);
  123. m_physical_address_bit_width = cpuid.eax() & 0xff;
  124. } else {
  125. // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
  126. m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
  127. }
  128. CPUID extended_features(0x7);
  129. if (extended_features.ebx() & (1 << 20))
  130. set_feature(CPUFeature::SMAP);
  131. if (extended_features.ebx() & (1 << 7))
  132. set_feature(CPUFeature::SMEP);
  133. if (extended_features.ecx() & (1 << 2))
  134. set_feature(CPUFeature::UMIP);
  135. if (extended_features.ebx() & (1 << 18))
  136. set_feature(CPUFeature::RDSEED);
  137. }
  138. UNMAP_AFTER_INIT void Processor::cpu_setup()
  139. {
  140. // NOTE: This is called during Processor::early_initialize, we cannot
  141. // safely log at this point because we don't have kmalloc
  142. // initialized yet!
  143. cpu_detect();
  144. if (has_feature(CPUFeature::SSE)) {
  145. // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
  146. // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
  147. VERIFY(has_feature(CPUFeature::FXSR));
  148. sse_init();
  149. }
  150. write_cr0(read_cr0() | 0x00010000);
  151. if (has_feature(CPUFeature::PGE)) {
  152. // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
  153. write_cr4(read_cr4() | 0x80);
  154. }
  155. if (has_feature(CPUFeature::NX)) {
  156. // Turn on IA32_EFER.NXE
  157. asm volatile(
  158. "movl $0xc0000080, %ecx\n"
  159. "rdmsr\n"
  160. "orl $0x800, %eax\n"
  161. "wrmsr\n");
  162. }
  163. if (has_feature(CPUFeature::SMEP)) {
  164. // Turn on CR4.SMEP
  165. write_cr4(read_cr4() | 0x100000);
  166. }
  167. if (has_feature(CPUFeature::SMAP)) {
  168. // Turn on CR4.SMAP
  169. write_cr4(read_cr4() | 0x200000);
  170. }
  171. if (has_feature(CPUFeature::UMIP)) {
  172. write_cr4(read_cr4() | 0x800);
  173. }
  174. if (has_feature(CPUFeature::TSC)) {
  175. write_cr4(read_cr4() | 0x4);
  176. }
  177. if (has_feature(CPUFeature::XSAVE)) {
  178. // Turn on CR4.OSXSAVE
  179. write_cr4(read_cr4() | 0x40000);
  180. // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
  181. // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
  182. write_xcr0(0x1);
  183. if (has_feature(CPUFeature::AVX)) {
  184. // Turn on SSE, AVX and x87 flags
  185. write_xcr0(read_xcr0() | 0x7);
  186. }
  187. }
  188. }
  189. String Processor::features_string() const
  190. {
  191. StringBuilder builder;
  192. auto feature_to_str =
  193. [](CPUFeature f) -> const char* {
  194. switch (f) {
  195. case CPUFeature::NX:
  196. return "nx";
  197. case CPUFeature::PAE:
  198. return "pae";
  199. case CPUFeature::PGE:
  200. return "pge";
  201. case CPUFeature::RDRAND:
  202. return "rdrand";
  203. case CPUFeature::RDSEED:
  204. return "rdseed";
  205. case CPUFeature::SMAP:
  206. return "smap";
  207. case CPUFeature::SMEP:
  208. return "smep";
  209. case CPUFeature::SSE:
  210. return "sse";
  211. case CPUFeature::TSC:
  212. return "tsc";
  213. case CPUFeature::RDTSCP:
  214. return "rdtscp";
  215. case CPUFeature::CONSTANT_TSC:
  216. return "constant_tsc";
  217. case CPUFeature::NONSTOP_TSC:
  218. return "nonstop_tsc";
  219. case CPUFeature::UMIP:
  220. return "umip";
  221. case CPUFeature::SEP:
  222. return "sep";
  223. case CPUFeature::SYSCALL:
  224. return "syscall";
  225. case CPUFeature::MMX:
  226. return "mmx";
  227. case CPUFeature::FXSR:
  228. return "fxsr";
  229. case CPUFeature::SSE2:
  230. return "sse2";
  231. case CPUFeature::SSE3:
  232. return "sse3";
  233. case CPUFeature::SSSE3:
  234. return "ssse3";
  235. case CPUFeature::SSE4_1:
  236. return "sse4.1";
  237. case CPUFeature::SSE4_2:
  238. return "sse4.2";
  239. case CPUFeature::XSAVE:
  240. return "xsave";
  241. case CPUFeature::AVX:
  242. return "avx";
  243. case CPUFeature::LM:
  244. return "lm";
  245. case CPUFeature::HYPERVISOR:
  246. return "hypervisor";
  247. // no default statement here intentionally so that we get
  248. // a warning if a new feature is forgotten to be added here
  249. }
  250. // Shouldn't ever happen
  251. return "???";
  252. };
  253. bool first = true;
  254. for (u32 flag = 1; flag != 0; flag <<= 1) {
  255. if ((static_cast<u32>(m_features) & flag) != 0) {
  256. if (first)
  257. first = false;
  258. else
  259. builder.append(' ');
  260. auto str = feature_to_str(static_cast<CPUFeature>(flag));
  261. builder.append(str, strlen(str));
  262. }
  263. }
  264. return builder.build();
  265. }
  266. UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
  267. {
  268. m_self = this;
  269. m_cpu = cpu;
  270. m_in_irq = 0;
  271. m_in_critical = 0;
  272. m_invoke_scheduler_async = false;
  273. m_scheduler_initialized = false;
  274. m_in_scheduler = true;
  275. m_message_queue = nullptr;
  276. m_idle_thread = nullptr;
  277. m_current_thread = nullptr;
  278. m_info = nullptr;
  279. m_halt_requested = false;
  280. if (cpu == 0) {
  281. s_smp_enabled = false;
  282. g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
  283. } else {
  284. g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
  285. }
  286. deferred_call_pool_init();
  287. cpu_setup();
  288. gdt_init();
  289. VERIFY(is_initialized()); // sanity check
  290. VERIFY(&current() == this); // sanity check
  291. }
  292. UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
  293. {
  294. VERIFY(m_self == this);
  295. VERIFY(&current() == this); // sanity check
  296. dmesgln("CPU[{}]: Supported features: {}", current_id(), features_string());
  297. if (!has_feature(CPUFeature::RDRAND))
  298. dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", current_id());
  299. dmesgln("CPU[{}]: Physical address bit width: {}", current_id(), m_physical_address_bit_width);
  300. if (cpu == 0)
  301. idt_init();
  302. else
  303. flush_idt();
  304. if (cpu == 0) {
  305. VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
  306. asm volatile("fninit");
  307. if (has_feature(CPUFeature::FXSR))
  308. asm volatile("fxsave %0"
  309. : "=m"(s_clean_fpu_state));
  310. else
  311. asm volatile("fnsave %0"
  312. : "=m"(s_clean_fpu_state));
  313. if (has_feature(CPUFeature::HYPERVISOR))
  314. detect_hypervisor();
  315. }
  316. m_info = new ProcessorInfo(*this);
  317. {
  318. // We need to prevent races between APs starting up at the same time
  319. VERIFY(cpu < s_processors.size());
  320. s_processors[cpu] = this;
  321. }
  322. }
  323. UNMAP_AFTER_INIT void Processor::detect_hypervisor()
  324. {
  325. CPUID hypervisor_leaf_range(0x40000000);
  326. // Get signature of hypervisor.
  327. alignas(sizeof(u32)) char hypervisor_signature_buffer[13];
  328. *reinterpret_cast<u32*>(hypervisor_signature_buffer) = hypervisor_leaf_range.ebx();
  329. *reinterpret_cast<u32*>(hypervisor_signature_buffer + 4) = hypervisor_leaf_range.ecx();
  330. *reinterpret_cast<u32*>(hypervisor_signature_buffer + 8) = hypervisor_leaf_range.edx();
  331. hypervisor_signature_buffer[12] = '\0';
  332. StringView hypervisor_signature(hypervisor_signature_buffer);
  333. dmesgln("CPU[{}]: CPUID hypervisor signature '{}' ({:#x} {:#x} {:#x}), max leaf {:#x}", current_id(), hypervisor_signature, hypervisor_leaf_range.ebx(), hypervisor_leaf_range.ecx(), hypervisor_leaf_range.edx(), hypervisor_leaf_range.eax());
  334. if (hypervisor_signature == "Microsoft Hv"sv)
  335. detect_hypervisor_hyperv(hypervisor_leaf_range);
  336. }
  337. UNMAP_AFTER_INIT void Processor::detect_hypervisor_hyperv(CPUID const& hypervisor_leaf_range)
  338. {
  339. if (hypervisor_leaf_range.eax() < 0x40000001)
  340. return;
  341. CPUID hypervisor_interface(0x40000001);
  342. // Get signature of hypervisor interface.
  343. alignas(sizeof(u32)) char interface_signature_buffer[5];
  344. *reinterpret_cast<u32*>(interface_signature_buffer) = hypervisor_interface.eax();
  345. interface_signature_buffer[4] = '\0';
  346. StringView hyperv_interface_signature(interface_signature_buffer);
  347. dmesgln("CPU[{}]: Hyper-V interface signature '{}' ({:#x})", current_id(), hyperv_interface_signature, hypervisor_interface.eax());
  348. if (hypervisor_leaf_range.eax() < 0x40000001)
  349. return;
  350. CPUID hypervisor_sysid(0x40000002);
  351. dmesgln("CPU[{}]: Hyper-V system identity {}.{}, build number {}", current_id(), hypervisor_sysid.ebx() >> 16, hypervisor_sysid.ebx() & 0xFFFF, hypervisor_sysid.eax());
  352. if (hypervisor_leaf_range.eax() < 0x40000005 || hyperv_interface_signature != "Hv#1"sv)
  353. return;
  354. dmesgln("CPU[{}]: Hyper-V hypervisor detected", current_id());
  355. // TODO: Actually do something with Hyper-V.
  356. }
  357. void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
  358. {
  359. u16 i = (selector & 0xfffc) >> 3;
  360. u32 prev_gdt_length = m_gdt_length;
  361. if (i >= m_gdt_length) {
  362. m_gdt_length = i + 1;
  363. VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
  364. m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
  365. }
  366. m_gdt[i].low = low;
  367. m_gdt[i].high = high;
  368. // clear selectors we may have skipped
  369. while (i < prev_gdt_length) {
  370. m_gdt[i].low = 0;
  371. m_gdt[i].high = 0;
  372. i++;
  373. }
  374. }
  375. void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
  376. {
  377. write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
  378. }
  379. Descriptor& Processor::get_gdt_entry(u16 selector)
  380. {
  381. u16 i = (selector & 0xfffc) >> 3;
  382. return *(Descriptor*)(&m_gdt[i]);
  383. }
  384. void Processor::flush_gdt()
  385. {
  386. m_gdtr.address = m_gdt;
  387. m_gdtr.limit = (m_gdt_length * 8) - 1;
  388. asm volatile("lgdt %0" ::"m"(m_gdtr)
  389. : "memory");
  390. }
  391. const DescriptorTablePointer& Processor::get_gdtr()
  392. {
  393. return m_gdtr;
  394. }
  // Collects a stack trace for `thread` by following its chain of saved frame pointers.
  //
  // The first entry of the result is the starting instruction pointer; each following
  // entry is a return address read from the stack. If `max_frames` is non-zero the
  // result holds at most `max_frames` entries; independently of that, walking stops
  // once 4096 entries were collected. For a thread state not handled by the switch
  // below, nothing is walked and the result is empty.
  //
  // Three situations are handled: `thread` is the calling thread, `thread` is active
  // on another processor (the walk is delegated to that processor via smp_unicast),
  // or `thread` is not running (the walk starts from its saved registers).
  395. Vector<FlatPtr> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
  396. {
  397. FlatPtr frame_ptr = 0, ip = 0;
  398. Vector<FlatPtr, 32> stack_trace;
  // Walks the frame chain starting at `stack_ptr`: slot [1] of a frame is read as the
  // return address and slot [0] as the next frame pointer. User addresses are read with
  // copy_from_user, everything else with safe_memcpy. The walk ends on a failed read,
  // a zero return address, a zero frame pointer, or when one of the limits is hit.
  399. auto walk_stack = [&](FlatPtr stack_ptr) {
  400. static constexpr size_t max_stack_frames = 4096;
  401. stack_trace.append(ip);
  // `count` already includes the `ip` entry appended above.
  402. size_t count = 1;
  403. while (stack_ptr && stack_trace.size() < max_stack_frames) {
  404. FlatPtr retaddr;
  405. count++;
  406. if (max_frames != 0 && count > max_frames)
  407. break;
  408. if (Memory::is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
  409. if (!copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]) || !retaddr)
  410. break;
  411. stack_trace.append(retaddr);
  412. if (!copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr))
  413. break;
  414. } else {
  415. void* fault_at;
  416. if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
  417. break;
  418. stack_trace.append(retaddr);
  419. if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
  420. break;
  421. }
  422. }
  423. };
  // Starts the walk from the frame that is executing this lambda, on whichever
  // processor runs it.
  424. auto capture_current_thread = [&]() {
  425. frame_ptr = (FlatPtr)__builtin_frame_address(0);
  426. ip = (FlatPtr)__builtin_return_address(0);
  427. walk_stack(frame_ptr);
  428. };
  429. // Since the thread may be running on another processor, there
  430. // is a chance a context switch may happen while we're trying
  431. // to get it. It also won't be entirely accurate and merely
  432. // reflect the status at the last context switch.
  433. SpinlockLocker lock(g_scheduler_lock);
  434. if (&thread == Processor::current_thread()) {
  435. VERIFY(thread.state() == Thread::Running);
  436. // Leave the scheduler lock. If we trigger page faults we may
  437. // need to be preempted. Since this is our own thread it won't
  438. // cause any problems as the stack won't change below this frame.
  439. lock.unlock();
  440. capture_current_thread();
  441. } else if (thread.is_active()) {
  442. VERIFY(thread.cpu() != Processor::current_id());
  443. // If this is the case, the thread is currently running
  444. // on another processor. We can't trust the kernel stack as
  445. // it may be changing at any time. We need to probably send
  446. // an IPI to that processor, have it walk the stack and wait
  447. // until it returns the data back to us
  448. auto& proc = Processor::current();
  // NOTE(review): the callback captures locals of this function by reference, so it
  // must have finished before we return; the trailing `false` argument is presumably
  // what makes smp_unicast wait for completion - confirm against its declaration.
  449. smp_unicast(
  450. thread.cpu(),
  451. [&]() {
  452. dbgln("CPU[{}] getting stack for cpu #{}", Processor::current_id(), proc.id());
  453. ProcessPagingScope paging_scope(thread.process());
  454. VERIFY(&Processor::current() != &proc);
  455. VERIFY(&thread == Processor::current_thread());
  456. // NOTE: Because the other processor is still holding the
  457. // scheduler lock while waiting for this callback to finish,
  458. // the current thread on the target processor cannot change
  459. // TODO: What to do about page faults here? We might deadlock
  460. // because the other processor is still holding the
  461. // scheduler lock...
  462. capture_current_thread();
  463. },
  464. false);
  465. } else {
  466. switch (thread.state()) {
  467. case Thread::Running:
  468. VERIFY_NOT_REACHED(); // should have been handled above
  469. case Thread::Runnable:
  470. case Thread::Stopped:
  471. case Thread::Blocked:
  472. case Thread::Dying:
  473. case Thread::Dead: {
  474. // We need to retrieve ebp from what was last pushed to the kernel
  475. // stack. Before switching out of that thread, it switch_context
  476. // pushed the callee-saved registers, and the last of them happens
  477. // to be ebp.
  478. ProcessPagingScope paging_scope(thread.process());
  479. auto& regs = thread.regs();
  480. auto* stack_top = reinterpret_cast<FlatPtr*>(regs.sp());
  // Read the saved frame pointer from the top of the thread's stack; if the read
  // fails the walk below only records `ip`.
  481. if (Memory::is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
  482. if (!copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]))
  483. frame_ptr = 0;
  484. } else {
  485. void* fault_at;
  486. if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
  487. frame_ptr = 0;
  488. }
  489. ip = regs.ip();
  490. // TODO: We need to leave the scheduler lock here, but we also
  491. // need to prevent the target thread from being run while
  492. // we walk the stack
  493. lock.unlock();
  494. walk_stack(frame_ptr);
  495. break;
  496. }
  497. default:
  498. dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
  499. break;
  500. }
  501. }
  502. return stack_trace;
  503. }
  504. ProcessorContainer& Processor::processors()
  505. {
  506. return s_processors;
  507. }
  508. Processor& Processor::by_id(u32 id)
  509. {
  510. return *s_processors[id];
  511. }
  512. void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
  513. {
  514. VERIFY_INTERRUPTS_DISABLED();
  515. VERIFY(&Processor::current() == this);
  516. trap.prev_irq_level = m_in_irq;
  517. if (raise_irq)
  518. m_in_irq++;
  519. auto* current_thread = Processor::current_thread();
  520. if (current_thread) {
  521. auto& current_trap = current_thread->current_trap();
  522. trap.next_trap = current_trap;
  523. current_trap = &trap;
  524. // The cs register of this trap tells us where we will return back to
  525. auto new_previous_mode = ((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  526. if (current_thread->set_previous_mode(new_previous_mode) && trap.prev_irq_level == 0) {
  527. current_thread->update_time_scheduled(Scheduler::current_time(), new_previous_mode == Thread::PreviousMode::KernelMode, false);
  528. }
  529. } else {
  530. trap.next_trap = nullptr;
  531. }
  532. }
  533. void Processor::exit_trap(TrapFrame& trap)
  534. {
  535. VERIFY_INTERRUPTS_DISABLED();
  536. VERIFY(&Processor::current() == this);
  537. // Temporarily enter a critical section. This is to prevent critical
  538. // sections entered and left within e.g. smp_process_pending_messages
  539. // to trigger a context switch while we're executing this function
  540. // See the comment at the end of the function why we don't use
  541. // ScopedCritical here.
  542. m_in_critical = m_in_critical + 1;
  543. VERIFY(m_in_irq >= trap.prev_irq_level);
  544. m_in_irq = trap.prev_irq_level;
  545. if (s_smp_enabled)
  546. smp_process_pending_messages();
  547. // Process the deferred call queue. Among other things, this ensures
  548. // that any pending thread unblocks happen before we enter the scheduler.
  549. deferred_call_execute_pending();
  550. auto* current_thread = Processor::current_thread();
  551. if (current_thread) {
  552. auto& current_trap = current_thread->current_trap();
  553. current_trap = trap.next_trap;
  554. Thread::PreviousMode new_previous_mode;
  555. if (current_trap) {
  556. VERIFY(current_trap->regs);
  557. // If we have another higher level trap then we probably returned
  558. // from an interrupt or irq handler. The cs register of the
  559. // new/higher level trap tells us what the mode prior to it was
  560. new_previous_mode = ((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  561. } else {
  562. // If we don't have a higher level trap then we're back in user mode.
  563. // Which means that the previous mode prior to being back in user mode was kernel mode
  564. new_previous_mode = Thread::PreviousMode::KernelMode;
  565. }
  566. if (current_thread->set_previous_mode(new_previous_mode))
  567. current_thread->update_time_scheduled(Scheduler::current_time(), true, false);
  568. }
  569. VERIFY_INTERRUPTS_DISABLED();
  570. // Leave the critical section without actually enabling interrupts.
  571. // We don't want context switches to happen until we're explicitly
  572. // triggering a switch in check_invoke_scheduler.
  573. m_in_critical = m_in_critical - 1;
  574. if (!m_in_irq && !m_in_critical)
  575. check_invoke_scheduler();
  576. }
  577. void Processor::check_invoke_scheduler()
  578. {
  579. InterruptDisabler disabler;
  580. VERIFY(!m_in_irq);
  581. VERIFY(!m_in_critical);
  582. VERIFY(&Processor::current() == this);
  583. if (m_invoke_scheduler_async && m_scheduler_initialized) {
  584. m_invoke_scheduler_async = false;
  585. Scheduler::invoke_async();
  586. }
  587. }
  588. void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
  589. {
  590. auto ptr = vaddr.as_ptr();
  591. while (page_count > 0) {
  592. // clang-format off
  593. asm volatile("invlpg %0"
  594. :
  595. : "m"(*ptr)
  596. : "memory");
  597. // clang-format on
  598. ptr += PAGE_SIZE;
  599. page_count--;
  600. }
  601. }
  602. void Processor::flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  603. {
  604. if (s_smp_enabled && (!Memory::is_user_address(vaddr) || Process::current().thread_count() > 1))
  605. smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
  606. else
  607. flush_tlb_local(vaddr, page_count);
  608. }
  609. void Processor::smp_return_to_pool(ProcessorMessage& msg)
  610. {
  611. ProcessorMessage* next = nullptr;
  612. for (;;) {
  613. msg.next = next;
  614. if (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel))
  615. break;
  616. Processor::pause();
  617. }
  618. }
  619. ProcessorMessage& Processor::smp_get_from_pool()
  620. {
  621. ProcessorMessage* msg;
  622. // The assumption is that messages are never removed from the pool!
  623. for (;;) {
  624. msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
  625. if (!msg) {
  626. if (!Processor::current().smp_process_pending_messages()) {
  627. Processor::pause();
  628. }
  629. continue;
  630. }
  631. // If another processor were to use this message in the meanwhile,
  632. // "msg" is still valid (because it never gets freed). We'd detect
  633. // this because the expected value "msg" and pool would
  634. // no longer match, and the compare_exchange will fail. But accessing
  635. // "msg->next" is always safe here.
  636. if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
  637. // We successfully "popped" this available message
  638. break;
  639. }
  640. }
  641. VERIFY(msg != nullptr);
  642. return *msg;
  643. }
  644. u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
  645. {
  646. VERIFY_INTERRUPTS_DISABLED();
  647. VERIFY(wake_count > 0);
  648. if (!s_smp_enabled)
  649. return 0;
  650. // Wake at most N - 1 processors
  651. if (wake_count >= Processor::count()) {
  652. wake_count = Processor::count() - 1;
  653. VERIFY(wake_count > 0);
  654. }
  655. u32 current_id = Processor::current_id();
  656. u32 did_wake_count = 0;
  657. auto& apic = APIC::the();
  658. while (did_wake_count < wake_count) {
  659. // Try to get a set of idle CPUs and flip them to busy
  660. u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
  661. u32 idle_count = __builtin_popcountl(idle_mask);
  662. if (idle_count == 0)
  663. break; // No (more) idle processor available
  664. u32 found_mask = 0;
  665. for (u32 i = 0; i < idle_count; i++) {
  666. u32 cpu = __builtin_ffsl(idle_mask) - 1;
  667. idle_mask &= ~(1u << cpu);
  668. found_mask |= 1u << cpu;
  669. }
  670. idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
  671. if (idle_mask == 0)
  672. continue; // All of them were flipped to busy, try again
  673. idle_count = __builtin_popcountl(idle_mask);
  674. for (u32 i = 0; i < idle_count; i++) {
  675. u32 cpu = __builtin_ffsl(idle_mask) - 1;
  676. idle_mask &= ~(1u << cpu);
  677. // Send an IPI to that CPU to wake it up. There is a possibility
  678. // someone else woke it up as well, or that it woke up due to
  679. // a timer interrupt. But we tried hard to avoid this...
  680. apic.send_ipi(cpu);
  681. did_wake_count++;
  682. }
  683. }
  684. return did_wake_count;
  685. }
  686. UNMAP_AFTER_INIT void Processor::smp_enable()
  687. {
  688. size_t msg_pool_size = Processor::count() * 100u;
  689. size_t msg_entries_cnt = Processor::count();
  690. auto msgs = new ProcessorMessage[msg_pool_size];
  691. auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
  692. size_t msg_entry_i = 0;
  693. for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
  694. auto& msg = msgs[i];
  695. msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
  696. msg.per_proc_entries = &msg_entries[msg_entry_i];
  697. for (size_t k = 0; k < msg_entries_cnt; k++)
  698. msg_entries[msg_entry_i + k].msg = &msg;
  699. }
  700. s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);
  701. // Start sending IPI messages
  702. s_smp_enabled = true;
  703. }
  704. void Processor::smp_cleanup_message(ProcessorMessage& msg)
  705. {
  706. switch (msg.type) {
  707. case ProcessorMessage::Callback:
  708. msg.callback_value().~Function();
  709. break;
  710. default:
  711. break;
  712. }
  713. }
  714. bool Processor::smp_process_pending_messages()
  715. {
  716. VERIFY(s_smp_enabled);
  717. bool did_process = false;
  718. enter_critical();
  719. if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
  720. // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
  721. auto reverse_list =
  722. [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
  723. ProcessorMessageEntry* rev_list = nullptr;
  724. while (list) {
  725. auto next = list->next;
  726. list->next = rev_list;
  727. rev_list = list;
  728. list = next;
  729. }
  730. return rev_list;
  731. };
  732. pending_msgs = reverse_list(pending_msgs);
  733. // now process in the right order
  734. ProcessorMessageEntry* next_msg;
  735. for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
  736. next_msg = cur_msg->next;
  737. auto msg = cur_msg->msg;
  738. dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", current_id(), VirtualAddress(msg));
  739. switch (msg->type) {
  740. case ProcessorMessage::Callback:
  741. msg->invoke_callback();
  742. break;
  743. case ProcessorMessage::FlushTlb:
  744. if (Memory::is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
  745. // We assume that we don't cross into kernel land!
  746. VERIFY(Memory::is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
  747. if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
  748. // This processor isn't using this page directory right now, we can ignore this request
  749. dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", current_id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
  750. break;
  751. }
  752. }
  753. flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
  754. break;
  755. }
  756. bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
  757. auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
  758. VERIFY(prev_refs != 0);
  759. if (prev_refs == 1) {
  760. // All processors handled this. If this is an async message,
  761. // we need to clean it up and return it to the pool
  762. if (is_async) {
  763. smp_cleanup_message(*msg);
  764. smp_return_to_pool(*msg);
  765. }
  766. }
  767. if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
  768. halt_this();
  769. }
  770. did_process = true;
  771. } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
  772. halt_this();
  773. }
  774. leave_critical();
  775. return did_process;
  776. }
  777. bool Processor::smp_enqueue_message(ProcessorMessage& msg)
  778. {
  779. // Note that it's quite possible that the other processor may pop
  780. // the queue at any given time. We rely on the fact that the messages
  781. // are pooled and never get freed!
  782. auto& msg_entry = msg.per_proc_entries[id()];
  783. VERIFY(msg_entry.msg == &msg);
  784. ProcessorMessageEntry* next = nullptr;
  785. for (;;) {
  786. msg_entry.next = next;
  787. if (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel))
  788. break;
  789. Processor::pause();
  790. }
  791. // If the enqueued message was the only message in the queue when posted,
  792. // we return true. This is used by callers when deciding whether to generate an IPI.
  793. return next == nullptr;
  794. }
  795. void Processor::smp_broadcast_message(ProcessorMessage& msg)
  796. {
  797. auto& current_processor = Processor::current();
  798. dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} processor: {}", current_processor.id(), VirtualAddress(&msg), count(), VirtualAddress(&current_processor));
  799. msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
  800. VERIFY(msg.refs > 0);
  801. bool need_broadcast = false;
  802. for_each(
  803. [&](Processor& proc) {
  804. if (&proc != &current_processor) {
  805. if (proc.smp_enqueue_message(msg))
  806. need_broadcast = true;
  807. }
  808. });
  809. // Now trigger an IPI on all other APs (unless all targets already had messages queued)
  810. if (need_broadcast)
  811. APIC::the().broadcast_ipi();
  812. }
  813. void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
  814. {
  815. auto& cur_proc = Processor::current();
  816. VERIFY(!msg.async);
  817. // If synchronous then we must cleanup and return the message back
  818. // to the pool. Otherwise, the last processor to complete it will return it
  819. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  820. Processor::pause();
  821. // We need to process any messages that may have been sent to
  822. // us while we're waiting. This also checks if another processor
  823. // may have requested us to halt.
  824. cur_proc.smp_process_pending_messages();
  825. }
  826. smp_cleanup_message(msg);
  827. smp_return_to_pool(msg);
  828. }
  829. void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
  830. {
  831. auto& current_processor = Processor::current();
  832. VERIFY(cpu != current_processor.id());
  833. auto& target_processor = processors()[cpu];
  834. msg.async = async;
  835. dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} processor: {}", current_processor.id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_processor));
  836. msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
  837. if (target_processor->smp_enqueue_message(msg)) {
  838. APIC::the().send_ipi(cpu);
  839. }
  840. if (!async) {
  841. // If synchronous then we must cleanup and return the message back
  842. // to the pool. Otherwise, the last processor to complete it will return it
  843. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  844. Processor::pause();
  845. // We need to process any messages that may have been sent to
  846. // us while we're waiting. This also checks if another processor
  847. // may have requested us to halt.
  848. current_processor.smp_process_pending_messages();
  849. }
  850. smp_cleanup_message(msg);
  851. smp_return_to_pool(msg);
  852. }
  853. }
  854. void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
  855. {
  856. auto& msg = smp_get_from_pool();
  857. msg.type = ProcessorMessage::Callback;
  858. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  859. smp_unicast_message(cpu, msg, async);
  860. }
  861. void Processor::smp_broadcast_flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  862. {
  863. auto& msg = smp_get_from_pool();
  864. msg.async = false;
  865. msg.type = ProcessorMessage::FlushTlb;
  866. msg.flush_tlb.page_directory = page_directory;
  867. msg.flush_tlb.ptr = vaddr.as_ptr();
  868. msg.flush_tlb.page_count = page_count;
  869. smp_broadcast_message(msg);
  870. // While the other processors handle this request, we'll flush ours
  871. flush_tlb_local(vaddr, page_count);
  872. // Now wait until everybody is done as well
  873. smp_broadcast_wait_sync(msg);
  874. }
  875. void Processor::smp_broadcast_halt()
  876. {
  877. // We don't want to use a message, because this could have been triggered
  878. // by being out of memory and we might not be able to get a message
  879. for_each(
  880. [&](Processor& proc) {
  881. proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
  882. });
  883. // Now trigger an IPI on all other APs
  884. APIC::the().broadcast_ipi();
  885. }
  886. void Processor::Processor::halt()
  887. {
  888. if (s_smp_enabled)
  889. smp_broadcast_halt();
  890. halt_this();
  891. }
  892. UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
  893. {
  894. size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
  895. for (size_t i = 0; i < pool_count; i++) {
  896. auto& entry = m_deferred_call_pool[i];
  897. entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
  898. new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
  899. entry.was_allocated = false;
  900. }
  901. m_pending_deferred_calls = nullptr;
  902. m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
  903. }
  904. void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
  905. {
  906. VERIFY(m_in_critical);
  907. VERIFY(!entry->was_allocated);
  908. entry->handler_value() = {};
  909. entry->next = m_free_deferred_call_pool_entry;
  910. m_free_deferred_call_pool_entry = entry;
  911. }
  912. DeferredCallEntry* Processor::deferred_call_get_free()
  913. {
  914. VERIFY(m_in_critical);
  915. if (m_free_deferred_call_pool_entry) {
  916. // Fast path, we have an entry in our pool
  917. auto* entry = m_free_deferred_call_pool_entry;
  918. m_free_deferred_call_pool_entry = entry->next;
  919. VERIFY(!entry->was_allocated);
  920. return entry;
  921. }
  922. auto* entry = new DeferredCallEntry;
  923. new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
  924. entry->was_allocated = true;
  925. return entry;
  926. }
  927. void Processor::deferred_call_execute_pending()
  928. {
  929. VERIFY(m_in_critical);
  930. if (!m_pending_deferred_calls)
  931. return;
  932. auto* pending_list = m_pending_deferred_calls;
  933. m_pending_deferred_calls = nullptr;
  934. // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
  935. auto reverse_list =
  936. [](DeferredCallEntry* list) -> DeferredCallEntry* {
  937. DeferredCallEntry* rev_list = nullptr;
  938. while (list) {
  939. auto next = list->next;
  940. list->next = rev_list;
  941. rev_list = list;
  942. list = next;
  943. }
  944. return rev_list;
  945. };
  946. pending_list = reverse_list(pending_list);
  947. do {
  948. pending_list->invoke_handler();
  949. // Return the entry back to the pool, or free it
  950. auto* next = pending_list->next;
  951. if (pending_list->was_allocated) {
  952. pending_list->handler_value().~Function();
  953. delete pending_list;
  954. } else
  955. deferred_call_return_to_pool(pending_list);
  956. pending_list = next;
  957. } while (pending_list);
  958. }
  959. void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
  960. {
  961. VERIFY(m_in_critical);
  962. entry->next = m_pending_deferred_calls;
  963. m_pending_deferred_calls = entry;
  964. }
  965. void Processor::deferred_call_queue(Function<void()> callback)
  966. {
  967. // NOTE: If we are called outside of a critical section and outside
  968. // of an irq handler, the function will be executed before we return!
  969. ScopedCritical critical;
  970. auto& cur_proc = Processor::current();
  971. auto* entry = cur_proc.deferred_call_get_free();
  972. entry->handler_value() = move(callback);
  973. cur_proc.deferred_call_queue_entry(entry);
  974. }
// Builds this processor's Global Descriptor Table, loads it, and points the
// segment/task registers at the new entries.
//
// In order:
//   1. Reset the GDT bookkeeping and write the null entry plus the flat
//      kernel/user code and data segments (the set differs per architecture).
//   2. i386 only: add a user TLS data segment and a kernel data segment
//      whose base is this Processor object.
//   3. Add the TSS descriptor for m_tss (two GDT slots on x86_64).
//   4. flush_gdt(), then load the task register with the TSS selector.
//   5. x86_64: write this Processor's address to MSR_GS_BASE.
//      Otherwise: reload DS/ES/FS/SS with the kernel data selector, GS with
//      the per-processor selector, and (i386) far-jump to reload CS.
UNMAP_AFTER_INIT void Processor::gdt_init()
{
    // Start from an empty table description.
    m_gdt_length = 0;
    m_gdtr.address = nullptr;
    m_gdtr.limit = 0;

    // Selector 0 gets an all-zero entry.
    write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
#if ARCH(I386)
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
#else
    // NOTE(review): the 0x00af... high words presumably select 64-bit code
    // segments (L bit) instead of the 32-bit default-size bit used above;
    // no data0 entry is written on this path. Confirm against the x86
    // segment descriptor layout.
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
#endif

#if ARCH(I386)
    // Ring-3 data segment used for thread-local storage. Base and limit are
    // left at zero here; enter_thread_context() fills them in per thread.
    Descriptor tls_descriptor {};
    tls_descriptor.low = tls_descriptor.high = 0;
    tls_descriptor.dpl = 3;
    tls_descriptor.segment_present = 1;
    tls_descriptor.granularity = 0;
    tls_descriptor.operation_size64 = 0;
    tls_descriptor.operation_size32 = 1;
    tls_descriptor.descriptor_type = 1;
    tls_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3

    // Ring-0 data segment that covers exactly this Processor object, so it
    // can be reached through GS (loaded further down).
    Descriptor gs_descriptor {};
    gs_descriptor.set_base(VirtualAddress { this });
    gs_descriptor.set_limit(sizeof(Processor) - 1);
    gs_descriptor.dpl = 0;
    gs_descriptor.segment_present = 1;
    gs_descriptor.granularity = 0;
    gs_descriptor.operation_size64 = 0;
    gs_descriptor.operation_size32 = 1;
    gs_descriptor.descriptor_type = 1;
    gs_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_PROC, gs_descriptor); // gs0
#endif

    // System descriptor (descriptor_type == 0) for this processor's TSS.
    // Only the low 32 bits of the TSS address go into this entry.
    Descriptor tss_descriptor {};
    tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
    tss_descriptor.set_limit(sizeof(TSS) - 1);
    tss_descriptor.dpl = 0;
    tss_descriptor.segment_present = 1;
    tss_descriptor.granularity = 0;
    tss_descriptor.operation_size64 = 0;
    tss_descriptor.operation_size32 = 1;
    tss_descriptor.descriptor_type = 0;
    tss_descriptor.type = 9;
    write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss

#if ARCH(X86_64)
    // The upper 32 bits of the TSS address live in a second GDT slot.
    Descriptor tss_descriptor_part2 {};
    tss_descriptor_part2.low = (size_t)&m_tss >> 32;
    write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
#endif

    // Activate the table, then the task register (which refers to an entry
    // of the table that was just loaded).
    flush_gdt();
    load_task_register(GDT_SELECTOR_TSS);

#if ARCH(X86_64)
    // Point the GS base at this Processor object via the MSR.
    MSR gs_base(MSR_GS_BASE);
    gs_base.set((u64)this);
#else
    // Reload the data segment registers with the kernel data selector...
    asm volatile(
        "mov %%ax, %%ds\n"
        "mov %%ax, %%es\n"
        "mov %%ax, %%fs\n"
        "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
        : "memory");
    // ...and GS with the per-processor segment set up above.
    set_gs(GDT_SELECTOR_PROC);
#endif

#if ARCH(I386)
    // Make sure CS points to the kernel code descriptor.
    // clang-format off
    asm volatile(
        "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
        "sanity:\n");
    // clang-format on
#endif
}
  1053. extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
  1054. {
  1055. VERIFY(!are_interrupts_enabled());
  1056. VERIFY(is_kernel_mode());
  1057. dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
  1058. VERIFY(to_thread == Thread::current());
  1059. Scheduler::enter_current(*from_thread, true);
  1060. auto in_critical = to_thread->saved_critical();
  1061. VERIFY(in_critical > 0);
  1062. Processor::restore_in_critical(in_critical);
  1063. // Since we got here and don't have Scheduler::context_switch in the
  1064. // call stack (because this is the first time we switched into this
  1065. // context), we need to notify the scheduler so that it can release
  1066. // the scheduler lock. We don't want to enable interrupts at this point
  1067. // as we're still in the middle of a context switch. Doing so could
  1068. // trigger a context switch within a context switch, leading to a crash.
  1069. FlatPtr flags = trap->regs->flags();
  1070. Scheduler::leave_on_first_switch(flags & ~0x200);
  1071. }
// Performs the per-thread state hand-over of a context switch from
// `from_thread` to `to_thread` on the current processor.
//
// In order:
//   - mark `to_thread` as the processor's current thread,
//   - save the outgoing thread's FPU state,
//   - i386 only: save FS/GS of the outgoing thread and load the incoming ones,
//   - save/load/clear the debug registers depending on whether each
//     thread's process is traced,
//   - install the incoming thread's TLS base (GDT entry on i386,
//     MSR_FS_BASE otherwise),
//   - switch CR3 if the two threads use different values,
//   - record which CPU the thread now runs on and restore its saved
//     critical-section depth,
//   - restore the incoming thread's FPU state.
//
// `from_thread` must not be in the Running state unless it is the same
// thread as `to_thread`; `to_thread` must be Running.
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
{
    VERIFY(from_thread == to_thread || from_thread->state() != Thread::Running);
    VERIFY(to_thread->state() == Thread::Running);

    // Chooses between fxsave/fxrstor and fnsave/frstor below.
    bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
    Processor::set_current_thread(*to_thread);

    auto& from_regs = from_thread->regs();
    auto& to_regs = to_thread->regs();

    // Save the outgoing thread's FPU state into its own buffer.
    if (has_fxsr)
        asm volatile("fxsave %0"
                     : "=m"(from_thread->fpu_state()));
    else
        asm volatile("fnsave %0"
                     : "=m"(from_thread->fpu_state()));

#if ARCH(I386)
    // Swap the FS/GS segment selectors between the two threads.
    from_regs.fs = get_fs();
    from_regs.gs = get_gs();
    set_fs(to_regs.fs);
    set_gs(to_regs.gs);
#endif

    // Debug registers are only preserved for traced processes; an untraced
    // incoming thread gets them cleared.
    if (from_thread->process().is_traced())
        read_debug_registers_into(from_thread->debug_register_state());

    if (to_thread->process().is_traced()) {
        write_debug_registers_from(to_thread->debug_register_state());
    } else {
        clear_debug_registers();
    }

    auto& processor = Processor::current();
#if ARCH(I386)
    // Retarget the TLS segment in this processor's GDT at the incoming
    // thread's thread-specific region.
    auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
    tls_descriptor.set_base(to_thread->thread_specific_data());
    tls_descriptor.set_limit(to_thread->thread_specific_region_size());
#else
    // Thread-specific data is addressed through the FS base MSR here.
    MSR fs_base_msr(MSR_FS_BASE);
    fs_base_msr.set(to_thread->thread_specific_data().get());
#endif

    // Only reload CR3 when the saved values actually differ.
    if (from_regs.cr3 != to_regs.cr3)
        write_cr3(to_regs.cr3);

    to_thread->set_cpu(processor.id());

    // Reinstate the critical-section depth the incoming thread was saved with.
    auto in_critical = to_thread->saved_critical();
    VERIFY(in_critical > 0);
    Processor::restore_in_critical(in_critical);

    // Load the incoming thread's FPU state.
    if (has_fxsr)
        asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
    else
        asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));

    // TODO: ioperm?
}
  1120. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
  1121. {
  1122. VERIFY_INTERRUPTS_DISABLED();
  1123. thread->regs().set_flags(flags);
  1124. return Processor::current().init_context(*thread, true);
  1125. }
  1126. void Processor::assume_context(Thread& thread, FlatPtr flags)
  1127. {
  1128. dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);
  1129. VERIFY_INTERRUPTS_DISABLED();
  1130. Scheduler::prepare_after_exec();
  1131. // in_critical() should be 2 here. The critical section in Process::exec
  1132. // and then the scheduler lock
  1133. VERIFY(Processor::in_critical() == 2);
  1134. do_assume_context(&thread, flags);
  1135. VERIFY_NOT_REACHED();
  1136. }
  1137. }