Processor.cpp 49 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/BuiltinWrappers.h>
  8. #include <AK/Format.h>
  9. #include <AK/StdLibExtras.h>
  10. #include <AK/StringBuilder.h>
  11. #include <AK/Types.h>
  12. #include <Kernel/Interrupts/APIC.h>
  13. #include <Kernel/Process.h>
  14. #include <Kernel/Scheduler.h>
  15. #include <Kernel/Sections.h>
  16. #include <Kernel/StdLib.h>
  17. #include <Kernel/Thread.h>
  18. #include <Kernel/Arch/Processor.h>
  19. #include <Kernel/Arch/ScopedCritical.h>
  20. #include <Kernel/Arch/x86/CPUID.h>
  21. #include <Kernel/Arch/x86/InterruptDisabler.h>
  22. #include <Kernel/Arch/x86/Interrupts.h>
  23. #include <Kernel/Arch/x86/MSR.h>
  24. #include <Kernel/Arch/x86/ProcessorInfo.h>
  25. #include <Kernel/Arch/x86/SafeMem.h>
  26. #include <Kernel/Arch/x86/TrapFrame.h>
  27. #include <Kernel/Memory/PageDirectory.h>
  28. #include <Kernel/Memory/ScopedAddressSpaceSwitcher.h>
  29. namespace Kernel {
  30. READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;
  31. READONLY_AFTER_INIT static ProcessorContainer s_processors {};
  32. READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
  33. READONLY_AFTER_INIT static volatile bool s_smp_enabled;
  34. static Atomic<ProcessorMessage*> s_message_pool;
  35. Atomic<u32> Processor::s_idle_cpu_mask { 0 };
  36. // The compiler can't see the calls to these functions inside assembly.
  37. // Declare them, to avoid dead code warnings.
  38. extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
  39. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
  40. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
  41. extern "C" void syscall_entry();
  42. bool Processor::is_smp_enabled()
  43. {
  44. return s_smp_enabled;
  45. }
  46. UNMAP_AFTER_INIT static void sse_init()
  47. {
  48. write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
  49. write_cr4(read_cr4() | 0x600);
  50. }
  51. void exit_kernel_thread(void)
  52. {
  53. Thread::current()->exit();
  54. }
  55. UNMAP_AFTER_INIT void Processor::cpu_detect()
  56. {
  57. // NOTE: This is called during Processor::early_initialize, we cannot
  58. // safely log at this point because we don't have kmalloc
  59. // initialized yet!
  60. m_features = static_cast<CPUFeature>(0);
  61. CPUID processor_info(0x1);
  62. auto handle_edx_bit_11_feature = [&] {
  63. u32 stepping = processor_info.eax() & 0xf;
  64. u32 model = (processor_info.eax() >> 4) & 0xf;
  65. u32 family = (processor_info.eax() >> 8) & 0xf;
  66. // FIXME: I have no clue what these mean or where it's from (the Intel manual I've seen just says EDX[11] is SEP).
  67. // If you do, please convert them to constants or add comments!
  68. if (!(family == 6 && model < 3 && stepping < 3))
  69. m_features |= CPUFeature::SEP;
  70. if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
  71. m_features |= CPUFeature::CONSTANT_TSC;
  72. };
  73. if (processor_info.ecx() & (1 << 0))
  74. m_features |= CPUFeature::SSE3;
  75. if (processor_info.ecx() & (1 << 9))
  76. m_features |= CPUFeature::SSSE3;
  77. if (processor_info.ecx() & (1 << 19))
  78. m_features |= CPUFeature::SSE4_1;
  79. if (processor_info.ecx() & (1 << 20))
  80. m_features |= CPUFeature::SSE4_2;
  81. if (processor_info.ecx() & (1 << 26))
  82. m_features |= CPUFeature::XSAVE;
  83. if (processor_info.ecx() & (1 << 28))
  84. m_features |= CPUFeature::AVX;
  85. if (processor_info.ecx() & (1 << 30))
  86. m_features |= CPUFeature::RDRAND;
  87. if (processor_info.ecx() & (1 << 31))
  88. m_features |= CPUFeature::HYPERVISOR;
  89. if (processor_info.edx() & (1 << 4))
  90. m_features |= CPUFeature::TSC;
  91. if (processor_info.edx() & (1 << 6))
  92. m_features |= CPUFeature::PAE;
  93. if (processor_info.edx() & (1 << 13))
  94. m_features |= CPUFeature::PGE;
  95. if (processor_info.edx() & (1 << 11))
  96. handle_edx_bit_11_feature();
  97. if (processor_info.edx() & (1 << 16))
  98. m_features |= CPUFeature::PAT;
  99. if (processor_info.edx() & (1 << 23))
  100. m_features |= CPUFeature::MMX;
  101. if (processor_info.edx() & (1 << 24))
  102. m_features |= CPUFeature::FXSR;
  103. if (processor_info.edx() & (1 << 25))
  104. m_features |= CPUFeature::SSE;
  105. if (processor_info.edx() & (1 << 26))
  106. m_features |= CPUFeature::SSE2;
  107. CPUID extended_features(0x7);
  108. if (extended_features.ebx() & (1 << 7))
  109. m_features |= CPUFeature::SMEP;
  110. if (extended_features.ebx() & (1 << 18))
  111. m_features |= CPUFeature::RDSEED;
  112. if (extended_features.ebx() & (1 << 20))
  113. m_features |= CPUFeature::SMAP;
  114. if (extended_features.ecx() & (1 << 2))
  115. m_features |= CPUFeature::UMIP;
  116. u32 max_extended_leaf = CPUID(0x80000000).eax();
  117. if (max_extended_leaf >= 0x80000001) {
  118. CPUID extended_processor_info(0x80000001);
  119. if (extended_processor_info.edx() & (1 << 11))
  120. m_features |= CPUFeature::SYSCALL; // Only available in 64 bit mode
  121. if (extended_processor_info.edx() & (1 << 20))
  122. m_features |= CPUFeature::NX;
  123. if (extended_processor_info.edx() & (1 << 27))
  124. m_features |= CPUFeature::RDTSCP;
  125. if (extended_processor_info.edx() & (1 << 29))
  126. m_features |= CPUFeature::LM;
  127. }
  128. if (max_extended_leaf >= 0x80000007) {
  129. CPUID cpuid(0x80000007);
  130. if (cpuid.edx() & (1 << 8)) {
  131. m_features |= CPUFeature::CONSTANT_TSC;
  132. m_features |= CPUFeature::NONSTOP_TSC;
  133. }
  134. }
  135. #if ARCH(X86_64)
  136. m_has_qemu_hvf_quirk = false;
  137. #endif
  138. if (max_extended_leaf >= 0x80000008) {
  139. // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
  140. CPUID cpuid(0x80000008);
  141. m_physical_address_bit_width = cpuid.eax() & 0xff;
  142. // CPUID.80000008H:EAX[15:8] reports the linear-address width supported by the processor.
  143. m_virtual_address_bit_width = (cpuid.eax() >> 8) & 0xff;
  144. } else {
  145. // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
  146. m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
  147. // Processors that do not support CPUID function 80000008H, support a linear-address width of 32.
  148. m_virtual_address_bit_width = 32;
  149. #if ARCH(X86_64)
  150. // Workaround QEMU hypervisor.framework bug
  151. // https://gitlab.com/qemu-project/qemu/-/issues/664
  152. //
  153. // We detect this as follows:
  154. // * We're in a hypervisor
  155. // * hypervisor_leaf_range is null under Hypervisor.framework
  156. // * m_physical_address_bit_width is 36 bits
  157. if (has_feature(CPUFeature::HYPERVISOR)) {
  158. CPUID hypervisor_leaf_range(0x40000000);
  159. if (!hypervisor_leaf_range.ebx() && m_physical_address_bit_width == 36) {
  160. m_has_qemu_hvf_quirk = true;
  161. m_virtual_address_bit_width = 48;
  162. }
  163. }
  164. #endif
  165. }
  166. }
  167. UNMAP_AFTER_INIT void Processor::cpu_setup()
  168. {
  169. // NOTE: This is called during Processor::early_initialize, we cannot
  170. // safely log at this point because we don't have kmalloc
  171. // initialized yet!
  172. cpu_detect();
  173. if (has_feature(CPUFeature::SSE)) {
  174. // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
  175. // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
  176. VERIFY(has_feature(CPUFeature::FXSR));
  177. sse_init();
  178. }
  179. write_cr0(read_cr0() | 0x00010000);
  180. if (has_feature(CPUFeature::PGE)) {
  181. // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
  182. write_cr4(read_cr4() | 0x80);
  183. }
  184. if (has_feature(CPUFeature::NX)) {
  185. // Turn on IA32_EFER.NXE
  186. MSR ia32_efer(MSR_IA32_EFER);
  187. ia32_efer.set(ia32_efer.get() | 0x800);
  188. }
  189. if (has_feature(CPUFeature::PAT)) {
  190. MSR ia32_pat(MSR_IA32_PAT);
  191. // Set PA4 to Write Comine. This allows us to
  192. // use this mode by only setting the bit in the PTE
  193. // and leaving all other bits in the upper levels unset,
  194. // which maps to setting bit 3 of the index, resulting
  195. // in the index value 0 or 4.
  196. u64 pat = ia32_pat.get() & ~(0x7ull << 32);
  197. pat |= 0x1ull << 32; // set WC mode for PA4
  198. ia32_pat.set(pat);
  199. }
  200. if (has_feature(CPUFeature::SMEP)) {
  201. // Turn on CR4.SMEP
  202. write_cr4(read_cr4() | 0x100000);
  203. }
  204. if (has_feature(CPUFeature::SMAP)) {
  205. // Turn on CR4.SMAP
  206. write_cr4(read_cr4() | 0x200000);
  207. }
  208. if (has_feature(CPUFeature::UMIP)) {
  209. write_cr4(read_cr4() | 0x800);
  210. }
  211. if (has_feature(CPUFeature::TSC)) {
  212. write_cr4(read_cr4() | 0x4);
  213. }
  214. if (has_feature(CPUFeature::XSAVE)) {
  215. // Turn on CR4.OSXSAVE
  216. write_cr4(read_cr4() | 0x40000);
  217. // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
  218. // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
  219. write_xcr0(0x1);
  220. if (has_feature(CPUFeature::AVX)) {
  221. // Turn on SSE, AVX and x87 flags
  222. write_xcr0(read_xcr0() | 0x7);
  223. }
  224. }
  225. #if ARCH(X86_64)
  226. // x86_64 processors must support the syscall feature.
  227. VERIFY(has_feature(CPUFeature::SYSCALL));
  228. MSR efer_msr(MSR_EFER);
  229. efer_msr.set(efer_msr.get() | 1u);
  230. // Write code and stack selectors to the STAR MSR. The first value stored in bits 63:48 controls the sysret CS (value + 0x10) and SS (value + 0x8),
  231. // and the value stored in bits 47:32 controls the syscall CS (value) and SS (value + 0x8).
  232. u64 star = 0;
  233. star |= 0x13ul << 48u;
  234. star |= 0x08ul << 32u;
  235. MSR star_msr(MSR_STAR);
  236. star_msr.set(star);
  237. // Write the syscall entry point to the LSTAR MSR.
  238. MSR lstar_msr(MSR_LSTAR);
  239. lstar_msr.set(reinterpret_cast<u64>(&syscall_entry));
  240. // Write the SFMASK MSR. This MSR controls which bits of rflags are masked when a syscall instruction is executed -
  241. // if a bit is set in sfmask, the corresponding bit in rflags is cleared. The value set here clears most of rflags,
  242. // but keeps the reserved and virtualization bits intact. The userspace rflags value is saved in r11 by syscall.
  243. constexpr u64 rflags_mask = 0x257fd5u;
  244. MSR sfmask_msr(MSR_SFMASK);
  245. sfmask_msr.set(rflags_mask);
  246. #endif
  247. }
  248. NonnullOwnPtr<KString> Processor::features_string() const
  249. {
  250. StringBuilder builder;
  251. auto feature_to_str =
  252. [](CPUFeature f) -> StringView {
  253. switch (f) {
  254. case CPUFeature::NX:
  255. return "nx"sv;
  256. case CPUFeature::PAE:
  257. return "pae"sv;
  258. case CPUFeature::PGE:
  259. return "pge"sv;
  260. case CPUFeature::RDRAND:
  261. return "rdrand"sv;
  262. case CPUFeature::RDSEED:
  263. return "rdseed"sv;
  264. case CPUFeature::SMAP:
  265. return "smap"sv;
  266. case CPUFeature::SMEP:
  267. return "smep"sv;
  268. case CPUFeature::SSE:
  269. return "sse"sv;
  270. case CPUFeature::TSC:
  271. return "tsc"sv;
  272. case CPUFeature::RDTSCP:
  273. return "rdtscp"sv;
  274. case CPUFeature::CONSTANT_TSC:
  275. return "constant_tsc"sv;
  276. case CPUFeature::NONSTOP_TSC:
  277. return "nonstop_tsc"sv;
  278. case CPUFeature::UMIP:
  279. return "umip"sv;
  280. case CPUFeature::SEP:
  281. return "sep"sv;
  282. case CPUFeature::SYSCALL:
  283. return "syscall"sv;
  284. case CPUFeature::MMX:
  285. return "mmx"sv;
  286. case CPUFeature::FXSR:
  287. return "fxsr"sv;
  288. case CPUFeature::SSE2:
  289. return "sse2"sv;
  290. case CPUFeature::SSE3:
  291. return "sse3"sv;
  292. case CPUFeature::SSSE3:
  293. return "ssse3"sv;
  294. case CPUFeature::SSE4_1:
  295. return "sse4.1"sv;
  296. case CPUFeature::SSE4_2:
  297. return "sse4.2"sv;
  298. case CPUFeature::XSAVE:
  299. return "xsave"sv;
  300. case CPUFeature::AVX:
  301. return "avx"sv;
  302. case CPUFeature::LM:
  303. return "lm"sv;
  304. case CPUFeature::HYPERVISOR:
  305. return "hypervisor"sv;
  306. // no default statement here intentionally so that we get
  307. // a warning if a new feature is forgotten to be added here
  308. case CPUFeature::PAT:
  309. return "pat"sv;
  310. }
  311. // Shouldn't ever happen
  312. return "???"sv;
  313. };
  314. bool first = true;
  315. for (u32 flag = 1; flag != 0; flag <<= 1) {
  316. if (has_feature(static_cast<CPUFeature>(flag))) {
  317. if (first)
  318. first = false;
  319. else
  320. MUST(builder.try_append(' '));
  321. auto str = feature_to_str(static_cast<CPUFeature>(flag));
  322. MUST(builder.try_append(str));
  323. }
  324. }
  325. return KString::must_create(builder.string_view());
  326. }
  327. UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
  328. {
  329. m_self = this;
  330. m_cpu = cpu;
  331. m_in_irq = 0;
  332. m_in_critical = 0;
  333. m_invoke_scheduler_async = false;
  334. m_scheduler_initialized = false;
  335. m_in_scheduler = true;
  336. m_message_queue = nullptr;
  337. m_idle_thread = nullptr;
  338. m_current_thread = nullptr;
  339. m_info = nullptr;
  340. m_halt_requested = false;
  341. if (cpu == 0) {
  342. s_smp_enabled = false;
  343. g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
  344. } else {
  345. g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
  346. }
  347. deferred_call_pool_init();
  348. cpu_setup();
  349. gdt_init();
  350. VERIFY(is_initialized()); // sanity check
  351. VERIFY(&current() == this); // sanity check
  352. }
  353. UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
  354. {
  355. VERIFY(m_self == this);
  356. VERIFY(&current() == this); // sanity check
  357. dmesgln("CPU[{}]: Supported features: {}", current_id(), features_string());
  358. if (!has_feature(CPUFeature::RDRAND))
  359. dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", current_id());
  360. dmesgln("CPU[{}]: Physical address bit width: {}", current_id(), m_physical_address_bit_width);
  361. dmesgln("CPU[{}]: Virtual address bit width: {}", current_id(), m_virtual_address_bit_width);
  362. #if ARCH(X86_64)
  363. if (m_has_qemu_hvf_quirk)
  364. dmesgln("CPU[{}]: Applied correction for QEMU Hypervisor.framework quirk", current_id());
  365. #endif
  366. if (cpu == 0)
  367. idt_init();
  368. else
  369. flush_idt();
  370. if (cpu == 0) {
  371. VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
  372. asm volatile("fninit");
  373. if (has_feature(CPUFeature::FXSR))
  374. asm volatile("fxsave %0"
  375. : "=m"(s_clean_fpu_state));
  376. else
  377. asm volatile("fnsave %0"
  378. : "=m"(s_clean_fpu_state));
  379. if (has_feature(CPUFeature::HYPERVISOR))
  380. detect_hypervisor();
  381. }
  382. m_info = new ProcessorInfo(*this);
  383. {
  384. // We need to prevent races between APs starting up at the same time
  385. VERIFY(cpu < s_processors.size());
  386. s_processors[cpu] = this;
  387. }
  388. }
  389. UNMAP_AFTER_INIT void Processor::detect_hypervisor()
  390. {
  391. CPUID hypervisor_leaf_range(0x40000000);
  392. // Get signature of hypervisor.
  393. alignas(sizeof(u32)) char hypervisor_signature_buffer[13];
  394. *reinterpret_cast<u32*>(hypervisor_signature_buffer) = hypervisor_leaf_range.ebx();
  395. *reinterpret_cast<u32*>(hypervisor_signature_buffer + 4) = hypervisor_leaf_range.ecx();
  396. *reinterpret_cast<u32*>(hypervisor_signature_buffer + 8) = hypervisor_leaf_range.edx();
  397. hypervisor_signature_buffer[12] = '\0';
  398. StringView hypervisor_signature(hypervisor_signature_buffer);
  399. dmesgln("CPU[{}]: CPUID hypervisor signature '{}' ({:#x} {:#x} {:#x}), max leaf {:#x}", current_id(), hypervisor_signature, hypervisor_leaf_range.ebx(), hypervisor_leaf_range.ecx(), hypervisor_leaf_range.edx(), hypervisor_leaf_range.eax());
  400. if (hypervisor_signature == "Microsoft Hv"sv)
  401. detect_hypervisor_hyperv(hypervisor_leaf_range);
  402. }
  403. UNMAP_AFTER_INIT void Processor::detect_hypervisor_hyperv(CPUID const& hypervisor_leaf_range)
  404. {
  405. if (hypervisor_leaf_range.eax() < 0x40000001)
  406. return;
  407. CPUID hypervisor_interface(0x40000001);
  408. // Get signature of hypervisor interface.
  409. alignas(sizeof(u32)) char interface_signature_buffer[5];
  410. *reinterpret_cast<u32*>(interface_signature_buffer) = hypervisor_interface.eax();
  411. interface_signature_buffer[4] = '\0';
  412. StringView hyperv_interface_signature(interface_signature_buffer);
  413. dmesgln("CPU[{}]: Hyper-V interface signature '{}' ({:#x})", current_id(), hyperv_interface_signature, hypervisor_interface.eax());
  414. if (hypervisor_leaf_range.eax() < 0x40000001)
  415. return;
  416. CPUID hypervisor_sysid(0x40000002);
  417. dmesgln("CPU[{}]: Hyper-V system identity {}.{}, build number {}", current_id(), hypervisor_sysid.ebx() >> 16, hypervisor_sysid.ebx() & 0xFFFF, hypervisor_sysid.eax());
  418. if (hypervisor_leaf_range.eax() < 0x40000005 || hyperv_interface_signature != "Hv#1"sv)
  419. return;
  420. dmesgln("CPU[{}]: Hyper-V hypervisor detected", current_id());
  421. // TODO: Actually do something with Hyper-V.
  422. }
  423. void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
  424. {
  425. u16 i = (selector & 0xfffc) >> 3;
  426. u32 prev_gdt_length = m_gdt_length;
  427. if (i >= m_gdt_length) {
  428. m_gdt_length = i + 1;
  429. VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
  430. m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
  431. }
  432. m_gdt[i].low = low;
  433. m_gdt[i].high = high;
  434. // clear selectors we may have skipped
  435. for (auto j = prev_gdt_length; j < i; ++j) {
  436. m_gdt[j].low = 0;
  437. m_gdt[j].high = 0;
  438. }
  439. }
  440. void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
  441. {
  442. write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
  443. }
  444. Descriptor& Processor::get_gdt_entry(u16 selector)
  445. {
  446. u16 i = (selector & 0xfffc) >> 3;
  447. return *(Descriptor*)(&m_gdt[i]);
  448. }
  449. void Processor::flush_gdt()
  450. {
  451. m_gdtr.address = m_gdt;
  452. m_gdtr.limit = (m_gdt_length * 8) - 1;
  453. asm volatile("lgdt %0" ::"m"(m_gdtr)
  454. : "memory");
  455. }
  456. const DescriptorTablePointer& Processor::get_gdtr()
  457. {
  458. return m_gdtr;
  459. }
  460. ErrorOr<Vector<FlatPtr, 32>> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
  461. {
  462. FlatPtr frame_ptr = 0, ip = 0;
  463. Vector<FlatPtr, 32> stack_trace;
  464. auto walk_stack = [&](FlatPtr stack_ptr) -> ErrorOr<void> {
  465. constexpr size_t max_stack_frames = 4096;
  466. bool is_walking_userspace_stack = false;
  467. TRY(stack_trace.try_append(ip));
  468. size_t count = 1;
  469. while (stack_ptr && stack_trace.size() < max_stack_frames) {
  470. FlatPtr retaddr;
  471. count++;
  472. if (max_frames != 0 && count > max_frames)
  473. break;
  474. if (!Memory::is_user_address(VirtualAddress { stack_ptr })) {
  475. if (is_walking_userspace_stack) {
  476. dbgln("SHENANIGANS! Userspace stack points back into kernel memory");
  477. break;
  478. }
  479. } else {
  480. is_walking_userspace_stack = true;
  481. }
  482. if (Memory::is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
  483. if (copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]).is_error() || !retaddr)
  484. break;
  485. TRY(stack_trace.try_append(retaddr));
  486. if (copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr).is_error())
  487. break;
  488. } else {
  489. void* fault_at;
  490. if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
  491. break;
  492. TRY(stack_trace.try_append(retaddr));
  493. if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
  494. break;
  495. }
  496. }
  497. return {};
  498. };
  499. auto capture_current_thread = [&]() {
  500. frame_ptr = (FlatPtr)__builtin_frame_address(0);
  501. ip = (FlatPtr)__builtin_return_address(0);
  502. return walk_stack(frame_ptr);
  503. };
  504. // Since the thread may be running on another processor, there
  505. // is a chance a context switch may happen while we're trying
  506. // to get it. It also won't be entirely accurate and merely
  507. // reflect the status at the last context switch.
  508. SpinlockLocker lock(g_scheduler_lock);
  509. if (&thread == Processor::current_thread()) {
  510. VERIFY(thread.state() == Thread::State::Running);
  511. // Leave the scheduler lock. If we trigger page faults we may
  512. // need to be preempted. Since this is our own thread it won't
  513. // cause any problems as the stack won't change below this frame.
  514. lock.unlock();
  515. TRY(capture_current_thread());
  516. } else if (thread.is_active()) {
  517. VERIFY(thread.cpu() != Processor::current_id());
  518. // If this is the case, the thread is currently running
  519. // on another processor. We can't trust the kernel stack as
  520. // it may be changing at any time. We need to probably send
  521. // an IPI to that processor, have it walk the stack and wait
  522. // until it returns the data back to us
  523. auto& proc = Processor::current();
  524. ErrorOr<void> result;
  525. smp_unicast(
  526. thread.cpu(),
  527. [&]() {
  528. dbgln("CPU[{}] getting stack for cpu #{}", Processor::current_id(), proc.id());
  529. ScopedAddressSpaceSwitcher switcher(thread.process());
  530. VERIFY(&Processor::current() != &proc);
  531. VERIFY(&thread == Processor::current_thread());
  532. // NOTE: Because the other processor is still holding the
  533. // scheduler lock while waiting for this callback to finish,
  534. // the current thread on the target processor cannot change
  535. // TODO: What to do about page faults here? We might deadlock
  536. // because the other processor is still holding the
  537. // scheduler lock...
  538. result = capture_current_thread();
  539. },
  540. false);
  541. TRY(result);
  542. } else {
  543. switch (thread.state()) {
  544. case Thread::State::Running:
  545. VERIFY_NOT_REACHED(); // should have been handled above
  546. case Thread::State::Runnable:
  547. case Thread::State::Stopped:
  548. case Thread::State::Blocked:
  549. case Thread::State::Dying:
  550. case Thread::State::Dead: {
  551. // We need to retrieve ebp from what was last pushed to the kernel
  552. // stack. Before switching out of that thread, it switch_context
  553. // pushed the callee-saved registers, and the last of them happens
  554. // to be ebp.
  555. ScopedAddressSpaceSwitcher switcher(thread.process());
  556. auto& regs = thread.regs();
  557. auto* stack_top = reinterpret_cast<FlatPtr*>(regs.sp());
  558. if (Memory::is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
  559. if (copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]).is_error())
  560. frame_ptr = 0;
  561. } else {
  562. void* fault_at;
  563. if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
  564. frame_ptr = 0;
  565. }
  566. ip = regs.ip();
  567. // TODO: We need to leave the scheduler lock here, but we also
  568. // need to prevent the target thread from being run while
  569. // we walk the stack
  570. lock.unlock();
  571. TRY(walk_stack(frame_ptr));
  572. break;
  573. }
  574. default:
  575. dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
  576. break;
  577. }
  578. }
  579. return stack_trace;
  580. }
  581. ProcessorContainer& Processor::processors()
  582. {
  583. return s_processors;
  584. }
  585. Processor& Processor::by_id(u32 id)
  586. {
  587. return *s_processors[id];
  588. }
  589. void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
  590. {
  591. VERIFY_INTERRUPTS_DISABLED();
  592. VERIFY(&Processor::current() == this);
  593. trap.prev_irq_level = m_in_irq;
  594. if (raise_irq)
  595. m_in_irq++;
  596. auto* current_thread = Processor::current_thread();
  597. if (current_thread) {
  598. auto& current_trap = current_thread->current_trap();
  599. trap.next_trap = current_trap;
  600. current_trap = &trap;
  601. // The cs register of this trap tells us where we will return back to
  602. auto new_previous_mode = ((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  603. if (current_thread->set_previous_mode(new_previous_mode) && trap.prev_irq_level == 0) {
  604. current_thread->update_time_scheduled(Scheduler::current_time(), new_previous_mode == Thread::PreviousMode::KernelMode, false);
  605. }
  606. } else {
  607. trap.next_trap = nullptr;
  608. }
  609. }
  610. void Processor::exit_trap(TrapFrame& trap)
  611. {
  612. VERIFY_INTERRUPTS_DISABLED();
  613. VERIFY(&Processor::current() == this);
  614. // Temporarily enter a critical section. This is to prevent critical
  615. // sections entered and left within e.g. smp_process_pending_messages
  616. // to trigger a context switch while we're executing this function
  617. // See the comment at the end of the function why we don't use
  618. // ScopedCritical here.
  619. m_in_critical = m_in_critical + 1;
  620. VERIFY(m_in_irq >= trap.prev_irq_level);
  621. m_in_irq = trap.prev_irq_level;
  622. if (s_smp_enabled)
  623. smp_process_pending_messages();
  624. // Process the deferred call queue. Among other things, this ensures
  625. // that any pending thread unblocks happen before we enter the scheduler.
  626. deferred_call_execute_pending();
  627. auto* current_thread = Processor::current_thread();
  628. if (current_thread) {
  629. auto& current_trap = current_thread->current_trap();
  630. current_trap = trap.next_trap;
  631. Thread::PreviousMode new_previous_mode;
  632. if (current_trap) {
  633. VERIFY(current_trap->regs);
  634. // If we have another higher level trap then we probably returned
  635. // from an interrupt or irq handler. The cs register of the
  636. // new/higher level trap tells us what the mode prior to it was
  637. new_previous_mode = ((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  638. } else {
  639. // If we don't have a higher level trap then we're back in user mode.
  640. // Which means that the previous mode prior to being back in user mode was kernel mode
  641. new_previous_mode = Thread::PreviousMode::KernelMode;
  642. }
  643. if (current_thread->set_previous_mode(new_previous_mode))
  644. current_thread->update_time_scheduled(Scheduler::current_time(), true, false);
  645. }
  646. VERIFY_INTERRUPTS_DISABLED();
  647. // Leave the critical section without actually enabling interrupts.
  648. // We don't want context switches to happen until we're explicitly
  649. // triggering a switch in check_invoke_scheduler.
  650. m_in_critical = m_in_critical - 1;
  651. if (!m_in_irq && !m_in_critical)
  652. check_invoke_scheduler();
  653. }
  654. void Processor::check_invoke_scheduler()
  655. {
  656. InterruptDisabler disabler;
  657. VERIFY(!m_in_irq);
  658. VERIFY(!m_in_critical);
  659. VERIFY(&Processor::current() == this);
  660. if (m_invoke_scheduler_async && m_scheduler_initialized) {
  661. m_invoke_scheduler_async = false;
  662. Scheduler::invoke_async();
  663. }
  664. }
  665. void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
  666. {
  667. auto ptr = vaddr.as_ptr();
  668. while (page_count > 0) {
  669. // clang-format off
  670. asm volatile("invlpg %0"
  671. :
  672. : "m"(*ptr)
  673. : "memory");
  674. // clang-format on
  675. ptr += PAGE_SIZE;
  676. page_count--;
  677. }
  678. }
  679. void Processor::flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  680. {
  681. if (s_smp_enabled && (!Memory::is_user_address(vaddr) || Process::current().thread_count() > 1))
  682. smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
  683. else
  684. flush_tlb_local(vaddr, page_count);
  685. }
  686. void Processor::smp_return_to_pool(ProcessorMessage& msg)
  687. {
  688. ProcessorMessage* next = nullptr;
  689. for (;;) {
  690. msg.next = next;
  691. if (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel))
  692. break;
  693. Processor::pause();
  694. }
  695. }
  696. ProcessorMessage& Processor::smp_get_from_pool()
  697. {
  698. ProcessorMessage* msg;
  699. // The assumption is that messages are never removed from the pool!
  700. for (;;) {
  701. msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
  702. if (!msg) {
  703. if (!Processor::current().smp_process_pending_messages()) {
  704. Processor::pause();
  705. }
  706. continue;
  707. }
  708. // If another processor were to use this message in the meanwhile,
  709. // "msg" is still valid (because it never gets freed). We'd detect
  710. // this because the expected value "msg" and pool would
  711. // no longer match, and the compare_exchange will fail. But accessing
  712. // "msg->next" is always safe here.
  713. if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
  714. // We successfully "popped" this available message
  715. break;
  716. }
  717. }
  718. VERIFY(msg != nullptr);
  719. return *msg;
  720. }
  721. u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
  722. {
  723. VERIFY_INTERRUPTS_DISABLED();
  724. VERIFY(wake_count > 0);
  725. if (!s_smp_enabled)
  726. return 0;
  727. // Wake at most N - 1 processors
  728. if (wake_count >= Processor::count()) {
  729. wake_count = Processor::count() - 1;
  730. VERIFY(wake_count > 0);
  731. }
  732. u32 current_id = Processor::current_id();
  733. u32 did_wake_count = 0;
  734. auto& apic = APIC::the();
  735. while (did_wake_count < wake_count) {
  736. // Try to get a set of idle CPUs and flip them to busy
  737. u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
  738. u32 idle_count = popcount(idle_mask);
  739. if (idle_count == 0)
  740. break; // No (more) idle processor available
  741. u32 found_mask = 0;
  742. for (u32 i = 0; i < idle_count; i++) {
  743. u32 cpu = bit_scan_forward(idle_mask) - 1;
  744. idle_mask &= ~(1u << cpu);
  745. found_mask |= 1u << cpu;
  746. }
  747. idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
  748. if (idle_mask == 0)
  749. continue; // All of them were flipped to busy, try again
  750. idle_count = popcount(idle_mask);
  751. for (u32 i = 0; i < idle_count; i++) {
  752. u32 cpu = bit_scan_forward(idle_mask) - 1;
  753. idle_mask &= ~(1u << cpu);
  754. // Send an IPI to that CPU to wake it up. There is a possibility
  755. // someone else woke it up as well, or that it woke up due to
  756. // a timer interrupt. But we tried hard to avoid this...
  757. apic.send_ipi(cpu);
  758. did_wake_count++;
  759. }
  760. }
  761. return did_wake_count;
  762. }
  763. UNMAP_AFTER_INIT void Processor::smp_enable()
  764. {
  765. size_t msg_pool_size = Processor::count() * 100u;
  766. size_t msg_entries_cnt = Processor::count();
  767. auto msgs = new ProcessorMessage[msg_pool_size];
  768. auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
  769. size_t msg_entry_i = 0;
  770. for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
  771. auto& msg = msgs[i];
  772. msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
  773. msg.per_proc_entries = &msg_entries[msg_entry_i];
  774. for (size_t k = 0; k < msg_entries_cnt; k++)
  775. msg_entries[msg_entry_i + k].msg = &msg;
  776. }
  777. s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);
  778. // Start sending IPI messages
  779. s_smp_enabled = true;
  780. }
  781. void Processor::smp_cleanup_message(ProcessorMessage& msg)
  782. {
  783. switch (msg.type) {
  784. case ProcessorMessage::Callback:
  785. msg.callback_value().~Function();
  786. break;
  787. default:
  788. break;
  789. }
  790. }
  791. bool Processor::smp_process_pending_messages()
  792. {
  793. VERIFY(s_smp_enabled);
  794. bool did_process = false;
  795. enter_critical();
  796. if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
  797. // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
  798. auto reverse_list =
  799. [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
  800. ProcessorMessageEntry* rev_list = nullptr;
  801. while (list) {
  802. auto next = list->next;
  803. list->next = rev_list;
  804. rev_list = list;
  805. list = next;
  806. }
  807. return rev_list;
  808. };
  809. pending_msgs = reverse_list(pending_msgs);
  810. // now process in the right order
  811. ProcessorMessageEntry* next_msg;
  812. for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
  813. next_msg = cur_msg->next;
  814. auto msg = cur_msg->msg;
  815. dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", current_id(), VirtualAddress(msg));
  816. switch (msg->type) {
  817. case ProcessorMessage::Callback:
  818. msg->invoke_callback();
  819. break;
  820. case ProcessorMessage::FlushTlb:
  821. if (Memory::is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
  822. // We assume that we don't cross into kernel land!
  823. VERIFY(Memory::is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
  824. if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
  825. // This processor isn't using this page directory right now, we can ignore this request
  826. dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", current_id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
  827. break;
  828. }
  829. }
  830. flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
  831. break;
  832. }
  833. bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
  834. auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
  835. VERIFY(prev_refs != 0);
  836. if (prev_refs == 1) {
  837. // All processors handled this. If this is an async message,
  838. // we need to clean it up and return it to the pool
  839. if (is_async) {
  840. smp_cleanup_message(*msg);
  841. smp_return_to_pool(*msg);
  842. }
  843. }
  844. if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
  845. halt_this();
  846. }
  847. did_process = true;
  848. } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
  849. halt_this();
  850. }
  851. leave_critical();
  852. return did_process;
  853. }
  854. bool Processor::smp_enqueue_message(ProcessorMessage& msg)
  855. {
  856. // Note that it's quite possible that the other processor may pop
  857. // the queue at any given time. We rely on the fact that the messages
  858. // are pooled and never get freed!
  859. auto& msg_entry = msg.per_proc_entries[id()];
  860. VERIFY(msg_entry.msg == &msg);
  861. ProcessorMessageEntry* next = nullptr;
  862. for (;;) {
  863. msg_entry.next = next;
  864. if (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel))
  865. break;
  866. Processor::pause();
  867. }
  868. // If the enqueued message was the only message in the queue when posted,
  869. // we return true. This is used by callers when deciding whether to generate an IPI.
  870. return next == nullptr;
  871. }
  872. void Processor::smp_broadcast_message(ProcessorMessage& msg)
  873. {
  874. auto& current_processor = Processor::current();
  875. dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} processor: {}", current_processor.id(), VirtualAddress(&msg), count(), VirtualAddress(&current_processor));
  876. msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
  877. VERIFY(msg.refs > 0);
  878. bool need_broadcast = false;
  879. for_each(
  880. [&](Processor& proc) {
  881. if (&proc != &current_processor) {
  882. if (proc.smp_enqueue_message(msg))
  883. need_broadcast = true;
  884. }
  885. });
  886. // Now trigger an IPI on all other APs (unless all targets already had messages queued)
  887. if (need_broadcast)
  888. APIC::the().broadcast_ipi();
  889. }
  890. void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
  891. {
  892. auto& cur_proc = Processor::current();
  893. VERIFY(!msg.async);
  894. // If synchronous then we must cleanup and return the message back
  895. // to the pool. Otherwise, the last processor to complete it will return it
  896. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  897. Processor::pause();
  898. // We need to process any messages that may have been sent to
  899. // us while we're waiting. This also checks if another processor
  900. // may have requested us to halt.
  901. cur_proc.smp_process_pending_messages();
  902. }
  903. smp_cleanup_message(msg);
  904. smp_return_to_pool(msg);
  905. }
  906. void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
  907. {
  908. auto& current_processor = Processor::current();
  909. VERIFY(cpu != current_processor.id());
  910. auto& target_processor = processors()[cpu];
  911. msg.async = async;
  912. dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} processor: {}", current_processor.id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_processor));
  913. msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
  914. if (target_processor->smp_enqueue_message(msg)) {
  915. APIC::the().send_ipi(cpu);
  916. }
  917. if (!async) {
  918. // If synchronous then we must cleanup and return the message back
  919. // to the pool. Otherwise, the last processor to complete it will return it
  920. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  921. Processor::pause();
  922. // We need to process any messages that may have been sent to
  923. // us while we're waiting. This also checks if another processor
  924. // may have requested us to halt.
  925. current_processor.smp_process_pending_messages();
  926. }
  927. smp_cleanup_message(msg);
  928. smp_return_to_pool(msg);
  929. }
  930. }
  931. void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
  932. {
  933. auto& msg = smp_get_from_pool();
  934. msg.type = ProcessorMessage::Callback;
  935. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  936. smp_unicast_message(cpu, msg, async);
  937. }
  938. void Processor::smp_broadcast_flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  939. {
  940. auto& msg = smp_get_from_pool();
  941. msg.async = false;
  942. msg.type = ProcessorMessage::FlushTlb;
  943. msg.flush_tlb.page_directory = page_directory;
  944. msg.flush_tlb.ptr = vaddr.as_ptr();
  945. msg.flush_tlb.page_count = page_count;
  946. smp_broadcast_message(msg);
  947. // While the other processors handle this request, we'll flush ours
  948. flush_tlb_local(vaddr, page_count);
  949. // Now wait until everybody is done as well
  950. smp_broadcast_wait_sync(msg);
  951. }
  952. void Processor::smp_broadcast_halt()
  953. {
  954. // We don't want to use a message, because this could have been triggered
  955. // by being out of memory and we might not be able to get a message
  956. for_each(
  957. [&](Processor& proc) {
  958. proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
  959. });
  960. // Now trigger an IPI on all other APs
  961. APIC::the().broadcast_ipi();
  962. }
  963. void Processor::Processor::halt()
  964. {
  965. if (s_smp_enabled)
  966. smp_broadcast_halt();
  967. halt_this();
  968. }
  969. UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
  970. {
  971. size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
  972. for (size_t i = 0; i < pool_count; i++) {
  973. auto& entry = m_deferred_call_pool[i];
  974. entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
  975. new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
  976. entry.was_allocated = false;
  977. }
  978. m_pending_deferred_calls = nullptr;
  979. m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
  980. }
  981. void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
  982. {
  983. VERIFY(m_in_critical);
  984. VERIFY(!entry->was_allocated);
  985. entry->handler_value() = {};
  986. entry->next = m_free_deferred_call_pool_entry;
  987. m_free_deferred_call_pool_entry = entry;
  988. }
  989. DeferredCallEntry* Processor::deferred_call_get_free()
  990. {
  991. VERIFY(m_in_critical);
  992. if (m_free_deferred_call_pool_entry) {
  993. // Fast path, we have an entry in our pool
  994. auto* entry = m_free_deferred_call_pool_entry;
  995. m_free_deferred_call_pool_entry = entry->next;
  996. VERIFY(!entry->was_allocated);
  997. return entry;
  998. }
  999. auto* entry = new DeferredCallEntry;
  1000. new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
  1001. entry->was_allocated = true;
  1002. return entry;
  1003. }
  1004. void Processor::deferred_call_execute_pending()
  1005. {
  1006. VERIFY(m_in_critical);
  1007. if (!m_pending_deferred_calls)
  1008. return;
  1009. auto* pending_list = m_pending_deferred_calls;
  1010. m_pending_deferred_calls = nullptr;
  1011. // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
  1012. auto reverse_list =
  1013. [](DeferredCallEntry* list) -> DeferredCallEntry* {
  1014. DeferredCallEntry* rev_list = nullptr;
  1015. while (list) {
  1016. auto next = list->next;
  1017. list->next = rev_list;
  1018. rev_list = list;
  1019. list = next;
  1020. }
  1021. return rev_list;
  1022. };
  1023. pending_list = reverse_list(pending_list);
  1024. do {
  1025. pending_list->invoke_handler();
  1026. // Return the entry back to the pool, or free it
  1027. auto* next = pending_list->next;
  1028. if (pending_list->was_allocated) {
  1029. pending_list->handler_value().~Function();
  1030. delete pending_list;
  1031. } else
  1032. deferred_call_return_to_pool(pending_list);
  1033. pending_list = next;
  1034. } while (pending_list);
  1035. }
  1036. void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
  1037. {
  1038. VERIFY(m_in_critical);
  1039. entry->next = m_pending_deferred_calls;
  1040. m_pending_deferred_calls = entry;
  1041. }
  1042. void Processor::deferred_call_queue(Function<void()> callback)
  1043. {
  1044. // NOTE: If we are called outside of a critical section and outside
  1045. // of an irq handler, the function will be executed before we return!
  1046. ScopedCritical critical;
  1047. auto& cur_proc = Processor::current();
  1048. auto* entry = cur_proc.deferred_call_get_free();
  1049. entry->handler_value() = move(callback);
  1050. cur_proc.deferred_call_queue_entry(entry);
  1051. }
  1052. UNMAP_AFTER_INIT void Processor::gdt_init()
  1053. {
  1054. m_gdt_length = 0;
  1055. m_gdtr.address = nullptr;
  1056. m_gdtr.limit = 0;
  1057. write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
  1058. #if ARCH(I386)
  1059. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
  1060. write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
  1061. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
  1062. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
  1063. #else
  1064. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
  1065. write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00af9200); // data0
  1066. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
  1067. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
  1068. #endif
  1069. #if ARCH(I386)
  1070. Descriptor tls_descriptor {};
  1071. tls_descriptor.low = tls_descriptor.high = 0;
  1072. tls_descriptor.dpl = 3;
  1073. tls_descriptor.segment_present = 1;
  1074. tls_descriptor.granularity = 0;
  1075. tls_descriptor.operation_size64 = 0;
  1076. tls_descriptor.operation_size32 = 1;
  1077. tls_descriptor.descriptor_type = 1;
  1078. tls_descriptor.type = 2;
  1079. write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3
  1080. Descriptor gs_descriptor {};
  1081. gs_descriptor.set_base(VirtualAddress { this });
  1082. gs_descriptor.set_limit(sizeof(Processor) - 1);
  1083. gs_descriptor.dpl = 0;
  1084. gs_descriptor.segment_present = 1;
  1085. gs_descriptor.granularity = 0;
  1086. gs_descriptor.operation_size64 = 0;
  1087. gs_descriptor.operation_size32 = 1;
  1088. gs_descriptor.descriptor_type = 1;
  1089. gs_descriptor.type = 2;
  1090. write_gdt_entry(GDT_SELECTOR_PROC, gs_descriptor); // gs0
  1091. #endif
  1092. Descriptor tss_descriptor {};
  1093. tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
  1094. tss_descriptor.set_limit(sizeof(TSS) - 1);
  1095. tss_descriptor.dpl = 0;
  1096. tss_descriptor.segment_present = 1;
  1097. tss_descriptor.granularity = 0;
  1098. tss_descriptor.operation_size64 = 0;
  1099. tss_descriptor.operation_size32 = 1;
  1100. tss_descriptor.descriptor_type = 0;
  1101. tss_descriptor.type = Descriptor::SystemType::AvailableTSS;
  1102. write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss
  1103. #if ARCH(X86_64)
  1104. Descriptor tss_descriptor_part2 {};
  1105. tss_descriptor_part2.low = (size_t)&m_tss >> 32;
  1106. write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
  1107. #endif
  1108. flush_gdt();
  1109. load_task_register(GDT_SELECTOR_TSS);
  1110. #if ARCH(X86_64)
  1111. MSR gs_base(MSR_GS_BASE);
  1112. gs_base.set((u64)this);
  1113. #else
  1114. asm volatile(
  1115. "mov %%ax, %%ds\n"
  1116. "mov %%ax, %%es\n"
  1117. "mov %%ax, %%fs\n"
  1118. "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
  1119. : "memory");
  1120. set_gs(GDT_SELECTOR_PROC);
  1121. #endif
  1122. #if ARCH(I386)
  1123. // Make sure CS points to the kernel code descriptor.
  1124. // clang-format off
  1125. asm volatile(
  1126. "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
  1127. "sanity:\n");
  1128. // clang-format on
  1129. #endif
  1130. }
  1131. extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
  1132. {
  1133. VERIFY(!are_interrupts_enabled());
  1134. VERIFY(is_kernel_mode());
  1135. dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
  1136. VERIFY(to_thread == Thread::current());
  1137. Scheduler::enter_current(*from_thread);
  1138. auto in_critical = to_thread->saved_critical();
  1139. VERIFY(in_critical > 0);
  1140. Processor::restore_in_critical(in_critical);
  1141. // Since we got here and don't have Scheduler::context_switch in the
  1142. // call stack (because this is the first time we switched into this
  1143. // context), we need to notify the scheduler so that it can release
  1144. // the scheduler lock. We don't want to enable interrupts at this point
  1145. // as we're still in the middle of a context switch. Doing so could
  1146. // trigger a context switch within a context switch, leading to a crash.
  1147. FlatPtr flags = trap->regs->flags();
  1148. Scheduler::leave_on_first_switch(flags & ~0x200);
  1149. }
  1150. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
  1151. {
  1152. VERIFY(from_thread == to_thread || from_thread->state() != Thread::State::Running);
  1153. VERIFY(to_thread->state() == Thread::State::Running);
  1154. bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
  1155. Processor::set_current_thread(*to_thread);
  1156. auto& from_regs = from_thread->regs();
  1157. auto& to_regs = to_thread->regs();
  1158. // NOTE: IOPL should never be non-zero in any situation, so let's panic immediately
  1159. // instead of carrying on with elevated I/O privileges.
  1160. VERIFY(get_iopl_from_eflags(to_regs.flags()) == 0);
  1161. if (has_fxsr)
  1162. asm volatile("fxsave %0"
  1163. : "=m"(from_thread->fpu_state()));
  1164. else
  1165. asm volatile("fnsave %0"
  1166. : "=m"(from_thread->fpu_state()));
  1167. #if ARCH(I386)
  1168. from_regs.fs = get_fs();
  1169. from_regs.gs = get_gs();
  1170. set_fs(to_regs.fs);
  1171. set_gs(to_regs.gs);
  1172. #endif
  1173. if (from_thread->process().is_traced())
  1174. read_debug_registers_into(from_thread->debug_register_state());
  1175. if (to_thread->process().is_traced()) {
  1176. write_debug_registers_from(to_thread->debug_register_state());
  1177. } else {
  1178. clear_debug_registers();
  1179. }
  1180. auto& processor = Processor::current();
  1181. #if ARCH(I386)
  1182. auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
  1183. tls_descriptor.set_base(to_thread->thread_specific_data());
  1184. tls_descriptor.set_limit(to_thread->thread_specific_region_size());
  1185. #else
  1186. MSR fs_base_msr(MSR_FS_BASE);
  1187. fs_base_msr.set(to_thread->thread_specific_data().get());
  1188. #endif
  1189. if (from_regs.cr3 != to_regs.cr3)
  1190. write_cr3(to_regs.cr3);
  1191. to_thread->set_cpu(processor.id());
  1192. auto in_critical = to_thread->saved_critical();
  1193. VERIFY(in_critical > 0);
  1194. Processor::restore_in_critical(in_critical);
  1195. if (has_fxsr)
  1196. asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
  1197. else
  1198. asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
  1199. }
  1200. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
  1201. {
  1202. VERIFY_INTERRUPTS_DISABLED();
  1203. thread->regs().set_flags(flags);
  1204. return Processor::current().init_context(*thread, true);
  1205. }
  1206. void Processor::assume_context(Thread& thread, FlatPtr flags)
  1207. {
  1208. dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);
  1209. VERIFY_INTERRUPTS_DISABLED();
  1210. Scheduler::prepare_after_exec();
  1211. // in_critical() should be 2 here. The critical section in Process::exec
  1212. // and then the scheduler lock
  1213. VERIFY(Processor::in_critical() == 2);
  1214. do_assume_context(&thread, flags);
  1215. VERIFY_NOT_REACHED();
  1216. }
  1217. u64 Processor::time_spent_idle() const
  1218. {
  1219. return m_idle_thread->time_in_user() + m_idle_thread->time_in_kernel();
  1220. }
  1221. }