Processor.cpp 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, the SerenityOS developers.
  4. *
  5. * SPDX-License-Identifier: BSD-2-Clause
  6. */
  7. #include <AK/BuiltinWrappers.h>
  8. #include <AK/Format.h>
  9. #include <AK/StdLibExtras.h>
  10. #include <AK/StringBuilder.h>
  11. #include <AK/Types.h>
  12. #include <Kernel/Interrupts/APIC.h>
  13. #include <Kernel/Process.h>
  14. #include <Kernel/Scheduler.h>
  15. #include <Kernel/Sections.h>
  16. #include <Kernel/StdLib.h>
  17. #include <Kernel/Thread.h>
  18. #include <Kernel/Arch/Processor.h>
  19. #include <Kernel/Arch/ScopedCritical.h>
  20. #include <Kernel/Arch/x86/CPUID.h>
  21. #include <Kernel/Arch/x86/InterruptDisabler.h>
  22. #include <Kernel/Arch/x86/Interrupts.h>
  23. #include <Kernel/Arch/x86/MSR.h>
  24. #include <Kernel/Arch/x86/ProcessorInfo.h>
  25. #include <Kernel/Arch/x86/SafeMem.h>
  26. #include <Kernel/Arch/x86/TrapFrame.h>
  27. #include <Kernel/Memory/PageDirectory.h>
  28. #include <Kernel/Memory/ScopedAddressSpaceSwitcher.h>
  29. namespace Kernel {
  30. READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;
  31. READONLY_AFTER_INIT static ProcessorContainer s_processors {};
  32. READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
  33. READONLY_AFTER_INIT static volatile bool s_smp_enabled;
  34. static Atomic<ProcessorMessage*> s_message_pool;
  35. Atomic<u32> Processor::s_idle_cpu_mask { 0 };
  36. // The compiler can't see the calls to these functions inside assembly.
  37. // Declare them, to avoid dead code warnings.
  38. extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
  39. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
  40. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
  41. extern "C" void syscall_entry();
  42. bool Processor::is_smp_enabled()
  43. {
  44. return s_smp_enabled;
  45. }
  46. UNMAP_AFTER_INIT static void sse_init()
  47. {
  48. write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
  49. write_cr4(read_cr4() | 0x600);
  50. }
  51. void exit_kernel_thread(void)
  52. {
  53. Thread::current()->exit();
  54. }
  55. UNMAP_AFTER_INIT void Processor::cpu_detect()
  56. {
  57. // NOTE: This is called during Processor::early_initialize, we cannot
  58. // safely log at this point because we don't have kmalloc
  59. // initialized yet!
  60. auto set_feature =
  61. [&](CPUFeature f) {
  62. m_features = static_cast<CPUFeature>(static_cast<u32>(m_features) | static_cast<u32>(f));
  63. };
  64. m_features = static_cast<CPUFeature>(0);
  65. CPUID processor_info(0x1);
  66. if (processor_info.edx() & (1 << 4))
  67. set_feature(CPUFeature::TSC);
  68. if (processor_info.edx() & (1 << 6))
  69. set_feature(CPUFeature::PAE);
  70. if (processor_info.edx() & (1 << 13))
  71. set_feature(CPUFeature::PGE);
  72. if (processor_info.edx() & (1 << 23))
  73. set_feature(CPUFeature::MMX);
  74. if (processor_info.edx() & (1 << 24))
  75. set_feature(CPUFeature::FXSR);
  76. if (processor_info.edx() & (1 << 25))
  77. set_feature(CPUFeature::SSE);
  78. if (processor_info.edx() & (1 << 26))
  79. set_feature(CPUFeature::SSE2);
  80. if (processor_info.ecx() & (1 << 0))
  81. set_feature(CPUFeature::SSE3);
  82. if (processor_info.ecx() & (1 << 9))
  83. set_feature(CPUFeature::SSSE3);
  84. if (processor_info.ecx() & (1 << 19))
  85. set_feature(CPUFeature::SSE4_1);
  86. if (processor_info.ecx() & (1 << 20))
  87. set_feature(CPUFeature::SSE4_2);
  88. if (processor_info.ecx() & (1 << 26))
  89. set_feature(CPUFeature::XSAVE);
  90. if (processor_info.ecx() & (1 << 28))
  91. set_feature(CPUFeature::AVX);
  92. if (processor_info.ecx() & (1 << 30))
  93. set_feature(CPUFeature::RDRAND);
  94. if (processor_info.ecx() & (1u << 31))
  95. set_feature(CPUFeature::HYPERVISOR);
  96. if (processor_info.edx() & (1 << 11)) {
  97. u32 stepping = processor_info.eax() & 0xf;
  98. u32 model = (processor_info.eax() >> 4) & 0xf;
  99. u32 family = (processor_info.eax() >> 8) & 0xf;
  100. if (!(family == 6 && model < 3 && stepping < 3))
  101. set_feature(CPUFeature::SEP);
  102. if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
  103. set_feature(CPUFeature::CONSTANT_TSC);
  104. }
  105. if (processor_info.edx() & (1 << 16))
  106. set_feature(CPUFeature::PAT);
  107. u32 max_extended_leaf = CPUID(0x80000000).eax();
  108. if (max_extended_leaf >= 0x80000001) {
  109. CPUID extended_processor_info(0x80000001);
  110. if (extended_processor_info.edx() & (1 << 20))
  111. set_feature(CPUFeature::NX);
  112. if (extended_processor_info.edx() & (1 << 27))
  113. set_feature(CPUFeature::RDTSCP);
  114. if (extended_processor_info.edx() & (1 << 29))
  115. set_feature(CPUFeature::LM);
  116. if (extended_processor_info.edx() & (1 << 11)) {
  117. // Only available in 64 bit mode
  118. set_feature(CPUFeature::SYSCALL);
  119. }
  120. }
  121. if (max_extended_leaf >= 0x80000007) {
  122. CPUID cpuid(0x80000007);
  123. if (cpuid.edx() & (1 << 8)) {
  124. set_feature(CPUFeature::CONSTANT_TSC);
  125. set_feature(CPUFeature::NONSTOP_TSC);
  126. }
  127. }
  128. #if ARCH(X86_64)
  129. m_has_qemu_hvf_quirk = false;
  130. #endif
  131. if (max_extended_leaf >= 0x80000008) {
  132. // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
  133. CPUID cpuid(0x80000008);
  134. m_physical_address_bit_width = cpuid.eax() & 0xff;
  135. // CPUID.80000008H:EAX[15:8] reports the linear-address width supported by the processor.
  136. m_virtual_address_bit_width = (cpuid.eax() >> 8) & 0xff;
  137. } else {
  138. // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
  139. m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
  140. // Processors that do not support CPUID function 80000008H, support a linear-address width of 32.
  141. m_virtual_address_bit_width = 32;
  142. #if ARCH(X86_64)
  143. // Workaround QEMU hypervisor.framework bug
  144. // https://gitlab.com/qemu-project/qemu/-/issues/664
  145. //
  146. // We detect this as follows:
  147. // * We're in a hypervisor
  148. // * hypervisor_leaf_range is null under Hypervisor.framework
  149. // * m_physical_address_bit_width is 36 bits
  150. if (has_feature(CPUFeature::HYPERVISOR)) {
  151. CPUID hypervisor_leaf_range(0x40000000);
  152. if (!hypervisor_leaf_range.ebx() && m_physical_address_bit_width == 36) {
  153. m_has_qemu_hvf_quirk = true;
  154. m_virtual_address_bit_width = 48;
  155. }
  156. }
  157. #endif
  158. }
  159. CPUID extended_features(0x7);
  160. if (extended_features.ebx() & (1 << 20))
  161. set_feature(CPUFeature::SMAP);
  162. if (extended_features.ebx() & (1 << 7))
  163. set_feature(CPUFeature::SMEP);
  164. if (extended_features.ecx() & (1 << 2))
  165. set_feature(CPUFeature::UMIP);
  166. if (extended_features.ebx() & (1 << 18))
  167. set_feature(CPUFeature::RDSEED);
  168. }
  169. UNMAP_AFTER_INIT void Processor::cpu_setup()
  170. {
  171. // NOTE: This is called during Processor::early_initialize, we cannot
  172. // safely log at this point because we don't have kmalloc
  173. // initialized yet!
  174. cpu_detect();
  175. if (has_feature(CPUFeature::SSE)) {
  176. // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
  177. // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
  178. VERIFY(has_feature(CPUFeature::FXSR));
  179. sse_init();
  180. }
  181. write_cr0(read_cr0() | 0x00010000);
  182. if (has_feature(CPUFeature::PGE)) {
  183. // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
  184. write_cr4(read_cr4() | 0x80);
  185. }
  186. if (has_feature(CPUFeature::NX)) {
  187. // Turn on IA32_EFER.NXE
  188. MSR ia32_efer(MSR_IA32_EFER);
  189. ia32_efer.set(ia32_efer.get() | 0x800);
  190. }
  191. if (has_feature(CPUFeature::PAT)) {
  192. MSR ia32_pat(MSR_IA32_PAT);
  193. // Set PA4 to Write Comine. This allows us to
  194. // use this mode by only setting the bit in the PTE
  195. // and leaving all other bits in the upper levels unset,
  196. // which maps to setting bit 3 of the index, resulting
  197. // in the index value 0 or 4.
  198. u64 pat = ia32_pat.get() & ~(0x7ull << 32);
  199. pat |= 0x1ull << 32; // set WC mode for PA4
  200. ia32_pat.set(pat);
  201. }
  202. if (has_feature(CPUFeature::SMEP)) {
  203. // Turn on CR4.SMEP
  204. write_cr4(read_cr4() | 0x100000);
  205. }
  206. if (has_feature(CPUFeature::SMAP)) {
  207. // Turn on CR4.SMAP
  208. write_cr4(read_cr4() | 0x200000);
  209. }
  210. if (has_feature(CPUFeature::UMIP)) {
  211. write_cr4(read_cr4() | 0x800);
  212. }
  213. if (has_feature(CPUFeature::TSC)) {
  214. write_cr4(read_cr4() | 0x4);
  215. }
  216. if (has_feature(CPUFeature::XSAVE)) {
  217. // Turn on CR4.OSXSAVE
  218. write_cr4(read_cr4() | 0x40000);
  219. // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
  220. // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
  221. write_xcr0(0x1);
  222. if (has_feature(CPUFeature::AVX)) {
  223. // Turn on SSE, AVX and x87 flags
  224. write_xcr0(read_xcr0() | 0x7);
  225. }
  226. }
  227. #if ARCH(X86_64)
  228. // x86_64 processors must support the syscall feature.
  229. VERIFY(has_feature(CPUFeature::SYSCALL));
  230. MSR efer_msr(MSR_EFER);
  231. efer_msr.set(efer_msr.get() | 1u);
  232. // Write code and stack selectors to the STAR MSR. The first value stored in bits 63:48 controls the sysret CS (value + 0x10) and SS (value + 0x8),
  233. // and the value stored in bits 47:32 controls the syscall CS (value) and SS (value + 0x8).
  234. u64 star = 0;
  235. star |= 0x13ul << 48u;
  236. star |= 0x08ul << 32u;
  237. MSR star_msr(MSR_STAR);
  238. star_msr.set(star);
  239. // Write the syscall entry point to the LSTAR MSR.
  240. MSR lstar_msr(MSR_LSTAR);
  241. lstar_msr.set(reinterpret_cast<u64>(&syscall_entry));
  242. // Write the SFMASK MSR. This MSR controls which bits of rflags are masked when a syscall instruction is executed -
  243. // if a bit is set in sfmask, the corresponding bit in rflags is cleared. The value set here clears most of rflags,
  244. // but keeps the reserved and virtualization bits intact. The userspace rflags value is saved in r11 by syscall.
  245. constexpr u64 rflags_mask = 0x257fd5u;
  246. MSR sfmask_msr(MSR_SFMASK);
  247. sfmask_msr.set(rflags_mask);
  248. #endif
  249. }
  250. NonnullOwnPtr<KString> Processor::features_string() const
  251. {
  252. StringBuilder builder;
  253. auto feature_to_str =
  254. [](CPUFeature f) -> StringView {
  255. switch (f) {
  256. case CPUFeature::NX:
  257. return "nx"sv;
  258. case CPUFeature::PAE:
  259. return "pae"sv;
  260. case CPUFeature::PGE:
  261. return "pge"sv;
  262. case CPUFeature::RDRAND:
  263. return "rdrand"sv;
  264. case CPUFeature::RDSEED:
  265. return "rdseed"sv;
  266. case CPUFeature::SMAP:
  267. return "smap"sv;
  268. case CPUFeature::SMEP:
  269. return "smep"sv;
  270. case CPUFeature::SSE:
  271. return "sse"sv;
  272. case CPUFeature::TSC:
  273. return "tsc"sv;
  274. case CPUFeature::RDTSCP:
  275. return "rdtscp"sv;
  276. case CPUFeature::CONSTANT_TSC:
  277. return "constant_tsc"sv;
  278. case CPUFeature::NONSTOP_TSC:
  279. return "nonstop_tsc"sv;
  280. case CPUFeature::UMIP:
  281. return "umip"sv;
  282. case CPUFeature::SEP:
  283. return "sep"sv;
  284. case CPUFeature::SYSCALL:
  285. return "syscall"sv;
  286. case CPUFeature::MMX:
  287. return "mmx"sv;
  288. case CPUFeature::FXSR:
  289. return "fxsr"sv;
  290. case CPUFeature::SSE2:
  291. return "sse2"sv;
  292. case CPUFeature::SSE3:
  293. return "sse3"sv;
  294. case CPUFeature::SSSE3:
  295. return "ssse3"sv;
  296. case CPUFeature::SSE4_1:
  297. return "sse4.1"sv;
  298. case CPUFeature::SSE4_2:
  299. return "sse4.2"sv;
  300. case CPUFeature::XSAVE:
  301. return "xsave"sv;
  302. case CPUFeature::AVX:
  303. return "avx"sv;
  304. case CPUFeature::LM:
  305. return "lm"sv;
  306. case CPUFeature::HYPERVISOR:
  307. return "hypervisor"sv;
  308. // no default statement here intentionally so that we get
  309. // a warning if a new feature is forgotten to be added here
  310. case CPUFeature::PAT:
  311. return "pat"sv;
  312. }
  313. // Shouldn't ever happen
  314. return "???"sv;
  315. };
  316. bool first = true;
  317. for (u32 flag = 1; flag != 0; flag <<= 1) {
  318. if ((static_cast<u32>(m_features) & flag) != 0) {
  319. if (first)
  320. first = false;
  321. else
  322. MUST(builder.try_append(' '));
  323. auto str = feature_to_str(static_cast<CPUFeature>(flag));
  324. MUST(builder.try_append(str));
  325. }
  326. }
  327. return KString::must_create(builder.string_view());
  328. }
  329. UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
  330. {
  331. m_self = this;
  332. m_cpu = cpu;
  333. m_in_irq = 0;
  334. m_in_critical = 0;
  335. m_invoke_scheduler_async = false;
  336. m_scheduler_initialized = false;
  337. m_in_scheduler = true;
  338. m_message_queue = nullptr;
  339. m_idle_thread = nullptr;
  340. m_current_thread = nullptr;
  341. m_info = nullptr;
  342. m_halt_requested = false;
  343. if (cpu == 0) {
  344. s_smp_enabled = false;
  345. g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
  346. } else {
  347. g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
  348. }
  349. deferred_call_pool_init();
  350. cpu_setup();
  351. gdt_init();
  352. VERIFY(is_initialized()); // sanity check
  353. VERIFY(&current() == this); // sanity check
  354. }
  355. UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
  356. {
  357. VERIFY(m_self == this);
  358. VERIFY(&current() == this); // sanity check
  359. dmesgln("CPU[{}]: Supported features: {}", current_id(), features_string());
  360. if (!has_feature(CPUFeature::RDRAND))
  361. dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", current_id());
  362. dmesgln("CPU[{}]: Physical address bit width: {}", current_id(), m_physical_address_bit_width);
  363. dmesgln("CPU[{}]: Virtual address bit width: {}", current_id(), m_virtual_address_bit_width);
  364. #if ARCH(X86_64)
  365. if (m_has_qemu_hvf_quirk)
  366. dmesgln("CPU[{}]: Applied correction for QEMU Hypervisor.framework quirk", current_id());
  367. #endif
  368. if (cpu == 0)
  369. idt_init();
  370. else
  371. flush_idt();
  372. if (cpu == 0) {
  373. VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
  374. asm volatile("fninit");
  375. if (has_feature(CPUFeature::FXSR))
  376. asm volatile("fxsave %0"
  377. : "=m"(s_clean_fpu_state));
  378. else
  379. asm volatile("fnsave %0"
  380. : "=m"(s_clean_fpu_state));
  381. if (has_feature(CPUFeature::HYPERVISOR))
  382. detect_hypervisor();
  383. }
  384. m_info = new ProcessorInfo(*this);
  385. {
  386. // We need to prevent races between APs starting up at the same time
  387. VERIFY(cpu < s_processors.size());
  388. s_processors[cpu] = this;
  389. }
  390. }
  391. UNMAP_AFTER_INIT void Processor::detect_hypervisor()
  392. {
  393. CPUID hypervisor_leaf_range(0x40000000);
  394. // Get signature of hypervisor.
  395. alignas(sizeof(u32)) char hypervisor_signature_buffer[13];
  396. *reinterpret_cast<u32*>(hypervisor_signature_buffer) = hypervisor_leaf_range.ebx();
  397. *reinterpret_cast<u32*>(hypervisor_signature_buffer + 4) = hypervisor_leaf_range.ecx();
  398. *reinterpret_cast<u32*>(hypervisor_signature_buffer + 8) = hypervisor_leaf_range.edx();
  399. hypervisor_signature_buffer[12] = '\0';
  400. StringView hypervisor_signature(hypervisor_signature_buffer);
  401. dmesgln("CPU[{}]: CPUID hypervisor signature '{}' ({:#x} {:#x} {:#x}), max leaf {:#x}", current_id(), hypervisor_signature, hypervisor_leaf_range.ebx(), hypervisor_leaf_range.ecx(), hypervisor_leaf_range.edx(), hypervisor_leaf_range.eax());
  402. if (hypervisor_signature == "Microsoft Hv"sv)
  403. detect_hypervisor_hyperv(hypervisor_leaf_range);
  404. }
  405. UNMAP_AFTER_INIT void Processor::detect_hypervisor_hyperv(CPUID const& hypervisor_leaf_range)
  406. {
  407. if (hypervisor_leaf_range.eax() < 0x40000001)
  408. return;
  409. CPUID hypervisor_interface(0x40000001);
  410. // Get signature of hypervisor interface.
  411. alignas(sizeof(u32)) char interface_signature_buffer[5];
  412. *reinterpret_cast<u32*>(interface_signature_buffer) = hypervisor_interface.eax();
  413. interface_signature_buffer[4] = '\0';
  414. StringView hyperv_interface_signature(interface_signature_buffer);
  415. dmesgln("CPU[{}]: Hyper-V interface signature '{}' ({:#x})", current_id(), hyperv_interface_signature, hypervisor_interface.eax());
  416. if (hypervisor_leaf_range.eax() < 0x40000001)
  417. return;
  418. CPUID hypervisor_sysid(0x40000002);
  419. dmesgln("CPU[{}]: Hyper-V system identity {}.{}, build number {}", current_id(), hypervisor_sysid.ebx() >> 16, hypervisor_sysid.ebx() & 0xFFFF, hypervisor_sysid.eax());
  420. if (hypervisor_leaf_range.eax() < 0x40000005 || hyperv_interface_signature != "Hv#1"sv)
  421. return;
  422. dmesgln("CPU[{}]: Hyper-V hypervisor detected", current_id());
  423. // TODO: Actually do something with Hyper-V.
  424. }
  425. void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
  426. {
  427. u16 i = (selector & 0xfffc) >> 3;
  428. u32 prev_gdt_length = m_gdt_length;
  429. if (i >= m_gdt_length) {
  430. m_gdt_length = i + 1;
  431. VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
  432. m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
  433. }
  434. m_gdt[i].low = low;
  435. m_gdt[i].high = high;
  436. // clear selectors we may have skipped
  437. for (auto j = prev_gdt_length; j < i; ++j) {
  438. m_gdt[j].low = 0;
  439. m_gdt[j].high = 0;
  440. }
  441. }
  442. void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
  443. {
  444. write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
  445. }
  446. Descriptor& Processor::get_gdt_entry(u16 selector)
  447. {
  448. u16 i = (selector & 0xfffc) >> 3;
  449. return *(Descriptor*)(&m_gdt[i]);
  450. }
  451. void Processor::flush_gdt()
  452. {
  453. m_gdtr.address = m_gdt;
  454. m_gdtr.limit = (m_gdt_length * 8) - 1;
  455. asm volatile("lgdt %0" ::"m"(m_gdtr)
  456. : "memory");
  457. }
  458. const DescriptorTablePointer& Processor::get_gdtr()
  459. {
  460. return m_gdtr;
  461. }
  462. ErrorOr<Vector<FlatPtr, 32>> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
  463. {
  464. FlatPtr frame_ptr = 0, ip = 0;
  465. Vector<FlatPtr, 32> stack_trace;
  466. auto walk_stack = [&](FlatPtr stack_ptr) -> ErrorOr<void> {
  467. constexpr size_t max_stack_frames = 4096;
  468. bool is_walking_userspace_stack = false;
  469. TRY(stack_trace.try_append(ip));
  470. size_t count = 1;
  471. while (stack_ptr && stack_trace.size() < max_stack_frames) {
  472. FlatPtr retaddr;
  473. count++;
  474. if (max_frames != 0 && count > max_frames)
  475. break;
  476. if (!Memory::is_user_address(VirtualAddress { stack_ptr })) {
  477. if (is_walking_userspace_stack) {
  478. dbgln("SHENANIGANS! Userspace stack points back into kernel memory");
  479. break;
  480. }
  481. } else {
  482. is_walking_userspace_stack = true;
  483. }
  484. if (Memory::is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
  485. if (copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]).is_error() || !retaddr)
  486. break;
  487. TRY(stack_trace.try_append(retaddr));
  488. if (copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr).is_error())
  489. break;
  490. } else {
  491. void* fault_at;
  492. if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
  493. break;
  494. TRY(stack_trace.try_append(retaddr));
  495. if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
  496. break;
  497. }
  498. }
  499. return {};
  500. };
  501. auto capture_current_thread = [&]() {
  502. frame_ptr = (FlatPtr)__builtin_frame_address(0);
  503. ip = (FlatPtr)__builtin_return_address(0);
  504. return walk_stack(frame_ptr);
  505. };
  506. // Since the thread may be running on another processor, there
  507. // is a chance a context switch may happen while we're trying
  508. // to get it. It also won't be entirely accurate and merely
  509. // reflect the status at the last context switch.
  510. SpinlockLocker lock(g_scheduler_lock);
  511. if (&thread == Processor::current_thread()) {
  512. VERIFY(thread.state() == Thread::State::Running);
  513. // Leave the scheduler lock. If we trigger page faults we may
  514. // need to be preempted. Since this is our own thread it won't
  515. // cause any problems as the stack won't change below this frame.
  516. lock.unlock();
  517. TRY(capture_current_thread());
  518. } else if (thread.is_active()) {
  519. VERIFY(thread.cpu() != Processor::current_id());
  520. // If this is the case, the thread is currently running
  521. // on another processor. We can't trust the kernel stack as
  522. // it may be changing at any time. We need to probably send
  523. // an IPI to that processor, have it walk the stack and wait
  524. // until it returns the data back to us
  525. auto& proc = Processor::current();
  526. ErrorOr<void> result;
  527. smp_unicast(
  528. thread.cpu(),
  529. [&]() {
  530. dbgln("CPU[{}] getting stack for cpu #{}", Processor::current_id(), proc.id());
  531. ScopedAddressSpaceSwitcher switcher(thread.process());
  532. VERIFY(&Processor::current() != &proc);
  533. VERIFY(&thread == Processor::current_thread());
  534. // NOTE: Because the other processor is still holding the
  535. // scheduler lock while waiting for this callback to finish,
  536. // the current thread on the target processor cannot change
  537. // TODO: What to do about page faults here? We might deadlock
  538. // because the other processor is still holding the
  539. // scheduler lock...
  540. result = capture_current_thread();
  541. },
  542. false);
  543. TRY(result);
  544. } else {
  545. switch (thread.state()) {
  546. case Thread::State::Running:
  547. VERIFY_NOT_REACHED(); // should have been handled above
  548. case Thread::State::Runnable:
  549. case Thread::State::Stopped:
  550. case Thread::State::Blocked:
  551. case Thread::State::Dying:
  552. case Thread::State::Dead: {
  553. // We need to retrieve ebp from what was last pushed to the kernel
  554. // stack. Before switching out of that thread, it switch_context
  555. // pushed the callee-saved registers, and the last of them happens
  556. // to be ebp.
  557. ScopedAddressSpaceSwitcher switcher(thread.process());
  558. auto& regs = thread.regs();
  559. auto* stack_top = reinterpret_cast<FlatPtr*>(regs.sp());
  560. if (Memory::is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
  561. if (copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]).is_error())
  562. frame_ptr = 0;
  563. } else {
  564. void* fault_at;
  565. if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
  566. frame_ptr = 0;
  567. }
  568. ip = regs.ip();
  569. // TODO: We need to leave the scheduler lock here, but we also
  570. // need to prevent the target thread from being run while
  571. // we walk the stack
  572. lock.unlock();
  573. TRY(walk_stack(frame_ptr));
  574. break;
  575. }
  576. default:
  577. dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
  578. break;
  579. }
  580. }
  581. return stack_trace;
  582. }
  583. ProcessorContainer& Processor::processors()
  584. {
  585. return s_processors;
  586. }
  587. Processor& Processor::by_id(u32 id)
  588. {
  589. return *s_processors[id];
  590. }
  591. void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
  592. {
  593. VERIFY_INTERRUPTS_DISABLED();
  594. VERIFY(&Processor::current() == this);
  595. trap.prev_irq_level = m_in_irq;
  596. if (raise_irq)
  597. m_in_irq++;
  598. auto* current_thread = Processor::current_thread();
  599. if (current_thread) {
  600. auto& current_trap = current_thread->current_trap();
  601. trap.next_trap = current_trap;
  602. current_trap = &trap;
  603. // The cs register of this trap tells us where we will return back to
  604. auto new_previous_mode = ((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  605. if (current_thread->set_previous_mode(new_previous_mode) && trap.prev_irq_level == 0) {
  606. current_thread->update_time_scheduled(Scheduler::current_time(), new_previous_mode == Thread::PreviousMode::KernelMode, false);
  607. }
  608. } else {
  609. trap.next_trap = nullptr;
  610. }
  611. }
  612. void Processor::exit_trap(TrapFrame& trap)
  613. {
  614. VERIFY_INTERRUPTS_DISABLED();
  615. VERIFY(&Processor::current() == this);
  616. // Temporarily enter a critical section. This is to prevent critical
  617. // sections entered and left within e.g. smp_process_pending_messages
  618. // to trigger a context switch while we're executing this function
  619. // See the comment at the end of the function why we don't use
  620. // ScopedCritical here.
  621. m_in_critical = m_in_critical + 1;
  622. VERIFY(m_in_irq >= trap.prev_irq_level);
  623. m_in_irq = trap.prev_irq_level;
  624. if (s_smp_enabled)
  625. smp_process_pending_messages();
  626. // Process the deferred call queue. Among other things, this ensures
  627. // that any pending thread unblocks happen before we enter the scheduler.
  628. deferred_call_execute_pending();
  629. auto* current_thread = Processor::current_thread();
  630. if (current_thread) {
  631. auto& current_trap = current_thread->current_trap();
  632. current_trap = trap.next_trap;
  633. Thread::PreviousMode new_previous_mode;
  634. if (current_trap) {
  635. VERIFY(current_trap->regs);
  636. // If we have another higher level trap then we probably returned
  637. // from an interrupt or irq handler. The cs register of the
  638. // new/higher level trap tells us what the mode prior to it was
  639. new_previous_mode = ((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  640. } else {
  641. // If we don't have a higher level trap then we're back in user mode.
  642. // Which means that the previous mode prior to being back in user mode was kernel mode
  643. new_previous_mode = Thread::PreviousMode::KernelMode;
  644. }
  645. if (current_thread->set_previous_mode(new_previous_mode))
  646. current_thread->update_time_scheduled(Scheduler::current_time(), true, false);
  647. }
  648. VERIFY_INTERRUPTS_DISABLED();
  649. // Leave the critical section without actually enabling interrupts.
  650. // We don't want context switches to happen until we're explicitly
  651. // triggering a switch in check_invoke_scheduler.
  652. m_in_critical = m_in_critical - 1;
  653. if (!m_in_irq && !m_in_critical)
  654. check_invoke_scheduler();
  655. }
  656. void Processor::check_invoke_scheduler()
  657. {
  658. InterruptDisabler disabler;
  659. VERIFY(!m_in_irq);
  660. VERIFY(!m_in_critical);
  661. VERIFY(&Processor::current() == this);
  662. if (m_invoke_scheduler_async && m_scheduler_initialized) {
  663. m_invoke_scheduler_async = false;
  664. Scheduler::invoke_async();
  665. }
  666. }
  667. void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
  668. {
  669. auto ptr = vaddr.as_ptr();
  670. while (page_count > 0) {
  671. // clang-format off
  672. asm volatile("invlpg %0"
  673. :
  674. : "m"(*ptr)
  675. : "memory");
  676. // clang-format on
  677. ptr += PAGE_SIZE;
  678. page_count--;
  679. }
  680. }
  681. void Processor::flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  682. {
  683. if (s_smp_enabled && (!Memory::is_user_address(vaddr) || Process::current().thread_count() > 1))
  684. smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
  685. else
  686. flush_tlb_local(vaddr, page_count);
  687. }
  688. void Processor::smp_return_to_pool(ProcessorMessage& msg)
  689. {
  690. ProcessorMessage* next = nullptr;
  691. for (;;) {
  692. msg.next = next;
  693. if (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel))
  694. break;
  695. Processor::pause();
  696. }
  697. }
  698. ProcessorMessage& Processor::smp_get_from_pool()
  699. {
  700. ProcessorMessage* msg;
  701. // The assumption is that messages are never removed from the pool!
  702. for (;;) {
  703. msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
  704. if (!msg) {
  705. if (!Processor::current().smp_process_pending_messages()) {
  706. Processor::pause();
  707. }
  708. continue;
  709. }
  710. // If another processor were to use this message in the meanwhile,
  711. // "msg" is still valid (because it never gets freed). We'd detect
  712. // this because the expected value "msg" and pool would
  713. // no longer match, and the compare_exchange will fail. But accessing
  714. // "msg->next" is always safe here.
  715. if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
  716. // We successfully "popped" this available message
  717. break;
  718. }
  719. }
  720. VERIFY(msg != nullptr);
  721. return *msg;
  722. }
  723. u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
  724. {
  725. VERIFY_INTERRUPTS_DISABLED();
  726. VERIFY(wake_count > 0);
  727. if (!s_smp_enabled)
  728. return 0;
  729. // Wake at most N - 1 processors
  730. if (wake_count >= Processor::count()) {
  731. wake_count = Processor::count() - 1;
  732. VERIFY(wake_count > 0);
  733. }
  734. u32 current_id = Processor::current_id();
  735. u32 did_wake_count = 0;
  736. auto& apic = APIC::the();
  737. while (did_wake_count < wake_count) {
  738. // Try to get a set of idle CPUs and flip them to busy
  739. u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
  740. u32 idle_count = popcount(idle_mask);
  741. if (idle_count == 0)
  742. break; // No (more) idle processor available
  743. u32 found_mask = 0;
  744. for (u32 i = 0; i < idle_count; i++) {
  745. u32 cpu = bit_scan_forward(idle_mask) - 1;
  746. idle_mask &= ~(1u << cpu);
  747. found_mask |= 1u << cpu;
  748. }
  749. idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
  750. if (idle_mask == 0)
  751. continue; // All of them were flipped to busy, try again
  752. idle_count = popcount(idle_mask);
  753. for (u32 i = 0; i < idle_count; i++) {
  754. u32 cpu = bit_scan_forward(idle_mask) - 1;
  755. idle_mask &= ~(1u << cpu);
  756. // Send an IPI to that CPU to wake it up. There is a possibility
  757. // someone else woke it up as well, or that it woke up due to
  758. // a timer interrupt. But we tried hard to avoid this...
  759. apic.send_ipi(cpu);
  760. did_wake_count++;
  761. }
  762. }
  763. return did_wake_count;
  764. }
  765. UNMAP_AFTER_INIT void Processor::smp_enable()
  766. {
  767. size_t msg_pool_size = Processor::count() * 100u;
  768. size_t msg_entries_cnt = Processor::count();
  769. auto msgs = new ProcessorMessage[msg_pool_size];
  770. auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
  771. size_t msg_entry_i = 0;
  772. for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
  773. auto& msg = msgs[i];
  774. msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
  775. msg.per_proc_entries = &msg_entries[msg_entry_i];
  776. for (size_t k = 0; k < msg_entries_cnt; k++)
  777. msg_entries[msg_entry_i + k].msg = &msg;
  778. }
  779. s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);
  780. // Start sending IPI messages
  781. s_smp_enabled = true;
  782. }
  783. void Processor::smp_cleanup_message(ProcessorMessage& msg)
  784. {
  785. switch (msg.type) {
  786. case ProcessorMessage::Callback:
  787. msg.callback_value().~Function();
  788. break;
  789. default:
  790. break;
  791. }
  792. }
  793. bool Processor::smp_process_pending_messages()
  794. {
  795. VERIFY(s_smp_enabled);
  796. bool did_process = false;
  797. enter_critical();
  798. if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
  799. // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
  800. auto reverse_list =
  801. [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
  802. ProcessorMessageEntry* rev_list = nullptr;
  803. while (list) {
  804. auto next = list->next;
  805. list->next = rev_list;
  806. rev_list = list;
  807. list = next;
  808. }
  809. return rev_list;
  810. };
  811. pending_msgs = reverse_list(pending_msgs);
  812. // now process in the right order
  813. ProcessorMessageEntry* next_msg;
  814. for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
  815. next_msg = cur_msg->next;
  816. auto msg = cur_msg->msg;
  817. dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", current_id(), VirtualAddress(msg));
  818. switch (msg->type) {
  819. case ProcessorMessage::Callback:
  820. msg->invoke_callback();
  821. break;
  822. case ProcessorMessage::FlushTlb:
  823. if (Memory::is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
  824. // We assume that we don't cross into kernel land!
  825. VERIFY(Memory::is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
  826. if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
  827. // This processor isn't using this page directory right now, we can ignore this request
  828. dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", current_id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
  829. break;
  830. }
  831. }
  832. flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
  833. break;
  834. }
  835. bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
  836. auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
  837. VERIFY(prev_refs != 0);
  838. if (prev_refs == 1) {
  839. // All processors handled this. If this is an async message,
  840. // we need to clean it up and return it to the pool
  841. if (is_async) {
  842. smp_cleanup_message(*msg);
  843. smp_return_to_pool(*msg);
  844. }
  845. }
  846. if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
  847. halt_this();
  848. }
  849. did_process = true;
  850. } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
  851. halt_this();
  852. }
  853. leave_critical();
  854. return did_process;
  855. }
  856. bool Processor::smp_enqueue_message(ProcessorMessage& msg)
  857. {
  858. // Note that it's quite possible that the other processor may pop
  859. // the queue at any given time. We rely on the fact that the messages
  860. // are pooled and never get freed!
  861. auto& msg_entry = msg.per_proc_entries[id()];
  862. VERIFY(msg_entry.msg == &msg);
  863. ProcessorMessageEntry* next = nullptr;
  864. for (;;) {
  865. msg_entry.next = next;
  866. if (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel))
  867. break;
  868. Processor::pause();
  869. }
  870. // If the enqueued message was the only message in the queue when posted,
  871. // we return true. This is used by callers when deciding whether to generate an IPI.
  872. return next == nullptr;
  873. }
  874. void Processor::smp_broadcast_message(ProcessorMessage& msg)
  875. {
  876. auto& current_processor = Processor::current();
  877. dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} processor: {}", current_processor.id(), VirtualAddress(&msg), count(), VirtualAddress(&current_processor));
  878. msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
  879. VERIFY(msg.refs > 0);
  880. bool need_broadcast = false;
  881. for_each(
  882. [&](Processor& proc) {
  883. if (&proc != &current_processor) {
  884. if (proc.smp_enqueue_message(msg))
  885. need_broadcast = true;
  886. }
  887. });
  888. // Now trigger an IPI on all other APs (unless all targets already had messages queued)
  889. if (need_broadcast)
  890. APIC::the().broadcast_ipi();
  891. }
  892. void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
  893. {
  894. auto& cur_proc = Processor::current();
  895. VERIFY(!msg.async);
  896. // If synchronous then we must cleanup and return the message back
  897. // to the pool. Otherwise, the last processor to complete it will return it
  898. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  899. Processor::pause();
  900. // We need to process any messages that may have been sent to
  901. // us while we're waiting. This also checks if another processor
  902. // may have requested us to halt.
  903. cur_proc.smp_process_pending_messages();
  904. }
  905. smp_cleanup_message(msg);
  906. smp_return_to_pool(msg);
  907. }
  908. void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
  909. {
  910. auto& current_processor = Processor::current();
  911. VERIFY(cpu != current_processor.id());
  912. auto& target_processor = processors()[cpu];
  913. msg.async = async;
  914. dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} processor: {}", current_processor.id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_processor));
  915. msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
  916. if (target_processor->smp_enqueue_message(msg)) {
  917. APIC::the().send_ipi(cpu);
  918. }
  919. if (!async) {
  920. // If synchronous then we must cleanup and return the message back
  921. // to the pool. Otherwise, the last processor to complete it will return it
  922. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  923. Processor::pause();
  924. // We need to process any messages that may have been sent to
  925. // us while we're waiting. This also checks if another processor
  926. // may have requested us to halt.
  927. current_processor.smp_process_pending_messages();
  928. }
  929. smp_cleanup_message(msg);
  930. smp_return_to_pool(msg);
  931. }
  932. }
  933. void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
  934. {
  935. auto& msg = smp_get_from_pool();
  936. msg.type = ProcessorMessage::Callback;
  937. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  938. smp_unicast_message(cpu, msg, async);
  939. }
  940. void Processor::smp_broadcast_flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  941. {
  942. auto& msg = smp_get_from_pool();
  943. msg.async = false;
  944. msg.type = ProcessorMessage::FlushTlb;
  945. msg.flush_tlb.page_directory = page_directory;
  946. msg.flush_tlb.ptr = vaddr.as_ptr();
  947. msg.flush_tlb.page_count = page_count;
  948. smp_broadcast_message(msg);
  949. // While the other processors handle this request, we'll flush ours
  950. flush_tlb_local(vaddr, page_count);
  951. // Now wait until everybody is done as well
  952. smp_broadcast_wait_sync(msg);
  953. }
  954. void Processor::smp_broadcast_halt()
  955. {
  956. // We don't want to use a message, because this could have been triggered
  957. // by being out of memory and we might not be able to get a message
  958. for_each(
  959. [&](Processor& proc) {
  960. proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
  961. });
  962. // Now trigger an IPI on all other APs
  963. APIC::the().broadcast_ipi();
  964. }
  965. void Processor::Processor::halt()
  966. {
  967. if (s_smp_enabled)
  968. smp_broadcast_halt();
  969. halt_this();
  970. }
  971. UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
  972. {
  973. size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
  974. for (size_t i = 0; i < pool_count; i++) {
  975. auto& entry = m_deferred_call_pool[i];
  976. entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
  977. new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
  978. entry.was_allocated = false;
  979. }
  980. m_pending_deferred_calls = nullptr;
  981. m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
  982. }
  983. void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
  984. {
  985. VERIFY(m_in_critical);
  986. VERIFY(!entry->was_allocated);
  987. entry->handler_value() = {};
  988. entry->next = m_free_deferred_call_pool_entry;
  989. m_free_deferred_call_pool_entry = entry;
  990. }
  991. DeferredCallEntry* Processor::deferred_call_get_free()
  992. {
  993. VERIFY(m_in_critical);
  994. if (m_free_deferred_call_pool_entry) {
  995. // Fast path, we have an entry in our pool
  996. auto* entry = m_free_deferred_call_pool_entry;
  997. m_free_deferred_call_pool_entry = entry->next;
  998. VERIFY(!entry->was_allocated);
  999. return entry;
  1000. }
  1001. auto* entry = new DeferredCallEntry;
  1002. new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
  1003. entry->was_allocated = true;
  1004. return entry;
  1005. }
  1006. void Processor::deferred_call_execute_pending()
  1007. {
  1008. VERIFY(m_in_critical);
  1009. if (!m_pending_deferred_calls)
  1010. return;
  1011. auto* pending_list = m_pending_deferred_calls;
  1012. m_pending_deferred_calls = nullptr;
  1013. // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
  1014. auto reverse_list =
  1015. [](DeferredCallEntry* list) -> DeferredCallEntry* {
  1016. DeferredCallEntry* rev_list = nullptr;
  1017. while (list) {
  1018. auto next = list->next;
  1019. list->next = rev_list;
  1020. rev_list = list;
  1021. list = next;
  1022. }
  1023. return rev_list;
  1024. };
  1025. pending_list = reverse_list(pending_list);
  1026. do {
  1027. pending_list->invoke_handler();
  1028. // Return the entry back to the pool, or free it
  1029. auto* next = pending_list->next;
  1030. if (pending_list->was_allocated) {
  1031. pending_list->handler_value().~Function();
  1032. delete pending_list;
  1033. } else
  1034. deferred_call_return_to_pool(pending_list);
  1035. pending_list = next;
  1036. } while (pending_list);
  1037. }
  1038. void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
  1039. {
  1040. VERIFY(m_in_critical);
  1041. entry->next = m_pending_deferred_calls;
  1042. m_pending_deferred_calls = entry;
  1043. }
  1044. void Processor::deferred_call_queue(Function<void()> callback)
  1045. {
  1046. // NOTE: If we are called outside of a critical section and outside
  1047. // of an irq handler, the function will be executed before we return!
  1048. ScopedCritical critical;
  1049. auto& cur_proc = Processor::current();
  1050. auto* entry = cur_proc.deferred_call_get_free();
  1051. entry->handler_value() = move(callback);
  1052. cur_proc.deferred_call_queue_entry(entry);
  1053. }
// Builds this processor's GDT from scratch, loads it, loads the task
// register, and points the segment registers (and, on x86_64, the GS base
// MSR) at their new descriptors.
UNMAP_AFTER_INIT void Processor::gdt_init()
{
    // Start with an empty table and descriptor-table register image.
    m_gdt_length = 0;
    m_gdtr.address = nullptr;
    m_gdtr.limit = 0;

    // Selector 0 gets an all-zero entry.
    write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);

    // Code and data entries, written as raw low/high descriptor words.
    // NOTE(review): the "0"/"3" suffixes presumably denote the privilege ring — confirm.
#if ARCH(I386)
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
#else
    // Unlike the i386 branch, data3 is written before code3 here.
    // NOTE(review): presumably this matches a selector layout required by the
    // syscall/sysret path — confirm against the selector definitions.
    write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
    write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00af9200); // data0
    write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
    write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
#endif

#if ARCH(I386)
    // TLS descriptor (DPL 3). Base and limit start out as zero here; they are
    // filled in per thread in enter_thread_context().
    Descriptor tls_descriptor {};
    tls_descriptor.low = tls_descriptor.high = 0;
    tls_descriptor.dpl = 3;
    tls_descriptor.segment_present = 1;
    tls_descriptor.granularity = 0;
    tls_descriptor.operation_size64 = 0;
    tls_descriptor.operation_size32 = 1;
    tls_descriptor.descriptor_type = 1;
    tls_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3

    // Per-processor descriptor (DPL 0) whose base is this Processor object
    // and whose limit covers exactly sizeof(Processor) bytes. GS is loaded
    // with this selector further below.
    Descriptor gs_descriptor {};
    gs_descriptor.set_base(VirtualAddress { this });
    gs_descriptor.set_limit(sizeof(Processor) - 1);
    gs_descriptor.dpl = 0;
    gs_descriptor.segment_present = 1;
    gs_descriptor.granularity = 0;
    gs_descriptor.operation_size64 = 0;
    gs_descriptor.operation_size32 = 1;
    gs_descriptor.descriptor_type = 1;
    gs_descriptor.type = 2;
    write_gdt_entry(GDT_SELECTOR_PROC, gs_descriptor); // gs0
#endif

    // TSS descriptor for this processor's m_tss. Only the low 32 bits of the
    // address go into this entry.
    Descriptor tss_descriptor {};
    tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
    tss_descriptor.set_limit(sizeof(TSS) - 1);
    tss_descriptor.dpl = 0;
    tss_descriptor.segment_present = 1;
    tss_descriptor.granularity = 0;
    tss_descriptor.operation_size64 = 0;
    tss_descriptor.operation_size32 = 1;
    tss_descriptor.descriptor_type = 0;
    tss_descriptor.type = Descriptor::SystemType::AvailableTSS;
    write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss

#if ARCH(X86_64)
    // On x86_64 a second entry carries the upper 32 bits of the TSS address
    // in its low word.
    Descriptor tss_descriptor_part2 {};
    tss_descriptor_part2.low = (size_t)&m_tss >> 32;
    write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
#endif

    // Activate the new table, then load the task register with the TSS selector.
    flush_gdt();
    load_task_register(GDT_SELECTOR_TSS);

#if ARCH(X86_64)
    // Point the GS base MSR at this Processor object.
    MSR gs_base(MSR_GS_BASE);
    gs_base.set((u64)this);
#else
    // Load DS, ES, FS and SS with the data0 selector, then GS with the
    // per-processor selector set up above.
    asm volatile(
        "mov %%ax, %%ds\n"
        "mov %%ax, %%es\n"
        "mov %%ax, %%fs\n"
        "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
        : "memory");
    set_gs(GDT_SELECTOR_PROC);
#endif

#if ARCH(I386)
    // Make sure CS points to the kernel code descriptor.
    // A far jump to the very next instruction is used to reload CS.
    // clang-format off
    asm volatile(
        "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
        "sanity:\n");
    // clang-format on
#endif
}
  1133. extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
  1134. {
  1135. VERIFY(!are_interrupts_enabled());
  1136. VERIFY(is_kernel_mode());
  1137. dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
  1138. VERIFY(to_thread == Thread::current());
  1139. Scheduler::enter_current(*from_thread);
  1140. auto in_critical = to_thread->saved_critical();
  1141. VERIFY(in_critical > 0);
  1142. Processor::restore_in_critical(in_critical);
  1143. // Since we got here and don't have Scheduler::context_switch in the
  1144. // call stack (because this is the first time we switched into this
  1145. // context), we need to notify the scheduler so that it can release
  1146. // the scheduler lock. We don't want to enable interrupts at this point
  1147. // as we're still in the middle of a context switch. Doing so could
  1148. // trigger a context switch within a context switch, leading to a crash.
  1149. FlatPtr flags = trap->regs->flags();
  1150. Scheduler::leave_on_first_switch(flags & ~0x200);
  1151. }
// Performs the processor-state part of a context switch from `from_thread`
// to `to_thread`: saves the outgoing FPU state, swaps segment registers
// (i386), debug registers, thread-local storage base and page directory,
// restores the incoming thread's critical section depth and finally loads
// its FPU state.
extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
{
    // Unless we are switching to ourselves, the outgoing thread must no
    // longer be Running; the incoming thread must be.
    VERIFY(from_thread == to_thread || from_thread->state() != Thread::State::Running);
    VERIFY(to_thread->state() == Thread::State::Running);

    // The FXSR feature decides which FPU save/restore instructions are used below.
    bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);

    Processor::set_current_thread(*to_thread);

    auto& from_regs = from_thread->regs();
    auto& to_regs = to_thread->regs();

    // NOTE: IOPL should never be non-zero in any situation, so let's panic immediately
    // instead of carrying on with elevated I/O privileges.
    VERIFY(get_iopl_from_eflags(to_regs.flags()) == 0);

    // Save the outgoing thread's FPU state.
    if (has_fxsr)
        asm volatile("fxsave %0"
                     : "=m"(from_thread->fpu_state()));
    else
        asm volatile("fnsave %0"
                     : "=m"(from_thread->fpu_state()));

#if ARCH(I386)
    // Remember the outgoing thread's FS/GS selectors and load the incoming ones.
    from_regs.fs = get_fs();
    from_regs.gs = get_gs();
    set_fs(to_regs.fs);
    set_gs(to_regs.gs);
#endif

    // Debug registers are only saved for a traced outgoing process. For the
    // incoming thread they are loaded if its process is traced and cleared
    // otherwise.
    if (from_thread->process().is_traced())
        read_debug_registers_into(from_thread->debug_register_state());

    if (to_thread->process().is_traced()) {
        write_debug_registers_from(to_thread->debug_register_state());
    } else {
        clear_debug_registers();
    }

    auto& processor = Processor::current();
#if ARCH(I386)
    // Point this processor's TLS GDT entry at the incoming thread's
    // thread-specific data.
    auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
    tls_descriptor.set_base(to_thread->thread_specific_data());
    tls_descriptor.set_limit(to_thread->thread_specific_region_size());
#else
    // Set the FS base MSR to the incoming thread's thread-specific data.
    MSR fs_base_msr(MSR_FS_BASE);
    fs_base_msr.set(to_thread->thread_specific_data().get());
#endif

    // Only write CR3 when the two threads' saved CR3 values differ.
    if (from_regs.cr3 != to_regs.cr3)
        write_cr3(to_regs.cr3);

    to_thread->set_cpu(processor.id());

    // Restore the critical section depth the incoming thread was saved with.
    auto in_critical = to_thread->saved_critical();
    VERIFY(in_critical > 0);
    Processor::restore_in_critical(in_critical);

    // Load the incoming thread's FPU state.
    if (has_fxsr)
        asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
    else
        asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
}
  1202. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
  1203. {
  1204. VERIFY_INTERRUPTS_DISABLED();
  1205. thread->regs().set_flags(flags);
  1206. return Processor::current().init_context(*thread, true);
  1207. }
  1208. void Processor::assume_context(Thread& thread, FlatPtr flags)
  1209. {
  1210. dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);
  1211. VERIFY_INTERRUPTS_DISABLED();
  1212. Scheduler::prepare_after_exec();
  1213. // in_critical() should be 2 here. The critical section in Process::exec
  1214. // and then the scheduler lock
  1215. VERIFY(Processor::in_critical() == 2);
  1216. do_assume_context(&thread, flags);
  1217. VERIFY_NOT_REACHED();
  1218. }
  1219. u64 Processor::time_spent_idle() const
  1220. {
  1221. return m_idle_thread->time_in_user() + m_idle_thread->time_in_kernel();
  1222. }
  1223. }