Processor.cpp 61 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. * Copyright (c) 2022, Linus Groh <linusg@serenityos.org>
  4. * Copyright (c) 2022, the SerenityOS developers.
  5. *
  6. * SPDX-License-Identifier: BSD-2-Clause
  7. */
  8. #include <AK/BuiltinWrappers.h>
  9. #include <AK/Format.h>
  10. #include <AK/StdLibExtras.h>
  11. #include <AK/StringBuilder.h>
  12. #include <AK/Types.h>
  13. #include <Kernel/Interrupts/APIC.h>
  14. #include <Kernel/Process.h>
  15. #include <Kernel/Scheduler.h>
  16. #include <Kernel/Sections.h>
  17. #include <Kernel/StdLib.h>
  18. #include <Kernel/Thread.h>
  19. #include <Kernel/Arch/InterruptDisabler.h>
  20. #include <Kernel/Arch/Interrupts.h>
  21. #include <Kernel/Arch/Processor.h>
  22. #include <Kernel/Arch/SafeMem.h>
  23. #include <Kernel/Arch/ScopedCritical.h>
  24. #include <Kernel/Arch/x86/CPUID.h>
  25. #include <Kernel/Arch/x86/MSR.h>
  26. #include <Kernel/Arch/x86/ProcessorInfo.h>
  27. #include <Kernel/Arch/x86/TrapFrame.h>
  28. #include <Kernel/Memory/PageDirectory.h>
  29. #include <Kernel/Memory/ScopedAddressSpaceSwitcher.h>
  30. namespace Kernel {
  31. READONLY_AFTER_INIT FPUState Processor::s_clean_fpu_state;
  32. READONLY_AFTER_INIT static ProcessorContainer s_processors {};
  33. READONLY_AFTER_INIT Atomic<u32> Processor::g_total_processors;
  34. READONLY_AFTER_INIT static bool volatile s_smp_enabled;
  35. static Atomic<ProcessorMessage*> s_message_pool;
  36. Atomic<u32> Processor::s_idle_cpu_mask { 0 };
  37. // The compiler can't see the calls to these functions inside assembly.
  38. // Declare them, to avoid dead code warnings.
  39. extern "C" void context_first_init(Thread* from_thread, Thread* to_thread, TrapFrame* trap) __attribute__((used));
  40. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread) __attribute__((used));
  41. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags) __attribute__((used));
  42. extern "C" void syscall_entry();
  43. bool Processor::is_smp_enabled()
  44. {
  45. return s_smp_enabled;
  46. }
  47. UNMAP_AFTER_INIT static void sse_init()
  48. {
  49. write_cr0((read_cr0() & 0xfffffffbu) | 0x2);
  50. write_cr4(read_cr4() | 0x600);
  51. }
  52. void exit_kernel_thread(void)
  53. {
  54. Thread::current()->exit();
  55. }
  56. UNMAP_AFTER_INIT void Processor::cpu_detect()
  57. {
  58. // NOTE: This is called during Processor::early_initialize, we cannot
  59. // safely log at this point because we don't have kmalloc
  60. // initialized yet!
  61. m_features = CPUFeature::Type(0u);
  62. CPUID processor_info(0x1);
  63. auto handle_edx_bit_11_feature = [&] {
  64. u32 stepping = processor_info.eax() & 0xf;
  65. u32 model = (processor_info.eax() >> 4) & 0xf;
  66. u32 family = (processor_info.eax() >> 8) & 0xf;
  67. // FIXME: I have no clue what these mean or where it's from (the Intel manual I've seen just says EDX[11] is SEP).
  68. // If you do, please convert them to constants or add comments!
  69. if (!(family == 6 && model < 3 && stepping < 3))
  70. m_features |= CPUFeature::SEP;
  71. if ((family == 6 && model >= 3) || (family == 0xf && model >= 0xe))
  72. m_features |= CPUFeature::CONSTANT_TSC;
  73. };
  74. if (processor_info.ecx() & (1 << 0))
  75. m_features |= CPUFeature::SSE3;
  76. if (processor_info.ecx() & (1 << 1))
  77. m_features |= CPUFeature::PCLMULQDQ;
  78. if (processor_info.ecx() & (1 << 2))
  79. m_features |= CPUFeature::DTES64;
  80. if (processor_info.ecx() & (1 << 3))
  81. m_features |= CPUFeature::MONITOR;
  82. if (processor_info.ecx() & (1 << 4))
  83. m_features |= CPUFeature::DS_CPL;
  84. if (processor_info.ecx() & (1 << 5))
  85. m_features |= CPUFeature::VMX;
  86. if (processor_info.ecx() & (1 << 6))
  87. m_features |= CPUFeature::SMX;
  88. if (processor_info.ecx() & (1 << 7))
  89. m_features |= CPUFeature::EST;
  90. if (processor_info.ecx() & (1 << 8))
  91. m_features |= CPUFeature::TM2;
  92. if (processor_info.ecx() & (1 << 9))
  93. m_features |= CPUFeature::SSSE3;
  94. if (processor_info.ecx() & (1 << 10))
  95. m_features |= CPUFeature::CNXT_ID;
  96. if (processor_info.ecx() & (1 << 11))
  97. m_features |= CPUFeature::SDBG;
  98. if (processor_info.ecx() & (1 << 12))
  99. m_features |= CPUFeature::FMA;
  100. if (processor_info.ecx() & (1 << 13))
  101. m_features |= CPUFeature::CX16;
  102. if (processor_info.ecx() & (1 << 14))
  103. m_features |= CPUFeature::XTPR;
  104. if (processor_info.ecx() & (1 << 15))
  105. m_features |= CPUFeature::PDCM;
  106. if (processor_info.ecx() & (1 << 17))
  107. m_features |= CPUFeature::PCID;
  108. if (processor_info.ecx() & (1 << 18))
  109. m_features |= CPUFeature::DCA;
  110. if (processor_info.ecx() & (1 << 19))
  111. m_features |= CPUFeature::SSE4_1;
  112. if (processor_info.ecx() & (1 << 20))
  113. m_features |= CPUFeature::SSE4_2;
  114. if (processor_info.ecx() & (1 << 21))
  115. m_features |= CPUFeature::X2APIC;
  116. if (processor_info.ecx() & (1 << 22))
  117. m_features |= CPUFeature::MOVBE;
  118. if (processor_info.ecx() & (1 << 23))
  119. m_features |= CPUFeature::POPCNT;
  120. if (processor_info.ecx() & (1 << 24))
  121. m_features |= CPUFeature::TSC_DEADLINE;
  122. if (processor_info.ecx() & (1 << 25))
  123. m_features |= CPUFeature::AES;
  124. if (processor_info.ecx() & (1 << 26))
  125. m_features |= CPUFeature::XSAVE;
  126. if (processor_info.ecx() & (1 << 27))
  127. m_features |= CPUFeature::OSXSAVE;
  128. if (processor_info.ecx() & (1 << 28))
  129. m_features |= CPUFeature::AVX;
  130. if (processor_info.ecx() & (1 << 29))
  131. m_features |= CPUFeature::F16C;
  132. if (processor_info.ecx() & (1 << 30))
  133. m_features |= CPUFeature::RDRAND;
  134. if (processor_info.ecx() & (1 << 31))
  135. m_features |= CPUFeature::HYPERVISOR;
  136. if (processor_info.edx() & (1 << 0))
  137. m_features |= CPUFeature::FPU;
  138. if (processor_info.edx() & (1 << 1))
  139. m_features |= CPUFeature::VME;
  140. if (processor_info.edx() & (1 << 2))
  141. m_features |= CPUFeature::DE;
  142. if (processor_info.edx() & (1 << 3))
  143. m_features |= CPUFeature::PSE;
  144. if (processor_info.edx() & (1 << 4))
  145. m_features |= CPUFeature::TSC;
  146. if (processor_info.edx() & (1 << 5))
  147. m_features |= CPUFeature::MSR;
  148. if (processor_info.edx() & (1 << 6))
  149. m_features |= CPUFeature::PAE;
  150. if (processor_info.edx() & (1 << 7))
  151. m_features |= CPUFeature::MCE;
  152. if (processor_info.edx() & (1 << 8))
  153. m_features |= CPUFeature::CX8;
  154. if (processor_info.edx() & (1 << 9))
  155. m_features |= CPUFeature::APIC;
  156. if (processor_info.edx() & (1 << 11))
  157. handle_edx_bit_11_feature();
  158. if (processor_info.edx() & (1 << 12))
  159. m_features |= CPUFeature::MTRR;
  160. if (processor_info.edx() & (1 << 13))
  161. m_features |= CPUFeature::PGE;
  162. if (processor_info.edx() & (1 << 14))
  163. m_features |= CPUFeature::MCA;
  164. if (processor_info.edx() & (1 << 15))
  165. m_features |= CPUFeature::CMOV;
  166. if (processor_info.edx() & (1 << 16))
  167. m_features |= CPUFeature::PAT;
  168. if (processor_info.edx() & (1 << 17))
  169. m_features |= CPUFeature::PSE36;
  170. if (processor_info.edx() & (1 << 18))
  171. m_features |= CPUFeature::PSN;
  172. if (processor_info.edx() & (1 << 19))
  173. m_features |= CPUFeature::CLFLUSH;
  174. if (processor_info.edx() & (1 << 21))
  175. m_features |= CPUFeature::DS;
  176. if (processor_info.edx() & (1 << 22))
  177. m_features |= CPUFeature::ACPI;
  178. if (processor_info.edx() & (1 << 23))
  179. m_features |= CPUFeature::MMX;
  180. if (processor_info.edx() & (1 << 24))
  181. m_features |= CPUFeature::FXSR;
  182. if (processor_info.edx() & (1 << 25))
  183. m_features |= CPUFeature::SSE;
  184. if (processor_info.edx() & (1 << 26))
  185. m_features |= CPUFeature::SSE2;
  186. if (processor_info.edx() & (1 << 27))
  187. m_features |= CPUFeature::SS;
  188. if (processor_info.edx() & (1 << 28))
  189. m_features |= CPUFeature::HTT;
  190. if (processor_info.edx() & (1 << 29))
  191. m_features |= CPUFeature::TM;
  192. if (processor_info.edx() & (1 << 30))
  193. m_features |= CPUFeature::IA64;
  194. if (processor_info.edx() & (1 << 31))
  195. m_features |= CPUFeature::PBE;
  196. CPUID extended_features(0x7);
  197. if (extended_features.ebx() & (1 << 0))
  198. m_features |= CPUFeature::FSGSBASE;
  199. if (extended_features.ebx() & (1 << 1))
  200. m_features |= CPUFeature::TSC_ADJUST;
  201. if (extended_features.ebx() & (1 << 2))
  202. m_features |= CPUFeature::SGX;
  203. if (extended_features.ebx() & (1 << 3))
  204. m_features |= CPUFeature::BMI1;
  205. if (extended_features.ebx() & (1 << 4))
  206. m_features |= CPUFeature::HLE;
  207. if (extended_features.ebx() & (1 << 5))
  208. m_features |= CPUFeature::AVX2;
  209. if (extended_features.ebx() & (1 << 6))
  210. m_features |= CPUFeature::FDP_EXCPTN_ONLY;
  211. if (extended_features.ebx() & (1 << 7))
  212. m_features |= CPUFeature::SMEP;
  213. if (extended_features.ebx() & (1 << 8))
  214. m_features |= CPUFeature::BMI2;
  215. if (extended_features.ebx() & (1 << 9))
  216. m_features |= CPUFeature::ERMS;
  217. if (extended_features.ebx() & (1 << 10))
  218. m_features |= CPUFeature::INVPCID;
  219. if (extended_features.ebx() & (1 << 11))
  220. m_features |= CPUFeature::RTM;
  221. if (extended_features.ebx() & (1 << 12))
  222. m_features |= CPUFeature::PQM;
  223. if (extended_features.ebx() & (1 << 13))
  224. m_features |= CPUFeature::ZERO_FCS_FDS;
  225. if (extended_features.ebx() & (1 << 14))
  226. m_features |= CPUFeature::MPX;
  227. if (extended_features.ebx() & (1 << 15))
  228. m_features |= CPUFeature::PQE;
  229. if (extended_features.ebx() & (1 << 16))
  230. m_features |= CPUFeature::AVX512_F;
  231. if (extended_features.ebx() & (1 << 17))
  232. m_features |= CPUFeature::AVX512_DQ;
  233. if (extended_features.ebx() & (1 << 18))
  234. m_features |= CPUFeature::RDSEED;
  235. if (extended_features.ebx() & (1 << 19))
  236. m_features |= CPUFeature::ADX;
  237. if (extended_features.ebx() & (1 << 20))
  238. m_features |= CPUFeature::SMAP;
  239. if (extended_features.ebx() & (1 << 21))
  240. m_features |= CPUFeature::AVX512_IFMA;
  241. if (extended_features.ebx() & (1 << 22))
  242. m_features |= CPUFeature::PCOMMIT;
  243. if (extended_features.ebx() & (1 << 23))
  244. m_features |= CPUFeature::CLFLUSHOPT;
  245. if (extended_features.ebx() & (1 << 24))
  246. m_features |= CPUFeature::CLWB;
  247. if (extended_features.ebx() & (1 << 25))
  248. m_features |= CPUFeature::INTEL_PT;
  249. if (extended_features.ebx() & (1 << 26))
  250. m_features |= CPUFeature::AVX512_PF;
  251. if (extended_features.ebx() & (1 << 27))
  252. m_features |= CPUFeature::AVX512_ER;
  253. if (extended_features.ebx() & (1 << 28))
  254. m_features |= CPUFeature::AVX512_CD;
  255. if (extended_features.ebx() & (1 << 29))
  256. m_features |= CPUFeature::SHA;
  257. if (extended_features.ebx() & (1 << 30))
  258. m_features |= CPUFeature::AVX512_BW;
  259. if (extended_features.ebx() & (1 << 31))
  260. m_features |= CPUFeature::AVX512_VL;
  261. if (extended_features.ecx() & (1 << 0))
  262. m_features |= CPUFeature::PREFETCHWT1;
  263. if (extended_features.ecx() & (1 << 1))
  264. m_features |= CPUFeature::AVX512_VBMI;
  265. if (extended_features.ecx() & (1 << 2))
  266. m_features |= CPUFeature::UMIP;
  267. if (extended_features.ecx() & (1 << 3))
  268. m_features |= CPUFeature::PKU;
  269. if (extended_features.ecx() & (1 << 4))
  270. m_features |= CPUFeature::OSPKE;
  271. if (extended_features.ecx() & (1 << 5))
  272. m_features |= CPUFeature::WAITPKG;
  273. if (extended_features.ecx() & (1 << 6))
  274. m_features |= CPUFeature::AVX512_VBMI2;
  275. if (extended_features.ecx() & (1 << 7))
  276. m_features |= CPUFeature::CET_SS;
  277. if (extended_features.ecx() & (1 << 8))
  278. m_features |= CPUFeature::GFNI;
  279. if (extended_features.ecx() & (1 << 9))
  280. m_features |= CPUFeature::VAES;
  281. if (extended_features.ecx() & (1 << 10))
  282. m_features |= CPUFeature::VPCLMULQDQ;
  283. if (extended_features.ecx() & (1 << 11))
  284. m_features |= CPUFeature::AVX512_VNNI;
  285. if (extended_features.ecx() & (1 << 12))
  286. m_features |= CPUFeature::AVX512_BITALG;
  287. if (extended_features.ecx() & (1 << 13))
  288. m_features |= CPUFeature::TME_EN;
  289. if (extended_features.ecx() & (1 << 14))
  290. m_features |= CPUFeature::AVX512_VPOPCNTDQ;
  291. if (extended_features.ecx() & (1 << 16))
  292. m_features |= CPUFeature::INTEL_5_LEVEL_PAGING;
  293. if (extended_features.ecx() & (1 << 22))
  294. m_features |= CPUFeature::RDPID;
  295. if (extended_features.ecx() & (1 << 23))
  296. m_features |= CPUFeature::KL;
  297. if (extended_features.ecx() & (1 << 25))
  298. m_features |= CPUFeature::CLDEMOTE;
  299. if (extended_features.ecx() & (1 << 27))
  300. m_features |= CPUFeature::MOVDIRI;
  301. if (extended_features.ecx() & (1 << 28))
  302. m_features |= CPUFeature::MOVDIR64B;
  303. if (extended_features.ecx() & (1 << 29))
  304. m_features |= CPUFeature::ENQCMD;
  305. if (extended_features.ecx() & (1 << 30))
  306. m_features |= CPUFeature::SGX_LC;
  307. if (extended_features.ecx() & (1 << 31))
  308. m_features |= CPUFeature::PKS;
  309. if (extended_features.edx() & (1 << 2))
  310. m_features |= CPUFeature::AVX512_4VNNIW;
  311. if (extended_features.edx() & (1 << 3))
  312. m_features |= CPUFeature::AVX512_4FMAPS;
  313. if (extended_features.edx() & (1 << 4))
  314. m_features |= CPUFeature::FSRM;
  315. if (extended_features.edx() & (1 << 8))
  316. m_features |= CPUFeature::AVX512_VP2INTERSECT;
  317. if (extended_features.edx() & (1 << 9))
  318. m_features |= CPUFeature::SRBDS_CTRL;
  319. if (extended_features.edx() & (1 << 10))
  320. m_features |= CPUFeature::MD_CLEAR;
  321. if (extended_features.edx() & (1 << 11))
  322. m_features |= CPUFeature::RTM_ALWAYS_ABORT;
  323. if (extended_features.edx() & (1 << 13))
  324. m_features |= CPUFeature::TSX_FORCE_ABORT;
  325. if (extended_features.edx() & (1 << 14))
  326. m_features |= CPUFeature::SERIALIZE;
  327. if (extended_features.edx() & (1 << 15))
  328. m_features |= CPUFeature::HYBRID;
  329. if (extended_features.edx() & (1 << 16))
  330. m_features |= CPUFeature::TSXLDTRK;
  331. if (extended_features.edx() & (1 << 18))
  332. m_features |= CPUFeature::PCONFIG;
  333. if (extended_features.edx() & (1 << 19))
  334. m_features |= CPUFeature::LBR;
  335. if (extended_features.edx() & (1 << 20))
  336. m_features |= CPUFeature::CET_IBT;
  337. if (extended_features.edx() & (1 << 22))
  338. m_features |= CPUFeature::AMX_BF16;
  339. if (extended_features.edx() & (1 << 23))
  340. m_features |= CPUFeature::AVX512_FP16;
  341. if (extended_features.edx() & (1 << 24))
  342. m_features |= CPUFeature::AMX_TILE;
  343. if (extended_features.edx() & (1 << 25))
  344. m_features |= CPUFeature::AMX_INT8;
  345. if (extended_features.edx() & (1 << 26))
  346. m_features |= CPUFeature::SPEC_CTRL;
  347. if (extended_features.edx() & (1 << 27))
  348. m_features |= CPUFeature::STIBP;
  349. if (extended_features.edx() & (1 << 28))
  350. m_features |= CPUFeature::L1D_FLUSH;
  351. if (extended_features.edx() & (1 << 29))
  352. m_features |= CPUFeature::IA32_ARCH_CAPABILITIES;
  353. if (extended_features.edx() & (1 << 30))
  354. m_features |= CPUFeature::IA32_CORE_CAPABILITIES;
  355. if (extended_features.edx() & (1 << 31))
  356. m_features |= CPUFeature::SSBD;
  357. u32 max_extended_leaf = CPUID(0x80000000).eax();
  358. if (max_extended_leaf >= 0x80000001) {
  359. CPUID extended_processor_info(0x80000001);
  360. if (extended_processor_info.ecx() & (1 << 0))
  361. m_features |= CPUFeature::LAHF_LM;
  362. if (extended_processor_info.ecx() & (1 << 1))
  363. m_features |= CPUFeature::CMP_LEGACY;
  364. if (extended_processor_info.ecx() & (1 << 2))
  365. m_features |= CPUFeature::SVM;
  366. if (extended_processor_info.ecx() & (1 << 3))
  367. m_features |= CPUFeature::EXTAPIC;
  368. if (extended_processor_info.ecx() & (1 << 4))
  369. m_features |= CPUFeature::CR8_LEGACY;
  370. if (extended_processor_info.ecx() & (1 << 5))
  371. m_features |= CPUFeature::ABM;
  372. if (extended_processor_info.ecx() & (1 << 6))
  373. m_features |= CPUFeature::SSE4A;
  374. if (extended_processor_info.ecx() & (1 << 7))
  375. m_features |= CPUFeature::MISALIGNSSE;
  376. if (extended_processor_info.ecx() & (1 << 8))
  377. m_features |= CPUFeature::_3DNOWPREFETCH;
  378. if (extended_processor_info.ecx() & (1 << 9))
  379. m_features |= CPUFeature::OSVW;
  380. if (extended_processor_info.ecx() & (1 << 10))
  381. m_features |= CPUFeature::IBS;
  382. if (extended_processor_info.ecx() & (1 << 11))
  383. m_features |= CPUFeature::XOP;
  384. if (extended_processor_info.ecx() & (1 << 12))
  385. m_features |= CPUFeature::SKINIT;
  386. if (extended_processor_info.ecx() & (1 << 13))
  387. m_features |= CPUFeature::WDT;
  388. if (extended_processor_info.ecx() & (1 << 15))
  389. m_features |= CPUFeature::LWP;
  390. if (extended_processor_info.ecx() & (1 << 16))
  391. m_features |= CPUFeature::FMA4;
  392. if (extended_processor_info.ecx() & (1 << 17))
  393. m_features |= CPUFeature::TCE;
  394. if (extended_processor_info.ecx() & (1 << 19))
  395. m_features |= CPUFeature::NODEID_MSR;
  396. if (extended_processor_info.ecx() & (1 << 21))
  397. m_features |= CPUFeature::TBM;
  398. if (extended_processor_info.ecx() & (1 << 22))
  399. m_features |= CPUFeature::TOPOEXT;
  400. if (extended_processor_info.ecx() & (1 << 23))
  401. m_features |= CPUFeature::PERFCTR_CORE;
  402. if (extended_processor_info.ecx() & (1 << 24))
  403. m_features |= CPUFeature::PERFCTR_NB;
  404. if (extended_processor_info.ecx() & (1 << 26))
  405. m_features |= CPUFeature::DBX;
  406. if (extended_processor_info.ecx() & (1 << 27))
  407. m_features |= CPUFeature::PERFTSC;
  408. if (extended_processor_info.ecx() & (1 << 28))
  409. m_features |= CPUFeature::PCX_L2I;
  410. if (extended_processor_info.edx() & (1 << 11))
  411. m_features |= CPUFeature::SYSCALL; // Only available in 64 bit mode
  412. if (extended_processor_info.edx() & (1 << 19))
  413. m_features |= CPUFeature::MP;
  414. if (extended_processor_info.edx() & (1 << 20))
  415. m_features |= CPUFeature::NX;
  416. if (extended_processor_info.edx() & (1 << 22))
  417. m_features |= CPUFeature::MMXEXT;
  418. if (extended_processor_info.edx() & (1 << 23))
  419. m_features |= CPUFeature::RDTSCP;
  420. if (extended_processor_info.edx() & (1 << 25))
  421. m_features |= CPUFeature::FXSR_OPT;
  422. if (extended_processor_info.edx() & (1 << 26))
  423. m_features |= CPUFeature::PDPE1GB;
  424. if (extended_processor_info.edx() & (1 << 27))
  425. m_features |= CPUFeature::RDTSCP;
  426. if (extended_processor_info.edx() & (1 << 29))
  427. m_features |= CPUFeature::LM;
  428. if (extended_processor_info.edx() & (1 << 30))
  429. m_features |= CPUFeature::_3DNOWEXT;
  430. if (extended_processor_info.edx() & (1 << 31))
  431. m_features |= CPUFeature::_3DNOW;
  432. }
  433. if (max_extended_leaf >= 0x80000007) {
  434. CPUID cpuid(0x80000007);
  435. if (cpuid.edx() & (1 << 8)) {
  436. m_features |= CPUFeature::CONSTANT_TSC;
  437. m_features |= CPUFeature::NONSTOP_TSC;
  438. }
  439. }
  440. #if ARCH(X86_64)
  441. m_has_qemu_hvf_quirk = false;
  442. #endif
  443. if (max_extended_leaf >= 0x80000008) {
  444. // CPUID.80000008H:EAX[7:0] reports the physical-address width supported by the processor.
  445. CPUID cpuid(0x80000008);
  446. m_physical_address_bit_width = cpuid.eax() & 0xff;
  447. // CPUID.80000008H:EAX[15:8] reports the linear-address width supported by the processor.
  448. m_virtual_address_bit_width = (cpuid.eax() >> 8) & 0xff;
  449. } else {
  450. // For processors that do not support CPUID function 80000008H, the width is generally 36 if CPUID.01H:EDX.PAE [bit 6] = 1 and 32 otherwise.
  451. m_physical_address_bit_width = has_feature(CPUFeature::PAE) ? 36 : 32;
  452. // Processors that do not support CPUID function 80000008H, support a linear-address width of 32.
  453. m_virtual_address_bit_width = 32;
  454. #if ARCH(X86_64)
  455. // Workaround QEMU hypervisor.framework bug
  456. // https://gitlab.com/qemu-project/qemu/-/issues/664
  457. //
  458. // We detect this as follows:
  459. // * We're in a hypervisor
  460. // * hypervisor_leaf_range is null under Hypervisor.framework
  461. // * m_physical_address_bit_width is 36 bits
  462. if (has_feature(CPUFeature::HYPERVISOR)) {
  463. CPUID hypervisor_leaf_range(0x40000000);
  464. if (!hypervisor_leaf_range.ebx() && m_physical_address_bit_width == 36) {
  465. m_has_qemu_hvf_quirk = true;
  466. m_virtual_address_bit_width = 48;
  467. }
  468. }
  469. #endif
  470. }
  471. }
  472. UNMAP_AFTER_INIT void Processor::cpu_setup()
  473. {
  474. // NOTE: This is called during Processor::early_initialize, we cannot
  475. // safely log at this point because we don't have kmalloc
  476. // initialized yet!
  477. cpu_detect();
  478. if (has_feature(CPUFeature::SSE)) {
  479. // enter_thread_context() assumes that if a x86 CPU supports SSE then it also supports FXSR.
  480. // SSE support without FXSR is an extremely unlikely scenario, so let's be pragmatic about it.
  481. VERIFY(has_feature(CPUFeature::FXSR));
  482. sse_init();
  483. }
  484. write_cr0(read_cr0() | 0x00010000);
  485. if (has_feature(CPUFeature::PGE)) {
  486. // Turn on CR4.PGE so the CPU will respect the G bit in page tables.
  487. write_cr4(read_cr4() | 0x80);
  488. }
  489. if (has_feature(CPUFeature::NX)) {
  490. // Turn on IA32_EFER.NXE
  491. MSR ia32_efer(MSR_IA32_EFER);
  492. ia32_efer.set(ia32_efer.get() | 0x800);
  493. }
  494. if (has_feature(CPUFeature::PAT)) {
  495. MSR ia32_pat(MSR_IA32_PAT);
  496. // Set PA4 to Write Comine. This allows us to
  497. // use this mode by only setting the bit in the PTE
  498. // and leaving all other bits in the upper levels unset,
  499. // which maps to setting bit 3 of the index, resulting
  500. // in the index value 0 or 4.
  501. u64 pat = ia32_pat.get() & ~(0x7ull << 32);
  502. pat |= 0x1ull << 32; // set WC mode for PA4
  503. ia32_pat.set(pat);
  504. }
  505. if (has_feature(CPUFeature::SMEP)) {
  506. // Turn on CR4.SMEP
  507. write_cr4(read_cr4() | 0x100000);
  508. }
  509. if (has_feature(CPUFeature::SMAP)) {
  510. // Turn on CR4.SMAP
  511. write_cr4(read_cr4() | 0x200000);
  512. }
  513. if (has_feature(CPUFeature::UMIP)) {
  514. write_cr4(read_cr4() | 0x800);
  515. }
  516. if (has_feature(CPUFeature::XSAVE)) {
  517. // Turn on CR4.OSXSAVE
  518. write_cr4(read_cr4() | 0x40000);
  519. // According to the Intel manual: "After reset, all bits (except bit 0) in XCR0 are cleared to zero; XCR0[0] is set to 1."
  520. // Sadly we can't trust this, for example VirtualBox starts with bits 0-4 set, so let's do it ourselves.
  521. write_xcr0(0x1);
  522. if (has_feature(CPUFeature::AVX)) {
  523. // Turn on SSE, AVX and x87 flags
  524. write_xcr0(read_xcr0() | SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87);
  525. }
  526. }
  527. #if ARCH(X86_64)
  528. // x86_64 processors must support the syscall feature.
  529. VERIFY(has_feature(CPUFeature::SYSCALL));
  530. MSR efer_msr(MSR_EFER);
  531. efer_msr.set(efer_msr.get() | 1u);
  532. // Write code and stack selectors to the STAR MSR. The first value stored in bits 63:48 controls the sysret CS (value + 0x10) and SS (value + 0x8),
  533. // and the value stored in bits 47:32 controls the syscall CS (value) and SS (value + 0x8).
  534. u64 star = 0;
  535. star |= 0x13ul << 48u;
  536. star |= 0x08ul << 32u;
  537. MSR star_msr(MSR_STAR);
  538. star_msr.set(star);
  539. // Write the syscall entry point to the LSTAR MSR.
  540. MSR lstar_msr(MSR_LSTAR);
  541. lstar_msr.set(reinterpret_cast<u64>(&syscall_entry));
  542. // Write the SFMASK MSR. This MSR controls which bits of rflags are masked when a syscall instruction is executed -
  543. // if a bit is set in sfmask, the corresponding bit in rflags is cleared. The value set here clears most of rflags,
  544. // but keeps the reserved and virtualization bits intact. The userspace rflags value is saved in r11 by syscall.
  545. constexpr u64 rflags_mask = 0x257fd5u;
  546. MSR sfmask_msr(MSR_SFMASK);
  547. sfmask_msr.set(rflags_mask);
  548. if (has_feature(CPUFeature::FSGSBASE)) {
  549. // Turn off CR4.FSGSBASE to ensure the current Processor base kernel address is not leaked via
  550. // the RDGSBASE instruction until we implement proper GS swapping at the userspace/kernel boundaries
  551. write_cr4(read_cr4() & ~0x10000);
  552. }
  553. #endif
  554. // Query OS-enabled CPUID features again, and set the flags if needed.
  555. CPUID processor_info(0x1);
  556. if (processor_info.ecx() & (1 << 27))
  557. m_features |= CPUFeature::OSXSAVE;
  558. CPUID extended_features(0x7);
  559. if (extended_features.ecx() & (1 << 4))
  560. m_features |= CPUFeature::OSPKE;
  561. }
  562. UNMAP_AFTER_INIT void Processor::early_initialize(u32 cpu)
  563. {
  564. m_self = this;
  565. m_cpu = cpu;
  566. m_in_irq = 0;
  567. m_in_critical = 0;
  568. m_invoke_scheduler_async = false;
  569. m_scheduler_initialized = false;
  570. m_in_scheduler = true;
  571. m_message_queue = nullptr;
  572. m_idle_thread = nullptr;
  573. m_current_thread = nullptr;
  574. m_info = nullptr;
  575. m_halt_requested = false;
  576. if (cpu == 0) {
  577. s_smp_enabled = false;
  578. g_total_processors.store(1u, AK::MemoryOrder::memory_order_release);
  579. } else {
  580. g_total_processors.fetch_add(1u, AK::MemoryOrder::memory_order_acq_rel);
  581. }
  582. deferred_call_pool_init();
  583. cpu_setup();
  584. gdt_init();
  585. VERIFY(is_initialized()); // sanity check
  586. VERIFY(&current() == this); // sanity check
  587. }
  588. UNMAP_AFTER_INIT void Processor::initialize(u32 cpu)
  589. {
  590. VERIFY(m_self == this);
  591. VERIFY(&current() == this); // sanity check
  592. m_info = new ProcessorInfo(*this);
  593. dmesgln("CPU[{}]: Supported features: {}", current_id(), m_info->features_string());
  594. if (!has_feature(CPUFeature::RDRAND))
  595. dmesgln("CPU[{}]: No RDRAND support detected, randomness will be poor", current_id());
  596. dmesgln("CPU[{}]: Physical address bit width: {}", current_id(), m_physical_address_bit_width);
  597. dmesgln("CPU[{}]: Virtual address bit width: {}", current_id(), m_virtual_address_bit_width);
  598. #if ARCH(X86_64)
  599. if (m_has_qemu_hvf_quirk)
  600. dmesgln("CPU[{}]: Applied correction for QEMU Hypervisor.framework quirk", current_id());
  601. #endif
  602. if (cpu == 0)
  603. initialize_interrupts();
  604. else
  605. flush_idt();
  606. if (cpu == 0) {
  607. VERIFY((FlatPtr(&s_clean_fpu_state) & 0xF) == 0);
  608. asm volatile("fninit");
  609. // Initialize AVX state
  610. if (has_feature(CPUFeature::XSAVE | CPUFeature::AVX)) {
  611. asm volatile("xsave %0\n"
  612. : "=m"(s_clean_fpu_state)
  613. : "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
  614. } else if (has_feature(CPUFeature::FXSR)) {
  615. asm volatile("fxsave %0"
  616. : "=m"(s_clean_fpu_state));
  617. } else {
  618. asm volatile("fnsave %0"
  619. : "=m"(s_clean_fpu_state));
  620. }
  621. if (has_feature(CPUFeature::HYPERVISOR))
  622. detect_hypervisor();
  623. }
  624. {
  625. // We need to prevent races between APs starting up at the same time
  626. VERIFY(cpu < s_processors.size());
  627. s_processors[cpu] = this;
  628. }
  629. }
  630. UNMAP_AFTER_INIT void Processor::detect_hypervisor()
  631. {
  632. CPUID hypervisor_leaf_range(0x40000000);
  633. auto hypervisor_vendor_id_string = m_info->hypervisor_vendor_id_string();
  634. dmesgln("CPU[{}]: CPUID hypervisor signature '{}', max leaf {:#x}", current_id(), hypervisor_vendor_id_string, hypervisor_leaf_range.eax());
  635. if (hypervisor_vendor_id_string == "Microsoft Hv"sv)
  636. detect_hypervisor_hyperv(hypervisor_leaf_range);
  637. }
  638. UNMAP_AFTER_INIT void Processor::detect_hypervisor_hyperv(CPUID const& hypervisor_leaf_range)
  639. {
  640. if (hypervisor_leaf_range.eax() < 0x40000001)
  641. return;
  642. CPUID hypervisor_interface(0x40000001);
  643. // Get signature of hypervisor interface.
  644. alignas(sizeof(u32)) char interface_signature_buffer[5];
  645. *reinterpret_cast<u32*>(interface_signature_buffer) = hypervisor_interface.eax();
  646. interface_signature_buffer[4] = '\0';
  647. StringView hyperv_interface_signature { interface_signature_buffer, strlen(interface_signature_buffer) };
  648. dmesgln("CPU[{}]: Hyper-V interface signature '{}' ({:#x})", current_id(), hyperv_interface_signature, hypervisor_interface.eax());
  649. if (hypervisor_leaf_range.eax() < 0x40000001)
  650. return;
  651. CPUID hypervisor_sysid(0x40000002);
  652. dmesgln("CPU[{}]: Hyper-V system identity {}.{}, build number {}", current_id(), hypervisor_sysid.ebx() >> 16, hypervisor_sysid.ebx() & 0xFFFF, hypervisor_sysid.eax());
  653. if (hypervisor_leaf_range.eax() < 0x40000005 || hyperv_interface_signature != "Hv#1"sv)
  654. return;
  655. dmesgln("CPU[{}]: Hyper-V hypervisor detected", current_id());
  656. // TODO: Actually do something with Hyper-V.
  657. }
  658. void Processor::write_raw_gdt_entry(u16 selector, u32 low, u32 high)
  659. {
  660. u16 i = (selector & 0xfffc) >> 3;
  661. u32 prev_gdt_length = m_gdt_length;
  662. if (i >= m_gdt_length) {
  663. m_gdt_length = i + 1;
  664. VERIFY(m_gdt_length <= sizeof(m_gdt) / sizeof(m_gdt[0]));
  665. m_gdtr.limit = (m_gdt_length + 1) * 8 - 1;
  666. }
  667. m_gdt[i].low = low;
  668. m_gdt[i].high = high;
  669. // clear selectors we may have skipped
  670. for (auto j = prev_gdt_length; j < i; ++j) {
  671. m_gdt[j].low = 0;
  672. m_gdt[j].high = 0;
  673. }
  674. }
  675. void Processor::write_gdt_entry(u16 selector, Descriptor& descriptor)
  676. {
  677. write_raw_gdt_entry(selector, descriptor.low, descriptor.high);
  678. }
  679. Descriptor& Processor::get_gdt_entry(u16 selector)
  680. {
  681. u16 i = (selector & 0xfffc) >> 3;
  682. return *(Descriptor*)(&m_gdt[i]);
  683. }
  684. void Processor::flush_gdt()
  685. {
  686. m_gdtr.address = m_gdt;
  687. m_gdtr.limit = (m_gdt_length * 8) - 1;
  688. asm volatile("lgdt %0" ::"m"(m_gdtr)
  689. : "memory");
  690. }
  691. DescriptorTablePointer const& Processor::get_gdtr()
  692. {
  693. return m_gdtr;
  694. }
  695. ErrorOr<Vector<FlatPtr, 32>> Processor::capture_stack_trace(Thread& thread, size_t max_frames)
  696. {
  697. FlatPtr frame_ptr = 0, ip = 0;
  698. Vector<FlatPtr, 32> stack_trace;
  699. auto walk_stack = [&](FlatPtr stack_ptr) -> ErrorOr<void> {
  700. constexpr size_t max_stack_frames = 4096;
  701. bool is_walking_userspace_stack = false;
  702. TRY(stack_trace.try_append(ip));
  703. size_t count = 1;
  704. while (stack_ptr && stack_trace.size() < max_stack_frames) {
  705. FlatPtr retaddr;
  706. count++;
  707. if (max_frames != 0 && count > max_frames)
  708. break;
  709. if (!Memory::is_user_address(VirtualAddress { stack_ptr })) {
  710. if (is_walking_userspace_stack) {
  711. dbgln("SHENANIGANS! Userspace stack points back into kernel memory");
  712. break;
  713. }
  714. } else {
  715. is_walking_userspace_stack = true;
  716. }
  717. if (Memory::is_user_range(VirtualAddress(stack_ptr), sizeof(FlatPtr) * 2)) {
  718. if (copy_from_user(&retaddr, &((FlatPtr*)stack_ptr)[1]).is_error() || !retaddr)
  719. break;
  720. TRY(stack_trace.try_append(retaddr));
  721. if (copy_from_user(&stack_ptr, (FlatPtr*)stack_ptr).is_error())
  722. break;
  723. } else {
  724. void* fault_at;
  725. if (!safe_memcpy(&retaddr, &((FlatPtr*)stack_ptr)[1], sizeof(FlatPtr), fault_at) || !retaddr)
  726. break;
  727. TRY(stack_trace.try_append(retaddr));
  728. if (!safe_memcpy(&stack_ptr, (FlatPtr*)stack_ptr, sizeof(FlatPtr), fault_at))
  729. break;
  730. }
  731. }
  732. return {};
  733. };
  734. auto capture_current_thread = [&]() {
  735. frame_ptr = (FlatPtr)__builtin_frame_address(0);
  736. ip = (FlatPtr)__builtin_return_address(0);
  737. return walk_stack(frame_ptr);
  738. };
  739. // Since the thread may be running on another processor, there
  740. // is a chance a context switch may happen while we're trying
  741. // to get it. It also won't be entirely accurate and merely
  742. // reflect the status at the last context switch.
  743. SpinlockLocker lock(g_scheduler_lock);
  744. if (&thread == Processor::current_thread()) {
  745. VERIFY(thread.state() == Thread::State::Running);
  746. // Leave the scheduler lock. If we trigger page faults we may
  747. // need to be preempted. Since this is our own thread it won't
  748. // cause any problems as the stack won't change below this frame.
  749. lock.unlock();
  750. TRY(capture_current_thread());
  751. } else if (thread.is_active()) {
  752. VERIFY(thread.cpu() != Processor::current_id());
  753. // If this is the case, the thread is currently running
  754. // on another processor. We can't trust the kernel stack as
  755. // it may be changing at any time. We need to probably send
  756. // an IPI to that processor, have it walk the stack and wait
  757. // until it returns the data back to us
  758. auto& proc = Processor::current();
  759. ErrorOr<void> result;
  760. smp_unicast(
  761. thread.cpu(),
  762. [&]() {
  763. dbgln("CPU[{}] getting stack for cpu #{}", Processor::current_id(), proc.id());
  764. ScopedAddressSpaceSwitcher switcher(thread.process());
  765. VERIFY(&Processor::current() != &proc);
  766. VERIFY(&thread == Processor::current_thread());
  767. // NOTE: Because the other processor is still holding the
  768. // scheduler lock while waiting for this callback to finish,
  769. // the current thread on the target processor cannot change
  770. // TODO: What to do about page faults here? We might deadlock
  771. // because the other processor is still holding the
  772. // scheduler lock...
  773. result = capture_current_thread();
  774. },
  775. false);
  776. TRY(result);
  777. } else {
  778. switch (thread.state()) {
  779. case Thread::State::Running:
  780. VERIFY_NOT_REACHED(); // should have been handled above
  781. case Thread::State::Runnable:
  782. case Thread::State::Stopped:
  783. case Thread::State::Blocked:
  784. case Thread::State::Dying:
  785. case Thread::State::Dead: {
  786. // We need to retrieve ebp from what was last pushed to the kernel
  787. // stack. Before switching out of that thread, it switch_context
  788. // pushed the callee-saved registers, and the last of them happens
  789. // to be ebp.
  790. ScopedAddressSpaceSwitcher switcher(thread.process());
  791. auto& regs = thread.regs();
  792. auto* stack_top = reinterpret_cast<FlatPtr*>(regs.sp());
  793. if (Memory::is_user_range(VirtualAddress(stack_top), sizeof(FlatPtr))) {
  794. if (copy_from_user(&frame_ptr, &((FlatPtr*)stack_top)[0]).is_error())
  795. frame_ptr = 0;
  796. } else {
  797. void* fault_at;
  798. if (!safe_memcpy(&frame_ptr, &((FlatPtr*)stack_top)[0], sizeof(FlatPtr), fault_at))
  799. frame_ptr = 0;
  800. }
  801. ip = regs.ip();
  802. // TODO: We need to leave the scheduler lock here, but we also
  803. // need to prevent the target thread from being run while
  804. // we walk the stack
  805. lock.unlock();
  806. TRY(walk_stack(frame_ptr));
  807. break;
  808. }
  809. default:
  810. dbgln("Cannot capture stack trace for thread {} in state {}", thread, thread.state_string());
  811. break;
  812. }
  813. }
  814. return stack_trace;
  815. }
  816. ProcessorContainer& Processor::processors()
  817. {
  818. return s_processors;
  819. }
  820. Processor& Processor::by_id(u32 id)
  821. {
  822. return *s_processors[id];
  823. }
  824. void Processor::enter_trap(TrapFrame& trap, bool raise_irq)
  825. {
  826. VERIFY_INTERRUPTS_DISABLED();
  827. VERIFY(&Processor::current() == this);
  828. trap.prev_irq_level = m_in_irq;
  829. if (raise_irq)
  830. m_in_irq++;
  831. auto* current_thread = Processor::current_thread();
  832. if (current_thread) {
  833. auto& current_trap = current_thread->current_trap();
  834. trap.next_trap = current_trap;
  835. current_trap = &trap;
  836. // The cs register of this trap tells us where we will return back to
  837. auto new_previous_mode = ((trap.regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  838. if (current_thread->set_previous_mode(new_previous_mode) && trap.prev_irq_level == 0) {
  839. current_thread->update_time_scheduled(Scheduler::current_time(), new_previous_mode == Thread::PreviousMode::KernelMode, false);
  840. }
  841. } else {
  842. trap.next_trap = nullptr;
  843. }
  844. }
  845. void Processor::exit_trap(TrapFrame& trap)
  846. {
  847. VERIFY_INTERRUPTS_DISABLED();
  848. VERIFY(&Processor::current() == this);
  849. // Temporarily enter a critical section. This is to prevent critical
  850. // sections entered and left within e.g. smp_process_pending_messages
  851. // to trigger a context switch while we're executing this function
  852. // See the comment at the end of the function why we don't use
  853. // ScopedCritical here.
  854. m_in_critical = m_in_critical + 1;
  855. VERIFY(m_in_irq >= trap.prev_irq_level);
  856. m_in_irq = trap.prev_irq_level;
  857. if (s_smp_enabled)
  858. smp_process_pending_messages();
  859. // Process the deferred call queue. Among other things, this ensures
  860. // that any pending thread unblocks happen before we enter the scheduler.
  861. deferred_call_execute_pending();
  862. auto* current_thread = Processor::current_thread();
  863. if (current_thread) {
  864. auto& current_trap = current_thread->current_trap();
  865. current_trap = trap.next_trap;
  866. Thread::PreviousMode new_previous_mode;
  867. if (current_trap) {
  868. VERIFY(current_trap->regs);
  869. // If we have another higher level trap then we probably returned
  870. // from an interrupt or irq handler. The cs register of the
  871. // new/higher level trap tells us what the mode prior to it was
  872. new_previous_mode = ((current_trap->regs->cs & 3) != 0) ? Thread::PreviousMode::UserMode : Thread::PreviousMode::KernelMode;
  873. } else {
  874. // If we don't have a higher level trap then we're back in user mode.
  875. // Which means that the previous mode prior to being back in user mode was kernel mode
  876. new_previous_mode = Thread::PreviousMode::KernelMode;
  877. }
  878. if (current_thread->set_previous_mode(new_previous_mode))
  879. current_thread->update_time_scheduled(Scheduler::current_time(), true, false);
  880. }
  881. VERIFY_INTERRUPTS_DISABLED();
  882. // Leave the critical section without actually enabling interrupts.
  883. // We don't want context switches to happen until we're explicitly
  884. // triggering a switch in check_invoke_scheduler.
  885. m_in_critical = m_in_critical - 1;
  886. if (!m_in_irq && !m_in_critical)
  887. check_invoke_scheduler();
  888. }
  889. void Processor::check_invoke_scheduler()
  890. {
  891. InterruptDisabler disabler;
  892. VERIFY(!m_in_irq);
  893. VERIFY(!m_in_critical);
  894. VERIFY(&Processor::current() == this);
  895. if (m_invoke_scheduler_async && m_scheduler_initialized) {
  896. m_invoke_scheduler_async = false;
  897. Scheduler::invoke_async();
  898. }
  899. }
  900. void Processor::flush_tlb_local(VirtualAddress vaddr, size_t page_count)
  901. {
  902. auto ptr = vaddr.as_ptr();
  903. while (page_count > 0) {
  904. // clang-format off
  905. asm volatile("invlpg %0"
  906. :
  907. : "m"(*ptr)
  908. : "memory");
  909. // clang-format on
  910. ptr += PAGE_SIZE;
  911. page_count--;
  912. }
  913. }
  914. void Processor::flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  915. {
  916. if (s_smp_enabled && (!Memory::is_user_address(vaddr) || Process::current().thread_count() > 1))
  917. smp_broadcast_flush_tlb(page_directory, vaddr, page_count);
  918. else
  919. flush_tlb_local(vaddr, page_count);
  920. }
  921. void Processor::smp_return_to_pool(ProcessorMessage& msg)
  922. {
  923. ProcessorMessage* next = nullptr;
  924. for (;;) {
  925. msg.next = next;
  926. if (s_message_pool.compare_exchange_strong(next, &msg, AK::MemoryOrder::memory_order_acq_rel))
  927. break;
  928. Processor::pause();
  929. }
  930. }
  931. ProcessorMessage& Processor::smp_get_from_pool()
  932. {
  933. ProcessorMessage* msg;
  934. // The assumption is that messages are never removed from the pool!
  935. for (;;) {
  936. msg = s_message_pool.load(AK::MemoryOrder::memory_order_consume);
  937. if (!msg) {
  938. if (!Processor::current().smp_process_pending_messages()) {
  939. Processor::pause();
  940. }
  941. continue;
  942. }
  943. // If another processor were to use this message in the meanwhile,
  944. // "msg" is still valid (because it never gets freed). We'd detect
  945. // this because the expected value "msg" and pool would
  946. // no longer match, and the compare_exchange will fail. But accessing
  947. // "msg->next" is always safe here.
  948. if (s_message_pool.compare_exchange_strong(msg, msg->next, AK::MemoryOrder::memory_order_acq_rel)) {
  949. // We successfully "popped" this available message
  950. break;
  951. }
  952. }
  953. VERIFY(msg != nullptr);
  954. return *msg;
  955. }
  956. u32 Processor::smp_wake_n_idle_processors(u32 wake_count)
  957. {
  958. VERIFY_INTERRUPTS_DISABLED();
  959. VERIFY(wake_count > 0);
  960. if (!s_smp_enabled)
  961. return 0;
  962. // Wake at most N - 1 processors
  963. if (wake_count >= Processor::count()) {
  964. wake_count = Processor::count() - 1;
  965. VERIFY(wake_count > 0);
  966. }
  967. u32 current_id = Processor::current_id();
  968. u32 did_wake_count = 0;
  969. auto& apic = APIC::the();
  970. while (did_wake_count < wake_count) {
  971. // Try to get a set of idle CPUs and flip them to busy
  972. u32 idle_mask = s_idle_cpu_mask.load(AK::MemoryOrder::memory_order_relaxed) & ~(1u << current_id);
  973. u32 idle_count = popcount(idle_mask);
  974. if (idle_count == 0)
  975. break; // No (more) idle processor available
  976. u32 found_mask = 0;
  977. for (u32 i = 0; i < idle_count; i++) {
  978. u32 cpu = bit_scan_forward(idle_mask) - 1;
  979. idle_mask &= ~(1u << cpu);
  980. found_mask |= 1u << cpu;
  981. }
  982. idle_mask = s_idle_cpu_mask.fetch_and(~found_mask, AK::MemoryOrder::memory_order_acq_rel) & found_mask;
  983. if (idle_mask == 0)
  984. continue; // All of them were flipped to busy, try again
  985. idle_count = popcount(idle_mask);
  986. for (u32 i = 0; i < idle_count; i++) {
  987. u32 cpu = bit_scan_forward(idle_mask) - 1;
  988. idle_mask &= ~(1u << cpu);
  989. // Send an IPI to that CPU to wake it up. There is a possibility
  990. // someone else woke it up as well, or that it woke up due to
  991. // a timer interrupt. But we tried hard to avoid this...
  992. apic.send_ipi(cpu);
  993. did_wake_count++;
  994. }
  995. }
  996. return did_wake_count;
  997. }
  998. UNMAP_AFTER_INIT void Processor::smp_enable()
  999. {
  1000. size_t msg_pool_size = Processor::count() * 100u;
  1001. size_t msg_entries_cnt = Processor::count();
  1002. auto msgs = new ProcessorMessage[msg_pool_size];
  1003. auto msg_entries = new ProcessorMessageEntry[msg_pool_size * msg_entries_cnt];
  1004. size_t msg_entry_i = 0;
  1005. for (size_t i = 0; i < msg_pool_size; i++, msg_entry_i += msg_entries_cnt) {
  1006. auto& msg = msgs[i];
  1007. msg.next = i < msg_pool_size - 1 ? &msgs[i + 1] : nullptr;
  1008. msg.per_proc_entries = &msg_entries[msg_entry_i];
  1009. for (size_t k = 0; k < msg_entries_cnt; k++)
  1010. msg_entries[msg_entry_i + k].msg = &msg;
  1011. }
  1012. s_message_pool.store(&msgs[0], AK::MemoryOrder::memory_order_release);
  1013. // Start sending IPI messages
  1014. s_smp_enabled = true;
  1015. }
  1016. void Processor::smp_cleanup_message(ProcessorMessage& msg)
  1017. {
  1018. switch (msg.type) {
  1019. case ProcessorMessage::Callback:
  1020. msg.callback_value().~Function();
  1021. break;
  1022. default:
  1023. break;
  1024. }
  1025. }
  1026. bool Processor::smp_process_pending_messages()
  1027. {
  1028. VERIFY(s_smp_enabled);
  1029. bool did_process = false;
  1030. enter_critical();
  1031. if (auto pending_msgs = m_message_queue.exchange(nullptr, AK::MemoryOrder::memory_order_acq_rel)) {
  1032. // We pulled the stack of pending messages in LIFO order, so we need to reverse the list first
  1033. auto reverse_list =
  1034. [](ProcessorMessageEntry* list) -> ProcessorMessageEntry* {
  1035. ProcessorMessageEntry* rev_list = nullptr;
  1036. while (list) {
  1037. auto next = list->next;
  1038. list->next = rev_list;
  1039. rev_list = list;
  1040. list = next;
  1041. }
  1042. return rev_list;
  1043. };
  1044. pending_msgs = reverse_list(pending_msgs);
  1045. // now process in the right order
  1046. ProcessorMessageEntry* next_msg;
  1047. for (auto cur_msg = pending_msgs; cur_msg; cur_msg = next_msg) {
  1048. next_msg = cur_msg->next;
  1049. auto msg = cur_msg->msg;
  1050. dbgln_if(SMP_DEBUG, "SMP[{}]: Processing message {}", current_id(), VirtualAddress(msg));
  1051. switch (msg->type) {
  1052. case ProcessorMessage::Callback:
  1053. msg->invoke_callback();
  1054. break;
  1055. case ProcessorMessage::FlushTlb:
  1056. if (Memory::is_user_address(VirtualAddress(msg->flush_tlb.ptr))) {
  1057. // We assume that we don't cross into kernel land!
  1058. VERIFY(Memory::is_user_range(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count * PAGE_SIZE));
  1059. if (read_cr3() != msg->flush_tlb.page_directory->cr3()) {
  1060. // This processor isn't using this page directory right now, we can ignore this request
  1061. dbgln_if(SMP_DEBUG, "SMP[{}]: No need to flush {} pages at {}", current_id(), msg->flush_tlb.page_count, VirtualAddress(msg->flush_tlb.ptr));
  1062. break;
  1063. }
  1064. }
  1065. flush_tlb_local(VirtualAddress(msg->flush_tlb.ptr), msg->flush_tlb.page_count);
  1066. break;
  1067. }
  1068. bool is_async = msg->async; // Need to cache this value *before* dropping the ref count!
  1069. auto prev_refs = msg->refs.fetch_sub(1u, AK::MemoryOrder::memory_order_acq_rel);
  1070. VERIFY(prev_refs != 0);
  1071. if (prev_refs == 1) {
  1072. // All processors handled this. If this is an async message,
  1073. // we need to clean it up and return it to the pool
  1074. if (is_async) {
  1075. smp_cleanup_message(*msg);
  1076. smp_return_to_pool(*msg);
  1077. }
  1078. }
  1079. if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed))
  1080. halt_this();
  1081. }
  1082. did_process = true;
  1083. } else if (m_halt_requested.load(AK::MemoryOrder::memory_order_relaxed)) {
  1084. halt_this();
  1085. }
  1086. leave_critical();
  1087. return did_process;
  1088. }
  1089. bool Processor::smp_enqueue_message(ProcessorMessage& msg)
  1090. {
  1091. // Note that it's quite possible that the other processor may pop
  1092. // the queue at any given time. We rely on the fact that the messages
  1093. // are pooled and never get freed!
  1094. auto& msg_entry = msg.per_proc_entries[id()];
  1095. VERIFY(msg_entry.msg == &msg);
  1096. ProcessorMessageEntry* next = nullptr;
  1097. for (;;) {
  1098. msg_entry.next = next;
  1099. if (m_message_queue.compare_exchange_strong(next, &msg_entry, AK::MemoryOrder::memory_order_acq_rel))
  1100. break;
  1101. Processor::pause();
  1102. }
  1103. // If the enqueued message was the only message in the queue when posted,
  1104. // we return true. This is used by callers when deciding whether to generate an IPI.
  1105. return next == nullptr;
  1106. }
  1107. void Processor::smp_broadcast_message(ProcessorMessage& msg)
  1108. {
  1109. auto& current_processor = Processor::current();
  1110. dbgln_if(SMP_DEBUG, "SMP[{}]: Broadcast message {} to cpus: {} processor: {}", current_processor.id(), VirtualAddress(&msg), count(), VirtualAddress(&current_processor));
  1111. msg.refs.store(count() - 1, AK::MemoryOrder::memory_order_release);
  1112. VERIFY(msg.refs > 0);
  1113. bool need_broadcast = false;
  1114. for_each(
  1115. [&](Processor& proc) {
  1116. if (&proc != &current_processor) {
  1117. if (proc.smp_enqueue_message(msg))
  1118. need_broadcast = true;
  1119. }
  1120. });
  1121. // Now trigger an IPI on all other APs (unless all targets already had messages queued)
  1122. if (need_broadcast)
  1123. APIC::the().broadcast_ipi();
  1124. }
  1125. void Processor::smp_broadcast_wait_sync(ProcessorMessage& msg)
  1126. {
  1127. auto& cur_proc = Processor::current();
  1128. VERIFY(!msg.async);
  1129. // If synchronous then we must cleanup and return the message back
  1130. // to the pool. Otherwise, the last processor to complete it will return it
  1131. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  1132. Processor::pause();
  1133. // We need to process any messages that may have been sent to
  1134. // us while we're waiting. This also checks if another processor
  1135. // may have requested us to halt.
  1136. cur_proc.smp_process_pending_messages();
  1137. }
  1138. smp_cleanup_message(msg);
  1139. smp_return_to_pool(msg);
  1140. }
  1141. void Processor::smp_unicast_message(u32 cpu, ProcessorMessage& msg, bool async)
  1142. {
  1143. auto& current_processor = Processor::current();
  1144. VERIFY(cpu != current_processor.id());
  1145. auto& target_processor = processors()[cpu];
  1146. msg.async = async;
  1147. dbgln_if(SMP_DEBUG, "SMP[{}]: Send message {} to cpu #{} processor: {}", current_processor.id(), VirtualAddress(&msg), cpu, VirtualAddress(&target_processor));
  1148. msg.refs.store(1u, AK::MemoryOrder::memory_order_release);
  1149. if (target_processor->smp_enqueue_message(msg)) {
  1150. APIC::the().send_ipi(cpu);
  1151. }
  1152. if (!async) {
  1153. // If synchronous then we must cleanup and return the message back
  1154. // to the pool. Otherwise, the last processor to complete it will return it
  1155. while (msg.refs.load(AK::MemoryOrder::memory_order_consume) != 0) {
  1156. Processor::pause();
  1157. // We need to process any messages that may have been sent to
  1158. // us while we're waiting. This also checks if another processor
  1159. // may have requested us to halt.
  1160. current_processor.smp_process_pending_messages();
  1161. }
  1162. smp_cleanup_message(msg);
  1163. smp_return_to_pool(msg);
  1164. }
  1165. }
  1166. void Processor::smp_unicast(u32 cpu, Function<void()> callback, bool async)
  1167. {
  1168. auto& msg = smp_get_from_pool();
  1169. msg.type = ProcessorMessage::Callback;
  1170. new (msg.callback_storage) ProcessorMessage::CallbackFunction(move(callback));
  1171. smp_unicast_message(cpu, msg, async);
  1172. }
  1173. void Processor::smp_broadcast_flush_tlb(Memory::PageDirectory const* page_directory, VirtualAddress vaddr, size_t page_count)
  1174. {
  1175. auto& msg = smp_get_from_pool();
  1176. msg.async = false;
  1177. msg.type = ProcessorMessage::FlushTlb;
  1178. msg.flush_tlb.page_directory = page_directory;
  1179. msg.flush_tlb.ptr = vaddr.as_ptr();
  1180. msg.flush_tlb.page_count = page_count;
  1181. smp_broadcast_message(msg);
  1182. // While the other processors handle this request, we'll flush ours
  1183. flush_tlb_local(vaddr, page_count);
  1184. // Now wait until everybody is done as well
  1185. smp_broadcast_wait_sync(msg);
  1186. }
  1187. void Processor::smp_broadcast_halt()
  1188. {
  1189. // We don't want to use a message, because this could have been triggered
  1190. // by being out of memory and we might not be able to get a message
  1191. for_each(
  1192. [&](Processor& proc) {
  1193. proc.m_halt_requested.store(true, AK::MemoryOrder::memory_order_release);
  1194. });
  1195. // Now trigger an IPI on all other APs
  1196. APIC::the().broadcast_ipi();
  1197. }
  1198. void Processor::Processor::halt()
  1199. {
  1200. if (s_smp_enabled)
  1201. smp_broadcast_halt();
  1202. halt_this();
  1203. }
  1204. UNMAP_AFTER_INIT void Processor::deferred_call_pool_init()
  1205. {
  1206. size_t pool_count = sizeof(m_deferred_call_pool) / sizeof(m_deferred_call_pool[0]);
  1207. for (size_t i = 0; i < pool_count; i++) {
  1208. auto& entry = m_deferred_call_pool[i];
  1209. entry.next = i < pool_count - 1 ? &m_deferred_call_pool[i + 1] : nullptr;
  1210. new (entry.handler_storage) DeferredCallEntry::HandlerFunction;
  1211. entry.was_allocated = false;
  1212. }
  1213. m_pending_deferred_calls = nullptr;
  1214. m_free_deferred_call_pool_entry = &m_deferred_call_pool[0];
  1215. }
  1216. void Processor::deferred_call_return_to_pool(DeferredCallEntry* entry)
  1217. {
  1218. VERIFY(m_in_critical);
  1219. VERIFY(!entry->was_allocated);
  1220. entry->handler_value() = {};
  1221. entry->next = m_free_deferred_call_pool_entry;
  1222. m_free_deferred_call_pool_entry = entry;
  1223. }
  1224. DeferredCallEntry* Processor::deferred_call_get_free()
  1225. {
  1226. VERIFY(m_in_critical);
  1227. if (m_free_deferred_call_pool_entry) {
  1228. // Fast path, we have an entry in our pool
  1229. auto* entry = m_free_deferred_call_pool_entry;
  1230. m_free_deferred_call_pool_entry = entry->next;
  1231. VERIFY(!entry->was_allocated);
  1232. return entry;
  1233. }
  1234. auto* entry = new DeferredCallEntry;
  1235. new (entry->handler_storage) DeferredCallEntry::HandlerFunction;
  1236. entry->was_allocated = true;
  1237. return entry;
  1238. }
  1239. void Processor::deferred_call_execute_pending()
  1240. {
  1241. VERIFY(m_in_critical);
  1242. if (!m_pending_deferred_calls)
  1243. return;
  1244. auto* pending_list = m_pending_deferred_calls;
  1245. m_pending_deferred_calls = nullptr;
  1246. // We pulled the stack of pending deferred calls in LIFO order, so we need to reverse the list first
  1247. auto reverse_list =
  1248. [](DeferredCallEntry* list) -> DeferredCallEntry* {
  1249. DeferredCallEntry* rev_list = nullptr;
  1250. while (list) {
  1251. auto next = list->next;
  1252. list->next = rev_list;
  1253. rev_list = list;
  1254. list = next;
  1255. }
  1256. return rev_list;
  1257. };
  1258. pending_list = reverse_list(pending_list);
  1259. do {
  1260. pending_list->invoke_handler();
  1261. // Return the entry back to the pool, or free it
  1262. auto* next = pending_list->next;
  1263. if (pending_list->was_allocated) {
  1264. pending_list->handler_value().~Function();
  1265. delete pending_list;
  1266. } else
  1267. deferred_call_return_to_pool(pending_list);
  1268. pending_list = next;
  1269. } while (pending_list);
  1270. }
  1271. void Processor::deferred_call_queue_entry(DeferredCallEntry* entry)
  1272. {
  1273. VERIFY(m_in_critical);
  1274. entry->next = m_pending_deferred_calls;
  1275. m_pending_deferred_calls = entry;
  1276. }
  1277. void Processor::deferred_call_queue(Function<void()> callback)
  1278. {
  1279. // NOTE: If we are called outside of a critical section and outside
  1280. // of an irq handler, the function will be executed before we return!
  1281. ScopedCritical critical;
  1282. auto& cur_proc = Processor::current();
  1283. auto* entry = cur_proc.deferred_call_get_free();
  1284. entry->handler_value() = move(callback);
  1285. cur_proc.deferred_call_queue_entry(entry);
  1286. }
  1287. UNMAP_AFTER_INIT void Processor::gdt_init()
  1288. {
  1289. m_gdt_length = 0;
  1290. m_gdtr.address = nullptr;
  1291. m_gdtr.limit = 0;
  1292. write_raw_gdt_entry(0x0000, 0x00000000, 0x00000000);
  1293. #if ARCH(I386)
  1294. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00cf9a00); // code0
  1295. write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00cf9200); // data0
  1296. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00cffa00); // code3
  1297. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x00cff200); // data3
  1298. #else
  1299. write_raw_gdt_entry(GDT_SELECTOR_CODE0, 0x0000ffff, 0x00af9a00); // code0
  1300. write_raw_gdt_entry(GDT_SELECTOR_DATA0, 0x0000ffff, 0x00af9200); // data0
  1301. write_raw_gdt_entry(GDT_SELECTOR_DATA3, 0x0000ffff, 0x008ff200); // data3
  1302. write_raw_gdt_entry(GDT_SELECTOR_CODE3, 0x0000ffff, 0x00affa00); // code3
  1303. #endif
  1304. #if ARCH(I386)
  1305. Descriptor tls_descriptor {};
  1306. tls_descriptor.low = tls_descriptor.high = 0;
  1307. tls_descriptor.dpl = 3;
  1308. tls_descriptor.segment_present = 1;
  1309. tls_descriptor.granularity = 0;
  1310. tls_descriptor.operation_size64 = 0;
  1311. tls_descriptor.operation_size32 = 1;
  1312. tls_descriptor.descriptor_type = 1;
  1313. tls_descriptor.type = 2;
  1314. write_gdt_entry(GDT_SELECTOR_TLS, tls_descriptor); // tls3
  1315. Descriptor gs_descriptor {};
  1316. gs_descriptor.set_base(VirtualAddress { this });
  1317. gs_descriptor.set_limit(sizeof(Processor) - 1);
  1318. gs_descriptor.dpl = 0;
  1319. gs_descriptor.segment_present = 1;
  1320. gs_descriptor.granularity = 0;
  1321. gs_descriptor.operation_size64 = 0;
  1322. gs_descriptor.operation_size32 = 1;
  1323. gs_descriptor.descriptor_type = 1;
  1324. gs_descriptor.type = 2;
  1325. write_gdt_entry(GDT_SELECTOR_PROC, gs_descriptor); // gs0
  1326. #endif
  1327. Descriptor tss_descriptor {};
  1328. tss_descriptor.set_base(VirtualAddress { (size_t)&m_tss & 0xffffffff });
  1329. tss_descriptor.set_limit(sizeof(TSS) - 1);
  1330. tss_descriptor.dpl = 0;
  1331. tss_descriptor.segment_present = 1;
  1332. tss_descriptor.granularity = 0;
  1333. tss_descriptor.operation_size64 = 0;
  1334. tss_descriptor.operation_size32 = 1;
  1335. tss_descriptor.descriptor_type = 0;
  1336. tss_descriptor.type = Descriptor::SystemType::AvailableTSS;
  1337. write_gdt_entry(GDT_SELECTOR_TSS, tss_descriptor); // tss
  1338. #if ARCH(X86_64)
  1339. Descriptor tss_descriptor_part2 {};
  1340. tss_descriptor_part2.low = (size_t)&m_tss >> 32;
  1341. write_gdt_entry(GDT_SELECTOR_TSS_PART2, tss_descriptor_part2);
  1342. #endif
  1343. flush_gdt();
  1344. load_task_register(GDT_SELECTOR_TSS);
  1345. #if ARCH(X86_64)
  1346. MSR gs_base(MSR_GS_BASE);
  1347. gs_base.set((u64)this);
  1348. #else
  1349. asm volatile(
  1350. "mov %%ax, %%ds\n"
  1351. "mov %%ax, %%es\n"
  1352. "mov %%ax, %%fs\n"
  1353. "mov %%ax, %%ss\n" ::"a"(GDT_SELECTOR_DATA0)
  1354. : "memory");
  1355. set_gs(GDT_SELECTOR_PROC);
  1356. #endif
  1357. #if ARCH(I386)
  1358. // Make sure CS points to the kernel code descriptor.
  1359. // clang-format off
  1360. asm volatile(
  1361. "ljmpl $" __STRINGIFY(GDT_SELECTOR_CODE0) ", $sanity\n"
  1362. "sanity:\n");
  1363. // clang-format on
  1364. #endif
  1365. }
  1366. extern "C" void context_first_init([[maybe_unused]] Thread* from_thread, [[maybe_unused]] Thread* to_thread, [[maybe_unused]] TrapFrame* trap)
  1367. {
  1368. VERIFY(!are_interrupts_enabled());
  1369. VERIFY(is_kernel_mode());
  1370. dbgln_if(CONTEXT_SWITCH_DEBUG, "switch_context <-- from {} {} to {} {} (context_first_init)", VirtualAddress(from_thread), *from_thread, VirtualAddress(to_thread), *to_thread);
  1371. VERIFY(to_thread == Thread::current());
  1372. Scheduler::enter_current(*from_thread);
  1373. auto in_critical = to_thread->saved_critical();
  1374. VERIFY(in_critical > 0);
  1375. Processor::restore_in_critical(in_critical);
  1376. // Since we got here and don't have Scheduler::context_switch in the
  1377. // call stack (because this is the first time we switched into this
  1378. // context), we need to notify the scheduler so that it can release
  1379. // the scheduler lock. We don't want to enable interrupts at this point
  1380. // as we're still in the middle of a context switch. Doing so could
  1381. // trigger a context switch within a context switch, leading to a crash.
  1382. FlatPtr flags = trap->regs->flags();
  1383. Scheduler::leave_on_first_switch(flags & ~0x200);
  1384. }
  1385. extern "C" void enter_thread_context(Thread* from_thread, Thread* to_thread)
  1386. {
  1387. VERIFY(from_thread == to_thread || from_thread->state() != Thread::State::Running);
  1388. VERIFY(to_thread->state() == Thread::State::Running);
  1389. bool has_fxsr = Processor::current().has_feature(CPUFeature::FXSR);
  1390. bool has_xsave_avx_support = Processor::current().has_feature(CPUFeature::XSAVE) && Processor::current().has_feature(CPUFeature::AVX);
  1391. Processor::set_current_thread(*to_thread);
  1392. auto& from_regs = from_thread->regs();
  1393. auto& to_regs = to_thread->regs();
  1394. // NOTE: IOPL should never be non-zero in any situation, so let's panic immediately
  1395. // instead of carrying on with elevated I/O privileges.
  1396. VERIFY(get_iopl_from_eflags(to_regs.flags()) == 0);
  1397. if (has_xsave_avx_support) {
  1398. // The specific state components saved correspond to the bits set in the requested-feature bitmap (RFBM), which is the logical-AND of EDX:EAX and XCR0.
  1399. // https://www.moritz.systems/blog/how-debuggers-work-getting-and-setting-x86-registers-part-2/
  1400. asm volatile("xsave %0\n"
  1401. : "=m"(from_thread->fpu_state())
  1402. : "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
  1403. } else if (has_fxsr) {
  1404. asm volatile("fxsave %0"
  1405. : "=m"(from_thread->fpu_state()));
  1406. } else {
  1407. asm volatile("fnsave %0"
  1408. : "=m"(from_thread->fpu_state()));
  1409. }
  1410. #if ARCH(I386)
  1411. from_regs.fs = get_fs();
  1412. from_regs.gs = get_gs();
  1413. set_fs(to_regs.fs);
  1414. set_gs(to_regs.gs);
  1415. #endif
  1416. if (from_thread->process().is_traced())
  1417. read_debug_registers_into(from_thread->debug_register_state());
  1418. if (to_thread->process().is_traced()) {
  1419. write_debug_registers_from(to_thread->debug_register_state());
  1420. } else {
  1421. clear_debug_registers();
  1422. }
  1423. auto& processor = Processor::current();
  1424. #if ARCH(I386)
  1425. auto& tls_descriptor = processor.get_gdt_entry(GDT_SELECTOR_TLS);
  1426. tls_descriptor.set_base(to_thread->thread_specific_data());
  1427. tls_descriptor.set_limit(to_thread->thread_specific_region_size());
  1428. #else
  1429. MSR fs_base_msr(MSR_FS_BASE);
  1430. fs_base_msr.set(to_thread->thread_specific_data().get());
  1431. #endif
  1432. if (from_regs.cr3 != to_regs.cr3)
  1433. write_cr3(to_regs.cr3);
  1434. to_thread->set_cpu(processor.id());
  1435. auto in_critical = to_thread->saved_critical();
  1436. VERIFY(in_critical > 0);
  1437. Processor::restore_in_critical(in_critical);
  1438. if (has_xsave_avx_support)
  1439. asm volatile("xrstor %0" ::"m"(to_thread->fpu_state()), "a"(static_cast<u32>(SIMD::StateComponent::AVX | SIMD::StateComponent::SSE | SIMD::StateComponent::X87)), "d"(0u));
  1440. else if (has_fxsr)
  1441. asm volatile("fxrstor %0" ::"m"(to_thread->fpu_state()));
  1442. else
  1443. asm volatile("frstor %0" ::"m"(to_thread->fpu_state()));
  1444. }
  1445. extern "C" FlatPtr do_init_context(Thread* thread, u32 flags)
  1446. {
  1447. VERIFY_INTERRUPTS_DISABLED();
  1448. thread->regs().set_flags(flags);
  1449. return Processor::current().init_context(*thread, true);
  1450. }
  1451. void Processor::assume_context(Thread& thread, FlatPtr flags)
  1452. {
  1453. dbgln_if(CONTEXT_SWITCH_DEBUG, "Assume context for thread {} {}", VirtualAddress(&thread), thread);
  1454. VERIFY_INTERRUPTS_DISABLED();
  1455. Scheduler::prepare_after_exec();
  1456. // in_critical() should be 2 here. The critical section in Process::exec
  1457. // and then the scheduler lock
  1458. VERIFY(Processor::in_critical() == 2);
  1459. do_assume_context(&thread, flags);
  1460. VERIFY_NOT_REACHED();
  1461. }
  1462. u64 Processor::time_spent_idle() const
  1463. {
  1464. return m_idle_thread->time_in_user() + m_idle_thread->time_in_kernel();
  1465. }
  1466. }