Scheduler.cpp 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/ScopeGuard.h>
  7. #include <AK/Time.h>
  8. #include <Kernel/Arch/x86/InterruptDisabler.h>
  9. #include <Kernel/Debug.h>
  10. #include <Kernel/Panic.h>
  11. #include <Kernel/PerformanceManager.h>
  12. #include <Kernel/Process.h>
  13. #include <Kernel/RTC.h>
  14. #include <Kernel/Scheduler.h>
  15. #include <Kernel/Sections.h>
  16. #include <Kernel/Time/TimeManagement.h>
  17. // Remove this once SMP is stable and can be enabled by default
  18. #define SCHEDULE_ON_ALL_PROCESSORS 0
  19. namespace Kernel {
  20. class SchedulerPerProcessorData {
  21. AK_MAKE_NONCOPYABLE(SchedulerPerProcessorData);
  22. AK_MAKE_NONMOVABLE(SchedulerPerProcessorData);
  23. public:
  24. SchedulerPerProcessorData() = default;
  25. bool m_in_scheduler { true };
  26. };
// Global recursive spinlock for the scheduler. In this file it is taken in
// start(), pick_next() and prepare_for_idle_loop(), released in
// leave_on_first_switch(), and its ownership is VERIFY'd by
// queue_runnable_thread(), enter_current() and prepare_after_exec().
RecursiveSpinLock g_scheduler_lock;
  28. static u32 time_slice_for(const Thread& thread)
  29. {
  30. // One time slice unit == 4ms (assuming 250 ticks/second)
  31. if (thread.is_idle_thread())
  32. return 1;
  33. return 2;
  34. }
// Finalizer thread pointer; not assigned anywhere in this file.
READONLY_AFTER_INIT Thread* g_finalizer;
// Wait queue allocated in Scheduler::initialize() and woken by
// Scheduler::notify_finalizer().
READONLY_AFTER_INIT WaitQueue* g_finalizer_wait_queue;
// Set to true by Scheduler::notify_finalizer(); the wait queue is only woken
// on the false -> true transition.
Atomic<bool> g_finalizer_has_work { false };
// Kernel process ("colonel") created in Scheduler::initialize(); the
// per-processor idle threads are created under it.
READONLY_AFTER_INIT static Process* s_colonel_process;
// One bucket of runnable threads, linked through Thread::m_ready_queue_node.
struct ThreadReadyQueue {
    IntrusiveList<Thread, RawPtr<Thread>, &Thread::m_ready_queue_node> thread_list;
};
// Protects g_ready_queues, g_ready_queues_mask and Thread::m_runnable_priority
// as used by the queue/dequeue/pull/peek functions below.
static SpinLock<u8> g_ready_queues_lock;
// Bit N is set while g_ready_queues[N] holds at least one thread; maintained
// by queue_runnable_thread(), dequeue_runnable_thread() and
// pull_next_runnable_thread().
static u32 g_ready_queues_mask;
// Number of buckets == number of bits in the mask above.
static constexpr u32 g_ready_queue_buckets = sizeof(g_ready_queues_mask) * 8;
// Array allocated in Scheduler::initialize(); index 0 is scanned first.
READONLY_AFTER_INIT static ThreadReadyQueue* g_ready_queues; // g_ready_queue_buckets entries
// Defined at the bottom of this file; dumps every thread's state to the log.
static void dump_thread_list();
  47. static inline u32 thread_priority_to_priority_index(u32 thread_priority)
  48. {
  49. // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
  50. // to a index into g_ready_queues where 0 is the highest priority bucket
  51. VERIFY(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
  52. constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
  53. static_assert(thread_priority_count > 0);
  54. auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (g_ready_queue_buckets - 1);
  55. VERIFY(priority_bucket < g_ready_queue_buckets);
  56. return priority_bucket;
  57. }
  58. Thread& Scheduler::pull_next_runnable_thread()
  59. {
  60. auto affinity_mask = 1u << Processor::current().id();
  61. ScopedSpinLock lock(g_ready_queues_lock);
  62. auto priority_mask = g_ready_queues_mask;
  63. while (priority_mask != 0) {
  64. auto priority = __builtin_ffsl(priority_mask);
  65. VERIFY(priority > 0);
  66. auto& ready_queue = g_ready_queues[--priority];
  67. for (auto& thread : ready_queue.thread_list) {
  68. VERIFY(thread.m_runnable_priority == (int)priority);
  69. if (thread.is_active())
  70. continue;
  71. if (!(thread.affinity() & affinity_mask))
  72. continue;
  73. thread.m_runnable_priority = -1;
  74. ready_queue.thread_list.remove(thread);
  75. if (ready_queue.thread_list.is_empty())
  76. g_ready_queues_mask &= ~(1u << priority);
  77. // Mark it as active because we are using this thread. This is similar
  78. // to comparing it with Processor::current_thread, but when there are
  79. // multiple processors there's no easy way to check whether the thread
  80. // is actually still needed. This prevents accidental finalization when
  81. // a thread is no longer in Running state, but running on another core.
  82. // We need to mark it active here so that this thread won't be
  83. // scheduled on another core if it were to be queued before actually
  84. // switching to it.
  85. // FIXME: Figure out a better way maybe?
  86. thread.set_active(true);
  87. return thread;
  88. }
  89. priority_mask &= ~(1u << priority);
  90. }
  91. return *Processor::idle_thread();
  92. }
  93. Thread* Scheduler::peek_next_runnable_thread()
  94. {
  95. auto affinity_mask = 1u << Processor::current().id();
  96. ScopedSpinLock lock(g_ready_queues_lock);
  97. auto priority_mask = g_ready_queues_mask;
  98. while (priority_mask != 0) {
  99. auto priority = __builtin_ffsl(priority_mask);
  100. VERIFY(priority > 0);
  101. auto& ready_queue = g_ready_queues[--priority];
  102. for (auto& thread : ready_queue.thread_list) {
  103. VERIFY(thread.m_runnable_priority == (int)priority);
  104. if (thread.is_active())
  105. continue;
  106. if (!(thread.affinity() & affinity_mask))
  107. continue;
  108. return &thread;
  109. }
  110. priority_mask &= ~(1u << priority);
  111. }
  112. // Unlike in pull_next_runnable_thread() we don't want to fall back to
  113. // the idle thread. We just want to see if we have any other thread ready
  114. // to be scheduled.
  115. return nullptr;
  116. }
  117. bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
  118. {
  119. if (thread.is_idle_thread())
  120. return true;
  121. ScopedSpinLock lock(g_ready_queues_lock);
  122. auto priority = thread.m_runnable_priority;
  123. if (priority < 0) {
  124. VERIFY(!thread.m_ready_queue_node.is_in_list());
  125. return false;
  126. }
  127. if (check_affinity && !(thread.affinity() & (1 << Processor::current().id())))
  128. return false;
  129. VERIFY(g_ready_queues_mask & (1u << priority));
  130. auto& ready_queue = g_ready_queues[priority];
  131. thread.m_runnable_priority = -1;
  132. ready_queue.thread_list.remove(thread);
  133. if (ready_queue.thread_list.is_empty())
  134. g_ready_queues_mask &= ~(1u << priority);
  135. return true;
  136. }
  137. void Scheduler::queue_runnable_thread(Thread& thread)
  138. {
  139. VERIFY(g_scheduler_lock.own_lock());
  140. if (thread.is_idle_thread())
  141. return;
  142. auto priority = thread_priority_to_priority_index(thread.priority());
  143. ScopedSpinLock lock(g_ready_queues_lock);
  144. VERIFY(thread.m_runnable_priority < 0);
  145. thread.m_runnable_priority = (int)priority;
  146. VERIFY(!thread.m_ready_queue_node.is_in_list());
  147. auto& ready_queue = g_ready_queues[priority];
  148. bool was_empty = ready_queue.thread_list.is_empty();
  149. ready_queue.thread_list.append(thread);
  150. if (was_empty)
  151. g_ready_queues_mask |= (1u << priority);
  152. }
// Starts scheduling on the current processor by switching into its idle
// thread. Must be called with interrupts disabled and with the idle thread
// already installed as the processor's current thread. Control is not
// expected to come back here (VERIFY_NOT_REACHED follows the switch).
UNMAP_AFTER_INIT void Scheduler::start()
{
    VERIFY_INTERRUPTS_DISABLED();
    // We need to acquire our scheduler lock, which will be released
    // by the idle thread once control transferred there
    g_scheduler_lock.lock();
    auto& processor = Processor::current();
    // Per-processor scheduler state; m_in_scheduler starts out true.
    processor.set_scheduler_data(*new SchedulerPerProcessorData());
    VERIFY(processor.is_initialized());
    auto& idle_thread = *Processor::idle_thread();
    VERIFY(processor.current_thread() == &idle_thread);
    // Prepare the idle thread as if it had just been picked by the scheduler.
    idle_thread.set_ticks_left(time_slice_for(idle_thread));
    idle_thread.did_schedule();
    idle_thread.set_initialized(true);
    processor.init_context(idle_thread, false);
    idle_thread.set_state(Thread::Running);
    // The idle thread must be pinned to exactly this processor.
    VERIFY(idle_thread.affinity() == (1u << processor.get_id()));
    processor.initialize_context_switching(idle_thread);
    VERIFY_NOT_REACHED();
}
// Picks the next thread to run on this processor (falling back to the idle
// thread) and switches to it. Must be called with interrupts disabled.
// Returns the result of context_switch(): false when the chosen thread is
// already the current one, true after having switched away and back.
bool Scheduler::pick_next()
{
    VERIFY_INTERRUPTS_DISABLED();
    // Set the m_in_scheduler flag before acquiring the spinlock. This
    // prevents a recursive call into Scheduler::invoke_async upon
    // leaving the scheduler lock.
    ScopedCritical critical;
    auto& scheduler_data = Processor::current().get_scheduler_data();
    scheduler_data.m_in_scheduler = true;
    // Clears m_in_scheduler again when this function returns.
    ScopeGuard guard(
        []() {
            // We may be on a different processor after we got switched
            // back to this thread!
            auto& scheduler_data = Processor::current().get_scheduler_data();
            VERIFY(scheduler_data.m_in_scheduler);
            scheduler_data.m_in_scheduler = false;
        });
    ScopedSpinLock lock(g_scheduler_lock);
    if constexpr (SCHEDULER_RUNNABLE_DEBUG) {
        dump_thread_list();
    }
    auto& thread_to_schedule = pull_next_runnable_thread();
    if constexpr (SCHEDULER_DEBUG) {
#if ARCH(I386)
        dbgln("Scheduler[{}]: Switch to {} @ {:04x}:{:08x}",
            Processor::id(),
            thread_to_schedule,
            thread_to_schedule.regs().cs, thread_to_schedule.regs().eip);
#else
        // Only the i386 register layout is handled by this debug output.
        PANIC("Scheduler::pick_next() not implemented");
#endif
    }
    // We need to leave our first critical section before switching context,
    // but since we're still holding the scheduler lock we're still in a critical section
    critical.leave();
    // Hand the chosen thread a fresh time slice before switching to it.
    thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
    return context_switch(&thread_to_schedule);
}
  211. bool Scheduler::yield()
  212. {
  213. InterruptDisabler disabler;
  214. auto& proc = Processor::current();
  215. auto current_thread = Thread::current();
  216. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: yielding thread {} in_irq={}", proc.get_id(), *current_thread, proc.in_irq());
  217. VERIFY(current_thread != nullptr);
  218. if (proc.in_irq() || proc.in_critical()) {
  219. // If we're handling an IRQ we can't switch context, or we're in
  220. // a critical section where we don't want to switch contexts, then
  221. // delay until exiting the trap or critical section
  222. proc.invoke_scheduler_async();
  223. return false;
  224. }
  225. if (!Scheduler::pick_next())
  226. return false;
  227. if constexpr (SCHEDULER_DEBUG)
  228. dbgln("Scheduler[{}]: yield returns to thread {} in_irq={}", Processor::id(), *current_thread, Processor::current().in_irq());
  229. return true;
  230. }
// Switches from the current thread to `thread`.
// Returns false (without switching) if `thread` is already the current
// thread; otherwise returns true once this thread has been resumed.
// Panics if s_mm_lock is held, or if a user process thread is resumed with a
// non-zero IOPL in its saved flags.
bool Scheduler::context_switch(Thread* thread)
{
    if (s_mm_lock.own_lock()) {
        PANIC("In context switch while holding s_mm_lock");
    }
    // Note: this is called even when we end up not switching below.
    thread->did_schedule();
    auto from_thread = Thread::current();
    if (from_thread == thread)
        return false;
    // NOTE(review): from_thread is null-checked here but dereferenced
    // unconditionally further down — confirm it can never be null.
    if (from_thread) {
        // If the last process hasn't blocked (still marked as running),
        // mark it as runnable for the next round.
        if (from_thread->state() == Thread::Running)
            from_thread->set_state(Thread::Runnable);
#ifdef LOG_EVERY_CONTEXT_SWITCH
#    if ARCH(I386)
        dbgln("Scheduler[{}]: {} -> {} [prio={}] {:04x}:{:08x}", Processor::id(), from_thread->tid().value(),
            thread->tid().value(), thread->priority(), thread->regs().cs, thread->regs().eip);
#    else
        dbgln("Scheduler[{}]: {} -> {} [prio={}] {:04x}:{:16x}", Processor::id(), from_thread->tid().value(),
            thread->tid().value(), thread->priority(), thread->regs().cs, thread->regs().rip);
#    endif
#endif
    }
    auto& proc = Processor::current();
    // Threads get their initial context set up the first time they are
    // switched to.
    if (!thread->is_initialized()) {
        proc.init_context(*thread, false);
        thread->set_initialized(true);
    }
    thread->set_state(Thread::Running);
    PerformanceManager::add_context_switch_perf_event(*from_thread, *thread);
    proc.switch_context(from_thread, thread);
    // NOTE: from_thread at this point reflects the thread we were
    // switched from, and thread reflects Thread::current()
    enter_current(*from_thread, false);
    VERIFY(thread == Thread::current());
    // Sanity check: user process threads must have IOPL 0 in their saved flags.
    if (thread->process().is_user_process()) {
        FlatPtr flags;
        auto& regs = Thread::current()->get_register_dump_from_stack();
#if ARCH(I386)
        flags = regs.eflags;
#else
        flags = regs.rflags;
#endif
        auto iopl = get_iopl_from_eflags(flags);
        if (iopl != 0) {
            PANIC("Switched to thread {} with non-zero IOPL={}", Thread::current()->tid().value(), iopl);
        }
    }
    return true;
}
// Bookkeeping performed right after a thread has been switched in.
// `prev_thread` is the thread we switched away from; `is_first` suppresses
// the signal check below (context_switch() in this file always passes false).
// Requires g_scheduler_lock to be held.
void Scheduler::enter_current(Thread& prev_thread, bool is_first)
{
    VERIFY(g_scheduler_lock.own_lock());
    // The previous thread is no longer executing; clear the flag that
    // pull_next_runnable_thread() sets when it hands out a thread.
    prev_thread.set_active(false);
    if (prev_thread.state() == Thread::Dying) {
        // If the thread we switched from is marked as dying, then notify
        // the finalizer. Note that as soon as we leave the scheduler lock
        // the finalizer may free prev_thread!
        notify_finalizer();
    } else if (!is_first) {
        // Check if we have any signals we should deliver (even if we don't
        // end up switching to another thread).
        auto current_thread = Thread::current();
        if (!current_thread->is_in_block() && current_thread->previous_mode() != Thread::PreviousMode::KernelMode) {
            ScopedSpinLock lock(current_thread->get_lock());
            if (current_thread->state() == Thread::Running && current_thread->pending_signals_for_state()) {
                current_thread->dispatch_one_pending_signal();
            }
        }
    }
}
// Releases the scheduler lock (restoring `flags`) and clears the current
// processor's m_in_scheduler flag.
void Scheduler::leave_on_first_switch(u32 flags)
{
    // This is called when a thread is switched into for the first time.
    // At this point, enter_current has already been called, but because
    // Scheduler::context_switch is not in the call stack we need to
    // clean up and release locks manually here
    g_scheduler_lock.unlock(flags);
    auto& scheduler_data = Processor::current().get_scheduler_data();
    VERIFY(scheduler_data.m_in_scheduler);
    scheduler_data.m_in_scheduler = false;
}
// Marks the current processor as being inside the scheduler. Requires
// g_scheduler_lock to be held and the flag to be currently clear.
void Scheduler::prepare_after_exec()
{
    // This is called after exec() when doing a context "switch" into
    // the new process. This is called from Processor::assume_context
    VERIFY(g_scheduler_lock.own_lock());
    auto& scheduler_data = Processor::current().get_scheduler_data();
    VERIFY(!scheduler_data.m_in_scheduler);
    scheduler_data.m_in_scheduler = true;
}
// Takes g_scheduler_lock (which must not already be held by the caller) and
// marks the current processor as being inside the scheduler.
void Scheduler::prepare_for_idle_loop()
{
    // This is called when the CPU finished setting up the idle loop
    // and is about to run it. We need to acquire the scheduler lock
    VERIFY(!g_scheduler_lock.own_lock());
    g_scheduler_lock.lock();
    auto& scheduler_data = Processor::current().get_scheduler_data();
    VERIFY(!scheduler_data.m_in_scheduler);
    scheduler_data.m_in_scheduler = true;
}
  333. Process* Scheduler::colonel()
  334. {
  335. VERIFY(s_colonel_process);
  336. return s_colonel_process;
  337. }
// One-time scheduler setup: allocates the finalizer wait queue and the ready
// queue buckets, creates the "colonel" kernel process together with its first
// thread, and installs that thread as the current processor's idle thread.
UNMAP_AFTER_INIT void Scheduler::initialize()
{
    VERIFY(Processor::is_initialized()); // sanity check
    RefPtr<Thread> idle_thread;
    g_finalizer_wait_queue = new WaitQueue;
    g_ready_queues = new ThreadReadyQueue[g_ready_queue_buckets];
    g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
    // create_kernel_process() hands back the process' first thread through
    // `idle_thread`; the process reference is leaked on purpose so that it
    // can be kept in a raw pointer.
    s_colonel_process = Process::create_kernel_process(idle_thread, "colonel", idle_loop, nullptr, 1, Process::RegisterProcess::No).leak_ref();
    VERIFY(s_colonel_process);
    VERIFY(idle_thread);
    idle_thread->set_priority(THREAD_PRIORITY_MIN);
    idle_thread->set_name(StringView("idle thread #0"));
    set_idle_thread(idle_thread);
}
  352. UNMAP_AFTER_INIT void Scheduler::set_idle_thread(Thread* idle_thread)
  353. {
  354. idle_thread->set_idle_thread();
  355. Processor::current().set_idle_thread(*idle_thread);
  356. Processor::current().set_current_thread(*idle_thread);
  357. }
  358. UNMAP_AFTER_INIT Thread* Scheduler::create_ap_idle_thread(u32 cpu)
  359. {
  360. VERIFY(cpu != 0);
  361. // This function is called on the bsp, but creates an idle thread for another AP
  362. VERIFY(Processor::is_bootstrap_processor());
  363. VERIFY(s_colonel_process);
  364. Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, String::formatted("idle thread #{}", cpu), 1 << cpu, false);
  365. VERIFY(idle_thread);
  366. return idle_thread;
  367. }
// Timer interrupt hook. Runs in IRQ context with interrupts disabled.
// Charges a tick to the current thread and, once its time slice is used up,
// either grants it another slice (when no other thread is ready) or asks the
// processor to invoke the scheduler asynchronously.
void Scheduler::timer_tick(const RegisterState& regs)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current().in_irq());
    auto current_thread = Processor::current_thread();
    if (!current_thread)
        return;
    // Sanity checks
    VERIFY(current_thread->current_trap());
    VERIFY(current_thread->current_trap()->regs == &regs);
#if !SCHEDULE_ON_ALL_PROCESSORS
    if (!Processor::is_bootstrap_processor())
        return; // TODO: This prevents scheduling on other CPUs!
#endif
    // tick() returning true means we return without rescheduling —
    // presumably the thread still has time left in its slice.
    if (current_thread->tick())
        return;
    if (!current_thread->is_idle_thread() && !peek_next_runnable_thread()) {
        // If no other thread is ready to be scheduled we don't need to
        // switch to the idle thread. Just give the current thread another
        // time slice and let it run!
        current_thread->set_ticks_left(time_slice_for(*current_thread));
        current_thread->did_schedule();
        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: No other threads ready, give {} another timeslice", Processor::id(), *current_thread);
        return;
    }
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current().in_irq());
    // We can't switch from inside the IRQ handler; defer to the scheduler.
    Processor::current().invoke_scheduler_async();
}
  397. void Scheduler::invoke_async()
  398. {
  399. VERIFY_INTERRUPTS_DISABLED();
  400. auto& proc = Processor::current();
  401. VERIFY(!proc.in_irq());
  402. // Since this function is called when leaving critical sections (such
  403. // as a SpinLock), we need to check if we're not already doing this
  404. // to prevent recursion
  405. if (!proc.get_scheduler_data().m_in_scheduler)
  406. pick_next();
  407. }
// Yields from inside a critical section (but not from an IRQ handler).
void Scheduler::yield_from_critical()
{
    auto& proc = Processor::current();
    VERIFY(proc.in_critical());
    VERIFY(!proc.in_irq());
    // Since we are in a critical section, yield() takes its deferral path:
    // it only requests an asynchronous scheduler invocation and returns.
    yield(); // Flag a context switch
    u32 prev_flags;
    // Temporarily drop the critical section, then restore it.
    // NOTE(review): presumably this is what lets the deferred switch
    // actually happen — confirm against Processor::clear_critical().
    u32 prev_crit = Processor::current().clear_critical(prev_flags, false);
    // Note, we may now be on a different CPU!
    Processor::current().restore_critical(prev_crit, prev_flags);
}
  419. void Scheduler::notify_finalizer()
  420. {
  421. if (g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel) == false)
  422. g_finalizer_wait_queue->wake_all();
  423. }
// Entry point of the idle threads: halts the processor until the next
// interrupt, then yields so that other threads get a chance to run.
// Never returns.
void Scheduler::idle_loop(void*)
{
    auto& proc = Processor::current();
    dbgln("Scheduler[{}]: idle loop running", proc.get_id());
    VERIFY(are_interrupts_enabled());
    for (;;) {
        proc.idle_begin();
        asm("hlt");
        proc.idle_end();
        VERIFY_INTERRUPTS_ENABLED();
#if SCHEDULE_ON_ALL_PROCESSORS
        yield();
#else
        // Without SCHEDULE_ON_ALL_PROCESSORS only processor 0 schedules.
        if (Processor::current().id() == 0)
            yield();
#endif
    }
}
// Public wrapper around the file-local dump_thread_list(), which logs the
// state of every thread.
void Scheduler::dump_scheduler_state()
{
    dump_thread_list();
}
  446. bool Scheduler::is_initialized()
  447. {
  448. // The scheduler is initialized iff the idle thread exists
  449. return Processor::idle_thread() != nullptr;
  450. }
  451. void dump_thread_list()
  452. {
  453. dbgln("Scheduler thread list for processor {}:", Processor::id());
  454. auto get_cs = [](Thread& thread) -> u16 {
  455. if (!thread.current_trap())
  456. return thread.regs().cs;
  457. return thread.get_register_dump_from_stack().cs;
  458. };
  459. auto get_eip = [](Thread& thread) -> u32 {
  460. #if ARCH(I386)
  461. if (!thread.current_trap())
  462. return thread.regs().eip;
  463. return thread.get_register_dump_from_stack().eip;
  464. #else
  465. if (!thread.current_trap())
  466. return thread.regs().rip;
  467. return thread.get_register_dump_from_stack().rip;
  468. #endif
  469. };
  470. Thread::for_each([&](Thread& thread) {
  471. switch (thread.state()) {
  472. case Thread::Dying:
  473. dmesgln(" {:14} {:30} @ {:04x}:{:08x} Finalizable: {}, (nsched: {})",
  474. thread.state_string(),
  475. thread,
  476. get_cs(thread),
  477. get_eip(thread),
  478. thread.is_finalizable(),
  479. thread.times_scheduled());
  480. break;
  481. default:
  482. dmesgln(" {:14} Pr:{:2} {:30} @ {:04x}:{:08x} (nsched: {})",
  483. thread.state_string(),
  484. thread.priority(),
  485. thread,
  486. get_cs(thread),
  487. get_eip(thread),
  488. thread.times_scheduled());
  489. break;
  490. }
  491. });
  492. }
  493. }