Scheduler.cpp

/*
 * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include <AK/BuiltinWrappers.h>
#include <AK/ScopeGuard.h>
#include <AK/Singleton.h>
#include <AK/Time.h>
#include <Kernel/Arch/TrapFrame.h>
#include <Kernel/Debug.h>
#include <Kernel/InterruptDisabler.h>
#include <Kernel/Panic.h>
#include <Kernel/PerformanceManager.h>
#include <Kernel/Process.h>
#include <Kernel/Scheduler.h>
#include <Kernel/Sections.h>
#include <Kernel/Time/TimeManagement.h>
#include <Kernel/kstdio.h>

namespace Kernel {

RecursiveSpinlock<LockRank::None> g_scheduler_lock {};

static u32 time_slice_for(Thread const& thread)
{
    // One time slice unit == 4ms (assuming 250 ticks/second)
    if (thread.is_idle_thread())
        return 1;
    return 2;
}
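
// Rough arithmetic behind the slice lengths above (using the 250 ticks/second figure
// from the comment, i.e. ~4 ms per tick):
//
//     idle thread:   1 tick  ~ 4 ms
//     other threads: 2 ticks ~ 8 ms
//
// Scheduler::timer_tick() further down consumes one tick per timer interrupt and
// requests a reschedule once a thread's slice is used up.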

READONLY_AFTER_INIT Thread* g_finalizer;
READONLY_AFTER_INIT WaitQueue* g_finalizer_wait_queue;
Atomic<bool> g_finalizer_has_work { false };
READONLY_AFTER_INIT static Process* s_colonel_process;

struct ThreadReadyQueue {
    IntrusiveList<&Thread::m_ready_queue_node> thread_list;
};

struct ThreadReadyQueues {
    u32 mask {};
    static constexpr size_t count = sizeof(mask) * 8;
    Array<ThreadReadyQueue, count> queues;
};
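
// The ready queues form a small priority bitmap: bit i of `mask` is set exactly when
// queues[i] is non-empty, and a lower index means a higher priority. A minimal sketch
// of how a consumer finds the best non-empty bucket (this mirrors what
// pull_next_runnable_thread() does below; bit_scan_forward() is used there as a
// 1-based, ffs-style scan, hence the decrement):
//
//     auto priority = bit_scan_forward(ready_queues.mask); // 0 means "no bit set"
//     if (priority != 0) {
//         auto& bucket = ready_queues.queues[--priority];
//         // ... walk bucket.thread_list for a schedulable thread ...
//     }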

static Singleton<SpinlockProtected<ThreadReadyQueues, LockRank::None>> g_ready_queues;

static SpinlockProtected<TotalTimeScheduled, LockRank::None> g_total_time_scheduled {};

static void dump_thread_list(bool = false);

static inline u32 thread_priority_to_priority_index(u32 thread_priority)
{
    // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
    // to an index into g_ready_queues where 0 is the highest priority bucket
    VERIFY(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
    constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
    static_assert(thread_priority_count > 0);
    auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (ThreadReadyQueues::count - 1);
    VERIFY(priority_bucket < ThreadReadyQueues::count);
    return priority_bucket;
}
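
// A worked example of the mapping, with the formula as written and the 32 buckets
// declared above (assuming only that THREAD_PRIORITY_MAX > THREAD_PRIORITY_MIN):
// for THREAD_PRIORITY_MAX the inner term is thread_priority_count - (MAX - MIN) == 1,
// the integer division yields 0, and the thread lands in bucket 0, the first bucket
// pull_next_runnable_thread() scans; for THREAD_PRIORITY_MIN the division yields 1
// and the thread lands in bucket ThreadReadyQueues::count - 1 == 31, the last one.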

Thread& Scheduler::pull_next_runnable_thread()
{
    auto affinity_mask = 1u << Processor::current_id();

    return g_ready_queues->with([&](auto& ready_queues) -> Thread& {
        auto priority_mask = ready_queues.mask;
        while (priority_mask != 0) {
            auto priority = bit_scan_forward(priority_mask);
            VERIFY(priority > 0);
            auto& ready_queue = ready_queues.queues[--priority];
            for (auto& thread : ready_queue.thread_list) {
                VERIFY(thread.m_runnable_priority == (int)priority);
                if (thread.is_active())
                    continue;
                if (!(thread.affinity() & affinity_mask))
                    continue;
                thread.m_runnable_priority = -1;
                ready_queue.thread_list.remove(thread);
                if (ready_queue.thread_list.is_empty())
                    ready_queues.mask &= ~(1u << priority);
                // Mark it as active because we are using this thread. This is similar
                // to comparing it with Processor::current_thread, but when there are
                // multiple processors there's no easy way to check whether the thread
                // is actually still needed. This prevents accidental finalization when
                // a thread is no longer in Running state, but running on another core.
                // We need to mark it active here so that this thread won't be
                // scheduled on another core if it were to be queued before actually
                // switching to it.
                // FIXME: Figure out a better way maybe?
                thread.set_active(true);
                return thread;
            }
            priority_mask &= ~(1u << priority);
        }
        return *Processor::idle_thread();
    });
}
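
// What callers get from pull_next_runnable_thread(), judging by the code above: a
// thread that has already been unlinked from its ready queue (m_runnable_priority
// reset to -1), marked active, and filtered by this processor's affinity bit; if no
// such thread exists, the per-processor idle thread is returned instead. Its caller
// in this file, pick_next(), holds g_scheduler_lock at that point.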

Thread* Scheduler::peek_next_runnable_thread()
{
    auto affinity_mask = 1u << Processor::current_id();

    return g_ready_queues->with([&](auto& ready_queues) -> Thread* {
        auto priority_mask = ready_queues.mask;
        while (priority_mask != 0) {
            auto priority = bit_scan_forward(priority_mask);
            VERIFY(priority > 0);
            auto& ready_queue = ready_queues.queues[--priority];
            for (auto& thread : ready_queue.thread_list) {
                VERIFY(thread.m_runnable_priority == (int)priority);
                if (thread.is_active())
                    continue;
                if (!(thread.affinity() & affinity_mask))
                    continue;
                return &thread;
            }
            priority_mask &= ~(1u << priority);
        }

        // Unlike in pull_next_runnable_thread() we don't want to fall back to
        // the idle thread. We just want to see if we have any other thread ready
        // to be scheduled.
        return nullptr;
    });
}

bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
{
    if (thread.is_idle_thread())
        return true;

    return g_ready_queues->with([&](auto& ready_queues) {
        auto priority = thread.m_runnable_priority;
        if (priority < 0) {
            VERIFY(!thread.m_ready_queue_node.is_in_list());
            return false;
        }

        if (check_affinity && !(thread.affinity() & (1 << Processor::current_id())))
            return false;

        VERIFY(ready_queues.mask & (1u << priority));

        auto& ready_queue = ready_queues.queues[priority];
        thread.m_runnable_priority = -1;
        ready_queue.thread_list.remove(thread);
        if (ready_queue.thread_list.is_empty())
            ready_queues.mask &= ~(1u << priority);
        return true;
    });
}

void Scheduler::enqueue_runnable_thread(Thread& thread)
{
    VERIFY(g_scheduler_lock.is_locked_by_current_processor());
    if (thread.is_idle_thread())
        return;
    auto priority = thread_priority_to_priority_index(thread.priority());

    g_ready_queues->with([&](auto& ready_queues) {
        VERIFY(thread.m_runnable_priority < 0);
        thread.m_runnable_priority = (int)priority;
        VERIFY(!thread.m_ready_queue_node.is_in_list());

        auto& ready_queue = ready_queues.queues[priority];
        bool was_empty = ready_queue.thread_list.is_empty();
        ready_queue.thread_list.append(thread);
        if (was_empty)
            ready_queues.mask |= (1u << priority);
    });
}
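
// Taken together, enqueue_runnable_thread() and dequeue_runnable_thread() keep a
// simple invariant: a non-idle thread sits in exactly one bucket while its
// m_runnable_priority is >= 0 and in none once it is -1, and a bucket's mask bit
// mirrors whether its list is empty. A minimal, hedged sketch of the round trip
// (callers are responsible for holding g_scheduler_lock, as the VERIFY above requires):
//
//     SpinlockLocker lock(g_scheduler_lock);
//     Scheduler::enqueue_runnable_thread(thread);        // records priority, sets mask bit
//     // ...
//     Scheduler::dequeue_runnable_thread(thread, false); // clears mask bit if bucket empties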

UNMAP_AFTER_INIT void Scheduler::start()
{
    VERIFY_INTERRUPTS_DISABLED();

    // We need to acquire our scheduler lock, which will be released
    // by the idle thread once control is transferred there
    g_scheduler_lock.lock();

    auto& processor = Processor::current();
    VERIFY(processor.is_initialized());
    auto& idle_thread = *Processor::idle_thread();
    VERIFY(processor.current_thread() == &idle_thread);
    idle_thread.set_ticks_left(time_slice_for(idle_thread));
    idle_thread.did_schedule();
    idle_thread.set_initialized(true);
    processor.init_context(idle_thread, false);
    idle_thread.set_state(Thread::State::Running);
    VERIFY(idle_thread.affinity() == (1u << processor.id()));
    processor.initialize_context_switching(idle_thread);
    VERIFY_NOT_REACHED();
}

void Scheduler::pick_next()
{
    VERIFY_INTERRUPTS_DISABLED();

    // Set the in_scheduler flag before acquiring the spinlock. This
    // prevents a recursive call into Scheduler::invoke_async upon
    // leaving the scheduler lock.
    ScopedCritical critical;
    Processor::set_current_in_scheduler(true);
    ScopeGuard guard(
        []() {
            // We may be on a different processor after we got switched
            // back to this thread!
            VERIFY(Processor::current_in_scheduler());
            Processor::set_current_in_scheduler(false);
        });

    SpinlockLocker lock(g_scheduler_lock);

    if constexpr (SCHEDULER_RUNNABLE_DEBUG) {
        dump_thread_list();
    }

    auto& thread_to_schedule = pull_next_runnable_thread();
    if constexpr (SCHEDULER_DEBUG) {
        dbgln("Scheduler[{}]: Switch to {} @ {:p}",
            Processor::current_id(),
            thread_to_schedule,
            thread_to_schedule.regs().ip());
    }

    // We need to leave our first critical section before switching context,
    // but since we're still holding the scheduler lock we're still in a critical section
    critical.leave();

    thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
    context_switch(&thread_to_schedule);
}
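
// A note on how the state set up above is torn down again (a reading of the code in
// this file, not an extra guarantee): the SpinlockLocker on g_scheduler_lock is
// released either when this thread is eventually switched back to and
// context_switch() returns, or, for a thread being switched into for the very first
// time, by leave_on_first_switch() below; likewise the ScopeGuard clears the
// in_scheduler flag on whichever processor resumes this thread.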

void Scheduler::yield()
{
    InterruptDisabler disabler;

    auto const* current_thread = Thread::current();
    dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: yielding thread {} in_irq={}", Processor::current_id(), *current_thread, Processor::current_in_irq());
    VERIFY(current_thread != nullptr);
    if (Processor::current_in_irq() || Processor::in_critical()) {
        // If we're handling an IRQ we can't switch context, or we're in
        // a critical section where we don't want to switch contexts, then
        // delay until exiting the trap or critical section
        Processor::current().invoke_scheduler_async();
        return;
    }

    Scheduler::pick_next();
}
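
// A hedged usage sketch (the loop is illustrative, not taken from this file): kernel
// code that wants to give up the CPU cooperatively can simply call yield(); when
// invoked from an IRQ handler or inside a critical section, the switch is deferred
// via invoke_scheduler_async() and happens once the trap or critical section is left.
//
//     while (!some_condition_is_met()) // hypothetical predicate
//         Scheduler::yield();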

void Scheduler::context_switch(Thread* thread)
{
    thread->did_schedule();

    auto* from_thread = Thread::current();
    VERIFY(from_thread);

    if (from_thread == thread)
        return;

    // If the last process hasn't blocked (still marked as running),
    // mark it as runnable for the next round.
    if (from_thread->state() == Thread::State::Running)
        from_thread->set_state(Thread::State::Runnable);

#ifdef LOG_EVERY_CONTEXT_SWITCH
    auto const msg = "Scheduler[{}]: {} -> {} [prio={}] {:p}";
    dbgln(msg,
        Processor::current_id(), from_thread->tid().value(),
        thread->tid().value(), thread->priority(), thread->regs().ip());
#endif

    auto& proc = Processor::current();
    if (!thread->is_initialized()) {
        proc.init_context(*thread, false);
        thread->set_initialized(true);
    }
    thread->set_state(Thread::State::Running);

    PerformanceManager::add_context_switch_perf_event(*from_thread, *thread);

    proc.switch_context(from_thread, thread);

    // NOTE: from_thread at this point reflects the thread we were
    // switched from, and thread reflects Thread::current()
    enter_current(*from_thread);
    VERIFY(thread == Thread::current());

    {
        SpinlockLocker lock(thread->get_lock());
        thread->dispatch_one_pending_signal();
    }
}

void Scheduler::enter_current(Thread& prev_thread)
{
    VERIFY(g_scheduler_lock.is_locked_by_current_processor());

    // We already recorded the scheduled time when entering the trap, so this merely accounts for the kernel time since then
    auto scheduler_time = TimeManagement::scheduler_current_time();
    prev_thread.update_time_scheduled(scheduler_time, true, true);
    auto* current_thread = Thread::current();
    current_thread->update_time_scheduled(scheduler_time, true, false);

    // NOTE: When doing an exec(), we will context switch from and to the same thread!
    //       In that case, we must not mark the previous thread as inactive.
    if (&prev_thread != current_thread)
        prev_thread.set_active(false);

    if (prev_thread.state() == Thread::State::Dying) {
        // If the thread we switched from is marked as dying, then notify
        // the finalizer. Note that as soon as we leave the scheduler lock
        // the finalizer may free from_thread!
        notify_finalizer();
    }
}

void Scheduler::leave_on_first_switch(InterruptsState previous_interrupts_state)
{
    // This is called when a thread is switched into for the first time.
    // At this point, enter_current() has already been called, but because
    // Scheduler::context_switch is not in the call stack we need to
    // clean up and release locks manually here
    g_scheduler_lock.unlock(previous_interrupts_state);

    VERIFY(Processor::current_in_scheduler());
    Processor::set_current_in_scheduler(false);
}

void Scheduler::prepare_after_exec()
{
    // This is called after exec() when doing a context "switch" into
    // the new process. This is called from Processor::assume_context
    VERIFY(g_scheduler_lock.is_locked_by_current_processor());

    VERIFY(!Processor::current_in_scheduler());
    Processor::set_current_in_scheduler(true);
}

void Scheduler::prepare_for_idle_loop()
{
    // This is called when the CPU has finished setting up the idle loop
    // and is about to run it. We need to acquire the scheduler lock
    VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
    g_scheduler_lock.lock();

    VERIFY(!Processor::current_in_scheduler());
    Processor::set_current_in_scheduler(true);
}

Process* Scheduler::colonel()
{
    VERIFY(s_colonel_process);
    return s_colonel_process;
}

UNMAP_AFTER_INIT void Scheduler::initialize()
{
    VERIFY(Processor::is_initialized()); // sanity check
    VERIFY(TimeManagement::is_initialized());

    g_finalizer_wait_queue = new WaitQueue;
    g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);

    auto [colonel_process, idle_thread] = MUST(Process::create_kernel_process(KString::must_create("colonel"sv), idle_loop, nullptr, 1, Process::RegisterProcess::No));
    s_colonel_process = &colonel_process.leak_ref();

    idle_thread->set_priority(THREAD_PRIORITY_MIN);
    idle_thread->set_name(KString::must_create("Idle Task #0"sv));

    set_idle_thread(idle_thread);
}

UNMAP_AFTER_INIT void Scheduler::set_idle_thread(Thread* idle_thread)
{
    idle_thread->set_idle_thread();
    Processor::current().set_idle_thread(*idle_thread);
    Processor::set_current_thread(*idle_thread);
}

UNMAP_AFTER_INIT Thread* Scheduler::create_ap_idle_thread(u32 cpu)
{
    VERIFY(cpu != 0);
    // This function is called on the bsp, but creates an idle thread for another AP
    VERIFY(Processor::is_bootstrap_processor());

    VERIFY(s_colonel_process);
    Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, MUST(KString::formatted("idle thread #{}", cpu)), 1 << cpu, false);
    VERIFY(idle_thread);
    return idle_thread;
}

void Scheduler::add_time_scheduled(u64 time_to_add, bool is_kernel)
{
    g_total_time_scheduled.with([&](auto& total_time_scheduled) {
        total_time_scheduled.total += time_to_add;
        if (is_kernel)
            total_time_scheduled.total_kernel += time_to_add;
    });
}
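
// Within this file, g_total_time_scheduled is only touched here and in
// get_total_time_scheduled() further down; both go through the same SpinlockProtected
// wrapper, so readers always observe a consistent { total, total_kernel } pair rather
// than a torn update.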

void Scheduler::timer_tick(RegisterState const& regs)
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current_in_irq());

    auto* current_thread = Processor::current_thread();
    if (!current_thread)
        return;

    // Sanity checks
    VERIFY(current_thread->current_trap());
    VERIFY(current_thread->current_trap()->regs == &regs);

    if (current_thread->process().is_kernel_process()) {
        // Because the previous mode never changes when entering/exiting kernel threads,
        // we never update the time scheduled. So we need to update it manually on the
        // timer interrupt
        current_thread->update_time_scheduled(TimeManagement::scheduler_current_time(), true, false);
    }

    if (current_thread->previous_mode() == ExecutionMode::User && current_thread->should_die() && !current_thread->is_blocked()) {
        SpinlockLocker scheduler_lock(g_scheduler_lock);
        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: Terminating user mode thread {}", Processor::current_id(), *current_thread);
        current_thread->set_state(Thread::State::Dying);
        Processor::current().invoke_scheduler_async();
        return;
    }

    if (current_thread->tick())
        return;

    if (!current_thread->is_idle_thread() && !peek_next_runnable_thread()) {
        // If no other thread is ready to be scheduled we don't need to
        // switch to the idle thread. Just give the current thread another
        // time slice and let it run!
        current_thread->set_ticks_left(time_slice_for(*current_thread));
        current_thread->did_schedule();
        dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: No other threads ready, give {} another timeslice", Processor::current_id(), *current_thread);
        return;
    }

    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(Processor::current_in_irq());
    Processor::current().invoke_scheduler_async();
}
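
// The decision ladder above, summarized (a reading of the code; Thread::tick() is
// defined elsewhere, but judging by its use here it consumes one tick and reports
// whether the slice still has time left):
//   1. kernel threads have their scheduled time refreshed manually,
//   2. a user-mode thread that should die is moved to Dying and a reschedule is requested,
//   3. if the current thread's slice is not yet used up, nothing else happens,
//   4. if the slice is up but nothing else is runnable, the thread gets a fresh slice,
//   5. otherwise invoke_scheduler_async() defers the actual switch until the trap is left.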

void Scheduler::invoke_async()
{
    VERIFY_INTERRUPTS_DISABLED();
    VERIFY(!Processor::current_in_irq());

    // Since this function is called when leaving critical sections (such
    // as a Spinlock), we need to check if we're not already doing this
    // to prevent recursion
    if (!Processor::current_in_scheduler())
        pick_next();
}

void Scheduler::notify_finalizer()
{
    if (!g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel))
        g_finalizer_wait_queue->wake_all();
}
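
// The exchange() above means only the transition from "no work" to "has work" wakes
// the wait queue, so a burst of dying threads does not cause redundant wake_all()
// calls. The flag is presumably cleared again by the finalizer task itself, which
// lives outside this file.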

void Scheduler::idle_loop(void*)
{
    auto& proc = Processor::current();
    dbgln("Scheduler[{}]: idle loop running", proc.id());
    VERIFY(Processor::are_interrupts_enabled());

    for (;;) {
        proc.idle_begin();
        proc.wait_for_interrupt();
        proc.idle_end();
        VERIFY_INTERRUPTS_ENABLED();
        yield();
    }
}

void Scheduler::dump_scheduler_state(bool with_stack_traces)
{
    dump_thread_list(with_stack_traces);
}

bool Scheduler::is_initialized()
{
    // The scheduler is initialized iff the idle thread exists
    return Processor::idle_thread() != nullptr;
}

TotalTimeScheduled Scheduler::get_total_time_scheduled()
{
    return g_total_time_scheduled.with([&](auto& total_time_scheduled) { return total_time_scheduled; });
}

void dump_thread_list(bool with_stack_traces)
{
    dbgln("Scheduler thread list for processor {}:", Processor::current_id());

    auto get_eip = [](Thread& thread) -> u32 {
        if (!thread.current_trap())
            return thread.regs().ip();
        return thread.get_register_dump_from_stack().ip();
    };

    Thread::for_each([&](Thread& thread) {
        auto color = thread.process().is_kernel_process() ? "\x1b[34;1m"sv : "\x1b[33;1m"sv;
        switch (thread.state()) {
        case Thread::State::Dying:
            dmesgln(" {}{:30}\x1b[0m @ {:08x} is {:14} (Finalizable: {}, nsched: {})",
                color,
                thread,
                get_eip(thread),
                thread.state_string(),
                thread.is_finalizable(),
                thread.times_scheduled());
            break;
        default:
            dmesgln(" {}{:30}\x1b[0m @ {:08x} is {:14} (Pr:{:2}, nsched: {})",
                color,
                thread,
                get_eip(thread),
                thread.state_string(),
                thread.priority(),
                thread.times_scheduled());
            break;
        }
        if (thread.state() == Thread::State::Blocked && thread.blocking_mutex()) {
            dmesgln(" Blocking on Mutex {:#x} ({})", thread.blocking_mutex(), thread.blocking_mutex()->name());
        }
        if (thread.state() == Thread::State::Blocked && thread.blocker()) {
            dmesgln(" Blocking on Blocker {:#x}", thread.blocker());
        }
#if LOCK_DEBUG
        thread.for_each_held_lock([](auto const& entry) {
            dmesgln(" Holding lock {:#x} ({}) at {}", entry.lock, entry.lock->name(), entry.lock_location);
        });
#endif
        if (with_stack_traces) {
            auto trace_or_error = thread.backtrace();
            if (!trace_or_error.is_error()) {
                auto trace = trace_or_error.release_value();
                dbgln("Backtrace:");
                kernelputstr(trace->characters(), trace->length());
            }
        }
        return IterationDecision::Continue;
    });
}

}