Scheduler.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561
  1. /*
  2. * Copyright (c) 2018-2022, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/BuiltinWrappers.h>
  7. #include <AK/ScopeGuard.h>
  8. #include <AK/Singleton.h>
  9. #include <AK/Time.h>
  10. #include <Kernel/Arch/TrapFrame.h>
  11. #include <Kernel/Debug.h>
  12. #include <Kernel/Interrupts/InterruptDisabler.h>
  13. #include <Kernel/Library/Panic.h>
  14. #include <Kernel/Sections.h>
  15. #include <Kernel/Tasks/PerformanceManager.h>
  16. #include <Kernel/Tasks/Process.h>
  17. #include <Kernel/Tasks/Scheduler.h>
  18. #include <Kernel/Time/TimeManagement.h>
  19. #include <Kernel/kstdio.h>
  20. namespace Kernel {
  21. RecursiveSpinlock<LockRank::None> g_scheduler_lock {};
  22. static u32 time_slice_for(Thread const& thread)
  23. {
  24. // One time slice unit == 4ms (assuming 250 ticks/second)
  25. if (thread.is_idle_thread())
  26. return 1;
  27. return 2;
  28. }
  29. READONLY_AFTER_INIT Thread* g_finalizer;
  30. READONLY_AFTER_INIT WaitQueue* g_finalizer_wait_queue;
  31. Atomic<bool> g_finalizer_has_work { false };
  32. READONLY_AFTER_INIT static Process* s_colonel_process;
  33. struct ThreadReadyQueue {
  34. IntrusiveList<&Thread::m_ready_queue_node> thread_list;
  35. };
  36. struct ThreadReadyQueues {
  37. u32 mask {};
  38. static constexpr size_t count = sizeof(mask) * 8;
  39. Array<ThreadReadyQueue, count> queues;
  40. };
  41. static Singleton<SpinlockProtected<ThreadReadyQueues, LockRank::None>> g_ready_queues;
  42. static SpinlockProtected<TotalTimeScheduled, LockRank::None> g_total_time_scheduled {};
  43. static void dump_thread_list(bool = false);
  44. static inline u32 thread_priority_to_priority_index(u32 thread_priority)
  45. {
  46. // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
  47. // to a index into g_ready_queues where 0 is the highest priority bucket
  48. VERIFY(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
  49. constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
  50. static_assert(thread_priority_count > 0);
  51. auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (ThreadReadyQueues::count - 1);
  52. VERIFY(priority_bucket < ThreadReadyQueues::count);
  53. return priority_bucket;
  54. }
  55. Thread& Scheduler::pull_next_runnable_thread()
  56. {
  57. auto affinity_mask = 1u << Processor::current_id();
  58. return g_ready_queues->with([&](auto& ready_queues) -> Thread& {
  59. auto priority_mask = ready_queues.mask;
  60. while (priority_mask != 0) {
  61. auto priority = bit_scan_forward(priority_mask);
  62. VERIFY(priority > 0);
  63. auto& ready_queue = ready_queues.queues[--priority];
  64. for (auto& thread : ready_queue.thread_list) {
  65. VERIFY(thread.m_runnable_priority == (int)priority);
  66. if (thread.is_active())
  67. continue;
  68. if (!(thread.affinity() & affinity_mask))
  69. continue;
  70. thread.m_runnable_priority = -1;
  71. ready_queue.thread_list.remove(thread);
  72. if (ready_queue.thread_list.is_empty())
  73. ready_queues.mask &= ~(1u << priority);
  74. // Mark it as active because we are using this thread. This is similar
  75. // to comparing it with Processor::current_thread, but when there are
  76. // multiple processors there's no easy way to check whether the thread
  77. // is actually still needed. This prevents accidental finalization when
  78. // a thread is no longer in Running state, but running on another core.
  79. // We need to mark it active here so that this thread won't be
  80. // scheduled on another core if it were to be queued before actually
  81. // switching to it.
  82. // FIXME: Figure out a better way maybe?
  83. thread.set_active(true);
  84. return thread;
  85. }
  86. priority_mask &= ~(1u << priority);
  87. }
  88. auto* idle_thread = Processor::idle_thread();
  89. idle_thread->set_active(true);
  90. return *idle_thread;
  91. });
  92. }
  93. Thread* Scheduler::peek_next_runnable_thread()
  94. {
  95. auto affinity_mask = 1u << Processor::current_id();
  96. return g_ready_queues->with([&](auto& ready_queues) -> Thread* {
  97. auto priority_mask = ready_queues.mask;
  98. while (priority_mask != 0) {
  99. auto priority = bit_scan_forward(priority_mask);
  100. VERIFY(priority > 0);
  101. auto& ready_queue = ready_queues.queues[--priority];
  102. for (auto& thread : ready_queue.thread_list) {
  103. VERIFY(thread.m_runnable_priority == (int)priority);
  104. if (thread.is_active())
  105. continue;
  106. if (!(thread.affinity() & affinity_mask))
  107. continue;
  108. return &thread;
  109. }
  110. priority_mask &= ~(1u << priority);
  111. }
  112. // Unlike in pull_next_runnable_thread() we don't want to fall back to
  113. // the idle thread. We just want to see if we have any other thread ready
  114. // to be scheduled.
  115. return nullptr;
  116. });
  117. }
  118. bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
  119. {
  120. if (thread.is_idle_thread())
  121. return true;
  122. return g_ready_queues->with([&](auto& ready_queues) {
  123. auto priority = thread.m_runnable_priority;
  124. if (priority < 0) {
  125. VERIFY(!thread.m_ready_queue_node.is_in_list());
  126. return false;
  127. }
  128. if (check_affinity && !(thread.affinity() & (1 << Processor::current_id())))
  129. return false;
  130. VERIFY(ready_queues.mask & (1u << priority));
  131. auto& ready_queue = ready_queues.queues[priority];
  132. thread.m_runnable_priority = -1;
  133. ready_queue.thread_list.remove(thread);
  134. if (ready_queue.thread_list.is_empty())
  135. ready_queues.mask &= ~(1u << priority);
  136. return true;
  137. });
  138. }
  139. void Scheduler::enqueue_runnable_thread(Thread& thread)
  140. {
  141. VERIFY(g_scheduler_lock.is_locked_by_current_processor());
  142. if (thread.is_idle_thread())
  143. return;
  144. auto priority = thread_priority_to_priority_index(thread.priority());
  145. g_ready_queues->with([&](auto& ready_queues) {
  146. VERIFY(thread.m_runnable_priority < 0);
  147. thread.m_runnable_priority = (int)priority;
  148. VERIFY(!thread.m_ready_queue_node.is_in_list());
  149. auto& ready_queue = ready_queues.queues[priority];
  150. bool was_empty = ready_queue.thread_list.is_empty();
  151. ready_queue.thread_list.append(thread);
  152. if (was_empty)
  153. ready_queues.mask |= (1u << priority);
  154. });
  155. }
  156. UNMAP_AFTER_INIT void Scheduler::start()
  157. {
  158. VERIFY_INTERRUPTS_DISABLED();
  159. // We need to acquire our scheduler lock, which will be released
  160. // by the idle thread once control transferred there
  161. g_scheduler_lock.lock();
  162. auto& processor = Processor::current();
  163. VERIFY(processor.is_initialized());
  164. auto& idle_thread = *Processor::idle_thread();
  165. VERIFY(processor.current_thread() == &idle_thread);
  166. idle_thread.set_ticks_left(time_slice_for(idle_thread));
  167. idle_thread.did_schedule();
  168. idle_thread.set_initialized(true);
  169. processor.init_context(idle_thread, false);
  170. idle_thread.set_state(Thread::State::Running);
  171. VERIFY(idle_thread.affinity() == (1u << processor.id()));
  172. processor.initialize_context_switching(idle_thread);
  173. VERIFY_NOT_REACHED();
  174. }
  175. void Scheduler::pick_next()
  176. {
  177. VERIFY_INTERRUPTS_DISABLED();
  178. // Set the in_scheduler flag before acquiring the spinlock. This
  179. // prevents a recursive call into Scheduler::invoke_async upon
  180. // leaving the scheduler lock.
  181. ScopedCritical critical;
  182. Processor::set_current_in_scheduler(true);
  183. ScopeGuard guard(
  184. []() {
  185. // We may be on a different processor after we got switched
  186. // back to this thread!
  187. VERIFY(Processor::current_in_scheduler());
  188. Processor::set_current_in_scheduler(false);
  189. });
  190. SpinlockLocker lock(g_scheduler_lock);
  191. if constexpr (SCHEDULER_RUNNABLE_DEBUG) {
  192. dump_thread_list();
  193. }
  194. auto& thread_to_schedule = pull_next_runnable_thread();
  195. if constexpr (SCHEDULER_DEBUG) {
  196. dbgln("Scheduler[{}]: Switch to {} @ {:p}",
  197. Processor::current_id(),
  198. thread_to_schedule,
  199. thread_to_schedule.regs().ip());
  200. }
  201. // We need to leave our first critical section before switching context,
  202. // but since we're still holding the scheduler lock we're still in a critical section
  203. critical.leave();
  204. thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
  205. context_switch(&thread_to_schedule);
  206. }
  207. void Scheduler::yield()
  208. {
  209. InterruptDisabler disabler;
  210. auto const* current_thread = Thread::current();
  211. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: yielding thread {} in_irq={}", Processor::current_id(), *current_thread, Processor::current_in_irq());
  212. VERIFY(current_thread != nullptr);
  213. if (Processor::current_in_irq() || Processor::in_critical()) {
  214. // If we're handling an IRQ we can't switch context, or we're in
  215. // a critical section where we don't want to switch contexts, then
  216. // delay until exiting the trap or critical section
  217. Processor::current().invoke_scheduler_async();
  218. return;
  219. }
  220. Scheduler::pick_next();
  221. }
  222. void Scheduler::context_switch(Thread* thread)
  223. {
  224. thread->did_schedule();
  225. auto* from_thread = Thread::current();
  226. VERIFY(from_thread);
  227. if (from_thread == thread)
  228. return;
  229. // If the last process hasn't blocked (still marked as running),
  230. // mark it as runnable for the next round.
  231. if (from_thread->state() == Thread::State::Running)
  232. from_thread->set_state(Thread::State::Runnable);
  233. #ifdef LOG_EVERY_CONTEXT_SWITCH
  234. auto const msg = "Scheduler[{}]: {} -> {} [prio={}] {:p}";
  235. dbgln(msg,
  236. Processor::current_id(), from_thread->tid().value(),
  237. thread->tid().value(), thread->priority(), thread->regs().ip());
  238. #endif
  239. auto& proc = Processor::current();
  240. if (!thread->is_initialized()) {
  241. proc.init_context(*thread, false);
  242. thread->set_initialized(true);
  243. }
  244. thread->set_state(Thread::State::Running);
  245. PerformanceManager::add_context_switch_perf_event(*from_thread, *thread);
  246. proc.switch_context(from_thread, thread);
  247. // NOTE: from_thread at this point reflects the thread we were
  248. // switched from, and thread reflects Thread::current()
  249. enter_current(*from_thread);
  250. VERIFY(thread == Thread::current());
  251. {
  252. SpinlockLocker lock(thread->get_lock());
  253. thread->dispatch_one_pending_signal();
  254. }
  255. }
  256. void Scheduler::enter_current(Thread& prev_thread)
  257. {
  258. VERIFY(g_scheduler_lock.is_locked_by_current_processor());
  259. // We already recorded the scheduled time when entering the trap, so this merely accounts for the kernel time since then
  260. auto scheduler_time = TimeManagement::scheduler_current_time();
  261. prev_thread.update_time_scheduled(scheduler_time, true, true);
  262. auto* current_thread = Thread::current();
  263. current_thread->update_time_scheduled(scheduler_time, true, false);
  264. // NOTE: When doing an exec(), we will context switch from and to the same thread!
  265. // In that case, we must not mark the previous thread as inactive.
  266. if (&prev_thread != current_thread)
  267. prev_thread.set_active(false);
  268. if (prev_thread.state() == Thread::State::Dying) {
  269. // If the thread we switched from is marked as dying, then notify
  270. // the finalizer. Note that as soon as we leave the scheduler lock
  271. // the finalizer may free from_thread!
  272. notify_finalizer();
  273. }
  274. }
  275. void Scheduler::leave_on_first_switch(InterruptsState previous_interrupts_state)
  276. {
  277. // This is called when a thread is switched into for the first time.
  278. // At this point, enter_current has already be called, but because
  279. // Scheduler::context_switch is not in the call stack we need to
  280. // clean up and release locks manually here
  281. g_scheduler_lock.unlock(previous_interrupts_state);
  282. VERIFY(Processor::current_in_scheduler());
  283. Processor::set_current_in_scheduler(false);
  284. }
  285. void Scheduler::prepare_after_exec()
  286. {
  287. // This is called after exec() when doing a context "switch" into
  288. // the new process. This is called from Processor::assume_context
  289. VERIFY(g_scheduler_lock.is_locked_by_current_processor());
  290. VERIFY(!Processor::current_in_scheduler());
  291. Processor::set_current_in_scheduler(true);
  292. }
  293. void Scheduler::prepare_for_idle_loop()
  294. {
  295. // This is called when the CPU finished setting up the idle loop
  296. // and is about to run it. We need to acquire the scheduler lock
  297. VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
  298. g_scheduler_lock.lock();
  299. VERIFY(!Processor::current_in_scheduler());
  300. Processor::set_current_in_scheduler(true);
  301. }
  302. Process* Scheduler::colonel()
  303. {
  304. VERIFY(s_colonel_process);
  305. return s_colonel_process;
  306. }
  307. UNMAP_AFTER_INIT void Scheduler::initialize()
  308. {
  309. VERIFY(Processor::is_initialized()); // sanity check
  310. VERIFY(TimeManagement::is_initialized());
  311. g_finalizer_wait_queue = new WaitQueue;
  312. g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
  313. auto [colonel_process, idle_thread] = MUST(Process::create_kernel_process("colonel"sv, idle_loop, nullptr, 1, Process::RegisterProcess::No));
  314. s_colonel_process = &colonel_process.leak_ref();
  315. idle_thread->set_priority(THREAD_PRIORITY_MIN);
  316. idle_thread->set_name("Idle Task #0"sv);
  317. set_idle_thread(idle_thread);
  318. }
  319. UNMAP_AFTER_INIT void Scheduler::set_idle_thread(Thread* idle_thread)
  320. {
  321. idle_thread->set_idle_thread();
  322. Processor::current().set_idle_thread(*idle_thread);
  323. Processor::set_current_thread(*idle_thread);
  324. }
  325. UNMAP_AFTER_INIT Thread* Scheduler::create_ap_idle_thread(u32 cpu)
  326. {
  327. VERIFY(cpu != 0);
  328. // This function is called on the bsp, but creates an idle thread for another AP
  329. VERIFY(Processor::is_bootstrap_processor());
  330. VERIFY(s_colonel_process);
  331. Thread* idle_thread = MUST(s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, MUST(KString::formatted("idle thread #{}", cpu))->view(), 1 << cpu, false));
  332. VERIFY(idle_thread);
  333. return idle_thread;
  334. }
  335. void Scheduler::add_time_scheduled(u64 time_to_add, bool is_kernel)
  336. {
  337. g_total_time_scheduled.with([&](auto& total_time_scheduled) {
  338. total_time_scheduled.total += time_to_add;
  339. if (is_kernel)
  340. total_time_scheduled.total_kernel += time_to_add;
  341. });
  342. }
  343. void Scheduler::timer_tick(RegisterState const& regs)
  344. {
  345. VERIFY_INTERRUPTS_DISABLED();
  346. VERIFY(Processor::current_in_irq());
  347. auto* current_thread = Processor::current_thread();
  348. if (!current_thread)
  349. return;
  350. // Sanity checks
  351. VERIFY(current_thread->current_trap());
  352. VERIFY(current_thread->current_trap()->regs == &regs);
  353. if (current_thread->process().is_kernel_process()) {
  354. // Because the previous mode when entering/exiting kernel threads never changes
  355. // we never update the time scheduled. So we need to update it manually on the
  356. // timer interrupt
  357. current_thread->update_time_scheduled(TimeManagement::scheduler_current_time(), true, false);
  358. }
  359. if (current_thread->previous_mode() == ExecutionMode::User && current_thread->should_die() && !current_thread->is_blocked()) {
  360. SpinlockLocker scheduler_lock(g_scheduler_lock);
  361. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: Terminating user mode thread {}", Processor::current_id(), *current_thread);
  362. current_thread->set_state(Thread::State::Dying);
  363. Processor::current().invoke_scheduler_async();
  364. return;
  365. }
  366. if (current_thread->tick())
  367. return;
  368. if (!current_thread->is_idle_thread() && !peek_next_runnable_thread()) {
  369. // If no other thread is ready to be scheduled we don't need to
  370. // switch to the idle thread. Just give the current thread another
  371. // time slice and let it run!
  372. current_thread->set_ticks_left(time_slice_for(*current_thread));
  373. current_thread->did_schedule();
  374. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: No other threads ready, give {} another timeslice", Processor::current_id(), *current_thread);
  375. return;
  376. }
  377. VERIFY_INTERRUPTS_DISABLED();
  378. VERIFY(Processor::current_in_irq());
  379. Processor::current().invoke_scheduler_async();
  380. }
  381. void Scheduler::invoke_async()
  382. {
  383. VERIFY_INTERRUPTS_DISABLED();
  384. VERIFY(!Processor::current_in_irq());
  385. // Since this function is called when leaving critical sections (such
  386. // as a Spinlock), we need to check if we're not already doing this
  387. // to prevent recursion
  388. if (!Processor::current_in_scheduler())
  389. pick_next();
  390. }
  391. void Scheduler::notify_finalizer()
  392. {
  393. if (!g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel))
  394. g_finalizer_wait_queue->wake_all();
  395. }
  396. void Scheduler::idle_loop(void*)
  397. {
  398. auto& proc = Processor::current();
  399. dbgln("Scheduler[{}]: idle loop running", proc.id());
  400. VERIFY(Processor::are_interrupts_enabled());
  401. for (;;) {
  402. proc.idle_begin();
  403. proc.wait_for_interrupt();
  404. proc.idle_end();
  405. VERIFY_INTERRUPTS_ENABLED();
  406. yield();
  407. }
  408. }
  409. void Scheduler::dump_scheduler_state(bool with_stack_traces)
  410. {
  411. dump_thread_list(with_stack_traces);
  412. }
  413. bool Scheduler::is_initialized()
  414. {
  415. // The scheduler is initialized iff the idle thread exists
  416. return Processor::idle_thread() != nullptr;
  417. }
  418. TotalTimeScheduled Scheduler::get_total_time_scheduled()
  419. {
  420. return g_total_time_scheduled.with([&](auto& total_time_scheduled) { return total_time_scheduled; });
  421. }
  422. void dump_thread_list(bool with_stack_traces)
  423. {
  424. dbgln("Scheduler thread list for processor {}:", Processor::current_id());
  425. auto get_eip = [](Thread& thread) -> u32 {
  426. if (!thread.current_trap())
  427. return thread.regs().ip();
  428. return thread.get_register_dump_from_stack().ip();
  429. };
  430. Thread::for_each_ignoring_jails([&](Thread& thread) {
  431. auto color = thread.process().is_kernel_process() ? "\x1b[34;1m"sv : "\x1b[33;1m"sv;
  432. switch (thread.state()) {
  433. case Thread::State::Dying:
  434. dmesgln(" {}{:30}\x1b[0m @ {:08x} is {:14} (Finalizable: {}, nsched: {})",
  435. color,
  436. thread,
  437. get_eip(thread),
  438. thread.state_string(),
  439. thread.is_finalizable(),
  440. thread.times_scheduled());
  441. break;
  442. default:
  443. dmesgln(" {}{:30}\x1b[0m @ {:08x} is {:14} (Pr:{:2}, nsched: {})",
  444. color,
  445. thread,
  446. get_eip(thread),
  447. thread.state_string(),
  448. thread.priority(),
  449. thread.times_scheduled());
  450. break;
  451. }
  452. if (thread.state() == Thread::State::Blocked && thread.blocking_mutex()) {
  453. dmesgln(" Blocking on Mutex {:#x} ({})", thread.blocking_mutex(), thread.blocking_mutex()->name());
  454. }
  455. if (thread.state() == Thread::State::Blocked && thread.blocker()) {
  456. dmesgln(" Blocking on Blocker {:#x}", thread.blocker());
  457. }
  458. #if LOCK_DEBUG
  459. thread.for_each_held_lock([](auto const& entry) {
  460. dmesgln(" Holding lock {:#x} ({}) at {}", entry.lock, entry.lock->name(), entry.lock_location);
  461. });
  462. #endif
  463. if (with_stack_traces) {
  464. auto trace_or_error = thread.backtrace();
  465. if (!trace_or_error.is_error()) {
  466. auto trace = trace_or_error.release_value();
  467. dbgln("Backtrace:");
  468. kernelputstr(trace->characters(), trace->length());
  469. }
  470. }
  471. return IterationDecision::Continue;
  472. });
  473. }
  474. }