Scheduler.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. /*
  2. * Copyright (c) 2018-2021, Andreas Kling <kling@serenityos.org>
  3. *
  4. * SPDX-License-Identifier: BSD-2-Clause
  5. */
  6. #include <AK/BuiltinWrappers.h>
  7. #include <AK/ScopeGuard.h>
  8. #include <AK/Singleton.h>
  9. #include <AK/Time.h>
  10. #include <Kernel/Arch/InterruptDisabler.h>
  11. #include <Kernel/Arch/x86/TrapFrame.h>
  12. #include <Kernel/Debug.h>
  13. #include <Kernel/Panic.h>
  14. #include <Kernel/PerformanceManager.h>
  15. #include <Kernel/Process.h>
  16. #include <Kernel/RTC.h>
  17. #include <Kernel/Scheduler.h>
  18. #include <Kernel/Sections.h>
  19. #include <Kernel/Time/TimeManagement.h>
  20. #include <Kernel/kstdio.h>
  // Compile-time switch consulted by timer_tick() and idle_loop() in this file:
  // while it is 0, only the bootstrap processor reschedules from those paths.
  // Remove this once SMP is stable and can be enabled by default
  #define SCHEDULE_ON_ALL_PROCESSORS 0
  23. namespace Kernel {
  // Global recursive spinlock for the scheduler. Several functions in this file
  // VERIFY that the current processor holds it before touching scheduler state.
  RecursiveSpinlock g_scheduler_lock;
  25. static u32 time_slice_for(Thread const& thread)
  26. {
  27. // One time slice unit == 4ms (assuming 250 ticks/second)
  28. if (thread.is_idle_thread())
  29. return 1;
  30. return 2;
  31. }
  // Pointer to the finalizer thread; it is not assigned anywhere in this file.
  READONLY_AFTER_INIT Thread* g_finalizer;
  // Wait queue that Scheduler::notify_finalizer() wakes; allocated in Scheduler::initialize().
  READONLY_AFTER_INIT WaitQueue* g_finalizer_wait_queue;
  // Flag that Scheduler::notify_finalizer() flips to true before waking the queue above.
  Atomic<bool> g_finalizer_has_work { false };
  // The "colonel" kernel process created in Scheduler::initialize(); it owns the idle threads.
  READONLY_AFTER_INIT static Process* s_colonel_process;
  // One priority bucket: the threads that are ready to run at that priority,
  // linked through Thread::m_ready_queue_node.
  struct ThreadReadyQueue {
      IntrusiveList<&Thread::m_ready_queue_node> thread_list;
  };
  // The full set of priority buckets plus a bitmask summarizing them.
  struct ThreadReadyQueues {
      // Bit N is set while queues[N] holds at least one thread; the enqueue and
      // dequeue functions in this file keep the two in sync.
      u32 mask {};
      // One bucket per bit of the mask.
      static constexpr size_t count = sizeof(mask) * 8;
      Array<ThreadReadyQueue, count> queues;
  };
  // The ready queues; every access in this file goes through with().
  static Singleton<SpinlockProtected<ThreadReadyQueues>> g_ready_queues;
  // Running totals updated by Scheduler::add_time_scheduled().
  static SpinlockProtected<TotalTimeScheduled> g_total_time_scheduled;
  // The Scheduler::current_time function provides a current time for scheduling purposes,
  // which may not necessarily relate to wall time
  u64 (*Scheduler::current_time)();
  // Defined at the end of this file; logs one line per thread, optionally with backtraces.
  static void dump_thread_list(bool = false);
  50. static inline u32 thread_priority_to_priority_index(u32 thread_priority)
  51. {
  52. // Converts the priority in the range of THREAD_PRIORITY_MIN...THREAD_PRIORITY_MAX
  53. // to a index into g_ready_queues where 0 is the highest priority bucket
  54. VERIFY(thread_priority >= THREAD_PRIORITY_MIN && thread_priority <= THREAD_PRIORITY_MAX);
  55. constexpr u32 thread_priority_count = THREAD_PRIORITY_MAX - THREAD_PRIORITY_MIN + 1;
  56. static_assert(thread_priority_count > 0);
  57. auto priority_bucket = ((thread_priority_count - (thread_priority - THREAD_PRIORITY_MIN)) / thread_priority_count) * (ThreadReadyQueues::count - 1);
  58. VERIFY(priority_bucket < ThreadReadyQueues::count);
  59. return priority_bucket;
  60. }
  61. Thread& Scheduler::pull_next_runnable_thread()
  62. {
  63. auto affinity_mask = 1u << Processor::current_id();
  64. return g_ready_queues->with([&](auto& ready_queues) -> Thread& {
  65. auto priority_mask = ready_queues.mask;
  66. while (priority_mask != 0) {
  67. auto priority = bit_scan_forward(priority_mask);
  68. VERIFY(priority > 0);
  69. auto& ready_queue = ready_queues.queues[--priority];
  70. for (auto& thread : ready_queue.thread_list) {
  71. VERIFY(thread.m_runnable_priority == (int)priority);
  72. if (thread.is_active())
  73. continue;
  74. if (!(thread.affinity() & affinity_mask))
  75. continue;
  76. thread.m_runnable_priority = -1;
  77. ready_queue.thread_list.remove(thread);
  78. if (ready_queue.thread_list.is_empty())
  79. ready_queues.mask &= ~(1u << priority);
  80. // Mark it as active because we are using this thread. This is similar
  81. // to comparing it with Processor::current_thread, but when there are
  82. // multiple processors there's no easy way to check whether the thread
  83. // is actually still needed. This prevents accidental finalization when
  84. // a thread is no longer in Running state, but running on another core.
  85. // We need to mark it active here so that this thread won't be
  86. // scheduled on another core if it were to be queued before actually
  87. // switching to it.
  88. // FIXME: Figure out a better way maybe?
  89. thread.set_active(true);
  90. return thread;
  91. }
  92. priority_mask &= ~(1u << priority);
  93. }
  94. return *Processor::idle_thread();
  95. });
  96. }
  97. Thread* Scheduler::peek_next_runnable_thread()
  98. {
  99. auto affinity_mask = 1u << Processor::current_id();
  100. return g_ready_queues->with([&](auto& ready_queues) -> Thread* {
  101. auto priority_mask = ready_queues.mask;
  102. while (priority_mask != 0) {
  103. auto priority = bit_scan_forward(priority_mask);
  104. VERIFY(priority > 0);
  105. auto& ready_queue = ready_queues.queues[--priority];
  106. for (auto& thread : ready_queue.thread_list) {
  107. VERIFY(thread.m_runnable_priority == (int)priority);
  108. if (thread.is_active())
  109. continue;
  110. if (!(thread.affinity() & affinity_mask))
  111. continue;
  112. return &thread;
  113. }
  114. priority_mask &= ~(1u << priority);
  115. }
  116. // Unlike in pull_next_runnable_thread() we don't want to fall back to
  117. // the idle thread. We just want to see if we have any other thread ready
  118. // to be scheduled.
  119. return nullptr;
  120. });
  121. }
  122. bool Scheduler::dequeue_runnable_thread(Thread& thread, bool check_affinity)
  123. {
  124. if (thread.is_idle_thread())
  125. return true;
  126. return g_ready_queues->with([&](auto& ready_queues) {
  127. auto priority = thread.m_runnable_priority;
  128. if (priority < 0) {
  129. VERIFY(!thread.m_ready_queue_node.is_in_list());
  130. return false;
  131. }
  132. if (check_affinity && !(thread.affinity() & (1 << Processor::current_id())))
  133. return false;
  134. VERIFY(ready_queues.mask & (1u << priority));
  135. auto& ready_queue = ready_queues.queues[priority];
  136. thread.m_runnable_priority = -1;
  137. ready_queue.thread_list.remove(thread);
  138. if (ready_queue.thread_list.is_empty())
  139. ready_queues.mask &= ~(1u << priority);
  140. return true;
  141. });
  142. }
  143. void Scheduler::enqueue_runnable_thread(Thread& thread)
  144. {
  145. VERIFY(g_scheduler_lock.is_locked_by_current_processor());
  146. if (thread.is_idle_thread())
  147. return;
  148. auto priority = thread_priority_to_priority_index(thread.priority());
  149. g_ready_queues->with([&](auto& ready_queues) {
  150. VERIFY(thread.m_runnable_priority < 0);
  151. thread.m_runnable_priority = (int)priority;
  152. VERIFY(!thread.m_ready_queue_node.is_in_list());
  153. auto& ready_queue = ready_queues.queues[priority];
  154. bool was_empty = ready_queue.thread_list.is_empty();
  155. ready_queue.thread_list.append(thread);
  156. if (was_empty)
  157. ready_queues.mask |= (1u << priority);
  158. });
  159. }
  // Starts scheduling on the current processor by switching into its idle
  // thread. Must be called with interrupts disabled; does not return.
  UNMAP_AFTER_INIT void Scheduler::start()
  {
      VERIFY_INTERRUPTS_DISABLED();
      // We need to acquire our scheduler lock, which will be released
      // by the idle thread once control transferred there
      g_scheduler_lock.lock();
      auto& processor = Processor::current();
      VERIFY(processor.is_initialized());
      auto& idle_thread = *Processor::idle_thread();
      // The idle thread must already be installed as this processor's current thread
      VERIFY(processor.current_thread() == &idle_thread);
      idle_thread.set_ticks_left(time_slice_for(idle_thread));
      idle_thread.did_schedule();
      idle_thread.set_initialized(true);
      processor.init_context(idle_thread, false);
      idle_thread.set_state(Thread::State::Running);
      // The idle thread's affinity must be exactly this processor
      VERIFY(idle_thread.affinity() == (1u << processor.id()));
      processor.initialize_context_switching(idle_thread);
      VERIFY_NOT_REACHED();
  }
  179. void Scheduler::pick_next()
  180. {
  181. VERIFY_INTERRUPTS_DISABLED();
  182. // Set the in_scheduler flag before acquiring the spinlock. This
  183. // prevents a recursive call into Scheduler::invoke_async upon
  184. // leaving the scheduler lock.
  185. ScopedCritical critical;
  186. Processor::set_current_in_scheduler(true);
  187. ScopeGuard guard(
  188. []() {
  189. // We may be on a different processor after we got switched
  190. // back to this thread!
  191. VERIFY(Processor::current_in_scheduler());
  192. Processor::set_current_in_scheduler(false);
  193. });
  194. SpinlockLocker lock(g_scheduler_lock);
  195. if constexpr (SCHEDULER_RUNNABLE_DEBUG) {
  196. dump_thread_list();
  197. }
  198. auto& thread_to_schedule = pull_next_runnable_thread();
  199. if constexpr (SCHEDULER_DEBUG) {
  200. dbgln("Scheduler[{}]: Switch to {} @ {:#04x}:{:p}",
  201. Processor::current_id(),
  202. thread_to_schedule,
  203. thread_to_schedule.regs().cs, thread_to_schedule.regs().ip());
  204. }
  205. // We need to leave our first critical section before switching context,
  206. // but since we're still holding the scheduler lock we're still in a critical section
  207. critical.leave();
  208. thread_to_schedule.set_ticks_left(time_slice_for(thread_to_schedule));
  209. context_switch(&thread_to_schedule);
  210. }
  211. void Scheduler::yield()
  212. {
  213. InterruptDisabler disabler;
  214. auto const* current_thread = Thread::current();
  215. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: yielding thread {} in_irq={}", Processor::current_id(), *current_thread, Processor::current_in_irq());
  216. VERIFY(current_thread != nullptr);
  217. if (Processor::current_in_irq() || Processor::in_critical()) {
  218. // If we're handling an IRQ we can't switch context, or we're in
  219. // a critical section where we don't want to switch contexts, then
  220. // delay until exiting the trap or critical section
  221. Processor::current().invoke_scheduler_async();
  222. return;
  223. }
  224. Scheduler::pick_next();
  225. }
  226. void Scheduler::context_switch(Thread* thread)
  227. {
  228. if (Memory::s_mm_lock.is_locked_by_current_processor()) {
  229. PANIC("In context switch while holding Memory::s_mm_lock");
  230. }
  231. thread->did_schedule();
  232. auto* from_thread = Thread::current();
  233. VERIFY(from_thread);
  234. if (from_thread == thread)
  235. return;
  236. // If the last process hasn't blocked (still marked as running),
  237. // mark it as runnable for the next round.
  238. if (from_thread->state() == Thread::State::Running)
  239. from_thread->set_state(Thread::State::Runnable);
  240. #ifdef LOG_EVERY_CONTEXT_SWITCH
  241. auto const msg = "Scheduler[{}]: {} -> {} [prio={}] {:#04x}:{:p}";
  242. dbgln(msg,
  243. Processor::current_id(), from_thread->tid().value(),
  244. thread->tid().value(), thread->priority(), thread->regs().cs, thread->regs().ip());
  245. #endif
  246. auto& proc = Processor::current();
  247. if (!thread->is_initialized()) {
  248. proc.init_context(*thread, false);
  249. thread->set_initialized(true);
  250. }
  251. thread->set_state(Thread::State::Running);
  252. PerformanceManager::add_context_switch_perf_event(*from_thread, *thread);
  253. proc.switch_context(from_thread, thread);
  254. // NOTE: from_thread at this point reflects the thread we were
  255. // switched from, and thread reflects Thread::current()
  256. enter_current(*from_thread);
  257. VERIFY(thread == Thread::current());
  258. {
  259. SpinlockLocker lock(thread->get_lock());
  260. thread->dispatch_one_pending_signal();
  261. }
  262. }
  263. void Scheduler::enter_current(Thread& prev_thread)
  264. {
  265. VERIFY(g_scheduler_lock.is_locked_by_current_processor());
  266. // We already recorded the scheduled time when entering the trap, so this merely accounts for the kernel time since then
  267. auto scheduler_time = Scheduler::current_time();
  268. prev_thread.update_time_scheduled(scheduler_time, true, true);
  269. auto* current_thread = Thread::current();
  270. current_thread->update_time_scheduled(scheduler_time, true, false);
  271. // NOTE: When doing an exec(), we will context switch from and to the same thread!
  272. // In that case, we must not mark the previous thread as inactive.
  273. if (&prev_thread != current_thread)
  274. prev_thread.set_active(false);
  275. if (prev_thread.state() == Thread::State::Dying) {
  276. // If the thread we switched from is marked as dying, then notify
  277. // the finalizer. Note that as soon as we leave the scheduler lock
  278. // the finalizer may free from_thread!
  279. notify_finalizer();
  280. }
  281. }
  // Releases the scheduler lock (passing `flags` to unlock()) and clears the
  // current processor's in_scheduler flag.
  void Scheduler::leave_on_first_switch(u32 flags)
  {
      // This is called when a thread is switched into for the first time.
      // At this point, enter_current has already been called, but because
      // Scheduler::context_switch is not in the call stack we need to
      // clean up and release locks manually here
      g_scheduler_lock.unlock(flags);
      VERIFY(Processor::current_in_scheduler());
      Processor::set_current_in_scheduler(false);
  }
  // Sets the current processor's in_scheduler flag. The scheduler lock must
  // already be held and the flag must not be set yet.
  void Scheduler::prepare_after_exec()
  {
      // This is called after exec() when doing a context "switch" into
      // the new process. This is called from Processor::assume_context
      VERIFY(g_scheduler_lock.is_locked_by_current_processor());
      VERIFY(!Processor::current_in_scheduler());
      Processor::set_current_in_scheduler(true);
  }
  // Takes the scheduler lock and sets the current processor's in_scheduler
  // flag. Neither may already be held/set by the current processor.
  void Scheduler::prepare_for_idle_loop()
  {
      // This is called when the CPU finished setting up the idle loop
      // and is about to run it. We need to acquire the scheduler lock
      VERIFY(!g_scheduler_lock.is_locked_by_current_processor());
      g_scheduler_lock.lock();
      VERIFY(!Processor::current_in_scheduler());
      Processor::set_current_in_scheduler(true);
  }
  // Returns the colonel process created by Scheduler::initialize(); asserts
  // that it exists.
  Process* Scheduler::colonel()
  {
      VERIFY(s_colonel_process);
      return s_colonel_process;
  }
  // Scheduling time source backed by read_tsc(); selected in
  // Scheduler::initialize() when the CPU reports the TSC feature.
  static u64 current_time_tsc()
  {
      return read_tsc();
  }
  318. static u64 current_time_monotonic()
  319. {
  320. // We always need a precise timestamp here, we cannot rely on a coarse timestamp
  321. return (u64)TimeManagement::the().monotonic_time(TimePrecision::Precise).to_nanoseconds();
  322. }
  323. UNMAP_AFTER_INIT void Scheduler::initialize()
  324. {
  325. VERIFY(Processor::is_initialized()); // sanity check
  326. // Figure out a good scheduling time source
  327. if (Processor::current().has_feature(CPUFeature::TSC)) {
  328. // TODO: only use if TSC is running at a constant frequency?
  329. current_time = current_time_tsc;
  330. } else {
  331. // TODO: Using HPET is rather slow, can we use any other time source that may be faster?
  332. current_time = current_time_monotonic;
  333. }
  334. RefPtr<Thread> idle_thread;
  335. g_finalizer_wait_queue = new WaitQueue;
  336. g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
  337. s_colonel_process = Process::create_kernel_process(idle_thread, KString::must_create("colonel"), idle_loop, nullptr, 1, Process::RegisterProcess::No).leak_ref();
  338. VERIFY(s_colonel_process);
  339. VERIFY(idle_thread);
  340. idle_thread->set_priority(THREAD_PRIORITY_MIN);
  341. idle_thread->set_name(KString::must_create("idle thread #0"));
  342. set_idle_thread(idle_thread);
  343. }
  // Flags `idle_thread` as an idle thread and installs it as both the idle
  // thread and the current thread of the current processor.
  UNMAP_AFTER_INIT void Scheduler::set_idle_thread(Thread* idle_thread)
  {
      idle_thread->set_idle_thread();
      Processor::current().set_idle_thread(*idle_thread);
      Processor::set_current_thread(*idle_thread);
  }
  350. UNMAP_AFTER_INIT Thread* Scheduler::create_ap_idle_thread(u32 cpu)
  351. {
  352. VERIFY(cpu != 0);
  353. // This function is called on the bsp, but creates an idle thread for another AP
  354. VERIFY(Processor::is_bootstrap_processor());
  355. VERIFY(s_colonel_process);
  356. Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, MUST(KString::formatted("idle thread #{}", cpu)), 1 << cpu, false);
  357. VERIFY(idle_thread);
  358. return idle_thread;
  359. }
  360. void Scheduler::add_time_scheduled(u64 time_to_add, bool is_kernel)
  361. {
  362. g_total_time_scheduled.with([&](auto& total_time_scheduled) {
  363. total_time_scheduled.total += time_to_add;
  364. if (is_kernel)
  365. total_time_scheduled.total_kernel += time_to_add;
  366. });
  367. }
  368. void Scheduler::timer_tick(RegisterState const& regs)
  369. {
  370. VERIFY_INTERRUPTS_DISABLED();
  371. VERIFY(Processor::current_in_irq());
  372. auto* current_thread = Processor::current_thread();
  373. if (!current_thread)
  374. return;
  375. // Sanity checks
  376. VERIFY(current_thread->current_trap());
  377. VERIFY(current_thread->current_trap()->regs == &regs);
  378. #if !SCHEDULE_ON_ALL_PROCESSORS
  379. if (!Processor::is_bootstrap_processor())
  380. return; // TODO: This prevents scheduling on other CPUs!
  381. #endif
  382. if (current_thread->process().is_kernel_process()) {
  383. // Because the previous mode when entering/exiting kernel threads never changes
  384. // we never update the time scheduled. So we need to update it manually on the
  385. // timer interrupt
  386. current_thread->update_time_scheduled(current_time(), true, false);
  387. }
  388. if (current_thread->previous_mode() == Thread::PreviousMode::UserMode && current_thread->should_die() && !current_thread->is_blocked()) {
  389. SpinlockLocker scheduler_lock(g_scheduler_lock);
  390. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: Terminating user mode thread {}", Processor::current_id(), *current_thread);
  391. current_thread->set_state(Thread::State::Dying);
  392. Processor::current().invoke_scheduler_async();
  393. return;
  394. }
  395. if (current_thread->tick())
  396. return;
  397. if (!current_thread->is_idle_thread() && !peek_next_runnable_thread()) {
  398. // If no other thread is ready to be scheduled we don't need to
  399. // switch to the idle thread. Just give the current thread another
  400. // time slice and let it run!
  401. current_thread->set_ticks_left(time_slice_for(*current_thread));
  402. current_thread->did_schedule();
  403. dbgln_if(SCHEDULER_DEBUG, "Scheduler[{}]: No other threads ready, give {} another timeslice", Processor::current_id(), *current_thread);
  404. return;
  405. }
  406. VERIFY_INTERRUPTS_DISABLED();
  407. VERIFY(Processor::current_in_irq());
  408. Processor::current().invoke_scheduler_async();
  409. }
  410. void Scheduler::invoke_async()
  411. {
  412. VERIFY_INTERRUPTS_DISABLED();
  413. VERIFY(!Processor::current_in_irq());
  414. // Since this function is called when leaving critical sections (such
  415. // as a Spinlock), we need to check if we're not already doing this
  416. // to prevent recursion
  417. if (!Processor::current_in_scheduler())
  418. pick_next();
  419. }
  420. void Scheduler::notify_finalizer()
  421. {
  422. if (!g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel))
  423. g_finalizer_wait_queue->wake_all();
  424. }
  425. void Scheduler::idle_loop(void*)
  426. {
  427. auto& proc = Processor::current();
  428. dbgln("Scheduler[{}]: idle loop running", proc.id());
  429. VERIFY(are_interrupts_enabled());
  430. for (;;) {
  431. proc.idle_begin();
  432. asm("hlt");
  433. proc.idle_end();
  434. VERIFY_INTERRUPTS_ENABLED();
  435. #if SCHEDULE_ON_ALL_PROCESSORS
  436. yield();
  437. #else
  438. if (Processor::current_id() == 0)
  439. yield();
  440. #endif
  441. }
  442. }
  // Public wrapper around the file-local dump_thread_list(); logs every thread,
  // with a backtrace each if `with_stack_traces` is set.
  void Scheduler::dump_scheduler_state(bool with_stack_traces)
  {
      dump_thread_list(with_stack_traces);
  }
  // Reports whether Processor::idle_thread() is non-null.
  bool Scheduler::is_initialized()
  {
      // The scheduler is initialized iff the idle thread exists
      return Processor::idle_thread() != nullptr;
  }
  // Returns a copy of the totals accumulated by add_time_scheduled().
  TotalTimeScheduled Scheduler::get_total_time_scheduled()
  {
      return g_total_time_scheduled.with([&](auto& total_time_scheduled) { return total_time_scheduled; });
  }
  456. void dump_thread_list(bool with_stack_traces)
  457. {
  458. dbgln("Scheduler thread list for processor {}:", Processor::current_id());
  459. auto get_cs = [](Thread& thread) -> u16 {
  460. if (!thread.current_trap())
  461. return thread.regs().cs;
  462. return thread.get_register_dump_from_stack().cs;
  463. };
  464. auto get_eip = [](Thread& thread) -> u32 {
  465. if (!thread.current_trap())
  466. return thread.regs().ip();
  467. return thread.get_register_dump_from_stack().ip();
  468. };
  469. Thread::for_each([&](Thread& thread) {
  470. switch (thread.state()) {
  471. case Thread::State::Dying:
  472. dmesgln(" {:14} {:30} @ {:04x}:{:08x} Finalizable: {}, (nsched: {})",
  473. thread.state_string(),
  474. thread,
  475. get_cs(thread),
  476. get_eip(thread),
  477. thread.is_finalizable(),
  478. thread.times_scheduled());
  479. break;
  480. default:
  481. dmesgln(" {:14} Pr:{:2} {:30} @ {:04x}:{:08x} (nsched: {})",
  482. thread.state_string(),
  483. thread.priority(),
  484. thread,
  485. get_cs(thread),
  486. get_eip(thread),
  487. thread.times_scheduled());
  488. break;
  489. }
  490. if (with_stack_traces) {
  491. auto trace_or_error = thread.backtrace();
  492. if (!trace_or_error.is_error()) {
  493. auto trace = trace_or_error.release_value();
  494. dbgln("Backtrace:");
  495. kernelputstr(trace->characters(), trace->length());
  496. }
  497. }
  498. return IterationDecision::Continue;
  499. });
  500. }
  501. }