Scheduler.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/QuickSort.h>
  27. #include <AK/ScopeGuard.h>
  28. #include <AK/TemporaryChange.h>
  29. #include <AK/Time.h>
  30. #include <Kernel/Process.h>
  31. #include <Kernel/Profiling.h>
  32. #include <Kernel/RTC.h>
  33. #include <Kernel/Scheduler.h>
  34. #include <Kernel/Time/TimeManagement.h>
  35. #include <Kernel/TimerQueue.h>
  36. //#define LOG_EVERY_CONTEXT_SWITCH
  37. //#define SCHEDULER_DEBUG
  38. //#define SCHEDULER_RUNNABLE_DEBUG
  39. namespace Kernel {
  40. class SchedulerPerProcessorData {
  41. AK_MAKE_NONCOPYABLE(SchedulerPerProcessorData);
  42. AK_MAKE_NONMOVABLE(SchedulerPerProcessorData);
  43. public:
  44. SchedulerPerProcessorData() = default;
  45. WeakPtr<Thread> m_pending_beneficiary;
  46. const char* m_pending_donate_reason { nullptr };
  47. bool m_in_scheduler { true };
  48. };
  49. SchedulerData* g_scheduler_data;
  50. RecursiveSpinLock g_scheduler_lock;
  51. void Scheduler::init_thread(Thread& thread)
  52. {
  53. ASSERT(g_scheduler_data);
  54. g_scheduler_data->m_nonrunnable_threads.append(thread);
  55. }
  56. static u32 time_slice_for(const Thread& thread)
  57. {
  58. // One time slice unit == 4ms (assuming 250 ticks/second)
  59. if (&thread == Processor::current().idle_thread())
  60. return 1;
  61. return 2;
  62. }
  63. Thread* g_finalizer;
  64. WaitQueue* g_finalizer_wait_queue;
  65. Atomic<bool> g_finalizer_has_work { false };
  66. static Process* s_colonel_process;
  67. void Scheduler::start()
  68. {
  69. ASSERT_INTERRUPTS_DISABLED();
  70. // We need to acquire our scheduler lock, which will be released
  71. // by the idle thread once control transferred there
  72. g_scheduler_lock.lock();
  73. auto& processor = Processor::current();
  74. processor.set_scheduler_data(*new SchedulerPerProcessorData());
  75. ASSERT(processor.is_initialized());
  76. auto& idle_thread = *processor.idle_thread();
  77. ASSERT(processor.current_thread() == &idle_thread);
  78. ASSERT(processor.idle_thread() == &idle_thread);
  79. idle_thread.set_ticks_left(time_slice_for(idle_thread));
  80. idle_thread.did_schedule();
  81. idle_thread.set_initialized(true);
  82. processor.init_context(idle_thread, false);
  83. idle_thread.set_state(Thread::Running);
  84. ASSERT(idle_thread.affinity() == (1u << processor.id()));
  85. processor.initialize_context_switching(idle_thread);
  86. ASSERT_NOT_REACHED();
  87. }
  88. bool Scheduler::pick_next()
  89. {
  90. ASSERT_INTERRUPTS_DISABLED();
  91. auto current_thread = Thread::current();
  92. // Set the m_in_scheduler flag before acquiring the spinlock. This
  93. // prevents a recursive call into Scheduler::invoke_async upon
  94. // leaving the scheduler lock.
  95. ScopedCritical critical;
  96. auto& scheduler_data = Processor::current().get_scheduler_data();
  97. scheduler_data.m_in_scheduler = true;
  98. ScopeGuard guard(
  99. []() {
  100. // We may be on a different processor after we got switched
  101. // back to this thread!
  102. auto& scheduler_data = Processor::current().get_scheduler_data();
  103. ASSERT(scheduler_data.m_in_scheduler);
  104. scheduler_data.m_in_scheduler = false;
  105. });
  106. ScopedSpinLock lock(g_scheduler_lock);
  107. if (current_thread->should_die() && current_thread->state() == Thread::Running) {
  108. // Rather than immediately killing threads, yanking the kernel stack
  109. // away from them (which can lead to e.g. reference leaks), we always
  110. // allow Thread::wait_on to return. This allows the kernel stack to
  111. // clean up and eventually we'll get here shortly before transitioning
  112. // back to user mode (from Processor::exit_trap). At this point we
  113. // no longer want to schedule this thread. We can't wait until
  114. // Scheduler::enter_current because we don't want to allow it to
  115. // transition back to user mode.
  116. #ifdef SCHEDULER_DEBUG
  117. dbg() << "Scheduler[" << Processor::current().id() << "]: Thread " << *current_thread << " is dying";
  118. #endif
  119. current_thread->set_state(Thread::Dying);
  120. }
  121. #ifdef SCHEDULER_RUNNABLE_DEBUG
  122. dbg() << "Scheduler[" << Processor::current().id() << "]: Non-runnables:";
  123. Scheduler::for_each_nonrunnable([&](Thread& thread) -> IterationDecision {
  124. if (thread.state() == Thread::Dying)
  125. dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::formatted("{:04x}:{:08x}", thread.tss().cs, thread.tss().eip) << " Finalizable: " << thread.is_finalizable();
  126. else
  127. dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::formatted("{:04x}:{:08x}", thread.tss().cs, thread.tss().eip);
  128. return IterationDecision::Continue;
  129. });
  130. dbg() << "Scheduler[" << Processor::current().id() << "]: Runnables:";
  131. Scheduler::for_each_runnable([](Thread& thread) -> IterationDecision {
  132. dbg() << " " << String::format("%3u", thread.effective_priority()) << "/" << String::format("%2u", thread.priority()) << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::formatted("{:04x}:{:08x}", thread.tss().cs, thread.tss().eip);
  133. return IterationDecision::Continue;
  134. });
  135. #endif
  136. Thread* thread_to_schedule = nullptr;
  137. auto pending_beneficiary = scheduler_data.m_pending_beneficiary.strong_ref();
  138. Vector<Thread*, 128> sorted_runnables;
  139. for_each_runnable([&](auto& thread) {
  140. if ((thread.affinity() & (1u << Processor::current().id())) == 0)
  141. return IterationDecision::Continue;
  142. if (thread.state() == Thread::Running && &thread != current_thread)
  143. return IterationDecision::Continue;
  144. sorted_runnables.append(&thread);
  145. if (&thread == pending_beneficiary) {
  146. thread_to_schedule = &thread;
  147. return IterationDecision::Break;
  148. }
  149. return IterationDecision::Continue;
  150. });
  151. if (thread_to_schedule) {
  152. // The thread we're supposed to donate to still exists
  153. const char* reason = scheduler_data.m_pending_donate_reason;
  154. scheduler_data.m_pending_beneficiary = nullptr;
  155. scheduler_data.m_pending_donate_reason = nullptr;
  156. // We need to leave our first critical section before switching context,
  157. // but since we're still holding the scheduler lock we're still in a critical section
  158. critical.leave();
  159. #ifdef SCHEDULER_DEBUG
  160. dbg() << "Processing pending donate to " << *thread_to_schedule << " reason=" << reason;
  161. #endif
  162. return donate_to_and_switch(thread_to_schedule, reason);
  163. }
  164. // Either we're not donating or the beneficiary disappeared.
  165. // Either way clear any pending information
  166. scheduler_data.m_pending_beneficiary = nullptr;
  167. scheduler_data.m_pending_donate_reason = nullptr;
  168. quick_sort(sorted_runnables, [](auto& a, auto& b) { return a->effective_priority() >= b->effective_priority(); });
  169. for (auto* thread : sorted_runnables) {
  170. if (thread->process().exec_tid() && thread->process().exec_tid() != thread->tid())
  171. continue;
  172. ASSERT(thread->state() == Thread::Runnable || thread->state() == Thread::Running);
  173. if (!thread_to_schedule) {
  174. thread->m_extra_priority = 0;
  175. thread_to_schedule = thread;
  176. } else {
  177. thread->m_extra_priority++;
  178. }
  179. }
  180. if (!thread_to_schedule)
  181. thread_to_schedule = Processor::current().idle_thread();
  182. #ifdef SCHEDULER_DEBUG
  183. dbg() << "Scheduler[" << Processor::current().id() << "]: Switch to " << *thread_to_schedule << " @ " << String::format("%04x:%08x", thread_to_schedule->tss().cs, thread_to_schedule->tss().eip);
  184. #endif
  185. // We need to leave our first critical section before switching context,
  186. // but since we're still holding the scheduler lock we're still in a critical section
  187. critical.leave();
  188. thread_to_schedule->set_ticks_left(time_slice_for(*thread_to_schedule));
  189. return context_switch(thread_to_schedule);
  190. }
  191. bool Scheduler::yield()
  192. {
  193. InterruptDisabler disabler;
  194. auto& proc = Processor::current();
  195. auto& scheduler_data = proc.get_scheduler_data();
  196. // Clear any pending beneficiary
  197. scheduler_data.m_pending_beneficiary = nullptr;
  198. scheduler_data.m_pending_donate_reason = nullptr;
  199. auto current_thread = Thread::current();
  200. #ifdef SCHEDULER_DEBUG
  201. dbg() << "Scheduler[" << proc.id() << "]: yielding thread " << *current_thread << " in_irq: " << proc.in_irq();
  202. #endif
  203. ASSERT(current_thread != nullptr);
  204. if (proc.in_irq() || proc.in_critical()) {
  205. // If we're handling an IRQ we can't switch context, or we're in
  206. // a critical section where we don't want to switch contexts, then
  207. // delay until exiting the trap or critical section
  208. proc.invoke_scheduler_async();
  209. return false;
  210. }
  211. if (!Scheduler::pick_next())
  212. return false;
  213. #ifdef SCHEDULER_DEBUG
  214. dbg() << "Scheduler[" << Processor::current().id() << "]: yield returns to thread " << *current_thread << " in_irq: " << Processor::current().in_irq();
  215. #endif
  216. return true;
  217. }
  218. bool Scheduler::donate_to_and_switch(Thread* beneficiary, [[maybe_unused]] const char* reason)
  219. {
  220. ASSERT(g_scheduler_lock.own_lock());
  221. auto& proc = Processor::current();
  222. ASSERT(proc.in_critical() == 1);
  223. unsigned ticks_left = Thread::current()->ticks_left();
  224. if (!beneficiary || beneficiary->state() != Thread::Runnable || ticks_left <= 1)
  225. return Scheduler::yield();
  226. unsigned ticks_to_donate = min(ticks_left - 1, time_slice_for(*beneficiary));
  227. #ifdef SCHEDULER_DEBUG
  228. dbg() << "Scheduler[" << proc.id() << "]: Donating " << ticks_to_donate << " ticks to " << *beneficiary << ", reason=" << reason;
  229. #endif
  230. beneficiary->set_ticks_left(ticks_to_donate);
  231. return Scheduler::context_switch(beneficiary);
  232. }
  233. bool Scheduler::donate_to(RefPtr<Thread>& beneficiary, const char* reason)
  234. {
  235. ASSERT(beneficiary);
  236. if (beneficiary == Thread::current())
  237. return Scheduler::yield();
  238. // Set the m_in_scheduler flag before acquiring the spinlock. This
  239. // prevents a recursive call into Scheduler::invoke_async upon
  240. // leaving the scheduler lock.
  241. ScopedCritical critical;
  242. auto& proc = Processor::current();
  243. auto& scheduler_data = proc.get_scheduler_data();
  244. scheduler_data.m_in_scheduler = true;
  245. ScopeGuard guard(
  246. []() {
  247. // We may be on a different processor after we got switched
  248. // back to this thread!
  249. auto& scheduler_data = Processor::current().get_scheduler_data();
  250. ASSERT(scheduler_data.m_in_scheduler);
  251. scheduler_data.m_in_scheduler = false;
  252. });
  253. ASSERT(!proc.in_irq());
  254. if (proc.in_critical() > 1) {
  255. scheduler_data.m_pending_beneficiary = beneficiary; // Save the beneficiary
  256. scheduler_data.m_pending_donate_reason = reason;
  257. proc.invoke_scheduler_async();
  258. return false;
  259. }
  260. ScopedSpinLock lock(g_scheduler_lock);
  261. // "Leave" the critical section before switching context. Since we
  262. // still hold the scheduler lock, we're not actually leaving it.
  263. // Processor::switch_context expects Processor::in_critical() to be 1
  264. critical.leave();
  265. donate_to_and_switch(beneficiary, reason);
  266. return false;
  267. }
  268. bool Scheduler::context_switch(Thread* thread)
  269. {
  270. thread->did_schedule();
  271. auto from_thread = Thread::current();
  272. if (from_thread == thread)
  273. return false;
  274. if (from_thread) {
  275. // If the last process hasn't blocked (still marked as running),
  276. // mark it as runnable for the next round.
  277. if (from_thread->state() == Thread::Running)
  278. from_thread->set_state(Thread::Runnable);
  279. #ifdef LOG_EVERY_CONTEXT_SWITCH
  280. dbgln("Scheduler[{}]: {} -> {} [prio={}] {:04x}:{:08x}", Processor::current().id(), from_thread->tid().value(), thread->tid().value(), thread->priority(), thread->tss().cs, thread->tss().eip);
  281. #endif
  282. }
  283. auto& proc = Processor::current();
  284. if (!thread->is_initialized()) {
  285. proc.init_context(*thread, false);
  286. thread->set_initialized(true);
  287. }
  288. thread->set_state(Thread::Running);
  289. // Mark it as active because we are using this thread. This is similar
  290. // to comparing it with Processor::current_thread, but when there are
  291. // multiple processors there's no easy way to check whether the thread
  292. // is actually still needed. This prevents accidental finalization when
  293. // a thread is no longer in Running state, but running on another core.
  294. thread->set_active(true);
  295. proc.switch_context(from_thread, thread);
  296. // NOTE: from_thread at this point reflects the thread we were
  297. // switched from, and thread reflects Thread::current()
  298. enter_current(*from_thread, false);
  299. ASSERT(thread == Thread::current());
  300. #if ARCH(I386)
  301. auto iopl = get_iopl_from_eflags(Thread::current()->get_register_dump_from_stack().eflags);
  302. if (thread->process().is_user_process() && iopl != 0) {
  303. dbgln("PANIC: Switched to thread {} with non-zero IOPL={}", Thread::current()->tid().value(), iopl);
  304. Processor::halt();
  305. }
  306. #endif
  307. return true;
  308. }
  309. void Scheduler::enter_current(Thread& prev_thread, bool is_first)
  310. {
  311. ASSERT(g_scheduler_lock.own_lock());
  312. prev_thread.set_active(false);
  313. if (prev_thread.state() == Thread::Dying) {
  314. // If the thread we switched from is marked as dying, then notify
  315. // the finalizer. Note that as soon as we leave the scheduler lock
  316. // the finalizer may free from_thread!
  317. notify_finalizer();
  318. } else if (!is_first) {
  319. // Check if we have any signals we should deliver (even if we don't
  320. // end up switching to another thread).
  321. auto current_thread = Thread::current();
  322. if (!current_thread->is_in_block()) {
  323. ScopedSpinLock lock(current_thread->get_lock());
  324. if (current_thread->state() == Thread::Running && current_thread->pending_signals_for_state()) {
  325. current_thread->dispatch_one_pending_signal();
  326. }
  327. }
  328. }
  329. }
  330. void Scheduler::leave_on_first_switch(u32 flags)
  331. {
  332. // This is called when a thread is switched into for the first time.
  333. // At this point, enter_current has already be called, but because
  334. // Scheduler::context_switch is not in the call stack we need to
  335. // clean up and release locks manually here
  336. g_scheduler_lock.unlock(flags);
  337. auto& scheduler_data = Processor::current().get_scheduler_data();
  338. ASSERT(scheduler_data.m_in_scheduler);
  339. scheduler_data.m_in_scheduler = false;
  340. }
  341. void Scheduler::prepare_after_exec()
  342. {
  343. // This is called after exec() when doing a context "switch" into
  344. // the new process. This is called from Processor::assume_context
  345. ASSERT(g_scheduler_lock.own_lock());
  346. auto& scheduler_data = Processor::current().get_scheduler_data();
  347. ASSERT(!scheduler_data.m_in_scheduler);
  348. scheduler_data.m_in_scheduler = true;
  349. }
  350. void Scheduler::prepare_for_idle_loop()
  351. {
  352. // This is called when the CPU finished setting up the idle loop
  353. // and is about to run it. We need to acquire he scheduler lock
  354. ASSERT(!g_scheduler_lock.own_lock());
  355. g_scheduler_lock.lock();
  356. auto& scheduler_data = Processor::current().get_scheduler_data();
  357. ASSERT(!scheduler_data.m_in_scheduler);
  358. scheduler_data.m_in_scheduler = true;
  359. }
  360. Process* Scheduler::colonel()
  361. {
  362. ASSERT(s_colonel_process);
  363. return s_colonel_process;
  364. }
  365. void Scheduler::initialize()
  366. {
  367. ASSERT(&Processor::current() != nullptr); // sanity check
  368. RefPtr<Thread> idle_thread;
  369. g_scheduler_data = new SchedulerData;
  370. g_finalizer_wait_queue = new WaitQueue;
  371. g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
  372. s_colonel_process = &Process::create_kernel_process(idle_thread, "colonel", idle_loop, nullptr, 1).leak_ref();
  373. ASSERT(s_colonel_process);
  374. ASSERT(idle_thread);
  375. idle_thread->set_priority(THREAD_PRIORITY_MIN);
  376. idle_thread->set_name(StringView("idle thread #0"));
  377. set_idle_thread(idle_thread);
  378. }
  379. void Scheduler::set_idle_thread(Thread* idle_thread)
  380. {
  381. Processor::current().set_idle_thread(*idle_thread);
  382. Processor::current().set_current_thread(*idle_thread);
  383. }
  384. Thread* Scheduler::create_ap_idle_thread(u32 cpu)
  385. {
  386. ASSERT(cpu != 0);
  387. // This function is called on the bsp, but creates an idle thread for another AP
  388. ASSERT(Processor::current().id() == 0);
  389. ASSERT(s_colonel_process);
  390. Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, nullptr, THREAD_PRIORITY_MIN, String::format("idle thread #%u", cpu), 1 << cpu, false);
  391. ASSERT(idle_thread);
  392. return idle_thread;
  393. }
  394. void Scheduler::timer_tick(const RegisterState& regs)
  395. {
  396. ASSERT_INTERRUPTS_DISABLED();
  397. ASSERT(Processor::current().in_irq());
  398. auto current_thread = Processor::current().current_thread();
  399. if (!current_thread)
  400. return;
  401. bool is_bsp = Processor::current().id() == 0;
  402. if (!is_bsp)
  403. return; // TODO: This prevents scheduling on other CPUs!
  404. if (current_thread->process().is_profiling()) {
  405. SmapDisabler disabler;
  406. auto backtrace = current_thread->raw_backtrace(regs.ebp, regs.eip);
  407. auto& sample = Profiling::next_sample_slot();
  408. sample.pid = current_thread->process().pid();
  409. sample.tid = current_thread->tid();
  410. sample.timestamp = TimeManagement::the().uptime_ms();
  411. for (size_t i = 0; i < min(backtrace.size(), Profiling::max_stack_frame_count); ++i) {
  412. sample.frames[i] = backtrace[i];
  413. }
  414. }
  415. if (current_thread->tick((regs.cs & 3) == 0))
  416. return;
  417. ASSERT_INTERRUPTS_DISABLED();
  418. ASSERT(Processor::current().in_irq());
  419. Processor::current().invoke_scheduler_async();
  420. }
  421. void Scheduler::invoke_async()
  422. {
  423. ASSERT_INTERRUPTS_DISABLED();
  424. auto& proc = Processor::current();
  425. ASSERT(!proc.in_irq());
  426. // Since this function is called when leaving critical sections (such
  427. // as a SpinLock), we need to check if we're not already doing this
  428. // to prevent recursion
  429. if (!proc.get_scheduler_data().m_in_scheduler)
  430. pick_next();
  431. }
  432. void Scheduler::yield_from_critical()
  433. {
  434. auto& proc = Processor::current();
  435. ASSERT(proc.in_critical());
  436. ASSERT(!proc.in_irq());
  437. yield(); // Flag a context switch
  438. u32 prev_flags;
  439. u32 prev_crit = Processor::current().clear_critical(prev_flags, false);
  440. // Note, we may now be on a different CPU!
  441. Processor::current().restore_critical(prev_crit, prev_flags);
  442. }
  443. void Scheduler::notify_finalizer()
  444. {
  445. if (g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel) == false)
  446. g_finalizer_wait_queue->wake_all();
  447. }
  448. void Scheduler::idle_loop(void*)
  449. {
  450. dbg() << "Scheduler[" << Processor::current().id() << "]: idle loop running";
  451. ASSERT(are_interrupts_enabled());
  452. for (;;) {
  453. asm("hlt");
  454. if (Processor::current().id() == 0)
  455. yield();
  456. }
  457. }
  458. }