// Scheduler.cpp
  1. /*
  2. * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are met:
  7. *
  8. * 1. Redistributions of source code must retain the above copyright notice, this
  9. * list of conditions and the following disclaimer.
  10. *
  11. * 2. Redistributions in binary form must reproduce the above copyright notice,
  12. * this list of conditions and the following disclaimer in the documentation
  13. * and/or other materials provided with the distribution.
  14. *
  15. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  16. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  17. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  18. * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  19. * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  20. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  21. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  22. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  23. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <AK/QuickSort.h>
  27. #include <AK/ScopeGuard.h>
  28. #include <AK/TemporaryChange.h>
  29. #include <AK/Time.h>
  30. #include <Kernel/FileSystem/FileDescription.h>
  31. #include <Kernel/Net/Socket.h>
  32. #include <Kernel/Process.h>
  33. #include <Kernel/Profiling.h>
  34. #include <Kernel/RTC.h>
  35. #include <Kernel/Scheduler.h>
  36. #include <Kernel/Time/TimeManagement.h>
  37. #include <Kernel/TimerQueue.h>
  38. //#define LOG_EVERY_CONTEXT_SWITCH
  39. //#define SCHEDULER_DEBUG
  40. //#define SCHEDULER_RUNNABLE_DEBUG
  41. namespace Kernel {
  42. class SchedulerPerProcessorData {
  43. AK_MAKE_NONCOPYABLE(SchedulerPerProcessorData);
  44. AK_MAKE_NONMOVABLE(SchedulerPerProcessorData);
  45. public:
  46. SchedulerPerProcessorData() = default;
  47. WeakPtr<Thread> m_pending_beneficiary;
  48. const char* m_pending_donate_reason { nullptr };
  49. bool m_in_scheduler { true };
  50. };
  51. SchedulerData* g_scheduler_data;
  52. timeval g_timeofday;
  53. RecursiveSpinLock g_scheduler_lock;
  54. void Scheduler::init_thread(Thread& thread)
  55. {
  56. ASSERT(g_scheduler_data);
  57. g_scheduler_data->m_nonrunnable_threads.append(thread);
  58. }
  59. static u32 time_slice_for(const Thread& thread)
  60. {
  61. // One time slice unit == 1ms
  62. if (&thread == Processor::current().idle_thread())
  63. return 1;
  64. return 10;
  65. }
  66. timeval Scheduler::time_since_boot()
  67. {
  68. return { TimeManagement::the().seconds_since_boot(), (suseconds_t)TimeManagement::the().ticks_this_second() * 1000 };
  69. }
  70. Thread* g_finalizer;
  71. WaitQueue* g_finalizer_wait_queue;
  72. Atomic<bool> g_finalizer_has_work { false };
  73. static Process* s_colonel_process;
  74. u64 g_uptime;
  75. Thread::JoinBlocker::JoinBlocker(Thread& joinee, KResult& try_join_result, void*& joinee_exit_value)
  76. : m_joinee(&joinee)
  77. , m_joinee_exit_value(joinee_exit_value)
  78. {
  79. auto* current_thread = Thread::current();
  80. // We need to hold our lock to avoid a race where try_join succeeds
  81. // but the joinee is joining immediately
  82. ScopedSpinLock lock(m_lock);
  83. try_join_result = joinee.try_join(*current_thread);
  84. m_join_error = try_join_result.is_error();
  85. }
  86. void Thread::JoinBlocker::was_unblocked()
  87. {
  88. ScopedSpinLock lock(m_lock);
  89. if (!m_join_error && m_joinee) {
  90. // If the joinee hasn't exited yet, remove ourselves now
  91. ASSERT(m_joinee != Thread::current());
  92. m_joinee->join_done();
  93. m_joinee = nullptr;
  94. }
  95. }
  96. bool Thread::JoinBlocker::should_unblock(Thread&)
  97. {
  98. // We need to acquire our lock as the joinee could call joinee_exited
  99. // at any moment
  100. ScopedSpinLock lock(m_lock);
  101. if (m_join_error) {
  102. // Thread::block calls should_unblock before actually blocking.
  103. // If detected that we can't really block due to an error, we'll
  104. // return true here, which will cause Thread::block to return
  105. // with BlockResult::NotBlocked. Technically, because m_join_error
  106. // will only be set in the constructor, we don't need any lock
  107. // to check for it, but at the same time there should not be
  108. // any contention, either...
  109. return true;
  110. }
  111. return m_joinee == nullptr;
  112. }
  113. void Thread::JoinBlocker::joinee_exited(void* value)
  114. {
  115. ScopedSpinLock lock(m_lock);
  116. if (!m_joinee) {
  117. // m_joinee can be nullptr if the joiner timed out and the
  118. // joinee waits on m_lock while the joiner holds it but has
  119. // not yet called join_done.
  120. return;
  121. }
  122. m_joinee_exit_value = value;
  123. m_joinee = nullptr;
  124. set_interrupted_by_death();
  125. }
  126. Thread::FileDescriptionBlocker::FileDescriptionBlocker(const FileDescription& description)
  127. : m_blocked_description(description)
  128. {
  129. }
  130. const FileDescription& Thread::FileDescriptionBlocker::blocked_description() const
  131. {
  132. return m_blocked_description;
  133. }
  134. Thread::AcceptBlocker::AcceptBlocker(const FileDescription& description)
  135. : FileDescriptionBlocker(description)
  136. {
  137. }
  138. bool Thread::AcceptBlocker::should_unblock(Thread&)
  139. {
  140. auto& socket = *blocked_description().socket();
  141. return socket.can_accept();
  142. }
  143. Thread::ConnectBlocker::ConnectBlocker(const FileDescription& description)
  144. : FileDescriptionBlocker(description)
  145. {
  146. }
  147. bool Thread::ConnectBlocker::should_unblock(Thread&)
  148. {
  149. auto& socket = *blocked_description().socket();
  150. return socket.setup_state() == Socket::SetupState::Completed;
  151. }
  152. Thread::WriteBlocker::WriteBlocker(const FileDescription& description)
  153. : FileDescriptionBlocker(description)
  154. {
  155. }
  156. timespec* Thread::WriteBlocker::override_timeout(timespec* timeout)
  157. {
  158. auto& description = blocked_description();
  159. if (description.is_socket()) {
  160. auto& socket = *description.socket();
  161. if (socket.has_send_timeout()) {
  162. timeval_to_timespec(Scheduler::time_since_boot(), m_deadline);
  163. timespec_add_timeval(m_deadline, socket.send_timeout(), m_deadline);
  164. if (!timeout || m_deadline < *timeout)
  165. return &m_deadline;
  166. }
  167. }
  168. return timeout;
  169. }
  170. bool Thread::WriteBlocker::should_unblock(Thread&)
  171. {
  172. return blocked_description().can_write();
  173. }
  174. Thread::ReadBlocker::ReadBlocker(const FileDescription& description)
  175. : FileDescriptionBlocker(description)
  176. {
  177. }
  178. timespec* Thread::ReadBlocker::override_timeout(timespec* timeout)
  179. {
  180. auto& description = blocked_description();
  181. if (description.is_socket()) {
  182. auto& socket = *description.socket();
  183. if (socket.has_receive_timeout()) {
  184. timeval_to_timespec(Scheduler::time_since_boot(), m_deadline);
  185. timespec_add_timeval(m_deadline, socket.receive_timeout(), m_deadline);
  186. if (!timeout || m_deadline < *timeout)
  187. return &m_deadline;
  188. }
  189. }
  190. return timeout;
  191. }
  192. bool Thread::ReadBlocker::should_unblock(Thread&)
  193. {
  194. return blocked_description().can_read();
  195. }
  196. Thread::ConditionBlocker::ConditionBlocker(const char* state_string, Function<bool()>&& condition)
  197. : m_block_until_condition(move(condition))
  198. , m_state_string(state_string)
  199. {
  200. ASSERT(m_block_until_condition);
  201. }
  202. bool Thread::ConditionBlocker::should_unblock(Thread&)
  203. {
  204. return m_block_until_condition();
  205. }
  206. Thread::SleepBlocker::SleepBlocker(u64 wakeup_time)
  207. : m_wakeup_time(wakeup_time)
  208. {
  209. }
  210. bool Thread::SleepBlocker::should_unblock(Thread&)
  211. {
  212. return m_wakeup_time <= g_uptime;
  213. }
  214. Thread::SelectBlocker::SelectBlocker(const FDVector& read_fds, const FDVector& write_fds, const FDVector& except_fds)
  215. : m_select_read_fds(read_fds)
  216. , m_select_write_fds(write_fds)
  217. , m_select_exceptional_fds(except_fds)
  218. {
  219. }
  220. bool Thread::SelectBlocker::should_unblock(Thread& thread)
  221. {
  222. auto& process = thread.process();
  223. for (int fd : m_select_read_fds) {
  224. if (!process.m_fds[fd])
  225. continue;
  226. if (process.m_fds[fd].description()->can_read())
  227. return true;
  228. }
  229. for (int fd : m_select_write_fds) {
  230. if (!process.m_fds[fd])
  231. continue;
  232. if (process.m_fds[fd].description()->can_write())
  233. return true;
  234. }
  235. return false;
  236. }
  237. Thread::WaitBlocker::WaitBlocker(int wait_options, ProcessID& waitee_pid)
  238. : m_wait_options(wait_options)
  239. , m_waitee_pid(waitee_pid)
  240. {
  241. }
  242. bool Thread::WaitBlocker::should_unblock(Thread& thread)
  243. {
  244. bool should_unblock = m_wait_options & WNOHANG;
  245. if (m_waitee_pid != -1) {
  246. auto peer = Process::from_pid(m_waitee_pid);
  247. if (!peer)
  248. return true;
  249. }
  250. thread.process().for_each_child([&](Process& child) {
  251. if (m_waitee_pid != -1 && m_waitee_pid != child.pid())
  252. return IterationDecision::Continue;
  253. bool child_exited = child.is_dead();
  254. bool child_stopped = false;
  255. if (child.thread_count()) {
  256. child.for_each_thread([&](auto& child_thread) {
  257. if (child_thread.state() == Thread::State::Stopped && !child_thread.has_pending_signal(SIGCONT)) {
  258. child_stopped = true;
  259. return IterationDecision::Break;
  260. }
  261. return IterationDecision::Continue;
  262. });
  263. }
  264. bool fits_the_spec = ((m_wait_options & WEXITED) && child_exited)
  265. || ((m_wait_options & WSTOPPED) && child_stopped);
  266. if (!fits_the_spec)
  267. return IterationDecision::Continue;
  268. m_waitee_pid = child.pid();
  269. should_unblock = true;
  270. return IterationDecision::Break;
  271. });
  272. return should_unblock;
  273. }
  274. Thread::SemiPermanentBlocker::SemiPermanentBlocker(Reason reason)
  275. : m_reason(reason)
  276. {
  277. }
  278. bool Thread::SemiPermanentBlocker::should_unblock(Thread&)
  279. {
  280. // someone else has to unblock us
  281. return false;
  282. }
  283. // Called by the scheduler on threads that are blocked for some reason.
  284. // Make a decision as to whether to unblock them or not.
  285. void Thread::consider_unblock(time_t now_sec, long now_usec)
  286. {
  287. ScopedSpinLock lock(m_lock);
  288. switch (state()) {
  289. case Thread::Invalid:
  290. case Thread::Runnable:
  291. case Thread::Running:
  292. case Thread::Dead:
  293. case Thread::Stopped:
  294. case Thread::Queued:
  295. case Thread::Dying:
  296. /* don't know, don't care */
  297. return;
  298. case Thread::Blocked: {
  299. ASSERT(m_blocker != nullptr);
  300. timespec now;
  301. now.tv_sec = now_sec,
  302. now.tv_nsec = now_usec * 1000ull;
  303. bool timed_out = m_blocker_timeout && now >= *m_blocker_timeout;
  304. if (timed_out || m_blocker->should_unblock(*this))
  305. unblock();
  306. return;
  307. }
  308. }
  309. }
  310. void Scheduler::start()
  311. {
  312. ASSERT_INTERRUPTS_DISABLED();
  313. // We need to acquire our scheduler lock, which will be released
  314. // by the idle thread once control transferred there
  315. g_scheduler_lock.lock();
  316. auto& processor = Processor::current();
  317. processor.set_scheduler_data(*new SchedulerPerProcessorData());
  318. ASSERT(processor.is_initialized());
  319. auto& idle_thread = *processor.idle_thread();
  320. ASSERT(processor.current_thread() == &idle_thread);
  321. ASSERT(processor.idle_thread() == &idle_thread);
  322. idle_thread.set_ticks_left(time_slice_for(idle_thread));
  323. idle_thread.did_schedule();
  324. idle_thread.set_initialized(true);
  325. processor.init_context(idle_thread, false);
  326. idle_thread.set_state(Thread::Running);
  327. ASSERT(idle_thread.affinity() == (1u << processor.id()));
  328. processor.initialize_context_switching(idle_thread);
  329. ASSERT_NOT_REACHED();
  330. }
  331. bool Scheduler::pick_next()
  332. {
  333. ASSERT_INTERRUPTS_DISABLED();
  334. auto current_thread = Thread::current();
  335. auto now = time_since_boot();
  336. auto now_sec = now.tv_sec;
  337. auto now_usec = now.tv_usec;
  338. // Set the m_in_scheduler flag before acquiring the spinlock. This
  339. // prevents a recursive call into Scheduler::invoke_async upon
  340. // leaving the scheduler lock.
  341. ScopedCritical critical;
  342. auto& scheduler_data = Processor::current().get_scheduler_data();
  343. scheduler_data.m_in_scheduler = true;
  344. ScopeGuard guard(
  345. []() {
  346. // We may be on a different processor after we got switched
  347. // back to this thread!
  348. auto& scheduler_data = Processor::current().get_scheduler_data();
  349. ASSERT(scheduler_data.m_in_scheduler);
  350. scheduler_data.m_in_scheduler = false;
  351. });
  352. ScopedSpinLock lock(g_scheduler_lock);
  353. if (current_thread->should_die() && current_thread->state() == Thread::Running) {
  354. // Rather than immediately killing threads, yanking the kernel stack
  355. // away from them (which can lead to e.g. reference leaks), we always
  356. // allow Thread::wait_on to return. This allows the kernel stack to
  357. // clean up and eventually we'll get here shortly before transitioning
  358. // back to user mode (from Processor::exit_trap). At this point we
  359. // no longer want to schedule this thread. We can't wait until
  360. // Scheduler::enter_current because we don't want to allow it to
  361. // transition back to user mode.
  362. #ifdef SCHEDULER_DEBUG
  363. dbg() << "Scheduler[" << Processor::current().id() << "]: Thread " << *current_thread << " is dying";
  364. #endif
  365. current_thread->set_state(Thread::Dying);
  366. }
  367. // Check and unblock threads whose wait conditions have been met.
  368. Scheduler::for_each_nonrunnable([&](Thread& thread) {
  369. thread.consider_unblock(now_sec, now_usec);
  370. return IterationDecision::Continue;
  371. });
  372. Process::for_each([&](Process& process) {
  373. if (process.is_dead()) {
  374. if (current_thread->process().pid() != process.pid() && (!process.ppid() || !Process::from_pid(process.ppid()))) {
  375. auto name = process.name();
  376. auto pid = process.pid();
  377. auto exit_status = Process::reap(process);
  378. dbg() << "Scheduler[" << Processor::current().id() << "]: Reaped unparented process " << name << "(" << pid.value() << "), exit status: " << exit_status.si_status;
  379. }
  380. return IterationDecision::Continue;
  381. }
  382. if (process.m_alarm_deadline && g_uptime > process.m_alarm_deadline) {
  383. process.m_alarm_deadline = 0;
  384. // FIXME: Should we observe this signal somehow?
  385. (void)process.send_signal(SIGALRM, nullptr);
  386. }
  387. return IterationDecision::Continue;
  388. });
  389. // Dispatch any pending signals.
  390. Thread::for_each_living([&](Thread& thread) -> IterationDecision {
  391. ScopedSpinLock lock(thread.get_lock());
  392. if (!thread.has_unmasked_pending_signals())
  393. return IterationDecision::Continue;
  394. // NOTE: dispatch_one_pending_signal() may unblock the process.
  395. bool was_blocked = thread.is_blocked();
  396. if (thread.dispatch_one_pending_signal() == ShouldUnblockThread::No)
  397. return IterationDecision::Continue;
  398. if (was_blocked) {
  399. #ifdef SCHEDULER_DEBUG
  400. dbg() << "Scheduler[" << Processor::current().id() << "]:Unblock " << thread << " due to signal";
  401. #endif
  402. ASSERT(thread.m_blocker != nullptr);
  403. thread.m_blocker->set_interrupted_by_signal();
  404. thread.unblock();
  405. }
  406. return IterationDecision::Continue;
  407. });
  408. #ifdef SCHEDULER_RUNNABLE_DEBUG
  409. dbg() << "Scheduler[" << Processor::current().id() << "]: Non-runnables:";
  410. Scheduler::for_each_nonrunnable([&](Thread& thread) -> IterationDecision {
  411. if (thread.state() == Thread::Queued)
  412. dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip) << " Reason: " << (thread.wait_reason() ? thread.wait_reason() : "none");
  413. else if (thread.state() == Thread::Dying)
  414. dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip) << " Finalizable: " << thread.is_finalizable();
  415. else
  416. dbg() << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip);
  417. return IterationDecision::Continue;
  418. });
  419. dbg() << "Scheduler[" << Processor::current().id() << "]: Runnables:";
  420. Scheduler::for_each_runnable([](Thread& thread) -> IterationDecision {
  421. dbg() << " " << String::format("%3u", thread.effective_priority()) << "/" << String::format("%2u", thread.priority()) << " " << String::format("%-12s", thread.state_string()) << " " << thread << " @ " << String::format("%w", thread.tss().cs) << ":" << String::format("%x", thread.tss().eip);
  422. return IterationDecision::Continue;
  423. });
  424. #endif
  425. Thread* thread_to_schedule = nullptr;
  426. Vector<Thread*, 128> sorted_runnables;
  427. for_each_runnable([&](auto& thread) {
  428. if ((thread.affinity() & (1u << Processor::current().id())) == 0)
  429. return IterationDecision::Continue;
  430. if (thread.state() == Thread::Running && &thread != current_thread)
  431. return IterationDecision::Continue;
  432. sorted_runnables.append(&thread);
  433. if (&thread == scheduler_data.m_pending_beneficiary) {
  434. thread_to_schedule = &thread;
  435. return IterationDecision::Break;
  436. }
  437. return IterationDecision::Continue;
  438. });
  439. if (thread_to_schedule) {
  440. // The thread we're supposed to donate to still exists
  441. const char* reason = scheduler_data.m_pending_donate_reason;
  442. scheduler_data.m_pending_beneficiary = nullptr;
  443. scheduler_data.m_pending_donate_reason = nullptr;
  444. // We need to leave our first critical section before switching context,
  445. // but since we're still holding the scheduler lock we're still in a critical section
  446. critical.leave();
  447. #ifdef SCHEDULER_DEBUG
  448. dbg() << "Processing pending donate to " << *thread_to_schedule << " reason=" << reason;
  449. #endif
  450. return donate_to_and_switch(thread_to_schedule, reason);
  451. }
  452. // Either we're not donating or the beneficiary disappeared.
  453. // Either way clear any pending information
  454. scheduler_data.m_pending_beneficiary = nullptr;
  455. scheduler_data.m_pending_donate_reason = nullptr;
  456. quick_sort(sorted_runnables, [](auto& a, auto& b) { return a->effective_priority() >= b->effective_priority(); });
  457. for (auto* thread : sorted_runnables) {
  458. if (thread->process().exec_tid() && thread->process().exec_tid() != thread->tid())
  459. continue;
  460. ASSERT(thread->state() == Thread::Runnable || thread->state() == Thread::Running);
  461. if (!thread_to_schedule) {
  462. thread->m_extra_priority = 0;
  463. thread_to_schedule = thread;
  464. } else {
  465. thread->m_extra_priority++;
  466. }
  467. }
  468. if (!thread_to_schedule)
  469. thread_to_schedule = Processor::current().idle_thread();
  470. #ifdef SCHEDULER_DEBUG
  471. dbg() << "Scheduler[" << Processor::current().id() << "]: Switch to " << *thread_to_schedule << " @ " << String::format("%04x:%08x", thread_to_schedule->tss().cs, thread_to_schedule->tss().eip);
  472. #endif
  473. // We need to leave our first critical section before switching context,
  474. // but since we're still holding the scheduler lock we're still in a critical section
  475. critical.leave();
  476. return context_switch(thread_to_schedule);
  477. }
  478. bool Scheduler::yield()
  479. {
  480. InterruptDisabler disabler;
  481. auto& proc = Processor::current();
  482. auto& scheduler_data = proc.get_scheduler_data();
  483. // Clear any pending beneficiary
  484. scheduler_data.m_pending_beneficiary = nullptr;
  485. scheduler_data.m_pending_donate_reason = nullptr;
  486. auto current_thread = Thread::current();
  487. #ifdef SCHEDULER_DEBUG
  488. dbg() << "Scheduler[" << proc.id() << "]: yielding thread " << *current_thread << " in_irq: " << proc.in_irq();
  489. #endif
  490. ASSERT(current_thread != nullptr);
  491. if (proc.in_irq() || proc.in_critical()) {
  492. // If we're handling an IRQ we can't switch context, or we're in
  493. // a critical section where we don't want to switch contexts, then
  494. // delay until exiting the trap or critical section
  495. proc.invoke_scheduler_async();
  496. return false;
  497. }
  498. if (!Scheduler::pick_next())
  499. return false;
  500. #ifdef SCHEDULER_DEBUG
  501. dbg() << "Scheduler[" << Processor::current().id() << "]: yield returns to thread " << *current_thread << " in_irq: " << Processor::current().in_irq();
  502. #endif
  503. return true;
  504. }
  505. bool Scheduler::donate_to_and_switch(Thread* beneficiary, const char* reason)
  506. {
  507. ASSERT(g_scheduler_lock.own_lock());
  508. auto& proc = Processor::current();
  509. ASSERT(proc.in_critical() == 1);
  510. (void)reason;
  511. unsigned ticks_left = Thread::current()->ticks_left();
  512. if (!beneficiary || beneficiary->state() != Thread::Runnable || ticks_left <= 1)
  513. return Scheduler::yield();
  514. unsigned ticks_to_donate = min(ticks_left - 1, time_slice_for(*beneficiary));
  515. #ifdef SCHEDULER_DEBUG
  516. dbg() << "Scheduler[" << proc.id() << "]: Donating " << ticks_to_donate << " ticks to " << *beneficiary << ", reason=" << reason;
  517. #endif
  518. beneficiary->set_ticks_left(ticks_to_donate);
  519. return Scheduler::context_switch(beneficiary);
  520. }
  521. bool Scheduler::donate_to(RefPtr<Thread>& beneficiary, const char* reason)
  522. {
  523. ASSERT(beneficiary);
  524. if (beneficiary == Thread::current())
  525. return Scheduler::yield();
  526. // Set the m_in_scheduler flag before acquiring the spinlock. This
  527. // prevents a recursive call into Scheduler::invoke_async upon
  528. // leaving the scheduler lock.
  529. ScopedCritical critical;
  530. auto& proc = Processor::current();
  531. auto& scheduler_data = proc.get_scheduler_data();
  532. scheduler_data.m_in_scheduler = true;
  533. ScopeGuard guard(
  534. []() {
  535. // We may be on a different processor after we got switched
  536. // back to this thread!
  537. auto& scheduler_data = Processor::current().get_scheduler_data();
  538. ASSERT(scheduler_data.m_in_scheduler);
  539. scheduler_data.m_in_scheduler = false;
  540. });
  541. ASSERT(!proc.in_irq());
  542. if (proc.in_critical() > 1) {
  543. scheduler_data.m_pending_beneficiary = beneficiary->make_weak_ptr(); // Save the beneficiary
  544. scheduler_data.m_pending_donate_reason = reason;
  545. proc.invoke_scheduler_async();
  546. return false;
  547. }
  548. ScopedSpinLock lock(g_scheduler_lock);
  549. // "Leave" the critical section before switching context. Since we
  550. // still hold the scheduler lock, we're not actually leaving it.
  551. // Processor::switch_context expects Processor::in_critical() to be 1
  552. critical.leave();
  553. donate_to_and_switch(beneficiary, reason);
  554. return false;
  555. }
  556. bool Scheduler::context_switch(Thread* thread)
  557. {
  558. thread->set_ticks_left(time_slice_for(*thread));
  559. thread->did_schedule();
  560. auto from_thread = Thread::current();
  561. if (from_thread == thread)
  562. return false;
  563. if (from_thread) {
  564. // If the last process hasn't blocked (still marked as running),
  565. // mark it as runnable for the next round.
  566. if (from_thread->state() == Thread::Running)
  567. from_thread->set_state(Thread::Runnable);
  568. #ifdef LOG_EVERY_CONTEXT_SWITCH
  569. dbg() << "Scheduler[" << Processor::current().id() << "]: " << *from_thread << " -> " << *thread << " [" << thread->priority() << "] " << String::format("%w", thread->tss().cs) << ":" << String::format("%x", thread->tss().eip);
  570. #endif
  571. }
  572. auto& proc = Processor::current();
  573. if (!thread->is_initialized()) {
  574. proc.init_context(*thread, false);
  575. thread->set_initialized(true);
  576. }
  577. thread->set_state(Thread::Running);
  578. // Mark it as active because we are using this thread. This is similar
  579. // to comparing it with Processor::current_thread, but when there are
  580. // multiple processors there's no easy way to check whether the thread
  581. // is actually still needed. This prevents accidental finalization when
  582. // a thread is no longer in Running state, but running on another core.
  583. thread->set_active(true);
  584. proc.switch_context(from_thread, thread);
  585. // NOTE: from_thread at this point reflects the thread we were
  586. // switched from, and thread reflects Thread::current()
  587. enter_current(*from_thread);
  588. ASSERT(thread == Thread::current());
  589. return true;
  590. }
  591. void Scheduler::enter_current(Thread& prev_thread)
  592. {
  593. ASSERT(g_scheduler_lock.is_locked());
  594. prev_thread.set_active(false);
  595. if (prev_thread.state() == Thread::Dying) {
  596. // If the thread we switched from is marked as dying, then notify
  597. // the finalizer. Note that as soon as we leave the scheduler lock
  598. // the finalizer may free from_thread!
  599. notify_finalizer();
  600. }
  601. }
  602. void Scheduler::leave_on_first_switch(u32 flags)
  603. {
  604. // This is called when a thread is switched into for the first time.
  605. // At this point, enter_current has already be called, but because
  606. // Scheduler::context_switch is not in the call stack we need to
  607. // clean up and release locks manually here
  608. g_scheduler_lock.unlock(flags);
  609. auto& scheduler_data = Processor::current().get_scheduler_data();
  610. ASSERT(scheduler_data.m_in_scheduler);
  611. scheduler_data.m_in_scheduler = false;
  612. }
  613. void Scheduler::prepare_after_exec()
  614. {
  615. // This is called after exec() when doing a context "switch" into
  616. // the new process. This is called from Processor::assume_context
  617. ASSERT(g_scheduler_lock.own_lock());
  618. auto& scheduler_data = Processor::current().get_scheduler_data();
  619. ASSERT(!scheduler_data.m_in_scheduler);
  620. scheduler_data.m_in_scheduler = true;
  621. }
  622. void Scheduler::prepare_for_idle_loop()
  623. {
  624. // This is called when the CPU finished setting up the idle loop
  625. // and is about to run it. We need to acquire he scheduler lock
  626. ASSERT(!g_scheduler_lock.own_lock());
  627. g_scheduler_lock.lock();
  628. auto& scheduler_data = Processor::current().get_scheduler_data();
  629. ASSERT(!scheduler_data.m_in_scheduler);
  630. scheduler_data.m_in_scheduler = true;
  631. }
  632. Process* Scheduler::colonel()
  633. {
  634. ASSERT(s_colonel_process);
  635. return s_colonel_process;
  636. }
  637. void Scheduler::initialize()
  638. {
  639. ASSERT(&Processor::current() != nullptr); // sanity check
  640. RefPtr<Thread> idle_thread;
  641. g_scheduler_data = new SchedulerData;
  642. g_finalizer_wait_queue = new WaitQueue;
  643. g_finalizer_has_work.store(false, AK::MemoryOrder::memory_order_release);
  644. s_colonel_process = &Process::create_kernel_process(idle_thread, "colonel", idle_loop, 1).leak_ref();
  645. ASSERT(s_colonel_process);
  646. ASSERT(idle_thread);
  647. idle_thread->set_priority(THREAD_PRIORITY_MIN);
  648. idle_thread->set_name(StringView("idle thread #0"));
  649. set_idle_thread(idle_thread);
  650. }
  651. void Scheduler::set_idle_thread(Thread* idle_thread)
  652. {
  653. Processor::current().set_idle_thread(*idle_thread);
  654. Processor::current().set_current_thread(*idle_thread);
  655. }
  656. Thread* Scheduler::create_ap_idle_thread(u32 cpu)
  657. {
  658. ASSERT(cpu != 0);
  659. // This function is called on the bsp, but creates an idle thread for another AP
  660. ASSERT(Processor::current().id() == 0);
  661. ASSERT(s_colonel_process);
  662. Thread* idle_thread = s_colonel_process->create_kernel_thread(idle_loop, THREAD_PRIORITY_MIN, String::format("idle thread #%u", cpu), 1 << cpu, false);
  663. ASSERT(idle_thread);
  664. return idle_thread;
  665. }
  666. void Scheduler::timer_tick(const RegisterState& regs)
  667. {
  668. ASSERT_INTERRUPTS_DISABLED();
  669. ASSERT(Processor::current().in_irq());
  670. auto current_thread = Processor::current().current_thread();
  671. if (!current_thread)
  672. return;
  673. bool is_bsp = Processor::current().id() == 0;
  674. if (!is_bsp)
  675. return; // TODO: This prevents scheduling on other CPUs!
  676. if (is_bsp) {
  677. // TODO: We should probably move this out of the scheduler
  678. ++g_uptime;
  679. g_timeofday = TimeManagement::now_as_timeval();
  680. }
  681. if (current_thread->process().is_profiling()) {
  682. SmapDisabler disabler;
  683. auto backtrace = current_thread->raw_backtrace(regs.ebp, regs.eip);
  684. auto& sample = Profiling::next_sample_slot();
  685. sample.pid = current_thread->process().pid();
  686. sample.tid = current_thread->tid();
  687. sample.timestamp = g_uptime;
  688. for (size_t i = 0; i < min(backtrace.size(), Profiling::max_stack_frame_count); ++i) {
  689. sample.frames[i] = backtrace[i];
  690. }
  691. }
  692. if (is_bsp)
  693. TimerQueue::the().fire();
  694. if (current_thread->tick())
  695. return;
  696. ASSERT_INTERRUPTS_DISABLED();
  697. ASSERT(Processor::current().in_irq());
  698. Processor::current().invoke_scheduler_async();
  699. }
  700. void Scheduler::invoke_async()
  701. {
  702. ASSERT_INTERRUPTS_DISABLED();
  703. auto& proc = Processor::current();
  704. ASSERT(!proc.in_irq());
  705. // Since this function is called when leaving critical sections (such
  706. // as a SpinLock), we need to check if we're not already doing this
  707. // to prevent recursion
  708. if (!proc.get_scheduler_data().m_in_scheduler)
  709. pick_next();
  710. }
  711. void Scheduler::notify_finalizer()
  712. {
  713. if (g_finalizer_has_work.exchange(true, AK::MemoryOrder::memory_order_acq_rel) == false)
  714. g_finalizer_wait_queue->wake_all();
  715. }
  716. void Scheduler::idle_loop()
  717. {
  718. dbg() << "Scheduler[" << Processor::current().id() << "]: idle loop running";
  719. ASSERT(are_interrupts_enabled());
  720. for (;;) {
  721. asm("hlt");
  722. if (Processor::current().id() == 0)
  723. yield();
  724. }
  725. }
  726. }