Kernel: Fix deadlock cancelling timer
It's possible that a timer may have been queued to be executed by the timer irq handler, but if we're in a critical section on the same processor and are trying to cancel that timer, we would spin forever waiting for it to be executed.
This commit is contained in:
parent
6938be00f1
commit
d9fb93c5ce
Notes:
sideshowbarker
2024-07-18 09:12:19 +09:00
Author: https://github.com/tomuta Commit: https://github.com/SerenityOS/serenity/commit/d9fb93c5ce5 Pull-request: https://github.com/SerenityOS/serenity/pull/8657
2 changed files with 78 additions and 58 deletions
|
@ -89,7 +89,8 @@ void TimerQueue::add_timer_locked(NonnullRefPtr<Timer> timer)
|
|||
{
|
||||
Time timer_expiration = timer->m_expires;
|
||||
|
||||
VERIFY(!timer->is_queued());
|
||||
timer->clear_cancelled();
|
||||
timer->clear_callback_finished();
|
||||
|
||||
auto& queue = queue_for_timer(*timer);
|
||||
if (queue.list.is_empty()) {
|
||||
|
@ -148,67 +149,76 @@ bool TimerQueue::cancel_timer(TimerId id)
|
|||
};
|
||||
}
|
||||
|
||||
if (!found_timer) {
|
||||
// The timer may be executing right now, if it is then it should
|
||||
// be in m_timers_executing. If it is then release the lock
|
||||
// briefly to allow it to finish by removing itself
|
||||
// NOTE: This can only happen with multiple processors!
|
||||
while (true) {
|
||||
for (auto& timer : m_timers_executing) {
|
||||
if (timer.m_id == id) {
|
||||
found_timer = &timer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found_timer) {
|
||||
VERIFY(timer_queue);
|
||||
remove_timer_locked(*timer_queue, *found_timer);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (found_timer) {
|
||||
// NOTE: This isn't the most efficient way to wait, but
|
||||
// it should only happen when multiple processors are used.
|
||||
// Also, the timers should execute pretty quickly, so it
|
||||
// should not loop here for very long. But we can't yield.
|
||||
lock.unlock();
|
||||
Processor::wait_check();
|
||||
lock.lock();
|
||||
found_timer = nullptr;
|
||||
} else {
|
||||
// We were not able to cancel the timer, but at this point
|
||||
// the handler should have completed if it was running!
|
||||
return false;
|
||||
}
|
||||
// The timer may be executing right now, if it is then it should
|
||||
// be in m_timers_executing. This is the case when the deferred
|
||||
// call has been queued but not yet executed.
|
||||
for (auto& timer : m_timers_executing) {
|
||||
if (timer.m_id == id) {
|
||||
found_timer = &timer;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
VERIFY(found_timer);
|
||||
VERIFY(timer_queue);
|
||||
remove_timer_locked(*timer_queue, *found_timer);
|
||||
if (!found_timer)
|
||||
return false;
|
||||
|
||||
// Keep a reference while we unlock
|
||||
NonnullRefPtr<Timer> executing_timer(*found_timer);
|
||||
lock.unlock();
|
||||
|
||||
if (!found_timer->set_cancelled()) {
|
||||
// We cancelled it even though the deferred call has been queued already.
|
||||
// We do not unref the timer here because the deferred call is still going
|
||||
// too need it!
|
||||
lock.lock();
|
||||
VERIFY(m_timers_executing.contains(*found_timer));
|
||||
m_timers_executing.remove(*found_timer);
|
||||
return true;
|
||||
}
|
||||
|
||||
// At this point the deferred call is queued and is being executed
|
||||
// on another processor. We need to wait until it's complete!
|
||||
while (!found_timer->is_callback_finished())
|
||||
Processor::wait_check();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool TimerQueue::cancel_timer(Timer& timer)
|
||||
{
|
||||
bool did_already_run = timer.set_cancelled();
|
||||
auto& timer_queue = queue_for_timer(timer);
|
||||
ScopedSpinLock lock(g_timerqueue_lock);
|
||||
if (!timer_queue.list.contains(timer)) {
|
||||
// The timer may be executing right now, if it is then it should
|
||||
// be in m_timers_executing. If it is then release the lock
|
||||
// briefly to allow it to finish by removing itself
|
||||
// NOTE: This can only happen with multiple processors!
|
||||
while (m_timers_executing.contains(timer)) {
|
||||
// NOTE: This isn't the most efficient way to wait, but
|
||||
// it should only happen when multiple processors are used.
|
||||
// Also, the timers should execute pretty quickly, so it
|
||||
// should not loop here for very long. But we can't yield.
|
||||
lock.unlock();
|
||||
Processor::wait_check();
|
||||
lock.lock();
|
||||
|
||||
if (!did_already_run) {
|
||||
ScopedSpinLock lock(g_timerqueue_lock);
|
||||
if (timer_queue.list.contains(timer)) {
|
||||
// The timer has not fired, remove it
|
||||
VERIFY(timer.ref_count() > 1);
|
||||
remove_timer_locked(timer_queue, timer);
|
||||
return true;
|
||||
}
|
||||
// We were not able to cancel the timer, but at this point
|
||||
// the handler should have completed if it was running!
|
||||
return false;
|
||||
|
||||
// The timer was queued to execute but hasn't had a chance
|
||||
// to run. In this case, it should still be in m_timers_executing
|
||||
// and we don't need to spin. It still holds a reference
|
||||
// that will be dropped when it does get a chance to run,
|
||||
// but since we called set_cancelled it will only drop its reference
|
||||
VERIFY(m_timers_executing.contains(timer));
|
||||
m_timers_executing.remove(timer);
|
||||
return true;
|
||||
}
|
||||
|
||||
VERIFY(timer.ref_count() > 1);
|
||||
remove_timer_locked(timer_queue, timer);
|
||||
// At this point the deferred call is queued and is being executed
|
||||
// on another processor. We need to wait until it's complete!
|
||||
while (!timer.is_callback_finished())
|
||||
Processor::wait_check();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -216,7 +226,6 @@ void TimerQueue::remove_timer_locked(Queue& queue, Timer& timer)
|
|||
{
|
||||
bool was_next_timer = (queue.list.first() == &timer);
|
||||
queue.list.remove(timer);
|
||||
timer.set_queued(false);
|
||||
auto now = timer.now(false);
|
||||
if (timer.m_expires > now)
|
||||
timer.m_remaining = timer.m_expires - now;
|
||||
|
@ -240,7 +249,6 @@ void TimerQueue::fire()
|
|||
|
||||
while (timer && timer->now(true) > timer->m_expires) {
|
||||
queue.list.remove(*timer);
|
||||
timer->set_queued(false);
|
||||
|
||||
m_timers_executing.append(*timer);
|
||||
|
||||
|
@ -249,10 +257,16 @@ void TimerQueue::fire()
|
|||
lock.unlock();
|
||||
|
||||
// Defer executing the timer outside of the irq handler
|
||||
Processor::current().deferred_call_queue([this, timer]() {
|
||||
timer->m_callback();
|
||||
ScopedSpinLock lock(g_timerqueue_lock);
|
||||
m_timers_executing.remove(*timer);
|
||||
Processor::deferred_call_queue([this, timer]() {
|
||||
// Check if we were cancelled in between being triggered
|
||||
// by the timer irq handler and now. If so, just drop
|
||||
// our reference and don't execute the callback.
|
||||
if (!timer->set_cancelled()) {
|
||||
timer->m_callback();
|
||||
ScopedSpinLock lock(g_timerqueue_lock);
|
||||
m_timers_executing.remove(*timer);
|
||||
}
|
||||
timer->set_callback_finished();
|
||||
// Drop the reference we added when queueing the timer
|
||||
timer->unref();
|
||||
});
|
||||
|
|
|
@ -43,7 +43,8 @@ private:
|
|||
Time m_expires;
|
||||
Time m_remaining {};
|
||||
Function<void()> m_callback;
|
||||
Atomic<bool, AK::MemoryOrder::memory_order_relaxed> m_queued { false };
|
||||
Atomic<bool> m_cancelled { false };
|
||||
Atomic<bool> m_callback_finished { false };
|
||||
|
||||
bool operator<(const Timer& rhs) const
|
||||
{
|
||||
|
@ -57,10 +58,15 @@ private:
|
|||
{
|
||||
return m_id == rhs.m_id;
|
||||
}
|
||||
bool is_queued() const { return m_queued; }
|
||||
void set_queued(bool queued) { m_queued = queued; }
|
||||
void clear_cancelled() { return m_cancelled.store(false, AK::memory_order_release); }
|
||||
bool set_cancelled() { return m_cancelled.exchange(true, AK::memory_order_acq_rel); }
|
||||
bool is_callback_finished() const { return m_callback_finished.load(AK::memory_order_acquire); }
|
||||
void clear_callback_finished() { m_callback_finished.store(false, AK::memory_order_release); }
|
||||
void set_callback_finished() { m_callback_finished.store(true, AK::memory_order_release); }
|
||||
Time now(bool) const;
|
||||
|
||||
bool is_queued() const { return m_list_node.is_in_list(); }
|
||||
|
||||
public:
|
||||
IntrusiveListNode<Timer> m_list_node;
|
||||
using List = IntrusiveList<Timer, RawPtr<Timer>, &Timer::m_list_node>;
|
||||
|
|
Loading…
Add table
Reference in a new issue