Kernel: Fix thread joining issues

The thread joining logic hadn't been updated to account for the subtle
differences introduced by software context switching. This fixes several
race conditions around thread destruction and joining, and also fixes
finalization, which did not properly account for detached state or for
the fact that threads can be joined after termination as long as they
are not detached.

Fixes #3596
Tom authored on 2020-09-25 21:44:43 -06:00, committed by Andreas Kling
parent b245121f13
commit 1727b2d7cd
5 changed files with 196 additions and 82 deletions
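
For orientation before the diffs: the core of the fix is to replace the old pair of raw m_joiner/m_joinee pointers with a single atomic joiner slot that is claimed exactly once, plus an explicit detach() that makes a thread unjoinable. Below is a minimal userspace sketch of that protocol (my own analogue using std::atomic, not SerenityOS code; all names are illustrative):

    #include <atomic>
    #include <cassert>
    #include <mutex>

    struct ThreadStub {
        std::atomic<ThreadStub*> m_joiner { nullptr };
        bool m_is_joinable { true };
        bool m_dead { false };
        std::mutex m_lock; // stands in for the kernel's spinlocks

        // Mirrors Thread::try_join: claim the joiner slot exactly once.
        bool try_join(ThreadStub& joiner)
        {
            if (&joiner == this)
                return false; // the real code returns EDEADLK here
            std::lock_guard<std::mutex> guard(m_lock);
            if (!m_is_joinable || m_dead)
                return false; // EINVAL
            ThreadStub* expected = nullptr;
            if (!m_joiner.compare_exchange_strong(expected, &joiner))
                return false; // somebody else is already joining: EINVAL
            m_is_joinable = false; // nobody else may join from now on
            return true;
        }

        // Mirrors Thread::detach: simply makes the thread unjoinable.
        void detach()
        {
            std::lock_guard<std::mutex> guard(m_lock);
            m_is_joinable = false;
        }

        // Mirrors the hand-off in Thread::finalize: the exiting thread swaps
        // the joiner out atomically, so exactly one side owns the notification.
        ThreadStub* take_joiner_on_exit()
        {
            std::lock_guard<std::mutex> guard(m_lock);
            m_dead = true;
            return m_joiner.exchange(nullptr);
        }
    };

    int main()
    {
        ThreadStub joinee, joiner_a, joiner_b;
        assert(joinee.try_join(joiner_a));                 // first joiner wins the slot
        assert(!joinee.try_join(joiner_b));                // second joiner is rejected
        assert(joinee.take_joiner_on_exit() == &joiner_a); // exit notifies exactly one joiner
        assert(joinee.take_joiner_on_exit() == nullptr);   // ... and only once
    }

The compare-exchange is what closes the race in which two joiners, or a joiner and an exiting joinee, manipulate the same pointers concurrently.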

File 1 of 5

@@ -246,8 +246,8 @@ void Process::kill_threads_except_self()
             || thread.state() == Thread::State::Dying)
             return IterationDecision::Continue;
 
-        // At this point, we have no joiner anymore
-        thread.m_joiner = nullptr;
+        // We need to detach this thread in case it hasn't been joined
+        thread.detach();
         thread.set_should_die();
         return IterationDecision::Continue;
     });
@@ -258,6 +258,8 @@ void Process::kill_threads_except_self()
 void Process::kill_all_threads()
 {
     for_each_thread([&](Thread& thread) {
+        // We need to detach this thread in case it hasn't been joined
+        thread.detach();
         thread.set_should_die();
         return IterationDecision::Continue;
     });
@@ -355,6 +357,7 @@ Process::Process(Thread*& first_thread, const String& name, uid_t uid, gid_t gid
     } else {
         // NOTE: This non-forked code path is only taken when the kernel creates a process "manually" (at boot.)
         first_thread = new Thread(*this);
+        first_thread->detach();
     }
 }
@@ -769,7 +772,8 @@ Thread* Process::create_kernel_thread(void (*entry)(), u32 priority, const Strin
     thread->set_name(name);
     thread->set_affinity(affinity);
     thread->set_priority(priority);
-    thread->set_joinable(joinable);
+    if (!joinable)
+        thread->detach();
 
     auto& tss = thread->tss();
     tss.eip = (FlatPtr)entry;
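
Note the pattern across all four hunks above: every path that used to poke at m_joiner or call set_joinable(false) directly now goes through thread.detach(). This matters because, after this change, the finalizer refuses to reap a thread that is still joinable (see is_finalizable() in the header below), so any kill or creation path that forgot to detach a never-to-be-joined thread would leak it.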

File 2 of 5

@@ -84,19 +84,62 @@ Atomic<bool> g_finalizer_has_work { false };
 static Process* s_colonel_process;
 u64 g_uptime;
 
-Thread::JoinBlocker::JoinBlocker(Thread& joinee, void*& joinee_exit_value)
-    : m_joinee(joinee)
+Thread::JoinBlocker::JoinBlocker(Thread& joinee, KResult& try_join_result, void*& joinee_exit_value)
+    : m_joinee(&joinee)
     , m_joinee_exit_value(joinee_exit_value)
 {
-    ASSERT(m_joinee.m_joiner == nullptr);
-    auto current_thread = Thread::current();
-    m_joinee.m_joiner = current_thread;
-    current_thread->m_joinee = &joinee;
+    auto* current_thread = Thread::current();
+    // We need to hold our lock to avoid a race where try_join succeeds
+    // but the joinee exits immediately
+    ScopedSpinLock lock(m_lock);
+    try_join_result = joinee.try_join(*current_thread);
+    m_join_error = try_join_result.is_error();
 }
 
-bool Thread::JoinBlocker::should_unblock(Thread& joiner)
+void Thread::JoinBlocker::was_unblocked()
 {
-    return !joiner.m_joinee;
+    ScopedSpinLock lock(m_lock);
+    if (!m_join_error && m_joinee) {
+        // If the joinee hasn't exited yet, remove ourselves now
+        ASSERT(m_joinee != Thread::current());
+        m_joinee->join_done();
+        m_joinee = nullptr;
+    }
+}
+
+bool Thread::JoinBlocker::should_unblock(Thread&)
+{
+    // We need to acquire our lock as the joinee could call joinee_exited
+    // at any moment
+    ScopedSpinLock lock(m_lock);
+    if (m_join_error) {
+        // Thread::block calls should_unblock before actually blocking.
+        // If we detect that we can't really block due to an error, we
+        // return true here, which causes Thread::block to return
+        // with BlockResult::NotBlocked. Technically, because m_join_error
+        // is only ever set in the constructor, we don't need any lock
+        // to check for it, but at the same time there shouldn't be
+        // any contention, either...
+        return true;
+    }
+    return m_joinee == nullptr;
+}
+
+void Thread::JoinBlocker::joinee_exited(void* value)
+{
+    ScopedSpinLock lock(m_lock);
+    if (!m_joinee) {
+        // m_joinee can be nullptr if the joiner timed out and the
+        // joinee was waiting on m_lock while the joiner held it but
+        // had not yet called join_done.
+        return;
+    }
+    m_joinee_exit_value = value;
+    m_joinee = nullptr;
+    set_interrupted_by_death();
 }
 
 Thread::FileDescriptionBlocker::FileDescriptionBlocker(const FileDescription& description)
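
The constructor-reports-error shape above is worth calling out: a Blocker that fails in its constructor can't simply refuse to block, so it latches the error and lets should_unblock() return true, which makes Thread::block() come back with NotBlocked. A stripped-down sketch of that pattern (illustrative code, not from the repository):

    #include <cassert>
    #include <utility>

    enum class BlockResult { NotBlocked, Blocked };

    struct Blocker {
        virtual ~Blocker() = default;
        virtual bool should_unblock() = 0;
    };

    struct JoinLikeBlocker final : Blocker {
        explicit JoinLikeBlocker(bool try_join_succeeded)
            : m_error(!try_join_succeeded)
        {
        }
        bool should_unblock() override
        {
            // A constructor-time error means we must not go to sleep at all;
            // reporting "already unblocked" short-circuits block() below.
            return m_error || m_done;
        }
        bool m_error { false };
        bool m_done { false };
    };

    // Shaped like Thread::block<T>: construct the blocker, then bail out
    // early if the wake condition (or an error) is already in effect.
    template<typename T, typename... Args>
    BlockResult block(Args&&... args)
    {
        T t(std::forward<Args>(args)...);
        if (t.should_unblock())
            return BlockResult::NotBlocked;
        // ... a real implementation would park the thread here ...
        return BlockResult::Blocked;
    }

    int main()
    {
        // try_join failed -> the blocker latches the error -> NotBlocked,
        // and the caller (sys$join_thread) can convert it into an errno.
        assert(block<JoinLikeBlocker>(false) == BlockResult::NotBlocked);
    }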

File 3 of 5

@@ -71,7 +71,8 @@ int Process::sys$create_thread(void* (*entry)(void*), Userspace<const Syscall::S
     thread->set_name(builder.to_string());
     thread->set_priority(requested_thread_priority);
-    thread->set_joinable(is_thread_joinable);
+    if (!is_thread_joinable)
+        thread->detach();
 
     auto& tss = thread->tss();
     tss.eip = (FlatPtr)entry;
@@ -109,7 +110,7 @@ int Process::sys$detach_thread(pid_t tid)
     if (!thread->is_joinable())
         return -EINVAL;
 
-    thread->set_joinable(false);
+    thread->detach();
     return 0;
 }
@@ -126,31 +127,20 @@ int Process::sys$join_thread(pid_t tid, Userspace<void**> exit_value)
     if (thread == current_thread)
         return -EDEADLK;
 
-    if (thread->m_joinee == current_thread)
-        return -EDEADLK;
-
-    ASSERT(thread->m_joiner != current_thread);
-    if (thread->m_joiner)
-        return -EINVAL;
-
-    if (!thread->is_joinable())
-        return -EINVAL;
-
     void* joinee_exit_value = nullptr;
 
     // NOTE: pthread_join() cannot be interrupted by signals. Only by death.
     for (;;) {
-        auto result = current_thread->block<Thread::JoinBlocker>(nullptr, *thread, joinee_exit_value);
+        KResult try_join_result(KSuccess);
+        auto result = current_thread->block<Thread::JoinBlocker>(nullptr, *thread, try_join_result, joinee_exit_value);
         if (result == Thread::BlockResult::NotBlocked) {
-            ASSERT_INTERRUPTS_DISABLED();
+            if (try_join_result.is_error())
+                return try_join_result.error();
             break;
         }
         if (result == Thread::BlockResult::InterruptedByDeath) {
-            // NOTE: This cleans things up so that Thread::finalize() won't
-            // get confused about a missing joiner when finalizing the joinee.
-            InterruptDisabler disabler_t;
-            if (current_thread->m_joinee) {
-                current_thread->m_joinee->m_joiner = nullptr;
-                current_thread->m_joinee = nullptr;
-            }
+            ASSERT_INTERRUPTS_DISABLED();
            break;
         }
     }
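
From userspace these are just the standard POSIX join semantics that sys$join_thread now implements correctly; a small illustration (plain pthreads, nothing SerenityOS-specific):

    #include <cstdio>
    #include <pthread.h>
    #include <unistd.h>

    static void* worker(void*)
    {
        return (void*)42; // becomes the joiner's exit value
    }

    int main()
    {
        pthread_t t;
        pthread_create(&t, nullptr, worker, nullptr);
        sleep(1); // let the worker terminate first; the join below must still succeed
        void* exit_value = nullptr;
        int rc = pthread_join(t, &exit_value); // join-after-termination is legal
        printf("rc=%d exit_value=%p\n", rc, exit_value);
        // A second join on the same thread (or any join on a detached thread)
        // fails with EINVAL, and joining yourself fails with EDEADLK -- exactly
        // the errors the syscall above reports via try_join_result.
        return 0;
    }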

File 4 of 5

@@ -110,6 +110,8 @@ Thread::~Thread()
     auto thread_cnt_before = m_process->m_thread_count.fetch_sub(1, AK::MemoryOrder::memory_order_acq_rel);
     ASSERT(thread_cnt_before != 0);
+
+    ASSERT(!m_joiner);
 }
 
 void Thread::unblock()
@@ -192,6 +194,9 @@ void Thread::die_if_needed()
 void Thread::yield_without_holding_big_lock()
 {
     bool did_unlock = unlock_process_if_locked();
+    // NOTE: Even though we call Scheduler::yield here, unless we happen
+    // to be outside of a critical section, the yield will be postponed
+    // until leaving it in relock_process.
     Scheduler::yield();
     relock_process(did_unlock);
 }
@@ -203,8 +208,20 @@ bool Thread::unlock_process_if_locked()
 void Thread::relock_process(bool did_unlock)
 {
-    if (did_unlock)
+    // Clearing the critical section may trigger the context switch
+    // flagged by calling Scheduler::donate_to or Scheduler::yield
+    // above. We have to do it this way because we intentionally
+    // leave the critical section here to be able to switch contexts.
+    u32 prev_flags;
+    u32 prev_crit = Processor::current().clear_critical(prev_flags, true);
+    if (did_unlock) {
+        // We've unblocked, relock the process if needed and carry on.
         process().big_lock().lock();
+    }
+    // NOTE: We may be on a different CPU now!
+    Processor::current().restore_critical(prev_crit, prev_flags);
 }
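
The clear_critical/restore_critical dance exists because, with software context switching, a yield requested while the CPU is inside a critical section is only flagged and is actually taken when the critical-section depth drops to zero. A toy single-CPU model of that behavior (my own illustration; the real Processor API tracks this per CPU and also saves interrupt flags):

    #include <cstdio>

    // Toy per-CPU state: nesting depth of critical sections plus a pending-yield flag.
    static unsigned s_critical_depth = 0;
    static bool s_yield_pending = false;

    static void context_switch() { std::puts("context switch happens here"); }

    // Like Scheduler::yield inside a critical section: only flag the switch.
    static void request_yield()
    {
        if (s_critical_depth > 0)
            s_yield_pending = true;
        else
            context_switch();
    }

    static void enter_critical() { ++s_critical_depth; }

    // Like Processor::clear_critical(..., true) in relock_process: leaving the
    // last critical section performs the postponed switch.
    static void leave_critical()
    {
        if (--s_critical_depth == 0 && s_yield_pending) {
            s_yield_pending = false;
            context_switch();
        }
    }

    int main()
    {
        enter_critical();
        request_yield();  // postponed
        leave_critical(); // the deferred yield fires here
    }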
u64 Thread::sleep(u64 ticks)
@@ -263,14 +280,9 @@ void Thread::finalize()
 #endif
     set_state(Thread::State::Dead);
 
-    if (m_joiner) {
-        ScopedSpinLock lock(m_joiner->m_lock);
-        ASSERT(m_joiner->m_joinee == this);
-        static_cast<JoinBlocker*>(m_joiner->m_blocker)->set_joinee_exit_value(m_exit_value);
-        static_cast<JoinBlocker*>(m_joiner->m_blocker)->set_interrupted_by_death();
-        m_joiner->m_joinee = nullptr;
-        // NOTE: We clear the joiner pointer here as well, to be tidy.
-        m_joiner = nullptr;
+    if (auto* joiner = m_joiner.exchange(nullptr, AK::memory_order_acq_rel)) {
+        // Notify joiner that we exited
+        static_cast<JoinBlocker*>(joiner->m_blocker)->joinee_exited(m_exit_value);
     }
 
     if (m_dump_backtrace_on_finalization)
@@ -992,19 +1004,9 @@ Thread::BlockResult Thread::wait_on(WaitQueue& queue, const char* reason, timeva
         Scheduler::yield();
     }
 
-    // Clearing the critical section may trigger the context switch
-    // flagged by calling Scheduler::donate_to or Scheduler::yield
-    // above. We have to do it this way because we intentionally
-    // leave the critical section here to be able to switch contexts.
-    u32 prev_flags;
-    u32 prev_crit = Processor::current().clear_critical(prev_flags, true);
-
-    // We've unblocked, relock the process if needed and carry on.
     relock_process(did_unlock);
 
-    // NOTE: We may be on a different CPU now!
-    Processor::current().restore_critical(prev_crit, prev_flags);
-
     // This looks counterproductive, but we may not actually leave
     // the critical section we just restored. It depends on whether
     // we were in one while being called.

File 5 of 5

@@ -96,8 +96,46 @@ public:
     u32 effective_priority() const;
 
-    void set_joinable(bool j) { m_is_joinable = j; }
-    bool is_joinable() const { return m_is_joinable; }
+    KResult try_join(Thread& joiner)
+    {
+        if (&joiner == this)
+            return KResult(-EDEADLK);
+
+        ScopedSpinLock lock(m_lock);
+        if (!m_is_joinable || state() == Dead)
+            return KResult(-EINVAL);
+
+        Thread* expected = nullptr;
+        if (!m_joiner.compare_exchange_strong(expected, &joiner, AK::memory_order_acq_rel))
+            return KResult(-EINVAL);
+
+        // From this point on the thread is no longer joinable by anyone
+        // else. It also means that if the join is timed, it becomes
+        // detached when a timeout happens.
+        m_is_joinable = false;
+        return KSuccess;
+    }
+
+    void join_done()
+    {
+        // To avoid possible deadlocking, this function must not acquire
+        // m_lock. This deadlock could occur if the joiner times out at
+        // almost the same time as this thread exits, and then calls into
+        // this function to clear the joiner.
+        m_joiner.store(nullptr, AK::memory_order_release);
+    }
+
+    void detach()
+    {
+        ScopedSpinLock lock(m_lock);
+        m_is_joinable = false;
+    }
+
+    bool is_joinable() const
+    {
+        ScopedSpinLock lock(m_lock);
+        return m_is_joinable;
+    }
 
     Process& process() { return m_process; }
     const Process& process() const { return m_process; }
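
Two details in try_join are easy to miss: it uses compare_exchange_strong rather than compare_exchange_weak because there is no retry loop around it, so a spurious failure would be misreported as "someone already joined" (EINVAL); and clearing m_is_joinable under m_lock while the joiner slot is claimed atomically is what lets a timed-out join silently degrade into a detach, as the comment notes.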
@@ -129,10 +167,30 @@ public:
         virtual const char* state_string() const = 0;
         virtual bool is_reason_signal() const { return false; }
         virtual timespec* override_timeout(timespec* timeout) { return timeout; }
-        void set_interrupted_by_death() { m_was_interrupted_by_death = true; }
-        bool was_interrupted_by_death() const { return m_was_interrupted_by_death; }
-        void set_interrupted_by_signal() { m_was_interrupted_while_blocked = true; }
-        bool was_interrupted_by_signal() const { return m_was_interrupted_while_blocked; }
+        virtual void was_unblocked() { }
+        void set_interrupted_by_death()
+        {
+            ScopedSpinLock lock(m_lock);
+            m_was_interrupted_by_death = true;
+        }
+        bool was_interrupted_by_death() const
+        {
+            ScopedSpinLock lock(m_lock);
+            return m_was_interrupted_by_death;
+        }
+        void set_interrupted_by_signal()
+        {
+            ScopedSpinLock lock(m_lock);
+            m_was_interrupted_while_blocked = true;
+        }
+        bool was_interrupted_by_signal() const
+        {
+            ScopedSpinLock lock(m_lock);
+            return m_was_interrupted_while_blocked;
+        }
+
+    protected:
+        mutable RecursiveSpinLock m_lock;
 
     private:
         bool m_was_interrupted_while_blocked { false };
@@ -142,14 +200,16 @@ public:
     class JoinBlocker final : public Blocker {
     public:
-        explicit JoinBlocker(Thread& joinee, void*& joinee_exit_value);
+        explicit JoinBlocker(Thread& joinee, KResult& try_join_result, void*& joinee_exit_value);
         virtual bool should_unblock(Thread&) override;
         virtual const char* state_string() const override { return "Joining"; }
-        void set_joinee_exit_value(void* value) { m_joinee_exit_value = value; }
+        virtual void was_unblocked() override;
+        void joinee_exited(void* value);
 
     private:
-        Thread& m_joinee;
+        Thread* m_joinee;
         void*& m_joinee_exit_value;
+        bool m_join_error { false };
     };
 
     class FileDescriptionBlocker : public Blocker {
@@ -344,26 +404,29 @@ public:
     {
         T t(forward<Args>(args)...);
 
-        {
-            ScopedSpinLock lock(m_lock);
-            // We should never be blocking a blocked (or otherwise non-active) thread.
-            ASSERT(state() == Thread::Running);
-            ASSERT(m_blocker == nullptr);
+        ScopedSpinLock lock(m_lock);
+        // We should never be blocking a blocked (or otherwise non-active) thread.
+        ASSERT(state() == Thread::Running);
+        ASSERT(m_blocker == nullptr);
 
-            if (t.should_unblock(*this)) {
-                // Don't block if the wake condition is already met
-                return BlockResult::NotBlocked;
-            }
+        if (t.should_unblock(*this)) {
+            // Don't block if the wake condition is already met
+            return BlockResult::NotBlocked;
+        }
 
-            m_blocker = &t;
-            m_blocker_timeout = t.override_timeout(timeout);
-            set_state(Thread::Blocked);
-        }
+        m_blocker = &t;
+        m_blocker_timeout = t.override_timeout(timeout);
+        set_state(Thread::Blocked);
+
+        // Release our lock
+        lock.unlock();
 
         // Yield to the scheduler, and wait for us to resume unblocked.
        yield_without_holding_big_lock();
 
-        ScopedSpinLock lock(m_lock);
+        // Acquire our lock again
+        lock.lock();
 
         // We should no longer be blocked once we woke up
         ASSERT(state() != Thread::Blocked);
@@ -371,6 +434,10 @@ public:
         m_blocker = nullptr;
         m_blocker_timeout = nullptr;
 
+        // Notify the blocker that we are no longer blocking. It may need
+        // to clean up now while we're still holding m_lock
+        t.was_unblocked();
+
         if (t.was_interrupted_by_signal())
             return BlockResult::InterruptedBySignal;
@@ -492,14 +559,23 @@ public:
     void set_active(bool active)
     {
         ASSERT(g_scheduler_lock.own_lock());
-        m_is_active = active;
+        m_is_active.store(active, AK::memory_order_release);
     }
 
     bool is_finalizable() const
     {
         ASSERT(g_scheduler_lock.own_lock());
-        return !m_is_active;
+        // We can't finalize as long as this thread is still running.
+        // Note that checking for Running state here isn't sufficient,
+        // as the thread may not be in Running state but switching out.
+        // m_is_active is set to false once the context switch is
+        // complete and the thread is not executing on any processor.
+        if (m_is_active.load(AK::memory_order_consume))
+            return false;
+        // We can't finalize until the thread is either detached or
+        // a join has started. We can't make m_is_joinable atomic
+        // because that would introduce a race in try_join.
+        ScopedSpinLock lock(m_lock);
+        return !m_is_joinable;
     }
 
     Thread* clone(Process&);
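
(One aside on the memory_order_consume load of m_is_active above: mainstream compilers currently promote consume to acquire, so this pairs with the memory_order_release store in set_active either way.)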
@@ -559,10 +635,9 @@ private:
     timespec* m_blocker_timeout { nullptr };
     const char* m_wait_reason { nullptr };
 
-    bool m_is_active { false };
+    Atomic<bool> m_is_active { false };
     bool m_is_joinable { true };
-    Thread* m_joiner { nullptr };
-    Thread* m_joinee { nullptr };
+    Atomic<Thread*> m_joiner { nullptr };
     void* m_exit_value { nullptr };
     unsigned m_syscall_count { 0 };