mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-08 17:42:12 +03:00
nptl: Do not use pthread set_tid_address as state synchronization (BZ #19951)
The use after free described in BZ#19951 is due to the use of two different PD fields, 'joinid' and 'cancelhandling', to describe the thread state and to synchronize the calls of pthread_join, pthread_detach, pthread_exit, and normal thread exit. Any state change potentially requires checking both fields atomically to handle partial state (such as pthread_join() with a cancellation handler to issue a 'joinstate' field rollback). This patch uses a different PD member with 4 possible states (JOINABLE, DETACHED, EXITING, and EXITED) instead of the pthread 'tid' field, with the following logic: 1. On pthread_create the initial state is set either to JOINABLE or DETACHED depending on the pthread attribute used. 2. On pthread_detach, a CAS is issued on the state. If the CAS fails it means that the thread is already detached (DETACHED) or is being terminated (EXITING). For the former an EINVAL is returned, while for the latter pthread_detach is responsible for joining the thread (and deallocating any internal resource). 3. In the exit phase of the wrapper function for the thread start routine (reached either if the thread function has returned, pthread_exit has been called, or the cancellation handler has been acted upon) we issue a CAS on the state to set it to EXITING mode. If the thread was previously in DETACHED mode the thread itself is responsible for arranging the deallocation of any resource; otherwise the thread needs to be joined (detached threads cannot immediately deallocate themselves). 4. The clear_tid_field on the 'clone' call is changed to set the new 'state' field on thread exit (EXITED). This state is only reached at thread termination. 5. The pthread_join implementation is now simpler: the futex wait is done directly on the thread state and there is no need to reset it in case of timeout, since the state is now set either by pthread_detach() or by the kernel on process termination. 
The race condition on pthread_detach is avoided with only one atomic operation on the PD state: once the mode is set to THREAD_STATE_DETACHED it is up to the thread itself to deallocate its memory (done in the exit phase at pthread_create()). Also, the INVALID_NOT_TERMINATED_TD_P macro is removed, since a negative tid is not possible and the macro is not used anywhere. This change exposes an invalid C11 thread test: it creates a thread, which detaches itself, and after a timeout the creating thread checks whether the join fails. The issue is that once thrd_join() is called, the thread's lifetime is not defined. Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu, arm-linux-gnueabihf, and powerpc64-linux-gnu.
This commit is contained in:
@@ -22,116 +22,78 @@
|
||||
#include <time.h>
|
||||
#include <futex-internal.h>
|
||||
|
||||
static void
|
||||
cleanup (void *arg)
|
||||
/* Check for a possible deadlock situation where the threads are waiting for
|
||||
each other to finish. Note that this is a "may" error. To be 100% sure we
|
||||
catch this error we would have to lock the data structures but it is not
|
||||
necessary. In the unlikely case that two threads are really caught in this
|
||||
situation they will deadlock. It is the programmer's problem to figure
|
||||
this out. */
|
||||
static inline bool
|
||||
check_for_deadlock (struct pthread *pd)
|
||||
{
|
||||
/* If we already changed the waiter ID, reset it. The call cannot
|
||||
fail for any reason but the thread not having done that yet so
|
||||
there is no reason for a loop. */
|
||||
struct pthread *self = THREAD_SELF;
|
||||
atomic_compare_exchange_weak_acquire (&arg, &self, NULL);
|
||||
return ((pd == self
|
||||
|| (atomic_load_acquire (&self->joinstate) == THREAD_STATE_DETACHED
|
||||
&& (pd->cancelhandling
|
||||
& (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK
|
||||
| TERMINATED_BITMASK)) == 0))
|
||||
&& !cancel_enabled_and_canceled (self->cancelhandling));
|
||||
}
|
||||
|
||||
int
|
||||
__pthread_clockjoin_ex (pthread_t threadid, void **thread_return,
|
||||
clockid_t clockid,
|
||||
const struct __timespec64 *abstime, bool block)
|
||||
const struct __timespec64 *abstime)
|
||||
{
|
||||
struct pthread *pd = (struct pthread *) threadid;
|
||||
|
||||
/* Make sure the descriptor is valid. */
|
||||
if (INVALID_NOT_TERMINATED_TD_P (pd))
|
||||
/* Not a valid thread handle. */
|
||||
return ESRCH;
|
||||
|
||||
/* Is the thread joinable?. */
|
||||
if (IS_DETACHED (pd))
|
||||
/* We cannot wait for the thread. */
|
||||
return EINVAL;
|
||||
|
||||
/* Make sure the clock and time specified are valid. */
|
||||
if (abstime
|
||||
&& __glibc_unlikely (!futex_abstimed_supported_clockid (clockid)
|
||||
|| ! valid_nanoseconds (abstime->tv_nsec)))
|
||||
return EINVAL;
|
||||
|
||||
struct pthread *self = THREAD_SELF;
|
||||
int result = 0;
|
||||
|
||||
LIBC_PROBE (pthread_join, 1, threadid);
|
||||
|
||||
if ((pd == self
|
||||
|| (self->joinid == pd
|
||||
&& (pd->cancelhandling
|
||||
& (CANCELING_BITMASK | CANCELED_BITMASK | EXITING_BITMASK
|
||||
| TERMINATED_BITMASK)) == 0))
|
||||
&& !cancel_enabled_and_canceled (self->cancelhandling))
|
||||
/* This is a deadlock situation. The threads are waiting for each
|
||||
other to finish. Note that this is a "may" error. To be 100%
|
||||
sure we catch this error we would have to lock the data
|
||||
structures but it is not necessary. In the unlikely case that
|
||||
two threads are really caught in this situation they will
|
||||
deadlock. It is the programmer's problem to figure this
|
||||
out. */
|
||||
return EDEADLK;
|
||||
|
||||
/* Wait for the thread to finish. If it is already locked something
|
||||
is wrong. There can only be one waiter. */
|
||||
else if (__glibc_unlikely (atomic_compare_exchange_weak_acquire (&pd->joinid,
|
||||
&self,
|
||||
NULL)))
|
||||
/* There is already somebody waiting for the thread. */
|
||||
return EINVAL;
|
||||
|
||||
/* BLOCK waits either indefinitely or based on an absolute time. POSIX also
|
||||
states a cancellation point shall occur for pthread_join, and we use the
|
||||
same rationale for posix_timedjoin_np. Both clockwait_tid and the futex
|
||||
call use the cancellable variant. */
|
||||
if (block)
|
||||
int result = 0;
|
||||
unsigned int state;
|
||||
while ((state = atomic_load_acquire (&pd->joinstate))
|
||||
!= THREAD_STATE_EXITED)
|
||||
{
|
||||
/* During the wait we change to asynchronous cancellation. If we
|
||||
are cancelled the thread we are waiting for must be marked as
|
||||
un-wait-ed for again. */
|
||||
pthread_cleanup_push (cleanup, &pd->joinid);
|
||||
if (check_for_deadlock (pd))
|
||||
return EDEADLK;
|
||||
|
||||
/* We need acquire MO here so that we synchronize with the
|
||||
kernel's store to 0 when the clone terminates. (see above) */
|
||||
pid_t tid;
|
||||
while ((tid = atomic_load_acquire (&pd->tid)) != 0)
|
||||
{
|
||||
/* The kernel notifies a process which uses CLONE_CHILD_CLEARTID via
|
||||
futex wake-up when the clone terminates. The memory location
|
||||
contains the thread ID while the clone is running and is reset to
|
||||
zero by the kernel afterwards. The kernel up to version 3.16.3
|
||||
does not use the private futex operations for futex wake-up when
|
||||
the clone terminates. */
|
||||
int ret = __futex_abstimed_wait_cancelable64 (
|
||||
(unsigned int *) &pd->tid, tid, clockid, abstime, LLL_SHARED);
|
||||
if (ret == ETIMEDOUT || ret == EOVERFLOW)
|
||||
{
|
||||
result = ret;
|
||||
break;
|
||||
}
|
||||
/* POSIX states calling pthread_join on a non joinable thread is
|
||||
undefined. However, if PD is still in the cache we can warn
|
||||
the caller. */
|
||||
if (state == THREAD_STATE_DETACHED)
|
||||
return EINVAL;
|
||||
|
||||
/* pthread_join is a cancellation entrypoint and we use the same
|
||||
rationale for pthread_timedjoin_np.
|
||||
|
||||
The kernel notifies a process which uses CLONE_CHILD_CLEARTID via
|
||||
a memory zeroing and futex wake-up when the process terminates.
|
||||
The futex operation is not private. */
|
||||
int ret = __futex_abstimed_wait_cancelable64 (&pd->joinstate, state,
|
||||
clockid, abstime,
|
||||
LLL_SHARED);
|
||||
if (ret == ETIMEDOUT || ret == EOVERFLOW)
|
||||
{
|
||||
result = ret;
|
||||
break;
|
||||
}
|
||||
|
||||
pthread_cleanup_pop (0);
|
||||
}
|
||||
|
||||
void *pd_result = pd->result;
|
||||
if (__glibc_likely (result == 0))
|
||||
{
|
||||
/* We mark the thread as terminated and as joined. */
|
||||
pd->tid = -1;
|
||||
|
||||
/* Store the return value if the caller is interested. */
|
||||
if (thread_return != NULL)
|
||||
*thread_return = pd_result;
|
||||
|
||||
/* Free the TCB. */
|
||||
__nptl_free_tcb (pd);
|
||||
}
|
||||
else
|
||||
pd->joinid = NULL;
|
||||
|
||||
LIBC_PROBE (pthread_join_ret, 3, threadid, result, pd_result);
|
||||
|
||||
|
Reference in New Issue
Block a user