1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-05 19:35:52 +03:00
Files
glibc/sysdeps/nptl/dl-tls_init_tp.c
Adhemerval Zanella e4585134ca nptl: Do not use pthread set_tid_address as state synchronization (BZ #19951)
The use after free described in BZ#19951 is due to the use of two different
PD fields, 'joinid' and 'cancelhandling', to describe the thread state
and to synchronize the calls of pthread_join, pthread_detach,
pthread_exit, and normal thread exit.

Any state change potentially requires checking both fields
atomically to handle partial state (such as pthread_join() with a
cancellation handler to issue a 'joinstate' field rollback).

This patch uses a different PD member with 4 possible states (JOINABLE,
DETACHED, EXITING, and EXITED) instead of pthread 'tid' field, with
the following logic:

  1. On pthread_create the initial state is set either to JOINABLE or
     DETACHED depending on the pthread attribute used.

  2. On pthread_detach, a CAS is issued on the state.  If the CAS
     fails it means that thread is already detached (DETACHED) or is
     being terminated (EXITING).  For the former, EINVAL is returned,
     while for the latter pthread_detach is responsible for joining the
     thread (and deallocating any internal resource).

  3. In the exit phase of the wrapper function for the thread start
     routine (reached either if the thread function has returned,
     pthread_exit has been called, or cancellation handling has been
     acted upon) we issue a CAS on state to set to EXITING mode.  If the
     thread is previously on DETACHED mode the thread itself is
     responsible for arranging the deallocation of any resource,
     otherwise the thread needs to be joined (detached threads cannot
     immediately deallocate themselves).

  4. The clear_tid_field on 'clone' call is changed to set the new
     'state' field on thread exit (EXITED).  This state is only
     reached at thread termination.

  5. The pthread_join implementation is now simpler: the futex wait
     is done directly on thread state and there is no need to reset it
     in case of timeout since the state is now set either by
     pthread_detach() or by the kernel on process termination.

The race condition on pthread_detach is avoided with only one atomic
operation on PD state: once the mode is set to THREAD_STATE_DETACHED
it is up to thread itself to deallocate its memory (done on the exit
phase at pthread_create()).

Also, the INVALID_NOT_TERMINATED_TD_P is removed since a negative
tid is not possible and the macro is not used anywhere.

This change triggers an invalid C11 thread test: it creates a thread,
which detaches itself, and after a timeout the creating thread checks
if the join fails.  The issue is once thrd_join() is called the thread
lifetime is not defined.

Checked on x86_64-linux-gnu, i686-linux-gnu, aarch64-linux-gnu,
arm-linux-gnueabihf, and powerpc64-linux-gnu.
2025-07-09 19:57:21 -03:00

121 lines
3.9 KiB
C

/* Completion of TCB initialization after TLS_INIT_TP. NPTL version.
Copyright (C) 2020-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <kernel-features.h>
#include <ldsodefs.h>
#include <list.h>
#include <pthreadP.h>
#include <tls.h>
#include <rseq-internal.h>
#include <thread_pointer.h>
#include <dl-symbol-redir-ifunc.h>
#define TUNABLE_NAMESPACE pthread
#include <dl-tunables.h>
#ifndef __ASSUME_SET_ROBUST_LIST
/* Set to true by __tls_init_tp when the set_robust_list system call
   issued for the initial thread does not report an error.  Only
   defined when __ASSUME_SET_ROBUST_LIST is not set. */
bool __nptl_set_robust_list_avail;
rtld_hidden_data_def (__nptl_set_robust_list_avail)
#endif
/* Value that __tls_init_tp copies into the initial thread's
   report_events field.  A debugger sets this variable because it
   cannot find the initial thread before GL (dl_stack_user) has been
   initialized. */
bool __nptl_initial_report_events;
rtld_hidden_def (__nptl_initial_report_events)
#ifdef SHARED
/* No-op placeholder installed as both the rtld mutex lock and unlock
   hook by __tls_pre_init_tp.  LOCK is ignored and 0 (success) is always
   returned.  See __rtld_mutex_init.  */
static int
rtld_mutex_dummy (pthread_mutex_t *lock)
{
  (void) lock;
  return 0;
}
#endif
/* rseq flags word.  It is const and has no initializer, so its value
   is zero. */
const unsigned int __rseq_flags;
/* Hidden variable; by its name it holds the rseq area alignment.
   NOTE(review): it is not written anywhere in this part of the file --
   confirm where it is set. */
size_t _rseq_align attribute_hidden;
/* Put the global stack lists and, in shared builds, the rtld mutex
   hooks into a usable state.  Takes no arguments and returns nothing.  */
void
__tls_pre_init_tp (void)
{
#ifdef SHARED
  /* Install the no-op implementation for both mutex hooks.  */
  ___rtld_mutex_unlock = rtld_mutex_dummy;
  ___rtld_mutex_lock = rtld_mutex_dummy;
#endif

  /* A list head is not consistent until it has been initialized, so
     initialize every stack list.  */
  INIT_LIST_HEAD (&GL (dl_stack_cache));
  INIT_LIST_HEAD (&GL (dl_stack_user));
  INIT_LIST_HEAD (&GL (dl_stack_used));
}
/* Finish initializing the thread descriptor of the calling (initial)
   thread.  In order, this: adds the descriptor to the user stack list,
   records the TID and join state, sets up the first thread-specific
   data block and the stack mode, propagates the debugger's event
   reporting flag, registers the robust mutex list and the rseq area
   with the kernel, and records the stack block size.  Takes no
   arguments and returns nothing. */
void
__tls_init_tp (void)
{
/* Descriptor of the thread executing this function. */
struct pthread *pd = THREAD_SELF;
/* Set up thread stack list management. */
list_add (&pd->list, &GL (dl_stack_user));
/* Early initialization of the TCB.  The set_tid_address system call is
   given the address of the joinstate field, and its result is stored as
   this thread's TID. */
pd->tid = INTERNAL_SYSCALL_CALL (set_tid_address, &pd->joinstate);
/* Point the first slot of the thread-specific data table at the block
   embedded in the descriptor. */
THREAD_SETMEM (pd, specific[0], &pd->specific_1stblock[0]);
THREAD_SETMEM (pd, stack_mode, ALLOCATE_GUARD_USER);
/* The initial thread starts in the joinable state. */
THREAD_SETMEM (pd, joinstate, THREAD_STATE_JOINABLE);
/* Before initializing GL (dl_stack_user), the debugger could not
find us and had to set __nptl_initial_report_events. Propagate
its setting. */
THREAD_SETMEM (pd, report_events, __nptl_initial_report_events);
/* Initialize the robust mutex data. */
{
#if __PTHREAD_MUTEX_HAVE_PREV
pd->robust_prev = &pd->robust_head;
#endif
/* An empty robust list is a head that points to itself. */
pd->robust_head.list = &pd->robust_head;
/* Offset from a mutex's __data.__list.__next member to its
   __data.__lock member. */
pd->robust_head.futex_offset = (offsetof (pthread_mutex_t, __data.__lock)
- offsetof (pthread_mutex_t,
__data.__list.__next));
int res = INTERNAL_SYSCALL_CALL (set_robust_list, &pd->robust_head,
sizeof (struct robust_list_head));
/* Only success is recorded, and only when availability of the system
   call is not assumed at build time; an error is otherwise ignored. */
if (!INTERNAL_SYSCALL_ERROR_P (res))
{
#ifndef __ASSUME_SET_ROBUST_LIST
__nptl_set_robust_list_avail = true;
#endif
}
}
{
/* If the registration fails or is disabled by tunable, the public
'__rseq_size' will be set to '0' regardless of the feature size of the
allocated rseq area. An rseq area of at least 32 bytes is always
allocated since application code is allowed to check the status of the
rseq registration by reading the content of the 'cpu_id' field. */
/* do_rseq is the value of the 'rseq' tunable in the pthread tunable
   namespace. */
bool do_rseq = TUNABLE_GET (rseq, int, NULL);
if (!rseq_register_current_thread (pd, do_rseq))
_rseq_size = 0;
}
/* Set initial thread's stack block from 0 up to __libc_stack_end.
It will be bigger than it actually is, but for unwind.c/pt-longjmp.c
purposes this is good enough. */
THREAD_SETMEM (pd, stackblock_size, (size_t) __libc_stack_end);
}