1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-09-11 12:10:50 +03:00
Files
glibc/sysdeps/x86_64/dl-trampoline.S
H.J. Lu 848f0e46f0 i386: Update ___tls_get_addr to preserve vector registers
Compiler generates the following instruction sequence for dynamic TLS
access:

	leal	tls_var@tlsgd(,%ebx,1), %eax
	call	___tls_get_addr@PLT

CALL instruction is transparent to compiler which assumes all registers,
except for EFLAGS, AX, CX, and DX, are unchanged after CALL.  But
___tls_get_addr is a normal function which doesn't preserve any vector
registers.

1. Rename the generic __tls_get_addr function to ___tls_get_addr_internal.
2. Change ___tls_get_addr to a wrapper function with implementations for
FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all vector registers.
3. dl-tlsdesc-dynamic.h has:

_dl_tlsdesc_dynamic:
	/* Like all TLS resolvers, preserve call-clobbered registers.
	   We need two scratch regs anyway.  */
	subl	$32, %esp
	cfi_adjust_cfa_offset (32)

It is wrong to use

	movl	%ebx, -28(%esp)
	movl	%esp, %ebx
	cfi_def_cfa_register(%ebx)
	...
	mov	%ebx, %esp
	cfi_def_cfa_register(%esp)
	movl	-28(%esp), %ebx

to preserve EBX on stack.  Fix it with:

	movl	%ebx, 28(%esp)
	movl	%esp, %ebx
	cfi_def_cfa_register(%ebx)
	...
	mov	%ebx, %esp
	cfi_def_cfa_register(%esp)
	movl	28(%esp), %ebx

4. Update _dl_tlsdesc_dynamic to call ___tls_get_addr_internal directly.
5. Add have-test-mtls-traditional to compile tst-tls23-mod.c with
traditional TLS variant to verify the fix.
6. Define DL_RUNTIME_RESOLVE_REALIGN_STACK in sysdeps/x86/sysdep.h.

This fixes BZ #32996.

Co-Authored-By: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
2025-06-19 04:30:31 +08:00

105 lines
3.0 KiB
ArmAsm

/* PLT trampolines. x86-64 version.
Copyright (C) 2004-2025 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
#include <config.h>
#include <sysdep.h>
#include <cpu-features-offsets.h>
#include <features-offsets.h>
#include <link-defines.h>
#include <isa-level.h>
/* Area on stack to save and restore registers used for parameter
passing when calling _dl_fixup. */
#define REGISTER_SAVE_RAX 0
#define REGISTER_SAVE_RCX (REGISTER_SAVE_RAX + 8)
#define REGISTER_SAVE_RDX (REGISTER_SAVE_RCX + 8)
#define REGISTER_SAVE_RSI (REGISTER_SAVE_RDX + 8)
#define REGISTER_SAVE_RDI (REGISTER_SAVE_RSI + 8)
#define REGISTER_SAVE_R8 (REGISTER_SAVE_RDI + 8)
#define REGISTER_SAVE_R9 (REGISTER_SAVE_R8 + 8)
#define RESTORE_AVX
#ifdef SHARED
# define VEC_SIZE 64
# define VMOVA vmovdqa64
# define VEC(i) zmm##i
# define _dl_runtime_profile _dl_runtime_profile_avx512
# define SECTION(p) p##.evex512
# include "dl-trampoline.h"
# undef _dl_runtime_profile
# undef VEC
# undef VMOVA
# undef VEC_SIZE
# undef SECTION
# if MINIMUM_X86_ISA_LEVEL <= AVX_X86_ISA_LEVEL
# define VEC_SIZE 32
# define VMOVA vmovdqa
# define VEC(i) ymm##i
# define SECTION(p) p##.avx
# define _dl_runtime_profile _dl_runtime_profile_avx
# include "dl-trampoline.h"
# undef _dl_runtime_profile
# undef VEC
# undef VMOVA
# undef VEC_SIZE
# undef SECTION
# endif
# if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
/* movaps/movups is 1-byte shorter. */
# define VEC_SIZE 16
# define VMOVA movaps
# define VEC(i) xmm##i
# define _dl_runtime_profile _dl_runtime_profile_sse
# undef RESTORE_AVX
# include "dl-trampoline.h"
# undef _dl_runtime_profile
# undef VEC
# undef VMOVA
# undef VEC_SIZE
# endif
#endif /* SHARED */
#if MINIMUM_X86_ISA_LEVEL < AVX_X86_ISA_LEVEL
# define USE_FXSAVE
# define STATE_SAVE_ALIGNMENT 16
# define _dl_runtime_resolve _dl_runtime_resolve_fxsave
# include "dl-trampoline.h"
# undef _dl_runtime_resolve
# undef USE_FXSAVE
# undef STATE_SAVE_ALIGNMENT
#endif
#define USE_XSAVE
#define STATE_SAVE_ALIGNMENT 64
#define _dl_runtime_resolve _dl_runtime_resolve_xsave
#include "dl-trampoline.h"
#undef _dl_runtime_resolve
#undef USE_XSAVE
#undef STATE_SAVE_ALIGNMENT
#define USE_XSAVEC
#define STATE_SAVE_ALIGNMENT 64
#define _dl_runtime_resolve _dl_runtime_resolve_xsavec
#include "dl-trampoline.h"
#undef _dl_runtime_resolve
#undef USE_XSAVEC
#undef STATE_SAVE_ALIGNMENT