mirror of
				https://sourceware.org/git/glibc.git
				synced 2025-11-03 20:53:13 +03:00 
			
		
		
		
	Compiler generates the following instruction sequence for dynamic TLS
	access:

		leal	tls_var@tlsgd(,%ebx,1), %eax
		call	___tls_get_addr@PLT

	CALL instruction is transparent to compiler which assumes all
	registers, except for EFLAGS, AX, CX, and DX, are unchanged after
	CALL.  But ___tls_get_addr is a normal function which doesn't preserve
	any vector registers.

	1. Rename the generic __tls_get_addr function to
	   ___tls_get_addr_internal.
	2. Change ___tls_get_addr to a wrapper function with implementations
	   for FNSAVE, FXSAVE, XSAVE and XSAVEC to save and restore all vector
	   registers.
	3. dl-tlsdesc-dynamic.h has:

		_dl_tlsdesc_dynamic:
			/* Like all TLS resolvers, preserve call-clobbered registers.
			   We need two scratch regs anyway.  */
			subl	$32, %esp
			cfi_adjust_cfa_offset (32)

	   It is wrong to use

			movl	%ebx, -28(%esp)
			movl	%esp, %ebx
			cfi_def_cfa_register(%ebx)
			...
			mov	%ebx, %esp
			cfi_def_cfa_register(%esp)
			movl	-28(%esp), %ebx

	   to preserve EBX on stack.  Fix it with:

			movl	%ebx, 28(%esp)
			movl	%esp, %ebx
			cfi_def_cfa_register(%ebx)
			...
			mov	%ebx, %esp
			cfi_def_cfa_register(%esp)
			movl	28(%esp), %ebx

	4. Update _dl_tlsdesc_dynamic to call ___tls_get_addr_internal
	   directly.
	5. Add have-test-mtls-traditional to compile tst-tls23-mod.c with
	   traditional TLS variant to verify the fix.
	6. Define DL_RUNTIME_RESOLVE_REALIGN_STACK in sysdeps/x86/sysdep.h.

	This fixes BZ #32996.

	Co-Authored-By: Adhemerval Zanella <adhemerval.zanella@linaro.org>
	Signed-off-by: H.J. Lu <hjl.tools@gmail.com>
	Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
		
			
				
	
	
		
			212 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			212 lines
		
	
	
		
			7.3 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* Assembler macros for x86.
 | 
						|
   Copyright (C) 2017-2025 Free Software Foundation, Inc.
 | 
						|
   This file is part of the GNU C Library.
 | 
						|
 | 
						|
   The GNU C Library is free software; you can redistribute it and/or
 | 
						|
   modify it under the terms of the GNU Lesser General Public
 | 
						|
   License as published by the Free Software Foundation; either
 | 
						|
   version 2.1 of the License, or (at your option) any later version.
 | 
						|
 | 
						|
   The GNU C Library is distributed in the hope that it will be useful,
 | 
						|
   but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
   Lesser General Public License for more details.
 | 
						|
 | 
						|
   You should have received a copy of the GNU Lesser General Public
 | 
						|
   License along with the GNU C Library; if not, see
 | 
						|
   <https://www.gnu.org/licenses/>.  */
 | 
						|
 | 
						|
#ifndef _X86_SYSDEP_H
 | 
						|
#define _X86_SYSDEP_H 1
 | 
						|
 | 
						|
#include <sysdeps/generic/sysdep.h>
 | 
						|
 | 
						|
/* The extended state feature IDs in the state component bitmap.  Each
   value is a bit position; the state masks in this header are built as
   (1 << X86_XSTATE_*_ID).  Only the IDs used by glibc are listed, so the
   numbering has gaps (8 and 10-16 are not defined here).  */
#define X86_XSTATE_X87_ID	0	/* x87 FPU state.  */
#define X86_XSTATE_SSE_ID	1	/* SSE (XMM) state.  */
#define X86_XSTATE_AVX_ID	2	/* AVX (upper YMM) state.  */
#define X86_XSTATE_BNDREGS_ID	3	/* MPX bound registers.  */
#define X86_XSTATE_BNDCFG_ID	4	/* MPX bound configuration.  */
#define X86_XSTATE_K_ID		5	/* AVX-512 opmask registers.  */
#define X86_XSTATE_ZMM_H_ID	6	/* Upper 256 bits of the low ZMMs.  */
#define X86_XSTATE_ZMM_ID	7	/* The high 16 ZMM registers.  */
#define X86_XSTATE_PKRU_ID	9	/* Protection-key rights register.  */
#define X86_XSTATE_TILECFG_ID	17	/* AMX tile configuration.  */
#define X86_XSTATE_TILEDATA_ID	18	/* AMX tile data.  */
#define X86_XSTATE_APX_F_ID	19	/* APX extended registers.  */
 | 
						|
 | 
						|
#ifdef __x86_64__
 | 
						|
/* Offset for fxsave/xsave area used by _dl_runtime_resolve.  Also need
   space to preserve RCX, RDX, RSI, RDI, R8, R9 and RAX.  It must be
   aligned to 16 bytes for fxsave and 64 bytes for xsave.  It is non-zero
   because MOV, instead of PUSH, is used to save registers onto stack.

   +==================+<- stack frame start aligned at 8 or 16 bytes
   |                  |<- paddings for stack realignment of 64 bytes
   |------------------|<- xsave buffer end aligned at 64 bytes
   |                  |<-
   |                  |<-
   |                  |<-
   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- R9
   |                  |<- R8
   |                  |<- RDI
   |                  |<- RSI
   |                  |<- RDX
   |                  |<- RCX
   |                  |<- RAX
   +==================+<- RSP aligned at 64 bytes

   The value is seven 8-byte register slots (RAX, RCX, RDX, RSI, RDI, R8
   and R9) plus one 8-byte padding slot, i.e. 64 bytes, so the xsave
   buffer starts on a 64-byte boundary when RSP is 64-byte aligned.  */
# define STATE_SAVE_OFFSET (8 * 7 + 8)
 | 
						|
 | 
						|
/* _dl_tlsdesc_dynamic preserves RDI, RSI and RBX before realigning
   stack.  After realigning stack, it saves RCX, RDX, R8, R9, R10 and
   R11.  Allocate space for RDI, RSI and RBX to avoid clobbering saved
   RDI, RSI and RBX values on stack by xsave.

   +==================+<- stack frame start aligned at 8 or 16 bytes
   |                  |<- RDI saved in the red zone
   |                  |<- RSI saved in the red zone
   |                  |<- RBX saved in the red zone
   |                  |<- paddings for stack realignment of 64 bytes
   |------------------|<- xsave buffer end aligned at 64 bytes
   |                  |<-
   |                  |<-
   |                  |<-
   |------------------|<- xsave buffer start at STATE_SAVE_OFFSET(%rsp)
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- 8-byte padding for 64-byte alignment
   |                  |<- R11
   |                  |<- R10
   |                  |<- R9
   |                  |<- R8
   |                  |<- RDX
   |                  |<- RCX
   +==================+<- RSP aligned at 64 bytes

   Define the total register save area size for all integer registers by
   adding 24 to STATE_SAVE_OFFSET since RDI, RSI and RBX are saved onto
   stack without adjusting stack pointer first, using the red-zone.  The
   24 bytes are the three 8-byte slots for RDI, RSI and RBX.  */
# define TLSDESC_CALL_REGISTER_SAVE_AREA (STATE_SAVE_OFFSET + 24)
 | 
						|
 | 
						|
/* Save SSE, AVX, AVX512, mask, bound and APX registers.  Bound and APX
   registers are mutually exclusive.  */
# define STATE_SAVE_MASK		\
  ((1 << X86_XSTATE_SSE_ID)		\
   | (1 << X86_XSTATE_AVX_ID)		\
   | (1 << X86_XSTATE_BNDREGS_ID)	\
   | (1 << X86_XSTATE_K_ID)		\
   | (1 << X86_XSTATE_ZMM_H_ID)		\
   | (1 << X86_XSTATE_ZMM_ID)		\
   | (1 << X86_XSTATE_APX_F_ID))

/* The maximum supported xstate ID.  */
# define X86_XSTATE_MAX_ID	X86_XSTATE_APX_F_ID

/* AMX state mask: the tile configuration and tile data components.  */
# define AMX_STATE_SAVE_MASK		\
  ((1 << X86_XSTATE_TILECFG_ID) | (1 << X86_XSTATE_TILEDATA_ID))

/* States to be included in xsave_state_full_size: everything in
   STATE_SAVE_MASK plus the AMX components.  */
# define FULL_STATE_SAVE_MASK		\
  (STATE_SAVE_MASK | AMX_STATE_SAVE_MASK)
 | 
						|
#else
 | 
						|
/* Offset for fxsave/xsave area used by _dl_tlsdesc_dynamic.  Since i386
   uses PUSH to save registers onto stack, use 0 here.  */
# define STATE_SAVE_OFFSET 0
# define TLSDESC_CALL_REGISTER_SAVE_AREA 0

/* Save SSE, AVX, AVX512, mask and bound registers.  Unlike the x86-64
   mask, X86_XSTATE_ZMM_ID and X86_XSTATE_APX_F_ID are not included.  */
# define STATE_SAVE_MASK		\
  ((1 << X86_XSTATE_SSE_ID)		\
   | (1 << X86_XSTATE_AVX_ID)		\
   | (1 << X86_XSTATE_BNDREGS_ID)	\
   | (1 << X86_XSTATE_K_ID)		\
   | (1 << X86_XSTATE_ZMM_H_ID))

/* The maximum supported xstate ID.  */
# define X86_XSTATE_MAX_ID	X86_XSTATE_ZMM_H_ID

/* States to be included in xsave_state_size.  */
# define FULL_STATE_SAVE_MASK		STATE_SAVE_MASK
 | 
						|
#endif
 | 
						|
 | 
						|
/* States which should be saved for TLSDESC_CALL and TLS_DESC_CALL.
   Compiler assumes that all registers, including AMX and x87 FPU
   stack registers, are unchanged after CALL, except for EFLAGS and
   RAX/EAX.  This is FULL_STATE_SAVE_MASK with the x87 component
   added.  */
#define TLSDESC_CALL_STATE_SAVE_MASK	\
  (FULL_STATE_SAVE_MASK | (1 << X86_XSTATE_X87_ID))
 | 
						|
 | 
						|
/* Constants for bits in __x86_string_control:  */

/* Avoid short distance REP MOVSB.  This is bit 0 of the control word.  */
#define X86_STRING_CONTROL_AVOID_SHORT_DISTANCE_REP_MOVSB	(1 << 0)
 | 
						|
 | 
						|
#ifdef	__ASSEMBLER__
 | 
						|
 | 
						|
/* Syntactic details of assembler.  */
 | 
						|
 | 
						|
/* ELF uses byte-counts for .align, most others use log2 of count of bytes.  */
 | 
						|
#define ALIGNARG(log2) 1<<log2
 | 
						|
#define ASM_SIZE_DIRECTIVE(name) .size name,.-name;
 | 
						|
 | 
						|
/* Common entry 16 byte aligns: the second argument to ENTRY_P2ALIGN is
   the log2 of the alignment.  ENTRY_P2ALIGN itself is not defined in
   this header.  */
#define ENTRY(name) ENTRY_P2ALIGN (name, 4)

/* Close a function opened with ENTRY: end the CFI region and record
   the symbol size.  */
#undef	END
#define END(name)							      \
  cfi_endproc;								      \
  ASM_SIZE_DIRECTIVE(name)

/* The _CHK variants are plain aliases of ENTRY and END.  */
#define ENTRY_CHK(name) ENTRY (name)
#define END_CHK(name) END (name)
 | 
						|
 | 
						|
/* Since C identifiers are not normally prefixed with an underscore
   on this system, the asm identifier `syscall_error' intrudes on the
   C name space.  Make sure we use an innocuous name.  The same
   renaming is applied to `mcount'.  */
#define	syscall_error	__syscall_error
#define mcount		_mcount
 | 
						|
 | 
						|
/* PSEUDO_END is an alias of END; any earlier definition is discarded.  */
#undef	PSEUDO_END
#define	PSEUDO_END(name)						      \
  END (name)
 | 
						|
 | 
						|
/* Local label name for asm code.  The definitions are skipped when L is
   already defined as a macro.  */
#ifndef L
/* ELF-like local names start with `.L'.  */
# define LOCAL_LABEL(name) .L##name
# define L(name)	LOCAL_LABEL(name)
#endif
 | 
						|
 | 
						|
/* Assembler directive that switches to the ".text.atom" section, with
   section flags "ax" (allocatable, executable).  */
#define atom_text_section .section ".text.atom", "ax"
 | 
						|
 | 
						|
#ifndef DL_STACK_ALIGNMENT
/* Due to GCC bug:

   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58066

   __tls_get_addr may be called with 8-byte/4-byte stack alignment.
   Although this bug has been fixed in GCC 4.9.4, 5.3 and 6, we can't
   assume that stack will be always aligned at 16 bytes.

   NOTE: MINIMUM_ALIGNMENT is defined only in the x86-64 branch, and
   only when DL_STACK_ALIGNMENT was not already defined by the includer.
   Where it is left undefined, a #if that evaluates it treats it as 0.  */
# ifdef __x86_64__
#  define DL_STACK_ALIGNMENT 8
#  define MINIMUM_ALIGNMENT 16
# else
#  define DL_STACK_ALIGNMENT 4
# endif
#endif
 | 
						|
 | 
						|
/* True if _dl_runtime_resolve/_dl_tlsdesc_dynamic should align stack for
   STATE_SAVE or align stack to MINIMUM_ALIGNMENT bytes before calling
   _dl_fixup/__tls_get_addr.  That is, true when either
   STATE_SAVE_ALIGNMENT or MINIMUM_ALIGNMENT exceeds DL_STACK_ALIGNMENT.
   STATE_SAVE_ALIGNMENT is not defined in the visible part of this
   header; the includer is expected to provide it before this macro is
   evaluated.  */
#define DL_RUNTIME_RESOLVE_REALIGN_STACK \
  (STATE_SAVE_ALIGNMENT > DL_STACK_ALIGNMENT \
   || MINIMUM_ALIGNMENT > DL_STACK_ALIGNMENT)
 | 
						|
 | 
						|
#endif	/* __ASSEMBLER__ */
 | 
						|
 | 
						|
#endif	/* _X86_SYSDEP_H */
 |