mirror of
https://sourceware.org/git/glibc.git
synced 2025-07-25 02:02:09 +03:00
Improve 64bit memcpy/memmove for Atom, Core 2 and Core i7
This patch includes optimized 64bit memcpy/memmove for Atom, Core 2 and Core i7. It improves memcpy by up to 3X on Atom, up to 4X on Core 2 and up to 1X on Core i7. It also improves memmove by up to 3X on Atom, up to 4X on Core 2 and up to 2X on Core i7.
This commit is contained in:
32
ChangeLog
32
ChangeLog
@ -1,3 +1,35 @@
|
|||||||
|
2010-06-25 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
|
* debug/memmove_chk.c (__memmove_chk): Renamed to ...
|
||||||
|
(MEMMOVE_CHK): ...this. Default to __memmove_chk.
|
||||||
|
* string/memmove.c (memmove): Renamed to ...
|
||||||
|
(MEMMOVE): ...this. Default to memmove.
|
||||||
|
* sysdeps/x86_64/memcpy.S: Use ENTRY_CHK and END_CHK.
|
||||||
|
* sysdeps/x86_64/sysdep.h (ENTRY_CHK): Define.
|
||||||
|
(END_CHK): Define.
|
||||||
|
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
|
||||||
|
memcpy-ssse3 mempcpy-ssse3 memmove-ssse3 memcpy-ssse3-back
|
||||||
|
mempcpy-ssse3-back memmove-ssse3-back.
|
||||||
|
* sysdeps/x86_64/multiarch/bcopy.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memcpy-ssse3-back.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memcpy-ssse3.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memcpy.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memcpy_chk.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memmove-ssse3-back.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memmove-ssse3.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memmove.c: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/memmove_chk.c: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/mempcpy-ssse3.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/mempcpy.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/mempcpy_chk.S: New file.
|
||||||
|
* sysdeps/x86_64/multiarch/init-arch.h (bit_Fast_Copy_Backward):
|
||||||
|
Define.
|
||||||
|
(index_Fast_Copy_Backward): Define.
|
||||||
|
(HAS_ARCH_FEATURE): Define.
|
||||||
|
(HAS_FAST_REP_STRING): Define.
|
||||||
|
(HAS_FAST_COPY_BACKWARD): Define.
|
||||||
|
|
||||||
2010-06-21 Andreas Schwab <schwab@redhat.com>
|
2010-06-21 Andreas Schwab <schwab@redhat.com>
|
||||||
|
|
||||||
* sysdeps/unix/sysv/linux/getlogin_r.c (__getlogin_r_loginuid):
|
* sysdeps/unix/sysv/linux/getlogin_r.c (__getlogin_r_loginuid):
|
||||||
|
@ -23,8 +23,12 @@
|
|||||||
#include <memcopy.h>
|
#include <memcopy.h>
|
||||||
#include <pagecopy.h>
|
#include <pagecopy.h>
|
||||||
|
|
||||||
|
#ifndef MEMMOVE_CHK
|
||||||
|
# define MEMMOVE_CHK __memmove_chk
|
||||||
|
#endif
|
||||||
|
|
||||||
void *
|
void *
|
||||||
__memmove_chk (dest, src, len, destlen)
|
MEMMOVE_CHK (dest, src, len, destlen)
|
||||||
void *dest;
|
void *dest;
|
||||||
const void *src;
|
const void *src;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
@ -37,9 +37,12 @@
|
|||||||
#define rettype void *
|
#define rettype void *
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifndef MEMMOVE
|
||||||
|
#define MEMMOVE memmove
|
||||||
|
#endif
|
||||||
|
|
||||||
rettype
|
rettype
|
||||||
memmove (a1, a2, len)
|
MEMMOVE (a1, a2, len)
|
||||||
a1const void *a1;
|
a1const void *a1;
|
||||||
a2const void *a2;
|
a2const void *a2;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
@ -40,12 +40,12 @@
|
|||||||
.text
|
.text
|
||||||
|
|
||||||
#if defined PIC && !defined NOT_IN_libc
|
#if defined PIC && !defined NOT_IN_libc
|
||||||
ENTRY (__memcpy_chk)
|
ENTRY_CHK (__memcpy_chk)
|
||||||
|
|
||||||
cmpq %rdx, %rcx
|
cmpq %rdx, %rcx
|
||||||
jb HIDDEN_JUMPTARGET (__chk_fail)
|
jb HIDDEN_JUMPTARGET (__chk_fail)
|
||||||
|
|
||||||
END (__memcpy_chk)
|
END_CHK (__memcpy_chk)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ENTRY(memcpy) /* (void *, const void*, size_t) */
|
ENTRY(memcpy) /* (void *, const void*, size_t) */
|
||||||
|
@ -5,7 +5,9 @@ endif
|
|||||||
|
|
||||||
ifeq ($(subdir),string)
|
ifeq ($(subdir),string)
|
||||||
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
|
||||||
strend-sse4 memcmp-sse4
|
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
|
||||||
|
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
|
||||||
|
memmove-ssse3-back
|
||||||
ifeq (yes,$(config-cflags-sse4))
|
ifeq (yes,$(config-cflags-sse4))
|
||||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c
|
||||||
CFLAGS-strcspn-c.c += -msse4
|
CFLAGS-strcspn-c.c += -msse4
|
||||||
|
7
sysdeps/x86_64/multiarch/bcopy.S
Normal file
7
sysdeps/x86_64/multiarch/bcopy.S
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
#include <sysdep.h>
|
||||||
|
|
||||||
|
.text
|
||||||
|
ENTRY(bcopy)
|
||||||
|
xchg %rdi, %rsi
|
||||||
|
jmp HIDDEN_BUILTIN_JUMPTARGET(memmove)
|
||||||
|
END(bcopy)
|
@ -78,10 +78,13 @@ __init_cpu_features (void)
|
|||||||
case 0x25:
|
case 0x25:
|
||||||
case 0x2e:
|
case 0x2e:
|
||||||
case 0x2f:
|
case 0x2f:
|
||||||
/* Rep string instructions are fast on Intel Core i3, i5
|
/* Rep string instructions and copy backward are fast on
|
||||||
and i7. */
|
Intel Core i3, i5 and i7. */
|
||||||
|
#if index_Fast_Rep_String != index_Fast_Copy_Backward
|
||||||
|
# error index_Fast_Rep_String != index_Fast_Copy_Backward
|
||||||
|
#endif
|
||||||
__cpu_features.feature[index_Fast_Rep_String]
|
__cpu_features.feature[index_Fast_Rep_String]
|
||||||
|= bit_Fast_Rep_String;
|
|= bit_Fast_Rep_String | bit_Fast_Copy_Backward;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
02111-1307 USA. */
|
02111-1307 USA. */
|
||||||
|
|
||||||
#define bit_Fast_Rep_String (1 << 0)
|
#define bit_Fast_Rep_String (1 << 0)
|
||||||
|
#define bit_Fast_Copy_Backward (1 << 1)
|
||||||
|
|
||||||
#ifdef __ASSEMBLER__
|
#ifdef __ASSEMBLER__
|
||||||
|
|
||||||
@ -32,7 +33,8 @@
|
|||||||
# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
|
# define index_SSE4_1 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
|
||||||
# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
|
# define index_SSE4_2 COMMON_CPUID_INDEX_1*CPUID_SIZE+CPUID_ECX_OFFSET
|
||||||
|
|
||||||
#define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
|
# define index_Fast_Rep_String FEATURE_INDEX_1*FEATURE_SIZE
|
||||||
|
# define index_Fast_Copy_Backward FEATURE_INDEX_1*FEATURE_SIZE
|
||||||
|
|
||||||
#else /* __ASSEMBLER__ */
|
#else /* __ASSEMBLER__ */
|
||||||
|
|
||||||
@ -102,6 +104,16 @@ extern const struct cpu_features *__get_cpu_features (void)
|
|||||||
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
|
# define HAS_SSE4_2 HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 20)
|
||||||
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
|
# define HAS_FMA HAS_CPU_FEATURE (COMMON_CPUID_INDEX_1, ecx, 12)
|
||||||
|
|
||||||
# define index_Fast_Rep_String FEATURE_INDEX_1
|
# define index_Fast_Rep_String FEATURE_INDEX_1
|
||||||
|
# define index_Fast_Copy_Backward FEATURE_INDEX_1
|
||||||
|
|
||||||
|
#define HAS_ARCH_FEATURE(idx, bit) \
|
||||||
|
((__get_cpu_features ()->feature[idx] & (bit)) != 0)
|
||||||
|
|
||||||
|
#define HAS_FAST_REP_STRING \
|
||||||
|
HAS_ARCH_FEATURE (index_Fast_Rep_String, bit_Fast_Rep_String)
|
||||||
|
|
||||||
|
#define HAS_FAST_COPY_BACKWARD \
|
||||||
|
HAS_ARCH_FEATURE (index_Fast_Copy_Backward, bit_Fast_Copy_Backward)
|
||||||
|
|
||||||
#endif /* __ASSEMBLER__ */
|
#endif /* __ASSEMBLER__ */
|
||||||
|
3169
sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
Normal file
3169
sysdeps/x86_64/multiarch/memcpy-ssse3-back.S
Normal file
File diff suppressed because it is too large
Load Diff
3139
sysdeps/x86_64/multiarch/memcpy-ssse3.S
Normal file
3139
sysdeps/x86_64/multiarch/memcpy-ssse3.S
Normal file
File diff suppressed because it is too large
Load Diff
73
sysdeps/x86_64/multiarch/memcpy.S
Normal file
73
sysdeps/x86_64/multiarch/memcpy.S
Normal file
@ -0,0 +1,73 @@
|
|||||||
|
/* Multiple versions of memcpy
|
||||||
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
Contributed by Intel Corporation.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in lib and for
|
||||||
|
DSO. In static binaries we need memcpy before the initialization
|
||||||
|
happened. */
|
||||||
|
#if defined SHARED && !defined NOT_IN_libc
|
||||||
|
.text
|
||||||
|
ENTRY(memcpy)
|
||||||
|
.type memcpy, @gnu_indirect_function
|
||||||
|
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||||
|
jne 1f
|
||||||
|
call __init_cpu_features
|
||||||
|
1: leaq __memcpy_sse2(%rip), %rax
|
||||||
|
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __memcpy_ssse3(%rip), %rax
|
||||||
|
testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __memcpy_ssse3_back(%rip), %rax
|
||||||
|
2: ret
|
||||||
|
END(memcpy)
|
||||||
|
|
||||||
|
# undef ENTRY
|
||||||
|
# define ENTRY(name) \
|
||||||
|
.type __memcpy_sse2, @function; \
|
||||||
|
.p2align 4; \
|
||||||
|
__memcpy_sse2: cfi_startproc; \
|
||||||
|
CALL_MCOUNT
|
||||||
|
# undef END
|
||||||
|
# define END(name) \
|
||||||
|
cfi_endproc; .size __memcpy_sse2, .-__memcpy_sse2
|
||||||
|
|
||||||
|
# undef ENTRY_CHK
|
||||||
|
# define ENTRY_CHK(name) \
|
||||||
|
.type __memcpy_chk_sse2, @function; \
|
||||||
|
.globl __memcpy_chk_sse2; \
|
||||||
|
.p2align 4; \
|
||||||
|
__memcpy_chk_sse2: cfi_startproc; \
|
||||||
|
CALL_MCOUNT
|
||||||
|
# undef END_CHK
|
||||||
|
# define END_CHK(name) \
|
||||||
|
cfi_endproc; .size __memcpy_chk_sse2, .-__memcpy_chk_sse2
|
||||||
|
|
||||||
|
# undef libc_hidden_builtin_def
|
||||||
|
/* It doesn't make sense to send libc-internal memcpy calls through a PLT.
|
||||||
|
The speedup we get from using SSSE3 instruction is likely eaten away
|
||||||
|
by the indirect call in the PLT. */
|
||||||
|
# define libc_hidden_builtin_def(name) \
|
||||||
|
.globl __GI_memcpy; __GI_memcpy = __memcpy_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "../memcpy.S"
|
47
sysdeps/x86_64/multiarch/memcpy_chk.S
Normal file
47
sysdeps/x86_64/multiarch/memcpy_chk.S
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
/* Multiple versions of __memcpy_chk
|
||||||
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
Contributed by Intel Corporation.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in lib and for
|
||||||
|
DSO. There are no multiarch memcpy functions for static binaries.
|
||||||
|
*/
|
||||||
|
#ifndef NOT_IN_libc
|
||||||
|
# ifdef SHARED
|
||||||
|
.text
|
||||||
|
ENTRY(__memcpy_chk)
|
||||||
|
.type __memcpy_chk, @gnu_indirect_function
|
||||||
|
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||||
|
jne 1f
|
||||||
|
call __init_cpu_features
|
||||||
|
1: leaq __memcpy_chk_sse2(%rip), %rax
|
||||||
|
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __memcpy_chk_ssse3(%rip), %rax
|
||||||
|
testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __memcpy_chk_ssse3_back(%rip), %rax
|
||||||
|
2: ret
|
||||||
|
END(__memcpy_chk)
|
||||||
|
# else
|
||||||
|
# include "../memcpy_chk.S"
|
||||||
|
# endif
|
||||||
|
#endif
|
4
sysdeps/x86_64/multiarch/memmove-ssse3-back.S
Normal file
4
sysdeps/x86_64/multiarch/memmove-ssse3-back.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#define USE_AS_MEMMOVE
|
||||||
|
#define MEMCPY __memmove_ssse3_back
|
||||||
|
#define MEMCPY_CHK __memmove_chk_ssse3_back
|
||||||
|
#include "memcpy-ssse3-back.S"
|
4
sysdeps/x86_64/multiarch/memmove-ssse3.S
Normal file
4
sysdeps/x86_64/multiarch/memmove-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#define USE_AS_MEMMOVE
|
||||||
|
#define MEMCPY __memmove_ssse3
|
||||||
|
#define MEMCPY_CHK __memmove_chk_ssse3
|
||||||
|
#include "memcpy-ssse3.S"
|
24
sysdeps/x86_64/multiarch/memmove.c
Normal file
24
sysdeps/x86_64/multiarch/memmove.c
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
#ifndef NOT_IN_libc
|
||||||
|
#include "init-arch.h"
|
||||||
|
|
||||||
|
#define MEMMOVE __memmove_sse2
|
||||||
|
#ifdef SHARED
|
||||||
|
# undef libc_hidden_builtin_def
|
||||||
|
# define libc_hidden_builtin_def(name) \
|
||||||
|
__hidden_ver1 (__memmove_sse2, __GI_memmove, __memmove_sse2);
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "string/memmove.c"
|
||||||
|
|
||||||
|
#ifndef NOT_IN_libc
|
||||||
|
extern __typeof (__memmove_sse2) __memmove_sse2 attribute_hidden;
|
||||||
|
extern __typeof (__memmove_sse2) __memmove_ssse3 attribute_hidden;
|
||||||
|
extern __typeof (__memmove_sse2) __memmove_ssse3_back attribute_hidden;
|
||||||
|
|
||||||
|
libc_ifunc (memmove,
|
||||||
|
HAS_SSSE3
|
||||||
|
? (HAS_FAST_COPY_BACKWARD
|
||||||
|
? __memmove_ssse3_back : __memmove_ssse3)
|
||||||
|
: __memmove_sse2);
|
||||||
|
#endif
|
15
sysdeps/x86_64/multiarch/memmove_chk.c
Normal file
15
sysdeps/x86_64/multiarch/memmove_chk.c
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
#include "init-arch.h"
|
||||||
|
|
||||||
|
#define MEMMOVE_CHK __memmove_chk_sse2
|
||||||
|
|
||||||
|
#include "debug/memmove_chk.c"
|
||||||
|
|
||||||
|
extern __typeof (__memmove_chk_sse2) __memmove_chk_sse2 attribute_hidden;
|
||||||
|
extern __typeof (__memmove_chk_sse2) __memmove_chk_ssse3 attribute_hidden;
|
||||||
|
extern __typeof (__memmove_chk_sse2) __memmove_chk_ssse3_back attribute_hidden;
|
||||||
|
|
||||||
|
libc_ifunc (__memmove_chk,
|
||||||
|
HAS_SSSE3
|
||||||
|
? (HAS_FAST_COPY_BACKWARD
|
||||||
|
? __memmove_chk_ssse3_back : __memmove_chk_ssse3)
|
||||||
|
: __memmove_chk_sse2);
|
4
sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
Normal file
4
sysdeps/x86_64/multiarch/mempcpy-ssse3-back.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#define USE_AS_MEMPCPY
|
||||||
|
#define MEMCPY __mempcpy_ssse3_back
|
||||||
|
#define MEMCPY_CHK __mempcpy_chk_ssse3_back
|
||||||
|
#include "memcpy-ssse3-back.S"
|
4
sysdeps/x86_64/multiarch/mempcpy-ssse3.S
Normal file
4
sysdeps/x86_64/multiarch/mempcpy-ssse3.S
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
#define USE_AS_MEMPCPY
|
||||||
|
#define MEMCPY __mempcpy_ssse3
|
||||||
|
#define MEMCPY_CHK __mempcpy_chk_ssse3
|
||||||
|
#include "memcpy-ssse3.S"
|
75
sysdeps/x86_64/multiarch/mempcpy.S
Normal file
75
sysdeps/x86_64/multiarch/mempcpy.S
Normal file
@ -0,0 +1,75 @@
|
|||||||
|
/* Multiple versions of mempcpy
|
||||||
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
Contributed by Intel Corporation.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in lib and for
|
||||||
|
DSO. In static binaries we need mempcpy before the initialization
|
||||||
|
happened. */
|
||||||
|
#if defined SHARED && !defined NOT_IN_libc
|
||||||
|
ENTRY(__mempcpy)
|
||||||
|
.type __mempcpy, @gnu_indirect_function
|
||||||
|
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||||
|
jne 1f
|
||||||
|
call __init_cpu_features
|
||||||
|
1: leaq __mempcpy_sse2(%rip), %rax
|
||||||
|
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __mempcpy_ssse3(%rip), %rax
|
||||||
|
testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __mempcpy_ssse3_back(%rip), %rax
|
||||||
|
2: ret
|
||||||
|
END(__mempcpy)
|
||||||
|
|
||||||
|
# undef ENTRY
|
||||||
|
# define ENTRY(name) \
|
||||||
|
.type __mempcpy_sse2, @function; \
|
||||||
|
.p2align 4; \
|
||||||
|
__mempcpy_sse2: cfi_startproc; \
|
||||||
|
CALL_MCOUNT
|
||||||
|
# undef END
|
||||||
|
# define END(name) \
|
||||||
|
cfi_endproc; .size __mempcpy_sse2, .-__mempcpy_sse2
|
||||||
|
|
||||||
|
# undef ENTRY_CHK
|
||||||
|
# define ENTRY_CHK(name) \
|
||||||
|
.type __mempcpy_chk_sse2, @function; \
|
||||||
|
.globl __mempcpy_chk_sse2; \
|
||||||
|
.p2align 4; \
|
||||||
|
__mempcpy_chk_sse2: cfi_startproc; \
|
||||||
|
CALL_MCOUNT
|
||||||
|
# undef END_CHK
|
||||||
|
# define END_CHK(name) \
|
||||||
|
cfi_endproc; .size __mempcpy_chk_sse2, .-__mempcpy_chk_sse2
|
||||||
|
|
||||||
|
# undef libc_hidden_def
|
||||||
|
# undef libc_hidden_builtin_def
|
||||||
|
/* It doesn't make sense to send libc-internal mempcpy calls through a PLT.
|
||||||
|
The speedup we get from using SSSE3 instruction is likely eaten away
|
||||||
|
by the indirect call in the PLT. */
|
||||||
|
# define libc_hidden_def(name) \
|
||||||
|
.globl __GI_mempcpy; __GI_mempcpy = __mempcpy_sse2
|
||||||
|
# define libc_hidden_builtin_def(name) \
|
||||||
|
.globl __GI___mempcpy; __GI___mempcpy = __mempcpy_sse2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "../mempcpy.S"
|
47
sysdeps/x86_64/multiarch/mempcpy_chk.S
Normal file
47
sysdeps/x86_64/multiarch/mempcpy_chk.S
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
/* Multiple versions of __mempcpy_chk
|
||||||
|
Copyright (C) 2010 Free Software Foundation, Inc.
|
||||||
|
Contributed by Intel Corporation.
|
||||||
|
This file is part of the GNU C Library.
|
||||||
|
|
||||||
|
The GNU C Library is free software; you can redistribute it and/or
|
||||||
|
modify it under the terms of the GNU Lesser General Public
|
||||||
|
License as published by the Free Software Foundation; either
|
||||||
|
version 2.1 of the License, or (at your option) any later version.
|
||||||
|
|
||||||
|
The GNU C Library is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||||
|
Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public
|
||||||
|
License along with the GNU C Library; if not, write to the Free
|
||||||
|
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
|
||||||
|
02111-1307 USA. */
|
||||||
|
|
||||||
|
#include <sysdep.h>
|
||||||
|
#include <init-arch.h>
|
||||||
|
|
||||||
|
/* Define multiple versions only for the definition in lib and for
|
||||||
|
DSO. There are no multiarch mempcpy functions for static binaries.
|
||||||
|
*/
|
||||||
|
#ifndef NOT_IN_libc
|
||||||
|
# ifdef SHARED
|
||||||
|
.text
|
||||||
|
ENTRY(__mempcpy_chk)
|
||||||
|
.type __mempcpy_chk, @gnu_indirect_function
|
||||||
|
cmpl $0, KIND_OFFSET+__cpu_features(%rip)
|
||||||
|
jne 1f
|
||||||
|
call __init_cpu_features
|
||||||
|
1: leaq __mempcpy_chk_sse2(%rip), %rax
|
||||||
|
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __mempcpy_chk_ssse3(%rip), %rax
|
||||||
|
testl $bit_Fast_Copy_Backward, __cpu_features+FEATURE_OFFSET+index_Fast_Copy_Backward(%rip)
|
||||||
|
jz 2f
|
||||||
|
leaq __mempcpy_chk_ssse3_back(%rip), %rax
|
||||||
|
2: ret
|
||||||
|
END(__mempcpy_chk)
|
||||||
|
# else
|
||||||
|
# include "../mempcpy_chk.S"
|
||||||
|
# endif
|
||||||
|
#endif
|
@ -58,6 +58,9 @@
|
|||||||
cfi_endproc; \
|
cfi_endproc; \
|
||||||
ASM_SIZE_DIRECTIVE(name)
|
ASM_SIZE_DIRECTIVE(name)
|
||||||
|
|
||||||
|
#define ENTRY_CHK(name) ENTRY (name)
|
||||||
|
#define END_CHK(name) END (name)
|
||||||
|
|
||||||
/* If compiled for profiling, call `mcount' at the start of each function. */
|
/* If compiled for profiling, call `mcount' at the start of each function. */
|
||||||
#ifdef PROF
|
#ifdef PROF
|
||||||
/* The mcount code relies on a normal frame pointer being on the stack
|
/* The mcount code relies on a normal frame pointer being on the stack
|
||||||
|
Reference in New Issue
Block a user