1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-30 22:43:12 +03:00

Improve 64 bit strcat functions with SSE2/SSSE3

This commit is contained in:
Liubov Dmitrieva
2011-07-19 17:11:54 -04:00
committed by Ulrich Drepper
parent 7dc6bd90c5
commit 99710781cc
18 changed files with 1523 additions and 321 deletions

View File

@ -1,3 +1,32 @@
2011-07-15 Liubov Dmitrieva <liubov.dmitrieva@intel.com>
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Add
strcat-ssse3 strcat-sse2-unaligned strncat-ssse3
strncat-sse2-unaligned strncat-c strlen-sse2-pminub.
* sysdeps/x86_64/multiarch/strcat-sse2-unaligned.S: New file.
* sysdeps/x86_64/multiarch/strcat.S: New file.
* sysdeps/x86_64/multiarch/strncat.S: New file.
* sysdeps/x86_64/multiarch/strncat-c.c: New file.
* sysdeps/x86_64/multiarch/strcat-ssse3.S: New file.
* sysdeps/x86_64/multiarch/strncat-sse2-unaligned.S: New file.
* sysdeps/x86_64/multiarch/strncat-ssse3.S: New file.
* sysdeps/x86_64/multiarch/strcpy-ssse3.S
(USE_AS_STRCAT): Define.
Add strcat and strncat support.
* sysdeps/x86_64/multiarch/strlen-no-bsf.S: Likewise.
* sysdeps/x86_64/multiarch/strcpy-sse2-unaligned.S: Likewise.
* sysdeps/x86_64/multiarch/strlen-sse2-pminub.S: New file.
* string/strncat.c: Update.
(USE_AS_STRNCAT): Define.
* sysdeps/x86_64/multiarch/init-arch.c (__init_cpu_features):
Turn on bit_Prefer_PMINUB_for_stringop for Intel Core i3, i5
and i7.
* sysdeps/x86_64/multiarch/init-arch.h
(bit_Prefer_PMINUB_for_stringop): New.
(index_Prefer_PMINUB_for_stringop): Likewise.
* sysdeps/x86_64/multiarch/strlen.S (strlen): Check
bit_Prefer_PMINUB_for_stringop.
2011-07-19 Ulrich Drepper <drepper@gmail.com> 2011-07-19 Ulrich Drepper <drepper@gmail.com>
* crypt/sha512.h (struct sha512_ctx): Move buffer into union and add * crypt/sha512.h (struct sha512_ctx): Move buffer into union and add

5
NEWS
View File

@ -1,4 +1,4 @@
GNU C Library NEWS -- history of user-visible changes. 2011-7-6 GNU C Library NEWS -- history of user-visible changes. 2011-7-19
Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc. Copyright (C) 1992-2009, 2010, 2011 Free Software Foundation, Inc.
See the end for copying conditions. See the end for copying conditions.
@ -23,6 +23,9 @@ Version 2.15
* Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64. * Improved strcpy, strncpy, stpcpy, stpncpy for SSE2 and SSSE3 on x86-64.
Contributed by HJ Lu. Contributed by HJ Lu.
* Improved strcat and strncat on x86-64.
Contributed by Liubov Dmitrieva.
Version 2.14 Version 2.14

View File

@ -24,10 +24,12 @@
typedef char reg_char; typedef char reg_char;
#endif #endif
#undef strncat #ifndef STRNCAT
# define STRNCAT strncat
#endif
char * char *
strncat (s1, s2, n) STRNCAT (s1, s2, n)
char *s1; char *s1;
const char *s2; const char *s2;
size_t n; size_t n;

View File

@ -5,14 +5,16 @@ endif
ifeq ($(subdir),string) ifeq ($(subdir),string)
sysdep_routines += stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 strncmp-ssse3 \
strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \ strend-sse4 memcmp-sse4 memcpy-ssse3 mempcpy-ssse3 \
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \ memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \ memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
strncase_l-ssse3 strlen-sse4 strlen-no-bsf memset-x86-64 \ strncase_l-ssse3 strlen-sse4 strlen-no-bsf memset-x86-64 \
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \ strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
strcpy-sse2-unaligned strncpy-sse2-unaligned \ strcpy-sse2-unaligned strncpy-sse2-unaligned \
stpcpy-sse2-unaligned stpncpy-sse2-unaligned stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
strcat-sse2-unaligned strncat-sse2-unaligned \
strcat-ssse3 strncat-ssse3 strlen-sse2-pminub
ifeq (yes,$(config-cflags-sse4)) ifeq (yes,$(config-cflags-sse4))
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
CFLAGS-varshift.c += -msse4 CFLAGS-varshift.c += -msse4

View File

@ -97,18 +97,22 @@ __init_cpu_features (void)
case 0x2c: case 0x2c:
case 0x2e: case 0x2e:
case 0x2f: case 0x2f:
/* Rep string instructions, copy backward and unaligned loads /* Rep string instructions, copy backward, unaligned loads
are fast on Intel Core i3, i5 and i7. */ and pminub are fast on Intel Core i3, i5 and i7. */
#if index_Fast_Rep_String != index_Fast_Copy_Backward #if index_Fast_Rep_String != index_Fast_Copy_Backward
# error index_Fast_Rep_String != index_Fast_Copy_Backward # error index_Fast_Rep_String != index_Fast_Copy_Backward
#endif #endif
#if index_Fast_Rep_String != index_Fast_Unaligned_Load #if index_Fast_Rep_String != index_Fast_Unaligned_Load
# error index_Fast_Rep_String != index_Fast_Unaligned_Load # error index_Fast_Rep_String != index_Fast_Unaligned_Load
#endif
#if index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
# error index_Fast_Rep_String != index_Prefer_PMINUB_for_stringop
#endif #endif
__cpu_features.feature[index_Fast_Rep_String] __cpu_features.feature[index_Fast_Rep_String]
|= (bit_Fast_Rep_String |= (bit_Fast_Rep_String
| bit_Fast_Copy_Backward | bit_Fast_Copy_Backward
| bit_Fast_Unaligned_Load); | bit_Fast_Unaligned_Load
| bit_Prefer_PMINUB_for_stringop);
break; break;
} }
} }

View File

@ -21,6 +21,7 @@
#define bit_Slow_BSF (1 << 2) #define bit_Slow_BSF (1 << 2)
#define bit_Prefer_SSE_for_memop (1 << 3) #define bit_Prefer_SSE_for_memop (1 << 3)
#define bit_Fast_Unaligned_Load (1 << 4) #define bit_Fast_Unaligned_Load (1 << 4)
#define bit_Prefer_PMINUB_for_stringop (1 << 5)
#ifdef __ASSEMBLER__ #ifdef __ASSEMBLER__
@ -41,6 +42,7 @@
# define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE # define index_Slow_BSF FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE # define index_Prefer_SSE_for_memop FEATURE_INDEX_1*FEATURE_SIZE
# define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE # define index_Fast_Unaligned_Load FEATURE_INDEX_1*FEATURE_SIZE
# define index_Prefer_PMINUB_for_stringop FEATURE_INDEX_1*FEATURE_SIZE
#else /* __ASSEMBLER__ */ #else /* __ASSEMBLER__ */

View File

@ -0,0 +1,55 @@
/* strcat with SSE2
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef NOT_IN_libc
# include <sysdep.h>
# ifndef STRCAT
# define STRCAT __strcat_sse2_unaligned
# endif
/* Marks this translation unit as a strcat build for the files that are
   textually included below.  */
# define USE_AS_STRCAT
.text
/* STRCAT: append the string at %rsi to the string at %rdi and return the
   original %rdi in %rax.  When USE_AS_STRNCAT is defined, %rdx carries
   the byte limit.

   Registers set up here for the included copy code:
     %r9  = original destination pointer
     %r8  = byte limit (USE_AS_STRNCAT only)
     %rdi = %r9 + %rax, the address where copying starts
     %rcx = source pointer
     %rax = original destination pointer (the return value)

   NOTE(review): no END (STRCAT) appears in this file; presumably the
   included strcpy-sse2-unaligned.S supplies it -- confirm.  */
ENTRY (STRCAT)
/* Keep the destination: %rdi is recomputed after the length scan and
   the original value is what must be returned.  */
mov %rdi, %r9
# ifdef USE_AS_STRNCAT
/* Move the limit out of %rdx before the included code runs.  */
mov %rdx, %r8
# endif
/* The included strlen body leaves through RETURN, which is redirected
   here so that it jumps to the copy part instead of returning to the
   caller.  Presumably it leaves the length of the destination string
   in %rax (the lea below uses %rax as an offset from %r9) -- the
   included file is not visible here, confirm.  */
# define RETURN jmp L(StartStrcpyPart)
# include "strlen-sse2-pminub.S"
# undef RETURN
L(StartStrcpyPart):
/* Must read %rax before it is overwritten with the return value.  */
lea (%r9, %rax), %rdi
mov %rsi, %rcx
mov %r9, %rax /* save result */
# ifdef USE_AS_STRNCAT
/* A limit of zero appends nothing.  L(ExitZero) is not defined in this
   file; presumably it comes from the included strcpy body.  */
test %r8, %r8
jz L(ExitZero)
# define USE_AS_STRNCPY
# endif
/* The copy itself: the strcpy implementation is assembled inline with
   USE_AS_STRCAT (and, for strncat, USE_AS_STRNCPY) defined.  */
# include "strcpy-sse2-unaligned.S"
#endif

View File

@ -0,0 +1,559 @@
/* strcat with SSSE3
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#ifndef NOT_IN_libc
# include <sysdep.h>
# ifndef STRCAT
# define STRCAT __strcat_ssse3
# endif
/* Marks this translation unit as a strcat build for the files that are
   textually included below.  */
# define USE_AS_STRCAT
.text
/* STRCAT: append the string at %rsi to the string at %rdi and return
   %rdi in %rax.  When USE_AS_STRNCAT is defined, %rdx carries the byte
   limit.

   Registers after L(StartStrcpyPart):
     %rdi = destination pointer (never modified in this file; every exit
            returns it)
     %rcx = source pointer
     %rdx = %rdi + %rax, the address where copying starts
     %r8  = byte limit (USE_AS_STRNCAT only)  */
ENTRY (STRCAT)
# ifdef USE_AS_STRNCAT
/* %rdx is reused below as the write pointer, so move the limit away.  */
mov %rdx, %r8
# endif
/* The included strlen body leaves through RETURN, which is redirected
   so that it jumps to the copy part.  Presumably it leaves the length
   of the destination string in %rax and keeps %rdi intact -- the
   included file is not visible here, confirm.  */
# define RETURN jmp L(StartStrcpyPart)
# include "strlen-no-bsf.S"
# undef RETURN
L(StartStrcpyPart):
mov %rsi, %rcx
lea (%rdi, %rax), %rdx
# ifdef USE_AS_STRNCAT
/* Limit 0: nothing to append.  Limit 1..8 (unsigned compare): use the
   bounded byte-by-byte probe instead of the unbounded one below.  */
test %r8, %r8
jz L(StrncatExit0)
cmp $8, %r8
jbe L(StrncatExit8Bytes)
# endif
/* Probe the first source bytes one at a time.  A NUL at offset k-1
   branches to L(Exit<k>), which copies exactly k bytes (the NUL
   included) and returns.  */
cmpb $0, (%rcx)
jz L(Exit1)
cmpb $0, 1(%rcx)
jz L(Exit2)
cmpb $0, 2(%rcx)
jz L(Exit3)
cmpb $0, 3(%rcx)
jz L(Exit4)
cmpb $0, 4(%rcx)
jz L(Exit5)
cmpb $0, 5(%rcx)
jz L(Exit6)
cmpb $0, 6(%rcx)
jz L(Exit7)
cmpb $0, 7(%rcx)
jz L(Exit8)
cmpb $0, 8(%rcx)
jz L(Exit9)
# ifdef USE_AS_STRNCAT
/* Here the limit is at least 9.  With a limit below 16 the remaining
   bytes must be probed with the limit checked in between.  */
cmp $16, %r8
jb L(StrncatExit15Bytes)
# endif
cmpb $0, 9(%rcx)
jz L(Exit10)
cmpb $0, 10(%rcx)
jz L(Exit11)
cmpb $0, 11(%rcx)
jz L(Exit12)
cmpb $0, 12(%rcx)
jz L(Exit13)
cmpb $0, 13(%rcx)
jz L(Exit14)
cmpb $0, 14(%rcx)
jz L(Exit15)
cmpb $0, 15(%rcx)
jz L(Exit16)
# ifdef USE_AS_STRNCAT
/* The first 16 source bytes are all non-NUL.  A limit of exactly 16 is
   finished by copying them and appending the terminator.  */
cmp $16, %r8
je L(StrncatExit16)
/* Presumably selects the length-limited paths of the included strcpy
   body; the L(...Case2/Case3) labels further down are only assembled
   when this is defined.  */
# define USE_AS_STRNCPY
# endif
/* Long-string copy: the strcpy implementation is assembled inline and
   is expected to branch to the L(CopyFrom1To16Bytes*) labels defined
   after this include (its source is not visible here).  */
# include "strcpy-ssse3.S"
/* L(CopyFrom1To16Bytes): finish a copy whose terminator lies in the
   current 16-byte chunk.
   In:  %rsi = offset added to both pointers before dispatching
        %rcx, %rdx = source / destination bases for that offset
        %ax  = bit mask; the lowest set bit i selects L(Exit<i+1>)
   NOTE(review): the mask is presumably a pmovmskb result produced by
   the included strcpy-ssse3.S, where bit i means source byte i is
   NUL -- confirm against that file.  */
.p2align 4
L(CopyFrom1To16Bytes):
add %rsi, %rdx
add %rsi, %rcx
/* Low byte of the mask empty: the set bit is among bits 8..15.  */
test %al, %al
jz L(ExitHigh)
test $0x01, %al
jnz L(Exit1)
test $0x02, %al
jnz L(Exit2)
test $0x04, %al
jnz L(Exit3)
test $0x08, %al
jnz L(Exit4)
test $0x10, %al
jnz L(Exit5)
test $0x20, %al
jnz L(Exit6)
test $0x40, %al
jnz L(Exit7)
/* None of bits 0..6 is set, so only bit 7 remains: copy 8 bytes
   (same work as L(Exit8)).  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
mov %rdi, %rax
ret
/* L(ExitHigh): same dispatch on bits 8..15 of the mask, read through
   %ah.  */
.p2align 4
L(ExitHigh):
test $0x01, %ah
jnz L(Exit9)
test $0x02, %ah
jnz L(Exit10)
test $0x04, %ah
jnz L(Exit11)
test $0x08, %ah
jnz L(Exit12)
test $0x10, %ah
jnz L(Exit13)
test $0x20, %ah
jnz L(Exit14)
test $0x40, %ah
jnz L(Exit15)
/* None of bits 8..14 is set: copy the full 16 bytes (same work as
   L(Exit16)).  */
movlpd (%rcx), %xmm0
movlpd 8(%rcx), %xmm1
movlpd %xmm0, (%rdx)
movlpd %xmm1, 8(%rdx)
mov %rdi, %rax
ret
/* Exit stubs, one pair per length N = 1..16.

   L(Exit<N>):        copy exactly N bytes from (%rcx) to (%rdx) and
                      return %rdi in %rax.  Callers branch here when the
                      N-th source byte is the terminator, so the NUL is
                      part of the copied bytes.
   L(StrncatExit<N>): first store a NUL at N(%rdx), then fall through
                      into L(Exit<N>).  Used when the limit stops the
                      copy after N bytes that contain no terminator.

   Lengths that are not a power of two are built from two moves; for
   7, 11 and 13..15 the second move overlaps the first (it starts at
   N minus its width) so no byte past N is ever touched.
   %rax and %xmm0/%xmm1 are used as scratch.  */
.p2align 4
L(StrncatExit1):
xor %ah, %ah
movb %ah, 1(%rdx)
L(Exit1):
movb (%rcx), %al
movb %al, (%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit2):
xor %ah, %ah
movb %ah, 2(%rdx)
L(Exit2):
movw (%rcx), %ax
movw %ax, (%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit3):
xor %ah, %ah
movb %ah, 3(%rdx)
L(Exit3):
movw (%rcx), %ax
movw %ax, (%rdx)
movb 2(%rcx), %al
movb %al, 2(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit4):
xor %ah, %ah
movb %ah, 4(%rdx)
L(Exit4):
mov (%rcx), %eax
mov %eax, (%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit5):
xor %ah, %ah
movb %ah, 5(%rdx)
L(Exit5):
mov (%rcx), %eax
mov %eax, (%rdx)
movb 4(%rcx), %al
movb %al, 4(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit6):
xor %ah, %ah
movb %ah, 6(%rdx)
L(Exit6):
mov (%rcx), %eax
mov %eax, (%rdx)
movw 4(%rcx), %ax
movw %ax, 4(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit7):
xor %ah, %ah
movb %ah, 7(%rdx)
L(Exit7):
/* Bytes 0..3, then the overlapping bytes 3..6.  */
mov (%rcx), %eax
mov %eax, (%rdx)
mov 3(%rcx), %eax
mov %eax, 3(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit8):
xor %ah, %ah
movb %ah, 8(%rdx)
L(Exit8):
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit9):
xor %ah, %ah
movb %ah, 9(%rdx)
L(Exit9):
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movb 8(%rcx), %al
movb %al, 8(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit10):
xor %ah, %ah
movb %ah, 10(%rdx)
L(Exit10):
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movw 8(%rcx), %ax
movw %ax, 8(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit11):
xor %ah, %ah
movb %ah, 11(%rdx)
L(Exit11):
/* Bytes 0..7, then the overlapping bytes 7..10.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
mov 7(%rcx), %eax
mov %eax, 7(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit12):
xor %ah, %ah
movb %ah, 12(%rdx)
L(Exit12):
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
mov 8(%rcx), %eax
mov %eax, 8(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit13):
xor %ah, %ah
movb %ah, 13(%rdx)
L(Exit13):
/* Bytes 0..7, then the overlapping bytes 5..12.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movlpd 5(%rcx), %xmm1
movlpd %xmm1, 5(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit14):
xor %ah, %ah
movb %ah, 14(%rdx)
L(Exit14):
/* Bytes 0..7, then the overlapping bytes 6..13.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movlpd 6(%rcx), %xmm1
movlpd %xmm1, 6(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit15):
xor %ah, %ah
movb %ah, 15(%rdx)
L(Exit15):
/* Bytes 0..7, then the overlapping bytes 7..14.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movlpd 7(%rcx), %xmm1
movlpd %xmm1, 7(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(StrncatExit16):
xor %ah, %ah
movb %ah, 16(%rdx)
L(Exit16):
movlpd (%rcx), %xmm0
movlpd 8(%rcx), %xmm1
movlpd %xmm0, (%rdx)
movlpd %xmm1, 8(%rdx)
mov %rdi, %rax
ret
# ifdef USE_AS_STRNCPY
/* L(CopyFrom1To16BytesCase2): finish a length-limited copy when the
   mask in %ax is non-zero (see L(CopyFrom1To16BytesCase2OrCase3)), so
   both the mask and the remaining limit have to be honoured; whichever
   comes first ends the copy.
   In:  %rsi = offset added to both pointers
        %rcx, %rdx = source / destination bases for that offset
        %ax  = bit mask, bit i selects L(Exit<i+1>)
        %r8  = remaining limit minus 16
   NOTE(review): the "add $16" presumably undoes a subtraction done in
   the included strcpy-ssse3.S, leaving 1..16 in %r8 -- confirm.  */
.p2align 4
L(CopyFrom1To16BytesCase2):
add $16, %r8
add %rsi, %rcx
/* Build the final destination pointer in %rsi so that %rdx is free as
   scratch for the test below.  */
lea (%rsi, %rdx), %rsi
/* %dh = (bit 15 of %r8 - 9) | %al.  For the small limits handled here
   bit 15 is set exactly when %r8 < 9, so %dh is zero only if the limit
   is at least 9 and mask bits 0..7 are all clear.  */
lea -9(%r8), %rdx
and $1<<7, %dh
or %al, %dh
test %dh, %dh
/* lea does not modify flags: restore the destination pointer between
   the test and the branch that consumes its result.  */
lea (%rsi), %rdx
jz L(ExitHighCase2)
/* For each length k = 1..7: a mask bit at k-1 ends the copy with the
   source terminator (Exit<k>); otherwise a limit of exactly k ends it
   with an appended terminator (StrncatExit<k>).  */
test $0x01, %al
jnz L(Exit1)
cmp $1, %r8
je L(StrncatExit1)
test $0x02, %al
jnz L(Exit2)
cmp $2, %r8
je L(StrncatExit2)
test $0x04, %al
jnz L(Exit3)
cmp $3, %r8
je L(StrncatExit3)
test $0x08, %al
jnz L(Exit4)
cmp $4, %r8
je L(StrncatExit4)
test $0x10, %al
jnz L(Exit5)
cmp $5, %r8
je L(StrncatExit5)
test $0x20, %al
jnz L(Exit6)
cmp $6, %r8
je L(StrncatExit6)
test $0x40, %al
jnz L(Exit7)
cmp $7, %r8
je L(StrncatExit7)
/* Copy 8 bytes, then place the terminator without branching:
   cmpb $1 sets CF exactly when the byte at 7(%rdx) is 0, and
   sbb $-1 adds (1 - CF).  So %rax stays at offset 7 if the copied
   byte was already NUL and moves to offset 8 otherwise.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
lea 7(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
xor %cl, %cl
movb %cl, (%rax)
mov %rdi, %rax
ret
/* L(ExitHighCase2): same interleaved mask/limit checks for lengths
   9..15, using mask bits 8..14 through %ah.  */
.p2align 4
L(ExitHighCase2):
test $0x01, %ah
jnz L(Exit9)
cmp $9, %r8
je L(StrncatExit9)
test $0x02, %ah
jnz L(Exit10)
cmp $10, %r8
je L(StrncatExit10)
test $0x04, %ah
jnz L(Exit11)
cmp $11, %r8
je L(StrncatExit11)
test $0x8, %ah
jnz L(Exit12)
cmp $12, %r8
je L(StrncatExit12)
test $0x10, %ah
jnz L(Exit13)
cmp $13, %r8
je L(StrncatExit13)
test $0x20, %ah
jnz L(Exit14)
cmp $14, %r8
je L(StrncatExit14)
test $0x40, %ah
jnz L(Exit15)
cmp $15, %r8
je L(StrncatExit15)
/* Nothing matched for 9..15: copy all 16 bytes.  No terminator is
   appended here; presumably the 16th source byte is the NUL flagged by
   mask bit 15 -- confirm.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movlpd 8(%rcx), %xmm1
movlpd %xmm1, 8(%rdx)
mov %rdi, %rax
ret
/* L(CopyFrom1To16BytesCase2OrCase3): a non-zero mask in %rax goes to
   Case2; a zero mask falls through (across the alignment padding) into
   Case3.  */
L(CopyFrom1To16BytesCase2OrCase3):
test %rax, %rax
jnz L(CopyFrom1To16BytesCase2)
/* L(CopyFrom1To16BytesCase3): only the limit ends the copy; the mask
   is not consulted.  Copies %r8 bytes and appends a NUL through the
   matching L(StrncatExit<N>) stub.
   In:  %rsi = offset added to both pointers
        %rcx, %rdx = source / destination bases for that offset
        %r8  = remaining limit minus 16
   NOTE(review): as in Case2, the "add $16" presumably restores a
   value in 1..16 -- confirm against strcpy-ssse3.S.  */
.p2align 4
L(CopyFrom1To16BytesCase3):
add $16, %r8
add %rsi, %rdx
add %rsi, %rcx
/* Unsigned compare: limits above 8 are handled in L(ExitHighCase3).  */
cmp $8, %r8
ja L(ExitHighCase3)
cmp $1, %r8
je L(StrncatExit1)
cmp $2, %r8
je L(StrncatExit2)
cmp $3, %r8
je L(StrncatExit3)
cmp $4, %r8
je L(StrncatExit4)
cmp $5, %r8
je L(StrncatExit5)
cmp $6, %r8
je L(StrncatExit6)
cmp $7, %r8
je L(StrncatExit7)
/* Not 1..7, so the limit is treated as 8: copy 8 bytes and terminate
   at offset 8.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
xor %ah, %ah
movb %ah, 8(%rdx)
mov %rdi, %rax
ret
.p2align 4
L(ExitHighCase3):
cmp $9, %r8
je L(StrncatExit9)
cmp $10, %r8
je L(StrncatExit10)
cmp $11, %r8
je L(StrncatExit11)
cmp $12, %r8
je L(StrncatExit12)
cmp $13, %r8
je L(StrncatExit13)
cmp $14, %r8
je L(StrncatExit14)
cmp $15, %r8
je L(StrncatExit15)
/* Not 9..15, so the limit is treated as 16: copy 16 bytes and
   terminate at offset 16.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movlpd 8(%rcx), %xmm1
movlpd %xmm1, 8(%rdx)
xor %ah, %ah
movb %ah, 16(%rdx)
mov %rdi, %rax
ret
/* L(StrncatExit0): limit of zero -- the destination is left untouched
   and %rdi is returned.  */
.p2align 4
L(StrncatExit0):
mov %rdi, %rax
ret
/* L(StrncatExit15Bytes): entered from the prologue with a limit of
   9..15 after source bytes 0..8 were found to be non-NUL.  Alternates
   "limit reached?" and "next byte NUL?" so that no source byte beyond
   the limit is read by the probes.  */
.p2align 4
L(StrncatExit15Bytes):
cmp $9, %r8
je L(StrncatExit9)
cmpb $0, 9(%rcx)
jz L(Exit10)
cmp $10, %r8
je L(StrncatExit10)
cmpb $0, 10(%rcx)
jz L(Exit11)
cmp $11, %r8
je L(StrncatExit11)
cmpb $0, 11(%rcx)
jz L(Exit12)
cmp $12, %r8
je L(StrncatExit12)
cmpb $0, 12(%rcx)
jz L(Exit13)
cmp $13, %r8
je L(StrncatExit13)
cmpb $0, 13(%rcx)
jz L(Exit14)
cmp $14, %r8
je L(StrncatExit14)
/* Limit is 15: copy bytes 0..14 (two overlapping 8-byte moves), then
   place the terminator without branching.  cmpb $1 sets CF exactly
   when the byte at 14(%rdx) is 0 and sbb $-1 adds (1 - CF), so the NUL
   lands at offset 14 if that byte was already NUL, else at 15.  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
movlpd 7(%rcx), %xmm1
movlpd %xmm1, 7(%rdx)
lea 14(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
xor %cl, %cl
movb %cl, (%rax)
mov %rdi, %rax
ret
/* L(StrncatExit8Bytes): entered from the prologue with a limit of
   1..8.  Same alternating scheme, starting at source byte 0: the byte
   at offset k-1 is probed before the limit is compared with k.  */
.p2align 4
L(StrncatExit8Bytes):
cmpb $0, (%rcx)
jz L(Exit1)
cmp $1, %r8
je L(StrncatExit1)
cmpb $0, 1(%rcx)
jz L(Exit2)
cmp $2, %r8
je L(StrncatExit2)
cmpb $0, 2(%rcx)
jz L(Exit3)
cmp $3, %r8
je L(StrncatExit3)
cmpb $0, 3(%rcx)
jz L(Exit4)
cmp $4, %r8
je L(StrncatExit4)
cmpb $0, 4(%rcx)
jz L(Exit5)
cmp $5, %r8
je L(StrncatExit5)
cmpb $0, 5(%rcx)
jz L(Exit6)
cmp $6, %r8
je L(StrncatExit6)
cmpb $0, 6(%rcx)
jz L(Exit7)
cmp $7, %r8
je L(StrncatExit7)
/* Limit is 8: copy 8 bytes; the NUL goes to offset 7 if the byte
   copied there was already NUL, otherwise to offset 8 (same cmpb/sbb
   idiom as above).  */
movlpd (%rcx), %xmm0
movlpd %xmm0, (%rdx)
lea 7(%rdx), %rax
cmpb $1, (%rax)
sbb $-1, %rax
xor %cl, %cl
movb %cl, (%rax)
mov %rdi, %rax
ret
# endif
END (STRCAT)
#endif

View File

@ -0,0 +1,85 @@
/* Multiple versions of strcat
Copyright (C) 2009, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#include <sysdep.h>
#include <init-arch.h>
/* The public symbol defaults to strcat.  In a USE_AS_STRNCAT build no
   default is supplied here, so STRCAT must already be defined by
   whatever includes this file.  */
#ifndef USE_AS_STRNCAT
# ifndef STRCAT
# define STRCAT strcat
# endif
#endif
/* Names of the implementations the resolver chooses between and of the
   libc-internal (__GI_) aliases, for the strncat and strcat flavours
   respectively.  */
#ifdef USE_AS_STRNCAT
# define STRCAT_SSSE3 __strncat_ssse3
# define STRCAT_SSE2 __strncat_sse2
# define STRCAT_SSE2_UNALIGNED __strncat_sse2_unaligned
# define __GI_STRCAT __GI_strncat
# define __GI___STRCAT __GI___strncat
#else
# define STRCAT_SSSE3 __strcat_ssse3
# define STRCAT_SSE2 __strcat_sse2
# define STRCAT_SSE2_UNALIGNED __strcat_sse2_unaligned
# define __GI_STRCAT __GI_strcat
# define __GI___STRCAT __GI___strcat
#endif
/* Define multiple versions only for the definition in libc. */
#ifndef NOT_IN_libc
.text
/* Resolver for STRCAT (declared as a gnu_indirect_function): returns
   in %rax the address of the implementation to use.
   Selection order:
     1. Fast_Unaligned_Load set  -> STRCAT_SSE2_UNALIGNED
     2. otherwise, SSSE3 set     -> STRCAT_SSSE3
     3. otherwise                -> STRCAT_SSE2  */
ENTRY(STRCAT)
.type STRCAT, @gnu_indirect_function
/* Call __init_cpu_features when the word at __cpu_features+KIND_OFFSET
   is still zero (presumably meaning "not initialised yet" -- confirm
   against init-arch.h).  */
cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f
call __init_cpu_features
/* Each candidate is loaded into %rax before the test that decides
   whether it is kept; leaq does not modify flags.  */
1: leaq STRCAT_SSE2_UNALIGNED(%rip), %rax
testl $bit_Fast_Unaligned_Load, __cpu_features+FEATURE_OFFSET+index_Fast_Unaligned_Load(%rip)
jnz 2f
leaq STRCAT_SSE2(%rip), %rax
testl $bit_SSSE3, __cpu_features+CPUID_OFFSET+index_SSSE3(%rip)
jz 2f
leaq STRCAT_SSSE3(%rip), %rax
2: ret
END(STRCAT)
/* From here on ENTRY/END ignore their argument and always emit the
   symbol STRCAT_SSE2, so that the generic implementation included at
   the bottom of this file is assembled under that name instead of the
   public one (which is the resolver above).  */
# undef ENTRY
# define ENTRY(name) \
.type STRCAT_SSE2, @function; \
.align 16; \
STRCAT_SSE2: cfi_startproc; \
CALL_MCOUNT
# undef END
# define END(name) \
cfi_endproc; .size STRCAT_SSE2, .-STRCAT_SSE2
# undef libc_hidden_builtin_def
/* It doesn't make sense to send libc-internal strcat calls through a PLT.
The speedup we get from using SSSE3 instructions is likely eaten away
by the indirect call in the PLT.  The libc-internal aliases are
therefore bound directly to the SSE2 version.  */
# define libc_hidden_builtin_def(name) \
.globl __GI_STRCAT; __GI_STRCAT = STRCAT_SSE2
# undef libc_hidden_def
# define libc_hidden_def(name) \
.globl __GI___STRCAT; __GI___STRCAT = STRCAT_SSE2
#endif
/* The generic strcat body is only pulled in for the strcat flavour; a
   USE_AS_STRNCAT build must get its STRCAT_SSE2 definition elsewhere.  */
#ifndef USE_AS_STRNCAT
# include "../strcat.S"
#endif

View File

@ -20,12 +20,15 @@
#ifndef NOT_IN_libc #ifndef NOT_IN_libc
# ifndef USE_AS_STRCAT
# include <sysdep.h> # include <sysdep.h>
# ifndef STRCPY # ifndef STRCPY
# define STRCPY __strcpy_sse2_unaligned # define STRCPY __strcpy_sse2_unaligned
# endif # endif
# endif
# define JMPTBL(I, B) I - B # define JMPTBL(I, B) I - B
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
lea TABLE(%rip), %r11; \ lea TABLE(%rip), %r11; \
@ -33,7 +36,9 @@
lea (%r11, %rcx), %rcx; \ lea (%r11, %rcx), %rcx; \
jmp *%rcx jmp *%rcx
.text # ifndef USE_AS_STRCAT
.text
ENTRY (STRCPY) ENTRY (STRCPY)
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
mov %rdx, %r8 mov %rdx, %r8
@ -43,6 +48,8 @@ ENTRY (STRCPY)
mov %rsi, %rcx mov %rsi, %rcx
# ifndef USE_AS_STPCPY # ifndef USE_AS_STPCPY
mov %rdi, %rax /* save result */ mov %rdi, %rax /* save result */
# endif
# endif # endif
and $15, %rcx and $15, %rcx
@ -59,7 +66,7 @@ ENTRY (STRCPY)
pmovmskb %xmm1, %rdx pmovmskb %xmm1, %rdx
shr %cl, %rdx shr %cl, %rdx
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $16, %r8 cmp $16, %r8
# else # else
cmp $17, %r8 cmp $17, %r8
@ -72,7 +79,7 @@ ENTRY (STRCPY)
pcmpeqb 16(%rsi), %xmm0 pcmpeqb 16(%rsi), %xmm0
pmovmskb %xmm0, %rdx pmovmskb %xmm0, %rdx
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $32, %r8 cmp $32, %r8
# else # else
cmp $33, %r8 cmp $33, %r8
@ -102,7 +109,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif # endif
test %rdx, %rdx test %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm2) jnz L(CopyFrom1To16BytesUnalignedXmm2)
# else # else
jnz L(CopyFrom1To16Bytes) jnz L(CopyFrom1To16Bytes)
@ -118,7 +125,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif # endif
test %rdx, %rdx test %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm3) jnz L(CopyFrom1To16BytesUnalignedXmm3)
# else # else
jnz L(CopyFrom1To16Bytes) jnz L(CopyFrom1To16Bytes)
@ -134,7 +141,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif # endif
test %rdx, %rdx test %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4) jnz L(CopyFrom1To16BytesUnalignedXmm4)
# else # else
jnz L(CopyFrom1To16Bytes) jnz L(CopyFrom1To16Bytes)
@ -150,7 +157,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif # endif
test %rdx, %rdx test %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm1) jnz L(CopyFrom1To16BytesUnalignedXmm1)
# else # else
jnz L(CopyFrom1To16Bytes) jnz L(CopyFrom1To16Bytes)
@ -166,7 +173,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif # endif
test %rdx, %rdx test %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm2) jnz L(CopyFrom1To16BytesUnalignedXmm2)
# else # else
jnz L(CopyFrom1To16Bytes) jnz L(CopyFrom1To16Bytes)
@ -182,7 +189,7 @@ L(Unalign16Both):
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
# endif # endif
test %rdx, %rdx test %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm3) jnz L(CopyFrom1To16BytesUnalignedXmm3)
# else # else
jnz L(CopyFrom1To16Bytes) jnz L(CopyFrom1To16Bytes)
@ -264,7 +271,7 @@ L(Unaligned64Leave):
movdqu %xmm4, (%rdi) movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi) movdqu %xmm5, 16(%rdi)
movdqu %xmm6, 32(%rdi) movdqu %xmm6, 32(%rdi)
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 48(%rdi, %rdx), %rax lea 48(%rdi, %rdx), %rax
# endif # endif
@ -288,7 +295,7 @@ L(SourceStringAlignmentZero):
pmovmskb %xmm0, %rdx pmovmskb %xmm0, %rdx
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $16, %r8 cmp $16, %r8
# else # else
cmp $17, %r8 cmp $17, %r8
@ -303,7 +310,7 @@ L(SourceStringAlignmentZero):
pmovmskb %xmm0, %rdx pmovmskb %xmm0, %rdx
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
# if defined USE_AS_STPCPY # if defined USE_AS_STPCPY || defined USE_AS_STRCAT
cmp $32, %r8 cmp $32, %r8
# else # else
cmp $33, %r8 cmp $33, %r8
@ -314,11 +321,11 @@ L(SourceStringAlignmentZero):
jnz L(CopyFrom1To32Bytes1) jnz L(CopyFrom1To32Bytes1)
jmp L(Unalign16Both) jmp L(Unalign16Both)
/* ------End of main part with loops--------------------- */ /*------End of main part with loops---------------------*/
/* Case1 */ /* Case1 */
# if (!defined USE_AS_STRNCPY) # if (!defined USE_AS_STRNCPY) || (defined USE_AS_STRCAT)
.p2align 4 .p2align 4
L(CopyFrom1To16Bytes): L(CopyFrom1To16Bytes):
add %rcx, %rdi add %rcx, %rdi
@ -328,7 +335,7 @@ L(CopyFrom1To16Bytes):
# endif # endif
.p2align 4 .p2align 4
L(CopyFrom1To16BytesTail): L(CopyFrom1To16BytesTail):
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub %rcx, %r8 sub %rcx, %r8
# endif # endif
add %rcx, %rsi add %rcx, %rsi
@ -339,7 +346,7 @@ L(CopyFrom1To16BytesTail):
L(CopyFrom1To32Bytes1): L(CopyFrom1To32Bytes1):
add $16, %rsi add $16, %rsi
add $16, %rdi add $16, %rdi
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $16, %r8 sub $16, %r8
# endif # endif
L(CopyFrom1To16BytesTail1): L(CopyFrom1To16BytesTail1):
@ -348,7 +355,7 @@ L(CopyFrom1To16BytesTail1):
.p2align 4 .p2align 4
L(CopyFrom1To32Bytes): L(CopyFrom1To32Bytes):
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub %rcx, %r8 sub %rcx, %r8
# endif # endif
bsf %rdx, %rdx bsf %rdx, %rdx
@ -360,7 +367,7 @@ L(CopyFrom1To32Bytes):
.p2align 4 .p2align 4
L(CopyFrom1To16BytesUnaligned_0): L(CopyFrom1To16BytesUnaligned_0):
bsf %rdx, %rdx bsf %rdx, %rdx
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea (%rdi, %rdx), %rax lea (%rdi, %rdx), %rax
# endif # endif
@ -377,7 +384,7 @@ L(CopyFrom1To16BytesUnaligned_0):
L(CopyFrom1To16BytesUnaligned_16): L(CopyFrom1To16BytesUnaligned_16):
bsf %rcx, %rdx bsf %rcx, %rdx
movdqu %xmm4, (%rdi) movdqu %xmm4, (%rdi)
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 16(%rdi, %rdx), %rax lea 16(%rdi, %rdx), %rax
# endif # endif
@ -397,7 +404,7 @@ L(CopyFrom1To16BytesUnaligned_32):
bsf %rdx, %rdx bsf %rdx, %rdx
movdqu %xmm4, (%rdi) movdqu %xmm4, (%rdi)
movdqu %xmm5, 16(%rdi) movdqu %xmm5, 16(%rdi)
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 32(%rdi, %rdx), %rax lea 32(%rdi, %rdx), %rax
# endif # endif
@ -413,6 +420,7 @@ L(CopyFrom1To16BytesUnaligned_32):
# endif # endif
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
# ifndef USE_AS_STRCAT
.p2align 4 .p2align 4
L(CopyFrom1To16BytesUnalignedXmm6): L(CopyFrom1To16BytesUnalignedXmm6):
movdqu %xmm6, (%rdi, %rcx) movdqu %xmm6, (%rdi, %rcx)
@ -437,6 +445,7 @@ L(CopyFrom1To16BytesUnalignedXmm3):
L(CopyFrom1To16BytesUnalignedXmm1): L(CopyFrom1To16BytesUnalignedXmm1):
movdqu %xmm1, (%rdi, %rcx) movdqu %xmm1, (%rdi, %rcx)
jmp L(CopyFrom1To16BytesXmmExit) jmp L(CopyFrom1To16BytesXmmExit)
# endif
.p2align 4 .p2align 4
L(CopyFrom1To16BytesExit): L(CopyFrom1To16BytesExit):
@ -519,7 +528,7 @@ L(CopyFrom1To16BytesTail1Case2OrCase3):
# endif # endif
/* ----End labels regarding with copying 1-16 bytes--and 1-32 bytes---- */ /*------------End labels regarding with copying 1-16 bytes--and 1-32 bytes----*/
.p2align 4 .p2align 4
L(Exit1): L(Exit1):
@ -527,7 +536,7 @@ L(Exit1):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea (%rdi), %rax lea (%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $1, %r8 sub $1, %r8
lea 1(%rdi), %rdi lea 1(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -541,7 +550,7 @@ L(Exit2):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 1(%rdi), %rax lea 1(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $2, %r8 sub $2, %r8
lea 2(%rdi), %rdi lea 2(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -556,7 +565,7 @@ L(Exit3):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 2(%rdi), %rax lea 2(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $3, %r8 sub $3, %r8
lea 3(%rdi), %rdi lea 3(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -570,7 +579,7 @@ L(Exit4):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 3(%rdi), %rax lea 3(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $4, %r8 sub $4, %r8
lea 4(%rdi), %rdi lea 4(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -585,7 +594,7 @@ L(Exit5):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 4(%rdi), %rax lea 4(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $5, %r8 sub $5, %r8
lea 5(%rdi), %rdi lea 5(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -601,7 +610,7 @@ L(Exit6):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 5(%rdi), %rax lea 5(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $6, %r8 sub $6, %r8
lea 6(%rdi), %rdi lea 6(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -617,7 +626,7 @@ L(Exit7):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 6(%rdi), %rax lea 6(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $7, %r8 sub $7, %r8
lea 7(%rdi), %rdi lea 7(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -631,7 +640,7 @@ L(Exit8):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 7(%rdi), %rax lea 7(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $8, %r8 sub $8, %r8
lea 8(%rdi), %rdi lea 8(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -646,7 +655,7 @@ L(Exit9):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 8(%rdi), %rax lea 8(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $9, %r8 sub $9, %r8
lea 9(%rdi), %rdi lea 9(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -662,7 +671,7 @@ L(Exit10):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 9(%rdi), %rax lea 9(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $10, %r8 sub $10, %r8
lea 10(%rdi), %rdi lea 10(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -678,7 +687,7 @@ L(Exit11):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 10(%rdi), %rax lea 10(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $11, %r8 sub $11, %r8
lea 11(%rdi), %rdi lea 11(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -694,7 +703,7 @@ L(Exit12):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 11(%rdi), %rax lea 11(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $12, %r8 sub $12, %r8
lea 12(%rdi), %rdi lea 12(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -710,7 +719,7 @@ L(Exit13):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 12(%rdi), %rax lea 12(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $13, %r8 sub $13, %r8
lea 13(%rdi), %rdi lea 13(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -726,7 +735,7 @@ L(Exit14):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 13(%rdi), %rax lea 13(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $14, %r8 sub $14, %r8
lea 14(%rdi), %rdi lea 14(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -742,7 +751,7 @@ L(Exit15):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 14(%rdi), %rax lea 14(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $15, %r8 sub $15, %r8
lea 15(%rdi), %rdi lea 15(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -756,7 +765,7 @@ L(Exit16):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 15(%rdi), %rax lea 15(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $16, %r8 sub $16, %r8
lea 16(%rdi), %rdi lea 16(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -771,7 +780,7 @@ L(Exit17):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 16(%rdi), %rax lea 16(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $17, %r8 sub $17, %r8
lea 17(%rdi), %rdi lea 17(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -787,7 +796,7 @@ L(Exit18):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 17(%rdi), %rax lea 17(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $18, %r8 sub $18, %r8
lea 18(%rdi), %rdi lea 18(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -803,7 +812,7 @@ L(Exit19):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 18(%rdi), %rax lea 18(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $19, %r8 sub $19, %r8
lea 19(%rdi), %rdi lea 19(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -819,7 +828,7 @@ L(Exit20):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 19(%rdi), %rax lea 19(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $20, %r8 sub $20, %r8
lea 20(%rdi), %rdi lea 20(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -836,7 +845,7 @@ L(Exit21):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 20(%rdi), %rax lea 20(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $21, %r8 sub $21, %r8
lea 21(%rdi), %rdi lea 21(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -852,7 +861,7 @@ L(Exit22):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 21(%rdi), %rax lea 21(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $22, %r8 sub $22, %r8
lea 22(%rdi), %rdi lea 22(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -868,7 +877,7 @@ L(Exit23):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 22(%rdi), %rax lea 22(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $23, %r8 sub $23, %r8
lea 23(%rdi), %rdi lea 23(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -884,7 +893,7 @@ L(Exit24):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 23(%rdi), %rax lea 23(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $24, %r8 sub $24, %r8
lea 24(%rdi), %rdi lea 24(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -901,7 +910,7 @@ L(Exit25):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 24(%rdi), %rax lea 24(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $25, %r8 sub $25, %r8
lea 25(%rdi), %rdi lea 25(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -919,7 +928,7 @@ L(Exit26):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 25(%rdi), %rax lea 25(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $26, %r8 sub $26, %r8
lea 26(%rdi), %rdi lea 26(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -937,7 +946,7 @@ L(Exit27):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 26(%rdi), %rax lea 26(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $27, %r8 sub $27, %r8
lea 27(%rdi), %rdi lea 27(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -955,7 +964,7 @@ L(Exit28):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 27(%rdi), %rax lea 27(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $28, %r8 sub $28, %r8
lea 28(%rdi), %rdi lea 28(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -971,7 +980,7 @@ L(Exit29):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 28(%rdi), %rax lea 28(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $29, %r8 sub $29, %r8
lea 29(%rdi), %rdi lea 29(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -987,7 +996,7 @@ L(Exit30):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 29(%rdi), %rax lea 29(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $30, %r8 sub $30, %r8
lea 30(%rdi), %rdi lea 30(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -1003,7 +1012,7 @@ L(Exit31):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 30(%rdi), %rax lea 30(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $31, %r8 sub $31, %r8
lea 31(%rdi), %rdi lea 31(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -1019,7 +1028,7 @@ L(Exit32):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 31(%rdi), %rax lea 31(%rdi), %rax
# endif # endif
# if defined USE_AS_STRNCPY # if defined USE_AS_STRNCPY && !defined USE_AS_STRCAT
sub $32, %r8 sub $32, %r8
lea 32(%rdi), %rdi lea 32(%rdi), %rdi
jnz L(StrncpyFillTailWithZero) jnz L(StrncpyFillTailWithZero)
@ -1032,6 +1041,10 @@ L(Exit32):
L(StrncpyExit0): L(StrncpyExit0):
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
mov %rdi, %rax mov %rdi, %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, (%rdi)
# endif # endif
ret ret
@ -1041,6 +1054,10 @@ L(StrncpyExit1):
mov %dl, (%rdi) mov %dl, (%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 1(%rdi), %rax lea 1(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 1(%rdi)
# endif # endif
ret ret
@ -1050,6 +1067,10 @@ L(StrncpyExit2):
mov %dx, (%rdi) mov %dx, (%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 2(%rdi), %rax lea 2(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 2(%rdi)
# endif # endif
ret ret
@ -1061,6 +1082,10 @@ L(StrncpyExit3):
mov %dl, 2(%rdi) mov %dl, 2(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 3(%rdi), %rax lea 3(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 3(%rdi)
# endif # endif
ret ret
@ -1070,6 +1095,10 @@ L(StrncpyExit4):
mov %edx, (%rdi) mov %edx, (%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 4(%rdi), %rax lea 4(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 4(%rdi)
# endif # endif
ret ret
@ -1081,6 +1110,10 @@ L(StrncpyExit5):
mov %dl, 4(%rdi) mov %dl, 4(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 5(%rdi), %rax lea 5(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 5(%rdi)
# endif # endif
ret ret
@ -1092,6 +1125,10 @@ L(StrncpyExit6):
mov %dx, 4(%rdi) mov %dx, 4(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 6(%rdi), %rax lea 6(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 6(%rdi)
# endif # endif
ret ret
@ -1103,6 +1140,10 @@ L(StrncpyExit7):
mov %edx, 3(%rdi) mov %edx, 3(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 7(%rdi), %rax lea 7(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 7(%rdi)
# endif # endif
ret ret
@ -1112,6 +1153,10 @@ L(StrncpyExit8):
mov %rdx, (%rdi) mov %rdx, (%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 8(%rdi), %rax lea 8(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 8(%rdi)
# endif # endif
ret ret
@ -1123,6 +1168,10 @@ L(StrncpyExit9):
mov %dl, 8(%rdi) mov %dl, 8(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 9(%rdi), %rax lea 9(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 9(%rdi)
# endif # endif
ret ret
@ -1134,6 +1183,10 @@ L(StrncpyExit10):
mov %dx, 8(%rdi) mov %dx, 8(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 10(%rdi), %rax lea 10(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 10(%rdi)
# endif # endif
ret ret
@ -1145,6 +1198,10 @@ L(StrncpyExit11):
mov %edx, 7(%rdi) mov %edx, 7(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 11(%rdi), %rax lea 11(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 11(%rdi)
# endif # endif
ret ret
@ -1156,6 +1213,10 @@ L(StrncpyExit12):
mov %edx, 8(%rdi) mov %edx, 8(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 12(%rdi), %rax lea 12(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 12(%rdi)
# endif # endif
ret ret
@ -1167,6 +1228,10 @@ L(StrncpyExit13):
mov %rdx, 5(%rdi) mov %rdx, 5(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 13(%rdi), %rax lea 13(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 13(%rdi)
# endif # endif
ret ret
@ -1178,6 +1243,10 @@ L(StrncpyExit14):
mov %rdx, 6(%rdi) mov %rdx, 6(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 14(%rdi), %rax lea 14(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 14(%rdi)
# endif # endif
ret ret
@ -1189,6 +1258,10 @@ L(StrncpyExit15):
mov %rdx, 7(%rdi) mov %rdx, 7(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 15(%rdi), %rax lea 15(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 15(%rdi)
# endif # endif
ret ret
@ -1198,6 +1271,10 @@ L(StrncpyExit16):
movdqu %xmm0, (%rdi) movdqu %xmm0, (%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 16(%rdi), %rax lea 16(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 16(%rdi)
# endif # endif
ret ret
@ -1209,6 +1286,10 @@ L(StrncpyExit17):
mov %cl, 16(%rdi) mov %cl, 16(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 17(%rdi), %rax lea 17(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 17(%rdi)
# endif # endif
ret ret
@ -1220,6 +1301,10 @@ L(StrncpyExit18):
mov %cx, 16(%rdi) mov %cx, 16(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 18(%rdi), %rax lea 18(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 18(%rdi)
# endif # endif
ret ret
@ -1231,6 +1316,10 @@ L(StrncpyExit19):
mov %ecx, 15(%rdi) mov %ecx, 15(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 19(%rdi), %rax lea 19(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 19(%rdi)
# endif # endif
ret ret
@ -1242,6 +1331,10 @@ L(StrncpyExit20):
mov %ecx, 16(%rdi) mov %ecx, 16(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 20(%rdi), %rax lea 20(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 20(%rdi)
# endif # endif
ret ret
@ -1255,6 +1348,10 @@ L(StrncpyExit21):
mov %dl, 20(%rdi) mov %dl, 20(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 21(%rdi), %rax lea 21(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 21(%rdi)
# endif # endif
ret ret
@ -1266,6 +1363,10 @@ L(StrncpyExit22):
mov %rcx, 14(%rdi) mov %rcx, 14(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 22(%rdi), %rax lea 22(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 22(%rdi)
# endif # endif
ret ret
@ -1277,6 +1378,10 @@ L(StrncpyExit23):
mov %rcx, 15(%rdi) mov %rcx, 15(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 23(%rdi), %rax lea 23(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 23(%rdi)
# endif # endif
ret ret
@ -1288,6 +1393,10 @@ L(StrncpyExit24):
mov %rcx, 16(%rdi) mov %rcx, 16(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 24(%rdi), %rax lea 24(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 24(%rdi)
# endif # endif
ret ret
@ -1301,6 +1410,10 @@ L(StrncpyExit25):
mov %cl, 24(%rdi) mov %cl, 24(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 25(%rdi), %rax lea 25(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 25(%rdi)
# endif # endif
ret ret
@ -1314,6 +1427,10 @@ L(StrncpyExit26):
mov %cx, 24(%rdi) mov %cx, 24(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 26(%rdi), %rax lea 26(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 26(%rdi)
# endif # endif
ret ret
@ -1327,6 +1444,10 @@ L(StrncpyExit27):
mov %ecx, 23(%rdi) mov %ecx, 23(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 27(%rdi), %rax lea 27(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 27(%rdi)
# endif # endif
ret ret
@ -1340,6 +1461,10 @@ L(StrncpyExit28):
mov %ecx, 24(%rdi) mov %ecx, 24(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 28(%rdi), %rax lea 28(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 28(%rdi)
# endif # endif
ret ret
@ -1351,6 +1476,10 @@ L(StrncpyExit29):
movdqu %xmm2, 13(%rdi) movdqu %xmm2, 13(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 29(%rdi), %rax lea 29(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 29(%rdi)
# endif # endif
ret ret
@ -1362,6 +1491,10 @@ L(StrncpyExit30):
movdqu %xmm2, 14(%rdi) movdqu %xmm2, 14(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 30(%rdi), %rax lea 30(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 30(%rdi)
# endif # endif
ret ret
@ -1373,6 +1506,10 @@ L(StrncpyExit31):
movdqu %xmm2, 15(%rdi) movdqu %xmm2, 15(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 31(%rdi), %rax lea 31(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 31(%rdi)
# endif # endif
ret ret
@ -1384,6 +1521,10 @@ L(StrncpyExit32):
movdqu %xmm2, 16(%rdi) movdqu %xmm2, 16(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 32(%rdi), %rax lea 32(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 32(%rdi)
# endif # endif
ret ret
@ -1395,8 +1536,14 @@ L(StrncpyExit33):
movdqu %xmm0, (%rdi) movdqu %xmm0, (%rdi)
movdqu %xmm2, 16(%rdi) movdqu %xmm2, 16(%rdi)
mov %cl, 32(%rdi) mov %cl, 32(%rdi)
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 33(%rdi)
# endif
ret ret
# ifndef USE_AS_STRCAT
.p2align 4 .p2align 4
L(Fill0): L(Fill0):
ret ret
@ -1553,6 +1700,9 @@ L(StrncpyFillExit):
add $16, %r8 add $16, %r8
BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4) BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %r8, 4)
/* end of ifndef USE_AS_STRCAT */
# endif
.p2align 4 .p2align 4
L(UnalignedLeaveCase2OrCase3): L(UnalignedLeaveCase2OrCase3):
test %rdx, %rdx test %rdx, %rdx
@ -1574,6 +1724,10 @@ L(Unaligned64LeaveCase3):
movdqu %xmm7, 48(%rdi) movdqu %xmm7, 48(%rdi)
# ifdef USE_AS_STPCPY # ifdef USE_AS_STPCPY
lea 64(%rdi), %rax lea 64(%rdi), %rax
# endif
# ifdef USE_AS_STRCAT
xor %ch, %ch
movb %ch, 64(%rdi)
# endif # endif
ret ret
@ -1585,8 +1739,11 @@ L(Unaligned64LeaveCase2):
add $48, %r8 add $48, %r8
jle L(CopyFrom1To16BytesCase2OrCase3) jle L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx test %rdx, %rdx
# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm4) jnz L(CopyFrom1To16BytesUnalignedXmm4)
# else
jnz L(CopyFrom1To16Bytes)
# endif
pcmpeqb %xmm5, %xmm0 pcmpeqb %xmm5, %xmm0
pmovmskb %xmm0, %rdx pmovmskb %xmm0, %rdx
movdqu %xmm4, (%rdi) movdqu %xmm4, (%rdi)
@ -1594,7 +1751,11 @@ L(Unaligned64LeaveCase2):
sub $16, %r8 sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx test %rdx, %rdx
# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm5) jnz L(CopyFrom1To16BytesUnalignedXmm5)
# else
jnz L(CopyFrom1To16Bytes)
# endif
pcmpeqb %xmm6, %xmm0 pcmpeqb %xmm6, %xmm0
pmovmskb %xmm0, %rdx pmovmskb %xmm0, %rdx
@ -1603,7 +1764,11 @@ L(Unaligned64LeaveCase2):
sub $16, %r8 sub $16, %r8
jbe L(CopyFrom1To16BytesCase2OrCase3) jbe L(CopyFrom1To16BytesCase2OrCase3)
test %rdx, %rdx test %rdx, %rdx
# ifndef USE_AS_STRCAT
jnz L(CopyFrom1To16BytesUnalignedXmm6) jnz L(CopyFrom1To16BytesUnalignedXmm6)
# else
jnz L(CopyFrom1To16Bytes)
# endif
pcmpeqb %xmm7, %xmm0 pcmpeqb %xmm7, %xmm0
pmovmskb %xmm0, %rdx pmovmskb %xmm0, %rdx
@ -1617,13 +1782,18 @@ L(Unaligned64LeaveCase2):
.p2align 4 .p2align 4
L(ExitZero): L(ExitZero):
# ifndef USE_AS_STRCAT
mov %rdi, %rax mov %rdi, %rax
# endif
ret ret
# endif # endif
# ifndef USE_AS_STRCAT
END (STRCPY) END (STRCPY)
# else
END (STRCAT)
# endif
.p2align 4 .p2align 4
.section .rodata .section .rodata
L(ExitTable): L(ExitTable):
@ -1695,6 +1865,7 @@ L(ExitStrncpyTable):
.int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable)) .int JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
.int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable)) .int JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
.int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable)) .int JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))
# ifndef USE_AS_STRCAT
.p2align 4 .p2align 4
L(FillTable): L(FillTable):
.int JMPTBL(L(Fill0), L(FillTable)) .int JMPTBL(L(Fill0), L(FillTable))
@ -1715,4 +1886,6 @@ L(FillTable):
.int JMPTBL(L(Fill15), L(FillTable)) .int JMPTBL(L(Fill15), L(FillTable))
.int JMPTBL(L(Fill16), L(FillTable)) .int JMPTBL(L(Fill16), L(FillTable))
# endif # endif
# endif
#endif #endif

View File

@ -20,6 +20,7 @@
#ifndef NOT_IN_libc #ifndef NOT_IN_libc
# ifndef USE_AS_STRCAT
# include <sysdep.h> # include <sysdep.h>
# ifndef STRCPY # ifndef STRCPY
@ -79,6 +80,7 @@ ENTRY (STRCPY)
# endif # endif
cmpb $0, 15(%rcx) cmpb $0, 15(%rcx)
jz L(Exit16) jz L(Exit16)
# endif
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
mov %rcx, %rsi mov %rcx, %rsi
@ -2180,7 +2182,7 @@ L(Shl15LoopExit):
jmp L(CopyFrom1To16Bytes) jmp L(CopyFrom1To16Bytes)
# endif # endif
# ifndef USE_AS_STRCAT
.p2align 4 .p2align 4
L(CopyFrom1To16Bytes): L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
@ -2954,6 +2956,8 @@ L(StrncpyExit8Bytes):
# endif # endif
# endif
# ifdef USE_AS_STRNCPY # ifdef USE_AS_STRNCPY
L(StrncpyLeaveCase2OrCase3): L(StrncpyLeaveCase2OrCase3):
@ -3715,7 +3719,7 @@ L(StrncpyExit15):
lea 1(%rsi), %rsi lea 1(%rsi), %rsi
jmp L(CopyFrom1To16BytesCase3) jmp L(CopyFrom1To16BytesCase3)
# endif # endif
# ifndef USE_AS_STRCAT
END (STRCPY) END (STRCPY)
# endif
#endif #endif

View File

@ -1,5 +1,5 @@
/* strlen without BSF /* strlen SSE2 without bsf
Copyright (C) 2010 Free Software Foundation, Inc. Copyright (C) 2010, 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation. Contributed by Intel Corporation.
This file is part of the GNU C Library. This file is part of the GNU C Library.
@ -18,12 +18,17 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */ 02111-1307 USA. */
#if defined SHARED && !defined NOT_IN_libc #if (defined SHARED || defined USE_AS_STRCAT) && !defined NOT_IN_libc
#include <sysdep.h> # ifndef USE_AS_STRCAT
.section .text.slow,"ax",@progbits # include <sysdep.h>
# define RETURN ret
.section .text.sse2,"ax",@progbits
ENTRY (__strlen_no_bsf) ENTRY (__strlen_no_bsf)
# endif
xor %eax, %eax xor %eax, %eax
cmpb $0, (%rdi) cmpb $0, (%rdi)
jz L(exit_tail0) jz L(exit_tail0)
@ -165,39 +170,37 @@ ENTRY (__strlen_no_bsf)
jnz L(exit) jnz L(exit)
and $-0x40, %rax and $-0x40, %rax
xor %r8d, %r8d
L(aligned_64): L(aligned_64):
pcmpeqb (%rax), %xmm0 pcmpeqb (%rax), %xmm0
pcmpeqb 16(%rax), %xmm1 pcmpeqb 16(%rax), %xmm1
pcmpeqb 32(%rax), %xmm2 pcmpeqb 32(%rax), %xmm2
pcmpeqb 48(%rax), %xmm3 pcmpeqb 48(%rax), %xmm3
pmovmskb %xmm0, %edx pmovmskb %xmm0, %edx
pmovmskb %xmm1, %esi pmovmskb %xmm1, %r11d
pmovmskb %xmm2, %edi pmovmskb %xmm2, %r10d
pmovmskb %xmm3, %r9d pmovmskb %xmm3, %r9d
or %edx, %r8d or %edx, %r9d
or %esi, %r8d or %r11d, %r9d
or %edi, %r8d or %r10d, %r9d
or %r9d, %r8d
lea 64(%rax), %rax lea 64(%rax), %rax
jz L(aligned_64) jz L(aligned_64)
test %edx, %edx test %edx, %edx
jnz L(aligned_64_exit_16) jnz L(aligned_64_exit_16)
test %esi, %esi test %r11d, %r11d
jnz L(aligned_64_exit_32) jnz L(aligned_64_exit_32)
test %edi, %edi test %r10d, %r10d
jnz L(aligned_64_exit_48) jnz L(aligned_64_exit_48)
L(aligned_64_exit_64): L(aligned_64_exit_64):
mov %r9d, %edx pmovmskb %xmm3, %edx
jmp L(aligned_64_exit) jmp L(aligned_64_exit)
L(aligned_64_exit_48): L(aligned_64_exit_48):
lea -16(%rax), %rax lea -16(%rax), %rax
mov %edi, %edx mov %r10d, %edx
jmp L(aligned_64_exit) jmp L(aligned_64_exit)
L(aligned_64_exit_32): L(aligned_64_exit_32):
lea -32(%rax), %rax lea -32(%rax), %rax
mov %esi, %edx mov %r11d, %edx
jmp L(aligned_64_exit) jmp L(aligned_64_exit)
L(aligned_64_exit_16): L(aligned_64_exit_16):
lea -48(%rax), %rax lea -48(%rax), %rax
@ -228,7 +231,7 @@ L(exit):
jnz L(exit_tail6) jnz L(exit_tail6)
add $7, %eax add $7, %eax
L(exit_tail0): L(exit_tail0):
ret RETURN
L(exit_high): L(exit_high):
add $8, %eax add $8, %eax
@ -253,57 +256,58 @@ L(exit_high):
test $0x40, %dh test $0x40, %dh
jnz L(exit_tail6) jnz L(exit_tail6)
add $7, %eax add $7, %eax
ret RETURN
.p2align 4 .p2align 4
L(exit_tail1): L(exit_tail1):
add $1, %eax add $1, %eax
ret RETURN
L(exit_tail2): L(exit_tail2):
add $2, %eax add $2, %eax
ret RETURN
L(exit_tail3): L(exit_tail3):
add $3, %eax add $3, %eax
ret RETURN
L(exit_tail4): L(exit_tail4):
add $4, %eax add $4, %eax
ret RETURN
L(exit_tail5): L(exit_tail5):
add $5, %eax add $5, %eax
ret RETURN
L(exit_tail6): L(exit_tail6):
add $6, %eax add $6, %eax
ret RETURN
L(exit_tail7): L(exit_tail7):
add $7, %eax add $7, %eax
ret RETURN
L(exit_tail8): L(exit_tail8):
add $8, %eax add $8, %eax
ret RETURN
L(exit_tail9): L(exit_tail9):
add $9, %eax add $9, %eax
ret RETURN
L(exit_tail10): L(exit_tail10):
add $10, %eax add $10, %eax
ret RETURN
L(exit_tail11): L(exit_tail11):
add $11, %eax add $11, %eax
ret RETURN
L(exit_tail12): L(exit_tail12):
add $12, %eax add $12, %eax
ret RETURN
L(exit_tail13): L(exit_tail13):
add $13, %eax add $13, %eax
ret RETURN
L(exit_tail14): L(exit_tail14):
add $14, %eax add $14, %eax
ret RETURN
L(exit_tail15): L(exit_tail15):
add $15, %eax add $15, %eax
ret # ifndef USE_AS_STRCAT
RETURN
END (__strlen_no_bsf) END (__strlen_no_bsf)
# endif
#endif #endif

View File

@ -0,0 +1,260 @@
/* strlen SSE2
Copyright (C) 2011 Free Software Foundation, Inc.
Contributed by Intel Corporation.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
#if !defined NOT_IN_libc && (defined SHARED || defined USE_AS_STRCAT)
# ifndef USE_AS_STRCAT
# include <sysdep.h>
# define RETURN ret
.section .text.sse2,"ax",@progbits
ENTRY (__strlen_sse2_pminub)
# endif
/* String-length scan built on SSE2 byte compares.
   In:      %rdi = address of the first byte to scan.
   Out:     %rax = distance in bytes from %rdi to the first zero byte.
   Scratch: %rcx, %rdx, %r10, %xmm0-%xmm5, flags.
   When USE_AS_STRCAT is defined no section/ENTRY is emitted and RETURN
   is not defined in this file -- presumably the including file
   supplies it; confirm against the includer.  */
/* Zero the result.  Writing the 32-bit register also clears the upper
   half of %rax and encodes without a REX.W prefix.  */
xor %eax, %eax
/* %ecx = offset of the start address inside its 64-byte block.  */
mov %edi, %ecx
and $0x3f, %ecx
pxor %xmm0, %xmm0
/* An offset of at most 0x30 means the 16 bytes at (%rdi) end inside the
   same 64-byte block, so they can be read with one unaligned load.  */
cmp $0x30, %ecx
ja L(next)
movdqu (%rdi), %xmm1
pcmpeqb %xmm1, %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
/* %rax is still 0 here, so the exit stub returns the bit index itself.  */
jnz L(exit_less16)
/* No terminator in the first 16 bytes: continue from the 16-byte
   aligned address at or below %rdi.  */
mov %rdi, %rax
and $-16, %rax
jmp L(align16_start)
L(next):
/* Start is in the last quarter of its 64-byte block: read the enclosing
   aligned 16-byte chunk instead and ignore the bytes that precede
   %rdi.  */
mov %rdi, %rax
and $-16, %rax
pcmpeqb (%rax), %xmm0
mov $-1, %r10d
/* A 32-bit shift uses only the low 5 bits of %cl; those bits of
   (%rdi & 0x3f) - (%rdi & -16) equal %rdi & 15, so %r10d becomes a mask
   with the bits for bytes below %rdi cleared.  */
sub %rax, %rcx
shl %cl, %r10d
pmovmskb %xmm0, %edx
and %r10d, %edx
jnz L(exit)
/* Unrolled probe of aligned 16-byte chunks.  On entry %rax is the
   16-byte aligned base and everything below 16(%rax) has already been
   checked.  Each pcmpeqb compares a chunk against a register that is
   all-zero (freshly cleared below, or left zero by an earlier compare
   that found no match), so a set mask bit marks a zero byte.  */
L(align16_start):
pxor %xmm0, %xmm0
pxor %xmm1, %xmm1
pxor %xmm2, %xmm2
pxor %xmm3, %xmm3
/* Group 1: chunks at +16, +32, +48 and +64 from %rax.  */
pcmpeqb 16(%rax), %xmm0
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
/* Group 2: the chunk read at 80(%rax) is 16(%rax) once %rax has been
   advanced by 64, which is the offset L(exit16) adds.  */
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
/* Group 3: same pattern, another 64 bytes on.  */
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
/* Group 4: same pattern, another 64 bytes on.  */
pcmpeqb 80(%rax), %xmm0
add $64, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit64)
/* Alignment steps: enter the 64-byte loop as soon as %rax is a multiple
   of 64; otherwise keep probing one 16-byte chunk at a time.  From here
   on %rax points at the chunk just probed, so a hit leaves through
   L(exit), which adds no extra offset.  */
test $0x3f, %rax
jz L(align64_loop)
pcmpeqb 80(%rax), %xmm0
add $80, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit)
test $0x3f, %rax
jz L(align64_loop)
pcmpeqb 16(%rax), %xmm1
add $16, %rax
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit)
test $0x3f, %rax
jz L(align64_loop)
pcmpeqb 16(%rax), %xmm2
add $16, %rax
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit)
test $0x3f, %rax
jz L(align64_loop)
/* NOTE(review): %rax stays 16-byte aligned and the four tests above see
   four consecutive residues modulo 64, so one of them appears to always
   branch; this last step looks unreachable -- confirm.  */
pcmpeqb 16(%rax), %xmm3
add $16, %rax
pmovmskb %xmm3, %edx
test %edx, %edx
jnz L(exit)
add $16, %rax
.p2align 4
/* Main loop: examine 64 bytes per iteration.  pminub folds the four
   16-byte chunks into their byte-wise unsigned minimum, so a zero byte
   anywhere in the 64 bytes produces a zero byte in %xmm5.  %xmm0 is
   expected to be all-zero here (its last compare found no match), so
   one pcmpeqb/pmovmskb pair tests the whole block.  movaps requires
   %rax to be 16-byte aligned.  */
L(align64_loop):
movaps (%rax), %xmm4
pminub 16(%rax), %xmm4
movaps 32(%rax), %xmm5
pminub 48(%rax), %xmm5
/* %rax is advanced before the test, so after the loop it points just
   past the 64-byte block that contains the terminator.  */
add $64, %rax
pminub %xmm4, %xmm5
pcmpeqb %xmm0, %xmm5
pmovmskb %xmm5, %edx
test %edx, %edx
jz L(align64_loop)
/* Locate the chunk that holds the zero byte.  %rax is moved back by 80
   so the fixed offsets added by L(exit16)/L(exit32)/L(exit48) land on
   the first, second and third chunk of the block.  */
pcmpeqb -64(%rax), %xmm0
sub $80, %rax
pmovmskb %xmm0, %edx
test %edx, %edx
jnz L(exit16)
pcmpeqb 32(%rax), %xmm1
pmovmskb %xmm1, %edx
test %edx, %edx
jnz L(exit32)
pcmpeqb 48(%rax), %xmm2
pmovmskb %xmm2, %edx
test %edx, %edx
jnz L(exit48)
/* The first three chunks were clean, so the fourth is taken to hold the
   terminator without a test; the result is computed inline as
   (%rax - %rdi) + index of the lowest set mask bit + 64.  */
pcmpeqb 64(%rax), %xmm3
pmovmskb %xmm3, %edx
sub %rdi, %rax
bsf %rdx, %rdx
add %rdx, %rax
add $64, %rax
RETURN
/* Exit stubs.  On entry %edx holds a non-zero pmovmskb mask and, except
   for L(exit_less16), %rax holds the address the mask is relative to
   minus the stub's fixed offset.  Each stub returns
   (%rax - %rdi) + index of the lowest set bit in the mask + offset.  */
.p2align 4
L(exit):
sub %rdi, %rax
/* Entered directly with %rax already holding a byte count.  */
L(exit_less16):
bsf %rdx, %rdx
add %rdx, %rax
RETURN
.p2align 4
L(exit16):
sub %rdi, %rax
bsf %rdx, %rdx
add %rdx, %rax
add $16, %rax
RETURN
.p2align 4
L(exit32):
sub %rdi, %rax
bsf %rdx, %rdx
add %rdx, %rax
add $32, %rax
RETURN
.p2align 4
L(exit48):
sub %rdi, %rax
bsf %rdx, %rdx
add %rdx, %rax
add $48, %rax
RETURN
.p2align 4
L(exit64):
sub %rdi, %rax
bsf %rdx, %rdx
add %rdx, %rax
add $64, %rax
/* Only the standalone build returns and closes the function here.  With
   USE_AS_STRCAT defined this stub has no RETURN and execution falls
   through into whatever follows the point of inclusion.  */
# ifndef USE_AS_STRCAT
RETURN
END (__strlen_sse2_pminub)
# endif
#endif

View File

@ -32,7 +32,10 @@ ENTRY(strlen)
cmpl $0, __cpu_features+KIND_OFFSET(%rip) cmpl $0, __cpu_features+KIND_OFFSET(%rip)
jne 1f jne 1f
call __init_cpu_features call __init_cpu_features
1: leaq __strlen_sse2(%rip), %rax 1: leaq __strlen_sse2_pminub(%rip), %rax
testl $bit_Prefer_PMINUB_for_stringop, __cpu_features+FEATURE_OFFSET+index_Prefer_PMINUB_for_stringop(%rip)
jnz 2f
leaq __strlen_sse2(%rip), %rax
testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip) testl $bit_SSE4_2, __cpu_features+CPUID_OFFSET+index_SSE4_2(%rip)
jz 2f jz 2f
leaq __strlen_sse42(%rip), %rax leaq __strlen_sse42(%rip), %rax

View File

@ -0,0 +1,8 @@
/* Compile string/strncat.c with STRNCAT set to __strncat_sse2;
   presumably the included file uses STRNCAT as the name of the
   function it defines -- confirm against string/strncat.c.  */
#define STRNCAT __strncat_sse2
#ifdef SHARED
/* Replace libc_hidden_def so that any use in the included file expands
   to a __hidden_ver1 declaration tying __GI___strncat to
   __strncat_sse2.  The macro argument is ignored.  */
#undef libc_hidden_def
#define libc_hidden_def(name) \
__hidden_ver1 (__strncat_sse2, __GI___strncat, __strncat_sse2);
#endif
#include "string/strncat.c"

View File

@ -0,0 +1,3 @@
/* Wrapper: include strcat-sse2-unaligned.S with USE_AS_STRNCAT defined
   and STRCAT set to __strncat_sse2_unaligned.  Presumably the included
   file uses STRCAT as its symbol name and switches to strncat behaviour
   under USE_AS_STRNCAT -- confirm against that file.  */
#define USE_AS_STRNCAT
#define STRCAT __strncat_sse2_unaligned
#include "strcat-sse2-unaligned.S"

View File

@ -0,0 +1,3 @@
/* Wrapper: include strcat-ssse3.S with USE_AS_STRNCAT defined and
   STRCAT set to __strncat_ssse3.  Presumably the included file uses
   STRCAT as its symbol name and switches to strncat behaviour under
   USE_AS_STRNCAT -- confirm against that file.  */
#define USE_AS_STRNCAT
#define STRCAT __strncat_ssse3
#include "strcat-ssse3.S"

View File

@ -0,0 +1,3 @@
/* Wrapper: include strcat.S with STRCAT set to strncat and
   USE_AS_STRNCAT defined.  Presumably strcat.S then emits its code
   under the name strncat -- confirm against strcat.S.  */
#define STRCAT strncat
#define USE_AS_STRNCAT
#include "strcat.S"