mirror of
https://sourceware.org/git/glibc.git
synced 2025-10-23 02:12:24 +03:00
Add strstr with unaligned loads. Fixes bug 12100.
The SSE4.2 version of strstr used the pcmpistr instruction, which is quite inefficient. A faster way is to look for pairs of characters (the first two characters of the needle): this needs only SSE2, is faster than pcmpistr, and for real strings the pairs we look for are relatively rare. To keep linear time complexity we use a buy-or-rent technique, which switches to the two-way algorithm when superlinear behaviour is detected.
This commit is contained in:
14
ChangeLog
14
ChangeLog
@@ -1,3 +1,17 @@
|
||||
2013-12-14 Ondřej Bílka <neleai@seznam.cz>
|
||||
|
||||
[BZ #12100]
|
||||
* sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S: New file
|
||||
* sysdeps/x86_64/multiarch/strstr-c.c: Moved to ...
|
||||
* sysdeps/x86_64/multiarch/strstr.c: ... here.
|
||||
(strstr): Add __strstr_sse2_unaligned ifunc.
|
||||
* sysdeps/x86_64/multiarch/strcasestr-c.c: Moved to ...
|
||||
* sysdeps/x86_64/multiarch/strcasestr.c ... here.
|
||||
(strcasestr): Remove __strcasestr_sse42 ifunc.
|
||||
* sysdeps/x86_64/multiarch/strcasestr-nonascii.c: Remove.
|
||||
* sysdeps/x86_64/multiarch/ifunc-impl-list.c: Update.
|
||||
* sysdeps/x86_64/multiarch/Makefile (sysdep_routines): Update.
|
||||
|
||||
2013-12-14 Kaz Kojima <kkojima@rr.iij4u.or.jp>
|
||||
|
||||
* sysdeps/sh/sh4/fpu/bits/fenv.h: Move to ...
|
||||
|
24
NEWS
24
NEWS
@@ -10,18 +10,18 @@ Version 2.19
|
||||
* The following bugs are resolved with this release:
|
||||
|
||||
156, 387, 431, 832, 926, 2801, 4772, 6786, 6787, 6807, 6810, 7003, 9954,
|
||||
10253, 10278, 11087, 11157, 11214, 12486, 13028, 13982, 13985, 14029,
|
||||
14032, 14143, 14155, 14547, 14699, 14752, 14876, 14910, 15004, 15048,
|
||||
15089, 15218, 15268, 15277, 15308, 15362, 15374, 15400, 15425, 15427,
|
||||
15483, 15522, 15531, 15532, 15593, 15601, 15608, 15609, 15610, 15632,
|
||||
15640, 15670, 15672, 15680, 15681, 15723, 15734, 15735, 15736, 15748,
|
||||
15749, 15754, 15760, 15763, 15764, 15797, 15799, 15825, 15843, 15844,
|
||||
15847, 15849, 15855, 15856, 15857, 15859, 15867, 15886, 15887, 15890,
|
||||
15892, 15893, 15895, 15897, 15901, 15905, 15909, 15915, 15917, 15919,
|
||||
15921, 15923, 15939, 15941, 15948, 15963, 15966, 15985, 15988, 15997,
|
||||
16032, 16034, 16036, 16037, 16038, 16041, 16055, 16071, 16072, 16074,
|
||||
16077, 16078, 16103, 16112, 16143, 16144, 16146, 16150, 16151, 16153,
|
||||
16167, 16172, 16195, 16214, 16245, 16271, 16274, 16283, 16289.
|
||||
10253, 10278, 11087, 11157, 11214, 12100, 12486, 13028, 13982, 13985,
|
||||
14029, 14032, 14143, 14155, 14547, 14699, 14752, 14876, 14910, 15004,
|
||||
15048, 15089, 15218, 15268, 15277, 15308, 15362, 15374, 15400, 15425,
|
||||
15427, 15483, 15522, 15531, 15532, 15593, 15601, 15608, 15609, 15610,
|
||||
15632, 15640, 15670, 15672, 15680, 15681, 15723, 15734, 15735, 15736,
|
||||
15748, 15749, 15754, 15760, 15763, 15764, 15797, 15799, 15825, 15843,
|
||||
15844, 15847, 15849, 15855, 15856, 15857, 15859, 15867, 15886, 15887,
|
||||
15890, 15892, 15893, 15895, 15897, 15901, 15905, 15909, 15915, 15917,
|
||||
15919, 15921, 15923, 15939, 15941, 15948, 15963, 15966, 15985, 15988,
|
||||
15997, 16032, 16034, 16036, 16037, 16038, 16041, 16055, 16071, 16072,
|
||||
16074, 16077, 16078, 16103, 16112, 16143, 16144, 16146, 16150, 16151,
|
||||
16153, 16167, 16172, 16195, 16214, 16245, 16271, 16274, 16283, 16289.
|
||||
|
||||
* The public headers no longer use __unused nor __block. This change is to
|
||||
support compiling programs that are derived from BSD sources and use
|
||||
|
@@ -11,22 +11,19 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c strcmp-ssse3 \
|
||||
memcmp-sse4 memcpy-ssse3 \
|
||||
memcpy-sse2-unaligned mempcpy-ssse3 \
|
||||
memmove-ssse3 memcpy-ssse3-back mempcpy-ssse3-back \
|
||||
memmove-ssse3-back strcasestr-nonascii strcasecmp_l-ssse3 \
|
||||
memmove-ssse3-back strcasecmp_l-ssse3 \
|
||||
strncase_l-ssse3 strcat-ssse3 strncat-ssse3\
|
||||
strcpy-ssse3 strncpy-ssse3 stpcpy-ssse3 stpncpy-ssse3 \
|
||||
strcpy-sse2-unaligned strncpy-sse2-unaligned \
|
||||
stpcpy-sse2-unaligned stpncpy-sse2-unaligned \
|
||||
strcat-sse2-unaligned strncat-sse2-unaligned \
|
||||
strchr-sse2-no-bsf memcmp-ssse3
|
||||
strchr-sse2-no-bsf memcmp-ssse3 strstr-sse2-unaligned
|
||||
ifeq (yes,$(config-cflags-sse4))
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c strstr-c strcasestr-c varshift
|
||||
sysdep_routines += strcspn-c strpbrk-c strspn-c varshift
|
||||
CFLAGS-varshift.c += -msse4
|
||||
CFLAGS-strcspn-c.c += -msse4
|
||||
CFLAGS-strpbrk-c.c += -msse4
|
||||
CFLAGS-strspn-c.c += -msse4
|
||||
CFLAGS-strstr.c += -msse4
|
||||
CFLAGS-strcasestr.c += -msse4
|
||||
CFLAGS-strcasestr-nonascii.c += -msse4
|
||||
endif
|
||||
endif
|
||||
|
||||
|
@@ -98,8 +98,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcasestr.c. */
|
||||
IFUNC_IMPL (i, name, strcasestr,
|
||||
IFUNC_IMPL_ADD (array, i, strcasestr, HAS_SSE4_2,
|
||||
__strcasestr_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strcasestr, 1, __strcasestr_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strcat.S. */
|
||||
@@ -184,7 +182,7 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/strstr-c.c. */
|
||||
IFUNC_IMPL (i, name, strstr,
|
||||
IFUNC_IMPL_ADD (array, i, strstr, HAS_SSE4_2, __strstr_sse42)
|
||||
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2_unaligned)
|
||||
IFUNC_IMPL_ADD (array, i, strstr, 1, __strstr_sse2))
|
||||
|
||||
/* Support sysdeps/x86_64/multiarch/wcscpy.S. */
|
||||
|
@@ -1,19 +0,0 @@
|
||||
/* Multiple versions of strcasestr
|
||||
All versions must be listed in ifunc-impl-list.c. */
|
||||
|
||||
#include "init-arch.h"
|
||||
|
||||
#define STRCASESTR __strcasestr_sse2
|
||||
|
||||
#include "string/strcasestr.c"
|
||||
|
||||
extern char *__strcasestr_sse42 (const char *, const char *) attribute_hidden;
|
||||
extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
|
||||
|
||||
#if 1
|
||||
libc_ifunc (__strcasestr,
|
||||
HAS_SSE4_2 ? __strcasestr_sse42 : __strcasestr_sse2);
|
||||
#else
|
||||
libc_ifunc (__strcasestr,
|
||||
0 ? __strcasestr_sse42 : __strcasestr_sse2);
|
||||
#endif
|
@@ -1,50 +0,0 @@
|
||||
/* strstr with SSE4.2 intrinsics
|
||||
Copyright (C) 2010-2013 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <ctype.h>
|
||||
#include <xmmintrin.h>
|
||||
|
||||
|
||||
/* Similar to __m128i_strloadu. Convert to lower case for none-POSIX/C
|
||||
locale. */
|
||||
static __m128i
|
||||
__m128i_strloadu_tolower (const unsigned char *p)
|
||||
{
|
||||
union
|
||||
{
|
||||
char b[16];
|
||||
__m128i x;
|
||||
} u;
|
||||
|
||||
for (int i = 0; i < 16; ++i)
|
||||
if (p[i] == 0)
|
||||
{
|
||||
u.b[i] = 0;
|
||||
break;
|
||||
}
|
||||
else
|
||||
u.b[i] = tolower (p[i]);
|
||||
|
||||
return u.x;
|
||||
}
|
||||
|
||||
|
||||
#define STRCASESTR_NONASCII
|
||||
#define USE_AS_STRCASESTR
|
||||
#define STRSTR_SSE42 __strcasestr_sse42_nonascii
|
||||
#include "strstr.c"
|
@@ -1,7 +1,13 @@
|
||||
extern char *__strcasestr_sse42_nonascii (const unsigned char *s1,
|
||||
const unsigned char *s2)
|
||||
attribute_hidden;
|
||||
/* Multiple versions of strcasestr
|
||||
All versions must be listed in ifunc-impl-list.c. */
|
||||
|
||||
#define USE_AS_STRCASESTR
|
||||
#define STRSTR_SSE42 __strcasestr_sse42
|
||||
#include "strstr.c"
|
||||
#include "init-arch.h"
|
||||
|
||||
#define STRCASESTR __strcasestr_sse2
|
||||
|
||||
#include "string/strcasestr.c"
|
||||
|
||||
extern __typeof (__strcasestr_sse2) __strcasestr_sse2 attribute_hidden;
|
||||
|
||||
libc_ifunc (__strcasestr,
|
||||
__strcasestr_sse2);
|
||||
|
@@ -1,47 +0,0 @@
|
||||
/* Multiple versions of strstr.
|
||||
All versions must be listed in ifunc-impl-list.c.
|
||||
Copyright (C) 2012-2013 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
/* Redefine strstr so that the compiler won't complain about the type
|
||||
mismatch with the IFUNC selector in strong_alias, below. */
|
||||
#undef strstr
|
||||
#define strstr __redirect_strstr
|
||||
#include <string.h>
|
||||
#undef strstr
|
||||
|
||||
#define STRSTR __strstr_sse2
|
||||
#ifdef SHARED
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
__hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
|
||||
#endif
|
||||
|
||||
#include "string/strstr.c"
|
||||
|
||||
extern __typeof (__redirect_strstr) __strstr_sse42 attribute_hidden;
|
||||
extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
|
||||
|
||||
#include "init-arch.h"
|
||||
|
||||
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
||||
ifunc symbol properly. */
|
||||
extern __typeof (__redirect_strstr) __libc_strstr;
|
||||
libc_ifunc (__libc_strstr, HAS_SSE4_2 ? __strstr_sse42 : __strstr_sse2)
|
||||
|
||||
#undef strstr
|
||||
strong_alias (__libc_strstr, strstr)
|
374
sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
Normal file
374
sysdeps/x86_64/multiarch/strstr-sse2-unaligned.S
Normal file
@@ -0,0 +1,374 @@
|
||||
/* strstr with unaligned loads
|
||||
Copyright (C) 2009-2013 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
The GNU C Library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <sysdep.h>
|
||||
|
||||
ENTRY(__strstr_sse2_unaligned)
|
||||
movzbl (%rsi), %eax
|
||||
testb %al, %al
|
||||
je L(empty)
|
||||
movzbl 1(%rsi), %edx
|
||||
testb %dl, %dl
|
||||
je L(strchr)
|
||||
movd %eax, %xmm1
|
||||
movd %edx, %xmm2
|
||||
movq %rdi, %rax
|
||||
andl $4095, %eax
|
||||
punpcklbw %xmm1, %xmm1
|
||||
cmpq $4031, %rax
|
||||
punpcklbw %xmm2, %xmm2
|
||||
punpcklwd %xmm1, %xmm1
|
||||
punpcklwd %xmm2, %xmm2
|
||||
pshufd $0, %xmm1, %xmm1
|
||||
pshufd $0, %xmm2, %xmm2
|
||||
ja L(cross_page)
|
||||
movdqu (%rdi), %xmm3
|
||||
pxor %xmm5, %xmm5
|
||||
movdqu 1(%rdi), %xmm4
|
||||
movdqa %xmm3, %xmm6
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pcmpeqb %xmm2, %xmm4
|
||||
movdqu 16(%rdi), %xmm0
|
||||
pcmpeqb %xmm5, %xmm6
|
||||
pminub %xmm4, %xmm3
|
||||
movdqa %xmm3, %xmm4
|
||||
movdqu 17(%rdi), %xmm3
|
||||
pcmpeqb %xmm0, %xmm5
|
||||
pcmpeqb %xmm2, %xmm3
|
||||
por %xmm6, %xmm4
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pminub %xmm3, %xmm0
|
||||
por %xmm5, %xmm0
|
||||
pmovmskb %xmm4, %r8d
|
||||
pmovmskb %xmm0, %eax
|
||||
salq $16, %rax
|
||||
orq %rax, %r8
|
||||
je L(next_32_bytes)
|
||||
L(next_pair_index):
|
||||
bsf %r8, %rax
|
||||
addq %rdi, %rax
|
||||
cmpb $0, (%rax)
|
||||
je L(zero1)
|
||||
movzbl 2(%rsi), %edx
|
||||
testb %dl, %dl
|
||||
je L(found1)
|
||||
cmpb 2(%rax), %dl
|
||||
jne L(next_pair)
|
||||
xorl %edx, %edx
|
||||
jmp L(pair_loop_start)
|
||||
|
||||
.p2align 4
|
||||
L(strchr):
|
||||
movzbl %al, %esi
|
||||
jmp __strchr_sse2
|
||||
|
||||
.p2align 4
|
||||
L(pair_loop):
|
||||
addq $1, %rdx
|
||||
cmpb 2(%rax,%rdx), %cl
|
||||
jne L(next_pair)
|
||||
L(pair_loop_start):
|
||||
movzbl 3(%rsi,%rdx), %ecx
|
||||
testb %cl, %cl
|
||||
jne L(pair_loop)
|
||||
L(found1):
|
||||
ret
|
||||
L(zero1):
|
||||
xorl %eax, %eax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(next_pair):
|
||||
leaq -1(%r8), %rax
|
||||
andq %rax, %r8
|
||||
jne L(next_pair_index)
|
||||
|
||||
.p2align 4
|
||||
L(next_32_bytes):
|
||||
movdqu 32(%rdi), %xmm3
|
||||
pxor %xmm5, %xmm5
|
||||
movdqu 33(%rdi), %xmm4
|
||||
movdqa %xmm3, %xmm6
|
||||
pcmpeqb %xmm1, %xmm3
|
||||
pcmpeqb %xmm2, %xmm4
|
||||
movdqu 48(%rdi), %xmm0
|
||||
pcmpeqb %xmm5, %xmm6
|
||||
pminub %xmm4, %xmm3
|
||||
movdqa %xmm3, %xmm4
|
||||
movdqu 49(%rdi), %xmm3
|
||||
pcmpeqb %xmm0, %xmm5
|
||||
pcmpeqb %xmm2, %xmm3
|
||||
por %xmm6, %xmm4
|
||||
pcmpeqb %xmm1, %xmm0
|
||||
pminub %xmm3, %xmm0
|
||||
por %xmm5, %xmm0
|
||||
pmovmskb %xmm4, %eax
|
||||
salq $32, %rax
|
||||
pmovmskb %xmm0, %r8d
|
||||
salq $48, %r8
|
||||
orq %rax, %r8
|
||||
je L(loop_header)
|
||||
L(next_pair2_index):
|
||||
bsfq %r8, %rax
|
||||
addq %rdi, %rax
|
||||
cmpb $0, (%rax)
|
||||
je L(zero2)
|
||||
movzbl 2(%rsi), %edx
|
||||
testb %dl, %dl
|
||||
je L(found2)
|
||||
cmpb 2(%rax), %dl
|
||||
jne L(next_pair2)
|
||||
xorl %edx, %edx
|
||||
jmp L(pair_loop2_start)
|
||||
|
||||
.p2align 4
|
||||
L(pair_loop2):
|
||||
addq $1, %rdx
|
||||
cmpb 2(%rax,%rdx), %cl
|
||||
jne L(next_pair2)
|
||||
L(pair_loop2_start):
|
||||
movzbl 3(%rsi,%rdx), %ecx
|
||||
testb %cl, %cl
|
||||
jne L(pair_loop2)
|
||||
L(found2):
|
||||
ret
|
||||
L(zero2):
|
||||
xorl %eax, %eax
|
||||
ret
|
||||
L(empty):
|
||||
mov %rdi, %rax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(next_pair2):
|
||||
leaq -1(%r8), %rax
|
||||
andq %rax, %r8
|
||||
jne L(next_pair2_index)
|
||||
L(loop_header):
|
||||
movq $-512, %r11
|
||||
movq %rdi, %r9
|
||||
|
||||
pxor %xmm7, %xmm7
|
||||
andq $-64, %rdi
|
||||
|
||||
.p2align 4
|
||||
L(loop):
|
||||
movdqa 64(%rdi), %xmm3
|
||||
movdqu 63(%rdi), %xmm6
|
||||
movdqa %xmm3, %xmm0
|
||||
pxor %xmm2, %xmm3
|
||||
pxor %xmm1, %xmm6
|
||||
movdqa 80(%rdi), %xmm10
|
||||
por %xmm3, %xmm6
|
||||
pminub %xmm10, %xmm0
|
||||
movdqu 79(%rdi), %xmm3
|
||||
pxor %xmm2, %xmm10
|
||||
pxor %xmm1, %xmm3
|
||||
movdqa 96(%rdi), %xmm9
|
||||
por %xmm10, %xmm3
|
||||
pminub %xmm9, %xmm0
|
||||
pxor %xmm2, %xmm9
|
||||
movdqa 112(%rdi), %xmm8
|
||||
addq $64, %rdi
|
||||
pminub %xmm6, %xmm3
|
||||
movdqu 31(%rdi), %xmm4
|
||||
pminub %xmm8, %xmm0
|
||||
pxor %xmm2, %xmm8
|
||||
pxor %xmm1, %xmm4
|
||||
por %xmm9, %xmm4
|
||||
pminub %xmm4, %xmm3
|
||||
movdqu 47(%rdi), %xmm5
|
||||
pxor %xmm1, %xmm5
|
||||
por %xmm8, %xmm5
|
||||
pminub %xmm5, %xmm3
|
||||
pminub %xmm3, %xmm0
|
||||
pcmpeqb %xmm7, %xmm0
|
||||
pmovmskb %xmm0, %eax
|
||||
testl %eax, %eax
|
||||
je L(loop)
|
||||
pminub (%rdi), %xmm6
|
||||
pminub 32(%rdi),%xmm4
|
||||
pminub 48(%rdi),%xmm5
|
||||
pcmpeqb %xmm7, %xmm6
|
||||
pcmpeqb %xmm7, %xmm5
|
||||
pmovmskb %xmm6, %edx
|
||||
movdqa 16(%rdi), %xmm8
|
||||
pcmpeqb %xmm7, %xmm4
|
||||
movdqu 15(%rdi), %xmm0
|
||||
pmovmskb %xmm5, %r8d
|
||||
movdqa %xmm8, %xmm3
|
||||
pmovmskb %xmm4, %ecx
|
||||
pcmpeqb %xmm1,%xmm0
|
||||
pcmpeqb %xmm2,%xmm3
|
||||
salq $32, %rcx
|
||||
pcmpeqb %xmm7,%xmm8
|
||||
salq $48, %r8
|
||||
pminub %xmm0,%xmm3
|
||||
orq %rcx, %rdx
|
||||
por %xmm3,%xmm8
|
||||
orq %rdx, %r8
|
||||
pmovmskb %xmm8, %eax
|
||||
salq $16, %rax
|
||||
orq %rax, %r8
|
||||
je L(loop)
|
||||
L(next_pair_index3):
|
||||
bsfq %r8, %rcx
|
||||
addq %rdi, %rcx
|
||||
cmpb $0, (%rcx)
|
||||
je L(zero)
|
||||
xorl %eax, %eax
|
||||
movzbl 2(%rsi), %edx
|
||||
testb %dl, %dl
|
||||
je L(success3)
|
||||
cmpb 1(%rcx), %dl
|
||||
jne L(next_pair3)
|
||||
jmp L(pair_loop_start3)
|
||||
|
||||
.p2align 4
|
||||
L(pair_loop3):
|
||||
addq $1, %rax
|
||||
cmpb 1(%rcx,%rax), %dl
|
||||
jne L(next_pair3)
|
||||
L(pair_loop_start3):
|
||||
movzbl 3(%rsi,%rax), %edx
|
||||
testb %dl, %dl
|
||||
jne L(pair_loop3)
|
||||
L(success3):
|
||||
lea -1(%rcx), %rax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(next_pair3):
|
||||
addq %rax, %r11
|
||||
movq %rdi, %rax
|
||||
subq %r9, %rax
|
||||
cmpq %r11, %rax
|
||||
jl L(switch_strstr)
|
||||
leaq -1(%r8), %rax
|
||||
andq %rax, %r8
|
||||
jne L(next_pair_index3)
|
||||
jmp L(loop)
|
||||
|
||||
.p2align 4
|
||||
L(switch_strstr):
|
||||
movq %rdi, %rdi
|
||||
jmp __strstr_sse2
|
||||
|
||||
.p2align 4
|
||||
L(cross_page):
|
||||
|
||||
movq %rdi, %rax
|
||||
pxor %xmm0, %xmm0
|
||||
andq $-64, %rax
|
||||
movdqa (%rax), %xmm3
|
||||
movdqu -1(%rax), %xmm4
|
||||
movdqa %xmm3, %xmm8
|
||||
movdqa 16(%rax), %xmm5
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
pcmpeqb %xmm0, %xmm8
|
||||
pcmpeqb %xmm2, %xmm3
|
||||
movdqa %xmm5, %xmm7
|
||||
pminub %xmm4, %xmm3
|
||||
movdqu 15(%rax), %xmm4
|
||||
pcmpeqb %xmm0, %xmm7
|
||||
por %xmm3, %xmm8
|
||||
movdqa %xmm5, %xmm3
|
||||
movdqa 32(%rax), %xmm5
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
pcmpeqb %xmm2, %xmm3
|
||||
movdqa %xmm5, %xmm6
|
||||
pmovmskb %xmm8, %ecx
|
||||
pminub %xmm4, %xmm3
|
||||
movdqu 31(%rax), %xmm4
|
||||
por %xmm3, %xmm7
|
||||
movdqa %xmm5, %xmm3
|
||||
pcmpeqb %xmm0, %xmm6
|
||||
movdqa 48(%rax), %xmm5
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
pmovmskb %xmm7, %r8d
|
||||
pcmpeqb %xmm2, %xmm3
|
||||
pcmpeqb %xmm5, %xmm0
|
||||
pminub %xmm4, %xmm3
|
||||
movdqu 47(%rax), %xmm4
|
||||
por %xmm3, %xmm6
|
||||
movdqa %xmm5, %xmm3
|
||||
salq $16, %r8
|
||||
pcmpeqb %xmm1, %xmm4
|
||||
pcmpeqb %xmm2, %xmm3
|
||||
pmovmskb %xmm6, %r10d
|
||||
pminub %xmm4, %xmm3
|
||||
por %xmm3, %xmm0
|
||||
salq $32, %r10
|
||||
orq %r10, %r8
|
||||
orq %rcx, %r8
|
||||
movl %edi, %ecx
|
||||
pmovmskb %xmm0, %edx
|
||||
subl %eax, %ecx
|
||||
salq $48, %rdx
|
||||
orq %rdx, %r8
|
||||
shrq %cl, %r8
|
||||
je L(loop_header)
|
||||
L(next_pair_index4):
|
||||
bsfq %r8, %rax
|
||||
addq %rdi, %rax
|
||||
cmpb $0, (%rax)
|
||||
je L(zero)
|
||||
|
||||
cmpq %rax,%rdi
|
||||
je L(next_pair4)
|
||||
|
||||
movzbl 2(%rsi), %edx
|
||||
testb %dl, %dl
|
||||
je L(found3)
|
||||
cmpb 1(%rax), %dl
|
||||
jne L(next_pair4)
|
||||
xorl %edx, %edx
|
||||
jmp L(pair_loop_start4)
|
||||
|
||||
.p2align 4
|
||||
L(pair_loop4):
|
||||
addq $1, %rdx
|
||||
cmpb 1(%rax,%rdx), %cl
|
||||
jne L(next_pair4)
|
||||
L(pair_loop_start4):
|
||||
movzbl 3(%rsi,%rdx), %ecx
|
||||
testb %cl, %cl
|
||||
jne L(pair_loop4)
|
||||
L(found3):
|
||||
subq $1, %rax
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(next_pair4):
|
||||
leaq -1(%r8), %rax
|
||||
andq %rax, %r8
|
||||
jne L(next_pair_index4)
|
||||
jmp L(loop_header)
|
||||
|
||||
.p2align 4
|
||||
L(found):
|
||||
rep
|
||||
ret
|
||||
|
||||
.p2align 4
|
||||
L(zero):
|
||||
xorl %eax, %eax
|
||||
ret
|
||||
|
||||
|
||||
END(__strstr_sse2_unaligned)
|
@@ -1,6 +1,6 @@
|
||||
/* strstr with SSE4.2 intrinsics
|
||||
Copyright (C) 2009-2013 Free Software Foundation, Inc.
|
||||
Contributed by Intel Corporation.
|
||||
/* Multiple versions of strstr.
|
||||
All versions must be listed in ifunc-impl-list.c.
|
||||
Copyright (C) 2012-2013 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@@ -17,369 +17,31 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include <nmmintrin.h>
|
||||
#include "varshift.h"
|
||||
/* Redefine strstr so that the compiler won't complain about the type
|
||||
mismatch with the IFUNC selector in strong_alias, below. */
|
||||
#undef strstr
|
||||
#define strstr __redirect_strstr
|
||||
#include <string.h>
|
||||
#undef strstr
|
||||
|
||||
#ifndef STRSTR_SSE42
|
||||
# define STRSTR_SSE42 __strstr_sse42
|
||||
#define STRSTR __strstr_sse2
|
||||
#ifdef SHARED
|
||||
# undef libc_hidden_builtin_def
|
||||
# define libc_hidden_builtin_def(name) \
|
||||
__hidden_ver1 (__strstr_sse2, __GI_strstr, __strstr_sse2);
|
||||
#endif
|
||||
|
||||
#ifdef USE_AS_STRCASESTR
|
||||
# include <ctype.h>
|
||||
# include <locale/localeinfo.h>
|
||||
#include "string/strstr.c"
|
||||
|
||||
# define LOADBYTE(C) tolower (C)
|
||||
# define CMPBYTE(C1, C2) (tolower (C1) == tolower (C2))
|
||||
#else
|
||||
# define LOADBYTE(C) (C)
|
||||
# define CMPBYTE(C1, C2) ((C1) == (C2))
|
||||
#endif
|
||||
extern __typeof (__redirect_strstr) __strstr_sse2_unaligned attribute_hidden;
|
||||
extern __typeof (__redirect_strstr) __strstr_sse2 attribute_hidden;
|
||||
|
||||
/* We use 0xe ordered-compare:
|
||||
_SIDD_SBYTE_OPS
|
||||
| _SIDD_CMP_EQUAL_ORDER
|
||||
| _SIDD_LEAST_SIGNIFICANT
|
||||
on pcmpistri to do the scanning and string comparsion requirements of
|
||||
sub-string match. In the scanning phase, we process Cflag and ECX
|
||||
index to locate the first fragment match; once the first fragment
|
||||
match position has been identified, we do comparison of subsequent
|
||||
string fragments until we can conclude false or true match; whe
|
||||
n concluding a false match, we may need to repeat scanning process
|
||||
from next relevant offset in the target string.
|
||||
#include "init-arch.h"
|
||||
|
||||
In the scanning phase we have 4 cases:
|
||||
case ECX CFlag ZFlag SFlag
|
||||
1 16 0 0 0
|
||||
2a 16 0 0 1
|
||||
2b 16 0 1 0
|
||||
2c 16 0 1 1
|
||||
/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle
|
||||
ifunc symbol properly. */
|
||||
extern __typeof (__redirect_strstr) __libc_strstr;
|
||||
libc_ifunc (__libc_strstr, HAS_FAST_UNALIGNED_LOAD ? __strstr_sse2_unaligned : __strstr_sse2)
|
||||
|
||||
1. No ordered-comparison match, both 16B fragments are valid, so
|
||||
continue to next fragment.
|
||||
2. No ordered-comparison match, there is EOS in either fragment,
|
||||
2a. Zflg = 0, Sflg = 1, we continue
|
||||
2b. Zflg = 1, Sflg = 0, we conclude no match and return.
|
||||
2c. Zflg = 1, sflg = 1, lenth determine match or no match
|
||||
|
||||
In the string comparison phase, the 1st fragment match is fixed up
|
||||
to produce ECX = 0. Subsequent fragment compare of nonzero index
|
||||
and no match conclude a false match.
|
||||
|
||||
case ECX CFlag ZFlag SFlag
|
||||
3 X 1 0 0/1
|
||||
4a 0 1 0 0
|
||||
4b 0 1 0 1
|
||||
4c 0 < X 1 0 0/1
|
||||
5 16 0 1 0
|
||||
|
||||
3. An initial ordered-comparison fragment match, we fix up to do
|
||||
subsequent string comparison
|
||||
4a. Continuation of fragment comparison of a string compare.
|
||||
4b. EOS reached in the reference string, we conclude true match and
|
||||
return
|
||||
4c. String compare failed if index is nonzero, we need to go back to
|
||||
scanning
|
||||
5. failed string compare, go back to scanning
|
||||
*/
|
||||
|
||||
#if !(defined USE_AS_STRCASESTR && defined STRCASESTR_NONASCII)
|
||||
/* Simple replacement of movdqu to address 4KB boundary cross issue.
|
||||
If EOS occurs within less than 16B before 4KB boundary, we don't
|
||||
cross to next page. */
|
||||
static __m128i
|
||||
__m128i_strloadu (const unsigned char * p, __m128i zero)
|
||||
{
|
||||
if (__builtin_expect ((int) ((size_t) p & 0xfff) > 0xff0, 0))
|
||||
{
|
||||
size_t offset = ((size_t) p & (16 - 1));
|
||||
__m128i a = _mm_load_si128 ((__m128i *) (p - offset));
|
||||
int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (a, zero));
|
||||
if ((bmsk >> offset) != 0)
|
||||
return __m128i_shift_right (a, offset);
|
||||
}
|
||||
return _mm_loadu_si128 ((__m128i *) p);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined USE_AS_STRCASESTR && !defined STRCASESTR_NONASCII
|
||||
|
||||
/* Similar to __m128i_strloadu. Convert to lower case for POSIX/C
|
||||
locale and other which have single-byte letters only in the ASCII
|
||||
range. */
|
||||
static __m128i
|
||||
__m128i_strloadu_tolower (const unsigned char *p, __m128i zero, __m128i uclow,
|
||||
__m128i uchigh, __m128i lcqword)
|
||||
{
|
||||
__m128i frag = __m128i_strloadu (p, zero);
|
||||
|
||||
/* Compare if 'Z' > bytes. Inverted way to get a mask for byte <= 'Z'. */
|
||||
__m128i r2 = _mm_cmpgt_epi8 (uchigh, frag);
|
||||
/* Compare if bytes are > 'A' - 1. */
|
||||
__m128i r1 = _mm_cmpgt_epi8 (frag, uclow);
|
||||
/* Mask byte == ff if byte(r2) <= 'Z' and byte(r1) > 'A' - 1. */
|
||||
__m128i mask = _mm_and_si128 (r2, r1);
|
||||
/* Apply lowercase bit 6 mask for above mask bytes == ff. */
|
||||
return _mm_or_si128 (frag, _mm_and_si128 (mask, lcqword));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
/* Calculate Knuth-Morris-Pratt string searching algorithm (or KMP
|
||||
algorithm) overlap for a fully populated 16B vector.
|
||||
Input parameter: 1st 16Byte loaded from the reference string of a
|
||||
strstr function.
|
||||
We don't use KMP algorithm if reference string is less than 16B. */
|
||||
static int
|
||||
__inline__ __attribute__ ((__always_inline__,))
|
||||
KMP16Bovrlap (__m128i s2)
|
||||
{
|
||||
__m128i b = _mm_unpacklo_epi8 (s2, s2);
|
||||
__m128i a = _mm_unpacklo_epi8 (b, b);
|
||||
a = _mm_shuffle_epi32 (a, 0);
|
||||
b = _mm_srli_si128 (s2, sizeof (char));
|
||||
int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (b, a));
|
||||
|
||||
/* _BitScanForward(&k1, bmsk); */
|
||||
int k1;
|
||||
__asm ("bsfl %[bmsk], %[k1]" : [k1] "=r" (k1) : [bmsk] "r" (bmsk));
|
||||
if (!bmsk)
|
||||
return 16;
|
||||
else if (bmsk == 0x7fff)
|
||||
return 1;
|
||||
else if (!k1)
|
||||
{
|
||||
/* There are al least two distinct chars in s2. If byte 0 and 1 are
|
||||
idential and the distinct value lies farther down, we can deduce
|
||||
the next byte offset to restart full compare is least no earlier
|
||||
than byte 3. */
|
||||
return 3;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Byte 1 is not degenerated to byte 0. */
|
||||
return k1 + 1;
|
||||
}
|
||||
}
|
||||
|
||||
char *
|
||||
__attribute__ ((section (".text.sse4.2")))
|
||||
STRSTR_SSE42 (const unsigned char *s1, const unsigned char *s2)
|
||||
{
|
||||
#define p1 s1
|
||||
const unsigned char *p2 = s2;
|
||||
|
||||
#ifndef STRCASESTR_NONASCII
|
||||
if (__builtin_expect (p2[0] == '\0', 0))
|
||||
return (char *) p1;
|
||||
|
||||
if (__builtin_expect (p1[0] == '\0', 0))
|
||||
return NULL;
|
||||
|
||||
/* Check if p1 length is 1 byte long. */
|
||||
if (__builtin_expect (p1[1] == '\0', 0))
|
||||
return p2[1] == '\0' && CMPBYTE (p1[0], p2[0]) ? (char *) p1 : NULL;
|
||||
#endif
|
||||
|
||||
#ifdef USE_AS_STRCASESTR
|
||||
# ifndef STRCASESTR_NONASCII
|
||||
if (__builtin_expect (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_NONASCII_CASE)
|
||||
!= 0, 0))
|
||||
return __strcasestr_sse42_nonascii (s1, s2);
|
||||
|
||||
const __m128i uclow = _mm_set1_epi8 (0x40);
|
||||
const __m128i uchigh = _mm_set1_epi8 (0x5b);
|
||||
const __m128i lcqword = _mm_set1_epi8 (0x20);
|
||||
const __m128i zero = _mm_setzero_si128 ();
|
||||
# define strloadu(p) __m128i_strloadu_tolower (p, zero, uclow, uchigh, lcqword)
|
||||
# else
|
||||
# define strloadu __m128i_strloadu_tolower
|
||||
# define zero _mm_setzero_si128 ()
|
||||
# endif
|
||||
#else
|
||||
# define strloadu(p) __m128i_strloadu (p, zero)
|
||||
const __m128i zero = _mm_setzero_si128 ();
|
||||
#endif
|
||||
|
||||
/* p1 > 1 byte long. Load up to 16 bytes of fragment. */
|
||||
__m128i frag1 = strloadu (p1);
|
||||
|
||||
__m128i frag2;
|
||||
if (p2[1] != '\0')
|
||||
/* p2 is > 1 byte long. */
|
||||
frag2 = strloadu (p2);
|
||||
else
|
||||
frag2 = _mm_insert_epi8 (zero, LOADBYTE (p2[0]), 0);
|
||||
|
||||
/* Unsigned bytes, equal order, does frag2 has null? */
|
||||
int cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
int cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
int cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
int cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||||
if (cmp_s & cmp_c)
|
||||
{
|
||||
int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (frag2, zero));
|
||||
int len;
|
||||
__asm ("bsfl %[bmsk], %[len]"
|
||||
: [len] "=r" (len) : [bmsk] "r" (bmsk));
|
||||
p1 += cmp;
|
||||
if ((len + cmp) <= 16)
|
||||
return (char *) p1;
|
||||
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag1 = strloadu (p1);
|
||||
cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||||
cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
if ((len + cmp) <= 16)
|
||||
return (char *) p1 + cmp;
|
||||
}
|
||||
|
||||
if (cmp_s)
|
||||
{
|
||||
/* Adjust addr for 16B alginment in ensuing loop. */
|
||||
while (!cmp_z)
|
||||
{
|
||||
p1 += cmp;
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag1 = strloadu (p1);
|
||||
cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
/* Because s2 < 16 bytes and we adjusted p1 by non-zero cmp
|
||||
once already, this time cmp will be zero and we can exit. */
|
||||
if ((!cmp) & cmp_c)
|
||||
break;
|
||||
}
|
||||
|
||||
if (!cmp_c)
|
||||
return NULL;
|
||||
|
||||
/* Since s2 is less than 16 bytes, com_c is definitive
|
||||
determination of full match. */
|
||||
return (char *) p1 + cmp;
|
||||
}
|
||||
|
||||
/* General case, s2 is at least 16 bytes or more.
|
||||
First, the common case of false-match at first byte of p2. */
|
||||
const unsigned char *pt = NULL;
|
||||
int kmp_fwd = 0;
|
||||
re_trace:
|
||||
while (!cmp_c)
|
||||
{
|
||||
/* frag1 has null. */
|
||||
if (cmp_z)
|
||||
return NULL;
|
||||
|
||||
/* frag 1 has no null, advance 16 bytes. */
|
||||
p1 += 16;
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag1 = strloadu (p1);
|
||||
/* Unsigned bytes, equal order, is there a partial match? */
|
||||
cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
}
|
||||
|
||||
/* Next, handle initial positive match as first byte of p2. We have
|
||||
a partial fragment match, make full determination until we reached
|
||||
end of s2. */
|
||||
if (!cmp)
|
||||
{
|
||||
if (cmp_z)
|
||||
return (char *) p1;
|
||||
|
||||
pt = p1;
|
||||
p1 += 16;
|
||||
p2 += 16;
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag2 = strloadu (p2);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Adjust 16B alignment. */
|
||||
p1 += cmp;
|
||||
pt = p1;
|
||||
}
|
||||
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag1 = strloadu (p1);
|
||||
|
||||
/* Unsigned bytes, equal order, does frag2 has null? */
|
||||
cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||||
while (!(cmp | cmp_z | cmp_s))
|
||||
{
|
||||
p1 += 16;
|
||||
p2 += 16;
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag2 = strloadu (p2);
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag1 = strloadu (p1);
|
||||
/* Unsigned bytes, equal order, does frag2 has null? */
|
||||
cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
cmp_s = _mm_cmpistrs (frag2, frag1, 0x0c);
|
||||
}
|
||||
|
||||
/* Full determination yielded a false result, retrace s1 to next
|
||||
starting position.
|
||||
Zflg 1 0 1 0/1
|
||||
Sflg 0 1 1 0/1
|
||||
cmp na 0 0 >0
|
||||
action done done continue continue if s2 < s1
|
||||
false match retrace s1 else false
|
||||
*/
|
||||
|
||||
if (cmp_s & !cmp)
|
||||
return (char *) pt;
|
||||
if (cmp_z)
|
||||
{
|
||||
if (!cmp_s)
|
||||
return NULL;
|
||||
|
||||
/* Handle both zero and sign flag set and s1 is shorter in
|
||||
length. */
|
||||
int bmsk = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag2));
|
||||
int bmsk1 = _mm_movemask_epi8 (_mm_cmpeq_epi8 (zero, frag1));
|
||||
int len;
|
||||
int len1;
|
||||
__asm ("bsfl %[bmsk], %[len]"
|
||||
: [len] "=r" (len) : [bmsk] "r" (bmsk));
|
||||
__asm ("bsfl %[bmsk1], %[len1]"
|
||||
: [len1] "=r" (len1) : [bmsk1] "r" (bmsk1));
|
||||
if (len >= len1)
|
||||
return NULL;
|
||||
}
|
||||
else if (!cmp)
|
||||
return (char *) pt;
|
||||
|
||||
/* Otherwise, we have to retrace and continue. Default of multiple
|
||||
paths that need to retrace from next byte in s1. */
|
||||
p2 = s2;
|
||||
frag2 = strloadu (p2);
|
||||
|
||||
if (!kmp_fwd)
|
||||
kmp_fwd = KMP16Bovrlap (frag2);
|
||||
|
||||
/* KMP algorithm predicted overlap needs to be corrected for
|
||||
partial fragment compare. */
|
||||
p1 = pt + (kmp_fwd > cmp ? cmp : kmp_fwd);
|
||||
|
||||
/* Since s2 is at least 16 bytes long, we're certain there is no
|
||||
match. */
|
||||
if (p1[0] == '\0')
|
||||
return NULL;
|
||||
|
||||
/* Load up to 16 bytes of fragment. */
|
||||
frag1 = strloadu (p1);
|
||||
|
||||
/* Unsigned bytes, equal order, is there a partial match? */
|
||||
cmp_c = _mm_cmpistrc (frag2, frag1, 0x0c);
|
||||
cmp = _mm_cmpistri (frag2, frag1, 0x0c);
|
||||
cmp_z = _mm_cmpistrz (frag2, frag1, 0x0c);
|
||||
goto re_trace;
|
||||
}
|
||||
#undef strstr
|
||||
strong_alias (__libc_strstr, strstr)
|
||||
|
Reference in New Issue
Block a user