1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

S390: Optimize strcmp and wcscmp.

This patch provides optimized versions of strcmp and wcscmp with the z13
vector instructions.

The architecture specific string.h had a typo, which leads to ommiting the
inline version in this file if __USE_STRING_INLINES is defined.
Tested this inline version by tweaking test-strcmp.c.

ChangeLog:

	* sysdeps/s390/multiarch/strcmp-vx.S: New File.
	* sysdeps/s390/multiarch/strcmp.c: Likewise.
	* sysdeps/s390/multiarch/wcscmp-c.c: Likewise.
	* sysdeps/s390/multiarch/wcscmp-vx.S: Likewise.
	* sysdeps/s390/multiarch/wcscmp.c: Likewise.
	* sysdeps/s390/s390-32/multiarch/strcmp.c: Likewise.
	* sysdeps/s390/s390-64/multiarch/strcmp.c: Likewise.
	* sysdeps/s390/multiarch/Makefile (sysdep_routines): Add strcmp and
	wcscmp functions.
	* sysdeps/s390/multiarch/ifunc-impl-list.c
	(__libc_ifunc_impl_list): Add ifunc test for strcmp, wcscmp.
	* string/strcmp.c (STRCMP): Define and use macro.
	* benchtests/bench-wcscmp.c: New File.
	* benchtests/Makefile (wcsmbs-bench): Add wcscmp.
	* sysdeps/s390/bits/string.h: Fix typo: _HAVE_STRING_ARCH_strcmp
	instead of _HAVE_STRING_ARCH_memchr.
This commit is contained in:
Stefan Liebler
2015-08-26 10:26:22 +02:00
committed by Andreas Krebbel
parent e1fe91180e
commit 63724a6db6
14 changed files with 430 additions and 6 deletions

View File

@ -1,3 +1,22 @@
2015-08-26 Stefan Liebler <stli@linux.vnet.ibm.com>
* sysdeps/s390/multiarch/strcmp-vx.S: New File.
* sysdeps/s390/multiarch/strcmp.c: Likewise.
* sysdeps/s390/multiarch/wcscmp-c.c: Likewise.
* sysdeps/s390/multiarch/wcscmp-vx.S: Likewise.
* sysdeps/s390/multiarch/wcscmp.c: Likewise.
* sysdeps/s390/s390-32/multiarch/strcmp.c: Likewise.
* sysdeps/s390/s390-64/multiarch/strcmp.c: Likewise.
* sysdeps/s390/multiarch/Makefile (sysdep_routines): Add strcmp and
wcscmp functions.
* sysdeps/s390/multiarch/ifunc-impl-list.c
(__libc_ifunc_impl_list): Add ifunc test for strcmp, wcscmp.
* string/strcmp.c (STRCMP): Define and use macro.
* benchtests/bench-wcscmp.c: New File.
* benchtests/Makefile (wcsmbs-bench): Add wcscmp.
* sysdeps/s390/bits/string.h: Fix typo: _HAVE_STRING_ARCH_strcmp
instead of _HAVE_STRING_ARCH_memchr.
2015-08-26 Stefan Liebler <stli@linux.vnet.ibm.com> 2015-08-26 Stefan Liebler <stli@linux.vnet.ibm.com>
* sysdeps/s390/multiarch/strncat-c.c: New File. * sysdeps/s390/multiarch/strncat-c.c: New File.

View File

@ -36,7 +36,8 @@ string-bench := bcopy bzero memccpy memchr memcmp memcpy memmem memmove \
strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \ strncasecmp strncat strncmp strncpy strnlen strpbrk strrchr \
strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \ strspn strstr strcpy_chk stpcpy_chk memrchr strsep strtok \
strcoll strcoll
wcsmbs-bench := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat wcsmbs-bench := wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat \
wcscmp
string-bench-all := $(string-bench) ${wcsmbs-bench} string-bench-all := $(string-bench) ${wcsmbs-bench}
# We have to generate locales # We have to generate locales

20
benchtests/bench-wcscmp.c Normal file
View File

@ -0,0 +1,20 @@
/* Measure wcscmp functions.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#define WIDE 1
#include "bench-strcmp.c"

View File

@ -19,11 +19,15 @@
#undef strcmp #undef strcmp
#ifndef STRCMP
# define STRCMP strcmp
#endif
/* Compare S1 and S2, returning less than, equal to or /* Compare S1 and S2, returning less than, equal to or
greater than zero if S1 is lexicographically less than, greater than zero if S1 is lexicographically less than,
equal to or greater than S2. */ equal to or greater than S2. */
int int
strcmp (const char *p1, const char *p2) STRCMP (const char *p1, const char *p2)
{ {
const unsigned char *s1 = (const unsigned char *) p1; const unsigned char *s1 = (const unsigned char *) p1;
const unsigned char *s2 = (const unsigned char *) p2; const unsigned char *s2 = (const unsigned char *) p2;

View File

@ -226,8 +226,8 @@ memchr (const void *__str, int __c, size_t __n)
} }
#endif #endif
/* Search N bytes of S for C. */ /* Compare S1 and S2. */
#define _HAVE_STRING_ARCH_memchr 1 #define _HAVE_STRING_ARCH_strcmp 1
#ifndef _FORCE_INLINES #ifndef _FORCE_INLINES
__STRING_INLINE int __STRING_INLINE int
strcmp (const char *__s1, const char *__s2) strcmp (const char *__s1, const char *__s2)

View File

@ -6,7 +6,8 @@ sysdep_routines += strlen strlen-vx strlen-c \
strncpy strncpy-vx \ strncpy strncpy-vx \
stpncpy stpncpy-vx stpncpy-c \ stpncpy stpncpy-vx stpncpy-c \
strcat strcat-vx strcat-c \ strcat strcat-vx strcat-c \
strncat strncat-vx strncat-c strncat strncat-vx strncat-c \
strcmp strcmp-vx
endif endif
ifeq ($(subdir),wcsmbs) ifeq ($(subdir),wcsmbs)
@ -17,5 +18,6 @@ sysdep_routines += wcslen wcslen-vx wcslen-c \
wcsncpy wcsncpy-vx wcsncpy-c \ wcsncpy wcsncpy-vx wcsncpy-c \
wcpncpy wcpncpy-vx wcpncpy-c \ wcpncpy wcpncpy-vx wcpncpy-c \
wcscat wcscat-vx wcscat-c \ wcscat wcscat-vx wcscat-c \
wcsncat wcsncat-vx wcsncat-c wcsncat wcsncat-vx wcsncat-c \
wcscmp wcscmp-vx wcscmp-c
endif endif

View File

@ -103,6 +103,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
IFUNC_VX_IMPL (strncat); IFUNC_VX_IMPL (strncat);
IFUNC_VX_IMPL (wcsncat); IFUNC_VX_IMPL (wcsncat);
IFUNC_VX_IMPL (strcmp);
IFUNC_VX_IMPL (wcscmp);
#endif /* HAVE_S390_VX_ASM_SUPPORT */ #endif /* HAVE_S390_VX_ASM_SUPPORT */
return i; return i;

View File

@ -0,0 +1,116 @@
/* Vector optimized 32/64 bit S/390 version of strcmp.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
# include "sysdep.h"
# include "asm-syntax.h"
.text
/* int strcmp (const char *s1, const char *s2)
Compare two strings
Register usage:
-r1=loaded byte count s1
-r2=s1
-r3=s2
-r4=loaded byte coutn s2, tmp
-r5=current_len
-v16=part of s1
-v17=part of s2
-v18=index of unequal
*/
ENTRY(__strcmp_vx)
.machine "z13"
.machinemode "zarch_nohighgprs"
lghi %r5,0 /* current_len = 0. */
.Lloop:
vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */
vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */
lcbb %r1,0(%r5,%r2),6 /* Get loaded byte count of s1. */
jo .Llt16_1 /* Jump away if vr is not fully loaded. */
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2 /* Jump away if vr is not fully loaded. */
/* Both vrs are fully loaded. */
aghi %r5,16
vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */
jno .Lfound
vlbb %v16,0(%r5,%r2),6
vlbb %v17,0(%r5,%r3),6
lcbb %r1,0(%r5,%r2),6
jo .Llt16_1
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2
aghi %r5,16
vfenezbs %v18,%v16,%v17
jno .Lfound
vlbb %v16,0(%r5,%r2),6
vlbb %v17,0(%r5,%r3),6
lcbb %r1,0(%r5,%r2),6
jo .Llt16_1
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2
aghi %r5,16
vfenezbs %v18,%v16,%v17
jno .Lfound
vlbb %v16,0(%r5,%r2),6
vlbb %v17,0(%r5,%r3),6
lcbb %r1,0(%r5,%r2),6
jo .Llt16_1
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2
aghi %r5,16
vfenezbs %v18,%v16,%v17
jno .Lfound
j .Lloop
.Llt16_1:
lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */
.Llt16_2:
clr %r1,%r4
locrh %r1,%r4 /* Get minimum of bytes loaded in s1/2. */
algfr %r5,%r1 /* Add smallest loaded bytes to current_len. */
vfenezbs %v18,%v16,%v17 /* Compare not equal with zero search. */
vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */
clrjl %r4,%r1,.Lfound /* Jump away if miscompare is within loaded
bytes. */
j .Lloop
.Lfound:
je .Lend_equal
lghi %r2,1
lghi %r1,-1
locgrl %r2,%r1
br %r14
.Lend_equal:
lghi %r2,0
br %r14
END(__strcmp_vx)
# define strcmp __strcmp_c
# undef libc_hidden_builtin_def
# define libc_hidden_builtin_def(name) strong_alias(__strcmp_c, __GI_strcmp)
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */
#include <strcmp.S>

View File

@ -0,0 +1,26 @@
/* Multiple versions of strcmp.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
# include <string.h>
# include <ifunc-resolve.h>
# undef strcmp
s390_vx_libc_ifunc2 (__strcmp, strcmp)
#endif

View File

@ -0,0 +1,32 @@
/* Default wcscmp implementation for S/390.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
# define WCSCMP __wcscmp_c
# include <wchar.h>
extern __typeof (wcscmp) __wcscmp_c;
# undef weak_alias
# define weak_alias(name, alias)
# ifdef SHARED
# undef libc_hidden_def
# define libc_hidden_def(name) \
__hidden_ver1 (__wcscmp_c, __GI___wcscmp, __wcscmp_c);
# endif /* SHARED */
# include <wcsmbs/wcscmp.c>
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */

View File

@ -0,0 +1,131 @@
/* Vector optimized 32/64 bit S/390 version of wcscmp.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
# include "sysdep.h"
# include "asm-syntax.h"
.text
/* int wcscmp (const wchar_t *s1, const wchar_t *s2)
Compare two strings
Register usage:
-r1=loaded byte count s1
-r2=s1
-r3=s2
-r4=loaded byte coutn s2, tmp
-r5=current_len
-v16=part of s1
-v17=part of s2
-v18=index of unequal
*/
ENTRY(__wcscmp_vx)
.machine "z13"
.machinemode "zarch_nohighgprs"
lghi %r5,0 /* current_len = 0. */
.Lloop:
vlbb %v16,0(%r5,%r2),6 /* Load s1 to block boundary. */
vlbb %v17,0(%r5,%r3),6 /* Load s2 to block boundary. */
lcbb %r1,0(%r5,%r2),6 /* Get loaded byte count of s1. */
jo .Llt16_1 /* Jump away if vr is not fully loaded. */
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2 /* Jump away if vr is not fully loaded. */
/* Both vrs are fully loaded. */
aghi %r5,16
vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */
jno .Lfound
vlbb %v16,0(%r5,%r2),6
vlbb %v17,0(%r5,%r3),6
lcbb %r1,0(%r5,%r2),6
jo .Llt16_1
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2
aghi %r5,16
vfenezfs %v18,%v16,%v17
jno .Lfound
vlbb %v16,0(%r5,%r2),6
vlbb %v17,0(%r5,%r3),6
lcbb %r1,0(%r5,%r2),6
jo .Llt16_1
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2
aghi %r5,16
vfenezfs %v18,%v16,%v17
jno .Lfound
vlbb %v16,0(%r5,%r2),6
vlbb %v17,0(%r5,%r3),6
lcbb %r1,0(%r5,%r2),6
jo .Llt16_1
lcbb %r4,0(%r5,%r3),6
jo .Llt16_2
aghi %r5,16
vfenezfs %v18,%v16,%v17
jno .Lfound
j .Lloop
.Lcmp_one_char:
/* At least one of both strings is not 4-byte aligned
and there is no full character before next block-boundary.
Compare one character to get over the boundary and
proceed with normal loop! */
vlef %v16,0(%r5,%r2),0 /* Load one character. */
vlef %v17,0(%r5,%r3),0
lghi %r1,4 /* Loaded byte count is 4. */
j .Llt_cmp /* Proceed with comparision. */
.Llt16_1:
lcbb %r4,0(%r5,%r3),6 /* Get loaded byte count of s2. */
.Llt16_2:
clr %r1,%r4
locrh %r1,%r4 /* Get minimum of bytes loaded in s1/2. */
nill %r1,65532 /* Align bytes loaded to full characters. */
jz .Lcmp_one_char /* Jump away if no full char is available. */
.Llt_cmp:
algfr %r5,%r1 /* Add smallest loaded bytes to current_len. */
vfenezfs %v18,%v16,%v17 /* Compare not equal with zero search. */
vlgvb %r4,%v18,7 /* Get not equal index or 16 if all equal. */
clrjl %r4,%r1,.Lfound /* Jump away if miscompare is within loaded
bytes. */
j .Lloop
.Lfound:
/* vfenezf found an unequal element or zero.
This instruction compares unsigned words, but wchar_t is signed.
Thus we have to compare the found element again. */
vlgvb %r4,%v18,7 /* Extract not equal byte-index, */
srl %r4,2 /* Convert it to character-index. */
vlgvf %r3,%v16,0(%r4) /* Load character-values. */
vlgvf %r4,%v17,0(%r4)
cr %r3,%r4
je .Lend_equal
lghi %r2,1
lghi %r1,-1
locgrl %r2,%r1
br %r14
.Lend_equal:
lghi %r2,0
br %r14
END(__wcscmp_vx)
#endif /* HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc) */

View File

@ -0,0 +1,28 @@
/* Multiple versions of wcscmp.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#if defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)
# include <wchar.h>
# include <ifunc-resolve.h>
s390_vx_libc_ifunc (__wcscmp)
weak_alias (__wcscmp, wcscmp)
#else
# include <wcsmbs/wcscmp.c>
#endif /* !(defined HAVE_S390_VX_ASM_SUPPORT && IS_IN (libc)) */

View File

@ -0,0 +1,21 @@
/* Multiple versions of strcmp.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* This wrapper-file is needed, because otherwise file
sysdeps/s390/s390-[32|64]/strcmp.S will be used. */
#include <sysdeps/s390/multiarch/strcmp.c>

View File

@ -0,0 +1,21 @@
/* Multiple versions of strcmp.
Copyright (C) 2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
/* This wrapper-file is needed, because otherwise file
sysdeps/s390/s390-[32|64]/strcmp.S will be used. */
#include <sysdeps/s390/multiarch/strcmp.c>