1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-07-29 11:41:21 +03:00

Simplify and speed up strstr/strcasestr first match

Looking at the benchtests, both strstr and strcasestr spend a lot of time
in a slow initialization loop that handles one character per iteration.
This loop can be simplified to use the much faster strlen/strnlen/strchr/memcmp
instead.  Reading ahead a few cachelines reduces the number of strnlen calls,
which improves performance by ~3-4%.  Overall, this patch reduces the time
taken for the full strstr benchtest by >40%.

	* string/strcasestr.c (STRCASESTR): Simplify and speed up first match.
	* string/strstr.c (AVAILABLE): Likewise.
This commit is contained in:
Wilco Dijkstra
2018-08-03 17:24:12 +01:00
parent 430388d5dc
commit 284f42bc77
3 changed files with 39 additions and 44 deletions

View File

@ -1,3 +1,8 @@
2018-08-03 Wilco Dijkstra <wdijkstr@arm.com>
* string/strcasestr.c (STRCASESTR): Simplify and speedup first match.
* string/strstr.c (AVAILABLE): Likewise.
2018-08-03 H.J. Lu <hongjiu.lu@intel.com> 2018-08-03 H.J. Lu <hongjiu.lu@intel.com>
* sysdeps/i386/i686/multiarch/bzero-ia32.S: Don't include * sysdeps/i386/i686/multiarch/bzero-ia32.S: Don't include

View File

@ -58,31 +58,22 @@
case-insensitive comparison. This function gives unspecified case-insensitive comparison. This function gives unspecified
results in multibyte locales. */ results in multibyte locales. */
char * char *
STRCASESTR (const char *haystack_start, const char *needle_start) STRCASESTR (const char *haystack, const char *needle)
{ {
const char *haystack = haystack_start;
const char *needle = needle_start;
size_t needle_len; /* Length of NEEDLE. */ size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */ size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
/* Determine length of NEEDLE, and in the process, make sure /* Handle empty NEEDLE special case. */
HAYSTACK is at least as long (no point processing all of a long if (needle[0] == '\0')
NEEDLE if HAYSTACK is too short). */ return (char *) haystack;
while (*haystack && *needle)
{ /* Ensure HAYSTACK length is at least as long as NEEDLE length.
ok &= (TOLOWER ((unsigned char) *haystack) Since a match may occur early on in a huge HAYSTACK, use strnlen
== TOLOWER ((unsigned char) *needle)); and read ahead a few cachelines for improved performance. */
haystack++; needle_len = strlen (needle);
needle++; haystack_len = __strnlen (haystack, needle_len + 256);
} if (haystack_len < needle_len)
if (*needle)
return NULL; return NULL;
if (ok)
return (char *) haystack_start;
needle_len = needle - needle_start;
haystack = haystack_start + 1;
haystack_len = needle_len - 1;
/* Perform the search. Abstract memory is considered to be an array /* Perform the search. Abstract memory is considered to be an array
of 'unsigned char' values, not an array of 'char' values. See of 'unsigned char' values, not an array of 'char' values. See
@ -90,10 +81,10 @@ STRCASESTR (const char *haystack_start, const char *needle_start)
if (needle_len < LONG_NEEDLE_THRESHOLD) if (needle_len < LONG_NEEDLE_THRESHOLD)
return two_way_short_needle ((const unsigned char *) haystack, return two_way_short_needle ((const unsigned char *) haystack,
haystack_len, haystack_len,
(const unsigned char *) needle_start, (const unsigned char *) needle,
needle_len); needle_len);
return two_way_long_needle ((const unsigned char *) haystack, haystack_len, return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
(const unsigned char *) needle_start, (const unsigned char *) needle,
needle_len); needle_len);
} }

View File

@ -50,33 +50,32 @@
if NEEDLE is empty, otherwise NULL if NEEDLE is not found in if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
HAYSTACK. */ HAYSTACK. */
char * char *
STRSTR (const char *haystack_start, const char *needle_start) STRSTR (const char *haystack, const char *needle)
{ {
const char *haystack = haystack_start;
const char *needle = needle_start;
size_t needle_len; /* Length of NEEDLE. */ size_t needle_len; /* Length of NEEDLE. */
size_t haystack_len; /* Known minimum length of HAYSTACK. */ size_t haystack_len; /* Known minimum length of HAYSTACK. */
bool ok = true; /* True if NEEDLE is prefix of HAYSTACK. */
/* Determine length of NEEDLE, and in the process, make sure /* Handle empty NEEDLE special case. */
HAYSTACK is at least as long (no point processing all of a long if (needle[0] == '\0')
NEEDLE if HAYSTACK is too short). */ return (char *) haystack;
while (*haystack && *needle)
ok &= *haystack++ == *needle++; /* Skip until we find the first matching char from NEEDLE. */
if (*needle) haystack = strchr (haystack, needle[0]);
return NULL; if (haystack == NULL || needle[1] == '\0')
if (ok) return (char *) haystack;
return (char *) haystack_start;
/* Ensure HAYSTACK length is at least as long as NEEDLE length.
/* Reduce the size of haystack using strchr, since it has a smaller Since a match may occur early on in a huge HAYSTACK, use strnlen
linear coefficient than the Two-Way algorithm. */ and read ahead a few cachelines for improved performance. */
needle_len = needle - needle_start; needle_len = strlen (needle);
haystack = strchr (haystack_start + 1, *needle_start); haystack_len = __strnlen (haystack, needle_len + 256);
if (!haystack || __builtin_expect (needle_len == 1, 0)) if (haystack_len < needle_len)
return NULL;
/* Check whether we have a match. This improves performance since we avoid
the initialization overhead of the two-way algorithm. */
if (memcmp (haystack, needle, needle_len) == 0)
return (char *) haystack; return (char *) haystack;
needle -= needle_len;
haystack_len = (haystack > haystack_start + needle_len ? 1
: needle_len + haystack_start - haystack);
/* Perform the search. Abstract memory is considered to be an array /* Perform the search. Abstract memory is considered to be an array
of 'unsigned char' values, not an array of 'char' values. See of 'unsigned char' values, not an array of 'char' values. See