1
0
mirror of https://sourceware.org/git/glibc.git synced 2025-08-08 17:42:12 +03:00

wcsmbs: Ensure wcsstr worst-case linear execution time (BZ 23865)

It uses the same two-way algorithm that strstr, strcasestr, and
memmem use.  Unlike strstr, neither the "shift table" optimization
nor the self-adapting filtering check is used, because it would result in
a too-large shift table (and omitting them also simplifies the implementation a bit).

Checked on x86_64-linux-gnu and aarch64-linux-gnu.
Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
Adhemerval Zanella
2024-03-19 10:15:28 -03:00
parent 4b717562c4
commit cf11e74b0d
3 changed files with 505 additions and 70 deletions

View File

@@ -1,4 +1,5 @@
/* Copyright (C) 1995-2024 Free Software Foundation, Inc.
/* Locate a substring in a wide-character string.
Copyright (C) 1995-2024 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -15,18 +16,14 @@
License along with the GNU C Library; if not, see
<https://www.gnu.org/licenses/>. */
/*
* The original strstr() file contains the following comment:
*
* My personal strstr() implementation that beats most other algorithms.
* Until someone tells me otherwise, I assume that this is the
* fastest implementation of strstr() in C.
* I deliberately chose not to comment it. You should have at least
* as much fun trying to understand it, as I had to write it :-).
*
* Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */
#include <wchar.h>
#include <string.h>
/* AVAILABLE (H, H_L, J, N_L) evaluates to nonzero when J + N_L <= H_L.
   If that does not hold for the current H_L, H_L is first increased by
   the result of __wcsnlen on H + H_L with a limit of N_L + 128, and the
   comparison is then repeated.  H_L must therefore be a modifiable
   lvalue, and the arguments may be evaluated more than once.
   NOTE(review): presumably consumed by wcs-two-way.h, which is included
   right after this definition -- confirm against that header.  */
#define AVAILABLE(h, h_l, j, n_l) \
(((j) + (n_l) <= (h_l)) \
|| ((h_l) += __wcsnlen ((void*)((h) + (h_l)), (n_l) + 128), \
(j) + (n_l) <= (h_l)))
#include "wcs-two-way.h"
#ifndef WCSSTR
# define WCSSTR wcsstr
@@ -35,66 +32,20 @@
wchar_t *
WCSSTR (const wchar_t *haystack, const wchar_t *needle)
{
wchar_t b, c;
/* Ensure haystack length is at least as long as needle length.
Since a match may occur early on in a huge haystack, use wcsnlen
and read ahead a few cachelines for improved performance. */
size_t ne_len = __wcslen (needle);
size_t hs_len = __wcsnlen (haystack, ne_len | 128);
if (hs_len < ne_len)
return NULL;
if ((b = *needle) != L'\0')
{
haystack--; /* possible ANSI violation */
do
if ((c = *++haystack) == L'\0')
goto ret0;
while (c != b);
/* Check whether we have a match. This improves performance since we
avoid initialization overheads. */
if (__wmemcmp (haystack, needle, ne_len) == 0)
return (wchar_t *) haystack;
if (!(c = *++needle))
goto foundneedle;
++needle;
goto jin;
for (;;)
{
wchar_t a;
const wchar_t *rhaystack, *rneedle;
do
{
if (!(a = *++haystack))
goto ret0;
if (a == b)
break;
if ((a = *++haystack) == L'\0')
goto ret0;
shloop: ;
}
while (a != b);
jin: if (!(a = *++haystack))
goto ret0;
if (a != c)
goto shloop;
if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
do
{
if (a == L'\0')
goto foundneedle;
if (*++rhaystack != (a = *++needle))
break;
if (a == L'\0')
goto foundneedle;
}
while (*++rhaystack == (a = *++needle));
needle = rneedle; /* took the register-poor approach */
if (a == L'\0')
break;
}
}
foundneedle:
return (wchar_t*) haystack;
ret0:
return NULL;
return two_way_short_needle (haystack, hs_len, needle, ne_len);
}
/* This alias is for backward compatibility with drafts of the ISO C
standard. Unfortunately the Unix(TM) standard requires this name. */