mirror of
https://sourceware.org/git/glibc.git
synced 2025-08-08 17:42:12 +03:00
wcsmbs: Ensure wcsstr worst-case linear execution time (BZ 23865)
It uses the same two-way algorithm used by strstr, strcasestr, and memmem. Unlike strstr, neither the "shift table" optimization nor the self-adapting filtering check is used, because they would result in a too-large shift table (omitting them also simplifies the implementation a bit). Checked on x86_64-linux-gnu and aarch64-linux-gnu. Reviewed-by: DJ Delorie <dj@redhat.com>
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
/* Copyright (C) 1995-2024 Free Software Foundation, Inc.
|
||||
/* Locate a substring in a wide-character string.
|
||||
Copyright (C) 1995-2024 Free Software Foundation, Inc.
|
||||
This file is part of the GNU C Library.
|
||||
|
||||
The GNU C Library is free software; you can redistribute it and/or
|
||||
@@ -15,18 +16,14 @@
|
||||
License along with the GNU C Library; if not, see
|
||||
<https://www.gnu.org/licenses/>. */
|
||||
|
||||
/*
|
||||
* The original strstr() file contains the following comment:
|
||||
*
|
||||
* My personal strstr() implementation that beats most other algorithms.
|
||||
* Until someone tells me otherwise, I assume that this is the
|
||||
* fastest implementation of strstr() in C.
|
||||
* I deliberately chose not to comment it. You should have at least
|
||||
* as much fun trying to understand it, as I had to write it :-).
|
||||
*
|
||||
* Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de */
|
||||
|
||||
#include <wchar.h>
|
||||
#include <string.h>
|
||||
|
||||
#define AVAILABLE(h, h_l, j, n_l) \
|
||||
(((j) + (n_l) <= (h_l)) \
|
||||
|| ((h_l) += __wcsnlen ((void*)((h) + (h_l)), (n_l) + 128), \
|
||||
(j) + (n_l) <= (h_l)))
|
||||
#include "wcs-two-way.h"
|
||||
|
||||
#ifndef WCSSTR
|
||||
# define WCSSTR wcsstr
|
||||
@@ -35,66 +32,20 @@
|
||||
wchar_t *
|
||||
WCSSTR (const wchar_t *haystack, const wchar_t *needle)
|
||||
{
|
||||
wchar_t b, c;
|
||||
/* Ensure haystack length is at least as long as needle length.
|
||||
Since a match may occur early on in a huge haystack, use wcsnlen
|
||||
and read ahead a few cachelines for improved performance. */
|
||||
size_t ne_len = __wcslen (needle);
|
||||
size_t hs_len = __wcsnlen (haystack, ne_len | 128);
|
||||
if (hs_len < ne_len)
|
||||
return NULL;
|
||||
|
||||
if ((b = *needle) != L'\0')
|
||||
{
|
||||
haystack--; /* possible ANSI violation */
|
||||
do
|
||||
if ((c = *++haystack) == L'\0')
|
||||
goto ret0;
|
||||
while (c != b);
|
||||
/* Check whether we have a match. This improves performance since we
|
||||
avoid initialization overheads. */
|
||||
if (__wmemcmp (haystack, needle, ne_len) == 0)
|
||||
return (wchar_t *) haystack;
|
||||
|
||||
if (!(c = *++needle))
|
||||
goto foundneedle;
|
||||
++needle;
|
||||
goto jin;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
wchar_t a;
|
||||
const wchar_t *rhaystack, *rneedle;
|
||||
|
||||
do
|
||||
{
|
||||
if (!(a = *++haystack))
|
||||
goto ret0;
|
||||
if (a == b)
|
||||
break;
|
||||
if ((a = *++haystack) == L'\0')
|
||||
goto ret0;
|
||||
shloop: ;
|
||||
}
|
||||
while (a != b);
|
||||
|
||||
jin: if (!(a = *++haystack))
|
||||
goto ret0;
|
||||
|
||||
if (a != c)
|
||||
goto shloop;
|
||||
|
||||
if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
|
||||
do
|
||||
{
|
||||
if (a == L'\0')
|
||||
goto foundneedle;
|
||||
if (*++rhaystack != (a = *++needle))
|
||||
break;
|
||||
if (a == L'\0')
|
||||
goto foundneedle;
|
||||
}
|
||||
while (*++rhaystack == (a = *++needle));
|
||||
|
||||
needle = rneedle; /* took the register-poor approach */
|
||||
|
||||
if (a == L'\0')
|
||||
break;
|
||||
}
|
||||
}
|
||||
foundneedle:
|
||||
return (wchar_t*) haystack;
|
||||
ret0:
|
||||
return NULL;
|
||||
return two_way_short_needle (haystack, hs_len, needle, ne_len);
|
||||
}
|
||||
/* This alias is for backward compatibility with drafts of the ISO C
|
||||
standard. Unfortunately the Unix(TM) standard requires this name. */
|
||||
|
Reference in New Issue
Block a user