mirror of
https://github.com/postgres/postgres.git
synced 2025-12-10 14:22:35 +03:00
Fix text substring search for non-deterministic collations.
Due to an off-by-one error, the code failed to find matches at the end of the haystack. Fix by rewriting the loop.

While at it, fix a comment that claimed that the function could find a zero-length match. Such a match could send a caller into an endless loop. However, zero-length matches only make sense with an empty search string, and that case is explicitly excluded by all callers. To make sure it stays that way, add an Assert and a comment.

Bug: #19341
Reported-by: Adam Warland <adam.warland@infor.com>
Author: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/19341-1d9a22915edfec58@postgresql.org
Backpatch-through: 18
This commit is contained in:
@@ -1111,6 +1111,7 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
|
|||||||
const char *hptr;
|
const char *hptr;
|
||||||
|
|
||||||
Assert(start_ptr >= haystack && start_ptr <= haystack_end);
|
Assert(start_ptr >= haystack && start_ptr <= haystack_end);
|
||||||
|
Assert(needle_len > 0);
|
||||||
|
|
||||||
state->last_match_len_tmp = needle_len;
|
state->last_match_len_tmp = needle_len;
|
||||||
|
|
||||||
@@ -1123,19 +1124,26 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
|
|||||||
* needle under the given collation.
|
* needle under the given collation.
|
||||||
*
|
*
|
||||||
* Note, the found substring could have a different length than the
|
* Note, the found substring could have a different length than the
|
||||||
* needle, including being empty. Callers that want to skip over the
|
* needle. Callers that want to skip over the found string need to
|
||||||
* found string need to read the length of the found substring from
|
* read the length of the found substring from last_match_len rather
|
||||||
* last_match_len rather than just using the length of their needle.
|
* than just using the length of their needle.
|
||||||
*
|
*
|
||||||
* Most callers will require "greedy" semantics, meaning that we need
|
* Most callers will require "greedy" semantics, meaning that we need
|
||||||
* to find the longest such substring, not the shortest. For callers
|
* to find the longest such substring, not the shortest. For callers
|
||||||
* that don't need greedy semantics, we can finish on the first match.
|
* that don't need greedy semantics, we can finish on the first match.
|
||||||
|
*
|
||||||
|
* This loop depends on the assumption that the needle is nonempty and
|
||||||
|
* any matching substring must also be nonempty. (Even if the
|
||||||
|
* collation would accept an empty match, returning one would send
|
||||||
|
* callers that search for successive matches into an infinite loop.)
|
||||||
*/
|
*/
|
||||||
const char *result_hptr = NULL;
|
const char *result_hptr = NULL;
|
||||||
|
|
||||||
hptr = start_ptr;
|
hptr = start_ptr;
|
||||||
while (hptr < haystack_end)
|
while (hptr < haystack_end)
|
||||||
{
|
{
|
||||||
|
const char *test_end;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First check the common case that there is a match in the
|
* First check the common case that there is a match in the
|
||||||
* haystack of exactly the length of the needle.
|
* haystack of exactly the length of the needle.
|
||||||
@@ -1146,11 +1154,13 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
|
|||||||
return (char *) hptr;
|
return (char *) hptr;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Else check if any of the possible substrings starting at hptr
|
* Else check if any of the non-empty substrings starting at hptr
|
||||||
* are equal to the needle.
|
* compare equal to the needle.
|
||||||
*/
|
*/
|
||||||
for (const char *test_end = hptr; test_end < haystack_end; test_end += pg_mblen(test_end))
|
test_end = hptr;
|
||||||
|
do
|
||||||
{
|
{
|
||||||
|
test_end += pg_mblen(test_end);
|
||||||
if (pg_strncoll(hptr, (test_end - hptr), needle, needle_len, state->locale) == 0)
|
if (pg_strncoll(hptr, (test_end - hptr), needle, needle_len, state->locale) == 0)
|
||||||
{
|
{
|
||||||
state->last_match_len_tmp = (test_end - hptr);
|
state->last_match_len_tmp = (test_end - hptr);
|
||||||
@@ -1158,7 +1168,8 @@ text_position_next_internal(char *start_ptr, TextPositionState *state)
|
|||||||
if (!state->greedy)
|
if (!state->greedy)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
} while (test_end < haystack_end);
|
||||||
|
|
||||||
if (result_hptr)
|
if (result_hptr)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|||||||
@@ -1484,6 +1484,13 @@ SELECT array_sort('{a,B}'::text[] COLLATE "C");
|
|||||||
{B,a}
|
{B,a}
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- test replace() at the end of the string (bug #19341)
|
||||||
|
SELECT replace('testX' COLLATE case_insensitive, 'x' COLLATE case_insensitive, 'er');
|
||||||
|
replace
|
||||||
|
---------
|
||||||
|
tester
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- test language tags
|
-- test language tags
|
||||||
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
||||||
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
||||||
|
|||||||
@@ -568,6 +568,9 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
|
|||||||
SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
|
SELECT array_sort('{a,B}'::text[] COLLATE case_insensitive);
|
||||||
SELECT array_sort('{a,B}'::text[] COLLATE "C");
|
SELECT array_sort('{a,B}'::text[] COLLATE "C");
|
||||||
|
|
||||||
|
-- test replace() at the end of the string (bug #19341)
|
||||||
|
SELECT replace('testX' COLLATE case_insensitive, 'x' COLLATE case_insensitive, 'er');
|
||||||
|
|
||||||
-- test language tags
|
-- test language tags
|
||||||
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
|
||||||
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
|
||||||
|
|||||||
Reference in New Issue
Block a user