mirror of
https://github.com/postgres/postgres.git
synced 2025-05-06 19:59:18 +03:00
Repair bug in regexp split performance improvements.
Commit c8ea87e4b introduced a temporary conversion buffer for substrings extracted during regexp splits. Unfortunately, the code that sized this buffer did not account for degenerate regexp matches that are ignored during splitting: it measured each unmatched span from the end of the previous match even when that match had been ignored, so for regexp_split_* calls it could under-size the buffer in such cases. Fix by measuring from the end of the previous valid (nondegenerate, or degenerate but not ignored) match, and add some regression test cases (though those will only catch the bug if run in a multibyte encoding). Backpatch to 9.3, as the faulty code was. Thanks to the PostGIS project, Regina Obe and Paul Ramsey for the report (via IRC) and assistance in analysis. Patch by me.
This commit is contained in:
parent
b91ae36029
commit
f7d0343ead
@ -982,6 +982,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
|
|||||||
int array_len;
|
int array_len;
|
||||||
int array_idx;
|
int array_idx;
|
||||||
int prev_match_end;
|
int prev_match_end;
|
||||||
|
int prev_valid_match_end;
|
||||||
int start_search;
|
int start_search;
|
||||||
int maxlen = 0; /* largest fetch length in characters */
|
int maxlen = 0; /* largest fetch length in characters */
|
||||||
|
|
||||||
@ -1024,6 +1025,7 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
|
|||||||
|
|
||||||
/* search for the pattern, perhaps repeatedly */
|
/* search for the pattern, perhaps repeatedly */
|
||||||
prev_match_end = 0;
|
prev_match_end = 0;
|
||||||
|
prev_valid_match_end = 0;
|
||||||
start_search = 0;
|
start_search = 0;
|
||||||
while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
|
while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
|
||||||
pmatch_len, pmatch))
|
pmatch_len, pmatch))
|
||||||
@ -1076,13 +1078,15 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
|
|||||||
matchctx->nmatches++;
|
matchctx->nmatches++;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* check length of unmatched portion between end of previous match
|
* check length of unmatched portion between end of previous valid
|
||||||
* and start of current one
|
* (nondegenerate, or degenerate but not ignored) match and start
|
||||||
|
* of current one
|
||||||
*/
|
*/
|
||||||
if (fetching_unmatched &&
|
if (fetching_unmatched &&
|
||||||
pmatch[0].rm_so >= 0 &&
|
pmatch[0].rm_so >= 0 &&
|
||||||
(pmatch[0].rm_so - prev_match_end) > maxlen)
|
(pmatch[0].rm_so - prev_valid_match_end) > maxlen)
|
||||||
maxlen = (pmatch[0].rm_so - prev_match_end);
|
maxlen = (pmatch[0].rm_so - prev_valid_match_end);
|
||||||
|
prev_valid_match_end = pmatch[0].rm_eo;
|
||||||
}
|
}
|
||||||
prev_match_end = pmatch[0].rm_eo;
|
prev_match_end = pmatch[0].rm_eo;
|
||||||
|
|
||||||
@ -1108,8 +1112,8 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
|
|||||||
* input string
|
* input string
|
||||||
*/
|
*/
|
||||||
if (fetching_unmatched &&
|
if (fetching_unmatched &&
|
||||||
(wide_len - prev_match_end) > maxlen)
|
(wide_len - prev_valid_match_end) > maxlen)
|
||||||
maxlen = (wide_len - prev_match_end);
|
maxlen = (wide_len - prev_valid_match_end);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Keep a note of the end position of the string for the benefit of
|
* Keep a note of the end position of the string for the benefit of
|
||||||
|
@ -674,6 +674,24 @@ SELECT regexp_split_to_array('123456','.');
|
|||||||
{"","","","","","",""}
|
{"","","","","","",""}
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
SELECT regexp_split_to_array('123456','');
|
||||||
|
regexp_split_to_array
|
||||||
|
-----------------------
|
||||||
|
{1,2,3,4,5,6}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT regexp_split_to_array('123456','(?:)');
|
||||||
|
regexp_split_to_array
|
||||||
|
-----------------------
|
||||||
|
{1,2,3,4,5,6}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT regexp_split_to_array('1','');
|
||||||
|
regexp_split_to_array
|
||||||
|
-----------------------
|
||||||
|
{1}
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- errors
|
-- errors
|
||||||
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
|
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
|
||||||
ERROR: invalid regexp option: "z"
|
ERROR: invalid regexp option: "z"
|
||||||
|
@ -188,6 +188,9 @@ SELECT regexp_split_to_array('the quick brown fox jumps over the lazy dog', 'nom
|
|||||||
SELECT regexp_split_to_array('123456','1');
|
SELECT regexp_split_to_array('123456','1');
|
||||||
SELECT regexp_split_to_array('123456','6');
|
SELECT regexp_split_to_array('123456','6');
|
||||||
SELECT regexp_split_to_array('123456','.');
|
SELECT regexp_split_to_array('123456','.');
|
||||||
|
SELECT regexp_split_to_array('123456','');
|
||||||
|
SELECT regexp_split_to_array('123456','(?:)');
|
||||||
|
SELECT regexp_split_to_array('1','');
|
||||||
-- errors
|
-- errors
|
||||||
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
|
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'zippy') AS foo;
|
||||||
SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
|
SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'iz');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user