mirror of
https://github.com/postgres/postgres.git
synced 2025-06-01 14:21:49 +03:00
Fix regexp substring matching (substring(string from pattern)) for the corner
case where there is a match to the pattern overall but the user has specified a parenthesized subexpression and that subexpression hasn't got a match. An example is substring('foo' from 'foo(bar)?'). This should return NULL, since (bar) isn't matched, but it was mistakenly returning the whole-pattern match instead (ie, 'foo'). Per bug #4044 from Rui Martins. This has been broken since the beginning; patch in all supported versions. The old behavior was sufficiently inconsistent that it's impossible to believe anyone is depending on it.
This commit is contained in:
parent
9d2e16125f
commit
c249e9ed30
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.60.2.3 2007/01/03 22:39:42 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.60.2.4 2008/03/19 02:41:00 tgl Exp $
|
||||
*
|
||||
* Alistair Crooks added the code for the regex caching
|
||||
* agc - cached the regular expressions used - there's a good chance
|
||||
@ -192,36 +192,31 @@ RE_compile_and_cache(text *text_re, int cflags)
|
||||
}
|
||||
|
||||
/*
|
||||
* RE_compile_and_execute - compile and execute a RE
|
||||
* RE_execute - execute a RE
|
||||
*
|
||||
* Returns TRUE on match, FALSE on no match
|
||||
*
|
||||
* text_re --- the pattern, expressed as an *untoasted* TEXT object
|
||||
* re --- the compiled pattern as returned by RE_compile_and_cache
|
||||
* dat --- the data to match against (need not be null-terminated)
|
||||
* dat_len --- the length of the data string
|
||||
* cflags --- compile options for the pattern
|
||||
* nmatch, pmatch --- optional return area for match details
|
||||
*
|
||||
* Both pattern and data are given in the database encoding. We internally
|
||||
* Data is given in the database encoding. We internally
|
||||
* convert to array of pg_wchar which is what Spencer's regex package wants.
|
||||
*/
|
||||
static bool
|
||||
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
|
||||
int cflags, int nmatch, regmatch_t *pmatch)
|
||||
RE_execute(regex_t *re, char *dat, int dat_len,
|
||||
int nmatch, regmatch_t *pmatch)
|
||||
{
|
||||
pg_wchar *data;
|
||||
size_t data_len;
|
||||
int regexec_result;
|
||||
regex_t *re;
|
||||
char errMsg[100];
|
||||
|
||||
/* Convert data string to wide characters */
|
||||
data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar));
|
||||
data_len = pg_mb2wchar_with_len(dat, data, dat_len);
|
||||
|
||||
/* Compile RE */
|
||||
re = RE_compile_and_cache(text_re, cflags);
|
||||
|
||||
/* Perform RE match and return result */
|
||||
regexec_result = pg_regexec(re,
|
||||
data,
|
||||
@ -246,6 +241,33 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
|
||||
return (regexec_result == REG_OKAY);
|
||||
}
|
||||
|
||||
/*
|
||||
* RE_compile_and_execute - compile and execute a RE
|
||||
*
|
||||
* Returns TRUE on match, FALSE on no match
|
||||
*
|
||||
* text_re --- the pattern, expressed as an *untoasted* TEXT object
|
||||
* dat --- the data to match against (need not be null-terminated)
|
||||
* dat_len --- the length of the data string
|
||||
* cflags --- compile options for the pattern
|
||||
* nmatch, pmatch --- optional return area for match details
|
||||
*
|
||||
* Both pattern and data are given in the database encoding. We internally
|
||||
* convert to array of pg_wchar which is what Spencer's regex package wants.
|
||||
*/
|
||||
static bool
|
||||
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
|
||||
int cflags, int nmatch, regmatch_t *pmatch)
|
||||
{
|
||||
regex_t *re;
|
||||
|
||||
/* Compile RE */
|
||||
re = RE_compile_and_cache(text_re, cflags);
|
||||
|
||||
/* Perform RE match and return result */
|
||||
return RE_execute(re, dat, dat_len, nmatch, pmatch);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR
|
||||
@ -400,8 +422,13 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *s = PG_GETARG_TEXT_P(0);
|
||||
text *p = PG_GETARG_TEXT_P(1);
|
||||
bool match;
|
||||
regex_t *re;
|
||||
regmatch_t pmatch[2];
|
||||
int so,
|
||||
eo;
|
||||
|
||||
/* Compile RE */
|
||||
re = RE_compile_and_cache(p, regex_flavor);
|
||||
|
||||
/*
|
||||
* We pass two regmatch_t structs to get info about the overall match and
|
||||
@ -409,34 +436,37 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
||||
* is a parenthesized subexpression, we return what it matched; else
|
||||
* return what the whole regexp matched.
|
||||
*/
|
||||
match = RE_compile_and_execute(p,
|
||||
VARDATA(s),
|
||||
VARSIZE(s) - VARHDRSZ,
|
||||
regex_flavor,
|
||||
2, pmatch);
|
||||
if (!RE_execute(re,
|
||||
VARDATA(s), VARSIZE(s) - VARHDRSZ,
|
||||
2, pmatch))
|
||||
PG_RETURN_NULL(); /* definitely no match */
|
||||
|
||||
/* match? then return the substring matching the pattern */
|
||||
if (match)
|
||||
if (re->re_nsub > 0)
|
||||
{
|
||||
int so,
|
||||
eo;
|
||||
|
||||
/* has parenthesized subexpressions, use the first one */
|
||||
so = pmatch[1].rm_so;
|
||||
eo = pmatch[1].rm_eo;
|
||||
if (so < 0 || eo < 0)
|
||||
{
|
||||
/* no parenthesized subexpression */
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
|
||||
return DirectFunctionCall3(text_substr,
|
||||
PointerGetDatum(s),
|
||||
Int32GetDatum(so + 1),
|
||||
Int32GetDatum(eo - so));
|
||||
}
|
||||
else
|
||||
{
|
||||
/* no parenthesized subexpression, use whole match */
|
||||
so = pmatch[0].rm_so;
|
||||
eo = pmatch[0].rm_eo;
|
||||
}
|
||||
|
||||
PG_RETURN_NULL();
|
||||
/*
|
||||
* It is possible to have a match to the whole pattern but no match
|
||||
* for a subexpression; for example 'foo(bar)?' is considered to match
|
||||
* 'foo' but there is no subexpression match. So this extra test for
|
||||
* match failure is not redundant.
|
||||
*/
|
||||
if (so < 0 || eo < 0)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
return DirectFunctionCall3(text_substr,
|
||||
PointerGetDatum(s),
|
||||
Int32GetDatum(so + 1),
|
||||
Int32GetDatum(eo - so));
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
x
Reference in New Issue
Block a user