mirror of
https://github.com/postgres/postgres.git
synced 2025-05-18 17:41:14 +03:00
Teach regex_fixed_prefix() the correct handling of advanced regex escapes --- they aren't simply quoted characters.
Problem noted by Antti Salmela. Also fix incorrect handling of multibyte characters when they are followed by a quantifier.
This commit is contained in:
parent
d237a12e49
commit
6ba32b24b2
@ -15,7 +15,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.147.2.3 2004/02/27 21:44:44 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/utils/adt/selfuncs.c,v 1.147.2.4 2004/12/02 02:45:24 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -3218,6 +3218,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
|||||||
char *match;
|
char *match;
|
||||||
int pos,
|
int pos,
|
||||||
match_pos,
|
match_pos,
|
||||||
|
prev_pos,
|
||||||
|
prev_match_pos,
|
||||||
paren_depth;
|
paren_depth;
|
||||||
char *patt;
|
char *patt;
|
||||||
char *rest;
|
char *rest;
|
||||||
@ -3278,11 +3280,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
|||||||
|
|
||||||
/* OK, allocate space for pattern */
|
/* OK, allocate space for pattern */
|
||||||
match = palloc(strlen(patt) + 1);
|
match = palloc(strlen(patt) + 1);
|
||||||
match_pos = 0;
|
prev_match_pos = match_pos = 0;
|
||||||
|
|
||||||
/* note start at pos 1 to skip leading ^ */
|
/* note start at pos 1 to skip leading ^ */
|
||||||
for (pos = 1; patt[pos]; pos++)
|
for (prev_pos = pos = 1; patt[pos]; )
|
||||||
{
|
{
|
||||||
|
int len;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for characters that indicate multiple possible matches
|
* Check for characters that indicate multiple possible matches
|
||||||
* here. XXX I suspect isalpha() is not an adequately
|
* here. XXX I suspect isalpha() is not an adequately
|
||||||
@ -3296,6 +3300,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
|||||||
(case_insensitive && isalpha((unsigned char) patt[pos])))
|
(case_insensitive && isalpha((unsigned char) patt[pos])))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In AREs, backslash followed by alphanumeric is an escape, not
|
||||||
|
* a quoted character. Must treat it as having multiple possible
|
||||||
|
* matches.
|
||||||
|
*/
|
||||||
|
if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
|
||||||
|
break;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check for quantifiers. Except for +, this means the preceding
|
* Check for quantifiers. Except for +, this means the preceding
|
||||||
* character is optional, so we must remove it from the prefix
|
* character is optional, so we must remove it from the prefix
|
||||||
@ -3305,14 +3317,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
|||||||
patt[pos] == '?' ||
|
patt[pos] == '?' ||
|
||||||
patt[pos] == '{')
|
patt[pos] == '{')
|
||||||
{
|
{
|
||||||
if (match_pos > 0)
|
match_pos = prev_match_pos;
|
||||||
match_pos--;
|
pos = prev_pos;
|
||||||
pos--;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (patt[pos] == '+')
|
if (patt[pos] == '+')
|
||||||
{
|
{
|
||||||
pos--;
|
pos = prev_pos;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (patt[pos] == '\\')
|
if (patt[pos] == '\\')
|
||||||
@ -3322,7 +3333,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
|
|||||||
if (patt[pos] == '\0')
|
if (patt[pos] == '\0')
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
match[match_pos++] = patt[pos];
|
/* save position in case we need to back up on next loop cycle */
|
||||||
|
prev_match_pos = match_pos;
|
||||||
|
prev_pos = pos;
|
||||||
|
/* must use encoding-aware processing here */
|
||||||
|
len = pg_mblen(&patt[pos]);
|
||||||
|
memcpy(&match[match_pos], &patt[pos], len);
|
||||||
|
match_pos += len;
|
||||||
|
pos += len;
|
||||||
}
|
}
|
||||||
|
|
||||||
match[match_pos] = '\0';
|
match[match_pos] = '\0';
|
||||||
|
Loading…
x
Reference in New Issue
Block a user