1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-22 14:32:25 +03:00

Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.

Extraction of trigrams did not process LIKE escape sequences properly,
so trigrams near escapes could be misidentified, resulting in incorrect
index search results.

Fujii Masao
This commit is contained in:
Tom Lane
2012-08-20 13:24:52 -04:00
parent 51fed14d73
commit b2a01b9ad1
3 changed files with 39 additions and 19 deletions

View File

@@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
abcdef abcdef
(1 row) (1 row)
select * from test2 where t like E'%\\bcd%';
t
--------
abcdef
(1 row)
select * from test2 where t ilike '%BCD%'; select * from test2 where t ilike '%BCD%';
t t
-------- --------
@@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
abcdef abcdef
(1 row) (1 row)
select * from test2 where t like E'%\\bcd%';
t
--------
abcdef
(1 row)
select * from test2 where t ilike '%BCD%'; select * from test2 where t ilike '%BCD%';
t t
-------- --------

View File

@@ -49,6 +49,7 @@ explain (costs off)
select * from test2 where t ilike '%BCD%'; select * from test2 where t ilike '%BCD%';
select * from test2 where t like '%BCD%'; select * from test2 where t like '%BCD%';
select * from test2 where t like '%bcd%'; select * from test2 where t like '%bcd%';
select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%'; select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%'; select * from test2 where t ilike 'qua%';
drop index test2_idx_gin; drop index test2_idx_gin;
@@ -60,5 +61,6 @@ explain (costs off)
select * from test2 where t ilike '%BCD%'; select * from test2 where t ilike '%BCD%';
select * from test2 where t like '%BCD%'; select * from test2 where t like '%BCD%';
select * from test2 where t like '%bcd%'; select * from test2 where t like '%bcd%';
select * from test2 where t like E'%\\bcd%';
select * from test2 where t ilike '%BCD%'; select * from test2 where t ilike '%BCD%';
select * from test2 where t ilike 'qua%'; select * from test2 where t ilike 'qua%';

View File

@@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
const char *beginword = str; const char *beginword = str;
const char *endword; const char *endword;
char *s = buf; char *s = buf;
bool in_wildcard_meta = false; bool in_leading_wildcard_meta = false;
bool in_trailing_wildcard_meta = false;
bool in_escape = false; bool in_escape = false;
int clen; int clen;
/* /*
* Find the first word character remembering whether last character was * Find the first word character, remembering whether preceding character
* wildcard meta-character. * was wildcard meta-character. Note that the in_escape state persists
* from this loop to the next one, since we may exit at a word character
* that is in_escape.
*/ */
while (beginword - str < lenstr) while (beginword - str < lenstr)
{ {
if (in_escape) if (in_escape)
{ {
in_escape = false;
in_wildcard_meta = false;
if (iswordchr(beginword)) if (iswordchr(beginword))
break; break;
in_escape = false;
in_leading_wildcard_meta = false;
} }
else else
{ {
if (ISESCAPECHAR(beginword)) if (ISESCAPECHAR(beginword))
in_escape = true; in_escape = true;
else if (ISWILDCARDCHAR(beginword)) else if (ISWILDCARDCHAR(beginword))
in_wildcard_meta = true; in_leading_wildcard_meta = true;
else if (iswordchr(beginword)) else if (iswordchr(beginword))
break; break;
else else
in_wildcard_meta = false; in_leading_wildcard_meta = false;
} }
beginword += pg_mblen(beginword); beginword += pg_mblen(beginword);
} }
@@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
return NULL; return NULL;
/* /*
* Add left padding spaces if last character wasn't wildcard * Add left padding spaces if preceding character wasn't wildcard
* meta-character. * meta-character.
*/ */
*charlen = 0; *charlen = 0;
if (!in_wildcard_meta) if (!in_leading_wildcard_meta)
{ {
if (LPADDING > 0) if (LPADDING > 0)
{ {
@@ -333,15 +336,11 @@ get_wildcard_part(const char *str, int lenstr,
* string boundary. Strip escapes during copy. * string boundary. Strip escapes during copy.
*/ */
endword = beginword; endword = beginword;
in_wildcard_meta = false;
in_escape = false;
while (endword - str < lenstr) while (endword - str < lenstr)
{ {
clen = pg_mblen(endword); clen = pg_mblen(endword);
if (in_escape) if (in_escape)
{ {
in_escape = false;
in_wildcard_meta = false;
if (iswordchr(endword)) if (iswordchr(endword))
{ {
memcpy(s, endword, clen); memcpy(s, endword, clen);
@@ -349,7 +348,17 @@ get_wildcard_part(const char *str, int lenstr,
s += clen; s += clen;
} }
else else
{
/*
* Back up endword to the escape character when stopping at an
* escaped non-word character, so that the subsequent
* get_wildcard_part call will restart from the escape character.
* We assume here that escape chars are single-byte.
*/
endword--;
break; break;
}
in_escape = false;
} }
else else
{ {
@@ -357,7 +366,7 @@ get_wildcard_part(const char *str, int lenstr,
in_escape = true; in_escape = true;
else if (ISWILDCARDCHAR(endword)) else if (ISWILDCARDCHAR(endword))
{ {
in_wildcard_meta = true; in_trailing_wildcard_meta = true;
break; break;
} }
else if (iswordchr(endword)) else if (iswordchr(endword))
@@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
s += clen; s += clen;
} }
else else
{
in_wildcard_meta = false;
break; break;
}
} }
endword += clen; endword += clen;
} }
/* /*
* Add right padding spaces if last character wasn't wildcard * Add right padding spaces if next character isn't wildcard
* meta-character. * meta-character.
*/ */
if (!in_wildcard_meta) if (!in_trailing_wildcard_meta)
{ {
if (RPADDING > 0) if (RPADDING > 0)
{ {