1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-03 22:24:49 +03:00

Fix LIKE's special-case code for % followed by _.  I'm not entirely sure that
this case is worth a special code path, but a special code path that gets
the boundary condition wrong is definitely no good.  Per bug #4821 from
Andrew Gierth.

In passing, clean up some minor code formatting issues (excess parentheses
and blank lines in odd places).

Back-patch to 8.3, where the bug was introduced.
This commit is contained in:
Tom Lane 2009-05-24 18:10:38 +00:00
parent c6c458e24d
commit fc2660fc25
3 changed files with 95 additions and 61 deletions
src
backend/utils/adt
test/regress

@ -9,7 +9,7 @@
* (UTF8 is a special case because we can use a much more efficient version
* of NextChar than can be used for general multi-byte encodings.)
*
* Before the inclusion, we need to define following macros:
* Before the inclusion, we need to define the following macros:
*
* NextChar
* MatchText - to name of function wanted
@ -19,47 +19,46 @@
* Copyright (c) 1996-2009, PostgreSQL Global Development Group
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.24 2009/01/01 17:23:49 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.25 2009/05/24 18:10:37 tgl Exp $
*
*-------------------------------------------------------------------------
*/
/*
** Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
** Rich $alz is now <rsalz@bbn.com>.
** Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
**
** This code was shamelessly stolen from the "pql" code by myself and
** slightly modified :)
**
** All references to the word "star" were replaced by "percent"
** All references to the word "wild" were replaced by "like"
**
** All the nice shell RE matching stuff was replaced by just "_" and "%"
**
** As I don't have a copy of the SQL standard handy I wasn't sure whether
** to leave in the '\' escape character handling.
**
** Keith Parks. <keith@mtcc.demon.co.uk>
**
** SQL92 lets you specify the escape character by saying
** LIKE <pattern> ESCAPE <escape character>. We are a small operation
** so we force you to use '\'. - ay 7/95
**
** Now we have the like_escape() function that converts patterns with
** any specified escape character (or none at all) to the internal
** default escape character, which is still '\'. - tgl 9/2000
**
** The code is rewritten to avoid requiring null-terminated strings,
** which in turn allows us to leave out some memcpy() operations.
** This code should be faster and take less memory, but no promises...
** - thomas 2000-08-06
**
*/
* Originally written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
* Rich $alz is now <rsalz@bbn.com>.
* Special thanks to Lars Mathiesen <thorinn@diku.dk> for the LABORT code.
*
* This code was shamelessly stolen from the "pql" code by myself and
* slightly modified :)
*
* All references to the word "star" were replaced by "percent"
* All references to the word "wild" were replaced by "like"
*
* All the nice shell RE matching stuff was replaced by just "_" and "%"
*
* As I don't have a copy of the SQL standard handy I wasn't sure whether
* to leave in the '\' escape character handling.
*
* Keith Parks. <keith@mtcc.demon.co.uk>
*
* SQL92 lets you specify the escape character by saying
* LIKE <pattern> ESCAPE <escape character>. We are a small operation
* so we force you to use '\'. - ay 7/95
*
* Now we have the like_escape() function that converts patterns with
* any specified escape character (or none at all) to the internal
* default escape character, which is still '\'. - tgl 9/2000
*
* The code is rewritten to avoid requiring null-terminated strings,
* which in turn allows us to leave out some memcpy() operations.
* This code should be faster and take less memory, but no promises...
* - thomas 2000-08-06
*/
/*--------------------
* Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
* Match text and pattern, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
*
* LIKE_TRUE: they match
* LIKE_FALSE: they don't match
@ -80,7 +79,7 @@ static int
MatchText(char *t, int tlen, char *p, int plen)
{
/* Fast path for match-everything pattern */
if ((plen == 1) && (*p == '%'))
if (plen == 1 && *p == '%')
return LIKE_TRUE;
/*
@ -91,8 +90,7 @@ MatchText(char *t, int tlen, char *p, int plen)
* text and pattern on a byte by byte basis, even for multi-byte
* encodings.
*/
while ((tlen > 0) && (plen > 0))
while (tlen > 0 && plen > 0)
{
if (*p == '\\')
{
@ -116,7 +114,7 @@ MatchText(char *t, int tlen, char *p, int plen)
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
while ((plen > 0) && (*p == '%'))
while (plen > 0 && *p == '%')
NextByte(p, plen);
/* Trailing percent matches everything. */
if (plen <= 0)
@ -127,22 +125,24 @@ MatchText(char *t, int tlen, char *p, int plen)
* rest of the pattern.
*/
if (*p == '_')
{
/* %_ is the same as _% - avoid matching _ repeatedly */
do
{
NextChar(t, tlen);
NextByte(p, plen);
} while (tlen > 0 && plen > 0 && *p == '_');
if (tlen <= 0)
{
return (plen <= 0) ? LIKE_TRUE : LIKE_ABORT;
}
else if (plen <= 0)
{
return LIKE_FALSE;
}
/*
* If we are at the end of the pattern, succeed: % followed
* by n _'s matches any string of at least n characters, and
* we have now found there are at least n characters.
*/
if (plen <= 0)
return LIKE_TRUE;
/* Look for a place that matches the rest of the pattern */
while (tlen > 0)
{
int matched = MatchText(t, tlen, p, plen);
@ -155,7 +155,6 @@ MatchText(char *t, int tlen, char *p, int plen)
}
else
{
char firstpat = TCHAR(*p);
if (*p == '\\')
@ -180,7 +179,6 @@ MatchText(char *t, int tlen, char *p, int plen)
}
NextChar(t, tlen);
}
}
@ -192,20 +190,20 @@ MatchText(char *t, int tlen, char *p, int plen)
}
else if (*p == '_')
{
/* _ matches any single character, and we know there is one */
NextChar(t, tlen);
NextByte(p, plen);
continue;
}
else if (TCHAR(*t) != TCHAR(*p))
else if (TCHAR(*p) != TCHAR(*t))
{
/*
* Not the single-character wildcard and no explicit match? Then
* time to quit...
*/
/* non-wildcard pattern char fails to match text char */
return LIKE_FALSE;
}
/*
* Pattern and text match, so advance.
*
* It is safe to use NextByte instead of NextChar here, even for
* multi-byte character sets, because we are not following immediately
* after a wildcard character. If we are in the middle of a multibyte
@ -222,9 +220,8 @@ MatchText(char *t, int tlen, char *p, int plen)
if (tlen > 0)
return LIKE_FALSE; /* end of pattern, but not of text */
/* End of input string. Do we have matching pattern remaining? */
while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of
* pattern */
/* End of text string. Do we have matching pattern remaining? */
while (plen > 0 && *p == '%') /* allow multiple %'s at end of pattern */
NextByte(p, plen);
if (plen <= 0)
@ -354,5 +351,4 @@ do_like_escape(text *pat, text *esc)
#ifdef MATCH_LOWER
#undef MATCH_LOWER
#endif

@ -902,6 +902,33 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
f
(1 row)
--
-- test %/_ combination cases, cf bug #4821
--
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
t | t | f
---+---+---
t | t | f
(1 row)
SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
t | t | f
---+---+---
t | t | f
(1 row)
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
t | t | f
---+---+---
t | t | f
(1 row)
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
t | t | f
---+---+---
t | t | f
(1 row)
--
-- test implicit type conversion
--

@ -279,6 +279,17 @@ SELECT 'hawkeye' NOT ILIKE 'H%Eye' AS "false";
SELECT 'Hawkeye' ILIKE 'h%' AS "true";
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
--
-- test %/_ combination cases, cf bug #4821
--
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
--
-- test implicit type conversion
--