1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-13 07:41:39 +03:00

Implement LIKE/ESCAPE. Change parser to use like()/notlike()

rather than the "~~" operator; this made it easy to add ESCAPE features.
Implement ILIKE, NOT ILIKE, and the ESCAPE clause for them.
 afaict this is not MultiByte clean, but lots of other stuff isn't either.
Fix up underlying support code for LIKE/NOT LIKE.
 Things should be faster and no longer require internal string copying.
Update regression test to add explicit checks for
 LIKE/NOT LIKE/ILIKE/NOT ILIKE.
Remove colon and semi-colon operators as threatened in 7.0.
Implement SQL99 COMMIT/AND NO CHAIN.
 Throw elog(ERROR) on COMMIT/AND CHAIN per spec
 since we don't yet support it.
Implement SQL99 CREATE/DROP SCHEMA as equivalent to CREATE DATABASE.
 This is only a stopgap or demo since schemas will have another
 implementation soon.
Remove a few unused production rules to get rid of warnings
 which crept in on the last commit.
Fix up tabbing in some places by removing embedded spaces.
This commit is contained in:
Thomas G. Lockhart
2000-08-06 18:06:44 +00:00
parent df40234639
commit 30ab107dbf
6 changed files with 579 additions and 230 deletions

View File

@ -11,7 +11,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.37 2000/07/07 21:12:50 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.38 2000/08/06 18:05:41 thomas Exp $
*
*-------------------------------------------------------------------------
*/
@ -20,70 +20,30 @@
#include "mb/pg_wchar.h"
#include "utils/builtins.h"
static bool like(pg_wchar * text, pg_wchar * p);
#define LIKE_TRUE 1
#define LIKE_FALSE 0
#define LIKE_ABORT (-1)
static int MatchText(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e);
static int MatchTextLower(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e);
/*
* interface routines called by the function manager
*/
/*
 * fixedlen_like
 *		Generic LIKE matcher for a counted string (the pre-image helper that
 *		the name/text entry points called before this commit).
 *
 * s       - the string to match against (not necessarily null-terminated)
 * p       - the pattern, as a text value (length taken from its header)
 * charlen - number of bytes of s to examine
 *
 * Both inputs are copied into freshly palloc'd, null-terminated work
 * buffers, handed to like(), and the buffers are pfree'd before the
 * boolean result is returned.
 */
static bool
fixedlen_like(char *s, text *p, int charlen)
{
pg_wchar *sterm,
*pterm;
bool result;
int len;
/*
 * Make a terminated copy of s.  One extra element is allocated for the
 * terminator.
 * NOTE(review): the MULTIBYTE branch never stores a terminator itself;
 * presumably pg_mb2wchar_with_len() does so -- confirm.
 */
#ifdef MULTIBYTE
sterm = (pg_wchar *) palloc((charlen + 1) * sizeof(pg_wchar));
(void) pg_mb2wchar_with_len((unsigned char *) s, sterm, charlen);
#else
sterm = (char *) palloc(charlen + 1);
memcpy(sterm, s, charlen);
sterm[charlen] = '\0';
#endif
/*
 * p is a text value, not a C string, so build a terminated string from
 * its data area (VARDATA).
 */
/* payload length of the pattern; the buffers below add room for the terminator */
len = VARSIZE(p) - VARHDRSZ;
#ifdef MULTIBYTE
pterm = (pg_wchar *) palloc((len + 1) * sizeof(pg_wchar));
(void) pg_mb2wchar_with_len((unsigned char *) VARDATA(p), pterm, len);
#else
pterm = (char *) palloc(len + 1);
memcpy(pterm, VARDATA(p), len);
*(pterm + len) = '\0';
#endif
/* run the LIKE matcher on the two terminated copies */
result = like(sterm, pterm);
/* release both work buffers before returning */
pfree(sterm);
pfree(pterm);
return result;
}
/*
 * namelike
 *		LIKE for a Name value against a text pattern, no ESCAPE clause.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern.
 * The name's length is taken with strlen(); the pattern's length comes from
 * its varlena header.  NULL is passed as the escape pointer.
 * Returns true only when MatchText() reports LIKE_TRUE.
 *
 * The stale call to fixedlen_like() that preceded the MatchText() call has
 * been dropped: it returned first, leaving the new matcher unreachable, and
 * it referred to a helper that no longer exists.
 */
Datum
namelike(PG_FUNCTION_ARGS)
{
	Name		n = PG_GETARG_NAME(0);
	text	   *p = PG_GETARG_TEXT_P(1);

	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
							 VARDATA(p), (VARSIZE(p) - VARHDRSZ),
							 NULL)
				   == LIKE_TRUE);
}
Datum
@ -92,7 +52,36 @@ namenlike(PG_FUNCTION_ARGS)
Name n = PG_GETARG_NAME(0);
text *p = PG_GETARG_TEXT_P(1);
PG_RETURN_BOOL(! fixedlen_like(NameStr(*n), p, strlen(NameStr(*n))));
PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
VARDATA(p), (VARSIZE(p)-VARHDRSZ),
NULL)
!= LIKE_TRUE);
}
/*
 * namelike_escape
 *		LIKE for a Name value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchText() as NULL; otherwise
 * a pointer to the start of the escape data is passed.
 * Returns true only when MatchText() reports LIKE_TRUE.
 */
Datum
namelike_escape(PG_FUNCTION_ARGS)
{
	Name		name = PG_GETARG_NAME(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *str = NameStr(*name);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchText(str, strlen(str),
				   VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
				   esc);

	PG_RETURN_BOOL(rc == LIKE_TRUE);
}
/*
 * namenlike_escape
 *		NOT LIKE for a Name value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchText() as NULL.
 * Returns true for every MatchText() result other than LIKE_TRUE, so both
 * LIKE_FALSE and LIKE_ABORT count as "does not match".
 */
Datum
namenlike_escape(PG_FUNCTION_ARGS)
{
	Name		name = PG_GETARG_NAME(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *str = NameStr(*name);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchText(str, strlen(str),
				   VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
				   esc);

	PG_RETURN_BOOL(rc != LIKE_TRUE);
}
Datum
@ -101,7 +90,10 @@ textlike(PG_FUNCTION_ARGS)
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
PG_RETURN_BOOL(fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ));
PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
VARDATA(p), (VARSIZE(p)-VARHDRSZ),
NULL)
== LIKE_TRUE);
}
Datum
@ -110,7 +102,140 @@ textnlike(PG_FUNCTION_ARGS)
text *s = PG_GETARG_TEXT_P(0);
text *p = PG_GETARG_TEXT_P(1);
PG_RETURN_BOOL(! fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ));
PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
VARDATA(p), (VARSIZE(p)-VARHDRSZ),
NULL)
!= LIKE_TRUE);
}
/*
 * textlike_escape
 *		LIKE for a text value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = string to test, 1 = pattern, 2 = escape string.
 * Lengths of string and pattern come from their varlena headers.
 * A zero-length escape string is passed to MatchText() as NULL.
 * Returns true only when MatchText() reports LIKE_TRUE.
 */
Datum
textlike_escape(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_P(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchText(VARDATA(str), (VARSIZE(str) - VARHDRSZ),
				   VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
				   esc);

	PG_RETURN_BOOL(rc == LIKE_TRUE);
}
/*
 * textnlike_escape
 *		NOT LIKE for a text value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = string to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchText() as NULL.
 * Returns true for every MatchText() result other than LIKE_TRUE.
 */
Datum
textnlike_escape(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_P(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchText(VARDATA(str), (VARSIZE(str) - VARHDRSZ),
				   VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
				   esc);

	PG_RETURN_BOOL(rc != LIKE_TRUE);
}
/*
* Case-insensitive versions
*/
/*
 * inamelike
 *		ILIKE (case-insensitive LIKE) for a Name value, no ESCAPE clause.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern.
 * NULL is passed as the escape pointer.
 * Returns true only when MatchTextLower() reports LIKE_TRUE.
 */
Datum
inamelike(PG_FUNCTION_ARGS)
{
	Name		name = PG_GETARG_NAME(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	char	   *str = NameStr(*name);
	int			rc;

	rc = MatchTextLower(str, strlen(str),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						NULL);

	PG_RETURN_BOOL(rc == LIKE_TRUE);
}
/*
 * inamenlike
 *		NOT ILIKE for a Name value, no ESCAPE clause.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern.
 * NULL is passed as the escape pointer.
 * Returns true for every MatchTextLower() result other than LIKE_TRUE.
 */
Datum
inamenlike(PG_FUNCTION_ARGS)
{
	Name		name = PG_GETARG_NAME(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	char	   *str = NameStr(*name);
	int			rc;

	rc = MatchTextLower(str, strlen(str),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						NULL);

	PG_RETURN_BOOL(rc != LIKE_TRUE);
}
/*
 * inamelike_escape
 *		ILIKE for a Name value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchTextLower() as NULL.
 * Returns true only when MatchTextLower() reports LIKE_TRUE.
 */
Datum
inamelike_escape(PG_FUNCTION_ARGS)
{
	Name		name = PG_GETARG_NAME(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *str = NameStr(*name);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchTextLower(str, strlen(str),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						esc);

	PG_RETURN_BOOL(rc == LIKE_TRUE);
}
/*
 * inamenlike_escape
 *		NOT ILIKE for a Name value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = name to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchTextLower() as NULL.
 * Returns true for every MatchTextLower() result other than LIKE_TRUE.
 */
Datum
inamenlike_escape(PG_FUNCTION_ARGS)
{
	Name		name = PG_GETARG_NAME(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *str = NameStr(*name);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchTextLower(str, strlen(str),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						esc);

	PG_RETURN_BOOL(rc != LIKE_TRUE);
}
/*
 * itextlike
 *		ILIKE (case-insensitive LIKE) for a text value, no ESCAPE clause.
 *
 * fmgr arguments: 0 = string to test, 1 = pattern.
 * Both lengths come from the varlena headers; NULL is passed as the
 * escape pointer.
 * Returns true only when MatchTextLower() reports LIKE_TRUE.
 */
Datum
itextlike(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_P(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	int			rc;

	rc = MatchTextLower(VARDATA(str), (VARSIZE(str) - VARHDRSZ),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						NULL);

	PG_RETURN_BOOL(rc == LIKE_TRUE);
}
/*
 * itextnlike
 *		NOT ILIKE for a text value, no ESCAPE clause.
 *
 * fmgr arguments: 0 = string to test, 1 = pattern.
 * NULL is passed as the escape pointer.
 * Returns true for every MatchTextLower() result other than LIKE_TRUE.
 */
Datum
itextnlike(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_P(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	int			rc;

	rc = MatchTextLower(VARDATA(str), (VARSIZE(str) - VARHDRSZ),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						NULL);

	PG_RETURN_BOOL(rc != LIKE_TRUE);
}
/*
 * itextlike_escape
 *		ILIKE for a text value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = string to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchTextLower() as NULL.
 * Returns true only when MatchTextLower() reports LIKE_TRUE.
 */
Datum
itextlike_escape(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_P(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchTextLower(VARDATA(str), (VARSIZE(str) - VARHDRSZ),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						esc);

	PG_RETURN_BOOL(rc == LIKE_TRUE);
}
/*
 * itextnlike_escape
 *		NOT ILIKE for a text value against a text pattern with an ESCAPE string.
 *
 * fmgr arguments: 0 = string to test, 1 = pattern, 2 = escape string.
 * A zero-length escape string is passed to MatchTextLower() as NULL.
 * Returns true for every MatchTextLower() result other than LIKE_TRUE.
 */
Datum
itextnlike_escape(PG_FUNCTION_ARGS)
{
	text	   *str = PG_GETARG_TEXT_P(0);
	text	   *pattern = PG_GETARG_TEXT_P(1);
	text	   *escape = PG_GETARG_TEXT_P(2);
	char	   *esc = NULL;
	int			rc;

	if ((VARSIZE(escape) - VARHDRSZ) > 0)
		esc = VARDATA(escape);

	rc = MatchTextLower(VARDATA(str), (VARSIZE(str) - VARHDRSZ),
						VARDATA(pattern), (VARSIZE(pattern) - VARHDRSZ),
						esc);

	PG_RETURN_BOOL(rc != LIKE_TRUE);
}
@ -136,12 +261,16 @@ textnlike(PG_FUNCTION_ARGS)
** LIKE <pattern> ESCAPE <escape character>. We are a small operation
** so we force you to use '\'. - ay 7/95]
**
** OK, we now support the SQL9x LIKE <pattern> ESCAPE <char> syntax.
** We should kill the backslash escaping mechanism since it is non-standard
** and undocumented afaik.
** The code is rewritten to avoid requiring null-terminated strings,
** which in turn allows us to leave out some memcpy() operations.
** This code should be faster and take less memory, but no promises...
** - thomas 2000-08-06
**
*/
#define LIKE_TRUE 1
#define LIKE_FALSE 0
#define LIKE_ABORT (-1)
/*--------------------
* Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
*
@ -153,69 +282,97 @@ textnlike(PG_FUNCTION_ARGS)
* pattern either, so an upper-level % scan can stop scanning now.
*--------------------
*/
static int
DoMatch(pg_wchar * text, pg_wchar * p)
{
for (; *p && *text; text ++, p++)
{
switch (*p)
{
case '\\':
/* Literal match with following character. */
p++;
/* FALLTHROUGH */
default:
if (*text !=*p)
return LIKE_FALSE;
break;
case '_':
/* Match any single character. */
break;
case '%':
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
while (*p == '%')
p++;
/* Trailing percent matches everything. */
if (*p == '\0')
return LIKE_TRUE;
/*
* Otherwise, scan for a text position at which we can
* match the rest of the pattern.
*/
for (; *text; text ++)
{
#define NextChar(p, plen) (p)++, (plen)--
static int
MatchText(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e)
{
/* Fast path for match-everything pattern
* Include weird case of escape character as a percent sign or underscore,
* when presumably that wildcard character becomes a literal.
*/
if ((plen == 1) && (*p == '%')
&& ! ((e != NULL) && (*e == '%')))
return LIKE_TRUE;
while ((tlen > 0) && (plen > 0))
{
/* If an escape character was specified and we find it here in the pattern,
* then we'd better have an exact match for the next character.
*/
if ((e != NULL) && (*p == *e))
{
NextChar(p, plen);
if ((plen <= 0) || (*t != *p))
return LIKE_FALSE;
}
else
{
switch (*p)
{
case '\\':
/* Literal match with following character. */
NextChar(p, plen);
/* FALLTHROUGH */
default:
if (*t != *p)
return LIKE_FALSE;
break;
case '_':
/* Match any single character. */
break;
case '%':
/* %% is the same as % according to the SQL standard */
/* Advance past all %'s */
while ((plen > 0) && (*p == '%'))
NextChar(p, plen);
/* Trailing percent matches everything. */
if (plen <= 0)
return LIKE_TRUE;
/*
* Optimization to prevent most recursion: don't
* recurse unless first pattern char might match this
* text char.
* Otherwise, scan for a text position at which we can
* match the rest of the pattern.
*/
if (*text == *p || *p == '\\' || *p == '_')
while (tlen > 0)
{
int matched = DoMatch(text, p);
/*
* Optimization to prevent most recursion: don't
* recurse unless first pattern char might match this
* text char.
*/
if ((*t == *p) || (*p == '\\') || (*p == '_')
|| ((e != NULL) && (*p == *e)))
{
int matched = MatchText(t, tlen, p, plen, e);
if (matched != LIKE_FALSE)
return matched; /* TRUE or ABORT */
if (matched != LIKE_FALSE)
return matched; /* TRUE or ABORT */
}
NextChar(t, tlen);
}
}
/*
* End of text with no match, so no point in trying later
* places to start matching this pattern.
*/
return LIKE_ABORT;
/*
* End of text with no match, so no point in trying later
* places to start matching this pattern.
*/
return LIKE_ABORT;
}
}
NextChar(t, tlen);
NextChar(p, plen);
}
if (*text !='\0')
if (tlen > 0)
return LIKE_FALSE; /* end of pattern, but not of text */
/* End of input string. Do we have matching pattern remaining? */
while (*p == '%') /* allow multiple %'s at end of pattern */
p++;
if (*p == '\0')
while ((plen > 0) && (*p == '%')) /* allow multiple %'s at end of pattern */
NextChar(p, plen);
if (plen <= 0)
return LIKE_TRUE;
/*
@ -223,16 +380,101 @@ DoMatch(pg_wchar * text, pg_wchar * p)
* start matching this pattern.
*/
return LIKE_ABORT;
}
} /* MatchText() */
/*--------------------
 * MatchTextLower
 *		Case-insensitive LIKE matcher used by the ILIKE entry points.
 *
 * t, tlen	string to test and its length (need not be null-terminated)
 * p, plen	pattern and its length (need not be null-terminated)
 * e		pointer to the escape character, or NULL if no ESCAPE was given
 *
 * Returns LIKE_TRUE on a match, LIKE_FALSE on a mismatch, and LIKE_ABORT
 * when the text ran out before the pattern did, which tells an enclosing
 * '%' scan that trying later start positions is pointless.
 *
 * Characters are folded with tolower() before comparison.  Each value is
 * cast to unsigned char first because tolower() is undefined for negative
 * arguments other than EOF.
 * NOTE(review): if pg_wchar is wider than one byte in some builds the cast
 * truncates; this code is not multibyte-aware in any case -- confirm.
 *
 * Fixes relative to the previous version:
 *	- the '%' scan recursed into the case-sensitive MatchText(), so the
 *	  remainder of the pattern after a '%' was matched case-sensitively;
 *	- a pattern ending in a backslash read one element past the pattern.
 *--------------------
 */
static int
MatchTextLower(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e)
{
	/*
	 * Fast path for the match-everything pattern.  Not taken when '%' is
	 * itself the escape character, since it is then not a wildcard.
	 */
	if ((plen == 1) && (*p == '%')
		&& !((e != NULL) && (*e == '%')))
		return LIKE_TRUE;

	while ((tlen > 0) && (plen > 0))
	{
		/*
		 * If an escape character was specified and we find it here in the
		 * pattern, the next pattern character must match literally.
		 */
		if ((e != NULL)
			&& (tolower((unsigned char) *p) == tolower((unsigned char) *e)))
		{
			NextChar(p, plen);
			if ((plen <= 0)
				|| (tolower((unsigned char) *t) != tolower((unsigned char) *p)))
				return LIKE_FALSE;
		}
		else
		{
			switch (*p)
			{
				case '\\':
					/* Literal match with following character. */
					NextChar(p, plen);
					/* A trailing backslash has nothing left to match. */
					if (plen <= 0)
						return LIKE_FALSE;
					/* FALLTHROUGH */
				default:
					if (tolower((unsigned char) *t) != tolower((unsigned char) *p))
						return LIKE_FALSE;
					break;
				case '_':
					/* Match any single character. */
					break;
				case '%':
					/* %% is the same as % according to the SQL standard */
					/* Advance past all %'s */
					while ((plen > 0) && (*p == '%'))
						NextChar(p, plen);
					/* Trailing percent matches everything. */
					if (plen <= 0)
						return LIKE_TRUE;

					/*
					 * Otherwise, scan for a text position at which we can
					 * match the rest of the pattern.
					 */
					while (tlen > 0)
					{
						/*
						 * Optimization to prevent most recursion: don't
						 * recurse unless first pattern char might match this
						 * text char.
						 */
						if ((tolower((unsigned char) *t) == tolower((unsigned char) *p))
							|| (*p == '\\') || (*p == '_')
							|| ((e != NULL)
								&& (tolower((unsigned char) *p) == tolower((unsigned char) *e))))
						{
							/* Stay case-insensitive for the rest of the pattern. */
							int			matched = MatchTextLower(t, tlen, p, plen, e);

							if (matched != LIKE_FALSE)
								return matched; /* TRUE or ABORT */
						}
						NextChar(t, tlen);
					}

					/*
					 * End of text with no match, so no point in trying later
					 * places to start matching this pattern.
					 */
					return LIKE_ABORT;
			}
		}

		NextChar(t, tlen);
		NextChar(p, plen);
	}

	if (tlen > 0)
		return LIKE_FALSE;		/* end of pattern, but not of text */

	/* End of input string.  Do we have matching pattern remaining? */
	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
		NextChar(p, plen);
	if (plen <= 0)
		return LIKE_TRUE;

	/*
	 * End of text with no match, so no point in trying later places to
	 * start matching this pattern.
	 */
	return LIKE_ABORT;
}	/* MatchTextLower() */