Implement LIKE/ESCAPE. Change parser to use like()/notlike()

rather than the "~~" operator; this made it easy to add ESCAPE features. Implement ILIKE, NOT ILIKE, and the ESCAPE clause for them. As far as I can tell this is not MultiByte clean, but lots of other stuff isn't either. Fix up underlying support code for LIKE/NOT LIKE. The code should be faster and no longer requires internal string copying. Update regression test to add explicit checks for LIKE/NOT LIKE/ILIKE/NOT ILIKE. Remove colon and semi-colon operators as threatened in 7.0. Implement SQL99 COMMIT/AND NO CHAIN. Throw elog(ERROR) on COMMIT/AND CHAIN per spec since we don't yet support it. Implement SQL99 CREATE/DROP SCHEMA as equivalent to CREATE DATABASE. This is only a stopgap or demo since schemas will have another implementation soon. Remove a few unused production rules to get rid of warnings which crept in on the last commit. Fix up tabbing in some places by removing embedded spaces.
2025-11-10 17:42:29 +03:00 · 2000-08-06 18:06:44 +00:00
parent df40234639
commit 30ab107dbf
6 changed files with 579 additions and 230 deletions
--- a/src/backend/utils/adt/like.c
+++ b/src/backend/utils/adt/like.c
@@ -11,7 +11,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	$Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.37 2000/07/07 21:12:50 tgl Exp $
+ *	$Header: /cvsroot/pgsql/src/backend/utils/adt/like.c,v 1.38 2000/08/06 18:05:41 thomas Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -20,70 +20,30 @@
 #include "mb/pg_wchar.h"
 #include "utils/builtins.h"

-static bool like(pg_wchar * text, pg_wchar * p);
+
+#define LIKE_TRUE						1
+#define LIKE_FALSE						0
+#define LIKE_ABORT						(-1)
+
+
+static int MatchText(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e);
+static int MatchTextLower(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e);
+

 /*
 *	interface routines called by the function manager
 */

-/*
-   fixedlen_like:
-
-   a generic fixed length like routine
-		 s		- the string to match against  (not necessarily null-terminated)
-		 p		   - the pattern (as text*)
-		 charlen   - the length of the string
-*/
-static bool
-fixedlen_like(char *s, text *p, int charlen)
-{
-	pg_wchar   *sterm,
-			   *pterm;
-	bool		result;
-	int			len;
-
-	/* be sure sterm is null-terminated */
-#ifdef MULTIBYTE
-	sterm = (pg_wchar *) palloc((charlen + 1) * sizeof(pg_wchar));
-	(void) pg_mb2wchar_with_len((unsigned char *) s, sterm, charlen);
-#else
-	sterm = (char *) palloc(charlen + 1);
-	memcpy(sterm, s, charlen);
-	sterm[charlen] = '\0';
-#endif
-
-	/*
-	 * p is a text, not a string so we have to make a string
-	 * from the vl_data field of the struct.
-	 */
-
-	/* palloc the length of the text + the null character */
-	len = VARSIZE(p) - VARHDRSZ;
-#ifdef MULTIBYTE
-	pterm = (pg_wchar *) palloc((len + 1) * sizeof(pg_wchar));
-	(void) pg_mb2wchar_with_len((unsigned char *) VARDATA(p), pterm, len);
-#else
-	pterm = (char *) palloc(len + 1);
-	memcpy(pterm, VARDATA(p), len);
-	*(pterm + len) = '\0';
-#endif
-
-	/* do the regexp matching */
-	result = like(sterm, pterm);
-
-	pfree(sterm);
-	pfree(pterm);
-
-	return result;
-}
-
 Datum
 namelike(PG_FUNCTION_ARGS)
 {
 	Name		n = PG_GETARG_NAME(0);
 	text	   *p = PG_GETARG_TEXT_P(1);

-	PG_RETURN_BOOL(fixedlen_like(NameStr(*n), p, strlen(NameStr(*n))));
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   == LIKE_TRUE);
 }

 Datum
@@ -92,7 +52,36 @@ namenlike(PG_FUNCTION_ARGS)
 	Name		n = PG_GETARG_NAME(0);
 	text	   *p = PG_GETARG_TEXT_P(1);

-	PG_RETURN_BOOL(! fixedlen_like(NameStr(*n), p, strlen(NameStr(*n))));
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   != LIKE_TRUE);
+}
+
+Datum
+namelike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+namenlike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(NameStr(*n), strlen(NameStr(*n)),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
 }

 Datum
@@ -101,7 +90,10 @@ textlike(PG_FUNCTION_ARGS)
 	text	   *s = PG_GETARG_TEXT_P(0);
 	text	   *p = PG_GETARG_TEXT_P(1);

-	PG_RETURN_BOOL(fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ));
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   == LIKE_TRUE);
 }

 Datum
@@ -110,7 +102,140 @@ textnlike(PG_FUNCTION_ARGS)
 	text	   *s = PG_GETARG_TEXT_P(0);
 	text	   *p = PG_GETARG_TEXT_P(1);

-	PG_RETURN_BOOL(! fixedlen_like(VARDATA(s), p, VARSIZE(s) - VARHDRSZ));
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 NULL)
+				   != LIKE_TRUE);
+}
+
+Datum
+textlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+textnlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchText(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+							 VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+							 ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
+}
+
+/*
+ * Case-insensitive versions
+ */
+
+Datum
+inamelike(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   == LIKE_TRUE);
+}
+
+Datum
+inamenlike(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   != LIKE_TRUE);
+}
+
+Datum
+inamelike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+inamenlike_escape(PG_FUNCTION_ARGS)
+{
+	Name		n = PG_GETARG_NAME(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(NameStr(*n), strlen(NameStr(*n)),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
+}
+
+Datum
+itextlike(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   == LIKE_TRUE);
+}
+
+Datum
+itextnlike(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  NULL)
+				   != LIKE_TRUE);
+}
+
+Datum
+itextlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   == LIKE_TRUE);
+}
+
+Datum
+itextnlike_escape(PG_FUNCTION_ARGS)
+{
+	text	   *s = PG_GETARG_TEXT_P(0);
+	text	   *p = PG_GETARG_TEXT_P(1);
+	text	   *e = PG_GETARG_TEXT_P(2);
+
+	PG_RETURN_BOOL(MatchTextLower(VARDATA(s), (VARSIZE(s)-VARHDRSZ),
+								  VARDATA(p), (VARSIZE(p)-VARHDRSZ),
+								  ((VARSIZE(e)-VARHDRSZ) > 0? VARDATA(e): NULL))
+				   != LIKE_TRUE);
 }


@@ -136,12 +261,16 @@ textnlike(PG_FUNCTION_ARGS)
 **	 LIKE <pattern> ESCAPE <escape character>. We are a small operation
 **	 so we force you to use '\'. - ay 7/95]
 **
+** OK, we now support the SQL9x LIKE <pattern> ESCAPE <char> syntax.
+** We should kill the backslash escaping mechanism since it is non-standard
+** and undocumented afaik.
+** The code is rewritten to avoid requiring null-terminated strings,
+** which in turn allows us to leave out some memcpy() operations.
+** This code should be faster and take less memory, but no promises...
+** - thomas 2000-08-06
+**
 */

-#define LIKE_TRUE						1
-#define LIKE_FALSE						0
-#define LIKE_ABORT						(-1)
-
 /*--------------------
 *	Match text and p, return LIKE_TRUE, LIKE_FALSE, or LIKE_ABORT.
 *
@@ -153,69 +282,97 @@ textnlike(PG_FUNCTION_ARGS)
 * pattern either, so an upper-level % scan can stop scanning now.
 *--------------------
 */
-static int
-DoMatch(pg_wchar * text, pg_wchar * p)
-{
-	for (; *p && *text; text ++, p++)
-	{
-		switch (*p)
-		{
-			case '\\':
-				/* Literal match with following character. */
-				p++;
-				/* FALLTHROUGH */
-			default:
-				if (*text !=*p)
-					return LIKE_FALSE;
-				break;
-			case '_':
-				/* Match any single character. */
-				break;
-			case '%':
-				/* %% is the same as % according to the SQL standard */
-				/* Advance past all %'s */
-				while (*p == '%')
-					p++;
-				/* Trailing percent matches everything. */
-				if (*p == '\0')
-					return LIKE_TRUE;

-				/*
-				 * Otherwise, scan for a text position at which we can
-				 * match the rest of the pattern.
-				 */
-				for (; *text; text ++)
-				{
+#define NextChar(p, plen) (p)++, (plen)--
+
+static int
+MatchText(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e)
+{
+	/* Match t (tlen chars) against LIKE pattern p (plen chars); e, if not
+	 * NULL, points at the escape character.  Fast path: a lone '%' matches
+	 * everything, unless the escape character is itself '%'.
+	 */
+	if ((plen == 1) && (*p == '%')
+		&& ! ((e != NULL) && (*e == '%')))
+		return LIKE_TRUE;
+
+	while ((tlen > 0) && (plen > 0))
+	{
+		/* If an escape character was specified and we find it here in the pattern,
+		 * then we'd better have an exact match for the next character.
+		 */
+		if ((e != NULL) && (*p == *e))
+		{
+			NextChar(p, plen);
+			if ((plen <= 0) || (*t != *p))
+				return LIKE_FALSE;
+		}
+		else
+		{
+			switch (*p)
+			{
+				case '\\':
+					/* Literal match with following character, if any. */
+					NextChar(p, plen);
+					/* FALLTHROUGH */
+				default:
+					if ((plen <= 0) || (*t != *p))
+						return LIKE_FALSE;
+					break;
+				case '_':
+					/* Match any single character. */
+					break;
+				case '%':
+					/* %% is the same as % according to the SQL standard */
+					/* Advance past all %'s */
+					while ((plen > 0) && (*p == '%'))
+						NextChar(p, plen);
+					/* Trailing percent matches everything. */
+					if (plen <= 0)
+						return LIKE_TRUE;

 					/*
-					 * Optimization to prevent most recursion: don't
-					 * recurse unless first pattern char might match this
-					 * text char.
+					 * Otherwise, scan for a text position at which we can
+					 * match the rest of the pattern.
 					 */
-					if (*text == *p || *p == '\\' || *p == '_')
+					while (tlen > 0)
 					{
-						int			matched = DoMatch(text, p);
+						/*
+						 * Optimization to prevent most recursion: don't
+						 * recurse unless first pattern char might match this
+						 * text char.
+						 */
+						if ((*t == *p) || (*p == '\\') || (*p == '_')
+							|| ((e != NULL) && (*p == *e)))
+						{
+							int matched = MatchText(t, tlen, p, plen, e);

-						if (matched != LIKE_FALSE)
-							return matched;		/* TRUE or ABORT */
+							if (matched != LIKE_FALSE)
+								return matched;		/* TRUE or ABORT */
+						}
+
+						NextChar(t, tlen);
 					}
-				}

-				/*
-				 * End of text with no match, so no point in trying later
-				 * places to start matching this pattern.
-				 */
-				return LIKE_ABORT;
+					/*
+					 * End of text with no match, so no point in trying later
+					 * places to start matching this pattern.
+					 */
+					return LIKE_ABORT;
+			}
 		}
+
+		NextChar(t, tlen);
+		NextChar(p, plen);
 	}

-	if (*text !='\0')
+	if (tlen > 0)
 		return LIKE_FALSE;		/* end of pattern, but not of text */

 	/* End of input string.  Do we have matching pattern remaining? */
-	while (*p == '%')			/* allow multiple %'s at end of pattern */
-		p++;
-	if (*p == '\0')
+	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
 		return LIKE_TRUE;

 	/*
@@ -223,16 +380,101 @@ DoMatch(pg_wchar * text, pg_wchar * p)
 	 * start matching this pattern.
 	 */
 	return LIKE_ABORT;
-}
+} /* MatchText() */

-/*
-**	User-level routine.  Returns TRUE or FALSE.
-*/
-static bool
-like(pg_wchar * text, pg_wchar * p)
+static int
+MatchTextLower(pg_wchar * t, int tlen, pg_wchar * p, int plen, char *e)
 {
-	/* Fast path for match-everything pattern */
-	if (p[0] == '%' && p[1] == '\0')
-		return true;
-	return DoMatch(text, p) == LIKE_TRUE;
-}
+	/* Case-insensitive match of t (tlen chars) against LIKE pattern p (plen
+	 * chars); e, if not NULL, points at the escape character.  Fast path: a
+	 * lone '%' matches everything, unless the escape character is itself '%'.
+	 */
+	if ((plen == 1) && (*p == '%')
+		&& ! ((e != NULL) && (*e == '%')))
+		return LIKE_TRUE;
+
+	while ((tlen > 0) && (plen > 0))
+	{
+		/* If an escape character was specified and we find it here in the pattern,
+		 * then we'd better have an exact match for the next character.
+		 */
+		if ((e != NULL) && (tolower(*p) == tolower(*e)))
+		{
+			NextChar(p, plen);
+			if ((plen <= 0) || (tolower(*t) != tolower(*p)))
+				return LIKE_FALSE;
+		}
+		else
+		{
+			switch (*p)
+			{
+				case '\\':
+					/* Literal match with following character, if any. */
+					NextChar(p, plen);
+					/* FALLTHROUGH */
+				default:
+					if ((plen <= 0) || (tolower(*t) != tolower(*p)))
+						return LIKE_FALSE;
+					break;
+				case '_':
+					/* Match any single character. */
+					break;
+				case '%':
+					/* %% is the same as % according to the SQL standard */
+					/* Advance past all %'s */
+					while ((plen > 0) && (*p == '%'))
+						NextChar(p, plen);
+					/* Trailing percent matches everything. */
+					if (plen <= 0)
+						return LIKE_TRUE;
+
+					/*
+					 * Otherwise, scan for a text position at which we can
+					 * match the rest of the pattern.
+					 */
+					while (tlen > 0)
+					{
+						/*
+						 * Optimization to prevent most recursion: don't
+						 * recurse unless first pattern char might match this
+						 * text char.  The recursion must stay case-insensitive.
+						 */
+						if ((tolower(*t) == tolower(*p)) || (*p == '\\') || (*p == '_')
+							|| ((e != NULL) && (tolower(*p) == tolower(*e))))
+						{
+							int matched = MatchTextLower(t, tlen, p, plen, e);
+
+							if (matched != LIKE_FALSE)
+								return matched;		/* TRUE or ABORT */
+						}
+
+						NextChar(t, tlen);
+					}
+
+					/*
+					 * End of text with no match, so no point in trying later
+					 * places to start matching this pattern.
+					 */
+					return LIKE_ABORT;
+			}
+		}
+
+		NextChar(t, tlen);
+		NextChar(p, plen);
+	}
+
+	if (tlen > 0)
+		return LIKE_FALSE;		/* end of pattern, but not of text */
+
+	/* End of input string.  Do we have matching pattern remaining? */
+	while ((plen > 0) && (*p == '%'))	/* allow multiple %'s at end of pattern */
+		NextChar(p, plen);
+	if (plen <= 0)
+		return LIKE_TRUE;
+
+	/*
+	 * End of text with no match, so no point in trying later places to
+	 * start matching this pattern.
+	 */
+	return LIKE_ABORT;
+} /* MatchTextLower() */