1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-14 02:22:38 +03:00

Bring SIMILAR TO and SUBSTRING into some semblance of conformance with
the SQL99 standard.  (I'm not sure that the character-class features are
quite right, but that can be fixed later.)  Document SQL99 and POSIX
regexps as being different features; provide variants of SUBSTRING for
each.
This commit is contained in:
Tom Lane
2002-09-22 17:27:25 +00:00
parent ac355d558e
commit 9946b83ded
7 changed files with 683 additions and 381 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.42 2002/09/04 20:31:28 momjian Exp $
* $Header: /cvsroot/pgsql/src/backend/utils/adt/regexp.c,v 1.43 2002/09/22 17:27:23 tgl Exp $
*
* Alistair Crooks added the code for the regex caching
* agc - cached the regular expressions used - there's a good chance
@@ -317,8 +317,7 @@ textregexsubstr(PG_FUNCTION_ARGS)
char *sterm;
int len;
bool match;
int nmatch = 1;
regmatch_t pmatch;
regmatch_t pmatch[2];
/* be sure sterm is null-terminated */
len = VARSIZE(s) - VARHDRSZ;
@@ -327,21 +326,131 @@ textregexsubstr(PG_FUNCTION_ARGS)
sterm[len] = '\0';
/*
* We need the match info back from the pattern match to be able to
* actually extract the substring. It seems to be adequate to pass in
* a structure to return only one result.
* We pass two regmatch_t structs to get info about the overall match
* and the match for the first parenthesized subexpression (if any).
* If there is a parenthesized subexpression, we return what it matched;
* else return what the whole regexp matched.
*/
match = RE_compile_and_execute(p, sterm, REG_EXTENDED, nmatch, &pmatch);
match = RE_compile_and_execute(p, sterm, REG_EXTENDED, 2, pmatch);
pfree(sterm);
/* match? then return the substring matching the pattern */
if (match)
{
int so,
eo;
so = pmatch[1].rm_so;
eo = pmatch[1].rm_eo;
if (so < 0 || eo < 0)
{
/* no parenthesized subexpression */
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
}
return (DirectFunctionCall3(text_substr,
PointerGetDatum(s),
Int32GetDatum(pmatch.rm_so + 1),
Int32GetDatum(pmatch.rm_eo - pmatch.rm_so)));
Int32GetDatum(so + 1),
Int32GetDatum(eo - so)));
}
PG_RETURN_NULL();
}
/* similar_escape()
 * Translate a SQL99 regular-expression pattern into the POSIX form
 * understood by our regexp engine.
 *
 * Argument 0 is the pattern text; a NULL pattern yields a NULL result.
 * Argument 1 is the ESCAPE string: NULL selects backslash as the escape
 * character, an empty string means there is no escape character, and
 * any other length than one byte is reported as an error.
 *
 * The returned text is the translated pattern wrapped in ^ ... $.
 */
Datum
similar_escape(PG_FUNCTION_ARGS)
{
	text	   *pattern;
	text	   *escape;
	text	   *out;
	unsigned char *src;
	unsigned char *esc;
	unsigned char *dst;
	int			remaining;
	int			esclen;
	bool		escaped = false;	/* previous byte was the escape char? */
	int			quotes_seen = 0;	/* escaped double quotes so far */

	/* Not a strict function, so a NULL pattern must be caught here */
	if (PG_ARGISNULL(0))
		PG_RETURN_NULL();

	pattern = PG_GETARG_TEXT_P(0);
	src = (unsigned char *) VARDATA(pattern);
	remaining = (VARSIZE(pattern) - VARHDRSZ);

	if (PG_ARGISNULL(1))
	{
		/* ESCAPE clause omitted: backslash is the escape character */
		esc = (unsigned char *) "\\";
		esclen = 1;
	}
	else
	{
		escape = PG_GETARG_TEXT_P(1);
		esc = (unsigned char *) VARDATA(escape);
		esclen = (VARSIZE(escape) - VARHDRSZ);
		switch (esclen)
		{
			case 0:
				/* empty ESCAPE string: nothing acts as an escape */
				esc = NULL;
				break;
			case 1:
				break;
			default:
				elog(ERROR, "ESCAPE string must be empty or one character");
				break;
		}
	}

	/*
	 * Worst case each input byte becomes two output bytes, and the
	 * leading ^ and trailing $ need one byte apiece.
	 */
	out = (text *) palloc(VARHDRSZ + 2 + 2 * remaining);
	dst = (unsigned char *) VARDATA(out);

	*dst++ = '^';

	for (; remaining > 0; src++, remaining--)
	{
		unsigned char c = *src;

		if (escaped)
		{
			escaped = false;
			if (c == '"')
			{
				/*
				 * Escaped double quotes (used by SUBSTRING patterns)
				 * alternately open and close a parenthesized group.
				 */
				*dst++ = ((quotes_seen % 2) == 0) ? '(' : ')';
				quotes_seen++;
			}
			else
			{
				/* any other escaped byte is passed on backslash-quoted */
				*dst++ = '\\';
				*dst++ = c;
			}
			continue;
		}

		if (esc != NULL && c == *esc)
		{
			/*
			 * The SQL99 escape character itself produces no output; it
			 * only changes how the next byte is treated.  If it is the
			 * final byte of the pattern it is simply dropped.
			 */
			escaped = true;
			continue;
		}

		switch (c)
		{
			case '%':
				/* SQL "any string" wildcard */
				*dst++ = '.';
				*dst++ = '*';
				break;

			case '_':
				/* SQL "any single character" wildcard */
				*dst++ = '.';
				break;

			case '\\':
			case '.':
			case '?':
			case '{':
				/* emitted with a backslash in front */
				*dst++ = '\\';
				*dst++ = c;
				break;

			default:
				*dst++ = c;
				break;
		}
	}

	*dst++ = '$';

	/* total size = header plus every byte written so far */
	VARATT_SIZEP(out) = dst - ((unsigned char *) out);

	PG_RETURN_TEXT_P(out);
}