1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-05 07:21:24 +03:00

Introduce parse_ident()

SQL-layer function to split qualified identifier into array parts.

Author: Pavel Stehule with minor editorization by me and Jim Nasby
This commit is contained in:
Teodor Sigaev
2016-03-18 18:16:14 +03:00
parent 992b5ba30d
commit 3187d6de0e
10 changed files with 375 additions and 2 deletions

View File

@ -27,6 +27,7 @@
#include "commands/dbcommands.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "parser/keywords.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
/*
* This simple parser utility are compatible with lexer implementation,
* used only in parse_ident function
*/
static bool
is_ident_start(unsigned char c)
{
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
if (c >= 0200 && c <= 0377)
return true;
return false;
}
static bool
is_ident_cont(unsigned char c)
{
if (c >= '0' && c <= '9')
return true;
return is_ident_start(c);
}
/*
* Sanitize SQL string for using in error message.
*/
static char *
sanitize_text(text *t)
{
int len = VARSIZE_ANY_EXHDR(t);
const char *p = VARDATA_ANY(t);
StringInfo dstr;
dstr = makeStringInfo();
appendStringInfoChar(dstr, '"');
while (len--)
{
switch (*p)
{
case '\b':
appendStringInfoString(dstr, "\\b");
break;
case '\f':
appendStringInfoString(dstr, "\\f");
break;
case '\n':
appendStringInfoString(dstr, "\\n");
break;
case '\r':
appendStringInfoString(dstr, "\\r");
break;
case '\t':
appendStringInfoString(dstr, "\\t");
break;
case '\'':
appendStringInfoString(dstr, "''");
break;
case '\\':
appendStringInfoString(dstr, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
appendStringInfo(dstr, "\\u%04x", (int) *p);
else
appendStringInfoCharMacro(dstr, *p);
break;
}
p++;
}
appendStringInfoChar(dstr, '"');
return dstr->data;
}
/*
* parse_ident - parse SQL composed identifier to separate identifiers.
* When strict mode is active (second parameter), then any chars after
* last identifiers are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
text *qualname;
char *qualname_str;
bool strict;
char *nextp;
bool after_dot = false;
ArrayBuildState *astate = NULL;
qualname = PG_GETARG_TEXT_PP(0);
qualname_str = text_to_cstring(qualname);
strict = PG_GETARG_BOOL(1);
nextp = qualname_str;
/* skip leading whitespace */
while (isspace((unsigned char) *nextp))
nextp++;
for (;;)
{
char *curname;
char *endp;
bool missing_ident;
missing_ident = true;
if (*nextp == '\"')
{
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '\"');
if (endp == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unclosed double quotes"),
errdetail("string %s is not valid identifier",
sanitize_text(qualname))));
if (endp[1] != '\"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
}
nextp = endp + 1;
*endp = '\0';
/* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier should not be empty: %s",
sanitize_text(qualname))));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
else
{
if (is_ident_start((unsigned char) *nextp))
{
char *downname;
int len;
text *part;
curname = nextp++;
while (is_ident_cont((unsigned char) *nextp))
nextp++;
len = nextp - curname;
/*
* Unlike name, we don't implicitly truncate identifiers. This
* is useful for allowing the user to check for specific parts
* of the identifier being too long. It's easy enough for the
* user to get the truncated names by casting our output to
* name[].
*/
downname = downcase_identifier(curname, len, false, false);
part = cstring_to_text_with_len(downname, len);
astate = accumArrayResult(astate, PointerGetDatum(part), false,
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
}
if (missing_ident)
{
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier before \".\" symbol: %s",
sanitize_text(qualname))));
else if (after_dot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier after \".\" symbol: %s",
sanitize_text(qualname))));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier: %s",
sanitize_text(qualname))));
}
while (isspace((unsigned char) *nextp))
nextp++;
if (*nextp == '.')
{
after_dot = true;
nextp++;
while (isspace((unsigned char) *nextp))
nextp++;
}
else if (*nextp == '\0')
{
break;
}
else
{
if (strict)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier contains disallowed characters: %s",
sanitize_text(qualname))));
break;
}
}
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}