1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-27 12:41:57 +03:00

Introduce parse_ident()

SQL-layer function to split qualified identifier into array parts.

Author: Pavel Stehule with minor editorization by me and Jim Nasby
This commit is contained in:
Teodor Sigaev
2016-03-18 18:16:14 +03:00
parent 992b5ba30d
commit 3187d6de0e
10 changed files with 375 additions and 2 deletions

View File

@ -990,3 +990,10 @@ RETURNS jsonb
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'jsonb_set';
CREATE OR REPLACE FUNCTION
parse_ident(str text, strict boolean DEFAULT true)
RETURNS text[]
LANGUAGE INTERNAL
STRICT IMMUTABLE
AS 'parse_ident';

View File

@ -129,6 +129,15 @@ scanstr(const char *s)
*/
char *
downcase_truncate_identifier(const char *ident, int len, bool warn)
{
return downcase_identifier(ident, len, warn, true);
}
/*
* a workhorse for downcase_truncate_identifier
*/
char *
downcase_identifier(const char *ident, int len, bool warn, bool truncate)
{
char *result;
int i;
@ -158,12 +167,13 @@ downcase_truncate_identifier(const char *ident, int len, bool warn)
}
result[i] = '\0';
if (i >= NAMEDATALEN)
if (i >= NAMEDATALEN && truncate)
truncate_identifier(result, i, warn);
return result;
}
/*
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
*

View File

@ -27,6 +27,7 @@
#include "commands/dbcommands.h"
#include "funcapi.h"
#include "miscadmin.h"
#include "parser/scansup.h"
#include "parser/keywords.h"
#include "postmaster/syslogger.h"
#include "rewrite/rewriteHandler.h"
@ -719,3 +720,226 @@ pg_column_is_updatable(PG_FUNCTION_ARGS)
PG_RETURN_BOOL((events & REQ_EVENTS) == REQ_EVENTS);
}
/*
* This simple parser utility are compatible with lexer implementation,
* used only in parse_ident function
*/
static bool
is_ident_start(unsigned char c)
{
if (c == '_')
return true;
if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'))
return true;
if (c >= 0200 && c <= 0377)
return true;
return false;
}
static bool
is_ident_cont(unsigned char c)
{
if (c >= '0' && c <= '9')
return true;
return is_ident_start(c);
}
/*
* Sanitize SQL string for using in error message.
*/
static char *
sanitize_text(text *t)
{
int len = VARSIZE_ANY_EXHDR(t);
const char *p = VARDATA_ANY(t);
StringInfo dstr;
dstr = makeStringInfo();
appendStringInfoChar(dstr, '"');
while (len--)
{
switch (*p)
{
case '\b':
appendStringInfoString(dstr, "\\b");
break;
case '\f':
appendStringInfoString(dstr, "\\f");
break;
case '\n':
appendStringInfoString(dstr, "\\n");
break;
case '\r':
appendStringInfoString(dstr, "\\r");
break;
case '\t':
appendStringInfoString(dstr, "\\t");
break;
case '\'':
appendStringInfoString(dstr, "''");
break;
case '\\':
appendStringInfoString(dstr, "\\\\");
break;
default:
if ((unsigned char) *p < ' ')
appendStringInfo(dstr, "\\u%04x", (int) *p);
else
appendStringInfoCharMacro(dstr, *p);
break;
}
p++;
}
appendStringInfoChar(dstr, '"');
return dstr->data;
}
/*
* parse_ident - parse SQL composed identifier to separate identifiers.
* When strict mode is active (second parameter), then any chars after
* last identifiers are disallowed.
*/
Datum
parse_ident(PG_FUNCTION_ARGS)
{
text *qualname;
char *qualname_str;
bool strict;
char *nextp;
bool after_dot = false;
ArrayBuildState *astate = NULL;
qualname = PG_GETARG_TEXT_PP(0);
qualname_str = text_to_cstring(qualname);
strict = PG_GETARG_BOOL(1);
nextp = qualname_str;
/* skip leading whitespace */
while (isspace((unsigned char) *nextp))
nextp++;
for (;;)
{
char *curname;
char *endp;
bool missing_ident;
missing_ident = true;
if (*nextp == '\"')
{
curname = nextp + 1;
for (;;)
{
endp = strchr(nextp + 1, '\"');
if (endp == NULL)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("unclosed double quotes"),
errdetail("string %s is not valid identifier",
sanitize_text(qualname))));
if (endp[1] != '\"')
break;
memmove(endp, endp + 1, strlen(endp));
nextp = endp;
}
nextp = endp + 1;
*endp = '\0';
/* Show complete input string in this case. */
if (endp - curname == 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier should not be empty: %s",
sanitize_text(qualname))));
astate = accumArrayResult(astate, CStringGetTextDatum(curname),
false, TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
else
{
if (is_ident_start((unsigned char) *nextp))
{
char *downname;
int len;
text *part;
curname = nextp++;
while (is_ident_cont((unsigned char) *nextp))
nextp++;
len = nextp - curname;
/*
* Unlike name, we don't implicitly truncate identifiers. This
* is useful for allowing the user to check for specific parts
* of the identifier being too long. It's easy enough for the
* user to get the truncated names by casting our output to
* name[].
*/
downname = downcase_identifier(curname, len, false, false);
part = cstring_to_text_with_len(downname, len);
astate = accumArrayResult(astate, PointerGetDatum(part), false,
TEXTOID, CurrentMemoryContext);
missing_ident = false;
}
}
if (missing_ident)
{
/* Different error messages based on where we failed. */
if (*nextp == '.')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier before \".\" symbol: %s",
sanitize_text(qualname))));
else if (after_dot)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier after \".\" symbol: %s",
sanitize_text(qualname))));
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("missing valid identifier: %s",
sanitize_text(qualname))));
}
while (isspace((unsigned char) *nextp))
nextp++;
if (*nextp == '.')
{
after_dot = true;
nextp++;
while (isspace((unsigned char) *nextp))
nextp++;
}
else if (*nextp == '\0')
{
break;
}
else
{
if (strict)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("identifier contains disallowed characters: %s",
sanitize_text(qualname))));
break;
}
}
PG_RETURN_DATUM(makeArrayResult(astate, CurrentMemoryContext));
}

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 201603151
#define CATALOG_VERSION_NO 201603181
#endif

View File

@ -3463,6 +3463,9 @@ DESCR("I/O");
DATA(insert OID = 4086 ( to_regnamespace PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 4089 "25" _null_ _null_ _null_ _null_ _null_ to_regnamespace _null_ _null_ _null_ ));
DESCR("convert namespace name to regnamespace");
DATA(insert OID = 1268 ( parse_ident PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 16" _null_ _null_ "{str,strict}" _null_ _null_ parse_ident _null_ _null_ _null_ ));
DESCR("parse qualified identifier to array of identifiers");
DATA(insert OID = 2246 ( fmgr_internal_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_internal_validator _null_ _null_ _null_ ));
DESCR("(internal)");
DATA(insert OID = 2247 ( fmgr_c_validator PGNSP PGUID 12 1 0 0 0 f f f f t f s s 1 0 2278 "26" _null_ _null_ _null_ _null_ _null_ fmgr_c_validator _null_ _null_ _null_ ));

View File

@ -20,6 +20,9 @@ extern char *scanstr(const char *s);
extern char *downcase_truncate_identifier(const char *ident, int len,
bool warn);
extern char *downcase_identifier(const char *ident, int len,
bool warn, bool truncate);
extern void truncate_identifier(char *ident, int len, bool warn);
extern bool scanner_isspace(char ch);

View File

@ -510,6 +510,7 @@ extern Datum pg_typeof(PG_FUNCTION_ARGS);
extern Datum pg_collation_for(PG_FUNCTION_ARGS);
extern Datum pg_relation_is_updatable(PG_FUNCTION_ARGS);
extern Datum pg_column_is_updatable(PG_FUNCTION_ARGS);
extern Datum parse_ident(PG_FUNCTION_ARGS);
/* oid.c */
extern Datum oidin(PG_FUNCTION_ARGS);

View File

@ -124,3 +124,69 @@ SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
(2 rows)
DROP TABLE NAME_TBL;
DO $$
DECLARE r text[];
BEGIN
r := parse_ident('Schemax.Tabley');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
r := parse_ident('"SchemaX"."TableY"');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
END;
$$;
NOTICE: schemax.tabley
NOTICE: "SchemaX"."TableY"
SELECT parse_ident('foo.boo');
parse_ident
-------------
{foo,boo}
(1 row)
SELECT parse_ident('foo.boo[]'); -- should fail
ERROR: identifier contains disallowed characters: "foo.boo[]"
SELECT parse_ident('foo.boo[]', strict => false); -- ok
parse_ident
-------------
{foo,boo}
(1 row)
-- should fail
SELECT parse_ident(' ');
ERROR: missing valid identifier: " "
SELECT parse_ident(' .aaa');
ERROR: missing valid identifier before "." symbol: " .aaa"
SELECT parse_ident(' aaa . ');
ERROR: missing valid identifier after "." symbol: " aaa . "
SELECT parse_ident('aaa.a%b');
ERROR: identifier contains disallowed characters: "aaa.a%b"
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
ERROR: identifier contains disallowed characters: "X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
length | length
--------+--------
414 | 289
(1 row)
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
parse_ident
-----------------------------------------------------------------------------------------------------------
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
(1 row)
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
parse_ident
------------------------------------------------------------------------------------------------------
{first," second "," third "," xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"}
(1 row)
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
ERROR: identifier contains disallowed characters: ""c".X XXXX\u0002XXXXXX"
SELECT parse_ident('1020');
ERROR: missing valid identifier: "1020"
SELECT parse_ident('10.20');
ERROR: missing valid identifier: "10.20"
SELECT parse_ident('.');
ERROR: missing valid identifier before "." symbol: "."
SELECT parse_ident('.1020');
ERROR: missing valid identifier before "." symbol: ".1020"
SELECT parse_ident('xxx.1020');
ERROR: missing valid identifier after "." symbol: "xxx.1020"

View File

@ -52,3 +52,36 @@ SELECT '' AS three, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '[0-9]';
SELECT '' AS two, c.f1 FROM NAME_TBL c WHERE c.f1 ~ '.*asdf.*';
DROP TABLE NAME_TBL;
DO $$
DECLARE r text[];
BEGIN
r := parse_ident('Schemax.Tabley');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
r := parse_ident('"SchemaX"."TableY"');
RAISE NOTICE '%', format('%I.%I', r[1], r[2]);
END;
$$;
SELECT parse_ident('foo.boo');
SELECT parse_ident('foo.boo[]'); -- should fail
SELECT parse_ident('foo.boo[]', strict => false); -- ok
-- should fail
SELECT parse_ident(' ');
SELECT parse_ident(' .aaa');
SELECT parse_ident(' aaa . ');
SELECT parse_ident('aaa.a%b');
SELECT parse_ident(E'X\rXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX');
SELECT length(a[1]), length(a[2]) from parse_ident('"xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx".yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy') as a ;
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"');
SELECT parse_ident(' first . " second " ." third ". " ' || repeat('x',66) || '"')::name[];
SELECT parse_ident(E'"c".X XXXX\002XXXXXX');
SELECT parse_ident('1020');
SELECT parse_ident('10.20');
SELECT parse_ident('.');
SELECT parse_ident('.1020');
SELECT parse_ident('xxx.1020');