1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-07 19:06:32 +03:00

Initial SQL/XML support: xml data type and initial set of functions.

This commit is contained in:
Peter Eisentraut
2006-12-21 16:05:16 +00:00
parent ed1e9cd501
commit 8c1de5fb00
39 changed files with 2446 additions and 128 deletions

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.199 2006/11/17 16:46:27 petere Exp $
* $PostgreSQL: pgsql/src/backend/executor/execQual.c,v 1.200 2006/12/21 16:05:13 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -52,6 +52,7 @@
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/typcache.h"
#include "utils/xml.h"
/* static function decls */
@@ -119,6 +120,8 @@ static Datum ExecEvalMinMax(MinMaxExprState *minmaxExpr,
static Datum ExecEvalNullIf(FuncExprState *nullIfExpr,
ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
static Datum ExecEvalXml(XmlExprState *xmlExpr, ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
static Datum ExecEvalNullTest(NullTestState *nstate,
ExprContext *econtext,
bool *isNull, ExprDoneCond *isDone);
@@ -2878,6 +2881,120 @@ ExecEvalBooleanTest(GenericExprState *bstate,
}
}
/* ----------------------------------------------------------------
 *		ExecEvalXml
 *
 *		Evaluate an XMLCONCAT, XMLELEMENT or XMLFOREST expression by
 *		running each argument's type output function and gluing the
 *		resulting strings together.
 *
 *		xmlExpr  - execution state built by ExecInitExpr
 *		econtext - expression context used to evaluate the arguments
 *		isNull   - set to true when the result is SQL NULL
 *		isDone   - if not NULL, set to ExprSingleResult
 *
 *		XMLCONCAT and XMLFOREST yield NULL when every argument is NULL;
 *		XMLELEMENT never yields NULL.  When the result is NULL no datum
 *		is built and (Datum) 0 is returned.
 *
 *		NOTE(review): attribute values and element names are emitted
 *		verbatim; nothing here escapes '"', '<' or '&' -- confirm whether
 *		that is handled by the output functions or needs adding.
 * ----------------------------------------------------------------
 */
static Datum
ExecEvalXml(XmlExprState *xmlExpr, ExprContext *econtext,
			bool *isNull, ExprDoneCond *isDone)
{
	StringInfoData buf;
	bool		argnull;
	ListCell   *arg;
	text	   *result = NULL;
	int			len;

	initStringInfo(&buf);

	*isNull = false;
	if (isDone)
		*isDone = ExprSingleResult;

	switch (xmlExpr->op)
	{
		case IS_XMLCONCAT:
			/* NULL unless at least one argument is non-null */
			*isNull = true;
			foreach(arg, xmlExpr->args)
			{
				ExprState  *e = (ExprState *) lfirst(arg);
				Datum		value = ExecEvalExpr(e, econtext, &argnull, NULL);

				if (!argnull)
				{
					char	   *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->arg_typeout, value));

					appendStringInfoString(&buf, outstr);
					/* release the output string, as the other branches do */
					pfree(outstr);
					*isNull = false;
				}
			}
			break;

		case IS_XMLELEMENT:
			{
				bool		has_content = false;
				int			i = 0;

				appendStringInfo(&buf, "<%s", xmlExpr->name);

				/* attributes: NULL values are simply omitted */
				foreach(arg, xmlExpr->named_args)
				{
					GenericExprState *gstate = (GenericExprState *) lfirst(arg);
					Datum		value = ExecEvalExpr(gstate->arg, econtext, &argnull, NULL);

					if (!argnull)
					{
						char	   *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->named_args_tcache[i], value));

						appendStringInfo(&buf, " %s=\"%s\"", xmlExpr->named_args_ncache[i], outstr);
						pfree(outstr);
					}
					i++;
				}

				/*
				 * Content: append every non-null content argument, not just
				 * the first one, so that no argument accepted by the grammar
				 * is silently dropped.
				 */
				foreach(arg, xmlExpr->args)
				{
					ExprState  *e = (ExprState *) lfirst(arg);
					Datum		value = ExecEvalExpr(e, econtext, &argnull, NULL);

					if (!argnull)
					{
						char	   *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->arg_typeout, value));

						/* close the start tag before the first content */
						if (!has_content)
						{
							appendStringInfoChar(&buf, '>');
							has_content = true;
						}
						appendStringInfoString(&buf, outstr);
						pfree(outstr);
					}
				}

				/* empty-element tag when there was no content at all */
				if (has_content)
					appendStringInfo(&buf, "</%s>", xmlExpr->name);
				else
					appendStringInfoString(&buf, "/>");
			}
			break;

		case IS_XMLFOREST:
			{
				/* returns null only if all arguments are null */
				int			i = 0;

				*isNull = true;
				foreach(arg, xmlExpr->named_args)
				{
					GenericExprState *gstate = (GenericExprState *) lfirst(arg);
					Datum		value = ExecEvalExpr(gstate->arg, econtext, &argnull, NULL);

					if (!argnull)
					{
						char	   *outstr = DatumGetCString(OidFunctionCall1(xmlExpr->named_args_tcache[i], value));

						appendStringInfo(&buf, "<%s>%s</%s>", xmlExpr->named_args_ncache[i], outstr, xmlExpr->named_args_ncache[i]);
						pfree(outstr);
						*isNull = false;
					}
					i++;
				}
			}
			break;

		default:
			/* unknown operation: falls through to an empty, non-null result */
			break;
	}

	/* nothing to build for a NULL result */
	if (*isNull)
	{
		pfree(buf.data);
		return (Datum) 0;
	}

	/* copy the accumulated string into a varlena of exactly the right size */
	len = buf.len + VARHDRSZ;
	result = palloc(len);
	VARATT_SIZEP(result) = len;
	memcpy(VARDATA(result), buf.data, buf.len);
	pfree(buf.data);

	PG_RETURN_TEXT_P(result);
}
/*
* ExecEvalCoerceToDomain
*
@@ -3668,6 +3785,64 @@ ExecInitExpr(Expr *node, PlanState *parent)
state = (ExprState *) mstate;
}
break;
case T_XmlExpr:
	{
		/*
		 * Build the execution state for an XmlExpr: initialize every
		 * argument expression and cache, per named argument, its XML
		 * name and the OID of its type output function.
		 */
		XmlExpr    *xexpr = (XmlExpr *) node;
		XmlExprState *xstate = makeNode(XmlExprState);
		List	   *outlist = NIL;
		ListCell   *arg;
		Oid			typeout;
		bool		tpisvarlena;

		xstate->xprstate.evalfunc = (ExprStateEvalFunc) ExecEvalXml;
		xstate->op = xexpr->op;
		xstate->name = xexpr->name;

		if (xexpr->named_args)
		{
			int			nnamed = list_length(xexpr->named_args);
			int			i = 0;

			/* the type cache holds Oids, so size it with sizeof(Oid) */
			xstate->named_args_tcache = (Oid *) palloc(nnamed * sizeof(Oid));
			xstate->named_args_ncache = (char **) palloc(nnamed * sizeof(char *));

			foreach(arg, xexpr->named_args)
			{
				Expr	   *e = (Expr *) lfirst(arg);
				ExprState  *estate = ExecInitExpr(e, parent);
				TargetEntry *tle;

				outlist = lappend(outlist, estate);

				/* each named argument is a TargetEntry carrying the name */
				tle = (TargetEntry *) ((GenericExprState *) estate)->xprstate.expr;
				getTypeOutputInfo(exprType((Node *) tle->expr), &typeout, &tpisvarlena);
				xstate->named_args_ncache[i] = tle->resname;
				xstate->named_args_tcache[i] = typeout;
				i++;
			}
		}
		else
		{
			xstate->named_args_tcache = NULL;
			xstate->named_args_ncache = NULL;
		}
		xstate->named_args = outlist;

		/*
		 * Plain arguments.  arg_typeout is assigned only inside the loop,
		 * so with an empty list it keeps the zero value makeNode() left
		 * there instead of picking up an uninitialized local or a named
		 * argument's output function.
		 */
		outlist = NIL;
		foreach(arg, xexpr->args)
		{
			Expr	   *e = (Expr *) lfirst(arg);
			ExprState  *estate;

			getTypeOutputInfo(exprType((Node *) e), &typeout, &tpisvarlena);
			estate = ExecInitExpr(e, parent);
			outlist = lappend(outlist, estate);
			xstate->arg_typeout = typeout;
		}
		xstate->args = outlist;

		state = (ExprState *) xstate;
	}
	break;
case T_NullIfExpr:
{
NullIfExpr *nullifexpr = (NullIfExpr *) node;

View File

@@ -15,7 +15,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.354 2006/12/10 22:13:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.355 2006/12/21 16:05:13 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1136,6 +1136,22 @@ _copyBooleanTest(BooleanTest *from)
return newnode;
}
/*
 * _copyXmlExpr
 *
 * Make a new XmlExpr node and fill its op, name, named_args and args
 * fields from "from" via the file's COPY_*_FIELD macros.  Returns the new
 * node.
 *
 * NOTE(review): the macros take only a field name, so they presumably
 * refer to the locals "newnode" and "from" by name -- do not rename them
 * without checking the macro definitions.
 */
static XmlExpr *
_copyXmlExpr(XmlExpr *from)
{
XmlExpr *newnode = makeNode(XmlExpr);
/* operation selector (plain value) */
COPY_SCALAR_FIELD(op);
/* element name; may be NULL for operations that have none */
COPY_STRING_FIELD(name);
/* the two argument lists */
COPY_NODE_FIELD(named_args);
COPY_NODE_FIELD(args);
return newnode;
}
/*
* _copyCoerceToDomain
*/
@@ -2966,6 +2982,9 @@ copyObject(void *from)
case T_BooleanTest:
retval = _copyBooleanTest(from);
break;
case T_XmlExpr:
retval = _copyXmlExpr(from);
break;
case T_CoerceToDomain:
retval = _copyCoerceToDomain(from);
break;

View File

@@ -18,7 +18,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.288 2006/12/10 22:13:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/equalfuncs.c,v 1.289 2006/12/21 16:05:13 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -495,6 +495,17 @@ _equalBooleanTest(BooleanTest *a, BooleanTest *b)
return true;
}
/*
 * _equalXmlExpr
 *
 * Compare two XmlExpr nodes field by field (op, name, named_args, args).
 * Returns true when control reaches the end; each COMPARE_*_FIELD macro
 * presumably returns false from this function on a mismatch -- confirm
 * against the macro definitions.
 */
static bool
_equalXmlExpr(XmlExpr *a, XmlExpr *b)
{
COMPARE_SCALAR_FIELD(op);
COMPARE_STRING_FIELD(name);
COMPARE_NODE_FIELD(named_args);
COMPARE_NODE_FIELD(args);
return true;
}
static bool
_equalCoerceToDomain(CoerceToDomain *a, CoerceToDomain *b)
{
@@ -1968,6 +1979,9 @@ equal(void *a, void *b)
case T_BooleanTest:
retval = _equalBooleanTest(a, b);
break;
case T_XmlExpr:
retval = _equalXmlExpr(a, b);
break;
case T_CoerceToDomain:
retval = _equalCoerceToDomain(a, b);
break;

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.286 2006/12/10 22:13:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.287 2006/12/21 16:05:13 petere Exp $
*
* NOTES
* Every node type that can appear in stored rules' parsetrees *must*
@@ -920,6 +920,17 @@ _outBooleanTest(StringInfo str, BooleanTest *node)
WRITE_ENUM_FIELD(booltesttype, BoolTestType);
}
/*
 * _outXmlExpr
 *
 * Write the text form of an XmlExpr node to "str": the node label
 * "XMLEXPR" followed by the op, name, named_args and args fields, in that
 * order.  The label and the field order should stay in step with the
 * matching reader for this node type.
 */
static void
_outXmlExpr(StringInfo str, XmlExpr *node)
{
WRITE_NODE_TYPE("XMLEXPR");
WRITE_ENUM_FIELD(op, XmlExprOp);
WRITE_STRING_FIELD(name);
WRITE_NODE_FIELD(named_args);
WRITE_NODE_FIELD(args);
}
static void
_outCoerceToDomain(StringInfo str, CoerceToDomain *node)
{
@@ -2019,6 +2030,9 @@ _outNode(StringInfo str, void *obj)
case T_BooleanTest:
_outBooleanTest(str, obj);
break;
case T_XmlExpr:
_outXmlExpr(str, obj);
break;
case T_CoerceToDomain:
_outCoerceToDomain(str, obj);
break;

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.196 2006/12/10 22:13:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/nodes/readfuncs.c,v 1.197 2006/12/21 16:05:13 petere Exp $
*
* NOTES
* Path and Plan nodes do not have any readfuncs support, because we
@@ -764,6 +764,22 @@ _readBooleanTest(void)
READ_DONE();
}
/*
 * _readXmlExpr
 *
 * Read back an XmlExpr node: op, name, named_args and args, in that
 * order.  The field order should match what the corresponding output
 * function writes.
 *
 * NOTE(review): READ_LOCALS presumably declares the node being built and
 * READ_DONE returns it -- confirm against the macro definitions.
 */
static XmlExpr *
_readXmlExpr(void)
{
READ_LOCALS(XmlExpr);
READ_ENUM_FIELD(op, XmlExprOp);
READ_STRING_FIELD(name);
READ_NODE_FIELD(named_args);
READ_NODE_FIELD(args);
READ_DONE();
}
/*
* _readCoerceToDomain
*/
@@ -1014,6 +1030,8 @@ parseNodeString(void)
return_value = _readNullTest();
else if (MATCH("BOOLEANTEST", 11))
return_value = _readBooleanTest();
else if (MATCH("XMLEXPR", 7))
return_value = _readXmlExpr();
else if (MATCH("COERCETODOMAIN", 14))
return_value = _readCoerceToDomain();
else if (MATCH("COERCETODOMAINVALUE", 19))

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.223 2006/10/25 22:11:32 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.224 2006/12/21 16:05:13 petere Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -559,6 +559,8 @@ expression_returns_set_walker(Node *node, void *context)
return false;
if (IsA(node, NullIfExpr))
return false;
if (IsA(node, XmlExpr))
return false;
return expression_tree_walker(node, expression_returns_set_walker,
context);
@@ -876,6 +878,8 @@ contain_nonstrict_functions_walker(Node *node, void *context)
return true;
if (IsA(node, BooleanTest))
return true;
if (IsA(node, XmlExpr))
return true;
return expression_tree_walker(node, contain_nonstrict_functions_walker,
context);
}
@@ -3334,6 +3338,16 @@ expression_tree_walker(Node *node,
return walker(((NullTest *) node)->arg, context);
case T_BooleanTest:
return walker(((BooleanTest *) node)->arg, context);
case T_XmlExpr:
{
/*
 * Visit both argument lists of an XmlExpr, named arguments first;
 * stop and report true as soon as the walker does.
 */
XmlExpr *xexpr = (XmlExpr *) node;
if (walker(xexpr->named_args, context))
return true;
if (walker(xexpr->args, context))
return true;
}
/* neither list made the walker return true; leave the switch */
break;
case T_CoerceToDomain:
return walker(((CoerceToDomain *) node)->arg, context);
case T_TargetEntry:
@@ -3857,6 +3871,17 @@ expression_tree_mutator(Node *node,
return (Node *) newnode;
}
break;
case T_XmlExpr:
{
/*
 * Copy the XmlExpr node itself, then replace both argument lists
 * with their mutated versions and return the copy.
 */
XmlExpr *xexpr = (XmlExpr *) node;
XmlExpr *newnode;
FLATCOPY(newnode, xexpr, XmlExpr);
MUTATE(newnode->named_args, xexpr->named_args, List *);
MUTATE(newnode->args, xexpr->args, List *);
return (Node *) newnode;
}
/* not reached: the block above always returns */
break;
case T_NullIfExpr:
{
NullIfExpr *expr = (NullIfExpr *) node;

View File

@@ -11,7 +11,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.568 2006/11/05 22:42:09 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.569 2006/12/21 16:05:14 petere Exp $
*
* HISTORY
* AUTHOR DATE MAJOR EVENT
@@ -106,6 +106,7 @@ static void insertSelectOptions(SelectStmt *stmt,
static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg);
static Node *doNegate(Node *n, int location);
static void doNegateFloat(Value *v);
static Node *makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args);
%}
@@ -345,6 +346,11 @@ static void doNegateFloat(Value *v);
%type <str> OptTableSpace OptConsTableSpace OptTableSpaceOwner
%type <list> opt_check_option
%type <target> xml_attribute_el
%type <list> xml_attribute_list xml_attributes
%type <node> xml_root_version
%type <ival> opt_xml_root_standalone document_or_content xml_whitespace_option
/*
* If you make any token changes, update the keyword table in
@@ -365,13 +371,13 @@ static void doNegateFloat(Value *v);
CHARACTER CHARACTERISTICS CHECK CHECKPOINT CLASS CLOSE
CLUSTER COALESCE COLLATE COLUMN COMMENT COMMIT
COMMITTED CONCURRENTLY CONNECTION CONSTRAINT CONSTRAINTS
CONVERSION_P CONVERT COPY CREATE CREATEDB
CONTENT CONVERSION_P CONVERT COPY CREATE CREATEDB
CREATEROLE CREATEUSER CROSS CSV CURRENT_DATE CURRENT_ROLE CURRENT_TIME
CURRENT_TIMESTAMP CURRENT_USER CURSOR CYCLE
DATABASE DAY_P DEALLOCATE DEC DECIMAL_P DECLARE DEFAULT DEFAULTS
DEFERRABLE DEFERRED DEFINER DELETE_P DELIMITER DELIMITERS
DESC DISABLE_P DISTINCT DO DOMAIN_P DOUBLE_P DROP
DESC DISABLE_P DISTINCT DO DOCUMENT DOMAIN_P DOUBLE_P DROP
EACH ELSE ENABLE_P ENCODING ENCRYPTED END_P ESCAPE EXCEPT EXCLUDING
EXCLUSIVE EXECUTE EXISTS EXPLAIN EXTERNAL EXTRACT
@@ -398,7 +404,7 @@ static void doNegateFloat(Value *v);
MATCH MAXVALUE MINUTE_P MINVALUE MODE MONTH_P MOVE
NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB
NAME NAMES NATIONAL NATURAL NCHAR NEW NEXT NO NOCREATEDB
NOCREATEROLE NOCREATEUSER NOINHERIT NOLOGIN_P NONE NOSUPERUSER
NOT NOTHING NOTIFY NOTNULL NOWAIT NULL_P NULLIF NUMERIC
@@ -417,8 +423,8 @@ static void doNegateFloat(Value *v);
SAVEPOINT SCHEMA SCROLL SECOND_P SECURITY SELECT SEQUENCE
SERIALIZABLE SESSION SESSION_USER SET SETOF SHARE
SHOW SIMILAR SIMPLE SMALLINT SOME STABLE START STATEMENT
STATISTICS STDIN STDOUT STORAGE STRICT_P SUBSTRING SUPERUSER_P SYMMETRIC
SHOW SIMILAR SIMPLE SMALLINT SOME STABLE STANDALONE START STATEMENT
STATISTICS STDIN STDOUT STORAGE STRICT_P STRIP SUBSTRING SUPERUSER_P SYMMETRIC
SYSID SYSTEM_P
TABLE TABLESPACE TEMP TEMPLATE TEMPORARY THEN TIME TIMESTAMP
@@ -428,12 +434,15 @@ static void doNegateFloat(Value *v);
UNCOMMITTED UNENCRYPTED UNION UNIQUE UNKNOWN UNLISTEN UNTIL
UPDATE USER USING
VACUUM VALID VALIDATOR VALUES VARCHAR VARYING
VERBOSE VIEW VOLATILE
VACUUM VALID VALIDATOR VALUE VALUES VARCHAR VARYING
VERBOSE VERSION VIEW VOLATILE
WHEN WHERE WITH WITHOUT WORK WRITE
WHEN WHERE WHITESPACE WITH WITHOUT WORK WRITE
YEAR_P
XMLATTRIBUTES XMLCONCAT XMLELEMENT XMLFOREST XMLPARSE
XMLPI XMLROOT XMLSERIALIZE
YEAR_P YES
ZONE
@@ -484,6 +493,7 @@ static void doNegateFloat(Value *v);
* left-associativity among the JOIN rules themselves.
*/
%left JOIN CROSS LEFT FULL RIGHT INNER_P NATURAL
%right PRESERVE STRIP
%%
/*
@@ -7868,6 +7878,146 @@ func_expr: func_name '(' ')'
v->op = IS_LEAST;
$$ = (Node *)v;
}
| XMLCONCAT '(' expr_list ')'
{
$$ = makeXmlExpr(IS_XMLCONCAT, NULL, NULL, $3);
}
| XMLELEMENT '(' NAME ColLabel ')'
{
$$ = makeXmlExpr(IS_XMLELEMENT, $4, NULL, NULL);
}
| XMLELEMENT '(' NAME ColLabel ',' xml_attributes ')'
{
$$ = makeXmlExpr(IS_XMLELEMENT, $4, $6, NULL);
}
| XMLELEMENT '(' NAME ColLabel ',' expr_list ')'
{
$$ = makeXmlExpr(IS_XMLELEMENT, $4, NULL, $6);
}
| XMLELEMENT '(' NAME ColLabel ',' xml_attributes ',' expr_list ')'
{
$$ = makeXmlExpr(IS_XMLELEMENT, $4, $6, $8);
}
| XMLFOREST '(' xml_attribute_list ')'
{
$$ = makeXmlExpr(IS_XMLFOREST, NULL, $3, NULL);
}
| XMLPARSE '(' document_or_content a_expr xml_whitespace_option ')'
{
FuncCall *n = makeNode(FuncCall);
n->funcname = SystemFuncName("xmlparse");
n->args = list_make3(makeBoolAConst($3 == DOCUMENT), $4, makeBoolAConst($5 == PRESERVE));
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->location = @1;
$$ = (Node *)n;
}
| XMLPI '(' NAME ColLabel ')'
{
FuncCall *n = makeNode(FuncCall);
n->funcname = SystemFuncName("xmlpi");
n->args = list_make1(makeStringConst($4, NULL));
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->location = @1;
$$ = (Node *)n;
}
| XMLPI '(' NAME ColLabel ',' a_expr ')'
{
FuncCall *n = makeNode(FuncCall);
n->funcname = SystemFuncName("xmlpi");
n->args = list_make2(makeStringConst($4, NULL), $6);
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->location = @1;
$$ = (Node *)n;
}
| XMLROOT '(' a_expr ',' xml_root_version opt_xml_root_standalone ')'
{
FuncCall *n = makeNode(FuncCall);
Node *ver;
A_Const *sa;
if ($5)
ver = $5;
else
{
A_Const *val;
val = makeNode(A_Const);
val->val.type = T_Null;
ver = (Node *) val;
}
if ($6)
sa = makeBoolAConst($6 == 1);
else
{
sa = makeNode(A_Const);
sa->val.type = T_Null;
}
n->funcname = SystemFuncName("xmlroot");
n->args = list_make3($3, ver, sa);
n->agg_star = FALSE;
n->agg_distinct = FALSE;
n->location = @1;
$$ = (Node *)n;
}
| XMLSERIALIZE '(' document_or_content a_expr AS Typename ')'
{
/*
* FIXME: This should be made distinguishable from
* CAST (for reverse compilation at least).
*/
$$ = makeTypeCast($4, $6);
}
;
/*
* SQL/XML support
*/
/*
 * VERSION clause of XMLROOT: yields the version expression, or NULL
 * when VERSION NO VALUE was written.
 */
xml_root_version: VERSION a_expr { $$ = $2; }
| VERSION NO VALUE { $$ = NULL; }
;
/*
 * Optional STANDALONE clause of XMLROOT, encoded as an integer:
 * 1 = YES, -1 = NO, 0 = NO VALUE or clause omitted.
 */
opt_xml_root_standalone: ',' STANDALONE YES { $$ = 1; }
| ',' STANDALONE NO { $$ = -1; }
| ',' STANDALONE NO VALUE { $$ = 0; }
| /*EMPTY*/ { $$ = 0; }
;
/* XMLATTRIBUTES(...): yields the enclosed attribute list unchanged */
xml_attributes: XMLATTRIBUTES '(' xml_attribute_list ')' { $$ = $3; }
;
/* Comma-separated list of xml_attribute_el items, built left to right */
xml_attribute_list: xml_attribute_el { $$ = list_make1($1); }
| xml_attribute_list ',' xml_attribute_el { $$ = lappend($1, $3); }
;
/*
 * One named-argument item: an expression with an optional "AS label".
 * The result is a ResTarget whose name is the label, or NULL when no
 * label was given.
 */
xml_attribute_el: a_expr AS ColLabel
{
$$ = makeNode(ResTarget);
$$->name = $3;
$$->indirection = NULL;
$$->val = (Node *) $1;
}
| a_expr
{
$$ = makeNode(ResTarget);
/* no explicit label */
$$->name = NULL;
$$->indirection = NULL;
$$->val = (Node *) $1;
}
;
/* DOCUMENT or CONTENT keyword; the value is the token code itself */
document_or_content: DOCUMENT { $$ = DOCUMENT; }
| CONTENT { $$ = CONTENT; }
;
/*
 * Optional whitespace handling clause; the value is the token code
 * PRESERVE or STRIP, with STRIP as the default when omitted.
 */
xml_whitespace_option: PRESERVE WHITESPACE { $$ = PRESERVE; }
| STRIP WHITESPACE { $$ = STRIP; }
| /*EMPTY*/ { $$ = STRIP; }
;
/*
@@ -8562,6 +8712,7 @@ unreserved_keyword:
| CONCURRENTLY
| CONNECTION
| CONSTRAINTS
| CONTENT
| CONVERSION_P
| COPY
| CREATEDB
@@ -8581,6 +8732,7 @@ unreserved_keyword:
| DELIMITER
| DELIMITERS
| DISABLE_P
| DOCUMENT
| DOMAIN_P
| DOUBLE_P
| DROP
@@ -8640,6 +8792,7 @@ unreserved_keyword:
| MODE
| MONTH_P
| MOVE
| NAME
| NAMES
| NEXT
| NO
@@ -8700,12 +8853,14 @@ unreserved_keyword:
| SHOW
| SIMPLE
| STABLE
| STANDALONE
| START
| STATEMENT
| STATISTICS
| STDIN
| STDOUT
| STORAGE
| STRIP
| SUPERUSER_P
| SYSID
| SYSTEM_P
@@ -8729,13 +8884,17 @@ unreserved_keyword:
| VALID
| VALIDATOR
| VARYING
| VERSION
| VIEW
| VALUE
| VOLATILE
| WHITESPACE
| WITH
| WITHOUT
| WORK
| WRITE
| YEAR_P
| YES
| ZONE
;
@@ -8788,6 +8947,14 @@ col_name_keyword:
| TRIM
| VALUES
| VARCHAR
| XMLATTRIBUTES
| XMLELEMENT
| XMLCONCAT
| XMLFOREST
| XMLPARSE
| XMLPI
| XMLROOT
| XMLSERIALIZE
;
/* Function identifier --- keywords that can be function names.
@@ -9322,6 +9489,17 @@ doNegateFloat(Value *v)
}
}
/*
 * makeXmlExpr
 *		Build an XmlExpr node from an operation code, an optional element
 *		name, a list of named arguments and a list of plain arguments.
 *		The inputs are stored as given; the node is returned as a Node *.
 */
static Node *
makeXmlExpr(XmlExprOp op, char *name, List *named_args, List *args)
{
	XmlExpr    *xmlnode = makeNode(XmlExpr);

	xmlnode->args = args;
	xmlnode->named_args = named_args;
	xmlnode->name = name;
	xmlnode->op = op;

	return (Node *) xmlnode;
}
/*
* Must undefine base_yylex before including scan.c, since we want it
* to create the function base_yylex not filtered_base_yylex.

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.177 2006/10/07 21:51:02 petere Exp $
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.178 2006/12/21 16:05:14 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -89,6 +89,7 @@ static const ScanKeyword ScanKeywords[] = {
{"connection", CONNECTION},
{"constraint", CONSTRAINT},
{"constraints", CONSTRAINTS},
{"content", CONTENT},
{"conversion", CONVERSION_P},
{"convert", CONVERT},
{"copy", COPY},
@@ -123,6 +124,7 @@ static const ScanKeyword ScanKeywords[] = {
{"disable", DISABLE_P},
{"distinct", DISTINCT},
{"do", DO},
{"document", DOCUMENT},
{"domain", DOMAIN_P},
{"double", DOUBLE_P},
{"drop", DROP},
@@ -218,6 +220,7 @@ static const ScanKeyword ScanKeywords[] = {
{"mode", MODE},
{"month", MONTH_P},
{"move", MOVE},
{"name", NAME},
{"names", NAMES},
{"national", NATIONAL},
{"natural", NATURAL},
@@ -314,6 +317,7 @@ static const ScanKeyword ScanKeywords[] = {
{"smallint", SMALLINT},
{"some", SOME},
{"stable", STABLE},
{"standalone", STANDALONE},
{"start", START},
{"statement", STATEMENT},
{"statistics", STATISTICS},
@@ -321,6 +325,7 @@ static const ScanKeyword ScanKeywords[] = {
{"stdout", STDOUT},
{"storage", STORAGE},
{"strict", STRICT_P},
{"strip", STRIP},
{"substring", SUBSTRING},
{"superuser", SUPERUSER_P},
{"symmetric", SYMMETRIC},
@@ -357,19 +362,31 @@ static const ScanKeyword ScanKeywords[] = {
{"vacuum", VACUUM},
{"valid", VALID},
{"validator", VALIDATOR},
{"value", VALUE},
{"values", VALUES},
{"varchar", VARCHAR},
{"varying", VARYING},
{"verbose", VERBOSE},
{"version", VERSION},
{"view", VIEW},
{"volatile", VOLATILE},
{"when", WHEN},
{"where", WHERE},
{"whitespace", WHITESPACE},
{"with", WITH},
{"without", WITHOUT},
{"work", WORK},
{"write", WRITE},
{"xmlattributes", XMLATTRIBUTES},
{"xmlconcat", XMLCONCAT},
{"xmlelement", XMLELEMENT},
{"xmlforest", XMLFOREST},
{"xmlparse", XMLPARSE},
{"xmlpi", XMLPI},
{"xmlroot", XMLROOT},
{"xmlserialize", XMLSERIALIZE},
{"year", YEAR_P},
{"yes", YES},
{"zone", ZONE},
};

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.147 2006/12/10 22:13:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_coerce.c,v 2.148 2006/12/21 16:05:14 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -919,6 +919,46 @@ coerce_to_bigint(ParseState *pstate, Node *node,
return node;
}
/*
 * coerce_to_xml()
 *		Make sure the argument of an xml-taking construct has type xml,
 *		adding a coercion if necessary, and reject set-returning input.
 *
 * constructName is used only in error messages.
 *
 * Returns the node tree, possibly wrapped in a coercion.
 *
 * As with coerce_type, pstate may be NULL if no special unknown-Param
 * processing is wanted.
 */
Node *
coerce_to_xml(ParseState *pstate, Node *node,
			  const char *constructName)
{
	Oid			srcType = exprType(node);

	/* Anything that is not already xml must be coercible to it */
	if (srcType != XMLOID)
	{
		Node	   *coerced;

		coerced = coerce_to_target_type(pstate, node, srcType,
										XMLOID, -1,
										COERCION_ASSIGNMENT,
										COERCE_IMPLICIT_CAST);
		if (coerced == NULL)
			ereport(ERROR,
					(errcode(ERRCODE_DATATYPE_MISMATCH),
			/* translator: first %s is name of a SQL construct, eg LIMIT */
					 errmsg("argument of %s must be type xml, not type %s",
							constructName, format_type_be(srcType))));
		node = coerced;
	}

	/* A set-valued argument is never acceptable */
	if (expression_returns_set(node))
		ereport(ERROR,
				(errcode(ERRCODE_DATATYPE_MISMATCH),
		/* translator: %s is name of a SQL construct, eg LIMIT */
				 errmsg("argument of %s must not return a set",
						constructName)));

	return node;
}
/* select_common_type()
* Determine the common supertype of a list of input expression types.

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.199 2006/12/10 22:13:26 tgl Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_expr.c,v 1.200 2006/12/21 16:05:14 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -33,6 +33,7 @@
#include "parser/parse_type.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/xml.h"
bool Transform_null_equals = false;
@@ -55,6 +56,7 @@ static Node *transformArrayExpr(ParseState *pstate, ArrayExpr *a);
static Node *transformRowExpr(ParseState *pstate, RowExpr *r);
static Node *transformCoalesceExpr(ParseState *pstate, CoalesceExpr *c);
static Node *transformMinMaxExpr(ParseState *pstate, MinMaxExpr *m);
static Node *transformXmlExpr(ParseState *pstate, XmlExpr *x);
static Node *transformBooleanTest(ParseState *pstate, BooleanTest *b);
static Node *transformColumnRef(ParseState *pstate, ColumnRef *cref);
static Node *transformWholeRowRef(ParseState *pstate, char *schemaname,
@@ -232,6 +234,10 @@ transformExpr(ParseState *pstate, Node *expr)
result = transformBooleanTest(pstate, (BooleanTest *) expr);
break;
case T_XmlExpr:
result = transformXmlExpr(pstate, (XmlExpr *) expr);
break;
/*********************************************
* Quietly accept node types that may be presented when we are
* called on an already-transformed tree.
@@ -1409,6 +1415,56 @@ transformBooleanTest(ParseState *pstate, BooleanTest *b)
return (Node *) b;
}
/*
 * transformXmlExpr
 *		Parse analysis for an XmlExpr: map the element name and every
 *		named argument's name to an XML name, transform the argument
 *		expressions, and coerce the plain arguments to type xml.
 *
 * A named argument without an explicit name must be a plain column
 * reference, whose column name is then used; anything else is an error.
 * A fresh XmlExpr node is returned; the input node is not modified.
 */
static Node *
transformXmlExpr(ParseState *pstate, XmlExpr *x)
{
	XmlExpr    *result = makeNode(XmlExpr);
	const char *construct;
	ListCell   *cell;

	result->op = x->op;
	result->name = x->name
		? map_sql_identifier_to_xml_name(x->name, false)
		: NULL;

	/* construct name used in coercion error messages */
	if (x->op == IS_XMLCONCAT)
		construct = "XMLCONCAT";
	else if (x->op == IS_XMLELEMENT)
		construct = "XMLELEMENT";
	else
		construct = "XMLFOREST";

	/* named arguments become TargetEntry nodes carrying the XML name */
	foreach(cell, x->named_args)
	{
		ResTarget  *target = (ResTarget *) lfirst(cell);
		Node	   *value = transformExpr(pstate, target->val);
		char	   *xmlname = NULL;

		if (target->name != NULL)
			xmlname = map_sql_identifier_to_xml_name(target->name, false);
		else if (IsA(target->val, ColumnRef))
			xmlname = map_sql_identifier_to_xml_name(FigureColname(target->val), true);
		else if (x->op == IS_XMLELEMENT)
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unnamed attribute value must be a column reference")));
		else
			ereport(ERROR,
					(errcode(ERRCODE_SYNTAX_ERROR),
					 errmsg("unnamed element value must be a column reference")));

		result->named_args = lappend(result->named_args,
									 makeTargetEntry((Expr *) value, 0, xmlname, false));
	}

	/* plain arguments must end up as type xml */
	foreach(cell, x->args)
	{
		Node	   *raw = (Node *) lfirst(cell);
		Node	   *cooked = coerce_to_xml(pstate,
										   transformExpr(pstate, raw),
										   construct);

		result->args = lappend(result->args, cooked);
	}

	return (Node *) result;
}
/*
* Construct a whole-row reference to represent the notation "relation.*".
*
@@ -1668,6 +1724,9 @@ exprType(Node *expr)
case T_BooleanTest:
type = BOOLOID;
break;
case T_XmlExpr:
type = XMLOID;
break;
case T_CoerceToDomain:
type = ((CoerceToDomain *) expr)->resulttype;
break;

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/parse_target.c,v 1.149 2006/10/04 00:29:56 momjian Exp $
* $PostgreSQL: pgsql/src/backend/parser/parse_target.c,v 1.150 2006/12/21 16:05:14 petere Exp $
*
*-------------------------------------------------------------------------
*/
@@ -1315,6 +1315,21 @@ FigureColnameInternal(Node *node, char **name)
return 2;
}
break;
case T_XmlExpr:
/*
 * Make SQL/XML functions act like a regular function: the name is the
 * lower-case construct keyword.  An op value not listed in the inner
 * switch sets no name and falls out to the break below.
 */
switch (((XmlExpr*) node)->op)
{
case IS_XMLCONCAT:
*name = "xmlconcat";
return 2;
case IS_XMLELEMENT:
*name = "xmlelement";
return 2;
case IS_XMLFOREST:
*name = "xmlforest";
return 2;
}
break;
default:
break;
}

View File

@@ -1,7 +1,7 @@
#
# Makefile for utils/adt
#
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.60 2006/04/05 22:11:55 tgl Exp $
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.61 2006/12/21 16:05:15 petere Exp $
#
subdir = src/backend/utils/adt
@@ -25,7 +25,7 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
network.o mac.o inet_net_ntop.o inet_net_pton.o \
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o
like.o: like.c like_match.c

View File

@@ -2,7 +2,7 @@
* ruleutils.c - Functions to convert stored expressions/querytrees
* back to source text
*
* $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.235 2006/11/10 22:59:29 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/ruleutils.c,v 1.236 2006/12/21 16:05:15 petere Exp $
**********************************************************************/
#include "postgres.h"
@@ -2988,6 +2988,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
case T_CoalesceExpr:
case T_MinMaxExpr:
case T_NullIfExpr:
case T_XmlExpr:
case T_Aggref:
case T_FuncExpr:
/* function-like: name(..) or name[..] */
@@ -3096,6 +3097,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
case T_CoalesceExpr: /* own parentheses */
case T_MinMaxExpr: /* own parentheses */
case T_NullIfExpr: /* other separators */
case T_XmlExpr: /* own parentheses */
case T_Aggref: /* own parentheses */
case T_CaseExpr: /* other separators */
return true;
@@ -3144,6 +3146,7 @@ isSimpleNode(Node *node, Node *parentNode, int prettyFlags)
case T_CoalesceExpr: /* own parentheses */
case T_MinMaxExpr: /* own parentheses */
case T_NullIfExpr: /* other separators */
case T_XmlExpr: /* own parentheses */
case T_Aggref: /* own parentheses */
case T_CaseExpr: /* other separators */
return true;
@@ -3845,6 +3848,28 @@ get_rule_expr(Node *node, deparse_context *context,
}
break;
case T_XmlExpr:
{
/*
 * Deparse an XmlExpr as KEYWORD( named_args args ).
 *
 * NOTE(review): xexpr->name is never written here, and nothing is
 * emitted between the two argument lists, so the output for
 * XMLELEMENT probably cannot be parsed back -- confirm and extend.
 */
XmlExpr *xexpr = (XmlExpr *) node;
switch (xexpr->op)
{
case IS_XMLCONCAT:
appendStringInfo(buf, "XMLCONCAT(");
break;
case IS_XMLELEMENT:
appendStringInfo(buf, "XMLELEMENT(");
break;
case IS_XMLFOREST:
appendStringInfo(buf, "XMLFOREST(");
break;
}
/* both lists are handed to get_rule_expr as whole List nodes */
get_rule_expr((Node *) xexpr->named_args, context, true);
get_rule_expr((Node *) xexpr->args, context, true);
appendStringInfoChar(buf, ')');
}
break;
case T_CoerceToDomain:
{
CoerceToDomain *ctest = (CoerceToDomain *) node;

942
src/backend/utils/adt/xml.c Normal file
View File

@@ -0,0 +1,942 @@
/*-------------------------------------------------------------------------
*
* xml.c
* XML data type support.
*
*
* Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.1 2006/12/21 16:05:15 petere Exp $
*
*-------------------------------------------------------------------------
*/
/*
* Generally, XML type support is only available when libxml use was
* configured during the build. Even when it was not, the type and all
* of its functions are still defined, but most of the functions will
* fail when called. For one thing, this avoids having to manage variant
* catalog installations. But it also has nice effects such as that you
* can dump a database containing XML type data even if the server is not
* linked with libxml.
*/
#include "postgres.h"
#ifdef USE_LIBXML
#include <libxml/chvalid.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#endif /* USE_LIBXML */
#include "fmgr.h"
#include "mb/pg_wchar.h"
#include "nodes/execnodes.h"
#include "utils/builtins.h"
#include "utils/xml.h"
#ifdef USE_LIBXML
/*
 * A couple of useful macros (similar to ones from libxml/parse.c)
 *
 * CMP4 is true when the first four bytes at s equal c1..c4; CMP5 also
 * checks a fifth byte.  Every argument is parenthesized so that an
 * expression such as "p + 1" can be passed safely.  Note that s is
 * evaluated once per byte compared, so it must have no side effects.
 */
#define CMP4( s, c1, c2, c3, c4 ) \
	( ((unsigned char *) (s))[ 0 ] == (c1) && ((unsigned char *) (s))[ 1 ] == (c2) && \
	  ((unsigned char *) (s))[ 2 ] == (c3) && ((unsigned char *) (s))[ 3 ] == (c4) )
#define CMP5( s, c1, c2, c3, c4, c5 ) \
	( CMP4( (s), (c1), (c2), (c3), (c4) ) && ((unsigned char *) (s))[ 4 ] == (c5) )
#define PG_XML_DEFAULT_URI "dummy.xml"
#define XML_ERRBUF_SIZE 200
static void xml_init(void);
static void *xml_palloc(size_t size);
static void *xml_repalloc(void *ptr, size_t size);
static void xml_pfree(void *ptr);
static char *xml_pstrdup(const char *string);
static void xml_ereport(int level, char *msg, void *ctxt);
static void xml_errorHandler(void *ctxt, const char *msg, ...);
static void xml_ereport_by_code(int level, char *msg, int errcode);
static xmlChar *xml_text2xmlChar(text *in);
static xmlDocPtr xml_parse(text *data, int opts, bool is_document);
/* Global variables */
/* taken from contrib/xml2 */
/* FIXME: DO NOT USE global vars !!! */
char *xml_errbuf; /* per line error buffer */
char *xml_errmsg = NULL; /* overall error message */
#endif /* USE_LIBXML */
#define NO_XML_SUPPORT() ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("no XML support in this installation")))
/*
 * xml_in
 *		Input function for type xml: wrap the C string argument in a
 *		varlena and run it through xml_parse() as content (not as a
 *		document) before returning it.
 *
 * Without libxml this only raises the "no XML support" error.
 */
Datum
xml_in(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	char	   *input = PG_GETARG_CSTRING(0);
	size_t		nbytes = strlen(input);
	xmltype    *value;

	/* header plus payload; no terminating NUL is stored */
	value = palloc(VARHDRSZ + nbytes);
	VARATT_SIZEP(value) = VARHDRSZ + nbytes;
	memcpy(VARDATA(value), input, nbytes);

	/*
	 * Check well-formedness by parsing; a failed parse is expected to
	 * raise ERROR.  Open question carried over from the original: is DTD
	 * validation needed when a DTD is present?
	 */
	xml_parse(value, XML_PARSE_DTDATTR | XML_PARSE_DTDVALID, false);

	PG_RETURN_XML_P(value);
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}
/*
 * xml_out
 *		Output function for type xml: return the stored bytes as a
 *		freshly allocated NUL-terminated C string.  Works with or
 *		without libxml.
 */
Datum
xml_out(PG_FUNCTION_ARGS)
{
	xmltype    *value = PG_GETARG_XML_P(0);
	int32		nbytes = VARSIZE(value) - VARHDRSZ;
	char	   *cstr = palloc(nbytes + 1);

	memcpy(cstr, VARDATA(value), nbytes);
	cstr[nbytes] = '\0';

	PG_RETURN_CSTRING(cstr);
}
#ifdef USE_LIBXML
/*
 * appendStringInfoText
 *		Append the payload bytes of a text value (everything after the
 *		varlena header) to a StringInfo.
 */
static void
appendStringInfoText(StringInfo str, const text *t)
{
	int			nbytes = VARSIZE(t) - VARHDRSZ;

	appendBinaryStringInfo(str, VARDATA(t), nbytes);
}
/*
 * stringinfo_to_xmltype
 *		Copy the current contents of a StringInfo into a newly allocated
 *		xml value.  The StringInfo itself is left untouched.
 */
static xmltype *
stringinfo_to_xmltype(StringInfo buf)
{
	int32		total = VARHDRSZ + buf->len;
	xmltype    *xmlval = palloc(total);

	VARATT_SIZEP(xmlval) = total;
	memcpy(VARDATA(xmlval), buf->data, buf->len);

	return xmlval;
}
#endif
/*
 * xmlcomment
 *		Wrap the text argument in an XML comment and return it as xml.
 *
 * Raises ERRCODE_INVALID_XML_COMMENT when the text contains "--" or ends
 * with "-", since neither may appear in a well-formed comment.  Without
 * libxml this only raises the "no XML support" error.
 */
Datum
xmlcomment(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	text	   *arg = PG_GETARG_TEXT_P(0);
	char	   *argdata = VARDATA(arg);
	int			len = VARATT_SIZEP(arg) - VARHDRSZ;
	StringInfoData buf;
	int			i;

	/* reject "--" anywhere in the string */
	for (i = 1; i < len; i++)
	{
		if (argdata[i] == '-' && argdata[i - 1] == '-')
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_XML_COMMENT),
					 errmsg("invalid XML comment")));
	}

	/*
	 * Reject "-" at the end.  This is tested outside the loop so that a
	 * one-character comment "-" is caught too; the loop above starts at
	 * index 1 and never runs for it.
	 */
	if (len > 0 && argdata[len - 1] == '-')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_XML_COMMENT),
				 errmsg("invalid XML comment")));

	initStringInfo(&buf);
	appendStringInfo(&buf, "<!--");
	appendStringInfoText(&buf, arg);
	appendStringInfo(&buf, "-->");

	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}
/*
 * xmlparse - check a text value by parsing it and return it as xml.
 *
 * Argument 0 is the text; optional argument 1 is the "is document" flag
 * (default false); optional argument 2 is the "preserve whitespace" flag.
 * The returned value is the input itself, not a re-serialization of the
 * parsed tree.  In a build without USE_LIBXML this only raises an error.
 */
Datum
xmlparse(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	text	   *data = PG_GETARG_TEXT_P(0);
	bool		is_document = false;

	/*
	 * Since the XMLPARSE grammar makes STRIP WHITESPACE the default, this
	 * should really default to false.  But until whitespace stripping is
	 * actually implemented, that would be annoying.
	 */
	bool		preserve_whitespace = true;

	if (PG_NARGS() >= 2)
		is_document = PG_GETARG_BOOL(1);

	if (PG_NARGS() >= 3)
		preserve_whitespace = PG_GETARG_BOOL(2);

	if (!preserve_whitespace)
		ereport(WARNING,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("XMLPARSE with STRIP WHITESPACE is not implemented")));

	/*
	 * Note that here we try to apply DTD defaults (XML_PARSE_DTDATTR)
	 * according to SQL/XML:10.16.7.d: 'Default values defined by internal
	 * DTD are applied'.  As for external DTDs, we try to support them too
	 * (see SQL/XML:10.16.7.e).
	 *
	 * The parse result is ignored; we assume that an ERROR occurred if
	 * parsing failed.
	 */
	xml_parse(data, XML_PARSE_DTDATTR, is_document);

	PG_RETURN_XML_P(data);
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}
/*
 * xmlpi - build an XML processing instruction: <?target content?>
 *
 * Argument 0 is the target name; optional argument 1 is the content text.
 * A target beginning with "xml" (in any letter case) is rejected, as is
 * content containing "?>".  The target is passed through
 * map_sql_identifier_to_xml_name() before being emitted.  In a build
 * without USE_LIBXML this only raises an error.
 */
Datum
xmlpi(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	char	   *target = NameStr(*PG_GETARG_NAME(0));
	bool		starts_with_xml;
	StringInfoData buf;

	/* case-insensitive test for a leading "xml" */
	starts_with_xml = (strlen(target) >= 3);
	starts_with_xml = starts_with_xml && (target[0] == 'x' || target[0] == 'X');
	starts_with_xml = starts_with_xml && (target[1] == 'm' || target[1] == 'M');
	starts_with_xml = starts_with_xml && (target[2] == 'l' || target[2] == 'L');

	if (starts_with_xml)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("invalid XML processing instruction"),
				 errdetail("XML processing instruction target name cannot start with \"xml\".")));

	initStringInfo(&buf);
	appendStringInfo(&buf, "<?");
	appendStringInfoString(&buf, map_sql_identifier_to_xml_name(target, false));

	if (PG_NARGS() > 1)
	{
		text	   *arg = PG_GETARG_TEXT_P(1);
		char	   *string = DatumGetCString(DirectFunctionCall1(textout, PointerGetDatum(arg)));

		/* the content must not terminate the instruction prematurely */
		if (strstr(string, "?>"))
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION),
					 errmsg("invalid XML processing instruction"),
					 errdetail("XML processing instruction cannot contain \"?>\".")));

		appendStringInfo(&buf, " %s", string);
	}

	appendStringInfoString(&buf, "?>");

	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}
/*
 * xmlroot - prepend an XML declaration to an xml value.
 *
 * Argument 0 is the xml value (NULL yields NULL), argument 1 the version
 * text (NULL omits the version attribute), argument 2 the standalone flag
 * (NULL omits the attribute, true gives "yes", false gives "no").  In a
 * build without USE_LIBXML this only raises an error.
 */
Datum
xmlroot(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
	xmltype    *data;
	text	   *version = NULL;
	int			standalone = 0;		/* 0 = omit, 1 = "yes", -1 = "no" */
	StringInfoData buf;

	if (PG_ARGISNULL(0))
		PG_RETURN_NULL();
	data = PG_GETARG_XML_P(0);

	if (!PG_ARGISNULL(1))
		version = PG_GETARG_TEXT_P(1);

	if (!PG_ARGISNULL(2))
		standalone = PG_GETARG_BOOL(2) ? 1 : -1;

	/*
	 * FIXME: This is probably supposed to be cleverer if there already is
	 * an XML preamble.
	 */
	initStringInfo(&buf);
	appendStringInfo(&buf, "<?xml");

	if (version != NULL)
	{
		appendStringInfo(&buf, " version=\"");
		appendStringInfoText(&buf, version);
		appendStringInfo(&buf, "\"");
	}

	if (standalone != 0)
	{
		const char *flag = (standalone == 1) ? "yes" : "no";

		appendStringInfo(&buf, " standalone=\"%s\"", flag);
	}

	appendStringInfo(&buf, "?>");
	appendStringInfoText(&buf, (text *) data);

	PG_RETURN_XML_P(stringinfo_to_xmltype(&buf));
#else
	NO_XML_SUPPORT();
	return 0;
#endif
}
/*
 * xmlvalidate - validate an XML document against a DTD.
 *
 * Argument 0 is the document text.  Argument 1 is a text value that is
 * copied to a NUL-terminated string and passed as the second argument of
 * xmlParseDTD() (the DTD's system identifier per the libxml2 API, i.e. a
 * location rather than the DTD content).
 *
 * Returns true when xmlValidateDtd() returns 1 and false otherwise; a
 * failed validation is additionally reported as a NOTICE.  Failure to
 * allocate the parser context, to parse the document, or to load the DTD is
 * reported at ERROR level through xml_ereport().  In a build without
 * USE_LIBXML this only raises an error.
 *
 * TODO !!! use text instead of cstring for second arg
 * TODO allow passing DTD as a string value (not only as an URI)
 * TODO redesign (see comment with '!!!' below)
 *
 * NOTE(review): every explicit cleanup call in this function is compiled
 * out with "#if 0", and the validation context created by xmlNewValidCtxt()
 * is neither checked for NULL nor kept for release -- confirm whether
 * relying on memory-context cleanup here is intentional.
 */
Datum
xmlvalidate(PG_FUNCTION_ARGS)
{
#ifdef USE_LIBXML
text *data = PG_GETARG_TEXT_P(0);
text *dtdOrUri = PG_GETARG_TEXT_P(1);
bool result = FALSE;
xmlParserCtxtPtr ctxt; /* the parser context */
xmlDocPtr doc; /* the resulting document tree */
xmlDtdPtr dtd;
/* set up libxml's allocator hooks and this file's error-capture state */
xml_init();
ctxt = xmlNewParserCtxt();
if (ctxt == NULL)
xml_ereport(ERROR, "could not allocate parser context", ctxt);
/* parse the document itself with no parser options (last argument 0) */
doc = xmlCtxtReadMemory(ctxt, (char *) VARDATA(data),
VARSIZE(data) - VARHDRSZ, PG_XML_DEFAULT_URI, NULL, 0);
if (doc == NULL)
xml_ereport(ERROR, "could not parse XML data", ctxt);
/*
 * Disabled URI-handling experiment.  Note that it ends in a dangling
 * "else": if re-enabled as is, that "else" would govern the xmlParseDTD()
 * assignment that follows the #endif.
 */
#if 0
uri = xmlCreateURI();
ereport(NOTICE, (errcode(0),errmsg(" dtd - %s", dtdOrUri)));
dtd = palloc(sizeof(xmlDtdPtr));
uri = xmlParseURI(dtdOrUri);
if (uri == NULL)
xml_ereport(ERROR, "not implemented yet... (TODO)", ctxt);
else
#endif
/* load the DTD named by the second argument */
dtd = xmlParseDTD(NULL, xml_text2xmlChar(dtdOrUri));
if (dtd == NULL)
{
#if 0
xmlFreeDoc(doc);
xmlFreeParserCtxt(ctxt);
#endif
xml_ereport(ERROR, "could not load DTD", ctxt);
}
/* only a return value of exactly 1 counts as a successful validation */
if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) == 1)
result = TRUE;
/* disabled cleanup; "uri" is only declared/assigned in disabled code above */
#if 0
xmlFreeURI(uri);
xmlFreeDtd(dtd);
xmlFreeDoc(doc);
xmlFreeParserCtxt(ctxt);
xmlCleanupParser();
#endif
/* a failed validation is reported as a NOTICE, not an error */
if (!result)
xml_ereport(NOTICE, "validation against DTD failed", ctxt);
PG_RETURN_BOOL(result);
#else /* not USE_LIBXML */
NO_XML_SUPPORT();
return 0;
#endif /* not USE_LIBXML */
}
#ifdef USE_LIBXML
/*
 * xml_init - (re)initialize libxml2 and this file's error-capture state.
 *
 * Container for some init stuff (not good design!).  On every call this
 *	- verifies that xmlChar has the same size as char,
 *	- routes libxml2's memory management through the xml_palloc family,
 *	- initializes the parser and checks the library version,
 *	- installs xml_errorHandler as libxml2's generic error function, and
 *	- resets xml_errmsg and allocates a fresh, zeroed xml_errbuf.
 *
 * TODO xmlChar is utf8-char, make proper tuning (initdb with enc!=utf8 and
 * check)
 */
static void
xml_init(void)
{
	/*
	 * Currently, we have no pure UTF-8 support for internals -- check if we
	 * can work.
	 */
	if (sizeof(char) != sizeof(xmlChar))
		ereport(ERROR,
				(errmsg("cannot initialize XML library"),
				 /* sizeof yields size_t; cast so the arguments match %u */
				 errdetail("libxml2 has incompatible char type: sizeof(char)=%u, sizeof(xmlChar)=%u.",
						   (unsigned int) sizeof(char),
						   (unsigned int) sizeof(xmlChar))));

	xmlMemSetup(xml_pfree, xml_palloc, xml_repalloc, xml_pstrdup);
	xmlInitParser();
	LIBXML_TEST_VERSION;

	/* do not flood PG's logfile with libxml error messages - reset error handler */
	xmlSetGenericErrorFunc(NULL, xml_errorHandler);

	xml_errmsg = NULL;
	xml_errbuf = palloc(XML_ERRBUF_SIZE);
	memset(xml_errbuf, 0, XML_ERRBUF_SIZE);
}
/*
 * xml_parse - parse a text value with libxml2 and return the document tree.
 *
 * Convert a string to XML internal representation (same things as for TEXT,
 * but with checking the data for well-formedness and, moreover, validation
 * against DTD, if needed).
 *
 *	data		the XML text to parse
 *	opts		libxml2 parser options, passed to xmlCtxtReadMemory()
 *	is_document	currently not consulted: the decision between document
 *				and chunk parsing is made from the input text alone
 *
 * Input longer than four bytes that begins with "<?xml" is parsed as a
 * complete document; anything else is parsed as a well-balanced chunk into
 * a newly created empty document.  Parse failures are reported at ERROR
 * level.  If the parsed document carries a DTD and the parser context was
 * marked invalid, a WARNING is issued.  When XML_DEBUG_DTD_CONST is defined,
 * the document is instead validated against the DTD at that fixed path.
 *
 * NOTICE: We use TEXT type as internal storage type. In the future, we plan
 * to create own storage type (maybe several types/strategies)
 * TODO predefined DTDs / XSDs and validation
 * TODO validation against XML Schema
 * TODO maybe, libxml2's xmlreader is better? (do not construct DOM, yet do
 * not use SAX - see xml_reader.c)
 * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
 */
static xmlDocPtr
xml_parse(text *data, int opts, bool is_document)
{
	bool		validationFailed = FALSE;
	xmlParserCtxtPtr ctxt;		/* the parser context */
	xmlDocPtr	doc;			/* the resulting document tree */
	int			res_code;
	int32		len;
	xmlChar    *string;
#ifdef XML_DEBUG_DTD_CONST
	xmlDtdPtr	dtd;			/* pointer to DTD */
#endif

	xml_init();

	len = VARSIZE(data) - VARHDRSZ;		/* will be useful later */
	string = xml_text2xmlChar(data);

	ctxt = xmlNewParserCtxt();
	if (ctxt == NULL)
		xml_ereport(ERROR, "could not allocate parser context", ctxt);

	/* first, we try to parse the string as it is XML doc, then, as XML chunk */
	ereport(DEBUG3, (errmsg("string to parse: %s", string)));
	if (len > 4 && CMP5(string, '<', '?', 'x', 'm', 'l'))
	{
		/* consider it as DOCUMENT */
		doc = xmlCtxtReadMemory(ctxt, string, len, PG_XML_DEFAULT_URI, NULL, opts);
		if (doc == NULL)
			xml_ereport(ERROR, "could not parse XML data", ctxt);
	}
	else
	{
		/* attempt to parse the string as if it is an XML fragment */
		ereport(DEBUG3, (errmsg("the string is not an XML doc, trying to parse as a CHUNK")));
		doc = xmlNewDoc(NULL);
		/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
		res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string, NULL);
		if (res_code != 0)
		{
			xmlFreeParserCtxt(ctxt);
			xmlCleanupParser();
			xml_ereport_by_code(ERROR, "could not parse XML data", res_code);
		}
	}

#ifdef XML_DEBUG_DTD_CONST
	dtd = xmlParseDTD(NULL, (xmlChar *) XML_DEBUG_DTD_CONST);
	xml_ereport(DEBUG3, "solid path to DTD was defined for debugging purposes", ctxt);
	if (dtd == NULL)
	{
		xml_ereport(ERROR, "could not parse DTD data", ctxt);
	}
	else
#else
	/* if dtd for our xml data is detected... */
	if ((doc->intSubset != NULL) || (doc->extSubset != NULL))
#endif
	{
		/* assume that inline DTD exists - validation should be performed */
#ifdef XML_DEBUG_DTD_CONST
		if (xmlValidateDtd(xmlNewValidCtxt(), doc, dtd) != 1)
#else
		if (ctxt->valid == 0)
#endif
		{
			/* DTD exists, but validator reported 'validation failed' */
			validationFailed = TRUE;
		}
	}

	if (validationFailed)
		xml_ereport(WARNING, "validation against DTD failed", ctxt);

	/*
	 * TODO encoding issues
	 * (thoughts:
	 *	CASE:
	 *	  - XML data has explicit encoding attribute in its prolog
	 *	  - if not, assume that enc. of XML data is the same as client's one
	 *
	 *	The common rule is to accept the XML data only if its encoding is
	 *	the same as encoding of the storage (server's). The other possible
	 *	option is to accept all the docs, but DO TRANSFORMATION and, if
	 *	needed, change the prolog.
	 *
	 *	I think I'd stick the first way (for the 1st version), it's much
	 *	simpler (less errors...)
	 * )
	 */
	/* ... */

	xmlFreeParserCtxt(ctxt);
	xmlCleanupParser();

	/*
	 * doc->encoding is NULL for chunks and for documents without an encoding
	 * declaration; never hand a NULL pointer to %s.
	 */
	ereport(DEBUG3, (errmsg("XML data successfully parsed, encoding: %s",
							doc->encoding ? (char *) doc->encoding : "(not specified)")));

	return doc;
}
/*
 * xmlChar<->text conversions
 *
 * Return a palloc'd, NUL-terminated copy of the payload of a text value,
 * typed as xmlChar for use with libxml2 calls.
 */
static xmlChar *
xml_text2xmlChar(text *in)
{
	int32		nbytes = VARSIZE(in) - VARHDRSZ;
	xmlChar    *copy = palloc(nbytes + 1);

	memcpy(copy, VARDATA(in), nbytes);
	copy[nbytes] = '\0';

	return copy;
}
/*
 * Wrappers for memory management functions
 *
 * xml_palloc: allocation callback with a malloc-style signature that
 * forwards to palloc().
 */
static void *
xml_palloc(size_t size)
{
	void	   *chunk = palloc(size);

	return chunk;
}
/*
 * xml_repalloc: reallocation callback with a realloc-style signature.
 *
 * realloc() semantics allow a NULL pointer, meaning "allocate a new chunk";
 * repalloc() does not accept NULL, so that case is forwarded to palloc().
 */
static void *
xml_repalloc(void *ptr, size_t size)
{
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
/*
 * xml_pfree: release callback with a free-style signature.
 *
 * free() semantics allow a NULL pointer, but pfree() does not, so NULL is
 * ignored here instead of being passed on.
 */
static void
xml_pfree(void *ptr)
{
	if (ptr != NULL)
		pfree(ptr);
}
/*
 * xml_pstrdup: string-duplication callback with a strdup-style signature
 * that forwards to pstrdup().
 */
static char *
xml_pstrdup(const char *string)
{
	char	   *copy = pstrdup(string);

	return copy;
}
/*
 * Wrapper for "ereport" function.
 * Adds detail - libxml's native error message, if any.
 *
 *	level	elog level to report at
 *	msg		primary message text (emitted verbatim, not used as a format)
 *	ctxt	libxml2 parser context to take the last error from, or NULL
 *
 * Any messages accumulated in xml_errmsg are first emitted at DEBUG1 and
 * the accumulator is released and reset.  When level is ERROR, the parser
 * context is freed and the parser cleaned up before the report is raised.
 */
static void
xml_ereport(int level, char *msg, void *ctxt)
{
	xmlErrorPtr libxmlErr = NULL;
	char	   *xmlErrDetail = NULL;

	if (xml_errmsg != NULL)
	{
		ereport(DEBUG1, (errmsg("%s", xml_errmsg)));
		pfree(xml_errmsg);
		/* reset, so that a later call cannot touch the freed chunk again */
		xml_errmsg = NULL;
	}

	if (ctxt != NULL)
		libxmlErr = xmlCtxtGetLastError(ctxt);

	if (libxmlErr != NULL && libxmlErr->message != NULL)
	{
		size_t		xmlErrLen = strlen(libxmlErr->message);
		size_t		i;

		/*
		 * Take a private, NUL-terminated copy now: the original message
		 * belongs to the context, which may be freed below.  As usual,
		 * libxml error message contains '\n'; get rid of it.
		 */
		xmlErrDetail = (char *) palloc(xmlErrLen + 1);
		for (i = 0; i < xmlErrLen; i++)
		{
			if (libxmlErr->message[i] == '\n')
				xmlErrDetail[i] = '.';
			else
				xmlErrDetail[i] = libxmlErr->message[i];
		}
		xmlErrDetail[xmlErrLen] = '\0';
	}

	/* ereport(ERROR) does not return, so clean up libxml state first */
	if (level == ERROR)
	{
		xmlFreeParserCtxt(ctxt);
		xmlCleanupParser();
	}

	if (xmlErrDetail != NULL)
		ereport(level, (errmsg("%s", msg), errdetail("%s", xmlErrDetail)));
	else
		ereport(level, (errmsg("%s", msg)));
}
/*
 * Error handler for libxml error messages
 *
 * Formats the message into xml_errbuf and adds it to the accumulated text
 * in xml_errmsg.  When appending to an existing non-empty message, the new
 * text overwrites that message's last character, as the code has always
 * done (presumably to drop a trailing newline -- TODO confirm).  xml_errbuf
 * is cleared again before returning.
 */
static void
xml_errorHandler(void *ctxt, const char *msg,...)
{
	va_list		args;

	va_start(args, msg);
	vsnprintf(xml_errbuf, XML_ERRBUF_SIZE, msg, args);
	va_end(args);

	/* Now copy the argument across */
	if (xml_errmsg == NULL)
		xml_errmsg = pstrdup(xml_errbuf);
	else
	{
		size_t		oldlen = strlen(xml_errmsg);
		size_t		addlen = strlen(xml_errbuf);

		/*
		 * Start one byte before the old terminator, unless the accumulated
		 * message is empty: then there is no last character to overwrite,
		 * and writing at index -1 would be out of bounds.
		 */
		size_t		start = (oldlen > 0) ? oldlen - 1 : 0;

		xml_errmsg = repalloc(xml_errmsg, oldlen + addlen + 1);
		memcpy(xml_errmsg + start, xml_errbuf, addlen);
		xml_errmsg[start + addlen] = '\0';
	}

	memset(xml_errbuf, 0, XML_ERRBUF_SIZE);
}
/*
* Return error message by libxml error code
* TODO make them closer to recommendations from Postgres manual
*/
static void
xml_ereport_by_code(int level, char *msg, int code)
{
const char *det;
if (code < 0)
{
ereport(level, (errmsg(msg)));
return;
}
switch (code) {
case XML_ERR_INTERNAL_ERROR:
det = "libxml internal error";
break;
case XML_ERR_ENTITY_LOOP:
det = "Detected an entity reference loop";
break;
case XML_ERR_ENTITY_NOT_STARTED:
det = "EntityValue: \" or ' expected";
break;
case XML_ERR_ENTITY_NOT_FINISHED:
det = "EntityValue: \" or ' expected";
break;
case XML_ERR_ATTRIBUTE_NOT_STARTED:
det = "AttValue: \" or ' expected";
break;
case XML_ERR_LT_IN_ATTRIBUTE:
det = "Unescaped '<' not allowed in attributes values";
break;
case XML_ERR_LITERAL_NOT_STARTED:
det = "SystemLiteral \" or ' expected";
break;
case XML_ERR_LITERAL_NOT_FINISHED:
det = "Unfinished System or Public ID \" or ' expected";
break;
case XML_ERR_MISPLACED_CDATA_END:
det = "Sequence ']]>' not allowed in content";
break;
case XML_ERR_URI_REQUIRED:
det = "SYSTEM or PUBLIC, the URI is missing";
break;
case XML_ERR_PUBID_REQUIRED:
det = "PUBLIC, the Public Identifier is missing";
break;
case XML_ERR_HYPHEN_IN_COMMENT:
det = "Comment must not contain '--' (double-hyphen)";
break;
case XML_ERR_PI_NOT_STARTED:
det = "xmlParsePI : no target name";
break;
case XML_ERR_RESERVED_XML_NAME:
det = "Invalid PI name";
break;
case XML_ERR_NOTATION_NOT_STARTED:
det = "NOTATION: Name expected here";
break;
case XML_ERR_NOTATION_NOT_FINISHED:
det = "'>' required to close NOTATION declaration";
break;
case XML_ERR_VALUE_REQUIRED:
det = "Entity value required";
break;
case XML_ERR_URI_FRAGMENT:
det = "Fragment not allowed";
break;
case XML_ERR_ATTLIST_NOT_STARTED:
det = "'(' required to start ATTLIST enumeration";
break;
case XML_ERR_NMTOKEN_REQUIRED:
det = "NmToken expected in ATTLIST enumeration";
break;
case XML_ERR_ATTLIST_NOT_FINISHED:
det = "')' required to finish ATTLIST enumeration";
break;
case XML_ERR_MIXED_NOT_STARTED:
det = "MixedContentDecl : '|' or ')*' expected";
break;
case XML_ERR_PCDATA_REQUIRED:
det = "MixedContentDecl : '#PCDATA' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_STARTED:
det = "ContentDecl : Name or '(' expected";
break;
case XML_ERR_ELEMCONTENT_NOT_FINISHED:
det = "ContentDecl : ',' '|' or ')' expected";
break;
case XML_ERR_PEREF_IN_INT_SUBSET:
det = "PEReference: forbidden within markup decl in internal subset";
break;
case XML_ERR_GT_REQUIRED:
det = "Expected '>'";
break;
case XML_ERR_CONDSEC_INVALID:
det = "XML conditional section '[' expected";
break;
case XML_ERR_EXT_SUBSET_NOT_FINISHED:
det = "Content error in the external subset";
break;
case XML_ERR_CONDSEC_INVALID_KEYWORD:
det = "conditional section INCLUDE or IGNORE keyword expected";
break;
case XML_ERR_CONDSEC_NOT_FINISHED:
det = "XML conditional section not closed";
break;
case XML_ERR_XMLDECL_NOT_STARTED:
det = "Text declaration '<?xml' required";
break;
case XML_ERR_XMLDECL_NOT_FINISHED:
det = "parsing XML declaration: '?>' expected";
break;
case XML_ERR_EXT_ENTITY_STANDALONE:
det = "external parsed entities cannot be standalone";
break;
case XML_ERR_ENTITYREF_SEMICOL_MISSING:
det = "EntityRef: expecting ';'";
break;
case XML_ERR_DOCTYPE_NOT_FINISHED:
det = "DOCTYPE improperly terminated";
break;
case XML_ERR_LTSLASH_REQUIRED:
det = "EndTag: '</' not found";
break;
case XML_ERR_EQUAL_REQUIRED:
det = "Expected '='";
break;
case XML_ERR_STRING_NOT_CLOSED:
det = "String not closed expecting \" or '";
break;
case XML_ERR_STRING_NOT_STARTED:
det = "String not started expecting ' or \"";
break;
case XML_ERR_ENCODING_NAME:
det = "Invalid XML encoding name";
break;
case XML_ERR_STANDALONE_VALUE:
det = "Standalone accepts only 'yes' or 'no'";
break;
case XML_ERR_DOCUMENT_EMPTY:
det = "Document is empty";
break;
case XML_ERR_DOCUMENT_END:
det = "Extra content at the end of the document";
break;
case XML_ERR_NOT_WELL_BALANCED:
det = "Chunk is not well balanced";
break;
case XML_ERR_EXTRA_CONTENT:
det = "Extra content at the end of well balanced chunk";
break;
case XML_ERR_VERSION_MISSING:
det = "Malformed declaration expecting version";
break;
/* more err codes... Please, keep the order! */
case XML_ERR_ATTRIBUTE_WITHOUT_VALUE: /* 41 */
det ="Attribute without value";
break;
case XML_ERR_ATTRIBUTE_REDEFINED:
det ="Attribute defined more than once in the same element";
break;
case XML_ERR_COMMENT_NOT_FINISHED: /* 45 */
det = "Comment is not finished";
break;
case XML_ERR_NAME_REQUIRED: /* 68 */
det = "Element name not found";
break;
case XML_ERR_TAG_NOT_FINISHED: /* 77 */
det = "Closing tag not found";
break;
default:
det = "Unregistered error (libxml error code: %d)";
ereport(DEBUG1, (errmsg("Check out \"libxml/xmlerror.h\" and bring errcode \"%d\" processing to \"xml.c\".", code)));
}
if (xml_errmsg != NULL)
{
ereport(DEBUG1, (errmsg("%s", xml_errmsg)));
pfree(xml_errmsg);
}
ereport(level, (errmsg(msg), errdetail(det, code)));
}
/*
 * Convert one char in the current server encoding to a Unicode
 * codepoint.
 *
 * The character at s is converted to UTF-8 and then decoded while the
 * database encoding is temporarily switched to UTF-8; the original encoding
 * is restored before returning.
 */
static pg_wchar
sqlchar_to_unicode(unsigned char *s)
{
	int			save_enc = GetDatabaseEncoding();
	pg_wchar	ret[2];			/* decoded char plus the terminating zero */
	char	   *utf8string;

	/* pg_mblen() here still measures s in the server encoding */
	utf8string = pg_do_encoding_conversion(s, pg_mblen(s), save_enc, PG_UTF8);

	SetDatabaseEncoding(PG_UTF8);
	/* measure the converted string, not s, now that UTF-8 is in effect */
	pg_mb2wchar_with_len(utf8string, ret, pg_mblen(utf8string));
	SetDatabaseEncoding(save_enc);

	/* the conversion routine hands back its input when nothing was converted */
	if ((unsigned char *) utf8string != s)
		pfree(utf8string);

	return ret[0];
}
/*
 * Can the given code point start an XML name?
 * Allowed: (Letter | '_' | ':')
 */
static bool
is_valid_xml_namefirst(pg_wchar c)
{
	bool		is_letter = (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c));
	bool		is_punct = (c == '_' || c == ':');

	return (is_letter || is_punct);
}
/*
 * Can the given code point appear inside an XML name?
 * Allowed: Letter | Digit | '.' | '-' | '_' | ':' | CombiningChar | Extender
 */
static bool
is_valid_xml_namechar(pg_wchar c)
{
	bool		is_letter = (xmlIsBaseCharQ(c) || xmlIsIdeographicQ(c));
	bool		is_digit = xmlIsDigitQ(c);
	bool		is_punct = (c == '.' || c == '-' || c == '_' || c == ':');
	bool		is_modifier = (xmlIsCombiningQ(c) || xmlIsExtenderQ(c));

	return (is_letter || is_digit || is_punct || is_modifier);
}
#endif /* USE_LIBXML */
/*
 * Map SQL identifier to XML name; see SQL/XML:2003 section 9.1.
 *
 * Returns a newly built string in which characters that cannot be used
 * as-is are replaced by "_xHHHH_" escapes:
 *	- ':' at the start, or anywhere when fully_escaped is set;
 *	- '_' when it is followed by 'x';
 *	- the leading 'x'/'X' of an identifier starting with "xml" (any case),
 *	  when fully_escaped is set;
 *	- any character that is not a valid XML name character, or not a valid
 *	  first character when at the start.
 * In a build without USE_LIBXML this only raises an error.
 */
char *
map_sql_identifier_to_xml_name(unsigned char *ident, bool fully_escaped)
{
#ifdef USE_LIBXML
	StringInfoData out;
	unsigned char *cur;

	initStringInfo(&out);

	for (cur = ident; *cur; cur += pg_mblen(cur))
	{
		bool		at_start = (cur == ident);
		pg_wchar	u;

		if (*cur == ':' && (at_start || fully_escaped))
		{
			appendStringInfo(&out, "_x003A_");
			continue;
		}

		if (*cur == '_' && cur[1] == 'x')
		{
			appendStringInfo(&out, "_x005F_");
			continue;
		}

		if (fully_escaped && at_start
			&& (cur[0] == 'x' || cur[0] == 'X')
			&& (cur[1] == 'm' || cur[1] == 'M')
			&& (cur[2] == 'l' || cur[2] == 'L'))
		{
			/* escape only the first letter, keeping its case */
			if (*cur == 'x')
				appendStringInfo(&out, "_x0078_");
			else
				appendStringInfo(&out, "_x0058_");
			continue;
		}

		/* everything else: copy through if it is legal here, else escape */
		u = sqlchar_to_unicode(cur);
		if (is_valid_xml_namechar(u)
			&& (!at_start || is_valid_xml_namefirst(u)))
			appendBinaryStringInfo(&out, cur, pg_mblen(cur));
		else
			appendStringInfo(&out, "_x%04X_", (unsigned int) u);
	}

	return out.data;
#else							/* not USE_LIBXML */
	NO_XML_SUPPORT();
	return NULL;
#endif   /* not USE_LIBXML */
}

View File

@@ -4,7 +4,7 @@
* (currently mule internal code (mic) is used)
* Tatsuo Ishii
*
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.59 2006/10/04 00:30:02 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.60 2006/12/21 16:05:15 petere Exp $
*/
#include "postgres.h"
@@ -599,7 +599,7 @@ void
SetDatabaseEncoding(int encoding)
{
if (!PG_VALID_BE_ENCODING(encoding))
elog(ERROR, "invalid database encoding");
elog(ERROR, "invalid database encoding: %d", encoding);
DatabaseEncoding = &pg_enc2name_tbl[encoding];
Assert(DatabaseEncoding->encoding == encoding);