1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-21 05:21:08 +03:00

Support [NO] INDENT option in XMLSERIALIZE().

This adds the ability to pretty-print XML documents ... according to
libxml's somewhat idiosyncratic notions of what's pretty, anyway.
One notable divergence from a strict reading of the spec is that
libxml is willing to collapse empty nodes "<node></node>" to just
"<node/>", whereas SQL and the underlying XML spec say that this
option should only result in whitespace tweaks.  Nonetheless,
it seems close enough to justify using the SQL-standard syntax.

Jim Jones, reviewed by Peter Smith and myself

Discussion: https://postgr.es/m/2f5df461-dad8-6d7d-4568-08e10608a69b@uni-muenster.de
This commit is contained in:
Tom Lane
2023-03-15 16:58:59 -04:00
parent 419a8dd814
commit 483bdb2afe
15 changed files with 775 additions and 22 deletions

View File

@@ -621,7 +621,7 @@ X061 XMLParse: character string input and DOCUMENT option YES
X065 XMLParse: binary string input and CONTENT option NO
X066 XMLParse: binary string input and DOCUMENT option NO
X068 XMLSerialize: BOM NO
X069 XMLSerialize: INDENT NO
X069 XMLSerialize: INDENT YES
X070 XMLSerialize: character string serialization and CONTENT option YES
X071 XMLSerialize: character string serialization and DOCUMENT option YES
X072 XMLSerialize: character string serialization YES

View File

@@ -3837,8 +3837,10 @@ ExecEvalXmlExpr(ExprState *state, ExprEvalStep *op)
return;
value = argvalue[0];
*op->resvalue = PointerGetDatum(xmltotext_with_xmloption(DatumGetXmlP(value),
xexpr->xmloption));
*op->resvalue =
PointerGetDatum(xmltotext_with_options(DatumGetXmlP(value),
xexpr->xmloption,
xexpr->indent));
*op->resnull = false;
}
break;

View File

@@ -613,7 +613,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
%type <node> xml_root_version opt_xml_root_standalone
%type <node> xmlexists_argument
%type <ival> document_or_content
%type <boolean> xml_whitespace_option
%type <boolean> xml_indent_option xml_whitespace_option
%type <list> xmltable_column_list xmltable_column_option_list
%type <node> xmltable_column_el
%type <defelt> xmltable_column_option_el
@@ -702,7 +702,7 @@ static Node *makeRecursiveViewSelect(char *relname, List *aliases, Node *query);
HANDLER HAVING HEADER_P HOLD HOUR_P
IDENTITY_P IF_P ILIKE IMMEDIATE IMMUTABLE IMPLICIT_P IMPORT_P IN_P INCLUDE
INCLUDING INCREMENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
INCLUDING INCREMENT INDENT INDEX INDEXES INHERIT INHERITS INITIALLY INLINE_P
INNER_P INOUT INPUT_P INSENSITIVE INSERT INSTEAD INT_P INTEGER
INTERSECT INTERVAL INTO INVOKER IS ISNULL ISOLATION
@@ -15532,13 +15532,14 @@ func_expr_common_subexpr:
$$ = makeXmlExpr(IS_XMLROOT, NULL, NIL,
list_make3($3, $5, $6), @1);
}
| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename ')'
| XMLSERIALIZE '(' document_or_content a_expr AS SimpleTypename xml_indent_option ')'
{
XmlSerialize *n = makeNode(XmlSerialize);
n->xmloption = $3;
n->expr = $4;
n->typeName = $6;
n->indent = $7;
n->location = @1;
$$ = (Node *) n;
}
@@ -15592,6 +15593,11 @@ document_or_content: DOCUMENT_P { $$ = XMLOPTION_DOCUMENT; }
| CONTENT_P { $$ = XMLOPTION_CONTENT; }
;
/*
 * Optional trailing clause of XMLSERIALIZE(... AS type [ [NO] INDENT ]).
 * Yields true only for an explicit INDENT; both NO INDENT and an omitted
 * clause yield false, so not indenting is the default.
 */
xml_indent_option: INDENT { $$ = true; }
| NO INDENT { $$ = false; }
| /*EMPTY*/ { $$ = false; }
;
xml_whitespace_option: PRESERVE WHITESPACE_P { $$ = true; }
| STRIP_P WHITESPACE_P { $$ = false; }
| /*EMPTY*/ { $$ = false; }
@@ -16828,6 +16834,7 @@ unreserved_keyword:
| INCLUDE
| INCLUDING
| INCREMENT
| INDENT
| INDEX
| INDEXES
| INHERIT
@@ -17384,6 +17391,7 @@ bare_label_keyword:
| INCLUDE
| INCLUDING
| INCREMENT
| INDENT
| INDEX
| INDEXES
| INHERIT

View File

@@ -2331,6 +2331,7 @@ transformXmlSerialize(ParseState *pstate, XmlSerialize *xs)
typenameTypeIdAndMod(pstate, xs->typeName, &targetType, &targetTypmod);
xexpr->xmloption = xs->xmloption;
xexpr->indent = xs->indent;
xexpr->location = xs->location;
/* We actually only need these to be able to parse back the expression. */
xexpr->type = targetType;

View File

@@ -52,6 +52,7 @@
#include <libxml/tree.h>
#include <libxml/uri.h>
#include <libxml/xmlerror.h>
#include <libxml/xmlsave.h>
#include <libxml/xmlversion.h>
#include <libxml/xmlwriter.h>
#include <libxml/xpath.h>
@@ -146,6 +147,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
static bool xml_doctype_in_content(const xmlChar *str);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding,
XmlOptionType *parsed_xmloptiontype,
xmlNodePtr *parsed_nodes,
Node *escontext);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
@@ -273,7 +276,7 @@ xml_in(PG_FUNCTION_ARGS)
* Note: we don't need to worry about whether a soft error is detected.
*/
doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
fcinfo->context);
NULL, NULL, fcinfo->context);
if (doc != NULL)
xmlFreeDoc(doc);
@@ -400,7 +403,7 @@ xml_recv(PG_FUNCTION_ARGS)
* Parse the data to check if it is well-formed XML data. Assume that
* xml_parse will throw ERROR if not.
*/
doc = xml_parse(result, xmloption, true, encoding, NULL);
doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
xmlFreeDoc(doc);
/* Now that we know what we're dealing with, convert to server encoding */
@@ -619,15 +622,182 @@ xmltotext(PG_FUNCTION_ARGS)
/*
 * xmltotext_with_options
 *
 * Convert an xml value to text for XMLSERIALIZE, honoring the
 * DOCUMENT/CONTENT option and the INDENT flag.
 *
 * data: the xml value to serialize.
 * xmloption_arg: XMLOPTION_DOCUMENT or XMLOPTION_CONTENT, as requested by
 *		the caller.
 * indent: true to pretty-print the output via libxml's save API.
 *
 * Returns the input itself (cast to text) whenever no indentation is
 * requested; otherwise returns a newly built text value holding the
 * re-serialized XML.
 *
 * Raises ERROR "not an XML document" when xml_parse returns no document or
 * reports a soft error, and reports libxml allocation/save failures through
 * xml_ereport.  Without USE_LIBXML, any case that needs parsing ends up in
 * NO_XML_SUPPORT().
 */
text *
xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
{
if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
#ifdef USE_LIBXML
/*
 * result, buf and ctxt are declared volatile, presumably because they are
 * assigned inside PG_TRY and then read in PG_CATCH or after PG_END_TRY.
 */
text *volatile result;
xmlDocPtr doc;
XmlOptionType parsed_xmloptiontype;
xmlNodePtr content_nodes;
volatile xmlBufferPtr buf = NULL;
volatile xmlSaveCtxtPtr ctxt = NULL;
ErrorSaveContext escontext = {T_ErrorSaveContext};
PgXmlErrorContext *xmlerrcxt;
#endif
if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
{
/*
* We don't actually need to do anything, so just return the
* binary-compatible input. For backwards-compatibility reasons,
* allow such cases to succeed even without USE_LIBXML.
*/
return (text *) data;
}
#ifdef USE_LIBXML
/* Parse the input according to the xmloption */
/*
 * Passing escontext makes xml_parse report "safe" errors there instead of
 * throwing; we also ask which mode was actually used for parsing and for
 * the node list produced when the input is not a document.
 */
doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
&parsed_xmloptiontype, &content_nodes,
(Node *) &escontext);
if (doc == NULL || escontext.error_occurred)
{
if (doc)
xmlFreeDoc(doc);
/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
ereport(ERROR,
(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
errmsg("not an XML document")));
}
/* It's actually binary compatible, save for the above check. */
return (text *) data;
/* If we weren't asked to indent, we're done. */
if (!indent)
{
xmlFreeDoc(doc);
return (text *) data;
}
/* Otherwise, we gotta spin up some error handling. */
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
PG_TRY();
{
/* Initialized to 0; still 0 below means no XML declaration was found */
size_t decl_len = 0;
/* The serialized data will go into this buffer. */
buf = xmlBufferCreate();
if (buf == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xmlBuffer");
/* Detect whether there's an XML declaration */
parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
/*
* Emit declaration only if the input had one. Note: some versions of
* xmlSaveToBuffer leak memory if a non-null encoding argument is
* passed, so don't do that. We don't want any encoding conversion
* anyway.
*/
if (decl_len == 0)
ctxt = xmlSaveToBuffer(buf, NULL,
XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
else
ctxt = xmlSaveToBuffer(buf, NULL,
XML_SAVE_FORMAT);
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xmlSaveCtxt");
/*
 * Branch on the mode xml_parse actually used, not on xmloption_arg; per
 * xml_parse's contract the two can differ when a DOCTYPE forces DOCUMENT
 * mode.
 */
if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
{
/* If it's a document, saving is easy. */
if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
"could not save document to xmlBuffer");
}
else if (content_nodes != NULL)
{
/*
* Deal with the case where we have non-singly-rooted XML.
* libxml's dump functions don't work well for that without help.
* We build a fake root node that serves as a container for the
* content nodes, and then iterate over the nodes.
*/
xmlNodePtr root;
xmlNodePtr newline;
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
if (root == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
/* This attaches root to doc, so we need not free it separately. */
xmlDocSetRootElement(doc, root);
/*
 * NOTE(review): content_nodes is the head of a sibling list, and the loop
 * below relies on reaching the rest of that list through ->next.
 * xmlAddChild is documented as adding a single node; confirm against the
 * libxml2 versions in use whether xmlAddChildList is what is wanted here.
 */
xmlAddChild(root, content_nodes);
/*
* We use this node to insert newlines in the dump. Note: in at
* least some libxml versions, xmlNewDocText would not attach the
* node to the document even if we passed it. Therefore, manage
* freeing of this node manually, and pass NULL here to make sure
* there's not a dangling link.
*/
newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
if (newline == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate xml node");
/*
 * Save each child of the fake root individually, so that the
 * "content-root" element itself never appears in the output.
 */
for (xmlNodePtr node = root->children; node; node = node->next)
{
/* insert newlines between nodes */
/* (only ahead of a non-text node, and never ahead of the first node) */
if (node->type != XML_TEXT_NODE && node->prev != NULL)
{
if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
{
/* newline is managed manually (see above), so free it before reporting */
xmlFreeNode(newline);
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
"could not save newline to xmlBuffer");
}
}
if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
{
/* same manual cleanup of newline as above */
xmlFreeNode(newline);
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
"could not save content to xmlBuffer");
}
}
xmlFreeNode(newline);
}
/* Close the save context before the buffer is converted to the result */
if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
{
ctxt = NULL; /* don't try to close it again */
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
"could not close xmlSaveCtxtPtr");
}
result = (text *) xmlBuffer_to_xmltype(buf);
}
PG_CATCH();
{
/* Error path: release whatever libxml objects exist, then re-throw */
if (ctxt)
xmlSaveClose(ctxt);
if (buf)
xmlBufferFree(buf);
if (doc)
xmlFreeDoc(doc);
pg_xml_done(xmlerrcxt, true);
PG_RE_THROW();
}
PG_END_TRY();
/*
 * Normal exit: free the buffer and the parsed document, then call
 * pg_xml_done with false (the catch path above passes true).
 */
xmlBufferFree(buf);
xmlFreeDoc(doc);
pg_xml_done(xmlerrcxt, false);
return result;
#else
/* Built without libxml: parsing and indenting are unavailable */
NO_XML_SUPPORT();
return NULL;
#endif
}
@@ -762,7 +932,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
xmlDocPtr doc;
doc = xml_parse(data, xmloption_arg, preserve_whitespace,
GetDatabaseEncoding(), NULL);
GetDatabaseEncoding(), NULL, NULL, NULL);
xmlFreeDoc(doc);
return (xmltype *) data;
@@ -902,7 +1072,7 @@ xml_is_document(xmltype *arg)
* We'll report "true" if no soft error is reported by xml_parse().
*/
doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
GetDatabaseEncoding(), (Node *) &escontext);
GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
if (doc)
xmlFreeDoc(doc);
@@ -1491,6 +1661,14 @@ xml_doctype_in_content(const xmlChar *str)
* and xmloption_arg and preserve_whitespace are options for the
* transformation.
*
* If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
* XmlOptionType actually used to parse the input (typically the same as
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
*
* If parsed_nodes isn't NULL and the input is not an XML document, the list
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
* to *parsed_nodes.
*
* Errors normally result in ereport(ERROR), but if escontext is an
* ErrorSaveContext, then "safe" errors are reported there instead, and the
* caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
@@ -1503,8 +1681,10 @@ xml_doctype_in_content(const xmlChar *str)
* yet do not use SAX - see xmlreader.c)
*/
static xmlDocPtr
xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
int encoding, Node *escontext)
xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding,
XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
Node *escontext)
{
int32 len;
xmlChar *string;
@@ -1574,6 +1754,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
parse_as_document = true;
}
/* initialize output parameters */
if (parsed_xmloptiontype != NULL)
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
XMLOPTION_CONTENT;
if (parsed_nodes != NULL)
*parsed_nodes = NULL;
if (parse_as_document)
{
/*
@@ -1620,7 +1807,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
if (*(utf8string + count))
{
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
utf8string + count, NULL);
utf8string + count,
parsed_nodes);
if (res_code != 0 || xmlerrcxt->err_occurred)
{
xml_errsave(escontext, xmlerrcxt,
@@ -4305,7 +4493,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
* We'll report "true" if no soft error is reported by xml_parse().
*/
doc = xml_parse(data, xmloption_arg, true,
GetDatabaseEncoding(), (Node *) &escontext);
GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
if (doc)
xmlFreeDoc(doc);