mirror of
https://github.com/postgres/postgres.git
synced 2025-07-20 05:03:10 +03:00
Support [NO] INDENT option in XMLSERIALIZE().
This adds the ability to pretty-print XML documents ... according to libxml's somewhat idiosyncratic notions of what's pretty, anyway. One notable divergence from a strict reading of the spec is that libxml is willing to collapse empty nodes "<node></node>" to just "<node/>", whereas SQL and the underlying XML spec say that this option should only result in whitespace tweaks. Nonetheless, it seems close enough to justify using the SQL-standard syntax. Jim Jones, reviewed by Peter Smith and myself. Discussion: https://postgr.es/m/2f5df461-dad8-6d7d-4568-08e10608a69b@uni-muenster.de
This commit is contained in:
@ -52,6 +52,7 @@
|
||||
#include <libxml/tree.h>
|
||||
#include <libxml/uri.h>
|
||||
#include <libxml/xmlerror.h>
|
||||
#include <libxml/xmlsave.h>
|
||||
#include <libxml/xmlversion.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
#include <libxml/xpath.h>
|
||||
@ -146,6 +147,8 @@ static bool print_xml_decl(StringInfo buf, const xmlChar *version,
|
||||
static bool xml_doctype_in_content(const xmlChar *str);
|
||||
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
|
||||
bool preserve_whitespace, int encoding,
|
||||
XmlOptionType *parsed_xmloptiontype,
|
||||
xmlNodePtr *parsed_nodes,
|
||||
Node *escontext);
|
||||
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
|
||||
static int xml_xpathobjtoxmlarray(xmlXPathObjectPtr xpathobj,
|
||||
@ -273,7 +276,7 @@ xml_in(PG_FUNCTION_ARGS)
|
||||
* Note: we don't need to worry about whether a soft error is detected.
|
||||
*/
|
||||
doc = xml_parse(vardata, xmloption, true, GetDatabaseEncoding(),
|
||||
fcinfo->context);
|
||||
NULL, NULL, fcinfo->context);
|
||||
if (doc != NULL)
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
@ -400,7 +403,7 @@ xml_recv(PG_FUNCTION_ARGS)
|
||||
* Parse the data to check if it is well-formed XML data. Assume that
|
||||
* xml_parse will throw ERROR if not.
|
||||
*/
|
||||
doc = xml_parse(result, xmloption, true, encoding, NULL);
|
||||
doc = xml_parse(result, xmloption, true, encoding, NULL, NULL, NULL);
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
/* Now that we know what we're dealing with, convert to server encoding */
|
||||
@ -619,15 +622,182 @@ xmltotext(PG_FUNCTION_ARGS)
|
||||
|
||||
|
||||
text *
|
||||
xmltotext_with_xmloption(xmltype *data, XmlOptionType xmloption_arg)
|
||||
xmltotext_with_options(xmltype *data, XmlOptionType xmloption_arg, bool indent)
|
||||
{
|
||||
if (xmloption_arg == XMLOPTION_DOCUMENT && !xml_is_document(data))
|
||||
#ifdef USE_LIBXML
|
||||
text *volatile result;
|
||||
xmlDocPtr doc;
|
||||
XmlOptionType parsed_xmloptiontype;
|
||||
xmlNodePtr content_nodes;
|
||||
volatile xmlBufferPtr buf = NULL;
|
||||
volatile xmlSaveCtxtPtr ctxt = NULL;
|
||||
ErrorSaveContext escontext = {T_ErrorSaveContext};
|
||||
PgXmlErrorContext *xmlerrcxt;
|
||||
#endif
|
||||
|
||||
if (xmloption_arg != XMLOPTION_DOCUMENT && !indent)
|
||||
{
|
||||
/*
|
||||
* We don't actually need to do anything, so just return the
|
||||
* binary-compatible input. For backwards-compatibility reasons,
|
||||
* allow such cases to succeed even without USE_LIBXML.
|
||||
*/
|
||||
return (text *) data;
|
||||
}
|
||||
|
||||
#ifdef USE_LIBXML
|
||||
/* Parse the input according to the xmloption */
|
||||
doc = xml_parse(data, xmloption_arg, true, GetDatabaseEncoding(),
|
||||
&parsed_xmloptiontype, &content_nodes,
|
||||
(Node *) &escontext);
|
||||
if (doc == NULL || escontext.error_occurred)
|
||||
{
|
||||
if (doc)
|
||||
xmlFreeDoc(doc);
|
||||
/* A soft error must be failure to conform to XMLOPTION_DOCUMENT */
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NOT_AN_XML_DOCUMENT),
|
||||
errmsg("not an XML document")));
|
||||
}
|
||||
|
||||
/* It's actually binary compatible, save for the above check. */
|
||||
return (text *) data;
|
||||
/* If we weren't asked to indent, we're done. */
|
||||
if (!indent)
|
||||
{
|
||||
xmlFreeDoc(doc);
|
||||
return (text *) data;
|
||||
}
|
||||
|
||||
/* Otherwise, we gotta spin up some error handling. */
|
||||
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
|
||||
|
||||
PG_TRY();
|
||||
{
|
||||
size_t decl_len = 0;
|
||||
|
||||
/* The serialized data will go into this buffer. */
|
||||
buf = xmlBufferCreate();
|
||||
|
||||
if (buf == NULL || xmlerrcxt->err_occurred)
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
||||
"could not allocate xmlBuffer");
|
||||
|
||||
/* Detect whether there's an XML declaration */
|
||||
parse_xml_decl(xml_text2xmlChar(data), &decl_len, NULL, NULL, NULL);
|
||||
|
||||
/*
|
||||
* Emit declaration only if the input had one. Note: some versions of
|
||||
* xmlSaveToBuffer leak memory if a non-null encoding argument is
|
||||
* passed, so don't do that. We don't want any encoding conversion
|
||||
* anyway.
|
||||
*/
|
||||
if (decl_len == 0)
|
||||
ctxt = xmlSaveToBuffer(buf, NULL,
|
||||
XML_SAVE_NO_DECL | XML_SAVE_FORMAT);
|
||||
else
|
||||
ctxt = xmlSaveToBuffer(buf, NULL,
|
||||
XML_SAVE_FORMAT);
|
||||
|
||||
if (ctxt == NULL || xmlerrcxt->err_occurred)
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
||||
"could not allocate xmlSaveCtxt");
|
||||
|
||||
if (parsed_xmloptiontype == XMLOPTION_DOCUMENT)
|
||||
{
|
||||
/* If it's a document, saving is easy. */
|
||||
if (xmlSaveDoc(ctxt, doc) == -1 || xmlerrcxt->err_occurred)
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
|
||||
"could not save document to xmlBuffer");
|
||||
}
|
||||
else if (content_nodes != NULL)
|
||||
{
|
||||
/*
|
||||
* Deal with the case where we have non-singly-rooted XML.
|
||||
* libxml's dump functions don't work well for that without help.
|
||||
* We build a fake root node that serves as a container for the
|
||||
* content nodes, and then iterate over the nodes.
|
||||
*/
|
||||
xmlNodePtr root;
|
||||
xmlNodePtr newline;
|
||||
|
||||
root = xmlNewNode(NULL, (const xmlChar *) "content-root");
|
||||
if (root == NULL || xmlerrcxt->err_occurred)
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
||||
"could not allocate xml node");
|
||||
|
||||
/* This attaches root to doc, so we need not free it separately. */
|
||||
xmlDocSetRootElement(doc, root);
|
||||
xmlAddChild(root, content_nodes);
|
||||
|
||||
/*
|
||||
* We use this node to insert newlines in the dump. Note: in at
|
||||
* least some libxml versions, xmlNewDocText would not attach the
|
||||
* node to the document even if we passed it. Therefore, manage
|
||||
* freeing of this node manually, and pass NULL here to make sure
|
||||
* there's not a dangling link.
|
||||
*/
|
||||
newline = xmlNewDocText(NULL, (const xmlChar *) "\n");
|
||||
if (newline == NULL || xmlerrcxt->err_occurred)
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
||||
"could not allocate xml node");
|
||||
|
||||
for (xmlNodePtr node = root->children; node; node = node->next)
|
||||
{
|
||||
/* insert newlines between nodes */
|
||||
if (node->type != XML_TEXT_NODE && node->prev != NULL)
|
||||
{
|
||||
if (xmlSaveTree(ctxt, newline) == -1 || xmlerrcxt->err_occurred)
|
||||
{
|
||||
xmlFreeNode(newline);
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
|
||||
"could not save newline to xmlBuffer");
|
||||
}
|
||||
}
|
||||
|
||||
if (xmlSaveTree(ctxt, node) == -1 || xmlerrcxt->err_occurred)
|
||||
{
|
||||
xmlFreeNode(newline);
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
|
||||
"could not save content to xmlBuffer");
|
||||
}
|
||||
}
|
||||
|
||||
xmlFreeNode(newline);
|
||||
}
|
||||
|
||||
if (xmlSaveClose(ctxt) == -1 || xmlerrcxt->err_occurred)
|
||||
{
|
||||
ctxt = NULL; /* don't try to close it again */
|
||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INTERNAL_ERROR,
|
||||
"could not close xmlSaveCtxtPtr");
|
||||
}
|
||||
|
||||
result = (text *) xmlBuffer_to_xmltype(buf);
|
||||
}
|
||||
PG_CATCH();
|
||||
{
|
||||
if (ctxt)
|
||||
xmlSaveClose(ctxt);
|
||||
if (buf)
|
||||
xmlBufferFree(buf);
|
||||
if (doc)
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
pg_xml_done(xmlerrcxt, true);
|
||||
|
||||
PG_RE_THROW();
|
||||
}
|
||||
PG_END_TRY();
|
||||
|
||||
xmlBufferFree(buf);
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
pg_xml_done(xmlerrcxt, false);
|
||||
|
||||
return result;
|
||||
#else
|
||||
NO_XML_SUPPORT();
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
@ -762,7 +932,7 @@ xmlparse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace)
|
||||
xmlDocPtr doc;
|
||||
|
||||
doc = xml_parse(data, xmloption_arg, preserve_whitespace,
|
||||
GetDatabaseEncoding(), NULL);
|
||||
GetDatabaseEncoding(), NULL, NULL, NULL);
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
return (xmltype *) data;
|
||||
@ -902,7 +1072,7 @@ xml_is_document(xmltype *arg)
|
||||
* We'll report "true" if no soft error is reported by xml_parse().
|
||||
*/
|
||||
doc = xml_parse((text *) arg, XMLOPTION_DOCUMENT, true,
|
||||
GetDatabaseEncoding(), (Node *) &escontext);
|
||||
GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
|
||||
if (doc)
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
@ -1491,6 +1661,14 @@ xml_doctype_in_content(const xmlChar *str)
|
||||
* and xmloption_arg and preserve_whitespace are options for the
|
||||
* transformation.
|
||||
*
|
||||
* If parsed_xmloptiontype isn't NULL, *parsed_xmloptiontype is set to the
|
||||
* XmlOptionType actually used to parse the input (typically the same as
|
||||
* xmloption_arg, but a DOCTYPE node in the input can force DOCUMENT mode).
|
||||
*
|
||||
* If parsed_nodes isn't NULL and the input is not an XML document, the list
|
||||
* of parsed nodes from the xmlParseBalancedChunkMemory call will be returned
|
||||
* to *parsed_nodes.
|
||||
*
|
||||
* Errors normally result in ereport(ERROR), but if escontext is an
|
||||
* ErrorSaveContext, then "safe" errors are reported there instead, and the
|
||||
* caller must check SOFT_ERROR_OCCURRED() to see whether that happened.
|
||||
@ -1503,8 +1681,10 @@ xml_doctype_in_content(const xmlChar *str)
|
||||
* yet do not use SAX - see xmlreader.c)
|
||||
*/
|
||||
static xmlDocPtr
|
||||
xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
|
||||
int encoding, Node *escontext)
|
||||
xml_parse(text *data, XmlOptionType xmloption_arg,
|
||||
bool preserve_whitespace, int encoding,
|
||||
XmlOptionType *parsed_xmloptiontype, xmlNodePtr *parsed_nodes,
|
||||
Node *escontext)
|
||||
{
|
||||
int32 len;
|
||||
xmlChar *string;
|
||||
@ -1574,6 +1754,13 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
|
||||
parse_as_document = true;
|
||||
}
|
||||
|
||||
/* initialize output parameters */
|
||||
if (parsed_xmloptiontype != NULL)
|
||||
*parsed_xmloptiontype = parse_as_document ? XMLOPTION_DOCUMENT :
|
||||
XMLOPTION_CONTENT;
|
||||
if (parsed_nodes != NULL)
|
||||
*parsed_nodes = NULL;
|
||||
|
||||
if (parse_as_document)
|
||||
{
|
||||
/*
|
||||
@ -1620,7 +1807,8 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
|
||||
if (*(utf8string + count))
|
||||
{
|
||||
res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0,
|
||||
utf8string + count, NULL);
|
||||
utf8string + count,
|
||||
parsed_nodes);
|
||||
if (res_code != 0 || xmlerrcxt->err_occurred)
|
||||
{
|
||||
xml_errsave(escontext, xmlerrcxt,
|
||||
@ -4305,7 +4493,7 @@ wellformed_xml(text *data, XmlOptionType xmloption_arg)
|
||||
* We'll report "true" if no soft error is reported by xml_parse().
|
||||
*/
|
||||
doc = xml_parse(data, xmloption_arg, true,
|
||||
GetDatabaseEncoding(), (Node *) &escontext);
|
||||
GetDatabaseEncoding(), NULL, NULL, (Node *) &escontext);
|
||||
if (doc)
|
||||
xmlFreeDoc(doc);
|
||||
|
||||
|
Reference in New Issue
Block a user