diff --git a/doc/src/sgml/datatype.sgml b/doc/src/sgml/datatype.sgml
index 7bc35ee172d..5cd7ec9157a 100644
--- a/doc/src/sgml/datatype.sgml
+++ b/doc/src/sgml/datatype.sgml
@@ -4136,9 +4136,11 @@ a0ee-bc99-9c0b-4ef8-bb6d-6bb9-bd38-0a11
The xml type can store well-formed
documents
, as defined by the XML standard, as well
- as content
fragments, which are defined by the
- production XMLDecl? content in the XML
- standard. Roughly, this means that content fragments can have
+ as content
fragments, which are defined by reference
+ to the more permissive
+ document node
+ of the XQuery and XPath data model.
+ Roughly, this means that content fragments can have
more than one top-level element or character node. The expression
xmlvalue IS DOCUMENT
can be used to evaluate whether a particular xml
@@ -4213,16 +4215,6 @@ SET xmloption TO { DOCUMENT | CONTENT };
data are allowed.
-
-
- With the default XML option setting, you cannot directly cast
- character strings to type xml if they contain a
- document type declaration, because the definition of XML content
- fragment does not accept them. If you need to do that, either
- use XMLPARSE or change the XML option.
-
-
-
diff --git a/src/backend/utils/adt/xml.c b/src/backend/utils/adt/xml.c
index 0309fdde9af..79698997def 100644
--- a/src/backend/utils/adt/xml.c
+++ b/src/backend/utils/adt/xml.c
@@ -139,6 +139,7 @@ static int parse_xml_decl(const xmlChar *str, size_t *lenp,
xmlChar **version, xmlChar **encoding, int *standalone);
static bool print_xml_decl(StringInfo buf, const xmlChar *version,
pg_enc encoding, int standalone);
+static bool xml_doctype_in_content(const xmlChar *str);
static xmlDocPtr xml_parse(text *data, XmlOptionType xmloption_arg,
bool preserve_whitespace, int encoding);
static text *xml_xmlnodetoxmltype(xmlNodePtr cur, PgXmlErrorContext *xmlerrcxt);
@@ -1154,8 +1155,15 @@ parse_xml_decl(const xmlChar *str, size_t *lenp,
if (xmlStrncmp(p, (xmlChar *) " */
- utf8len = strlen((const char *) (p + 5));
+ /*
+ * If next char is a name char, it's a PI like <?xml-stylesheet ...?>
+ * rather than an XMLDecl, so we have done what we came to do and found no
+ * XMLDecl.
+ *
+ * We need an input length value for xmlGetUTF8Char, but there's no need
+ * to count the whole document size, so use strnlen not strlen.
+ */
+ utf8len = strnlen((const char *) (p + 5), MAX_MULTIBYTE_CHAR_LEN);
utf8char = xmlGetUTF8Char(p + 5, &utf8len);
if (PG_XMLISNAMECHAR(utf8char))
goto finished;
@@ -1326,6 +1334,88 @@ print_xml_decl(StringInfo buf, const xmlChar *version,
return false;
}
+/*
+ * Test whether an input that is to be parsed as CONTENT contains a DTD.
+ *
+ * The SQL/XML:2003 definition of CONTENT ("XMLDecl? content") is not
+ * satisfied by a document with a DTD, which is a bit of a wart, as it means
+ * the CONTENT type is not a proper superset of DOCUMENT. SQL/XML:2006 and
+ * later fix that, by redefining content with reference to the "more
+ * permissive" Document Node of the XQuery/XPath Data Model, such that any
+ * DOCUMENT value is indeed also a CONTENT value. That definition is more
+ * useful, as CONTENT becomes usable for parsing input of unknown form (think
+ * pg_restore).
+ *
+ * As used below in xml_parse when parsing for CONTENT, libxml does not give
+ * us the 2006+ behavior, but only the 2003; it will choke if the input has
+ * a DTD. But we can provide the 2006+ definition of CONTENT easily enough,
+ * by detecting this case first and simply doing the parse as DOCUMENT.
+ *
+ * A DTD can be found arbitrarily far in, but that would be a contrived case;
+ * it will ordinarily start within a few dozen characters. The only things
+ * that can precede it are an XMLDecl (here, the caller will have called
+ * parse_xml_decl already), whitespace, comments, and processing instructions.
+ * This function need only return true if it sees a valid sequence of such
+ * things leading to must follow */
+ p = xmlStrstr(p + 2, (xmlChar *) "--");
+ if (!p || p[2] != '>')
+ return false;
+ /* advance over comment, and keep scanning */
+ p += 3;
+ continue;
+ }
+
+ /* otherwise, if it's not a PI <?target something?>, fail */
+ if (*p != '?')
+ return false;
+ p++;
+
+ /* find end of PI (the string ?> is forbidden within a PI) */
+ e = xmlStrstr(p, (xmlChar *) "?>");
+ if (!e)
+ return false;
+
+ /* we don't check PIs carefully, but do reject "xml" target */
+ if (e - p >= 3 && xmlStrncasecmp(p, (xmlChar *) "xml", 3) == 0)
+ return false;
+
+ /* advance over PI, keep scanning */
+ p = e + 2;
+ }
+}
+
/*
* Convert a C string to XML internal representation
@@ -1361,6 +1451,12 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
/* Use a TRY block to ensure we clean up correctly */
PG_TRY();
{
+ bool parse_as_document = false;
+ int res_code;
+ size_t count = 0;
+ xmlChar *version = NULL;
+ int standalone = 0;
+
xmlInitParser();
ctxt = xmlNewParserCtxt();
@@ -1368,7 +1464,25 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
+ /* Decide whether to parse as document or content */
if (xmloption_arg == XMLOPTION_DOCUMENT)
+ parse_as_document = true;
+ else
+ {
+ /* Parse and skip over the XML declaration, if any */
+ res_code = parse_xml_decl(utf8string,
+ &count, &version, NULL, &standalone);
+ if (res_code != 0)
+ xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content: invalid XML declaration",
+ res_code);
+
+ /* Is there a DOCTYPE declaration? */
+ if (xml_doctype_in_content(utf8string + count))
+ parse_as_document = true;
+ }
+
+ if (parse_as_document)
{
/*
* Note, that here we try to apply DTD defaults
@@ -1383,23 +1497,18 @@ xml_parse(text *data, XmlOptionType xmloption_arg, bool preserve_whitespace,
XML_PARSE_NOENT | XML_PARSE_DTDATTR
| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
if (doc == NULL || xmlerrcxt->err_occurred)
- xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
- "invalid XML document");
+ {
+ /* Use original option to decide which error code to throw */
+ if (xmloption_arg == XMLOPTION_DOCUMENT)
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
+ "invalid XML document");
+ else
+ xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_CONTENT,
+ "invalid XML content");
+ }
}
else
{
- int res_code;
- size_t count;
- xmlChar *version;
- int standalone;
-
- res_code = parse_xml_decl(utf8string,
- &count, &version, NULL, &standalone);
- if (res_code != 0)
- xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
- "invalid XML content: invalid XML declaration",
- res_code);
-
doc = xmlNewDoc(version);
Assert(doc->encoding == NULL);
doc->encoding = xmlStrdup((const xmlChar *) "UTF-8");
diff --git a/src/test/regress/expected/xml.out b/src/test/regress/expected/xml.out
index 9541b444abb..29d40baf792 100644
--- a/src/test/regress/expected/xml.out
+++ b/src/test/regress/expected/xml.out
@@ -532,6 +532,13 @@ LINE 1: EXECUTE foo ('bad');
DETAIL: line 1: Start tag expected, '<' not found
bad
^
+SELECT xml '';
+ERROR: invalid XML document
+LINE 1: SELECT xml '';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+
+ ^
SET XML OPTION CONTENT;
EXECUTE foo ('');
xmlconcat
@@ -545,6 +552,45 @@ EXECUTE foo ('good');
good
(1 row)
+SELECT xml ' ';
+ xml
+--------------------------------------------------------------------
+
+(1 row)
+
+SELECT xml ' ';
+ xml
+------------------------------
+
+(1 row)
+
+SELECT xml '';
+ xml
+------------------
+
+(1 row)
+
+SELECT xml ' oops ';
+ERROR: invalid XML content
+LINE 1: SELECT xml ' oops ';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+ oops
+ ^
+SELECT xml ' ';
+ERROR: invalid XML content
+LINE 1: SELECT xml ' ';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+
+ ^
+SELECT xml '';
+ERROR: invalid XML content
+LINE 1: SELECT xml '';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+
+ ^
-- Test backwards parsing
CREATE VIEW xmlview1 AS SELECT xmlcomment('test');
CREATE VIEW xmlview2 AS SELECT xmlconcat('hello', 'you');
diff --git a/src/test/regress/expected/xml_1.out b/src/test/regress/expected/xml_1.out
index a9b2b3b711b..edd1c731570 100644
--- a/src/test/regress/expected/xml_1.out
+++ b/src/test/regress/expected/xml_1.out
@@ -429,11 +429,53 @@ EXECUTE foo ('');
ERROR: prepared statement "foo" does not exist
EXECUTE foo ('bad');
ERROR: prepared statement "foo" does not exist
+SELECT xml '';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
SET XML OPTION CONTENT;
EXECUTE foo ('');
ERROR: prepared statement "foo" does not exist
EXECUTE foo ('good');
ERROR: prepared statement "foo" does not exist
+SELECT xml ' ';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml ' ';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml ' ';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml ' oops ';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml ' oops ';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml ' ';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml ' ';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
+SELECT xml '';
+ERROR: unsupported XML feature
+LINE 1: SELECT xml '';
+ ^
+DETAIL: This functionality requires the server to be built with libxml support.
+HINT: You need to rebuild PostgreSQL using --with-libxml.
-- Test backwards parsing
CREATE VIEW xmlview1 AS SELECT xmlcomment('test');
CREATE VIEW xmlview2 AS SELECT xmlconcat('hello', 'you');
diff --git a/src/test/regress/expected/xml_2.out b/src/test/regress/expected/xml_2.out
index 393861ee59a..6f18130f6a7 100644
--- a/src/test/regress/expected/xml_2.out
+++ b/src/test/regress/expected/xml_2.out
@@ -512,6 +512,13 @@ LINE 1: EXECUTE foo ('bad');
DETAIL: line 1: Start tag expected, '<' not found
bad
^
+SELECT xml '';
+ERROR: invalid XML document
+LINE 1: SELECT xml '';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+
+ ^
SET XML OPTION CONTENT;
EXECUTE foo ('');
xmlconcat
@@ -525,6 +532,45 @@ EXECUTE foo ('good');
good
(1 row)
+SELECT xml ' ';
+ xml
+--------------------------------------------------------------------
+
+(1 row)
+
+SELECT xml ' ';
+ xml
+------------------------------
+
+(1 row)
+
+SELECT xml '';
+ xml
+------------------
+
+(1 row)
+
+SELECT xml ' oops ';
+ERROR: invalid XML content
+LINE 1: SELECT xml ' oops ';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+ oops
+ ^
+SELECT xml ' ';
+ERROR: invalid XML content
+LINE 1: SELECT xml ' ';
+ ^
+DETAIL: line 1: StartTag: invalid element name
+
+ ^
+SELECT xml '';
+ERROR: invalid XML content
+LINE 1: SELECT xml '';
+ ^
+DETAIL: line 1: Extra content at the end of the document
+
+ ^
-- Test backwards parsing
CREATE VIEW xmlview1 AS SELECT xmlcomment('test');
CREATE VIEW xmlview2 AS SELECT xmlconcat('hello', 'you');
diff --git a/src/test/regress/sql/xml.sql b/src/test/regress/sql/xml.sql
index 07b2dd77bed..ca1f0422b36 100644
--- a/src/test/regress/sql/xml.sql
+++ b/src/test/regress/sql/xml.sql
@@ -149,10 +149,17 @@ PREPARE foo (xml) AS SELECT xmlconcat('', $1);
SET XML OPTION DOCUMENT;
EXECUTE foo ('');
EXECUTE foo ('bad');
+SELECT xml '';
SET XML OPTION CONTENT;
EXECUTE foo ('');
EXECUTE foo ('good');
+SELECT xml ' ';
+SELECT xml ' ';
+SELECT xml '';
+SELECT xml ' oops ';
+SELECT xml ' ';
+SELECT xml '';
-- Test backwards parsing