mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Clean up encoding issues in the xml type: In text mode, encoding
declarations are ignored and removed; in binary mode they are honored as specified by the XML standard.
This commit is contained in:
		@@ -1,4 +1,4 @@
 | 
				
			|||||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.184 2007/01/14 22:37:59 neilc Exp $ -->
 | 
					<!-- $PostgreSQL: pgsql/doc/src/sgml/datatype.sgml,v 1.185 2007/01/18 13:59:11 petere Exp $ -->
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 <chapter id="datatype">
 | 
					 <chapter id="datatype">
 | 
				
			||||||
  <title id="datatype-title">Data Types</title>
 | 
					  <title id="datatype-title">Data Types</title>
 | 
				
			||||||
@@ -3418,8 +3418,107 @@ SELECT * FROM pg_attribute
 | 
				
			|||||||
    advantage over storing XML data in a <type>text</type> field is that it
 | 
					    advantage over storing XML data in a <type>text</type> field is that it
 | 
				
			||||||
    checks the input values for well-formedness, and there are support
 | 
					    checks the input values for well-formedness, and there are support
 | 
				
			||||||
    functions to perform type-safe operations on it; see <xref
 | 
					    functions to perform type-safe operations on it; see <xref
 | 
				
			||||||
    linkend="functions-xml">.  Currently, there is no support for
 | 
					    linkend="functions-xml">.
 | 
				
			||||||
    validation against a specific <acronym>XML</> schema.
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    In particular, the <type>xml</type> type can store well-formed
 | 
				
			||||||
 | 
					    <quote>documents</quote>, as defined by the XML standard, as well
 | 
				
			||||||
 | 
					    as <quote>content</quote> fragments, which are defined by the
 | 
				
			||||||
 | 
					    production <literal>XMLDecl? content</literal> in the XML
 | 
				
			||||||
 | 
					    standard.  Roughly, this means that content fragments can have
 | 
				
			||||||
 | 
					    more than one top-level element or character node.  The expression
 | 
				
			||||||
 | 
					    <literal><replaceable>xmlvalue</replaceable> IS DOCUMENT</literal>
 | 
				
			||||||
 | 
					    can be used to evaluate whether a particular <type>xml</type>
 | 
				
			||||||
 | 
					    value is a full document or only a content fragment.
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    To produce a value of type <type>xml</type> from character data,
 | 
				
			||||||
 | 
					    use the function <function>xmlparse</function>:
 | 
				
			||||||
 | 
					<synopsis>
 | 
				
			||||||
 | 
					XMLPARSE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable>)
 | 
				
			||||||
 | 
					</synopsis>
 | 
				
			||||||
 | 
					    Examples:
 | 
				
			||||||
 | 
					<programlisting><![CDATA[
 | 
				
			||||||
 | 
					XMLPARSE (DOCUMENT '<?xml version="1.0"?><book><title>Manual</title><chapter>...</chapter></book>')
 | 
				
			||||||
 | 
					XMLPARSE (CONTENT 'abc<foo>bar</foo><bar>foo</bar>')
 | 
				
			||||||
 | 
					]]></programlisting>
 | 
				
			||||||
 | 
					    While this is the only way to convert character strings into XML
 | 
				
			||||||
 | 
					    values according to the SQL standard, the PostgreSQL-specific
 | 
				
			||||||
 | 
					    syntaxes
 | 
				
			||||||
 | 
					<programlisting><![CDATA[
 | 
				
			||||||
 | 
					xml '<foo>bar</foo>'
 | 
				
			||||||
 | 
					'<foo>bar</foo>'::xml
 | 
				
			||||||
 | 
					]]></programlisting>
 | 
				
			||||||
 | 
					    can also be used.
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    The <type>xml</type> type does not validate its input values
 | 
				
			||||||
 | 
					    against a possibly included document type declaration (DTD).
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    The inverse operation, producing character string type values from
 | 
				
			||||||
 | 
					    <type>xml</type>, uses the function
 | 
				
			||||||
 | 
					    <function>xmlserialize</function>:
 | 
				
			||||||
 | 
					<synopsis>
 | 
				
			||||||
 | 
					XMLSERIALIZE ( { DOCUMENT | CONTENT } <replaceable>value</replaceable> AS <replaceable>type</replaceable> )
 | 
				
			||||||
 | 
					</synopsis>
 | 
				
			||||||
 | 
					    <replaceable>type</replaceable> can be one of
 | 
				
			||||||
 | 
					    <type>character</type>, <type>character varying</type>, or
 | 
				
			||||||
 | 
					    <type>text</type> (or an alias name for those).  Again, according
 | 
				
			||||||
 | 
					    to the SQL standard, this is the only way to convert between type
 | 
				
			||||||
 | 
					    <type>xml</type> and character types, but PostgreSQL also allows
 | 
				
			||||||
 | 
					    you to simply cast the value.
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    Care must be taken when dealing with multiple character encodings
 | 
				
			||||||
 | 
					    on the client, server, and in the XML data passed through them.
 | 
				
			||||||
 | 
					    When using the text mode to pass queries to the server and query
 | 
				
			||||||
 | 
					    results to the client (which is the normal mode), PostgreSQL
 | 
				
			||||||
 | 
					    converts all character data passed between the client and the
 | 
				
			||||||
 | 
					    server and vice versa to the character encoding of the respective
 | 
				
			||||||
 | 
					    end; see <xref linkend="multibyte">.  This includes string
 | 
				
			||||||
 | 
					    representations of XML values, such as in the above examples.
 | 
				
			||||||
 | 
					    This would ordinarily mean that encoding declarations contained in
 | 
				
			||||||
 | 
					    XML data might become invalid as the character data is converted
 | 
				
			||||||
 | 
					    to other encodings while travelling between client and server,
 | 
				
			||||||
 | 
					    while the embedded encoding declaration is not changed.  To cope
 | 
				
			||||||
 | 
					    with this behavior, an encoding declaration contained in a
 | 
				
			||||||
 | 
					    character string presented for input to the <type>xml</type> type
 | 
				
			||||||
 | 
					    is <emphasis>ignored</emphasis>, and the content is always assumed
 | 
				
			||||||
 | 
					    to be in the current server encoding.  Consequently, for correct
 | 
				
			||||||
 | 
					    processing, such character strings of XML data must be sent off
 | 
				
			||||||
 | 
					    from the client in the current client encoding.  It is the
 | 
				
			||||||
 | 
					    responsibility of the client to either convert the document to the
 | 
				
			||||||
 | 
					    current client encoding before sending it off to the server or to
 | 
				
			||||||
 | 
					    adjust the client encoding appropriately.  On output, values of
 | 
				
			||||||
 | 
					    type <type>xml</type> will not have an encoding declaration, and
 | 
				
			||||||
 | 
					    clients must assume that the data is in the current client
 | 
				
			||||||
 | 
					    encoding.
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    When using the binary mode to pass query parameters to the server
 | 
				
			||||||
 | 
					    and query results back to the client, no character set conversion
 | 
				
			||||||
 | 
					    is performed, so the situation is different.  In this case, an
 | 
				
			||||||
 | 
					    encoding declaration in the XML data will be observed, and if it
 | 
				
			||||||
 | 
					    is absent, the data will be assumed to be in UTF-8 (as required by
 | 
				
			||||||
 | 
					    the XML standard; note that PostgreSQL does not support UTF-16 at
 | 
				
			||||||
 | 
					    all).  On output, data will have an encoding declaration
 | 
				
			||||||
 | 
					    specifying the client encoding, unless the client encoding is
 | 
				
			||||||
 | 
					    UTF-8, in which case it will be omitted.
 | 
				
			||||||
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					   <para>
 | 
				
			||||||
 | 
					    Needless to say, processing XML data with PostgreSQL will be less
 | 
				
			||||||
 | 
					    error-prone and more efficient if data encoding, client encoding,
 | 
				
			||||||
 | 
					    and server encoding are the same.  Since XML data is internally
 | 
				
			||||||
 | 
					    processed in UTF-8, computations will be most efficient if the
 | 
				
			||||||
 | 
					    server encoding is also UTF-8.
 | 
				
			||||||
   </para>
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   <para>
 | 
					   <para>
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -7,7 +7,7 @@
 | 
				
			|||||||
 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 | 
					 * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
 | 
				
			||||||
 * Portions Copyright (c) 1994, Regents of the University of California
 | 
					 * Portions Copyright (c) 1994, Regents of the University of California
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.17 2007/01/14 13:11:54 petere Exp $
 | 
					 * $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.18 2007/01/18 13:59:11 petere Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -68,7 +68,8 @@ static void 	xml_errorHandler(void *ctxt, const char *msg, ...);
 | 
				
			|||||||
static void 	xml_ereport_by_code(int level, int sqlcode,
 | 
					static void 	xml_ereport_by_code(int level, int sqlcode,
 | 
				
			||||||
									const char *msg, int errcode);
 | 
														const char *msg, int errcode);
 | 
				
			||||||
static xmlChar *xml_text2xmlChar(text *in);
 | 
					static xmlChar *xml_text2xmlChar(text *in);
 | 
				
			||||||
static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace);
 | 
					static int		parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone);
 | 
				
			||||||
 | 
					static xmlDocPtr xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif /* USE_LIBXML */
 | 
					#endif /* USE_LIBXML */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -96,7 +97,7 @@ xml_in(PG_FUNCTION_ARGS)
 | 
				
			|||||||
	 * Parse the data to check if it is well-formed XML data.  Assume
 | 
						 * Parse the data to check if it is well-formed XML data.  Assume
 | 
				
			||||||
	 * that ERROR occurred if parsing failed.
 | 
						 * that ERROR occurred if parsing failed.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	doc = xml_parse(vardata, false, true);
 | 
						doc = xml_parse(vardata, false, true, NULL);
 | 
				
			||||||
	xmlFreeDoc(doc);
 | 
						xmlFreeDoc(doc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PG_RETURN_XML_P(vardata);
 | 
						PG_RETURN_XML_P(vardata);
 | 
				
			||||||
@@ -107,19 +108,102 @@ xml_in(PG_FUNCTION_ARGS)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define PG_XML_DEFAULT_VERSION "1.0"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static char *
 | 
				
			||||||
 | 
					xml_out_internal(xmltype *x, pg_enc target_encoding)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						char		*str;
 | 
				
			||||||
 | 
						size_t		len;
 | 
				
			||||||
 | 
					#ifdef USE_LIBXML
 | 
				
			||||||
 | 
						xmlChar		*version;
 | 
				
			||||||
 | 
						xmlChar		*encoding;
 | 
				
			||||||
 | 
						int			standalone;
 | 
				
			||||||
 | 
						int			res_code;
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						len = VARSIZE(x) - VARHDRSZ;
 | 
				
			||||||
 | 
						str = palloc(len + 1);
 | 
				
			||||||
 | 
						memcpy(str, VARDATA(x), len);
 | 
				
			||||||
 | 
						str[len] = '\0';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#ifdef USE_LIBXML
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * On output, we adjust the XML declaration as follows.  (These
 | 
				
			||||||
 | 
						 * rules are the moral equivalent of the clause "Serialization of
 | 
				
			||||||
 | 
						 * an XML value" in the SQL standard.)
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * We try to avoid generating an XML declaration if possible.
 | 
				
			||||||
 | 
						 * This is so that you don't get trivial things like xml '<foo/>'
 | 
				
			||||||
 | 
						 * resulting in '<?xml version="1.0"?><foo/>', which would surely
 | 
				
			||||||
 | 
						 * be annoying.  We must provide a declaration if the standalone
 | 
				
			||||||
 | 
						 * property is specified or if we include an encoding
 | 
				
			||||||
 | 
						 * specification.  If we have a declaration, we must specify a
 | 
				
			||||||
 | 
						 * version (XML requires this).  Otherwise we only make a
 | 
				
			||||||
 | 
						 * declaration if the version is not "1.0", which is the default
 | 
				
			||||||
 | 
						 * version specified in SQL:2003.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if ((res_code = parse_xml_decl((xmlChar *) str, &len, &version, &encoding, &standalone)) == 0)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							StringInfoData buf;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							initStringInfo(&buf);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if ((version && strcmp((char *) version, PG_XML_DEFAULT_VERSION) != 0)
 | 
				
			||||||
 | 
								|| (target_encoding && target_encoding != PG_UTF8)
 | 
				
			||||||
 | 
								|| standalone != -1)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								appendStringInfoString(&buf, "<?xml");
 | 
				
			||||||
 | 
								if (version)
 | 
				
			||||||
 | 
									appendStringInfo(&buf, " version=\"%s\"", version);
 | 
				
			||||||
 | 
								else
 | 
				
			||||||
 | 
									appendStringInfo(&buf, " version=\"%s\"", PG_XML_DEFAULT_VERSION);
 | 
				
			||||||
 | 
								if (target_encoding && target_encoding != PG_UTF8)
 | 
				
			||||||
 | 
									/* XXX might be useful to convert this to IANA names
 | 
				
			||||||
 | 
									 * (ISO-8859-1 instead of LATIN1 etc.); needs field
 | 
				
			||||||
 | 
									 * experience */
 | 
				
			||||||
 | 
									appendStringInfo(&buf, " encoding=\"%s\"", pg_encoding_to_char(target_encoding));
 | 
				
			||||||
 | 
								if (standalone == 1)
 | 
				
			||||||
 | 
									appendStringInfoString(&buf, " standalone=\"yes\"");
 | 
				
			||||||
 | 
								else if (standalone == 0)
 | 
				
			||||||
 | 
									appendStringInfoString(&buf, " standalone=\"no\"");
 | 
				
			||||||
 | 
								appendStringInfoString(&buf, "?>");
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							else
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * If we are not going to produce an XML declaration, eat
 | 
				
			||||||
 | 
								 * a single newline in the original string to prevent
 | 
				
			||||||
 | 
								 * empty first lines in the output.
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								if (*(str + len) == '\n')
 | 
				
			||||||
 | 
									len += 1;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
							appendStringInfoString(&buf, str + len);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							return buf.data;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						xml_ereport_by_code(WARNING, ERRCODE_INTERNAL_ERROR,
 | 
				
			||||||
 | 
											"could not parse XML declaration in stored value", res_code);
 | 
				
			||||||
 | 
					#endif
 | 
				
			||||||
 | 
						return str;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Datum
 | 
					Datum
 | 
				
			||||||
xml_out(PG_FUNCTION_ARGS)
 | 
					xml_out(PG_FUNCTION_ARGS)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	xmltype		*s = PG_GETARG_XML_P(0);
 | 
						xmltype	   *x = PG_GETARG_XML_P(0);
 | 
				
			||||||
	char		*result;
 | 
					 | 
				
			||||||
	int32		len;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	len = VARSIZE(s) - VARHDRSZ;
 | 
						/*
 | 
				
			||||||
	result = palloc(len + 1);
 | 
						 * xml_out removes the encoding property in all cases.  This is
 | 
				
			||||||
	memcpy(result, VARDATA(s), len);
 | 
						 * because we cannot control from here whether the datum will be
 | 
				
			||||||
	result[len] = '\0';
 | 
						 * converted to a different client encoding, so we'd do more harm
 | 
				
			||||||
 | 
						 * than good by including it.
 | 
				
			||||||
	PG_RETURN_CSTRING(result);
 | 
						 */
 | 
				
			||||||
 | 
						PG_RETURN_CSTRING(xml_out_internal(x, 0));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -130,23 +214,44 @@ xml_recv(PG_FUNCTION_ARGS)
 | 
				
			|||||||
	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 | 
						StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
 | 
				
			||||||
	xmltype	   *result;
 | 
						xmltype	   *result;
 | 
				
			||||||
	char	   *str;
 | 
						char	   *str;
 | 
				
			||||||
 | 
						char	   *newstr;
 | 
				
			||||||
	int			nbytes;
 | 
						int			nbytes;
 | 
				
			||||||
	xmlDocPtr	doc;
 | 
						xmlDocPtr	doc;
 | 
				
			||||||
 | 
						xmlChar	   *encoding = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 | 
						str = pq_getmsgtext(buf, buf->len - buf->cursor, &nbytes);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	result = (xmltype *) palloc(nbytes + VARHDRSZ);
 | 
						result = palloc(nbytes + VARHDRSZ);
 | 
				
			||||||
	VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 | 
						VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 | 
				
			||||||
	memcpy(VARDATA(result), str, nbytes);
 | 
						memcpy(VARDATA(result), str, nbytes);
 | 
				
			||||||
	pfree(str);
 | 
					
 | 
				
			||||||
 | 
						parse_xml_decl((xmlChar *) str, NULL, NULL, &encoding, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Parse the data to check if it is well-formed XML data.  Assume
 | 
						 * Parse the data to check if it is well-formed XML data.  Assume
 | 
				
			||||||
	 * that ERROR occurred if parsing failed.
 | 
						 * that ERROR occurred if parsing failed.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	doc = xml_parse(result, false, true);
 | 
						doc = xml_parse(result, false, true, encoding);
 | 
				
			||||||
	xmlFreeDoc(doc);
 | 
						xmlFreeDoc(doc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						newstr = (char *) pg_do_encoding_conversion((unsigned char *) str,
 | 
				
			||||||
 | 
																	nbytes,
 | 
				
			||||||
 | 
																	encoding ? pg_char_to_encoding((char *) encoding) : PG_UTF8,
 | 
				
			||||||
 | 
																	GetDatabaseEncoding());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						pfree(str);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (newstr != str)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							free(result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							nbytes = strlen(newstr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							result = palloc(nbytes + VARHDRSZ);
 | 
				
			||||||
 | 
							VARATT_SIZEP(result) = nbytes + VARHDRSZ;
 | 
				
			||||||
 | 
							memcpy(VARDATA(result), newstr, nbytes);
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PG_RETURN_XML_P(result);
 | 
						PG_RETURN_XML_P(result);
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
	NO_XML_SUPPORT();
 | 
						NO_XML_SUPPORT();
 | 
				
			||||||
@@ -159,10 +264,11 @@ Datum
 | 
				
			|||||||
xml_send(PG_FUNCTION_ARGS)
 | 
					xml_send(PG_FUNCTION_ARGS)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	xmltype	   *x = PG_GETARG_XML_P(0);
 | 
						xmltype	   *x = PG_GETARG_XML_P(0);
 | 
				
			||||||
 | 
						char	   *outval = xml_out_internal(x, pg_get_client_encoding());
 | 
				
			||||||
	StringInfoData buf;
 | 
						StringInfoData buf;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	pq_begintypsend(&buf);
 | 
						pq_begintypsend(&buf);
 | 
				
			||||||
	pq_sendbytes(&buf, VARDATA(x), VARSIZE(x) - VARHDRSZ);
 | 
						pq_sendstring(&buf, outval);
 | 
				
			||||||
	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 | 
						PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -190,6 +296,21 @@ stringinfo_to_xmltype(StringInfo buf)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static xmltype *
 | 
				
			||||||
 | 
					cstring_to_xmltype(const char *string)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int32		len;
 | 
				
			||||||
 | 
						xmltype	   *result;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						len = strlen(string) + VARHDRSZ;
 | 
				
			||||||
 | 
						result = palloc(len);
 | 
				
			||||||
 | 
						VARATT_SIZEP(result) = len;
 | 
				
			||||||
 | 
						memcpy(VARDATA(result), string, len - VARHDRSZ);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return result;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static xmltype *
 | 
					static xmltype *
 | 
				
			||||||
xmlBuffer_to_xmltype(xmlBufferPtr buf)
 | 
					xmlBuffer_to_xmltype(xmlBufferPtr buf)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -211,7 +332,7 @@ xmlcomment(PG_FUNCTION_ARGS)
 | 
				
			|||||||
{
 | 
					{
 | 
				
			||||||
#ifdef USE_LIBXML
 | 
					#ifdef USE_LIBXML
 | 
				
			||||||
	text *arg = PG_GETARG_TEXT_P(0);
 | 
						text *arg = PG_GETARG_TEXT_P(0);
 | 
				
			||||||
	int len =  VARATT_SIZEP(arg) - VARHDRSZ;
 | 
						int len =  VARSIZE(arg) - VARHDRSZ;
 | 
				
			||||||
	StringInfoData buf;
 | 
						StringInfoData buf;
 | 
				
			||||||
	int i;
 | 
						int i;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -310,7 +431,7 @@ xmlparse(text *data, bool is_document, bool preserve_whitespace)
 | 
				
			|||||||
#ifdef USE_LIBXML
 | 
					#ifdef USE_LIBXML
 | 
				
			||||||
	xmlDocPtr	doc;
 | 
						xmlDocPtr	doc;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	doc = xml_parse(data, is_document, preserve_whitespace);
 | 
						doc = xml_parse(data, is_document, preserve_whitespace, NULL);
 | 
				
			||||||
	xmlFreeDoc(doc);
 | 
						xmlFreeDoc(doc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return (xmltype *) data;
 | 
						return (xmltype *) data;
 | 
				
			||||||
@@ -383,7 +504,7 @@ xmlroot(xmltype *data, text *version, int standalone)
 | 
				
			|||||||
	xmlBufferPtr buffer;
 | 
						xmlBufferPtr buffer;
 | 
				
			||||||
	xmlSaveCtxtPtr save;
 | 
						xmlSaveCtxtPtr save;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	doc = xml_parse((text *) data, true, true);
 | 
						doc = xml_parse((text *) data, true, true, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (version)
 | 
						if (version)
 | 
				
			||||||
		doc->version = xmlStrdup(xml_text2xmlChar(version));
 | 
							doc->version = xmlStrdup(xml_text2xmlChar(version));
 | 
				
			||||||
@@ -404,13 +525,16 @@ xmlroot(xmltype *data, text *version, int standalone)
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	buffer = xmlBufferCreate();
 | 
						buffer = xmlBufferCreate();
 | 
				
			||||||
	save = xmlSaveToBuffer(buffer, NULL, 0);
 | 
						save = xmlSaveToBuffer(buffer, "UTF-8", 0);
 | 
				
			||||||
	xmlSaveDoc(save, doc);
 | 
						xmlSaveDoc(save, doc);
 | 
				
			||||||
	xmlSaveClose(save);
 | 
						xmlSaveClose(save);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	xmlFreeDoc(doc);
 | 
						xmlFreeDoc(doc);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	result = xmlBuffer_to_xmltype(buffer);
 | 
						result = cstring_to_xmltype((char *) pg_do_encoding_conversion((unsigned char *) xmlBufferContent(buffer),
 | 
				
			||||||
 | 
																					   xmlBufferLength(buffer),
 | 
				
			||||||
 | 
																					   PG_UTF8,
 | 
				
			||||||
 | 
																					   GetDatabaseEncoding()));
 | 
				
			||||||
	xmlBufferFree(buffer);
 | 
						xmlBufferFree(buffer);
 | 
				
			||||||
	return result;
 | 
						return result;
 | 
				
			||||||
#else
 | 
					#else
 | 
				
			||||||
@@ -525,7 +649,7 @@ xml_is_document(xmltype *arg)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	PG_TRY();
 | 
						PG_TRY();
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		doc = xml_parse((text *) arg, true, true);
 | 
							doc = xml_parse((text *) arg, true, true, NULL);
 | 
				
			||||||
		result = true;
 | 
							result = true;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
	PG_CATCH();
 | 
						PG_CATCH();
 | 
				
			||||||
@@ -622,13 +746,21 @@ xml_init(void)
 | 
				
			|||||||
#define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
 | 
					#define SKIP_XML_SPACE(p) while (xmlIsBlank_ch(*(p))) (p)++
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static int
 | 
					static int
 | 
				
			||||||
parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standalone)
 | 
					parse_xml_decl(const xmlChar *str, size_t *lenp, xmlChar **version, xmlChar **encoding, int *standalone)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	const xmlChar *p;
 | 
						const xmlChar *p;
 | 
				
			||||||
	const xmlChar *save_p;
 | 
						const xmlChar *save_p;
 | 
				
			||||||
 | 
						size_t		len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	p = str;
 | 
						p = str;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (version)
 | 
				
			||||||
 | 
							*version = NULL;
 | 
				
			||||||
 | 
						if (encoding)
 | 
				
			||||||
 | 
							*encoding = NULL;
 | 
				
			||||||
 | 
						if (standalone)
 | 
				
			||||||
 | 
							*standalone = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
 | 
						if (xmlStrncmp(p, (xmlChar *)"<?xml", 5) != 0)
 | 
				
			||||||
		goto finished;
 | 
							goto finished;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -645,9 +777,21 @@ parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standal
 | 
				
			|||||||
		return XML_ERR_VERSION_MISSING;
 | 
							return XML_ERR_VERSION_MISSING;
 | 
				
			||||||
	p += 1;
 | 
						p += 1;
 | 
				
			||||||
	SKIP_XML_SPACE(p);
 | 
						SKIP_XML_SPACE(p);
 | 
				
			||||||
	if (xmlStrncmp(p, (xmlChar *)"'1.0'", 5) != 0 && xmlStrncmp(p, (xmlChar *)"\"1.0\"", 5) != 0)
 | 
					
 | 
				
			||||||
 | 
						if (*p == '\'' || *p == '"')
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							const xmlChar *q;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							q = xmlStrchr(p + 1, *p);
 | 
				
			||||||
 | 
							if (!q)
 | 
				
			||||||
 | 
								return XML_ERR_VERSION_MISSING;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (version)
 | 
				
			||||||
 | 
								*version = xmlStrndup(p + 1, q - p - 1);
 | 
				
			||||||
 | 
							p = q + 1;
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
		return XML_ERR_VERSION_MISSING;
 | 
							return XML_ERR_VERSION_MISSING;
 | 
				
			||||||
	p += 5;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* encoding */
 | 
						/* encoding */
 | 
				
			||||||
	save_p = p;
 | 
						save_p = p;
 | 
				
			||||||
@@ -670,6 +814,7 @@ parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standal
 | 
				
			|||||||
			if (!q)
 | 
								if (!q)
 | 
				
			||||||
				return XML_ERR_MISSING_ENCODING;
 | 
									return XML_ERR_MISSING_ENCODING;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
								if (encoding)
 | 
				
			||||||
			*encoding = xmlStrndup(p + 1, q - p - 1);
 | 
								*encoding = xmlStrndup(p + 1, q - p - 1);
 | 
				
			||||||
			p = q + 1;
 | 
								p = q + 1;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
@@ -679,7 +824,6 @@ parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standal
 | 
				
			|||||||
	else
 | 
						else
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		p = save_p;
 | 
							p = save_p;
 | 
				
			||||||
		*encoding = NULL;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* standalone */
 | 
						/* standalone */
 | 
				
			||||||
@@ -710,7 +854,6 @@ parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standal
 | 
				
			|||||||
	else
 | 
						else
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		p = save_p;
 | 
							p = save_p;
 | 
				
			||||||
		*standalone = -1;
 | 
					 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	SKIP_XML_SPACE(p);
 | 
						SKIP_XML_SPACE(p);
 | 
				
			||||||
@@ -719,8 +862,15 @@ parse_xml_decl(const xmlChar *str, size_t *len, xmlChar **encoding, int *standal
 | 
				
			|||||||
	p += 2;
 | 
						p += 2;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
finished:
 | 
					finished:
 | 
				
			||||||
	if (len)
 | 
						len = p - str;
 | 
				
			||||||
		*len = (p - str);
 | 
					
 | 
				
			||||||
 | 
						for (p = str; p < str + len; p++)
 | 
				
			||||||
 | 
							if (*p > 127)
 | 
				
			||||||
 | 
								return XML_ERR_INVALID_CHAR;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (lenp)
 | 
				
			||||||
 | 
							*lenp = len;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return XML_ERR_OK;
 | 
						return XML_ERR_OK;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -732,17 +882,24 @@ finished:
 | 
				
			|||||||
 * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
 | 
					 * TODO what about internal URI for docs? (see PG_XML_DEFAULT_URI below)
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static xmlDocPtr
 | 
					static xmlDocPtr
 | 
				
			||||||
xml_parse(text *data, bool is_document, bool preserve_whitespace)
 | 
					xml_parse(text *data, bool is_document, bool preserve_whitespace, xmlChar *encoding)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	int					res_code;
 | 
					 | 
				
			||||||
	int32				len;
 | 
						int32				len;
 | 
				
			||||||
	xmlChar				*string;
 | 
						xmlChar				*string;
 | 
				
			||||||
 | 
						xmlChar				*utf8string;
 | 
				
			||||||
	xmlParserCtxtPtr 	ctxt = NULL;
 | 
						xmlParserCtxtPtr 	ctxt = NULL;
 | 
				
			||||||
	xmlDocPtr 			doc = NULL;
 | 
						xmlDocPtr 			doc = NULL;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
 | 
						len = VARSIZE(data) - VARHDRSZ; /* will be useful later */
 | 
				
			||||||
	string = xml_text2xmlChar(data);
 | 
						string = xml_text2xmlChar(data);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						utf8string = pg_do_encoding_conversion(string,
 | 
				
			||||||
 | 
															   len,
 | 
				
			||||||
 | 
															   encoding
 | 
				
			||||||
 | 
															   ? pg_char_to_encoding((char *) encoding)
 | 
				
			||||||
 | 
															   : GetDatabaseEncoding(),
 | 
				
			||||||
 | 
															   PG_UTF8);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	xml_init();
 | 
						xml_init();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* We use a PG_TRY block to ensure libxml is cleaned up on error */
 | 
						/* We use a PG_TRY block to ensure libxml is cleaned up on error */
 | 
				
			||||||
@@ -762,8 +919,9 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
 | 
				
			|||||||
			 * As for external DTDs, we try to support them too, (see
 | 
								 * As for external DTDs, we try to support them too, (see
 | 
				
			||||||
			 * SQL/XML:10.16.7.e)
 | 
								 * SQL/XML:10.16.7.e)
 | 
				
			||||||
			 */
 | 
								 */
 | 
				
			||||||
			doc = xmlCtxtReadMemory(ctxt, (char *) string, len,
 | 
								doc = xmlCtxtReadDoc(ctxt, utf8string,
 | 
				
			||||||
									PG_XML_DEFAULT_URI, NULL,
 | 
													 PG_XML_DEFAULT_URI,
 | 
				
			||||||
 | 
													 "UTF-8",
 | 
				
			||||||
									XML_PARSE_NOENT | XML_PARSE_DTDATTR
 | 
														XML_PARSE_NOENT | XML_PARSE_DTDATTR
 | 
				
			||||||
									| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
 | 
														| (preserve_whitespace ? 0 : XML_PARSE_NOBLANKS));
 | 
				
			||||||
			if (doc == NULL)
 | 
								if (doc == NULL)
 | 
				
			||||||
@@ -772,41 +930,26 @@ xml_parse(text *data, bool is_document, bool preserve_whitespace)
 | 
				
			|||||||
		}
 | 
							}
 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
 | 
								int			res_code;
 | 
				
			||||||
			size_t count;
 | 
								size_t count;
 | 
				
			||||||
			xmlChar *encoding = NULL;
 | 
								xmlChar	   *version = NULL;
 | 
				
			||||||
			int standalone = -1;
 | 
								int standalone = -1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			doc = xmlNewDoc(NULL);
 | 
								doc = xmlNewDoc(NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			res_code = parse_xml_decl(string, &count, &encoding, &standalone);
 | 
								res_code = parse_xml_decl(utf8string, &count, &version, NULL, &standalone);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			/* TODO resolve: xmlParseBalancedChunkMemory assumes that string is UTF8 encoded! */
 | 
					 | 
				
			||||||
			if (res_code == 0)
 | 
								if (res_code == 0)
 | 
				
			||||||
				res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, string + count, NULL);
 | 
									res_code = xmlParseBalancedChunkMemory(doc, NULL, NULL, 0, utf8string + count, NULL);
 | 
				
			||||||
			if (res_code != 0)
 | 
								if (res_code != 0)
 | 
				
			||||||
				xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
 | 
									xml_ereport_by_code(ERROR, ERRCODE_INVALID_XML_CONTENT,
 | 
				
			||||||
									"invalid XML content", res_code);
 | 
														"invalid XML content", res_code);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			doc->encoding = encoding;
 | 
								doc->version = xmlStrdup(version);
 | 
				
			||||||
 | 
								doc->encoding = xmlStrdup((xmlChar *) "UTF-8");
 | 
				
			||||||
			doc->standalone = standalone;
 | 
								doc->standalone = standalone;
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/* TODO encoding issues
 | 
					 | 
				
			||||||
		 * (thoughts:
 | 
					 | 
				
			||||||
		 * 		CASE:
 | 
					 | 
				
			||||||
		 *   		- XML data has explicit encoding attribute in its prolog
 | 
					 | 
				
			||||||
		 *   		- if not, assume that enc. of XML data is the same as client's one
 | 
					 | 
				
			||||||
		 *
 | 
					 | 
				
			||||||
		 * 		The common rule is to accept the XML data only if its encoding
 | 
					 | 
				
			||||||
		 * 		is the same as encoding of the storage (server's). The other possible
 | 
					 | 
				
			||||||
		 * 		option is to accept all the docs, but DO TRANSFORMATION and, if needed,
 | 
					 | 
				
			||||||
		 * 		change the prolog.
 | 
					 | 
				
			||||||
		 *
 | 
					 | 
				
			||||||
		 * 		I think I'd stick the first way (for the 1st version),
 | 
					 | 
				
			||||||
		 * 		it's much simplier (less errors...)
 | 
					 | 
				
			||||||
		 * ) */
 | 
					 | 
				
			||||||
		/* ... */
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
		if (ctxt)
 | 
							if (ctxt)
 | 
				
			||||||
			xmlFreeParserCtxt(ctxt);
 | 
								xmlFreeParserCtxt(ctxt);
 | 
				
			||||||
		xmlCleanupParser();
 | 
							xmlCleanupParser();
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -177,8 +177,7 @@ SELECT xmlpi(name foo, '   bar');
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
SELECT xmlroot(xml '<foo/>', version no value, standalone no value);
 | 
					SELECT xmlroot(xml '<foo/>', version no value, standalone no value);
 | 
				
			||||||
        xmlroot        
 | 
					        xmlroot        
 | 
				
			||||||
-----------------------
 | 
					---------
 | 
				
			||||||
 <?xml version="1.0"?>
 | 
					 | 
				
			||||||
 <foo/>
 | 
					 <foo/>
 | 
				
			||||||
 
 | 
					 
 | 
				
			||||||
(1 row)
 | 
					(1 row)
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user