1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Ignore XML declaration in xpath_internal(), for UTF8 databases.

When a value contained an XML declaration naming some other encoding,
this function interpreted UTF8 bytes as the named encoding, yielding
mojibake.  xml_parse() already has similar logic.  This would be
necessary but not sufficient for non-UTF8 databases, so preserve
behavior there until the xpath facility can support such databases
comprehensively.  Back-patch to 9.3 (all supported versions).

Pavel Stehule and Noah Misch

Discussion: https://postgr.es/m/CAFj8pRC-dM=tT=QkGi+Achkm+gwPmjyOayGuUfXVumCxkDgYWg@mail.gmail.com
This commit is contained in:
Noah Misch
2017-11-11 11:10:53 -08:00
parent 5edc63bda6
commit 2918fcedbf
5 changed files with 142 additions and 1 deletions

View File

@@ -3845,6 +3845,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
int32 xpath_len;
xmlChar *string;
xmlChar *xpath_expr;
size_t xmldecl_len = 0;
int i;
int ndim;
Datum *ns_names_uris;
@@ -3900,6 +3901,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
string = pg_xmlCharStrndup(datastr, len);
xpath_expr = pg_xmlCharStrndup(VARDATA_ANY(xpath_expr_text), xpath_len);
/*
* In a UTF8 database, skip any xml declaration, which might assert
* another encoding. Ignore parse_xml_decl() failure, letting
* xmlCtxtReadMemory() report parse errors. Documentation disclaims
* xpath() support for non-ASCII data in non-UTF8 databases, so leave
* those scenarios bug-compatible with historical behavior.
*/
if (GetDatabaseEncoding() == PG_UTF8)
parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
PG_TRY();
@@ -3914,7 +3925,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
if (ctxt == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
"could not allocate parser context");
doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
len - xmldecl_len, NULL, NULL, 0);
if (doc == NULL || xmlerrcxt->err_occurred)
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
"could not parse XML document");