mirror of
https://github.com/postgres/postgres.git
synced 2025-05-08 07:21:33 +03:00
Ignore XML declaration in xpath_internal(), for UTF8 databases.
When a value contained an XML declaration naming some other encoding, this function interpreted UTF8 bytes as the named encoding, yielding mojibake. xml_parse() already has similar logic. This would be necessary but not sufficient for non-UTF8 databases, so preserve behavior there until the xpath facility can support such databases comprehensively. Back-patch to 9.3 (all supported versions).

Pavel Stehule and Noah Misch

Discussion: https://postgr.es/m/CAFj8pRC-dM=tT=QkGi+Achkm+gwPmjyOayGuUfXVumCxkDgYWg@mail.gmail.com
This commit is contained in:
parent
d380d080fa
commit
46fb15f48a
@ -3778,6 +3778,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
|
|||||||
int32 xpath_len;
|
int32 xpath_len;
|
||||||
xmlChar *string;
|
xmlChar *string;
|
||||||
xmlChar *xpath_expr;
|
xmlChar *xpath_expr;
|
||||||
|
size_t xmldecl_len = 0;
|
||||||
int i;
|
int i;
|
||||||
int ndim;
|
int ndim;
|
||||||
Datum *ns_names_uris;
|
Datum *ns_names_uris;
|
||||||
@ -3838,6 +3839,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
|
|||||||
memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
|
memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
|
||||||
xpath_expr[xpath_len] = '\0';
|
xpath_expr[xpath_len] = '\0';
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In a UTF8 database, skip any xml declaration, which might assert
|
||||||
|
* another encoding. Ignore parse_xml_decl() failure, letting
|
||||||
|
* xmlCtxtReadMemory() report parse errors. Documentation disclaims
|
||||||
|
* xpath() support for non-ASCII data in non-UTF8 databases, so leave
|
||||||
|
* those scenarios bug-compatible with historical behavior.
|
||||||
|
*/
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
|
||||||
|
|
||||||
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
|
xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
|
||||||
|
|
||||||
PG_TRY();
|
PG_TRY();
|
||||||
@ -3852,7 +3863,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
|
|||||||
if (ctxt == NULL || xmlerrcxt->err_occurred)
|
if (ctxt == NULL || xmlerrcxt->err_occurred)
|
||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
|
||||||
"could not allocate parser context");
|
"could not allocate parser context");
|
||||||
doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
|
doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
|
||||||
|
len - xmldecl_len, NULL, NULL, 0);
|
||||||
if (doc == NULL || xmlerrcxt->err_occurred)
|
if (doc == NULL || xmlerrcxt->err_occurred)
|
||||||
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
|
xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
|
||||||
"could not parse XML document");
|
"could not parse XML document");
|
||||||
|
@ -670,6 +670,37 @@ SELECT xpath('/nosuchtag', '<root/>');
|
|||||||
{}
|
{}
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- Round-trip non-ASCII data through xpath().
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
|
||||||
|
degree_symbol text;
|
||||||
|
res xml[];
|
||||||
|
BEGIN
|
||||||
|
-- Per the documentation, xpath() doesn't work on non-ASCII data when
|
||||||
|
-- the server encoding is not UTF8. The EXCEPTION block below,
|
||||||
|
-- currently dead code, will be relevant if we remove this limitation.
|
||||||
|
IF current_setting('server_encoding') <> 'UTF8' THEN
|
||||||
|
RAISE LOG 'skip: encoding % unsupported for xml',
|
||||||
|
current_setting('server_encoding');
|
||||||
|
RETURN;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
degree_symbol := convert_from('\xc2b0', 'UTF8');
|
||||||
|
res := xpath('text()', (xml_declaration ||
|
||||||
|
'<x>' || degree_symbol || '</x>')::xml);
|
||||||
|
IF degree_symbol <> res[1]::text THEN
|
||||||
|
RAISE 'expected % (%), got % (%)',
|
||||||
|
degree_symbol, convert_to(degree_symbol, 'UTF8'),
|
||||||
|
res[1], convert_to(res[1]::text, 'UTF8');
|
||||||
|
END IF;
|
||||||
|
EXCEPTION
|
||||||
|
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
|
||||||
|
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
|
||||||
|
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
-- Test xmlexists and xpath_exists
|
-- Test xmlexists and xpath_exists
|
||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
||||||
xmlexists
|
xmlexists
|
||||||
|
@ -576,6 +576,41 @@ LINE 1: SELECT xpath('/nosuchtag', '<root/>');
|
|||||||
^
|
^
|
||||||
DETAIL: This functionality requires the server to be built with libxml support.
|
DETAIL: This functionality requires the server to be built with libxml support.
|
||||||
HINT: You need to rebuild PostgreSQL using --with-libxml.
|
HINT: You need to rebuild PostgreSQL using --with-libxml.
|
||||||
|
-- Round-trip non-ASCII data through xpath().
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
|
||||||
|
degree_symbol text;
|
||||||
|
res xml[];
|
||||||
|
BEGIN
|
||||||
|
-- Per the documentation, xpath() doesn't work on non-ASCII data when
|
||||||
|
-- the server encoding is not UTF8. The EXCEPTION block below,
|
||||||
|
-- currently dead code, will be relevant if we remove this limitation.
|
||||||
|
IF current_setting('server_encoding') <> 'UTF8' THEN
|
||||||
|
RAISE LOG 'skip: encoding % unsupported for xml',
|
||||||
|
current_setting('server_encoding');
|
||||||
|
RETURN;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
degree_symbol := convert_from('\xc2b0', 'UTF8');
|
||||||
|
res := xpath('text()', (xml_declaration ||
|
||||||
|
'<x>' || degree_symbol || '</x>')::xml);
|
||||||
|
IF degree_symbol <> res[1]::text THEN
|
||||||
|
RAISE 'expected % (%), got % (%)',
|
||||||
|
degree_symbol, convert_to(degree_symbol, 'UTF8'),
|
||||||
|
res[1], convert_to(res[1]::text, 'UTF8');
|
||||||
|
END IF;
|
||||||
|
EXCEPTION
|
||||||
|
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
|
||||||
|
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
|
||||||
|
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
ERROR: unsupported XML feature
|
||||||
|
DETAIL: This functionality requires the server to be built with libxml support.
|
||||||
|
HINT: You need to rebuild PostgreSQL using --with-libxml.
|
||||||
|
CONTEXT: PL/pgSQL function inline_code_block line 17 at assignment
|
||||||
-- Test xmlexists and xpath_exists
|
-- Test xmlexists and xpath_exists
|
||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
||||||
ERROR: unsupported XML feature
|
ERROR: unsupported XML feature
|
||||||
|
@ -650,6 +650,37 @@ SELECT xpath('/nosuchtag', '<root/>');
|
|||||||
{}
|
{}
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- Round-trip non-ASCII data through xpath().
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
|
||||||
|
degree_symbol text;
|
||||||
|
res xml[];
|
||||||
|
BEGIN
|
||||||
|
-- Per the documentation, xpath() doesn't work on non-ASCII data when
|
||||||
|
-- the server encoding is not UTF8. The EXCEPTION block below,
|
||||||
|
-- currently dead code, will be relevant if we remove this limitation.
|
||||||
|
IF current_setting('server_encoding') <> 'UTF8' THEN
|
||||||
|
RAISE LOG 'skip: encoding % unsupported for xml',
|
||||||
|
current_setting('server_encoding');
|
||||||
|
RETURN;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
degree_symbol := convert_from('\xc2b0', 'UTF8');
|
||||||
|
res := xpath('text()', (xml_declaration ||
|
||||||
|
'<x>' || degree_symbol || '</x>')::xml);
|
||||||
|
IF degree_symbol <> res[1]::text THEN
|
||||||
|
RAISE 'expected % (%), got % (%)',
|
||||||
|
degree_symbol, convert_to(degree_symbol, 'UTF8'),
|
||||||
|
res[1], convert_to(res[1]::text, 'UTF8');
|
||||||
|
END IF;
|
||||||
|
EXCEPTION
|
||||||
|
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
|
||||||
|
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
|
||||||
|
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
-- Test xmlexists and xpath_exists
|
-- Test xmlexists and xpath_exists
|
||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
||||||
xmlexists
|
xmlexists
|
||||||
|
@ -189,6 +189,38 @@ SELECT xpath('count(//*)=3', '<root><sub/><sub/></root>');
|
|||||||
SELECT xpath('name(/*)', '<root><sub/><sub/></root>');
|
SELECT xpath('name(/*)', '<root><sub/><sub/></root>');
|
||||||
SELECT xpath('/nosuchtag', '<root/>');
|
SELECT xpath('/nosuchtag', '<root/>');
|
||||||
|
|
||||||
|
-- Round-trip non-ASCII data through xpath().
|
||||||
|
DO $$
|
||||||
|
DECLARE
|
||||||
|
xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
|
||||||
|
degree_symbol text;
|
||||||
|
res xml[];
|
||||||
|
BEGIN
|
||||||
|
-- Per the documentation, xpath() doesn't work on non-ASCII data when
|
||||||
|
-- the server encoding is not UTF8. The EXCEPTION block below,
|
||||||
|
-- currently dead code, will be relevant if we remove this limitation.
|
||||||
|
IF current_setting('server_encoding') <> 'UTF8' THEN
|
||||||
|
RAISE LOG 'skip: encoding % unsupported for xml',
|
||||||
|
current_setting('server_encoding');
|
||||||
|
RETURN;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
degree_symbol := convert_from('\xc2b0', 'UTF8');
|
||||||
|
res := xpath('text()', (xml_declaration ||
|
||||||
|
'<x>' || degree_symbol || '</x>')::xml);
|
||||||
|
IF degree_symbol <> res[1]::text THEN
|
||||||
|
RAISE 'expected % (%), got % (%)',
|
||||||
|
degree_symbol, convert_to(degree_symbol, 'UTF8'),
|
||||||
|
res[1], convert_to(res[1]::text, 'UTF8');
|
||||||
|
END IF;
|
||||||
|
EXCEPTION
|
||||||
|
-- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
|
||||||
|
WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
-- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
|
||||||
|
WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
|
||||||
|
END
|
||||||
|
$$;
|
||||||
|
|
||||||
-- Test xmlexists and xpath_exists
|
-- Test xmlexists and xpath_exists
|
||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
||||||
SELECT xmlexists('//town[text() = ''Cwmbran'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
SELECT xmlexists('//town[text() = ''Cwmbran'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
|
||||||
|
Loading…
x
Reference in New Issue
Block a user