mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Ignore XML declaration in xpath_internal(), for UTF8 databases.
When a value contained an XML declaration naming some other encoding, this function interpreted UTF8 bytes as the named encoding, yielding mojibake. xml_parse() already has similar logic. Skipping the declaration would be necessary but not sufficient for non-UTF8 databases, so preserve existing behavior there until the xpath facility can support such databases comprehensively.

Back-patch to 9.3 (all supported versions).

Pavel Stehule and Noah Misch

Discussion: https://postgr.es/m/CAFj8pRC-dM=tT=QkGi+Achkm+gwPmjyOayGuUfXVumCxkDgYWg@mail.gmail.com
This commit is contained in:
		@@ -3792,6 +3792,7 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
 | 
				
			|||||||
	int32		xpath_len;
 | 
						int32		xpath_len;
 | 
				
			||||||
	xmlChar    *string;
 | 
						xmlChar    *string;
 | 
				
			||||||
	xmlChar    *xpath_expr;
 | 
						xmlChar    *xpath_expr;
 | 
				
			||||||
 | 
						size_t		xmldecl_len = 0;
 | 
				
			||||||
	int			i;
 | 
						int			i;
 | 
				
			||||||
	int			ndim;
 | 
						int			ndim;
 | 
				
			||||||
	Datum	   *ns_names_uris;
 | 
						Datum	   *ns_names_uris;
 | 
				
			||||||
@@ -3852,6 +3853,16 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
 | 
				
			|||||||
	memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
 | 
						memcpy(xpath_expr, VARDATA(xpath_expr_text), xpath_len);
 | 
				
			||||||
	xpath_expr[xpath_len] = '\0';
 | 
						xpath_expr[xpath_len] = '\0';
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * In a UTF8 database, skip any xml declaration, which might assert
 | 
				
			||||||
 | 
						 * another encoding.  Ignore parse_xml_decl() failure, letting
 | 
				
			||||||
 | 
						 * xmlCtxtReadMemory() report parse errors.  Documentation disclaims
 | 
				
			||||||
 | 
						 * xpath() support for non-ASCII data in non-UTF8 databases, so leave
 | 
				
			||||||
 | 
						 * those scenarios bug-compatible with historical behavior.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (GetDatabaseEncoding() == PG_UTF8)
 | 
				
			||||||
 | 
							parse_xml_decl(string, &xmldecl_len, NULL, NULL, NULL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 | 
						xmlerrcxt = pg_xml_init(PG_XML_STRICTNESS_ALL);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PG_TRY();
 | 
						PG_TRY();
 | 
				
			||||||
@@ -3866,7 +3877,8 @@ xpath_internal(text *xpath_expr_text, xmltype *data, ArrayType *namespaces,
 | 
				
			|||||||
		if (ctxt == NULL || xmlerrcxt->err_occurred)
 | 
							if (ctxt == NULL || xmlerrcxt->err_occurred)
 | 
				
			||||||
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
 | 
								xml_ereport(xmlerrcxt, ERROR, ERRCODE_OUT_OF_MEMORY,
 | 
				
			||||||
						"could not allocate parser context");
 | 
											"could not allocate parser context");
 | 
				
			||||||
		doc = xmlCtxtReadMemory(ctxt, (char *) string, len, NULL, NULL, 0);
 | 
							doc = xmlCtxtReadMemory(ctxt, (char *) string + xmldecl_len,
 | 
				
			||||||
 | 
													len - xmldecl_len, NULL, NULL, 0);
 | 
				
			||||||
		if (doc == NULL || xmlerrcxt->err_occurred)
 | 
							if (doc == NULL || xmlerrcxt->err_occurred)
 | 
				
			||||||
			xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
 | 
								xml_ereport(xmlerrcxt, ERROR, ERRCODE_INVALID_XML_DOCUMENT,
 | 
				
			||||||
						"could not parse XML document");
 | 
											"could not parse XML document");
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -653,6 +653,37 @@ SELECT xpath('/nosuchtag', '<root/>');
 | 
				
			|||||||
 {}
 | 
					 {}
 | 
				
			||||||
(1 row)
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Round-trip non-ASCII data through xpath().
 | 
				
			||||||
 | 
					DO $$
 | 
				
			||||||
 | 
					DECLARE
 | 
				
			||||||
 | 
					  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
 | 
				
			||||||
 | 
					  degree_symbol text;
 | 
				
			||||||
 | 
					  res xml[];
 | 
				
			||||||
 | 
					BEGIN
 | 
				
			||||||
 | 
					  -- Per the documentation, xpath() doesn't work on non-ASCII data when
 | 
				
			||||||
 | 
					  -- the server encoding is not UTF8.  The EXCEPTION block below,
 | 
				
			||||||
 | 
					  -- currently dead code, will be relevant if we remove this limitation.
 | 
				
			||||||
 | 
					  IF current_setting('server_encoding') <> 'UTF8' THEN
 | 
				
			||||||
 | 
					    RAISE LOG 'skip: encoding % unsupported for xml',
 | 
				
			||||||
 | 
					      current_setting('server_encoding');
 | 
				
			||||||
 | 
					    RETURN;
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  degree_symbol := convert_from('\xc2b0', 'UTF8');
 | 
				
			||||||
 | 
					  res := xpath('text()', (xml_declaration ||
 | 
				
			||||||
 | 
					    '<x>' || degree_symbol || '</x>')::xml);
 | 
				
			||||||
 | 
					  IF degree_symbol <> res[1]::text THEN
 | 
				
			||||||
 | 
					    RAISE 'expected % (%), got % (%)',
 | 
				
			||||||
 | 
					      degree_symbol, convert_to(degree_symbol, 'UTF8'),
 | 
				
			||||||
 | 
					      res[1], convert_to(res[1]::text, 'UTF8');
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					EXCEPTION
 | 
				
			||||||
 | 
					  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
 | 
				
			||||||
 | 
					  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
 | 
				
			||||||
 | 
					  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					END
 | 
				
			||||||
 | 
					$$;
 | 
				
			||||||
-- Test xmlexists and xpath_exists
 | 
					-- Test xmlexists and xpath_exists
 | 
				
			||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
					SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
				
			||||||
 xmlexists 
 | 
					 xmlexists 
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -564,6 +564,41 @@ LINE 1: SELECT xpath('/nosuchtag', '<root/>');
 | 
				
			|||||||
                                   ^
 | 
					                                   ^
 | 
				
			||||||
DETAIL:  This functionality requires the server to be built with libxml support.
 | 
					DETAIL:  This functionality requires the server to be built with libxml support.
 | 
				
			||||||
HINT:  You need to rebuild PostgreSQL using --with-libxml.
 | 
					HINT:  You need to rebuild PostgreSQL using --with-libxml.
 | 
				
			||||||
 | 
					-- Round-trip non-ASCII data through xpath().
 | 
				
			||||||
 | 
					DO $$
 | 
				
			||||||
 | 
					DECLARE
 | 
				
			||||||
 | 
					  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
 | 
				
			||||||
 | 
					  degree_symbol text;
 | 
				
			||||||
 | 
					  res xml[];
 | 
				
			||||||
 | 
					BEGIN
 | 
				
			||||||
 | 
					  -- Per the documentation, xpath() doesn't work on non-ASCII data when
 | 
				
			||||||
 | 
					  -- the server encoding is not UTF8.  The EXCEPTION block below,
 | 
				
			||||||
 | 
					  -- currently dead code, will be relevant if we remove this limitation.
 | 
				
			||||||
 | 
					  IF current_setting('server_encoding') <> 'UTF8' THEN
 | 
				
			||||||
 | 
					    RAISE LOG 'skip: encoding % unsupported for xml',
 | 
				
			||||||
 | 
					      current_setting('server_encoding');
 | 
				
			||||||
 | 
					    RETURN;
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  degree_symbol := convert_from('\xc2b0', 'UTF8');
 | 
				
			||||||
 | 
					  res := xpath('text()', (xml_declaration ||
 | 
				
			||||||
 | 
					    '<x>' || degree_symbol || '</x>')::xml);
 | 
				
			||||||
 | 
					  IF degree_symbol <> res[1]::text THEN
 | 
				
			||||||
 | 
					    RAISE 'expected % (%), got % (%)',
 | 
				
			||||||
 | 
					      degree_symbol, convert_to(degree_symbol, 'UTF8'),
 | 
				
			||||||
 | 
					      res[1], convert_to(res[1]::text, 'UTF8');
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					EXCEPTION
 | 
				
			||||||
 | 
					  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
 | 
				
			||||||
 | 
					  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
 | 
				
			||||||
 | 
					  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					END
 | 
				
			||||||
 | 
					$$;
 | 
				
			||||||
 | 
					ERROR:  unsupported XML feature
 | 
				
			||||||
 | 
					DETAIL:  This functionality requires the server to be built with libxml support.
 | 
				
			||||||
 | 
					HINT:  You need to rebuild PostgreSQL using --with-libxml.
 | 
				
			||||||
 | 
					CONTEXT:  PL/pgSQL function inline_code_block line 17 at assignment
 | 
				
			||||||
-- Test xmlexists and xpath_exists
 | 
					-- Test xmlexists and xpath_exists
 | 
				
			||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
					SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
				
			||||||
ERROR:  unsupported XML feature
 | 
					ERROR:  unsupported XML feature
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -635,6 +635,37 @@ SELECT xpath('/nosuchtag', '<root/>');
 | 
				
			|||||||
 {}
 | 
					 {}
 | 
				
			||||||
(1 row)
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Round-trip non-ASCII data through xpath().
 | 
				
			||||||
 | 
					DO $$
 | 
				
			||||||
 | 
					DECLARE
 | 
				
			||||||
 | 
					  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
 | 
				
			||||||
 | 
					  degree_symbol text;
 | 
				
			||||||
 | 
					  res xml[];
 | 
				
			||||||
 | 
					BEGIN
 | 
				
			||||||
 | 
					  -- Per the documentation, xpath() doesn't work on non-ASCII data when
 | 
				
			||||||
 | 
					  -- the server encoding is not UTF8.  The EXCEPTION block below,
 | 
				
			||||||
 | 
					  -- currently dead code, will be relevant if we remove this limitation.
 | 
				
			||||||
 | 
					  IF current_setting('server_encoding') <> 'UTF8' THEN
 | 
				
			||||||
 | 
					    RAISE LOG 'skip: encoding % unsupported for xml',
 | 
				
			||||||
 | 
					      current_setting('server_encoding');
 | 
				
			||||||
 | 
					    RETURN;
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  degree_symbol := convert_from('\xc2b0', 'UTF8');
 | 
				
			||||||
 | 
					  res := xpath('text()', (xml_declaration ||
 | 
				
			||||||
 | 
					    '<x>' || degree_symbol || '</x>')::xml);
 | 
				
			||||||
 | 
					  IF degree_symbol <> res[1]::text THEN
 | 
				
			||||||
 | 
					    RAISE 'expected % (%), got % (%)',
 | 
				
			||||||
 | 
					      degree_symbol, convert_to(degree_symbol, 'UTF8'),
 | 
				
			||||||
 | 
					      res[1], convert_to(res[1]::text, 'UTF8');
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					EXCEPTION
 | 
				
			||||||
 | 
					  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
 | 
				
			||||||
 | 
					  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
 | 
				
			||||||
 | 
					  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					END
 | 
				
			||||||
 | 
					$$;
 | 
				
			||||||
-- Test xmlexists and xpath_exists
 | 
					-- Test xmlexists and xpath_exists
 | 
				
			||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
					SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
				
			||||||
 xmlexists 
 | 
					 xmlexists 
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -186,6 +186,38 @@ SELECT xpath('count(//*)=3', '<root><sub/><sub/></root>');
 | 
				
			|||||||
SELECT xpath('name(/*)', '<root><sub/><sub/></root>');
 | 
					SELECT xpath('name(/*)', '<root><sub/><sub/></root>');
 | 
				
			||||||
SELECT xpath('/nosuchtag', '<root/>');
 | 
					SELECT xpath('/nosuchtag', '<root/>');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Round-trip non-ASCII data through xpath().
 | 
				
			||||||
 | 
					DO $$
 | 
				
			||||||
 | 
					DECLARE
 | 
				
			||||||
 | 
					  xml_declaration text := '<?xml version="1.0" encoding="ISO-8859-1"?>';
 | 
				
			||||||
 | 
					  degree_symbol text;
 | 
				
			||||||
 | 
					  res xml[];
 | 
				
			||||||
 | 
					BEGIN
 | 
				
			||||||
 | 
					  -- Per the documentation, xpath() doesn't work on non-ASCII data when
 | 
				
			||||||
 | 
					  -- the server encoding is not UTF8.  The EXCEPTION block below,
 | 
				
			||||||
 | 
					  -- currently dead code, will be relevant if we remove this limitation.
 | 
				
			||||||
 | 
					  IF current_setting('server_encoding') <> 'UTF8' THEN
 | 
				
			||||||
 | 
					    RAISE LOG 'skip: encoding % unsupported for xml',
 | 
				
			||||||
 | 
					      current_setting('server_encoding');
 | 
				
			||||||
 | 
					    RETURN;
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  degree_symbol := convert_from('\xc2b0', 'UTF8');
 | 
				
			||||||
 | 
					  res := xpath('text()', (xml_declaration ||
 | 
				
			||||||
 | 
					    '<x>' || degree_symbol || '</x>')::xml);
 | 
				
			||||||
 | 
					  IF degree_symbol <> res[1]::text THEN
 | 
				
			||||||
 | 
					    RAISE 'expected % (%), got % (%)',
 | 
				
			||||||
 | 
					      degree_symbol, convert_to(degree_symbol, 'UTF8'),
 | 
				
			||||||
 | 
					      res[1], convert_to(res[1]::text, 'UTF8');
 | 
				
			||||||
 | 
					  END IF;
 | 
				
			||||||
 | 
					EXCEPTION
 | 
				
			||||||
 | 
					  -- character with byte sequence 0xc2 0xb0 in encoding "UTF8" has no equivalent in encoding "LATIN8"
 | 
				
			||||||
 | 
					  WHEN untranslatable_character THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					  -- default conversion function for encoding "UTF8" to "MULE_INTERNAL" does not exist
 | 
				
			||||||
 | 
					  WHEN undefined_function THEN RAISE LOG 'skip: %', SQLERRM;
 | 
				
			||||||
 | 
					END
 | 
				
			||||||
 | 
					$$;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Test xmlexists and xpath_exists
 | 
					-- Test xmlexists and xpath_exists
 | 
				
			||||||
SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
					SELECT xmlexists('//town[text() = ''Toronto'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
				
			||||||
SELECT xmlexists('//town[text() = ''Cwmbran'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
					SELECT xmlexists('//town[text() = ''Cwmbran'']' PASSING BY REF '<towns><town>Bidford-on-Avon</town><town>Cwmbran</town><town>Bristol</town></towns>');
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user