mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
html: Reenable buggy detection of XML declarations
Switch to UTF-8 if a document starts with '<?xm' and no encoding has been set on the input yet, to match old behavior. Also enable this check in the push parser. Fixes #637.
This commit is contained in:
18
HTMLparser.c
18
HTMLparser.c
@@ -4851,6 +4851,14 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
|
||||
|
||||
xmlDetectEncoding(ctxt);
|
||||
|
||||
/*
|
||||
* This is wrong but matches long-standing behavior. In most cases,
|
||||
* a document starting with an XML declaration will specify UTF-8.
|
||||
*/
|
||||
if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
|
||||
(xmlStrncmp(ctxt->input->cur, BAD_CAST "<?xm", 4) == 0))
|
||||
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
|
||||
|
||||
/*
|
||||
* Wipe out everything which is before the first '<'
|
||||
*/
|
||||
@@ -5408,6 +5416,16 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
*/
|
||||
goto done;
|
||||
case XML_PARSER_START:
|
||||
/*
|
||||
* This is wrong but matches long-standing behavior. In most
|
||||
* cases, a document starting with an XML declaration will
|
||||
* specify UTF-8.
|
||||
*/
|
||||
if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
|
||||
(xmlStrncmp(ctxt->input->cur, BAD_CAST "<?xm", 4) == 0)) {
|
||||
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
|
||||
}
|
||||
|
||||
/*
|
||||
* Very first chars read from the document flow.
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user