mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
parser: Fix regression when switching input encodings
Revert some changes from commit 98840d40.
WebKit/Chromium can actually switch from ISO-8859-1 to UTF-16 in the
middle of parsing. This is a bad idea, but we have to keep supporting
this use case.
This commit is contained in:
@@ -1177,12 +1177,20 @@ xmlSwitchInputEncoding(xmlParserCtxtPtr ctxt, xmlParserInputPtr input,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (in->encoder != NULL) {
|
if (in->encoder != NULL) {
|
||||||
|
if (in->encoder == handler)
|
||||||
|
return (0);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* TODO: Detect encoding mismatch. We should start by comparing
|
* Switching encodings during parsing is a really bad idea,
|
||||||
* in->encoder->name and handler->name, but there are a few
|
* but WebKit/Chromium switches from ISO-8859-1 to UTF-16 as soon as
|
||||||
* compatible encodings like UTF-16 and UCS-2 or UTF-32 and UCS-4.
|
* it finds Unicode characters with code points larger than 255.
|
||||||
|
*
|
||||||
|
* TODO: We should check whether the "raw" input buffer is empty and
|
||||||
|
* convert the old content using the old encoder.
|
||||||
*/
|
*/
|
||||||
xmlCharEncCloseFunc(handler);
|
|
||||||
|
xmlCharEncCloseFunc(in->encoder);
|
||||||
|
in->encoder = handler;
|
||||||
return (0);
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user