1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

html: Map some encodings according to HTML5

Windows-1252 is a superset of ISO-8859-1 with respect to printable
characters and should be used in its place, as HTML5 specifies. The same
applies to ASCII.

Also map UCS-2 and UTF-16 to UTF-16LE.
This commit is contained in:
Nick Wellnhofer
2025-05-12 13:00:20 +02:00
parent 93f671064e
commit f0983199e8
6 changed files with 37 additions and 7 deletions

View File

@@ -2745,7 +2745,8 @@ htmlParseData(htmlParserCtxtPtr ctxt, htmlAsciiMask mask,
guess = htmlFindEncoding(ctxt);
#endif
if (guess == NULL) {
-xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
+xmlSwitchEncoding(ctxt,
+XML_CHAR_ENCODING_WINDOWS_1252);
} else {
xmlSwitchEncodingName(ctxt, (const char *) guess);
xmlFree(guess);
@@ -3288,7 +3289,8 @@ htmlParseCharData(htmlParserCtxtPtr ctxt, int partial) {
guess = htmlFindEncoding(ctxt);
#endif
if (guess == NULL) {
-xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_8859_1);
+xmlSwitchEncoding(ctxt,
+XML_CHAR_ENCODING_WINDOWS_1252);
} else {
xmlSwitchEncodingName(ctxt, (const char *) guess);
xmlFree(guess);