mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-24 13:33:01 +03:00
Add options to ignore the internal encoding
For both XML and HTML, the document can declare its own encoding: in the XMLDecl for XML, or in a meta element in the HTML head. This adds options to ignore those encodings when the encoding is known in advance, for example if the content has been converted before being passed to the parser. * parser.c include/libxml/parser.h: add the XML_PARSE_IGNORE_ENC option for XML parsing * include/libxml/HTMLparser.h HTMLparser.c: add the HTML_PARSE_IGNORE_ENC option for HTML parsing * HTMLtree.c: fix the handling of saving when an unknown encoding is defined in the meta document header * xmllint.c: add a --noenc option to activate the new parser options
This commit is contained in:
11
HTMLparser.c
11
HTMLparser.c
@@ -3448,7 +3448,8 @@ static void
|
||||
htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
|
||||
const xmlChar *encoding;
|
||||
|
||||
if ((ctxt == NULL) || (attvalue == NULL))
|
||||
if ((ctxt == NULL) || (attvalue == NULL) ||
|
||||
(ctxt->options & HTML_PARSE_IGNORE_ENC))
|
||||
return;
|
||||
|
||||
/* do not change encoding */
|
||||
@@ -3500,7 +3501,9 @@ htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
|
||||
xmlSwitchToEncoding(ctxt, handler);
|
||||
ctxt->charset = XML_CHAR_ENCODING_UTF8;
|
||||
} else {
|
||||
ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
|
||||
htmlParseErr(ctxt, XML_ERR_UNSUPPORTED_ENCODING,
|
||||
"htmlCheckEncoding: unknown encoding %s\n",
|
||||
encoding, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6537,6 +6540,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options)
|
||||
ctxt->options |= HTML_PARSE_NODEFDTD;
|
||||
options -= HTML_PARSE_NODEFDTD;
|
||||
}
|
||||
if (options & HTML_PARSE_IGNORE_ENC) {
|
||||
ctxt->options |= HTML_PARSE_IGNORE_ENC;
|
||||
options -= HTML_PARSE_IGNORE_ENC;
|
||||
}
|
||||
ctxt->dictNames = 0;
|
||||
return (options);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user