Mirror of https://gitlab.gnome.org/GNOME/libxml2.git (synced 2025-07-29 11:41:22 +03:00)
html: Support encoding auto-detection in push parser
Align with pull parser.
This commit is contained in:
16
HTMLparser.c
16
HTMLparser.c
@ -4935,6 +4935,14 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
*/
|
*/
|
||||||
goto done;
|
goto done;
|
||||||
case XML_PARSER_START:
|
case XML_PARSER_START:
|
||||||
|
/*
|
||||||
|
* Very first chars read from the document flow.
|
||||||
|
*/
|
||||||
|
if ((!terminate) && (avail < 4))
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
xmlDetectEncoding(ctxt);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is wrong but matches long-standing behavior. In most
|
* This is wrong but matches long-standing behavior. In most
|
||||||
* cases, a document starting with an XML declaration will
|
* cases, a document starting with an XML declaration will
|
||||||
@ -4945,6 +4953,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
|
xmlSwitchEncoding(ctxt, XML_CHAR_ENCODING_UTF8);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* fall through */
|
||||||
|
|
||||||
|
case XML_PARSER_XML_DECL:
|
||||||
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
|
if ((ctxt->sax) && (ctxt->sax->setDocumentLocator)) {
|
||||||
ctxt->sax->setDocumentLocator(ctxt->userData,
|
ctxt->sax->setDocumentLocator(ctxt->userData,
|
||||||
(xmlSAXLocator *) &xmlDefaultSAXLocator);
|
(xmlSAXLocator *) &xmlDefaultSAXLocator);
|
||||||
@ -4953,8 +4964,9 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
(!ctxt->disableSAX))
|
(!ctxt->disableSAX))
|
||||||
ctxt->sax->startDocument(ctxt->userData);
|
ctxt->sax->startDocument(ctxt->userData);
|
||||||
|
|
||||||
/* Allow callback to modify state */
|
/* Allow callback to modify state for tests */
|
||||||
if (ctxt->instate == XML_PARSER_START)
|
if ((ctxt->instate == XML_PARSER_START) ||
|
||||||
|
(ctxt->instate == XML_PARSER_XML_DECL))
|
||||||
ctxt->instate = XML_PARSER_MISC;
|
ctxt->instate = XML_PARSER_MISC;
|
||||||
break;
|
break;
|
||||||
case XML_PARSER_START_TAG: {
|
case XML_PARSER_START_TAG: {
|
||||||
|
@ -1797,6 +1797,8 @@ htmlTokenizerTest(const char *filename, const char *result,
|
|||||||
config.startTag = BAD_CAST startTag;
|
config.startTag = BAD_CAST startTag;
|
||||||
config.inCharacters = 0;
|
config.inCharacters = 0;
|
||||||
ctxt->_private = &config;
|
ctxt->_private = &config;
|
||||||
|
/* Skip charset auto-detection */
|
||||||
|
ctxt->instate = XML_PARSER_XML_DECL;
|
||||||
htmlCtxtUseOptions(ctxt, options | HTML_PARSE_HTML5);
|
htmlCtxtUseOptions(ctxt, options | HTML_PARSE_HTML5);
|
||||||
htmlParseChunk(ctxt, data, size, 1);
|
htmlParseChunk(ctxt, data, size, 1);
|
||||||
htmlFreeParserCtxt(ctxt);
|
htmlFreeParserCtxt(ctxt);
|
||||||
|
Reference in New Issue
Block a user