mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-19 05:43:19 +03:00
fixed a nasty bug #119387, bad heuristic from the progressive HTML parser
* HTMLparser.c: fixed a nasty bug #119387: a bad heuristic in the progressive HTML parser front-end on large character data islands led to an erroneous end-of-data detection by the parser. Also some cleanup to get closer to the XML progressive parser. Daniel
This commit is contained in:
@ -1,3 +1,11 @@
|
|||||||
|
Sat Aug 9 13:41:21 CEST 2003 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* HTMLparser.c: fixed a nasty bug #119387, bad heuristic from
|
||||||
|
the progressive HTML parser front-end on large character data
|
||||||
|
island leading to an erroneous end of data detection by the
|
||||||
|
parser. Some cleanup too to get closer to the XML progressive
|
||||||
|
parser.
|
||||||
|
|
||||||
Sat Aug 9 00:42:47 HKT 2003 William Brack <wbrack@mmm.com.hk>
|
Sat Aug 9 00:42:47 HKT 2003 William Brack <wbrack@mmm.com.hk>
|
||||||
|
|
||||||
* win32/configure.js: Added in support for the ISO8859X
|
* win32/configure.js: Added in support for the ISO8859X
|
||||||
|
33
HTMLparser.c
33
HTMLparser.c
@ -4950,19 +4950,15 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
/* TODO: check generation of subtrees if noent !!! */
|
/* TODO: check generation of subtrees if noent !!! */
|
||||||
htmlParseReference(ctxt);
|
htmlParseReference(ctxt);
|
||||||
} else {
|
} else {
|
||||||
/* TODO Avoid the extra copy, handle directly !!!!!! */
|
|
||||||
/*
|
/*
|
||||||
* Goal of the following test is:
|
* check that the text sequence is complete
|
||||||
* - minimize calls to the SAX 'character' callback
|
* before handing out the data to the parser
|
||||||
* when they are mergeable
|
* to avoid problems with erroneous end of
|
||||||
|
* data detection.
|
||||||
*/
|
*/
|
||||||
if ((ctxt->inputNr == 1) &&
|
|
||||||
(avail < HTML_PARSER_BIG_BUFFER_SIZE)) {
|
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(
|
(htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
|
||||||
ctxt, '<', 0, 0, 0) < 0))
|
|
||||||
goto done;
|
goto done;
|
||||||
}
|
|
||||||
ctxt->checkIndex = 0;
|
ctxt->checkIndex = 0;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
@ -5160,12 +5156,27 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size,
|
|||||||
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
|
xmlGenericError(xmlGenericErrorContext, "HPP: pushed %d\n", size);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if 0
|
||||||
if ((terminate) || (ctxt->input->buf->buffer->use > 80))
|
if ((terminate) || (ctxt->input->buf->buffer->use > 80))
|
||||||
htmlParseTryOrFinish(ctxt, terminate);
|
htmlParseTryOrFinish(ctxt, terminate);
|
||||||
|
#endif
|
||||||
} else if (ctxt->instate != XML_PARSER_EOF) {
|
} else if (ctxt->instate != XML_PARSER_EOF) {
|
||||||
xmlParserInputBufferPush(ctxt->input->buf, 0, "");
|
if ((ctxt->input != NULL) && ctxt->input->buf != NULL) {
|
||||||
htmlParseTryOrFinish(ctxt, terminate);
|
xmlParserInputBufferPtr in = ctxt->input->buf;
|
||||||
|
if ((in->encoder != NULL) && (in->buffer != NULL) &&
|
||||||
|
(in->raw != NULL)) {
|
||||||
|
int nbchars;
|
||||||
|
|
||||||
|
nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
|
||||||
|
if (nbchars < 0) {
|
||||||
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
|
"htmlParseChunk: encoder error\n");
|
||||||
|
return(XML_ERR_INVALID_ENCODING);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
htmlParseTryOrFinish(ctxt, terminate);
|
||||||
if (terminate) {
|
if (terminate) {
|
||||||
if ((ctxt->instate != XML_PARSER_EOF) &&
|
if ((ctxt->instate != XML_PARSER_EOF) &&
|
||||||
(ctxt->instate != XML_PARSER_EPILOG) &&
|
(ctxt->instate != XML_PARSER_EPILOG) &&
|
||||||
|
Reference in New Issue
Block a user