mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-08-07 06:43:02 +03:00
Different approach to fix quadratic behavior in HTML push parser
The old approach introduced a regression; see issue #312 and the previous commit. Instead, disable the code that tries to recover from invalid start tags. This only affects "recovery" mode. Also add a comment outlining a better fix in accordance with the HTML5 spec.
This commit is contained in:
14
HTMLparser.c
14
HTMLparser.c
@@ -3958,13 +3958,25 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
|||||||
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
||||||
"htmlParseStartTag: invalid element name\n",
|
"htmlParseStartTag: invalid element name\n",
|
||||||
NULL, NULL);
|
NULL, NULL);
|
||||||
|
/*
|
||||||
|
* The recovery code is disabled for now as it can result in
|
||||||
|
* quadratic behavior with the push parser. htmlParseStartTag
|
||||||
|
* must consume all content up to the final '>' in order to avoid
|
||||||
|
* rescanning for this terminator.
|
||||||
|
*
|
||||||
|
* For a proper fix in line with HTML5, htmlParseStartTag and
|
||||||
|
* htmlParseElement should only be called when there's an ASCII
|
||||||
|
* alpha character following the initial '<'. Otherwise, the '<'
|
||||||
|
* should be emitted as text (unless followed by '!', '/' or '?').
|
||||||
|
*/
|
||||||
|
#if 0
|
||||||
/* if recover preserve text on classic misconstructs */
|
/* if recover preserve text on classic misconstructs */
|
||||||
if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
|
if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
|
||||||
(CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
|
(CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
|
||||||
htmlParseCharDataInternal(ctxt, '<');
|
htmlParseCharDataInternal(ctxt, '<');
|
||||||
return(-1);
|
return(-1);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Dump the bogus tag like browsers do */
|
/* Dump the bogus tag like browsers do */
|
||||||
while ((CUR != 0) && (CUR != '>') &&
|
while ((CUR != 0) && (CUR != '>') &&
|
||||||
|
Reference in New Issue
Block a user