1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

Fix regression when parsing invalid HTML tags in push mode

Revert part of commit 173a0830 that changed behavior when parsing
malformed start tags with the push parser. This reintroduces quadratic
behavior in recovery mode, which will be worked around in the next
commit.

Fixes #312.
This commit is contained in:
Nick Wellnhofer
2022-01-10 14:02:10 +01:00
parent 2732b23466
commit 094fc08a09

View File

@@ -5992,32 +5992,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
} else if (cur == '<') {
if ((!terminate) && (next == 0))
goto done;
/*
* Only switch to START_TAG if the next character
* starts a valid name. Otherwise, htmlParseStartTag
* might return without consuming all characters
* up to the final '>'.
*/
if ((IS_ASCII_LETTER(next)) ||
(next == '_') || (next == ':') || (next == '.')) {
ctxt->instate = XML_PARSER_START_TAG;
ctxt->checkIndex = 0;
ctxt->instate = XML_PARSER_START_TAG;
ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: entering START_TAG\n");
xmlGenericError(xmlGenericErrorContext,
"HPP: entering START_TAG\n");
#endif
} else {
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
"htmlParseTryOrFinish: "
"invalid element name\n",
NULL, NULL);
htmlCheckParagraph(ctxt);
if ((ctxt->sax != NULL) &&
(ctxt->sax->characters != NULL))
ctxt->sax->characters(ctxt->userData,
in->cur, 1);
NEXT;
}
break;
} else {
/*