From 094fc08a09a75feb694837b580bad0401d1e6a0a Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Mon, 10 Jan 2022 14:02:10 +0100 Subject: [PATCH] Fix regression when parsing invalid HTML tags in push mode Revert part of commit 173a0830 that changed behavior when parsing malformed start tags with the push parser. This reintroduces quadratic behavior in recovery mode which will be worked around in the next commit. Fixes #312. --- HTMLparser.c | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 02d476f9..d9d8d00d 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -5992,32 +5992,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } else if (cur == '<') { if ((!terminate) && (next == 0)) goto done; - /* - * Only switch to START_TAG if the next character - * starts a valid name. Otherwise, htmlParseStartTag - * might return without consuming all characters - * up to the final '>'. - */ - if ((IS_ASCII_LETTER(next)) || - (next == '_') || (next == ':') || (next == '.')) { - ctxt->instate = XML_PARSER_START_TAG; - ctxt->checkIndex = 0; + ctxt->instate = XML_PARSER_START_TAG; + ctxt->checkIndex = 0; #ifdef DEBUG_PUSH - xmlGenericError(xmlGenericErrorContext, - "HPP: entering START_TAG\n"); + xmlGenericError(xmlGenericErrorContext, + "HPP: entering START_TAG\n"); #endif - } else { - htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED, - "htmlParseTryOrFinish: " - "invalid element name\n", - NULL, NULL); - htmlCheckParagraph(ctxt); - if ((ctxt->sax != NULL) && - (ctxt->sax->characters != NULL)) - ctxt->sax->characters(ctxt->userData, - in->cur, 1); - NEXT; - } break; } else { /*