1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-24 13:33:01 +03:00

Fix quadratic runtime in HTML parser

Commit eeb99329 removed an important optimization avoiding quadratic
runtime when repeatedly scanning the input buffer for terminating
characters in the HTML push parser. The related bug is:

    https://bugzilla.gnome.org/show_bug.cgi?id=444994

Make sure that ctxt->checkIndex is always written, and store the additional
parser state in ctxt->inSubset, which is otherwise unused in the HTML parser.

Found by OSS-Fuzz.
This commit is contained in:
Nick Wellnhofer
2020-06-28 15:54:23 +02:00
parent f8329fdc23
commit 477c7f6aff
5 changed files with 152 additions and 3 deletions

View File

@@ -5158,8 +5158,12 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
if (base < 0)
return (-1);
-if (ctxt->checkIndex > base)
+if (ctxt->checkIndex > base) {
 base = ctxt->checkIndex;
+/* Abuse inSubset member to restore current state. */
+incomment = ctxt->inSubset & 1 ? 1 : 0;
+invalue = ctxt->inSubset & 2 ? 1 : 0;
+}
if (in->buf == NULL) {
buf = in->base;
@@ -5235,8 +5239,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
return (base - (in->cur - in->base));
}
}
-if ((!incomment) && (!invalue))
-ctxt->checkIndex = base;
+ctxt->checkIndex = base;
+/* Abuse inSubset member to track current state. */
+ctxt->inSubset = 0;
+if (incomment)
+ctxt->inSubset |= 1;
+if (invalue)
+ctxt->inSubset |= 2;
#ifdef DEBUG_PUSH
if (next == 0)
xmlGenericError(xmlGenericErrorContext,