From be874d78311a8eb3af294757f7c5af5337cb458f Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer
Date: Wed, 11 Sep 2024 19:47:07 +0200
Subject: [PATCH] html: Ignore unexpected DOCTYPE declarations

---
 HTMLparser.c | 3 +++
 SAX2.c       | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index c332f610..574f4abe 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4343,6 +4343,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
         (UPP(4) == 'C') && (UPP(5) == 'T') &&
         (UPP(6) == 'Y') && (UPP(7) == 'P') &&
         (UPP(8) == 'E')) {
+        ctxt->instate = XML_PARSER_MISC;
         htmlParseDocTypeDecl(ctxt);
     }
     SKIP_BLANKS;
@@ -4350,6 +4351,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
     /*
      * Parse possible comments and PIs before any content
      */
+    ctxt->instate = XML_PARSER_PROLOG;
     while (CUR == '<') {
         if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
             SKIP(4);
@@ -4366,6 +4368,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) {
     /*
      * Time to start parsing the tree itself
      */
+    ctxt->instate = XML_PARSER_CONTENT;
     htmlParseContent(ctxt);
 
     /*
diff --git a/SAX2.c b/SAX2.c
index 1a9310d5..a0de8f86 100644
--- a/SAX2.c
+++ b/SAX2.c
@@ -245,10 +245,10 @@ xmlSAX2InternalSubset(void *ctx, const xmlChar *name,
 
     if (ctxt->myDoc == NULL)
         return;
+    if ((ctxt->html) && (ctxt->instate != XML_PARSER_MISC))
+        return;
     dtd = xmlGetIntSubset(ctxt->myDoc);
     if (dtd != NULL) {
-        if (ctxt->html)
-            return;
         xmlUnlinkNode((xmlNodePtr) dtd);
         xmlFreeDtd(dtd);
         ctxt->myDoc->intSubset = NULL;