From c7c496434205bb51aa8873e434ca282bbb8d1fe4 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Fri, 9 May 2025 15:26:15 +0200 Subject: [PATCH] html: Move DTD creation to endDocument SAX callback --- HTMLparser.c | 27 --------------------------- SAX2.c | 17 ++++++++++++++++- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index f3e04834..1161b2b3 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -4331,8 +4331,6 @@ htmlCtxtParseContentInternal(htmlParserCtxtPtr ctxt, xmlParserInputPtr input) { */ int htmlParseDocument(htmlParserCtxtPtr ctxt) { - xmlDtdPtr dtd; - if ((ctxt == NULL) || (ctxt->input == NULL)) return(-1); @@ -4429,17 +4427,6 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); - if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && (ctxt->myDoc != NULL)) { - dtd = xmlGetIntSubset(ctxt->myDoc); - if (dtd == NULL) { - ctxt->myDoc->intSubset = - xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", - BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", - BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); - if (ctxt->myDoc->intSubset == NULL) - htmlErrMemory(ctxt); - } - } if (! ctxt->wellFormed) return(-1); return(0); } @@ -5158,20 +5145,6 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, if ((ctxt->sax) && (ctxt->sax->endDocument != NULL)) ctxt->sax->endDocument(ctxt->userData); - if ((!(ctxt->options & HTML_PARSE_NODEFDTD)) && - (ctxt->myDoc != NULL)) { - xmlDtdPtr dtd; - dtd = xmlGetIntSubset(ctxt->myDoc); - if (dtd == NULL) { - ctxt->myDoc->intSubset = - xmlCreateIntSubset(ctxt->myDoc, BAD_CAST "html", - BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", - BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); - if (ctxt->myDoc->intSubset == NULL) - htmlErrMemory(ctxt); - } - } - ctxt->instate = XML_PARSER_EOF; } diff --git a/SAX2.c b/SAX2.c index 6da5123c..f454bb83 100644 --- a/SAX2.c +++ b/SAX2.c @@ -837,7 +837,10 @@ xmlSAX2EndDocument(void *ctx) #endif /* LIBXML_VALID_ENABLED */ doc = ctxt->myDoc; - if ((doc != NULL) && (doc->encoding == NULL)) { + if (doc == NULL) + return; + + if (doc->encoding == NULL) { const xmlChar *encoding = xmlGetActualEncoding(ctxt); if (encoding != NULL) { @@ -846,6 +849,18 @@ xmlSAX2EndDocument(void *ctx) xmlSAX2ErrMemory(ctxt); } } + +#ifdef LIBXML_HTML_ENABLED + if ((ctxt->html) && + ((ctxt->options & HTML_PARSE_NODEFDTD) == 0) && + (doc->intSubset == NULL)) { + doc->intSubset = xmlCreateIntSubset(doc, BAD_CAST "html", + BAD_CAST "-//W3C//DTD HTML 4.0 Transitional//EN", + BAD_CAST "http://www.w3.org/TR/REC-html40/loose.dtd"); + if (doc->intSubset == NULL) + xmlSAX2ErrMemory(ctxt); + } +#endif /* LIBXML_HTML_ENABLED */ } static void