From 029a04d2650150f918ea88d33ef0f3f84f835632 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Mon, 24 Aug 2009 12:50:23 +0200 Subject: [PATCH] 541335 HTML avoid creating 2 head or 2 body element * HTMLparser.c: check when we see a head or a body tag and avoid autogenerating them * include/libxml/parser.h: the values for ctxt->html change depending on the head or body tags being seen --- HTMLparser.c | 26 +++++++++++++++++++------- include/libxml/parser.h | 5 ++++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 63388109..f5957c58 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -163,6 +163,10 @@ htmlParseErrInt(xmlParserCtxtPtr ctxt, xmlParserErrors error, static int htmlnamePush(htmlParserCtxtPtr ctxt, const xmlChar * value) { + if ((ctxt->html < 3) && (xmlStrEqual(value, BAD_CAST "head"))) + ctxt->html = 3; + if ((ctxt->html < 10) && (xmlStrEqual(value, BAD_CAST "body"))) + ctxt->html = 10; if (ctxt->nameNr >= ctxt->nameMax) { ctxt->nameMax *= 2; ctxt->nameTab = (const xmlChar * *) @@ -1393,16 +1397,24 @@ htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { (xmlStrEqual(newtag, BAD_CAST"link")) || (xmlStrEqual(newtag, BAD_CAST"title")) || (xmlStrEqual(newtag, BAD_CAST"base")))) { - /* - * dropped OBJECT ... i you put it first BODY will be - * assumed ! - */ - htmlnamePush(ctxt, BAD_CAST"head"); - if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) - ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); + if (ctxt->html >= 3) { + /* we already saw or generated a <head> before */ + return; + } + /* + * dropped OBJECT ... i you put it first BODY will be + * assumed ! 
+ */ + htmlnamePush(ctxt, BAD_CAST"head"); + if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) + ctxt->sax->startElement(ctxt->userData, BAD_CAST"head", NULL); } else if ((!xmlStrEqual(newtag, BAD_CAST"noframes")) && (!xmlStrEqual(newtag, BAD_CAST"frame")) && (!xmlStrEqual(newtag, BAD_CAST"frameset"))) { + if (ctxt->html >= 10) { + /* we already saw or generated a <body> before */ + return; + } int i; for (i = 0;i < ctxt->nameNr;i++) { if (xmlStrEqual(ctxt->nameTab[i], BAD_CAST"body")) { diff --git a/include/libxml/parser.h b/include/libxml/parser.h index a42e7e83..148ee038 100644 --- a/include/libxml/parser.h +++ b/include/libxml/parser.h @@ -190,7 +190,10 @@ struct _xmlParserCtxt { const xmlChar *version; /* the XML version string */ const xmlChar *encoding; /* the declared encoding, if any */ int standalone; /* standalone document */ - int html; /* an HTML(1)/Docbook(2) document */ + int html; /* an HTML(1)/Docbook(2) document + * 3 is HTML after <head> + * 10 is HTML after <body> + */ /* Input stream stack */ xmlParserInputPtr input; /* Current input stream */