diff --git a/HTMLparser.c b/HTMLparser.c index 58d8ff92..7b407803 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -1522,6 +1522,9 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) const htmlElemDesc *info; int i, priority; + if (ctxt->options & HTML_PARSE_HTML5) + return; + priority = htmlGetEndPriority(newtag); for (i = (ctxt->nameNr - 1); i >= 0; i--) { @@ -1565,6 +1568,9 @@ htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt) { int i; + if (ctxt->options & HTML_PARSE_HTML5) + return; + if (ctxt->nameNr == 0) return; for (i = (ctxt->nameNr - 1); i >= 0; i--) { @@ -1590,6 +1596,9 @@ htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt) static void htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar * newtag) { + if (ctxt->options & HTML_PARSE_HTML5) + return; + if (newtag == NULL) return; @@ -1667,7 +1676,7 @@ static void htmlCheckImplied(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { int i; - if (ctxt->options & HTML_PARSE_NOIMPLIED) + if (ctxt->options & (HTML_PARSE_NOIMPLIED | HTML_PARSE_HTML5)) return; if (!htmlOmittedDefaultValue) return; @@ -1738,6 +1747,9 @@ htmlCheckParagraph(htmlParserCtxtPtr ctxt) { if (ctxt == NULL) return(-1); + if (ctxt->options & HTML_PARSE_HTML5) + return(0); + tag = ctxt->name; if (tag == NULL) { htmlAutoClose(ctxt, BAD_CAST"p"); @@ -3893,6 +3905,11 @@ failed: * SAX: Start of Element ! */ if (!discardtag) { + if (ctxt->options & HTML_PARSE_HTML5) { + if (ctxt->nameNr > 0) + htmlnamePop(ctxt); + } + htmlnamePush(ctxt, name); if ((ctxt->sax != NULL) && (ctxt->sax->startElement != NULL)) { if (nbatts != 0) @@ -3978,6 +3995,12 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt) "End tag : expected '>'\n", NULL, NULL); } + if (ctxt->options & HTML_PARSE_HTML5) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + return(0); + } + /* * if we ignored misplaced tags in htmlParseStartTag don't pop them * out now. @@ -4217,8 +4240,10 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) { if ((CUR == '/') && (NXT(1) == '>')) { SKIP(2); htmlParserFinishElementParsing(ctxt); - if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, name); + if ((ctxt->options & HTML_PARSE_HTML5) == 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + } htmlnamePop(ctxt); return(0); } @@ -4245,8 +4270,10 @@ htmlParseElementInternal(htmlParserCtxtPtr ctxt) { */ if ((info != NULL) && (info->empty)) { htmlParserFinishElementParsing(ctxt); - if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, name); + if ((ctxt->options & HTML_PARSE_HTML5) == 0) { + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + } htmlnamePop(ctxt); return(0); } @@ -5208,8 +5235,11 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { if ((CUR == '/') && (NXT(1) == '>')) { SKIP(2); htmlParserFinishElementParsing(ctxt); - if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, name); + if ((ctxt->options & HTML_PARSE_HTML5) == 0) { + if ((ctxt->sax != NULL) && + (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + } htmlnamePop(ctxt); ctxt->instate = XML_PARSER_CONTENT; break; @@ -5243,8 +5273,11 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { */ if ((info != NULL) && (info->empty)) { htmlParserFinishElementParsing(ctxt); - if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) - ctxt->sax->endElement(ctxt->userData, name); + if ((ctxt->options & HTML_PARSE_HTML5) == 0) { + if ((ctxt->sax != NULL) && + (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, name); + } htmlnamePop(ctxt); } @@ -6032,6 +6065,10 @@ htmlCtxtUseOptions(htmlParserCtxtPtr ctxt, int options) ctxt->options |= HTML_PARSE_NOIMPLIED; options -= HTML_PARSE_NOIMPLIED; } + if (options & HTML_PARSE_HTML5) { + ctxt->options |= HTML_PARSE_HTML5; + options -= HTML_PARSE_HTML5; + } ctxt->dictNames = 0; ctxt->linenumbers = 1; return (options); diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index 08d7d0ae..4e73e5ef 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -206,6 +206,7 @@ XMLPUBFUN void */ typedef enum { HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ + HTML_PARSE_HTML5 = 1<<1, /* HTML5 support */ HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */ HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */