diff --git a/HTMLparser.c b/HTMLparser.c index 0c2cdb31..cd80380e 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -4428,43 +4428,25 @@ htmlParseDocument(htmlParserCtxt *ctxt) { ctxt->sax->startDocument(ctxt->userData); /* - * Parse possible comments and PIs before any content + * Parse possible comments, PIs or doctype declarations + * before any content. */ + ctxt->instate = XML_PARSER_MISC; while (CUR == '<') { - if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) { - SKIP(4); - htmlParseComment(ctxt, /* bogus */ 0); - } else if (NXT(1) == '?') { - SKIP(1); - htmlParseComment(ctxt, /* bogus */ 1); - } else { - break; - } - SKIP_BLANKS; - } - - /* - * Then possibly doc type declaration(s) and more Misc - * (doctypedecl Misc*)? - */ - if ((CUR == '<') && (NXT(1) == '!') && - (UPP(2) == 'D') && (UPP(3) == 'O') && - (UPP(4) == 'C') && (UPP(5) == 'T') && - (UPP(6) == 'Y') && (UPP(7) == 'P') && - (UPP(8) == 'E')) { - ctxt->instate = XML_PARSER_MISC; - htmlParseDocTypeDecl(ctxt); - } - SKIP_BLANKS; - - /* - * Parse possible comments and PIs before any content - */ - ctxt->instate = XML_PARSER_PROLOG; - while (CUR == '<') { - if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) { - SKIP(4); - htmlParseComment(ctxt, /* bogus */ 0); + if (NXT(1) == '!') { + if ((NXT(2) == '-') && (NXT(3) == '-')) { + SKIP(4); + htmlParseComment(ctxt, /* bogus */ 0); + } else if ((UPP(2) == 'D') && (UPP(3) == 'O') && + (UPP(4) == 'C') && (UPP(5) == 'T') && + (UPP(6) == 'Y') && (UPP(7) == 'P') && + (UPP(8) == 'E')) { + htmlParseDocTypeDecl(ctxt); + ctxt->instate = XML_PARSER_PROLOG; + } else { + SKIP(2); + htmlParseComment(ctxt, /* bogus */ 1); + } } else if (NXT(1) == '?') { SKIP(1); htmlParseComment(ctxt, /* bogus */ 1); @@ -4472,6 +4454,7 @@ htmlParseDocument(htmlParserCtxt *ctxt) { break; } SKIP_BLANKS; + GROW; } /*