diff --git a/HTMLparser.c b/HTMLparser.c
index 0c2cdb31..cd80380e 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -4428,43 +4428,25 @@ htmlParseDocument(htmlParserCtxt *ctxt) {
ctxt->sax->startDocument(ctxt->userData);
/*
- * Parse possible comments and PIs before any content
+ * Parse possible comments, PIs or doctype declarations
+ * before any content.
*/
+ ctxt->instate = XML_PARSER_MISC;
while (CUR == '<') {
- if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
- SKIP(4);
- htmlParseComment(ctxt, /* bogus */ 0);
- } else if (NXT(1) == '?') {
- SKIP(1);
- htmlParseComment(ctxt, /* bogus */ 1);
- } else {
- break;
- }
- SKIP_BLANKS;
- }
-
- /*
- * Then possibly doc type declaration(s) and more Misc
- * (doctypedecl Misc*)?
- */
- if ((CUR == '<') && (NXT(1) == '!') &&
- (UPP(2) == 'D') && (UPP(3) == 'O') &&
- (UPP(4) == 'C') && (UPP(5) == 'T') &&
- (UPP(6) == 'Y') && (UPP(7) == 'P') &&
- (UPP(8) == 'E')) {
- ctxt->instate = XML_PARSER_MISC;
- htmlParseDocTypeDecl(ctxt);
- }
- SKIP_BLANKS;
-
- /*
- * Parse possible comments and PIs before any content
- */
- ctxt->instate = XML_PARSER_PROLOG;
- while (CUR == '<') {
- if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
- SKIP(4);
- htmlParseComment(ctxt, /* bogus */ 0);
+ if (NXT(1) == '!') {
+ if ((NXT(2) == '-') && (NXT(3) == '-')) {
+ SKIP(4);
+ htmlParseComment(ctxt, /* bogus */ 0);
+ } else if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
+ (UPP(4) == 'C') && (UPP(5) == 'T') &&
+ (UPP(6) == 'Y') && (UPP(7) == 'P') &&
+ (UPP(8) == 'E')) {
+ htmlParseDocTypeDecl(ctxt);
+ ctxt->instate = XML_PARSER_PROLOG;
+ } else {
+ SKIP(2);
+ htmlParseComment(ctxt, /* bogus */ 1);
+ }
} else if (NXT(1) == '?') {
SKIP(1);
htmlParseComment(ctxt, /* bogus */ 1);
@@ -4472,6 +4454,7 @@ htmlParseDocument(htmlParserCtxt *ctxt) {
break;
}
SKIP_BLANKS;
+ GROW;
}
/*