1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-21 14:53:44 +03:00

html: Fix pull-parsing of initial comments and doctype decls

- Before switching to content, also treat any other `<!...>` construct as
  a bogus comment and accept multiple doctype declarations.
- Grow the buffer after each parsed comment, PI or doctype declaration.
This commit is contained in:
Nick Wellnhofer
2025-06-22 13:46:16 +02:00
parent 1c96d5ef51
commit b424bae705

View File

@@ -4428,43 +4428,25 @@ htmlParseDocument(htmlParserCtxt *ctxt) {
ctxt->sax->startDocument(ctxt->userData);
/*
* Parse possible comments and PIs before any content
* Parse possible comments, PIs or doctype declarations
* before any content.
*/
ctxt->instate = XML_PARSER_MISC;
while (CUR == '<') {
if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
SKIP(4);
htmlParseComment(ctxt, /* bogus */ 0);
} else if (NXT(1) == '?') {
SKIP(1);
htmlParseComment(ctxt, /* bogus */ 1);
} else {
break;
}
SKIP_BLANKS;
}
/*
* Then possibly doc type declaration(s) and more Misc
* (doctypedecl Misc*)?
*/
if ((CUR == '<') && (NXT(1) == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
ctxt->instate = XML_PARSER_MISC;
htmlParseDocTypeDecl(ctxt);
}
SKIP_BLANKS;
/*
* Parse possible comments and PIs before any content
*/
ctxt->instate = XML_PARSER_PROLOG;
while (CUR == '<') {
if ((NXT(1) == '!') && (NXT(2) == '-') && (NXT(3) == '-')) {
SKIP(4);
htmlParseComment(ctxt, /* bogus */ 0);
if (NXT(1) == '!') {
if ((NXT(2) == '-') && (NXT(3) == '-')) {
SKIP(4);
htmlParseComment(ctxt, /* bogus */ 0);
} else if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
htmlParseDocTypeDecl(ctxt);
ctxt->instate = XML_PARSER_PROLOG;
} else {
SKIP(2);
htmlParseComment(ctxt, /* bogus */ 1);
}
} else if (NXT(1) == '?') {
SKIP(1);
htmlParseComment(ctxt, /* bogus */ 1);
@@ -4472,6 +4454,7 @@ htmlParseDocument(htmlParserCtxt *ctxt) {
break;
}
SKIP_BLANKS;
GROW;
}
/*