1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-07 12:21:17 +03:00

Skip incorrectly opened HTML comments

Commit 4fd69f3e fixed the handling of '<' characters that are not followed
by an ASCII letter. But a '<!' sequence followed by invalid characters
should be treated as a bogus comment and skipped.

Fixes #380.
This commit is contained in:
Nick Wellnhofer
2022-07-15 14:02:26 +02:00
parent 6722d22c88
commit e986d09cf5
6 changed files with 123 additions and 60 deletions

View File

@ -2545,6 +2545,21 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
/**
 * htmlSkipBogusComment:
 * @ctxt:  an HTML parser context
 *
 * Report an XML_HTML_INCORRECTLY_OPENED_COMMENT error, then step through
 * the input with NEXT until either a '>' has been stepped over or CUR
 * yields 0. The closing '>' is consumed; a 0 value is left in place.
 *
 * NOTE(review): CUR and NEXT are macros defined elsewhere in this file;
 * presumably "current input character" and "advance one character" --
 * confirm against their definitions.
 */
static void
htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
    int ch;

    htmlParseErr(ctxt, XML_HTML_INCORRECTLY_OPENED_COMMENT,
                 "Incorrectly opened comment\n", NULL, NULL);

    for (;;) {
        ch = CUR;
        /* Stop without advancing when CUR reports 0. */
        if (ch == 0)
            return;
        NEXT;
        /* The '>' that ends the bogus comment has just been consumed. */
        if (ch == '>')
            return;
    }
}
/**
* htmlParseHTMLName:
* @ctxt: an HTML parser context
@ -4380,26 +4395,28 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
htmlParseScript(ctxt);
}
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
else if ((CUR == '<') && (NXT(1) == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
BAD_CAST "DOCTYPE" , NULL);
htmlParseDocTypeDecl(ctxt);
}
/*
* First case : a comment
*/
else if ((CUR == '<') && (NXT(1) == '!') &&
(NXT(2) == '-') && (NXT(3) == '-')) {
htmlParseComment(ctxt);
else if ((CUR == '<') && (NXT(1) == '!')) {
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
BAD_CAST "DOCTYPE" , NULL);
htmlParseDocTypeDecl(ctxt);
}
/*
* First case : a comment
*/
else if ((NXT(2) == '-') && (NXT(3) == '-')) {
htmlParseComment(ctxt);
}
else {
htmlSkipBogusComment(ctxt);
}
}
/*
@ -4785,26 +4802,28 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
htmlParseScript(ctxt);
}
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
else if ((CUR == '<') && (NXT(1) == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
BAD_CAST "DOCTYPE" , NULL);
htmlParseDocTypeDecl(ctxt);
}
/*
* First case : a comment
*/
else if ((CUR == '<') && (NXT(1) == '!') &&
(NXT(2) == '-') && (NXT(3) == '-')) {
htmlParseComment(ctxt);
else if ((CUR == '<') && (NXT(1) == '!')) {
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
BAD_CAST "DOCTYPE" , NULL);
htmlParseDocTypeDecl(ctxt);
}
/*
* First case : a comment
*/
else if ((NXT(2) == '-') && (NXT(3) == '-')) {
htmlParseComment(ctxt);
}
else {
htmlSkipBogusComment(ctxt);
}
}
/*
@ -5949,31 +5968,37 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
#endif
break;
}
} else if ((cur == '<') && (next == '!') &&
(UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
} else if ((cur == '<') && (next == '!')) {
/*
* Sometimes DOCTYPE arrives in the middle of the document
*/
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
goto done;
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
BAD_CAST "DOCTYPE" , NULL);
htmlParseDocTypeDecl(ctxt);
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
(UPP(4) == 'C') && (UPP(5) == 'T') &&
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
(UPP(8) == 'E')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
goto done;
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
"Misplaced DOCTYPE declaration\n",
BAD_CAST "DOCTYPE" , NULL);
htmlParseDocTypeDecl(ctxt);
} else if ((in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
"HPP: Parsing Comment\n");
xmlGenericError(xmlGenericErrorContext,
"HPP: Parsing Comment\n");
#endif
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
htmlParseComment(ctxt);
ctxt->instate = XML_PARSER_CONTENT;
} else {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
goto done;
htmlSkipBogusComment(ctxt);
}
} else if ((cur == '<') && (next == '?')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))