mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
Skip incorrectly opened HTML comments
Commit 4fd69f3e
fixed handling of '<' characters not followed by an
ASCII letter. But a '<!' sequence followed by invalid characters should
be treated as a bogus comment and skipped.
Fixes #380.
This commit is contained in:
145
HTMLparser.c
145
HTMLparser.c
@ -2545,6 +2545,21 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
|
||||
|
||||
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
|
||||
|
||||
static void
|
||||
htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
|
||||
int c;
|
||||
|
||||
htmlParseErr(ctxt, XML_HTML_INCORRECTLY_OPENED_COMMENT,
|
||||
"Incorrectly opened comment\n", NULL, NULL);
|
||||
|
||||
do {
|
||||
c = CUR;
|
||||
if (c == 0)
|
||||
break;
|
||||
NEXT;
|
||||
} while (c != '>');
|
||||
}
|
||||
|
||||
/**
|
||||
* htmlParseHTMLName:
|
||||
* @ctxt: an HTML parser context
|
||||
@ -4380,26 +4395,28 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
||||
htmlParseScript(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sometimes DOCTYPE arrives in the middle of the document
|
||||
*/
|
||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||
(UPP(8) == 'E')) {
|
||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||
"Misplaced DOCTYPE declaration\n",
|
||||
BAD_CAST "DOCTYPE" , NULL);
|
||||
htmlParseDocTypeDecl(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* First case : a comment
|
||||
*/
|
||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
htmlParseComment(ctxt);
|
||||
else if ((CUR == '<') && (NXT(1) == '!')) {
|
||||
/*
|
||||
* Sometimes DOCTYPE arrives in the middle of the document
|
||||
*/
|
||||
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||
(UPP(8) == 'E')) {
|
||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||
"Misplaced DOCTYPE declaration\n",
|
||||
BAD_CAST "DOCTYPE" , NULL);
|
||||
htmlParseDocTypeDecl(ctxt);
|
||||
}
|
||||
/*
|
||||
* First case : a comment
|
||||
*/
|
||||
else if ((NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
htmlParseComment(ctxt);
|
||||
}
|
||||
else {
|
||||
htmlSkipBogusComment(ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4785,26 +4802,28 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
|
||||
htmlParseScript(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Sometimes DOCTYPE arrives in the middle of the document
|
||||
*/
|
||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||
(UPP(8) == 'E')) {
|
||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||
"Misplaced DOCTYPE declaration\n",
|
||||
BAD_CAST "DOCTYPE" , NULL);
|
||||
htmlParseDocTypeDecl(ctxt);
|
||||
}
|
||||
|
||||
/*
|
||||
* First case : a comment
|
||||
*/
|
||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
htmlParseComment(ctxt);
|
||||
else if ((CUR == '<') && (NXT(1) == '!')) {
|
||||
/*
|
||||
* Sometimes DOCTYPE arrives in the middle of the document
|
||||
*/
|
||||
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||
(UPP(8) == 'E')) {
|
||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||
"Misplaced DOCTYPE declaration\n",
|
||||
BAD_CAST "DOCTYPE" , NULL);
|
||||
htmlParseDocTypeDecl(ctxt);
|
||||
}
|
||||
/*
|
||||
* First case : a comment
|
||||
*/
|
||||
else if ((NXT(2) == '-') && (NXT(3) == '-')) {
|
||||
htmlParseComment(ctxt);
|
||||
}
|
||||
else {
|
||||
htmlSkipBogusComment(ctxt);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@ -5949,31 +5968,37 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||
(UPP(8) == 'E')) {
|
||||
} else if ((cur == '<') && (next == '!')) {
|
||||
/*
|
||||
* Sometimes DOCTYPE arrives in the middle of the document
|
||||
*/
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
|
||||
goto done;
|
||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||
"Misplaced DOCTYPE declaration\n",
|
||||
BAD_CAST "DOCTYPE" , NULL);
|
||||
htmlParseDocTypeDecl(ctxt);
|
||||
} else if ((cur == '<') && (next == '!') &&
|
||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||
goto done;
|
||||
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||
(UPP(8) == 'E')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
|
||||
goto done;
|
||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||
"Misplaced DOCTYPE declaration\n",
|
||||
BAD_CAST "DOCTYPE" , NULL);
|
||||
htmlParseDocTypeDecl(ctxt);
|
||||
} else if ((in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupCommentEnd(ctxt) < 0))
|
||||
goto done;
|
||||
#ifdef DEBUG_PUSH
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"HPP: Parsing Comment\n");
|
||||
xmlGenericError(xmlGenericErrorContext,
|
||||
"HPP: Parsing Comment\n");
|
||||
#endif
|
||||
htmlParseComment(ctxt);
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
htmlParseComment(ctxt);
|
||||
ctxt->instate = XML_PARSER_CONTENT;
|
||||
} else {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||
goto done;
|
||||
htmlSkipBogusComment(ctxt);
|
||||
}
|
||||
} else if ((cur == '<') && (next == '?')) {
|
||||
if ((!terminate) &&
|
||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||
|
@ -260,6 +260,7 @@ typedef enum {
|
||||
XML_DTD_DUP_TOKEN, /* 541 */
|
||||
XML_HTML_STRUCURE_ERROR = 800,
|
||||
XML_HTML_UNKNOWN_TAG, /* 801 */
|
||||
XML_HTML_INCORRECTLY_OPENED_COMMENT, /* 802 */
|
||||
XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000,
|
||||
XML_RNGP_ATTR_CONFLICT, /* 1001 */
|
||||
XML_RNGP_ATTRIBUTE_CHILDREN, /* 1002 */
|
||||
|
6
result/HTML/issue380.html
Normal file
6
result/HTML/issue380.html
Normal file
@ -0,0 +1,6 @@
|
||||
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||
<html>
|
||||
<body>
|
||||
...
|
||||
</body>
|
||||
</html>
|
6
result/HTML/issue380.html.err
Normal file
6
result/HTML/issue380.html.err
Normal file
@ -0,0 +1,6 @@
|
||||
./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
|
||||
<![if !supportLists]>...<![endif]>
|
||||
^
|
||||
./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
|
||||
<![if !supportLists]>...<![endif]>
|
||||
^
|
20
result/HTML/issue380.html.sax
Normal file
20
result/HTML/issue380.html.sax
Normal file
@ -0,0 +1,20 @@
|
||||
SAX.setDocumentLocator()
|
||||
SAX.startDocument()
|
||||
SAX.startElement(html)
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.startElement(body)
|
||||
SAX.characters(
|
||||
, 5)
|
||||
SAX.error: Incorrectly opened comment
|
||||
SAX.characters(..., 3)
|
||||
SAX.error: Incorrectly opened comment
|
||||
SAX.characters(
|
||||
, 3)
|
||||
SAX.endElement(body)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.endElement(html)
|
||||
SAX.characters(
|
||||
, 1)
|
||||
SAX.endDocument()
|
5
test/HTML/issue380.html
Normal file
5
test/HTML/issue380.html
Normal file
@ -0,0 +1,5 @@
|
||||
<html>
|
||||
<body>
|
||||
<![if !supportLists]>...<![endif]>
|
||||
</body>
|
||||
</html>
|
Reference in New Issue
Block a user