mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
Skip incorrectly opened HTML comments
Commit 4fd69f3e
fixed handling of '<' characters not followed by an
ASCII letter. But a '<!' sequence followed by invalid characters should
be treated as a bogus comment and skipped.
Fixes #380.
This commit is contained in:
145
HTMLparser.c
145
HTMLparser.c
@ -2545,6 +2545,21 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
|
|||||||
|
|
||||||
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
|
static const xmlChar * htmlParseNameComplex(xmlParserCtxtPtr ctxt);
|
||||||
|
|
||||||
|
static void
|
||||||
|
htmlSkipBogusComment(htmlParserCtxtPtr ctxt) {
|
||||||
|
int c;
|
||||||
|
|
||||||
|
htmlParseErr(ctxt, XML_HTML_INCORRECTLY_OPENED_COMMENT,
|
||||||
|
"Incorrectly opened comment\n", NULL, NULL);
|
||||||
|
|
||||||
|
do {
|
||||||
|
c = CUR;
|
||||||
|
if (c == 0)
|
||||||
|
break;
|
||||||
|
NEXT;
|
||||||
|
} while (c != '>');
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseHTMLName:
|
* htmlParseHTMLName:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
@ -4380,26 +4395,28 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
|
|||||||
htmlParseScript(ctxt);
|
htmlParseScript(ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
else if ((CUR == '<') && (NXT(1) == '!')) {
|
||||||
* Sometimes DOCTYPE arrives in the middle of the document
|
/*
|
||||||
*/
|
* Sometimes DOCTYPE arrives in the middle of the document
|
||||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
*/
|
||||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||||
(UPP(8) == 'E')) {
|
(UPP(8) == 'E')) {
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"Misplaced DOCTYPE declaration\n",
|
"Misplaced DOCTYPE declaration\n",
|
||||||
BAD_CAST "DOCTYPE" , NULL);
|
BAD_CAST "DOCTYPE" , NULL);
|
||||||
htmlParseDocTypeDecl(ctxt);
|
htmlParseDocTypeDecl(ctxt);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
/*
|
* First case : a comment
|
||||||
* First case : a comment
|
*/
|
||||||
*/
|
else if ((NXT(2) == '-') && (NXT(3) == '-')) {
|
||||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
htmlParseComment(ctxt);
|
||||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
}
|
||||||
htmlParseComment(ctxt);
|
else {
|
||||||
|
htmlSkipBogusComment(ctxt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -4785,26 +4802,28 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
|
|||||||
htmlParseScript(ctxt);
|
htmlParseScript(ctxt);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
else if ((CUR == '<') && (NXT(1) == '!')) {
|
||||||
* Sometimes DOCTYPE arrives in the middle of the document
|
/*
|
||||||
*/
|
* Sometimes DOCTYPE arrives in the middle of the document
|
||||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
*/
|
||||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||||
(UPP(8) == 'E')) {
|
(UPP(8) == 'E')) {
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
"Misplaced DOCTYPE declaration\n",
|
"Misplaced DOCTYPE declaration\n",
|
||||||
BAD_CAST "DOCTYPE" , NULL);
|
BAD_CAST "DOCTYPE" , NULL);
|
||||||
htmlParseDocTypeDecl(ctxt);
|
htmlParseDocTypeDecl(ctxt);
|
||||||
}
|
}
|
||||||
|
/*
|
||||||
/*
|
* First case : a comment
|
||||||
* First case : a comment
|
*/
|
||||||
*/
|
else if ((NXT(2) == '-') && (NXT(3) == '-')) {
|
||||||
else if ((CUR == '<') && (NXT(1) == '!') &&
|
htmlParseComment(ctxt);
|
||||||
(NXT(2) == '-') && (NXT(3) == '-')) {
|
}
|
||||||
htmlParseComment(ctxt);
|
else {
|
||||||
|
htmlSkipBogusComment(ctxt);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -5949,31 +5968,37 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
#endif
|
#endif
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
} else if ((cur == '<') && (next == '!') &&
|
} else if ((cur == '<') && (next == '!')) {
|
||||||
(UPP(2) == 'D') && (UPP(3) == 'O') &&
|
|
||||||
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
|
||||||
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
|
||||||
(UPP(8) == 'E')) {
|
|
||||||
/*
|
/*
|
||||||
* Sometimes DOCTYPE arrives in the middle of the document
|
* Sometimes DOCTYPE arrives in the middle of the document
|
||||||
*/
|
*/
|
||||||
if ((!terminate) &&
|
if ((UPP(2) == 'D') && (UPP(3) == 'O') &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
|
(UPP(4) == 'C') && (UPP(5) == 'T') &&
|
||||||
goto done;
|
(UPP(6) == 'Y') && (UPP(7) == 'P') &&
|
||||||
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
(UPP(8) == 'E')) {
|
||||||
"Misplaced DOCTYPE declaration\n",
|
if ((!terminate) &&
|
||||||
BAD_CAST "DOCTYPE" , NULL);
|
(htmlParseLookupSequence(ctxt, '>', 0, 0, 1) < 0))
|
||||||
htmlParseDocTypeDecl(ctxt);
|
goto done;
|
||||||
} else if ((cur == '<') && (next == '!') &&
|
htmlParseErr(ctxt, XML_HTML_STRUCURE_ERROR,
|
||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
"Misplaced DOCTYPE declaration\n",
|
||||||
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
BAD_CAST "DOCTYPE" , NULL);
|
||||||
goto done;
|
htmlParseDocTypeDecl(ctxt);
|
||||||
|
} else if ((in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
|
if ((!terminate) &&
|
||||||
|
(htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
|
goto done;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
"HPP: Parsing Comment\n");
|
"HPP: Parsing Comment\n");
|
||||||
#endif
|
#endif
|
||||||
htmlParseComment(ctxt);
|
htmlParseComment(ctxt);
|
||||||
ctxt->instate = XML_PARSER_CONTENT;
|
ctxt->instate = XML_PARSER_CONTENT;
|
||||||
|
} else {
|
||||||
|
if ((!terminate) &&
|
||||||
|
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||||
|
goto done;
|
||||||
|
htmlSkipBogusComment(ctxt);
|
||||||
|
}
|
||||||
} else if ((cur == '<') && (next == '?')) {
|
} else if ((cur == '<') && (next == '?')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) &&
|
||||||
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
(htmlParseLookupSequence(ctxt, '>', 0, 0, 0) < 0))
|
||||||
|
@ -260,6 +260,7 @@ typedef enum {
|
|||||||
XML_DTD_DUP_TOKEN, /* 541 */
|
XML_DTD_DUP_TOKEN, /* 541 */
|
||||||
XML_HTML_STRUCURE_ERROR = 800,
|
XML_HTML_STRUCURE_ERROR = 800,
|
||||||
XML_HTML_UNKNOWN_TAG, /* 801 */
|
XML_HTML_UNKNOWN_TAG, /* 801 */
|
||||||
|
XML_HTML_INCORRECTLY_OPENED_COMMENT, /* 802 */
|
||||||
XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000,
|
XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000,
|
||||||
XML_RNGP_ATTR_CONFLICT, /* 1001 */
|
XML_RNGP_ATTR_CONFLICT, /* 1001 */
|
||||||
XML_RNGP_ATTRIBUTE_CHILDREN, /* 1002 */
|
XML_RNGP_ATTRIBUTE_CHILDREN, /* 1002 */
|
||||||
|
6
result/HTML/issue380.html
Normal file
6
result/HTML/issue380.html
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
|
||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
...
|
||||||
|
</body>
|
||||||
|
</html>
|
6
result/HTML/issue380.html.err
Normal file
6
result/HTML/issue380.html.err
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
|
||||||
|
<![if !supportLists]>...<![endif]>
|
||||||
|
^
|
||||||
|
./test/HTML/issue380.html:3: HTML parser error : Incorrectly opened comment
|
||||||
|
<![if !supportLists]>...<![endif]>
|
||||||
|
^
|
20
result/HTML/issue380.html.sax
Normal file
20
result/HTML/issue380.html.sax
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
SAX.setDocumentLocator()
|
||||||
|
SAX.startDocument()
|
||||||
|
SAX.startElement(html)
|
||||||
|
SAX.characters(
|
||||||
|
, 3)
|
||||||
|
SAX.startElement(body)
|
||||||
|
SAX.characters(
|
||||||
|
, 5)
|
||||||
|
SAX.error: Incorrectly opened comment
|
||||||
|
SAX.characters(..., 3)
|
||||||
|
SAX.error: Incorrectly opened comment
|
||||||
|
SAX.characters(
|
||||||
|
, 3)
|
||||||
|
SAX.endElement(body)
|
||||||
|
SAX.characters(
|
||||||
|
, 1)
|
||||||
|
SAX.endElement(html)
|
||||||
|
SAX.characters(
|
||||||
|
, 1)
|
||||||
|
SAX.endDocument()
|
5
test/HTML/issue380.html
Normal file
5
test/HTML/issue380.html
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<![if !supportLists]>...<![endif]>
|
||||||
|
</body>
|
||||||
|
</html>
|
Reference in New Issue
Block a user