mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
use new htmlParseLookupCommentEnd to find comment ends
Note that the caret in error messages generated during comment parsing may have moved by one byte. See guidance provided on incorrectly-closed comments here: https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
This commit is contained in:
committed by
Nick Wellnhofer
parent
29f5d20e84
commit
a67b63d183
46
HTMLparser.c
46
HTMLparser.c
@ -5220,6 +5220,39 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
|
|||||||
return (-1);
|
return (-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* htmlParseLookupCommentEnd:
|
||||||
|
* @ctxt: an HTML parser context
|
||||||
|
*
|
||||||
|
* Try to find a comment end tag in the input stream
|
||||||
|
* The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
|
||||||
|
* (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
|
||||||
|
* This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
||||||
|
* to avoid rescanning sequences of bytes, it DOES change the state of the
|
||||||
|
* parser, do not use liberally.
|
||||||
|
* This wraps to htmlParseLookupSequence()
|
||||||
|
*
|
||||||
|
* Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
|
||||||
|
{
|
||||||
|
int mark = 0;
|
||||||
|
int cur = CUR_PTR - BASE_PTR;
|
||||||
|
|
||||||
|
while (mark >= 0) {
|
||||||
|
mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 0);
|
||||||
|
if ((mark < 0) ||
|
||||||
|
(NXT(mark+2) == '>') ||
|
||||||
|
((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
|
||||||
|
return mark;
|
||||||
|
}
|
||||||
|
ctxt->checkIndex = cur + mark + 1;
|
||||||
|
}
|
||||||
|
return mark;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* htmlParseTryOrFinish:
|
* htmlParseTryOrFinish:
|
||||||
* @ctxt: an HTML parser context
|
* @ctxt: an HTML parser context
|
||||||
@ -5405,8 +5438,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
cur = in->cur[0];
|
cur = in->cur[0];
|
||||||
if ((cur == '<') && (next == '!') &&
|
if ((cur == '<') && (next == '!') &&
|
||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
(htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
|
|
||||||
goto done;
|
goto done;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
@ -5466,8 +5498,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
next = in->cur[1];
|
next = in->cur[1];
|
||||||
if ((cur == '<') && (next == '!') &&
|
if ((cur == '<') && (next == '!') &&
|
||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
(htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
|
|
||||||
goto done;
|
goto done;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
@ -5514,8 +5545,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
next = in->cur[1];
|
next = in->cur[1];
|
||||||
if ((cur == '<') && (next == '!') &&
|
if ((cur == '<') && (next == '!') &&
|
||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
(htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
|
|
||||||
goto done;
|
goto done;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
@ -5769,9 +5799,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
|||||||
htmlParseDocTypeDecl(ctxt);
|
htmlParseDocTypeDecl(ctxt);
|
||||||
} else if ((cur == '<') && (next == '!') &&
|
} else if ((cur == '<') && (next == '!') &&
|
||||||
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
||||||
if ((!terminate) &&
|
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
||||||
(htmlParseLookupSequence(
|
|
||||||
ctxt, '-', '-', '>', 0) < 0))
|
|
||||||
goto done;
|
goto done;
|
||||||
#ifdef DEBUG_PUSH
|
#ifdef DEBUG_PUSH
|
||||||
xmlGenericError(xmlGenericErrorContext,
|
xmlGenericError(xmlGenericErrorContext,
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
|
./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
|
||||||
<!--<!doctype
|
<!--<!doctype
|
||||||
^
|
^
|
||||||
./test/HTML/758606.html:2: HTML parser error : Comment not terminated
|
./test/HTML/758606.html:2: HTML parser error : Comment not terminated
|
||||||
<!--<!doctyp
|
<!--<!doctyp
|
||||||
|
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
|
./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
|
||||||
<!dOctYPE
|
<!dOctYPE
|
||||||
^
|
^
|
||||||
./test/HTML/758606_2.html:2: HTML parser error : Comment not terminated
|
./test/HTML/758606_2.html:2: HTML parser error : Comment not terminated
|
||||||
<!--<!dOctYP
|
<!--<!dOctYP
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user