1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

use new htmlParseLookupCommentEnd to find comment ends

Note that the caret in error messages generated during comment parsing
may have moved by one byte.

See guidance provided on incorrectly-closed comments here:

https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
This commit is contained in:
Mike Dalessio
2020-10-11 14:15:37 -04:00
committed by Nick Wellnhofer
parent 29f5d20e84
commit a67b63d183
3 changed files with 39 additions and 11 deletions

View File

@ -5220,6 +5220,39 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
return (-1);
}
/**
 * htmlParseLookupCommentEnd:
 * @ctxt:  an HTML parser context
 *
 * Try to find the end of a comment in the input stream.
 * Besides "-->", the WHATWG-recommended incorrectly-closed form "--!>" is
 * accepted as a terminator.
 * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
 *
 * This is a wrapper around htmlParseLookupSequence(). It has the side
 * effect of (possibly) updating ctxt->checkIndex so that bytes which were
 * already examined are not rescanned; since this changes the state of the
 * parser, do not use it liberally.
 *
 * If the buffered input stops right after "--" or "--!", the terminator
 * may still be completed by the next chunk. In that case ctxt->checkIndex
 * is left on that "--" and -1 is returned, so the same candidate is
 * examined again once more data has been pushed.
 *
 * Returns the index of the comment end relative to the current parsing
 * point if a full terminator is available, -1 otherwise.
 */
static int
htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
{
    int mark;
    int offset;
    /* Offset of the current parsing point from the start of the buffer. */
    int cur = CUR_PTR - BASE_PTR;
    /* Number of bytes buffered from the current parsing point onwards. */
    int avail = ctxt->input->end - CUR_PTR;

    while (1) {
        /* Locate the next "--"; the closing byte(s) are checked below. */
        mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 0);
        if (mark < 0)
            return(mark);

        if ((NXT(mark+2) == '>') ||
            ((NXT(mark+2) == '!') && (NXT(mark+3) == '>')))
            return(mark);

        /*
         * Not a terminator so far. If the byte that decides this lies
         * beyond the buffered data, we cannot reject the candidate yet:
         * keep checkIndex on this "--" (the lookup is expected to resume
         * from ctxt->checkIndex) and wait for more input.
         */
        offset = (NXT(mark+2) == '!') ? 3 : 2;
        if (mark + offset >= avail) {
            ctxt->checkIndex = cur + mark;
            return(-1);
        }

        /*
         * Definitely not a comment end: resume from the second '-' so
         * that overlapping runs such as "--->" are still detected.
         */
        ctxt->checkIndex = cur + mark + 1;
    }
}
/**
* htmlParseTryOrFinish:
* @ctxt: an HTML parser context
@ -5405,8 +5438,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
cur = in->cur[0];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@ -5466,8 +5498,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
next = in->cur[1];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@ -5514,8 +5545,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
next = in->cur[1];
if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(ctxt, '-', '-', '>', 0) < 0))
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,
@ -5769,9 +5799,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
htmlParseDocTypeDecl(ctxt);
} else if ((cur == '<') && (next == '!') &&
(in->cur[2] == '-') && (in->cur[3] == '-')) {
if ((!terminate) &&
(htmlParseLookupSequence(
ctxt, '-', '-', '>', 0) < 0))
if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
goto done;
#ifdef DEBUG_PUSH
xmlGenericError(xmlGenericErrorContext,