From 29f5d20e84efba6046faee0f8508f6f7e2894af5 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Mon, 3 Aug 2020 17:36:05 -0400 Subject: [PATCH] htmlParseComment: treat `--!>` as if it closed the comment See guidance provided on incorrectly-closed comments here: https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment --- HTMLparser.c | 26 +++++++++++++++++++------- result/HTML/comments.html | 2 +- result/HTML/comments.html.err | 3 +++ result/HTML/comments.html.sax | 6 +++++- result/HTML/comments2.html | 7 ++++--- result/HTML/comments2.html.err | 7 +++---- result/HTML/comments2.html.sax | 16 ++++++++++++++-- 7 files changed, 49 insertions(+), 18 deletions(-) diff --git a/HTMLparser.c b/HTMLparser.c index 26a1cdc2..41ab4aa5 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -3297,6 +3297,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { int q, ql; int r, rl; int cur, l; + int next, nl; xmlParserInputState state; /* @@ -3329,6 +3330,21 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { while ((cur != 0) && ((cur != '>') || (r != '-') || (q != '-'))) { + NEXTL(l); + next = CUR_CHAR(nl); + if (next == 0) { + SHRINK; + GROW; + next = CUR_CHAR(nl); + } + + if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) { + htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED, + "Comment incorrectly closed by '--!>'", NULL, NULL); + cur = '>'; + break; + } + if (len + 5 >= size) { xmlChar *tmp; @@ -3348,17 +3364,13 @@ htmlParseComment(htmlParserCtxtPtr ctxt) { htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR, "Invalid char in comment 0x%X\n", q); } + q = r; ql = rl; r = cur; rl = l; - NEXTL(l); - cur = CUR_CHAR(l); - if (cur == 0) { - SHRINK; - GROW; - cur = CUR_CHAR(l); - } + cur = next; + l = nl; } buf[len] = 0; if (cur == '>') { diff --git a/result/HTML/comments.html b/result/HTML/comments.html index 47805c99..973050ce 100644 --- a/result/HTML/comments.html +++ b/result/HTML/comments.html @@ -8,7 +8,7 @@
- whatwg guidance is that this should be a DOM node + whatwg guidance is that this should be a DOM node
diff --git a/result/HTML/comments.html.err b/result/HTML/comments.html.err index e69de29b..5bb3deef 100644 --- a/result/HTML/comments.html.err +++ b/result/HTML/comments.html.err @@ -0,0 +1,3 @@ +./test/HTML/comments.html:10: HTML parser error : Comment incorrectly closed by '--!>' + whatwg guidance is + ^ diff --git a/result/HTML/comments.html.sax b/result/HTML/comments.html.sax index caf727b0..ee8fcd7b 100644 --- a/result/HTML/comments.html.sax +++ b/result/HTML/comments.html.sax @@ -24,7 +24,11 @@ SAX.characters( SAX.startElement(div) SAX.characters( , 9) -SAX.comment(incorrectly closed comment--!>whatwg guidance is that this should be a DOM node'SAX.comment(incorrectly closed comment) +SAX.startElement(span, id='under-test') +SAX.characters(whatwg guidance is that this s, 49) +SAX.endElement(span) +SAX.comment(correctly closed comment) SAX.characters( , 7) SAX.endElement(div) diff --git a/result/HTML/comments2.html b/result/HTML/comments2.html index dd71d0b5..eb077ac2 100644 --- a/result/HTML/comments2.html +++ b/result/HTML/comments2.html @@ -8,7 +8,8 @@
-
- - + whatwg guidance is that this should be a DOM node + + + diff --git a/result/HTML/comments2.html.err b/result/HTML/comments2.html.err index b16216b7..8d1f5926 100644 --- a/result/HTML/comments2.html.err +++ b/result/HTML/comments2.html.err @@ -1,4 +1,3 @@ -./test/HTML/comments2.html:15: HTML parser error : Comment not terminated - - -^ +./test/HTML/comments2.html:10: HTML parser error : Comment incorrectly closed by '--!>' + whatwg guidance is + ^ diff --git a/result/HTML/comments2.html.sax b/result/HTML/comments2.html.sax index 77ce9d82..d694f04f 100644 --- a/result/HTML/comments2.html.sax +++ b/result/HTML/comments2.html.sax @@ -24,10 +24,22 @@ SAX.characters( SAX.startElement(div) SAX.characters( , 9) -SAX.error: Comment not terminated - +SAX.error: Comment incorrectly closed by '--!>'SAX.comment(incorrectly closed comment) +SAX.startElement(span, id='under-test') +SAX.characters(whatwg guidance is that this s, 49) +SAX.endElement(span) +SAX.characters( + , 7) SAX.endElement(div) +SAX.characters( + , 5) SAX.endElement(div) +SAX.characters( + , 3) SAX.endElement(body) +SAX.characters( +, 1) SAX.endElement(html) +SAX.characters( +, 1) SAX.endDocument()