From 93ce33c2b85d457fd03e230d0b0b985216ee63b4 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer <wellnhofer@aevum.de>
Date: Thu, 23 Jul 2020 17:34:08 +0200
Subject: [PATCH] Fix several quadratic runtime issues in HTML push parser

Fix a few remaining cases where the HTML push parser would scan more
content during lookahead than being parsed later.

Make sure that htmlParseDocTypeDecl consumes all content up to the
final '>' in case of errors. The old comment said "We shouldn't try to
resynchronize", but ignoring invalid content is also what the HTML5
spec mandates.

Likewise, make htmlParseEndTag skip to the final '>' in invalid end
tags even if not in recovery mode. This is probably the most visible
change in practice and leads to different output for some tests but is
also more in line with HTML5.

Make sure that htmlParsePI and htmlParseComment don't abort if invalid
characters are encountered but log an error and ignore the character.

Change some other end-of-buffer checks to test for a zero byte instead
of relying on IS_CHAR.

Fix usage of IS_CHAR macro in htmlParseScript.
---
 HTMLparser.c                  | 192 ++++++++++++++++------------------
 fuzz/html.dict                |   4 +
 result/HTML/758606.html       |   2 +-
 result/HTML/758606.html.err   |  17 +--
 result/HTML/758606.html.sax   |   8 +-
 result/HTML/758606_2.html     |   4 +-
 result/HTML/758606_2.html.err |  15 +--
 result/HTML/758606_2.html.sax |  15 +--
 result/HTML/doc3.htm          |   1 -
 result/HTML/doc3.htm.err      |   6 +-
 result/HTML/doc3.htm.sax      |   3 +-
 result/HTML/wired.html        |   4 +-
 result/HTML/wired.html.err    |   3 +
 result/HTML/wired.html.sax    |   3 +-
 14 files changed, 122 insertions(+), 155 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index 05170691..b9812985 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -2802,47 +2802,39 @@ htmlParseAttValue(htmlParserCtxtPtr ctxt) {
 static xmlChar *
 htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
     size_t len = 0, startPosition = 0;
+    int err = 0;
+    int quote;
     xmlChar *ret = NULL;
 
-    if (CUR == '"') {
-        NEXT;
-
-        if (CUR_PTR < BASE_PTR)
-            return(ret);
-        startPosition = CUR_PTR - BASE_PTR;
-
-	while ((IS_CHAR_CH(CUR)) && (CUR != '"')) {
-	    NEXT;
-	    len++;
-	}
-	if (!IS_CHAR_CH(CUR)) {
-	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
-			 "Unfinished SystemLiteral\n", NULL, NULL);
-	} else {
-	    ret = xmlStrndup((BASE_PTR+startPosition), len);
-	    NEXT;
-        }
-    } else if (CUR == '\'') {
-        NEXT;
-
-        if (CUR_PTR < BASE_PTR)
-            return(ret);
-        startPosition = CUR_PTR - BASE_PTR;
-
-	while ((IS_CHAR_CH(CUR)) && (CUR != '\'')) {
-	    NEXT;
-	    len++;
-	}
-	if (!IS_CHAR_CH(CUR)) {
-	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
-			 "Unfinished SystemLiteral\n", NULL, NULL);
-	} else {
-	    ret = xmlStrndup((BASE_PTR+startPosition), len);
-	    NEXT;
-        }
-    } else {
+    if ((CUR != '"') && (CUR != '\'')) {
 	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
-	             " or ' expected\n", NULL, NULL);
+	             "SystemLiteral \" or ' expected\n", NULL, NULL);
+        return(NULL);
+    }
+    quote = CUR;
+    NEXT;
+
+    if (CUR_PTR < BASE_PTR)
+        return(ret);
+    startPosition = CUR_PTR - BASE_PTR;
+
+    while ((CUR != 0) && (CUR != quote)) {
+        /* TODO: Handle UTF-8 */
+        if (!IS_CHAR_CH(CUR)) {
+            htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+                            "Invalid char in SystemLiteral 0x%X\n", CUR);
+            err = 1;
+        }
+        NEXT;
+        len++;
+    }
+    if (CUR != quote) {
+        htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
+                     "Unfinished SystemLiteral\n", NULL, NULL);
+    } else {
+        NEXT;
+        if (err == 0)
+            ret = xmlStrndup((BASE_PTR+startPosition), len);
     }
 
     return(ret);
@@ -2862,51 +2854,42 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
 static xmlChar *
 htmlParsePubidLiteral(htmlParserCtxtPtr ctxt) {
     size_t len = 0, startPosition = 0;
+    int err = 0;
+    int quote;
     xmlChar *ret = NULL;
+
+    if ((CUR != '"') && (CUR != '\'')) {
+	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
+	             "PubidLiteral \" or ' expected\n", NULL, NULL);
+        return(NULL);
+    }
+    quote = CUR;
+    NEXT;
+
     /*
      * Name ::= (Letter | '_') (NameChar)*
      */
-    if (CUR == '"') {
-        NEXT;
+    if (CUR_PTR < BASE_PTR)
+        return(ret);
+    startPosition = CUR_PTR - BASE_PTR;
 
-        if (CUR_PTR < BASE_PTR)
-            return(ret);
-        startPosition = CUR_PTR - BASE_PTR;
-
-        while (IS_PUBIDCHAR_CH(CUR)) {
-            len++;
-            NEXT;
+    while ((CUR != 0) && (CUR != quote)) {
+        if (!IS_PUBIDCHAR_CH(CUR)) {
+            htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+                            "Invalid char in PubidLiteral 0x%X\n", CUR);
+            err = 1;
         }
-
-	if (CUR != '"') {
-	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
-	                 "Unfinished PubidLiteral\n", NULL, NULL);
-	} else {
-	    ret = xmlStrndup((BASE_PTR + startPosition), len);
-	    NEXT;
-	}
-    } else if (CUR == '\'') {
+        len++;
         NEXT;
+    }
 
-        if (CUR_PTR < BASE_PTR)
-            return(ret);
-        startPosition = CUR_PTR - BASE_PTR;
-
-        while ((IS_PUBIDCHAR_CH(CUR)) && (CUR != '\'')){
-            len++;
-            NEXT;
-        }
-
-	if (CUR != '\'') {
-	    htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
-	                 "Unfinished PubidLiteral\n", NULL, NULL);
-	} else {
-	    ret = xmlStrndup((BASE_PTR + startPosition), len);
-	    NEXT;
-	}
+    if (CUR != '"') {
+        htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_FINISHED,
+                     "Unfinished PubidLiteral\n", NULL, NULL);
     } else {
-	htmlParseErr(ctxt, XML_ERR_LITERAL_NOT_STARTED,
-	             "PubidLiteral \" or ' expected\n", NULL, NULL);
+        NEXT;
+        if (err == 0)
+            ret = xmlStrndup((BASE_PTR + startPosition), len);
     }
 
     return(ret);
@@ -2972,7 +2955,7 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
                 }
             }
 	}
-        if (IS_CHAR_CH(cur)) {
+        if (IS_CHAR(cur)) {
 	    COPY_BUF(l,buf,nbchar,cur);
         } else {
             htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
@@ -3242,7 +3225,7 @@ htmlParsePI(htmlParserCtxtPtr ctxt) {
 	    }
             SKIP_BLANKS;
 	    cur = CUR_CHAR(l);
-	    while (IS_CHAR(cur) && (cur != '>')) {
+	    while ((cur != 0) && (cur != '>')) {
 		if (len + 5 >= size) {
 		    xmlChar *tmp;
 
@@ -3261,7 +3244,13 @@ htmlParsePI(htmlParserCtxtPtr ctxt) {
 		    GROW;
 		    count = 0;
 		}
-		COPY_BUF(l,buf,len,cur);
+                if (IS_CHAR(cur)) {
+		    COPY_BUF(l,buf,len,cur);
+                } else {
+                    htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+                                    "Invalid char in processing instruction "
+                                    "0x%X\n", cur);
+                }
 		NEXTL(l);
 		cur = CUR_CHAR(l);
 		if (cur == 0) {
@@ -3331,15 +3320,15 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
     len = 0;
     buf[len] = 0;
     q = CUR_CHAR(ql);
-    if (!IS_CHAR(q))
+    if (q == 0)
         goto unfinished;
     NEXTL(ql);
     r = CUR_CHAR(rl);
-    if (!IS_CHAR(r))
+    if (r == 0)
         goto unfinished;
     NEXTL(rl);
     cur = CUR_CHAR(l);
-    while (IS_CHAR(cur) &&
+    while ((cur != 0) &&
            ((cur != '>') ||
 	    (r != '-') || (q != '-'))) {
 	if (len + 5 >= size) {
@@ -3355,7 +3344,12 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
 	    }
 	    buf = tmp;
 	}
-	COPY_BUF(ql,buf,len,q);
+        if (IS_CHAR(q)) {
+	    COPY_BUF(ql,buf,len,q);
+        } else {
+            htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+                            "Invalid char in comment 0x%X\n", q);
+        }
 	q = r;
 	ql = rl;
 	r = cur;
@@ -3369,7 +3363,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
 	}
     }
     buf[len] = 0;
-    if (IS_CHAR(cur)) {
+    if (cur == '>') {
         NEXT;
 	if ((ctxt->sax != NULL) && (ctxt->sax->comment != NULL) &&
 	    (!ctxt->disableSAX))
@@ -3516,9 +3510,12 @@ htmlParseDocTypeDecl(htmlParserCtxtPtr ctxt) {
     if (CUR != '>') {
 	htmlParseErr(ctxt, XML_ERR_DOCTYPE_NOT_FINISHED,
 	             "DOCTYPE improperly terminated\n", NULL, NULL);
-        /* We shouldn't try to resynchronize ... */
+        /* Ignore bogus content */
+        while ((CUR != 0) && (CUR != '>'))
+            NEXT;
     }
-    NEXT;
+    if (CUR == '>')
+        NEXT;
 
     /*
      * Create or update the document accordingly to the DOCTYPE
@@ -3996,19 +3993,14 @@ htmlParseEndTag(htmlParserCtxtPtr ctxt)
      * We should definitely be at the ending "S? '>'" part
      */
     SKIP_BLANKS;
-    if ((!IS_CHAR_CH(CUR)) || (CUR != '>')) {
+    if (CUR != '>') {
         htmlParseErr(ctxt, XML_ERR_GT_REQUIRED,
 	             "End tag : expected '>'\n", NULL, NULL);
-	if (ctxt->recovery) {
-	    /*
-	     * We're not at the ending > !!
-	     * Error, unless in recover mode where we search forwards
-	     * until we find a >
-	     */
-	    while (CUR != '\0' && CUR != '>') NEXT;
-	    NEXT;
-	}
-    } else
+        /* Skip to next '>' */
+        while ((CUR != 0) && (CUR != '>'))
+            NEXT;
+    }
+    if (CUR == '>')
         NEXT;
 
     /*
@@ -4198,7 +4190,7 @@ htmlParseContent(htmlParserCtxtPtr ctxt) {
 			 "htmlParseStartTag: invalid element name\n",
 			 NULL, NULL);
 	        /* Dump the bogus tag like browsers do */
-        while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
+                while ((CUR != 0) && (CUR != '>'))
 	            NEXT;
 
 	        if (currentNode != NULL)
@@ -4413,7 +4405,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
      */
     currentNode = xmlStrdup(ctxt->name);
     depth = ctxt->nameNr;
-    while (IS_CHAR_CH(CUR)) {
+    while (CUR != 0) {
 	oldptr = ctxt->input->cur;
 	htmlParseContent(ctxt);
 	if (oldptr==ctxt->input->cur) break;
@@ -4430,7 +4422,7 @@ htmlParseElement(htmlParserCtxtPtr ctxt) {
        node_info.node = ctxt->node;
        xmlParserAddNodeInfo(ctxt, &node_info);
     }
-    if (!IS_CHAR_CH(CUR)) {
+    if (CUR == 0) {
 	htmlAutoCloseOnEnd(ctxt);
     }
 
@@ -4451,7 +4443,7 @@ htmlParserFinishElementParsing(htmlParserCtxtPtr ctxt) {
        xmlParserAddNodeInfo(ctxt, ctxt->nodeInfo);
        htmlNodeInfoPop(ctxt);
     }
-    if (!IS_CHAR_CH(CUR)) {
+    if (CUR == 0) {
        htmlAutoCloseOnEnd(ctxt);
     }
 }
@@ -4600,7 +4592,7 @@ htmlParseContentInternal(htmlParserCtxtPtr ctxt) {
 			 "htmlParseStartTag: invalid element name\n",
 			 NULL, NULL);
 	        /* Dump the bogus tag like browsers do */
-	        while ((IS_CHAR_CH(CUR)) && (CUR != '>'))
+	        while ((CUR == 0) && (CUR != '>'))
 	            NEXT;
 
 	        htmlParserFinishElementParsing(ctxt);
diff --git a/fuzz/html.dict b/fuzz/html.dict
index 9f58ed1e..801b7bb5 100644
--- a/fuzz/html.dict
+++ b/fuzz/html.dict
@@ -96,6 +96,10 @@ attr_style=" style=\"\""
 
 comment="<!-- -->"
 
+doctype="<!DOCTYPE d>"
+doctype_system="<!DOCTYPE s SYSTEM \"u\">"
+doctype_public="<!DOCTYPE p PUBLIC \"i\" \"u\">"
+
 pi="<?a?>"
 
 ref_lt="&lt;"
diff --git a/result/HTML/758606.html b/result/HTML/758606.html
index 4f21f628..3974ca90 100644
--- a/result/HTML/758606.html
+++ b/result/HTML/758606.html
@@ -1,2 +1,2 @@
-<!DOCTYPE >
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 
diff --git a/result/HTML/758606.html.err b/result/HTML/758606.html.err
index 060433a8..e3e61265 100644
--- a/result/HTML/758606.html.err
+++ b/result/HTML/758606.html.err
@@ -1,16 +1,7 @@
-./test/HTML/758606.html:1: HTML parser error : Comment not terminated 
-<!--
+./test/HTML/758606.html:1: HTML parser error : Invalid char in comment 0xC
 <!--<!doctype
-    ^
-./test/HTML/758606.html:1: HTML parser error : Invalid char in CDATA 0xC
-<!--<!doctype
-    ^
-./test/HTML/758606.html:1: HTML parser error : Misplaced DOCTYPE declaration
-<!--<!doctype
-     ^
-./test/HTML/758606.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
-
-^
-./test/HTML/758606.html:2: HTML parser error : DOCTYPE improperly terminated
+      ^
+./test/HTML/758606.html:2: HTML parser error : Comment not terminated 
+<!--<!doctyp
 
 ^
diff --git a/result/HTML/758606.html.sax b/result/HTML/758606.html.sax
index d44a5cf4..a21a9f82 100644
--- a/result/HTML/758606.html.sax
+++ b/result/HTML/758606.html.sax
@@ -1,10 +1,6 @@
 SAX.setDocumentLocator()
 SAX.startDocument()
+SAX.error: Invalid char in comment 0xC
 SAX.error: Comment not terminated 
-<!--
-SAX.error: Invalid char in CDATA 0xC
-SAX.error: Misplaced DOCTYPE declaration
-SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
-SAX.error: DOCTYPE improperly terminated
-SAX.internalSubset((null), , )
+<!--<!doctyp
 SAX.endDocument()
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html
index 273816a0..3974ca90 100644
--- a/result/HTML/758606_2.html
+++ b/result/HTML/758606_2.html
@@ -1,2 +1,2 @@
-<!DOCTYPE >
-<html><body><p>&#145;</p></body></html>
+<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
+
diff --git a/result/HTML/758606_2.html.err b/result/HTML/758606_2.html.err
index 4be039f4..e9bf4060 100644
--- a/result/HTML/758606_2.html.err
+++ b/result/HTML/758606_2.html.err
@@ -1,16 +1,7 @@
-./test/HTML/758606_2.html:1: HTML parser error : Comment not terminated 
-<!--
-<!--‘<!dOctYPE
-    ^
-./test/HTML/758606_2.html:1: HTML parser error : Invalid char in CDATA 0xC
-<!--‘<!dOctYPE
-    ^
-./test/HTML/758606_2.html:1: HTML parser error : Misplaced DOCTYPE declaration
+./test/HTML/758606_2.html:1: HTML parser error : Invalid char in comment 0xC
 Ā‘<!dOctYPE
   ^
-./test/HTML/758606_2.html:2: HTML parser error : htmlParseDocTypeDecl : no DOCTYPE name !
-
-^
-./test/HTML/758606_2.html:2: HTML parser error : DOCTYPE improperly terminated
+./test/HTML/758606_2.html:2: HTML parser error : Comment not terminated 
+<!--Ā‘<!dOctYP
 
 ^
diff --git a/result/HTML/758606_2.html.sax b/result/HTML/758606_2.html.sax
index 80ff3d77..8a46d9ca 100644
--- a/result/HTML/758606_2.html.sax
+++ b/result/HTML/758606_2.html.sax
@@ -1,17 +1,6 @@
 SAX.setDocumentLocator()
 SAX.startDocument()
+SAX.error: Invalid char in comment 0xC
 SAX.error: Comment not terminated 
-<!--
-SAX.error: Invalid char in CDATA 0xC
-SAX.startElement(html)
-SAX.startElement(body)
-SAX.startElement(p)
-SAX.characters(&#145;, 2)
-SAX.error: Misplaced DOCTYPE declaration
-SAX.error: htmlParseDocTypeDecl : no DOCTYPE name !
-SAX.error: DOCTYPE improperly terminated
-SAX.internalSubset((null), , )
-SAX.endElement(p)
-SAX.endElement(body)
-SAX.endElement(html)
+<!--Ā‘<!dOctYP
 SAX.endDocument()
diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm
index e9d5e448..2cc68b9b 100644
--- a/result/HTML/doc3.htm
+++ b/result/HTML/doc3.htm
@@ -521,7 +521,6 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
 							document.write("ype=js&size=100x90&url=http://www.goto.com/");
 							document.write("d/search/ssn/&target=_blank&Partner=SSN8042");
 							document.write("DF8478957377>");
-							document.write("RIPT>");
 							} else {
 							document.write("<A TARGET=_blank ");
 							document.write("HREF=http://www.goto.com/d/search/ssn/?from");
diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err
index cdf715a6..5e2491da 100644
--- a/result/HTML/doc3.htm.err
+++ b/result/HTML/doc3.htm.err
@@ -46,9 +46,9 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT>
 ./test/HTML/doc3.htm:803: HTML parser error : End tag : expected '>'
 							document.write("DF8478957377></SC");
 							                                 ^
-./test/HTML/doc3.htm:803: HTML parser error : Unexpected end tag : sc
-							document.write("DF8478957377></SC");
-							                                 ^
+./test/HTML/doc3.htm:804: HTML parser error : Unexpected end tag : sc
+							document.write("RIPT>");
+							                     ^
 ./test/HTML/doc3.htm:811: HTML parser error : Unexpected end tag : a
 							document.write("ype=gif&size=100x90></A>");
 							                                        ^
diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax
index fb33cf22..9237ad3c 100644
--- a/result/HTML/doc3.htm.sax
+++ b/result/HTML/doc3.htm.sax
@@ -2700,7 +2700,8 @@ SAX.cdata(");
 SAX.error: End tag : expected '>'
 SAX.error: Unexpected end tag : sc
 SAX.cdata(");
-							document.write("RI, 361)
+							} else {
+							d, 328)
 SAX.error: Unexpected end tag : a
 SAX.cdata(");
 							}
diff --git a/result/HTML/wired.html b/result/HTML/wired.html
index 74d366e3..3da303f8 100644
--- a/result/HTML/wired.html
+++ b/result/HTML/wired.html
@@ -462,13 +462,13 @@ or <a href="/news/pointcast/0,1366,,00.html">PointCast</a></font><br>
 
 <font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/school/0,1383,,00.html">Making the Grade</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Reading, writing, and ROM.  <br><i>Sponsored by <a href="http://r.hotwired.com/r/wn_sch_r_nav_uop/http://ads25.focalink.com/SmartBanner/page?12630.53" style="text-decoration:none"><font color="#000000">U of Phoenix</font></a></i></font><br><br> 
 
-<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/infostructure/0,1377,,00.html">Infostructure</a></b></font><br><font size="1" face="Arial, Helvetica, sans-serif" color="#000000">An IS/IT resource <br><i>Sponsored by <a href="http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s%0Aervlet/appservlet?from=/wired/sprint/&amp;template=/security/security.html&amp;SITE=%0Awired.com&amp;BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</font></a></i></font></font><br><br> 
+<font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/infostructure/0,1377,,00.html">Infostructure</a></b></font><br><font size="1" face="Arial, Helvetica, sans-serif" color="#000000">An IS/IT resource <br><i>Sponsored by <a href="http://r.wired.com/r/wn_is_r_ssec/http://ad.doubleclick.net/clk;653163;3599571;s?http://www.sprintbiz.com/s%0Aervlet/appservlet?from=/wired/sprint/&amp;template=/security/security.html&amp;SITE=%0Awired.com&amp;BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</font></a></i></font><br><br> 
 
 <font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/y2k/0,1360,,00.html">Y2K Watch</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Tick... Tick... Tick...</font><br><br> 
 
 <font face="Arial, Helvetica, sans-serif" size="2"><b><i><a href="/news/special_reports/1,1293,,00.html">More Hoo-Ha</a></i></b></font><br>&nbsp;<br>
 
-</font></font></font></font></font></font></font></font>
+</font></font></font></font></font></font></font></font></font>
 </td>
 </tr>
 <!-- start of Gen News -->
diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err
index 70db11b0..116bbd2f 100644
--- a/result/HTML/wired.html.err
+++ b/result/HTML/wired.html.err
@@ -242,6 +242,9 @@ com&BANNER=Sprint" style="text-decoration:none"><font color="#000000">Sprint</a>
 </td>
      ^
 ./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
+</td>
+     ^
+./test/HTML/wired.html:414: HTML parser error : Opening and ending tag mismatch: td and font
 </td>
      ^
 ./test/HTML/wired.html:432: HTML parser error : htmlParseEntityRef: expecting ';'
diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax
index d5b16297..bb787656 100644
--- a/result/HTML/wired.html.sax
+++ b/result/HTML/wired.html.sax
@@ -1962,7 +1962,6 @@ SAX.endElement(a)
 SAX.endElement(i)
 SAX.error: End tag : expected '>'
 SAX.endElement(font)
-SAX.endElement(font)
 SAX.startElement(br)
 SAX.endElement(br)
 SAX.startElement(br)
@@ -2023,6 +2022,8 @@ SAX.error: Opening and ending tag mismatch: td and font
 SAX.endElement(font)
 SAX.error: Opening and ending tag mismatch: td and font
 SAX.endElement(font)
+SAX.error: Opening and ending tag mismatch: td and font
+SAX.endElement(font)
 SAX.endElement(td)
 SAX.characters(
 , 1)