From a3bfca59bf9a5b46dee8081d4c8a48740d6388f6 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Thu, 12 Apr 2001 15:42:58 +0000 Subject: [PATCH] parsing real HTML is a nightmare. - HTMLparser.c result/HTML/*: revamped the way the HTML parser handles end of tags or end of input Daniel --- ChangeLog | 5 + HTMLparser.c | 92 +++++++-------- result/HTML/autoclose3.html.err | 3 - result/HTML/autoclose3.html.sax | 1 - result/HTML/doc3.htm | 27 +++-- result/HTML/doc3.htm.err | 76 +++++++----- result/HTML/doc3.htm.sax | 78 +++++++------ result/HTML/entities.html.err | 4 +- result/HTML/test3.html | 4 +- result/HTML/test3.html.err | 6 +- result/HTML/test3.html.sax | 20 ++-- result/HTML/wired.html | 83 +++++++------- result/HTML/wired.html.err | 197 ++++++++++++++++++-------------- result/HTML/wired.html.sax | 117 ++++++++++--------- 14 files changed, 393 insertions(+), 320 deletions(-) diff --git a/ChangeLog b/ChangeLog index 9a947ab9..9ad5c1ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Thu Apr 12 17:41:09 CEST 2001 Daniel Veillard + + * HTMLparser.c result/HTML/*: revamped the way the HTML + parser handles end of tags or end of input + Thu Apr 12 10:50:34 CEST 2001 Daniel Veillard * tree.[ch] : added xmlDocCopyNode for gdome2 support diff --git a/HTMLparser.c b/HTMLparser.c index 39447e3a..4b3bac86 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -445,7 +445,7 @@ htmlElemDesc html40ElementTable[] = { { "th", 0, 1, 0, 0, 0, 0, "table header cell" }, { "thead", 0, 1, 0, 0, 0, 0, "table header " }, { "title", 0, 0, 0, 0, 0, 0, "document title " }, -{ "tr", 0, 1, 0, 0, 0, 0, "table row " }, +{ "tr", 0, 0, 0, 0, 0, 0, "table row " }, { "tt", 0, 0, 0, 0, 0, 0, "teletype or monospaced text style" }, { "u", 0, 0, 0, 0, 1, 1, "underlined text style" }, { "ul", 0, 0, 0, 0, 0, 0, "unordered list " }, @@ -661,6 +661,7 @@ htmlCheckAutoClose(const xmlChar *newtag, const xmlChar *oldtag) { * htmlAutoCloseOnClose: * @ctxt: an HTML parser context * @newtag: The new tag name + * @force: force the tag closure * * The HTmL DtD allows an ending tag to implicitely close other tags. */ @@ -688,11 +689,7 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnClose: %s closes %s\n", newtag, ctxt->name); #endif } else { - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Opening and ending tag mismatch: %s and %s\n", - newtag, ctxt->name); - ctxt->wellFormed = 0; + return; } if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); @@ -706,6 +703,39 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { } } +/** + * htmlAutoCloseOnEnd: + * @ctxt: an HTML parser context + * + * Close all remaining tags at the end of the stream + */ +static void +htmlAutoCloseOnEnd(htmlParserCtxtPtr ctxt) { + xmlChar *oldname; + int i; + + if (ctxt->nameNr == 0) + return; +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"Close of stack: %d elements\n", ctxt->nameNr); +#endif + + for (i = (ctxt->nameNr - 1);i >= 0;i--) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]); +#endif + if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) + ctxt->sax->endElement(ctxt->userData, ctxt->name); + oldname = htmlnamePop(ctxt); + if (oldname != NULL) { +#ifdef DEBUG + xmlGenericError(xmlGenericErrorContext,"htmlAutoCloseOnEnd: popped %s\n", oldname); +#endif + xmlFree(oldname); + } + } +} + /** * htmlAutoClose: * @ctxt: an HTML parser context @@ -737,9 +767,8 @@ htmlAutoClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { } } if (newtag == NULL) { - htmlAutoCloseOnClose(ctxt, BAD_CAST"head"); - htmlAutoCloseOnClose(ctxt, BAD_CAST"body"); - htmlAutoCloseOnClose(ctxt, BAD_CAST"html"); + htmlAutoCloseOnEnd(ctxt); + return; } while ((newtag == NULL) && (ctxt->name != NULL) && ((xmlStrEqual(ctxt->name, BAD_CAST"head")) || @@ -3266,10 +3295,8 @@ htmlParseContent(htmlParserCtxtPtr ctxt) { * Fourth : end of the resource */ else if (CUR == 0) { - int level = ctxt->nodeNr; - htmlAutoClose(ctxt, NULL); - if (level == ctxt->nodeNr) - break; + htmlAutoCloseOnEnd(ctxt); + break; } /* @@ -3439,29 +3466,6 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { if (ctxt->nameNr < depth) break; } - if (!IS_CHAR(CUR)) { - /************ - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Premature end of data in tag %s\n", currentNode); - ctxt->wellFormed = 0; - *************/ - - /* - * end of parsing of this node. - */ - nodePop(ctxt); - oldname = htmlnamePop(ctxt); -#ifdef DEBUG - xmlGenericError(xmlGenericErrorContext,"Premature end of tag %s : popping out %s\n", name, oldname); -#endif - if (oldname != NULL) - xmlFree(oldname); - if (currentNode != NULL) - xmlFree(currentNode); - return; - } - /* * Capture end position and add node */ @@ -3472,6 +3476,10 @@ htmlParseElement(htmlParserCtxtPtr ctxt) { node_info.node = ctxt->node; xmlParserAddNodeInfo(ctxt, &node_info); } + if (!IS_CHAR(CUR)) { + htmlAutoCloseOnEnd(ctxt); + } + if (currentNode != NULL) xmlFree(currentNode); } @@ -3556,7 +3564,7 @@ htmlParseDocument(htmlParserCtxtPtr ctxt) { * autoclose */ if (CUR == 0) - htmlAutoClose(ctxt, NULL); + htmlAutoCloseOnEnd(ctxt); /* @@ -3899,7 +3907,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { else avail = in->buf->buffer->use - (in->cur - in->base); if ((avail == 0) && (terminate)) { - htmlAutoClose(ctxt, NULL); + htmlAutoCloseOnEnd(ctxt); if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { /* * SAX: end of the document processing. @@ -4077,9 +4085,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { goto done; } else { ctxt->errNo = XML_ERR_DOCUMENT_END; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Extra content at the end of the document\n"); ctxt->wellFormed = 0; ctxt->instate = XML_PARSER_EOF; #ifdef DEBUG_PUSH @@ -4491,7 +4496,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) { } done: if ((avail == 0) && (terminate)) { - htmlAutoClose(ctxt, NULL); + htmlAutoCloseOnEnd(ctxt); if ((ctxt->nameNr == 0) && (ctxt->instate != XML_PARSER_EOF)) { /* * SAX: end of the document processing. @@ -4555,9 +4560,6 @@ htmlParseChunk(htmlParserCtxtPtr ctxt, const char *chunk, int size, (ctxt->instate != XML_PARSER_EPILOG) && (ctxt->instate != XML_PARSER_MISC)) { ctxt->errNo = XML_ERR_DOCUMENT_END; - if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) - ctxt->sax->error(ctxt->userData, - "Extra content at the end of the document\n"); ctxt->wellFormed = 0; } if (ctxt->instate != XML_PARSER_EOF) { diff --git a/result/HTML/autoclose3.html.err b/result/HTML/autoclose3.html.err index 09b9e332..e69de29b 100644 --- a/result/HTML/autoclose3.html.err +++ b/result/HTML/autoclose3.html.err @@ -1,3 +0,0 @@ -./test/HTML/autoclose3.html:4: error: Opening and ending tag mismatch: body and ul - -^ diff --git a/result/HTML/autoclose3.html.sax b/result/HTML/autoclose3.html.sax index e8ed3303..25e06ead 100644 --- a/result/HTML/autoclose3.html.sax +++ b/result/HTML/autoclose3.html.sax @@ -13,7 +13,6 @@ SAX.startElement(li) SAX.characters(item 2 , 7) SAX.endElement(li) -SAX.error: Opening and ending tag mismatch: body and ul SAX.endElement(ul) SAX.endElement(body) SAX.endElement(html) diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm index c5a4f669..0738497c 100644 --- a/result/HTML/doc3.htm +++ b/result/HTML/doc3.htm @@ -803,15 +803,15 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0, -
Search news
+ +Search news


News archive -
- + - +
+ + + + + + diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err index 46aae6f0..76609fcf 100644 --- a/result/HTML/doc3.htm.err +++ b/result/HTML/doc3.htm.err @@ -3,19 +3,19 @@ ^ ./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';' href="http://ads.gamesquad.net/addclick.exe/adclick.cgi?REGION=game|tech|ent&i - ^ + ^ ./test/HTML/doc3.htm:47: error: htmlParseEntityRef: expecting ';' _top">



Code:BP6-hd

Code:BP6-hd

Code:BP6-hd

Code:BP6-hd

Code:BP6-hd

 

^ -./test/HTML/doc3.htm:834: error: Unexpected end tag : center +./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: center and form width="100%"> 

^ -./test/HTML/doc3.htm:834: error: Unexpected end tag : tr +./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tr and form width="100%"> 

^ -./test/HTML/doc3.htm:834: error: Unexpected end tag : tbody +./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tbody and form width="100%"> 

^ -./test/HTML/doc3.htm:834: error: Unexpected end tag : table +./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: table and form width="100%"> 

- ^ -./test/HTML/doc3.htm:835: error: Unexpected end tag : td + ^ +./test/HTML/doc3.htm:835: error: Opening and ending tag mismatch: td and form
  ^ +./test/HTML/doc3.htm:844: error: Opening and ending tag mismatch: div and form +,arial">Site design by Tim Brinkley --> + ^ +./test/HTML/doc3.htm:846: error: Opening and ending tag mismatch: html and form + + ^ diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax index de3a6820..a7eb7f8d 100644 --- a/result/HTML/doc3.htm.sax +++ b/result/HTML/doc3.htm.sax @@ -2658,15 +2658,13 @@ SAX.characters(News archive, 26) SAX.endElement(a) SAX.error: Opening and ending tag mismatch: font and form -SAX.endElement(form) -SAX.endElement(font) SAX.characters( , 1) -SAX.error: Unexpected end tag : form -SAX.endElement(center) -SAX.endElement(td) -SAX.endElement(tr) -SAX.endElement(tbody) -SAX.endElement(table) +SAX.endElement(form) +SAX.error: Opening and ending tag mismatch: center and font +SAX.error: Opening and ending tag mismatch: td and font +SAX.error: Opening and ending tag mismatch: tr and font +SAX.error: Opening and ending tag mismatch: tbody and font +SAX.error: Opening and ending tag mismatch: table and font SAX.comment(
@@ -2712,8 +2710,6 @@ SAX.characters( SAX.startElement(b) SAX.startElement(noscript) SAX.error: Opening and ending tag mismatch: b and noscript -SAX.endElement(noscript) -SAX.endElement(b) SAX.startElement(a, href='http://www.goto.com/d/search/ssn/?fromGIF=true', target='_blank') SAX.startElement(img, align='bottom', border='0', height='90', ismap, src='doc3_files/100x90.gif', width='100') SAX.endElement(img) @@ -2726,7 +2722,7 @@ SAX.endElement(a) SAX.error: Unexpected end tag : a SAX.endElement(b) SAX.startElement(b) -SAX.error: Unexpected end tag : noscript +SAX.error: Opening and ending tag mismatch: noscript and b SAX.endElement(b) SAX.startElement(b) SAX.comment( END GoTo.com Search Box ) @@ -2759,8 +2755,6 @@ SAX.characters( , 15) SAX.endElement(p) SAX.error: Opening and ending tag mismatch: form and center -SAX.endElement(center) -SAX.endElement(form) SAX.comment( Pricewatch Search Box ) SAX.startElement(a, href='http://www.puicorp.com/bp6specials.htm', target='_BLANK') SAX.startElement(img, src='doc3_files/puibp6.gif') @@ -2789,18 +2783,11 @@ SAX.endElement(font) SAX.endElement(a) SAX.characters( , 1) SAX.error: Unexpected end tag : p -SAX.error: Opening and ending tag mismatch: center and td -SAX.endElement(td) -SAX.endElement(tr) -SAX.error: Opening and ending tag mismatch: center and tbody -SAX.endElement(tbody) -SAX.error: Opening and ending tag mismatch: center and table -SAX.endElement(table) SAX.endElement(center) -SAX.endElement(td) -SAX.endElement(tr) -SAX.endElement(tbody) -SAX.endElement(table) +SAX.error: Opening and ending tag mismatch: td and form +SAX.error: Opening and ending tag mismatch: tr and form +SAX.error: Opening and ending tag mismatch: tbody and form +SAX.error: Opening and ending tag mismatch: table and form SAX.characters( , 8) SAX.startElement(table, bgcolor='silver', border='0', cellpadding='0', cellspacing='0', height='100%', width='100%') @@ -2819,24 +2806,24 @@ SAX.endElement(tr) SAX.endElement(tbody) SAX.endElement(table) SAX.error: Unexpected end tag : p -SAX.error: Unexpected end tag : center -SAX.error: Unexpected end tag : tr -SAX.error: Unexpected end tag : tbody -SAX.error: Unexpected end tag : table +SAX.error: Opening and ending tag mismatch: center and form +SAX.error: Opening and ending tag mismatch: tr and form +SAX.error: Opening and ending tag mismatch: tbody and form +SAX.error: Opening and ending tag mismatch: table and form SAX.comment(
) SAX.characters( , 2) SAX.startElement(center) SAX.endElement(center) -SAX.error: Unexpected end tag : td -SAX.error: Unexpected end tag : tr +SAX.error: Opening and ending tag mismatch: td and form +SAX.error: Opening and ending tag mismatch: tr and form SAX.startElement(tr) SAX.startElement(td, colspan='3', valign='TOP', height='70') SAX.characters( , 2) SAX.endElement(td) SAX.characters( , 1) SAX.endElement(tr) -SAX.error: Unexpected end tag : table +SAX.error: Opening and ending tag mismatch: table and form SAX.characters( , 2) SAX.startElement(table, border='0', width='780') @@ -2871,16 +2858,37 @@ SAX.endElement(tr) SAX.comment(

Site design by Tim Brinkley ) SAX.endElement(tbody) SAX.endElement(table) -SAX.endElement(div) -SAX.ignorableWhitespace( +SAX.error: Opening and ending tag mismatch: div and form +SAX.characters( , 2) SAX.startElement(script) SAX.cdata( window.open=NS_ActualOpen; , 28) SAX.endElement(script) +SAX.characters( +, 2) +SAX.error: Opening and ending tag mismatch: body and form +SAX.error: Opening and ending tag mismatch: html and form SAX.ignorableWhitespace( , 2) +SAX.endElement(form) +SAX.endElement(noscript) +SAX.endElement(b) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(tbody) +SAX.endElement(table) +SAX.endElement(font) +SAX.endElement(center) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(tbody) +SAX.endElement(table) +SAX.endElement(center) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(tbody) +SAX.endElement(table) +SAX.endElement(div) SAX.endElement(body) SAX.endElement(html) -SAX.ignorableWhitespace( -, 2) SAX.endDocument() diff --git a/result/HTML/entities.html.err b/result/HTML/entities.html.err index a2bd6d07..ca98132d 100644 --- a/result/HTML/entities.html.err +++ b/result/HTML/entities.html.err @@ -1,9 +1,9 @@ ./test/HTML/entities.html:1: error: htmlParseEntityRef: expecting ';'

- ^ + ^ ./test/HTML/entities.html:1: error: htmlParseEntityRef: no name

- ^ + ^ ./test/HTML/entities.html:3: error: htmlParseEntityRef: expecting ';' a&b ^ diff --git a/result/HTML/test3.html b/result/HTML/test3.html index 597a1a42..4b916c70 100644 --- a/result/HTML/test3.html +++ b/result/HTML/test3.html @@ -38,7 +38,8 @@

Class ProblemDomain.Note

-

Links

+

+Links +

diff --git a/result/HTML/test3.html.err b/result/HTML/test3.html.err index 37414fce..a8c99672 100644 --- a/result/HTML/test3.html.err +++ b/result/HTML/test3.html.err @@ -7,6 +7,6 @@ ./test/HTML/test3.html:27: error: Opening and ending tag mismatch: h4 and b

Links

^ -./test/HTML/test3.html:27: error: Unexpected end tag : b -

Links

- ^ +./test/HTML/test3.html:34: error: Opening and ending tag mismatch: html and h4 + + ^ diff --git a/result/HTML/test3.html.sax b/result/HTML/test3.html.sax index 1f76e50f..bb969557 100644 --- a/result/HTML/test3.html.sax +++ b/result/HTML/test3.html.sax @@ -171,9 +171,7 @@ SAX.startElement(b) SAX.characters(Links, 5) SAX.error: Opening and ending tag mismatch: h4 and b SAX.endElement(b) -SAX.endElement(h4) -SAX.error: Unexpected end tag : b -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(ul) SAX.startElement(li) @@ -185,11 +183,11 @@ SAX.characters(HumanInterface, 14) SAX.endElement(a) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dir) SAX.endElement(dir) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(ul) SAX.startElement(li) @@ -201,11 +199,11 @@ SAX.characters(DataManagement.FlatFile, 23) SAX.endElement(a) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dir) SAX.endElement(dir) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(ul) SAX.startElement(li) @@ -217,14 +215,16 @@ SAX.characters(DataManagement, 14) SAX.endElement(a) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dir) SAX.endElement(dir) +SAX.characters( +, 2) +SAX.error: Opening and ending tag mismatch: html and h4 SAX.ignorableWhitespace( , 2) +SAX.endElement(h4) SAX.endElement(body) SAX.endElement(html) -SAX.ignorableWhitespace( -, 2) SAX.endDocument() diff --git a/result/HTML/wired.html b/result/HTML/wired.html index 66530977..9f4a78e3 100644 --- a/result/HTML/wired.html +++ b/result/HTML/wired.html @@ -127,17 +127,17 @@ Sports Finance FREE DELIVERY - - + + + -
+
+ - -
  -
STOCKS
Get Quote:
@@ -206,7 +206,6 @@ -
@@ -219,7 +218,7 @@

+ WIRED MAGAZINE @@ -272,11 +271,7 @@ or PointCast
- - - - - +
- @@ -390,9 +381,8 @@ or PointCastExecutive Summary
CEOs, COOs, CIOs unite.
-Sponsored by Vignette - -
+Sponsored by +Vignette

Making the Grade @@ -409,10 +399,8 @@ or PointCast An IS/IT resource
Sponsored by Sprint - -
- +wired.com&BANNER=Sprint" style="text-decoration:none"> +Sprint

Y2K Watch @@ -423,16 +411,6 @@ wired.com&BANNER=Sprint" style="text-decoration:none">
More Hoo-Ha
 
-
- - - - - - - - -
@@ -456,9 +434,7 @@ Contruction workers in Berlin opened an old wound in the German psyche this week
- -

@@ -316,10 +311,6 @@ or PointCast Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.

- - -
  CURRENT HOO-HA
  MEANWHILE...
- 
+ 

Other Top Stories @@ -586,10 +562,7 @@ Contruction workers in Berlin opened an old wound in the German psyche this week BBC News

- - - - +
@@ -627,7 +600,35 @@ Contruction workers in Berlin opened an old wound in the German psyche this week -
+
+ +
+ + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err index e9e588d8..86bd1c29 100644 --- a/result/HTML/wired.html.err +++ b/result/HTML/wired.html.err @@ -1,162 +1,162 @@ ./test/HTML/wired.html:6: error: htmlParseEntityRef: expecting ';'
^ -./test/HTML/wired.html:170: error: Unexpected end tag : form +./test/HTML/wired.html:170: error: Opening and ending tag mismatch: tr and form - ^ + ^ +./test/HTML/wired.html:171: error: Opening and ending tag mismatch: table and td + + ^ ./test/HTML/wired.html:244: error: Opening and ending tag mismatch: td and form ^ +./test/HTML/wired.html:244: error: Opening and ending tag mismatch: tr and form + + ^ ./test/HTML/wired.html:248: error: htmlParseEntityRef: expecting ';' MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_searc - ^ -./test/HTML/wired.html:265: error: Unexpected end tag : form - - ^ + ^ +./test/HTML/wired.html:266: error: Opening and ending tag mismatch: table and td + + ^ ./test/HTML/wired.html:346: error: Opening and ending tag mismatch: td and font ^ +./test/HTML/wired.html:347: error: Opening and ending tag mismatch: tr and font + + ^ +./test/HTML/wired.html:349: error: Opening and ending tag mismatch: table and font + + ^ ./test/HTML/wired.html:374: error: htmlParseEntityRef: no name a, sans-serif">
Rants - ^ + ^ ./test/HTML/wired.html:374: error: Opening and ending tag mismatch: td and font Readers on Apple's G4 ... AOL's passwords ... MS vs. Linux.



+ ^ ./test/HTML/wired.html:402: error: Opening and ending tag mismatch: a and font w.vignette.com/" style="text-decoration:none">Vignette
VignetteSpr ^ ./test/HTML/wired.html:406: error: Opening and ending tag mismatch: a and font com&BANNER=Sprint" style="text-decoration:none">SprintSprint' =Sprint" style="text-decoration:none">SprintSprint ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - - ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - - ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - - ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - - ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - - ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - - ^ -./test/HTML/wired.html:412: error: Opening and ending tag mismatch: td and font - +./test/HTML/wired.html:413: error: Opening and ending tag mismatch: tr and font + ^ ./test/HTML/wired.html:430: error: htmlParseEntityRef: expecting ';' href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&lpv=1">Lycos + ^ +./test/HTML/wired.html:461: error: Opening and ending tag mismatch: td and font + + ^ +./test/HTML/wired.html:462: error: Opening and ending tag mismatch: tr and font + + ^ +./test/HTML/wired.html:508: error: Opening and ending tag mismatch: table and font + + ^ +./test/HTML/wired.html:512: error: Opening and ending tag mismatch: body and font + + ^ +./test/HTML/wired.html:513: error: Opening and ending tag mismatch: html and font + + ^ diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax index b94358ca..ab0c6c48 100644 --- a/result/HTML/wired.html.sax +++ b/result/HTML/wired.html.sax @@ -779,8 +779,6 @@ SAX.characters( , 2) SAX.characters( , 1) SAX.error: Opening and ending tag mismatch: td and form -SAX.endElement(form) -SAX.endElement(td) SAX.characters( , 4) SAX.startElement(td, valign='top', bgcolor='#99FF99') @@ -794,12 +792,12 @@ SAX.characters( SAX.endElement(td) SAX.characters( , 2) -SAX.endElement(tr) +SAX.error: Opening and ending tag mismatch: tr and form SAX.characters( , 4) -SAX.error: Unexpected end tag : form +SAX.endElement(form) SAX.characters( , 1) -SAX.endElement(table) +SAX.error: Opening and ending tag mismatch: table and td SAX.endElement(td) SAX.characters( , 3) @@ -1077,9 +1075,7 @@ SAX.endElement(option) SAX.endElement(select) SAX.endElement(font) SAX.error: Opening and ending tag mismatch: td and form -SAX.endElement(form) -SAX.endElement(td) -SAX.endElement(tr) +SAX.error: Opening and ending tag mismatch: tr and form SAX.characters( , 2) SAX.startElement(tr, align='left', valign='top') @@ -1141,10 +1137,10 @@ SAX.characters( , 9) SAX.endElement(tr) SAX.characters( , 2) -SAX.error: Unexpected end tag : form +SAX.endElement(form) SAX.characters( , 9) -SAX.endElement(table) +SAX.error: Opening and ending tag mismatch: table and td SAX.characters( , 2) @@ -1456,23 +1452,21 @@ SAX.endElement(img) SAX.characters( , 1) SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.endElement(td) SAX.characters( , 3) -SAX.endElement(tr) +SAX.error: Opening and ending tag mismatch: tr and font SAX.characters( , 2) -SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.error: Opening and ending tag mismatch: table and font +SAX.characters( , 2) SAX.comment( end lower left side Navigation ) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.comment( CONTENT TABLE ) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00') @@ -1632,13 +1626,9 @@ SAX.startElement(br) SAX.endElement(br) SAX.characters( , 2) SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.endElement(td) SAX.characters( , 9) -SAX.endElement(tr) +SAX.error: Opening and ending tag mismatch: tr and font SAX.characters( , 1) SAX.comment( Commentary Frag End ) @@ -1894,9 +1884,7 @@ SAX.startElement(a, href='http://r.wired.com/r/wn_exec_r_vign/http://www.vignett SAX.startElement(font, color='#000000') SAX.characters(Vignette, 8) SAX.error: Opening and ending tag mismatch: a and font -SAX.endElement(font) -SAX.endElement(a) -SAX.endElement(i) +SAX.error: Opening and ending tag mismatch: i and font SAX.endElement(font) SAX.startElement(br) SAX.endElement(br) @@ -1959,12 +1947,10 @@ wired.com&BANNER=Sprint', style='text-decoration:none') SAX.startElement(font, color='#000000') SAX.characters(Sprint, 6) SAX.error: Opening and ending tag mismatch: a and font -SAX.endElement(font) -SAX.endElement(a) -SAX.endElement(i) +SAX.error: Opening and ending tag mismatch: i and font SAX.error: End tag : expected '>' SAX.endElement(font) -SAX.endElement(font) +SAX.error: Opening and ending tag mismatch: font and a SAX.startElement(br) SAX.endElement(br) SAX.startElement(br) @@ -2010,25 +1996,9 @@ SAX.characters( , 2) SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and font -SAX.endElement(font) -SAX.endElement(td) SAX.characters( , 1) -SAX.endElement(tr) +SAX.error: Opening and ending tag mismatch: tr and font SAX.characters( , 1) SAX.comment( start of Gen News ) @@ -2114,7 +2084,7 @@ SAX.characters( SAX.comment( end of Gen News ) SAX.characters( , 1) -SAX.endElement(table) +SAX.error: Opening and ending tag mismatch: table and font SAX.characters( @@ -2661,10 +2631,10 @@ SAX.comment( - - - - - - - - - - - - ) SAX.characters( , 6) -SAX.endElement(td) +SAX.error: Opening and ending tag mismatch: td and font SAX.characters( , 3) -SAX.endElement(tr) +SAX.error: Opening and ending tag mismatch: tr and font SAX.characters( , 4) @@ -2831,20 +2801,57 @@ SAX.characters( SAX.endElement(tr) SAX.characters( , 1) -SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.error: Opening and ending tag mismatch: table and font +SAX.characters( , 3) SAX.startElement(br) SAX.endElement(br) -SAX.ignorableWhitespace( +SAX.characters( , 1) -SAX.endElement(body) -SAX.ignorableWhitespace( +SAX.error: Opening and ending tag mismatch: body and font +SAX.characters( , 1) -SAX.endElement(html) +SAX.error: Opening and ending tag mismatch: html and font SAX.ignorableWhitespace( , 2) +SAX.endElement(font) +SAX.endElement(a) +SAX.endElement(i) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(a) +SAX.endElement(i) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(font) +SAX.endElement(font) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(table) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(table) +SAX.endElement(font) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(table) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(table) +SAX.endElement(td) +SAX.endElement(tr) +SAX.endElement(table) +SAX.endElement(body) +SAX.endElement(html) SAX.endDocument()