diff --git a/ChangeLog b/ChangeLog index 2a45a170..7a5516bd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Fri May 11 16:07:13 CEST 2001 Daniel Veillard + + * HTMLparser.c: Patch from Jonas Borgström + (htmlGetEndPriority): New function, returns + the priority of a certain element. + (htmlAutoCloseOnClose): Only close inline elements if they + all have lower or equal priority. + * result/HTML: this of course changed a number of tests results. + Thu May 10 17:30:22 CEST 2001 Daniel Veillard * xmlIO.c catalog.c: plugged in the default catalog resolution diff --git a/HTMLparser.c b/HTMLparser.c index 870201dd..5e79464d 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -565,15 +565,32 @@ static const char *htmlScriptAttributes[] = { }; /* - * end tags that imply the end of the inside elements - */ -const char *htmlEndClose[] = { -"head", -"body", -"html", -NULL -}; + * This table is used by the htmlparser to know what to do with + * broken html pages. By assigning different priorities to different + * elements the parser can decide how to handle extra endtags. + * Endtags are only allowed to close elements with lower or equal + * priority. + */ +typedef struct { + const char *name; + int priority; +} elementPriority; + +const elementPriority htmlEndPriority[] = { + {"div", 150}, + {"td", 160}, + {"th", 160}, + {"tr", 170}, + {"thead", 180}, + {"tbody", 180}, + {"tfoot", 180}, + {"table", 190}, + {"head", 200}, + {"body", 200}, + {"html", 220}, + {NULL, 100} /* Default priority */ +}; static const char** htmlStartCloseIndex[100]; static int htmlStartCloseIndexinitialized = 0; @@ -627,6 +644,23 @@ htmlTagLookup(const xmlChar *tag) { return(NULL); } +/** + * htmlGetEndPriority: + * @name: The name of the element to look up the priority for. + * + * Return value: The "endtag" priority. 
+ **/ +static int +htmlGetEndPriority (const xmlChar *name) { + int i = 0; + + while ((htmlEndPriority[i].name != NULL) && + (!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name))) + i++; + + return(htmlEndPriority[i].priority); +} + /** * htmlCheckAutoClose: * @newtag: The new tag name @@ -674,7 +708,7 @@ static void htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { htmlElemDescPtr info; xmlChar *oldname; - int i, endCloses = 0; + int i, priority; #ifdef DEBUG xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr); @@ -682,15 +716,20 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]); #endif + priority = htmlGetEndPriority (newtag); + for (i = (ctxt->nameNr - 1);i >= 0;i--) { + if (xmlStrEqual(newtag, ctxt->nameTab[i])) break; + /* + * A misplaced endtag can only close elements with lower + * or equal priority, so if we find an element with higher + * priority before we find an element with + * matching name, we just ignore this endtag + */ + if (htmlGetEndPriority (ctxt->nameTab[i]) > priority) return; } if (i < 0) return; - for (i = 0; (htmlEndClose[i] != NULL);i++) - if (xmlStrEqual(newtag, (const xmlChar *) htmlEndClose[i])) { - endCloses = 1; - break; - } while (!xmlStrEqual(newtag, ctxt->name)) { info = htmlTagLookup(ctxt->name); @@ -707,8 +746,6 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) { "Opening and ending tag mismatch: %s and %s\n", newtag, ctxt->name); ctxt->wellFormed = 0; - } else if (endCloses == 0) { - return; } if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL)) ctxt->sax->endElement(ctxt->userData, ctxt->name); diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm index fd9f5148..3ed7223c 100644 --- a/result/HTML/doc3.htm +++ b/result/HTML/doc3.htm @@ -97,8 +97,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, 
scrollbars=0, - - +
- +

@@ -784,8 +783,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
-
+

-

PC Price @@ -880,10 +877,9 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,

 
-

+
- -  

@@ -893,8 +889,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,

- - + diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err index 86783eaa..949fefbf 100644 --- a/result/HTML/doc3.htm.err +++ b/result/HTML/doc3.htm.err @@ -37,12 +37,9 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708">

 

^ -./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tr and td -width="100%"> 

- ^ -./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tbody and td -width="100%"> 

- ^ -./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: table and td -width="100%"> 

- ^ -./test/HTML/doc3.htm:836: error: Opening and ending tag mismatch: table and tbody +./test/HTML/doc3.htm:835: error: Unexpected end tag : td +
  ^ -./test/HTML/doc3.htm:844: error: Opening and ending tag mismatch: div and tbody -,arial">Site design by Tim Brinkley -->) SAX.characters( , 2) SAX.startElement(center) SAX.endElement(center) -SAX.endElement(td) -SAX.endElement(tr) +SAX.error: Unexpected end tag : td +SAX.error: Unexpected end tag : tr SAX.startElement(tr) SAX.startElement(td, colspan='3', valign='TOP', height='70') SAX.characters( , 2) SAX.endElement(td) SAX.characters( , 1) SAX.endElement(tr) -SAX.error: Opening and ending tag mismatch: table and tbody +SAX.error: Unexpected end tag : table SAX.characters( , 2) SAX.startElement(table, border='0', width='780') @@ -2864,17 +2863,14 @@ SAX.endElement(tr) SAX.comment(

Site design by Tim Brinkley ) SAX.endElement(tbody) SAX.endElement(table) -SAX.error: Opening and ending tag mismatch: div and tbody -SAX.characters( +SAX.endElement(div) +SAX.ignorableWhitespace( , 2) SAX.startElement(script) SAX.cdata( window.open=NS_ActualOpen; , 28) SAX.endElement(script) -SAX.characters( +SAX.ignorableWhitespace( , 2) -SAX.endElement(tbody) -SAX.endElement(table) -SAX.endElement(div) SAX.endElement(body) SAX.endElement(html) SAX.ignorableWhitespace( diff --git a/result/HTML/wired.html b/result/HTML/wired.html index e62d5c59..66530977 100644 --- a/result/HTML/wired.html +++ b/result/HTML/wired.html @@ -127,17 +127,17 @@ Sports Finance FREE DELIVERY - - - -
+
+ - + + +
  - -
STOCKS
Get Quote:
@@ -164,8 +164,7 @@
Portfolios
FIND A BOOK
- +
- +
@@ -207,6 +206,7 @@ +
@@ -219,7 +219,7 @@

WIRED MAGAZINE
@@ -629,8 +629,5 @@ Contruction workers in Berlin opened an old wound in the German psyche this week

-
- diff --git a/result/HTML/wired.html.err b/result/HTML/wired.html.err index ecf58e8f..82415e0e 100644 --- a/result/HTML/wired.html.err +++ b/result/HTML/wired.html.err @@ -181,27 +181,15 @@ option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OP ./test/HTML/wired.html:97: error: htmlParseEntityRef: expecting ';' lue="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&M ^ -./test/HTML/wired.html:165: error: Opening and ending tag mismatch: td and form - - ^ -./test/HTML/wired.html:170: error: Opening and ending tag mismatch: tr and form +./test/HTML/wired.html:170: error: Unexpected end tag : form - ^ -./test/HTML/wired.html:171: error: Opening and ending tag mismatch: table and td - - ^ -./test/HTML/wired.html:244: error: Opening and ending tag mismatch: td and form - - ^ -./test/HTML/wired.html:244: error: Opening and ending tag mismatch: tr and form - - ^ + ^ ./test/HTML/wired.html:248: error: htmlParseEntityRef: expecting ';' MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_searc ^ -./test/HTML/wired.html:266: error: Opening and ending tag mismatch: table and td - - ^ +./test/HTML/wired.html:265: error: Unexpected end tag : form + + ^ ./test/HTML/wired.html:346: error: Opening and ending tag mismatch: td and font ^ diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax index b90ae2db..0edd57eb 100644 --- a/result/HTML/wired.html.sax +++ b/result/HTML/wired.html.sax @@ -778,7 +778,8 @@ SAX.endElement(input) SAX.characters( , 2) SAX.characters( , 1) -SAX.error: Opening and ending tag mismatch: td and form +SAX.endElement(form) +SAX.endElement(td) SAX.characters( , 4) SAX.startElement(td, valign='top', bgcolor='#99FF99') @@ -792,12 +793,12 @@ SAX.characters( SAX.endElement(td) SAX.characters( , 2) -SAX.error: Opening and ending tag mismatch: tr and form +SAX.endElement(tr) SAX.characters( , 4) -SAX.endElement(form) +SAX.error: Unexpected end tag : form SAX.characters( , 1) 
-SAX.error: Opening and ending tag mismatch: table and td +SAX.endElement(table) SAX.endElement(td) SAX.characters( , 3) @@ -1074,8 +1075,9 @@ SAX.characters(Other SAX.endElement(option) SAX.endElement(select) SAX.endElement(font) -SAX.error: Opening and ending tag mismatch: td and form -SAX.error: Opening and ending tag mismatch: tr and form +SAX.endElement(form) +SAX.endElement(td) +SAX.endElement(tr) SAX.characters( , 2) SAX.startElement(tr, align='left', valign='top') @@ -1137,10 +1139,10 @@ SAX.characters( , 9) SAX.endElement(tr) SAX.characters( , 2) -SAX.endElement(form) +SAX.error: Unexpected end tag : form SAX.characters( , 9) -SAX.error: Opening and ending tag mismatch: table and td +SAX.endElement(table) SAX.characters( , 2) @@ -1461,14 +1463,14 @@ SAX.characters( , 2) SAX.endElement(table) -SAX.characters( +SAX.ignorableWhitespace( , 2) SAX.comment( end lower left side Navigation ) -SAX.characters( +SAX.ignorableWhitespace( , 1) SAX.comment( CONTENT TABLE ) -SAX.characters( +SAX.ignorableWhitespace( , 2) SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00') @@ -2828,20 +2830,14 @@ SAX.endElement(tr) SAX.characters( , 1) SAX.endElement(table) -SAX.characters( +SAX.ignorableWhitespace( , 3) SAX.startElement(br) SAX.endElement(br) -SAX.characters( +SAX.ignorableWhitespace( , 1) -SAX.endElement(td) -SAX.endElement(tr) -SAX.endElement(table) -SAX.endElement(td) -SAX.endElement(tr) -SAX.endElement(table) SAX.endElement(body) SAX.ignorableWhitespace( , 1)