1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

- HTMLparser.c: Patch from Jonas Borgström

(htmlGetEndPriority): New function, returns
the priority of a certain element.
(htmlAutoCloseOnClose): Only close inline elements if they
all have lower or equal priority.
- result/HTML: this of course changed a number of tests results.
Daniel
This commit is contained in:
Daniel Veillard
2001-05-11 14:18:03 +00:00
parent 7d6fd219f2
commit 0a2a163d2e
8 changed files with 121 additions and 115 deletions

View File

@ -1,3 +1,12 @@
Fri May 11 16:07:13 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* HTMLparser.c: Patch from Jonas Borgström
(htmlGetEndPriority): New function, returns
the priority of a certain element.
(htmlAutoCloseOnClose): Only close inline elements if they
all have lower or equal priority.
* result/HTML: this of course changed a number of tests results.
Thu May 10 17:30:22 CEST 2001 Daniel Veillard <Daniel.Veillard@imag.fr>
* xmlIO.c catalog.c: plugged in the default catalog resolution

View File

@ -565,15 +565,32 @@ static const char *htmlScriptAttributes[] = {
};
/*
* end tags that imply the end of the inside elements
* This table is used by the htmlparser to know what to do with
* broken html pages. By assigning different priorities to different
* elements the parser can decide how to handle extra endtags.
* Endtags are only allowed to close elements with lower or equal
* priority.
*/
const char *htmlEndClose[] = {
"head",
"body",
"html",
NULL
};
/*
 * One row of the end-tag priority table: an element name paired with
 * the priority used when deciding whether a stray end tag may close it.
 */
typedef struct {
const char *name; /* element name; NULL marks the terminating entry */
int priority; /* end-tag priority of that element */
} elementPriority;
/*
 * End-tag priorities, listed from lowest to highest. An end tag may only
 * close open elements whose priority is lower than or equal to its own.
 * The table is scanned linearly by htmlGetEndPriority() and must end with
 * the NULL-named entry, whose priority is returned for any element that
 * is not listed.
 */
const elementPriority htmlEndPriority[] = {
{"div", 150},
{"td", 160},
{"th", 160},
{"tr", 170},
{"thead", 180},
{"tbody", 180},
{"tfoot", 180},
{"table", 190},
{"head", 200},
{"body", 200},
{"html", 220},
{NULL, 100} /* Default priority */
};
static const char** htmlStartCloseIndex[100];
static int htmlStartCloseIndexinitialized = 0;
@ -627,6 +644,23 @@ htmlTagLookup(const xmlChar *tag) {
return(NULL);
}
/**
* htmlGetEndPriority:
* @name: The name of the element to look up the priority for.
*
* Return value: The "endtag" priority.
**/
static int
htmlGetEndPriority (const xmlChar *name) {
int i = 0;
while ((htmlEndPriority[i].name != NULL) &&
(!xmlStrEqual((const xmlChar *)htmlEndPriority[i].name, name)))
i++;
return(htmlEndPriority[i].priority);
}
/**
* htmlCheckAutoClose:
* @newtag: The new tag name
@ -674,7 +708,7 @@ static void
htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
htmlElemDescPtr info;
xmlChar *oldname;
int i, endCloses = 0;
int i, priority;
#ifdef DEBUG
xmlGenericError(xmlGenericErrorContext,"Close of %s stack: %d elements\n", newtag, ctxt->nameNr);
@ -682,15 +716,20 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
xmlGenericError(xmlGenericErrorContext,"%d : %s\n", i, ctxt->nameTab[i]);
#endif
priority = htmlGetEndPriority (newtag);
for (i = (ctxt->nameNr - 1);i >= 0;i--) {
if (xmlStrEqual(newtag, ctxt->nameTab[i])) break;
/*
* A misplaced endtag can only close elements with lower
* or equal priority, so if we find an element with higher
* priority before we find an element with
* matching name, we just ignore this endtag
*/
if (htmlGetEndPriority (ctxt->nameTab[i]) > priority) return;
}
if (i < 0) return;
for (i = 0; (htmlEndClose[i] != NULL);i++)
if (xmlStrEqual(newtag, (const xmlChar *) htmlEndClose[i])) {
endCloses = 1;
break;
}
while (!xmlStrEqual(newtag, ctxt->name)) {
info = htmlTagLookup(ctxt->name);
@ -707,8 +746,6 @@ htmlAutoCloseOnClose(htmlParserCtxtPtr ctxt, const xmlChar *newtag) {
"Opening and ending tag mismatch: %s and %s\n",
newtag, ctxt->name);
ctxt->wellFormed = 0;
} else if (endCloses == 0) {
return;
}
if ((ctxt->sax != NULL) && (ctxt->sax->endElement != NULL))
ctxt->sax->endElement(ctxt->userData, ctxt->name);

View File

@ -97,8 +97,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
</td></tr></tbody></table>
</center></td></tr></tbody></table></td></tr>
</tbody></table>
<table bgcolor="#003399" border="0" cellspacing="6" width="80%"><tbody>
<tr>
<table bgcolor="#003399" border="0" cellspacing="6" width="80%"><tbody><tr>
<td bgcolor="black" valign="top" width="10%"><table border="0" cellpadding="3" cellspacing="0" width="100%"><tbody><tr><td width="100%">
<img height="1" src="doc3_files/spacer.gif" width="111">
<br>
@ -784,8 +783,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
</td></tr></tbody></table>
</center>
</td>
<td bgcolor="silver" valign="top" width="10%">
<center>
<td bgcolor="silver" valign="top" width="10%"><center>
<p>
<table bgcolor="silver" border="0" cellpadding="0" cellspacing="0" width="100%"><tbody><tr><td colstart="1"><center>
<!-- <FORM ACTION="/cgi-bin/subscribe.pl" METHOD="POST" ENCTYPE="application/x-www-form-urlencoded">
@ -853,11 +851,10 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
}
// -->
</script>
<b><noscript>
<b><noscript></noscript></b>
<a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"><img align="bottom" border="0" height="90" ismap src="doc3_files/100x90.gif" width="100"></a>
<b><a href="http://www.goto.com/d/search/ssn/?fromGIF=true" target="_blank"></a></b>
<b></b>
</noscript></b>
<b><!-- END GoTo.com Search Box --></b>
<!-- Pricewatch Search Box --><form action="http://www.pricewatch.com/search/search.asp" method="get" target="_Blank"><center><p>
<b><font color="white" face="ARIAL, HELVETICA" size="1">PC Price
@ -880,10 +877,9 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
</a>
</td></tr></tbody></table>
<table bgcolor="silver" border="0" cellpadding="0" cellspacing="0" height="100%" width="100%"><tbody><tr><td width="100%"><EFBFBD></td></tr></tbody></table>
</center>
</center></td>
</tr></tbody></table>
<!-- </TABLE>--><center></center>
</td>
</tr>
<tr><td colspan="3" valign="TOP" height="70"><EFBFBD></td></tr>
<table border="0" width="780"><tbody>
<tr><td width="780"><p align="center">
@ -893,8 +889,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0,
</p></td></tr>
<!-- <TR> <TD WIDTH="780"> <P ALIGN="CENTER"><FONT SIZE="1" COLOR="#999999" FACE="Verdana,arial">Site design by Tim Brinkley</FONT> </TD> </TR> -->
</tbody></table>
<script> window.open=NS_ActualOpen; </script>
</tbody></table>
</div>
<script> window.open=NS_ActualOpen; </script>
</body>
</html>

View File

@ -37,10 +37,7 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT
./test/HTML/doc3.htm:742: error: Unexpected end tag : p
=7708"></a></IFRAME></CENTER></LI></FONT></TD></TR></TBODY></TABLE></CENTER></P
^
./test/HTML/doc3.htm:767: error: Opening and ending tag mismatch: font and form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^
./test/HTML/doc3.htm:767: error: Opening and ending tag mismatch: center and font
./test/HTML/doc3.htm:767: error: Unexpected end tag : form
archive</A></FONT> </FORM></CENTER></TD></TR></TBODY></TABLE><!--
^
./test/HTML/doc3.htm:790: error: Unexpected end tag : iframe
@ -55,13 +52,10 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT
./test/HTML/doc3.htm:806: error: Unexpected end tag : a
document.write("ype=gif&size=100x90></A>");
^
./test/HTML/doc3.htm:810: error: Opening and ending tag mismatch: b and noscript
<B><NOSCRIPT></B><A
^
./test/HTML/doc3.htm:815: error: Unexpected end tag : a
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^
./test/HTML/doc3.htm:815: error: Opening and ending tag mismatch: noscript and b
./test/HTML/doc3.htm:815: error: Unexpected end tag : noscript
</A></A></B><B></NOSCRIPT></B><B><!-- END GoTo.com Search Box --></
^
./test/HTML/doc3.htm:821: error: Opening and ending tag mismatch: form and center
@ -76,18 +70,12 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708"></a></IFRAME></CENTER></LI></FONT
./test/HTML/doc3.htm:834: error: Unexpected end tag : p
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^
./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tr and td
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
./test/HTML/doc3.htm:835: error: Unexpected end tag : td
<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP"
^
./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: tbody and td
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
./test/HTML/doc3.htm:835: error: Unexpected end tag : tr
<CENTER></CENTER></TD></TR><TR><TD COLSPAN="3" VALIGN="TOP"
^
./test/HTML/doc3.htm:834: error: Opening and ending tag mismatch: table and td
width="100%">&nbsp;</TD></TR></TBODY></TABLE></P></CENTER></TR></TBODY></TABLE>
^
./test/HTML/doc3.htm:836: error: Opening and ending tag mismatch: table and tbody
./test/HTML/doc3.htm:836: error: Unexpected end tag : table
HEIGHT="70">&nbsp;</TD> </TR></TABLE>
^
./test/HTML/doc3.htm:844: error: Opening and ending tag mismatch: div and tbody
,arial">Site design by Tim Brinkley</FONT> </TD> </TR> --></TBODY></TABLE></DI
^

View File

@ -2657,11 +2657,10 @@ SAX.startElement(a, href='http://bp6.gamesquad.net/cgi-bin/news/viewnews.cgi?new
SAX.endElement(br)
SAX.startElement(a, href='http://bp6.gamesquad.net/cgi-bin/news/viewnews.cgi?newsall')
SAX.characters(News
archive, 26)
SAX.endElement(a)
archive, 26)
SAX.characters( , 1)
SAX.endElement(a)
SAX.endElement(form)
SAX.endElement(font)
SAX.characters( , 1)
SAX.error: Unexpected end tag : form
SAX.endElement(center)
@ -2711,7 +2710,8 @@ SAX.characters(
SAX.endElement(script)
SAX.characters(
, 14)
SAX.startElement(b)
SAX.startElement(b)
SAX.startElement(noscript)
SAX.endElement(noscript)
SAX.endElement(b)
SAX.startElement(a, href='http://www.goto.com/d/search/ssn/?fromGIF=true', target='_blank')
@ -2724,9 +2724,7 @@ SAX.endElement(a)
, 15)
SAX.endElement(a)
SAX.error: Unexpected end tag : a
SAX.endElement(b)
SAX.startElement(b)
SAX.error: Opening and ending tag mismatch: noscript and b
SAX.endElement(b)
SAX.startElement(b)
SAX.error: Unexpected end tag : noscript
SAX.endElement(b)
@ -2813,23 +2811,24 @@ SAX.endElement(tbody)
SAX.endElement(tr)
SAX.endElement(tbody)
SAX.endElement(table)
SAX.error: Unexpected end tag : p
SAX.endElement(center)
SAX.error: Opening and ending tag mismatch: tr and td
SAX.error: Unexpected end tag : p
SAX.endElement(center)
SAX.endElement(td)
SAX.endElement(tr)
SAX.endElement(tbody)
SAX.endElement(table)
SAX.comment( </TABLE>)
SAX.characters(
, 2)
SAX.startElement(center)
SAX.endElement(center)
SAX.startElement(center)
SAX.endElement(center)
SAX.error: Unexpected end tag : td
SAX.error: Unexpected end tag : tr
SAX.startElement(tr)
SAX.startElement(td, colspan='3', valign='TOP', height='70')
SAX.characters(&nbsp;, 2)
SAX.endElement(td)
SAX.characters( , 1)
SAX.characters( , 1)
SAX.endElement(tr)
SAX.error: Unexpected end tag : table
SAX.characters(
@ -2864,17 +2863,14 @@ SAX.endElement(tr)
SAX.endElement(td)
SAX.endElement(tr)
SAX.comment( <TR> <TD WIDTH="780"> <P ALIGN="CENTER"><FONT SIZE="1" COLOR="#999999" FACE="Verdana,arial">Site design by Tim Brinkley</FONT> </TD> </TR> )
SAX.endElement(tbody)
SAX.endElement(table)
SAX.endElement(tbody)
SAX.endElement(table)
SAX.endElement(div)
SAX.ignorableWhitespace(
, 2)
SAX.startElement(script)
SAX.cdata( window.open=NS_ActualOpen; , 28)
SAX.cdata( window.open=NS_ActualOpen; , 28)
SAX.endElement(script)
SAX.characters(
, 2)
SAX.endElement(tbody)
SAX.ignorableWhitespace(
, 2)
SAX.endElement(body)

View File

@ -127,17 +127,17 @@
<tr><td bgcolor="#CCFFCC"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000"><a href="/news/news/reuters/sports/">Sports</a></font></td></tr>
<tr><td bgcolor="#99FF99"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000"><a href="/news/news/reuters/business/">Finance</a></font></td></tr>
<!-- End upper left nav --><!-- Begin lower Left Nav --><tr><td bgcolor="#FF0000"><font face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b><font size="1">FREE DELIVERY</font></b></font></td></tr>
<tr><td bgcolor="#99FF99"><table cellspacing="0" cellpadding="0" border="0">
<tr><td bgcolor="#99FF99"><form action="http://r.hotwired.com/r/hw_wm_r_nav_nwsltr/http://perl.hotwired.com/massmail/cgiParser.cgi" method="get" target="_top">
<tr><td bgcolor="#99FF99"><table cellspacing="0" cellpadding="0" border="0"><tr>
<td bgcolor="#99FF99"><form action="http://r.hotwired.com/r/hw_wm_r_nav_nwsltr/http://perl.hotwired.com/massmail/cgiParser.cgi" method="get" target="_top">
<input type="hidden" name="success_page" value="http://www.hotwired.com/email/signup/wirednews-ascii.html">
<input type="hidden" name="failure_page" value="http://www.hotwired.com/email/signup/wirednews-ascii.html">
<input type="hidden" name="LIST" value="wn_ascii">
<input type="hidden" name="SOURCE" value="other">
<input type="hidden" name="ACTION" value="subscribe">
<input type="TEXT" name="from" size="10" value="enter email">&nbsp;
</form></td>
<td valign="top" bgcolor="#99FF99"><input type="SUBMIT" name="SUBMIT" value="GO"></td>
</form></td></tr>
</tr></table></td></tr>
<tr><td bgcolor="#FF0000"><font face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b><font size="1">STOCKS</font></b></font></td></tr>
<tr><td bgcolor="#99FF99"><font face="Verdana, Arial, Helvetica, sans-serif" size="1">Get Quote:</font></td></tr>
<tr><td bgcolor="#99FF99" marginwidth="0" marginheight="0"><form method="get" action="http://r.wired.com/r/10020/http://stocks.wired.com/stocks_quotes.asp">
@ -164,8 +164,7 @@
</font></td></tr>
<tr><td bgcolor="#99FF99"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#000000"><a href="http://redirect.wired.com/redir/53/http://stocks.wired.com/stocks_portfolios.asp">Portfolios</a></font></td></tr>
<!-- BEGIN B&N spot --><tr><td bgcolor="#FF0000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>FIND A BOOK</b></font></td></tr>
<tr><td bgcolor="#CCFFCC">
<table cellspacing="0" cellpadding="0" border="0" width="145">
<tr><td bgcolor="#CCFFCC"><table cellspacing="0" cellpadding="0" border="0" width="145">
<tr><td bgcolor="#CCFFCC"><form action="http://r.wired.com/r/wn_nav_c_bn/http://barnesandnoble.bfast.com/booklink/click">
<input type="hidden" name="sourceid" value="383471">
<input type="hidden" name="categoryid" value="categorydropdown">
@ -207,6 +206,7 @@
</option>
</select></font>
</form></td></tr>
<tr align="left" valign="top"><td valign="top" bgcolor="#CCFFCC">
<input type="submit" value="GO">
<img src="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&amp;is_search=Y" border="0" align="top">
@ -219,7 +219,7 @@
</font>
<br clear="all">
</p></td></tr>
</form></td></tr>
</table></td></tr>
<!-- END B&N spot --><!-- BEGIN MAGAZINE SPOT --><tr><td bgcolor="#000000"><font color="#FFFFFF" face="Verdana, Arial, Helvetica, sans-serif" size="1"><b>WIRED
MAGAZINE </b></font></td></tr>
<tr><td bgcolor="#FFFF99" align="CENTER"><font face="verdana, arial, helvetica, sans-serif" size="1">
@ -629,8 +629,5 @@ Contruction workers in Berlin opened an old wound in the German psyche this week
</tr>
</table>
<br>
</td></tr>
</table></td></tr>
</table>
</body>
</html>

View File

@ -181,26 +181,14 @@ option value="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OP
./test/HTML/wired.html:97: error: htmlParseEntityRef: expecting ';'
lue="http://www.hotbot.com/?SM=MC&DV=0&LG=any&RD=RG&DC=10&DE=2&_v=2&OPs=MDRTP&M
^
./test/HTML/wired.html:165: error: Opening and ending tag mismatch: td and form
</td>
^
./test/HTML/wired.html:170: error: Opening and ending tag mismatch: tr and form
./test/HTML/wired.html:170: error: Unexpected end tag : form
</tr> </form>
^
./test/HTML/wired.html:171: error: Opening and ending tag mismatch: table and td
</table></td>
^
./test/HTML/wired.html:244: error: Opening and ending tag mismatch: td and form
</select></font></td></tr>
^
./test/HTML/wired.html:244: error: Opening and ending tag mismatch: tr and form
</select></font></td></tr>
^
./test/HTML/wired.html:248: error: htmlParseEntityRef: expecting ';'
MG SRC="http://barnesandnoble.bfast.com/booklink/serve?sourceid=383471&is_searc
^
./test/HTML/wired.html:266: error: Opening and ending tag mismatch: table and td
</table>
./test/HTML/wired.html:265: error: Unexpected end tag : form
</tr> </form>
^
./test/HTML/wired.html:346: error: Opening and ending tag mismatch: td and font
</td>

View File

@ -778,7 +778,8 @@ SAX.endElement(input)
SAX.characters(&nbsp;, 2)
SAX.characters(
, 1)
SAX.error: Opening and ending tag mismatch: td and form
SAX.endElement(form)
SAX.endElement(td)
SAX.characters(
, 4)
SAX.startElement(td, valign='top', bgcolor='#99FF99')
@ -792,12 +793,12 @@ SAX.characters(
SAX.endElement(td)
SAX.characters(
, 2)
SAX.error: Opening and ending tag mismatch: tr and form
SAX.endElement(tr)
SAX.characters( , 4)
SAX.endElement(form)
SAX.error: Unexpected end tag : form
SAX.characters(
, 1)
SAX.error: Opening and ending tag mismatch: table and td
SAX.endElement(table)
SAX.endElement(td)
SAX.characters(
, 3)
@ -1074,8 +1075,9 @@ SAX.characters(Other
SAX.endElement(option)
SAX.endElement(select)
SAX.endElement(font)
SAX.error: Opening and ending tag mismatch: td and form
SAX.error: Opening and ending tag mismatch: tr and form
SAX.endElement(form)
SAX.endElement(td)
SAX.endElement(tr)
SAX.characters(
, 2)
SAX.startElement(tr, align='left', valign='top')
@ -1137,10 +1139,10 @@ SAX.characters(
, 9)
SAX.endElement(tr)
SAX.characters( , 2)
SAX.endElement(form)
SAX.error: Unexpected end tag : form
SAX.characters(
, 9)
SAX.error: Opening and ending tag mismatch: table and td
SAX.endElement(table)
SAX.characters(
, 2)
@ -1461,14 +1463,14 @@ SAX.characters(
, 2)
SAX.endElement(table)
SAX.characters(
SAX.ignorableWhitespace(
, 2)
SAX.comment( end lower left side Navigation )
SAX.characters(
SAX.ignorableWhitespace(
, 1)
SAX.comment( CONTENT TABLE )
SAX.characters(
SAX.ignorableWhitespace(
, 2)
SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00')
@ -2828,20 +2830,14 @@ SAX.endElement(tr)
SAX.characters(
, 1)
SAX.endElement(table)
SAX.characters(
SAX.ignorableWhitespace(
, 3)
SAX.startElement(br)
SAX.endElement(br)
SAX.characters(
SAX.ignorableWhitespace(
, 1)
SAX.endElement(td)
SAX.endElement(tr)
SAX.endElement(table)
SAX.endElement(td)
SAX.endElement(tr)
SAX.endElement(table)
SAX.endElement(body)
SAX.ignorableWhitespace(
, 1)