diff --git a/ChangeLog b/ChangeLog index cf280528..be3b6612 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +Thu Sep 1 11:42:39 CEST 2005 Daniel Veillard + + * HTMLparser.c: Applied the last patch from Gary Coady for #304637 + changing the behaviour when text nodes are found in body + * result/HTML/*: this changes the output of some tests + Thu Sep 1 11:22:37 CEST 2005 Daniel Veillard * doc/downloads.html doc/xml.html: adding reference to Bull AIX rpms diff --git a/HTMLparser.c b/HTMLparser.c index 6b8b5624..d11ae088 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -964,7 +964,6 @@ NULL static const char *htmlNoContentElements[] = { "html", "head", - "body", NULL }; @@ -2042,6 +2041,7 @@ static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) { unsigned int i; int j; xmlNodePtr lastChild; + xmlDtdPtr dtd; for (j = 0;j < len;j++) if (!(IS_BLANK_CH(str[j]))) return(0); @@ -2054,8 +2054,17 @@ static int areBlanks(htmlParserCtxtPtr ctxt, const xmlChar *str, int len) { return(1); if (xmlStrEqual(ctxt->name, BAD_CAST"head")) return(1); - if (xmlStrEqual(ctxt->name, BAD_CAST"body")) - return(1); + + /* Only strip CDATA children of the body tag for strict HTML DTDs */ + if (xmlStrEqual(ctxt->name, BAD_CAST "body") && ctxt->myDoc != NULL) { + dtd = xmlGetIntSubset(ctxt->myDoc); + if (dtd != NULL && dtd->ExternalID != NULL) { + if (!xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4.01//EN") || + !xmlStrcasecmp(dtd->ExternalID, BAD_CAST "-//W3C//DTD HTML 4//EN")) + return(1); + } + } + if (ctxt->node == NULL) return(0); lastChild = xmlGetLastChild(ctxt->node); while ((lastChild) && (lastChild->type == XML_COMMENT_NODE)) diff --git a/result/HTML/Down.html b/result/HTML/Down.html index cd99aa16..929ed8a9 100644 --- a/result/HTML/Down.html +++ b/result/HTML/Down.html @@ -3,10 +3,8 @@ This service is temporary down

Sorry, this service is temporary down

-

We are doing our best to get it back on-line, -

The W3C system administrators

diff --git a/result/HTML/Down.html.sax b/result/HTML/Down.html.sax index a8c32c9b..c23c3329 100644 --- a/result/HTML/Down.html.sax +++ b/result/HTML/Down.html.sax @@ -16,19 +16,17 @@ SAX.ignorableWhitespace( , 2) SAX.startElement(body, bgcolor='#FFFFFF') -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(h1, align='center') SAX.characters(Sorry, this service is tempora, 37) SAX.endElement(h1) -SAX.startElement(p) SAX.characters( We are doing our best to get , 48) -SAX.endElement(p) SAX.startElement(p) SAX.characters(The W3C system administrators, 29) SAX.endElement(p) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/attrents.html b/result/HTML/attrents.html index 3231d1ef..0433f48f 100644 --- a/result/HTML/attrents.html +++ b/result/HTML/attrents.html @@ -1,4 +1,4 @@ - -
+ +
diff --git a/result/HTML/attrents.html.sax b/result/HTML/attrents.html.sax index 976bbbed..c1cfb427 100644 --- a/result/HTML/attrents.html.sax +++ b/result/HTML/attrents.html.sax @@ -4,13 +4,13 @@ SAX.startElement(html) SAX.ignorableWhitespace( , 2) SAX.startElement(body, bgcolor='#FFFFFF') -SAX.ignorableWhitespace( +SAX.characters( , 18) SAX.startElement(a, href='mailto:katherine@cbfanc.org,website@bis.doc.gov?subject=South San Francisco BIS Seminar - October 16th') SAX.endElement(a) SAX.startElement(br) SAX.endElement(br) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/cf_128.html b/result/HTML/cf_128.html index 8c4f11a6..b3e6b416 100644 --- a/result/HTML/cf_128.html +++ b/result/HTML/cf_128.html @@ -1,7 +1,9 @@ gnome-xml push mode bug - + + +
-
Foo1
Foo2

@@ -9,5 +11,6 @@
Foo3
+ + diff --git a/result/HTML/cf_128.html.sax b/result/HTML/cf_128.html.sax index 8aa82c14..8f66a42e 100644 --- a/result/HTML/cf_128.html.sax +++ b/result/HTML/cf_128.html.sax @@ -16,7 +16,7 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(table, border='4') @@ -58,7 +58,7 @@ SAX.endElement(tr) SAX.characters( , 3) SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.characters( , 3) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/doc2.htm b/result/HTML/doc2.htm index 04466950..2c7e230a 100644 --- a/result/HTML/doc2.htm +++ b/result/HTML/doc2.htm @@ -20,7 +20,9 @@ - <body bgcolor="#FFFFFF" text="#000000" link="#000080" vlink="#000080" alink="#000080" topmargin="0" leftmargin="0" marginheight="0" marginwidth="0"><p>This page uses frames, but your browser doesn't support them.</p></body> + <body bgcolor="#FFFFFF" text="#000000" link="#000080" vlink="#000080" alink="#000080" topmargin="0" leftmargin="0" marginheight="0" marginwidth="0"> + <p>This page uses frames, but your browser doesn't support them.</p> + </body> diff --git a/result/HTML/doc2.htm.sax b/result/HTML/doc2.htm.sax index 27d458a1..7cbbc510 100644 --- a/result/HTML/doc2.htm.sax +++ b/result/HTML/doc2.htm.sax @@ -51,12 +51,12 @@ SAX.characters( , 4) SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#000080', vlink='#000080', alink='#000080', topmargin='0', leftmargin='0', marginheight='0', marginwidth='0') -SAX.ignorableWhitespace( +SAX.characters( , 3) SAX.startElement(p) SAX.characters(This page uses frames, but you, 61) SAX.endElement(p) -SAX.ignorableWhitespace( +SAX.characters( , 3) SAX.endElement(body) SAX.characters( diff --git a/result/HTML/doc3.htm b/result/HTML/doc3.htm index ef6fddd4..a53c27b7 100644 --- a/result/HTML/doc3.htm +++ b/result/HTML/doc3.htm @@ -32,7 +32,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0, // End --> - +

@@ -439,7 +439,7 @@ eval("page" + id + " = window.open(URL, '" + id + "', 'toolbars=0, scrollbars=0, -
+ diff --git a/result/HTML/doc3.htm.sax b/result/HTML/doc3.htm.sax index 93f5a9f8..2cc38fa7 100644 --- a/result/HTML/doc3.htm.sax +++ b/result/HTML/doc3.htm.sax @@ -73,7 +73,7 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 2) SAX.startElement(body, alink='red', bgcolor='black', link='red', text='white', vlink='red') -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(p) SAX.characters( @@ -2905,12 +2905,12 @@ SAX.comment(

ResourceConfig /dev/null 
AccessConfig /dev/null
diff --git a/result/HTML/fp40.htm.sax b/result/HTML/fp40.htm.sax index 3859458d..6f777945 100644 --- a/result/HTML/fp40.htm.sax +++ b/result/HTML/fp40.htm.sax @@ -26,7 +26,7 @@ SAX.ignorableWhitespace( , 2) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(font, face='Verdana') SAX.characters( @@ -167,7 +167,7 @@ SAX.characters( , 3) SAX.endElement(font) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(blockquote) SAX.characters( @@ -184,7 +184,7 @@ SAX.endElement(font) SAX.characters( , 1) SAX.endElement(blockquote) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(font, face='Verdana') SAX.characters( @@ -452,7 +452,7 @@ SAX.characters( , 4) SAX.endElement(font) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/liclose.html b/result/HTML/liclose.html index b8a464ee..f4e4edbf 100644 --- a/result/HTML/liclose.html +++ b/result/HTML/liclose.html @@ -1,10 +1,12 @@ -

+ diff --git a/result/HTML/liclose.html.sax b/result/HTML/liclose.html.sax index 30f26c0f..eabcdf67 100644 --- a/result/HTML/liclose.html.sax +++ b/result/HTML/liclose.html.sax @@ -15,7 +15,7 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(ul) SAX.characters( @@ -28,7 +28,7 @@ SAX.startElement(li) SAX.characters(Second item, closes the first , 34) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.endElement(body) diff --git a/result/HTML/pre.html b/result/HTML/pre.html index 17f5b3f9..5308b6d2 100644 --- a/result/HTML/pre.html +++ b/result/HTML/pre.html @@ -1,2 +1,4 @@ -
+ +
+ diff --git a/result/HTML/pre.html.sax b/result/HTML/pre.html.sax index 4f567aea..f8782b7d 100644 --- a/result/HTML/pre.html.sax +++ b/result/HTML/pre.html.sax @@ -4,7 +4,7 @@ SAX.startElement(html) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(pre) SAX.startElement(a, href='toto') @@ -12,7 +12,7 @@ SAX.endElement(a) SAX.startElement(img, src='titi') SAX.endElement(img) SAX.endElement(pre) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/python.html b/result/HTML/python.html index e5f9d56e..5fdc6a24 100644 --- a/result/HTML/python.html +++ b/result/HTML/python.html @@ -1,5 +1,6 @@ Python Programming Language - + + diff --git a/result/HTML/python.html.sax b/result/HTML/python.html.sax index 4f96e8a8..665539c1 100644 --- a/result/HTML/python.html.sax +++ b/result/HTML/python.html.sax @@ -20,7 +20,7 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.endElement(html) diff --git a/result/HTML/reg1.html.sax b/result/HTML/reg1.html.sax index 1ca02716..c8ee3ac1 100644 --- a/result/HTML/reg1.html.sax +++ b/result/HTML/reg1.html.sax @@ -15,12 +15,12 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(h1) SAX.characters(Regression test 1, 17) SAX.endElement(h1) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(p) SAX.characters( diff --git a/result/HTML/reg2.html.sax b/result/HTML/reg2.html.sax index 63acfd42..f85691e4 100644 --- a/result/HTML/reg2.html.sax +++ b/result/HTML/reg2.html.sax @@ -15,12 +15,12 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(h1) SAX.characters(Regression test 2, 17) SAX.endElement(h1) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(p) SAX.characters( diff --git a/result/HTML/reg3.html.sax b/result/HTML/reg3.html.sax index 5d498b25..ec0f0396 100644 --- a/result/HTML/reg3.html.sax +++ b/result/HTML/reg3.html.sax @@ -15,12 +15,12 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(h1) SAX.characters(Regression test 3, 17) SAX.endElement(h1) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(p) SAX.characters( @@ -29,7 +29,7 @@ Autoclose of tag P SAX.endElement(p) SAX.startElement(hr) SAX.endElement(hr) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(p) SAX.characters( diff --git a/result/HTML/reg4.html.sax b/result/HTML/reg4.html.sax index 5a4eacfd..4c5147cd 100644 --- a/result/HTML/reg4.html.sax +++ b/result/HTML/reg4.html.sax @@ -15,12 +15,12 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(h1) SAX.characters(Regression test 4, 17) SAX.endElement(h1) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(p) SAX.characters( @@ -29,10 +29,10 @@ Wrong close of tag P SAX.endElement(p) SAX.startElement(hr) SAX.endElement(hr) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.error: Unexpected end tag : p -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/script.html.sax b/result/HTML/script.html.sax index 3a470619..b259f7f7 100644 --- a/result/HTML/script.html.sax +++ b/result/HTML/script.html.sax @@ -11,17 +11,17 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(script, language='javascript') SAX.cdata( if (window.open<max) ;, 28) SAX.endElement(script) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(input, onclick='if(window.open<max);') SAX.endElement(input) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/script2.html b/result/HTML/script2.html index bf9318e8..2be4f93f 100644 --- a/result/HTML/script2.html +++ b/result/HTML/script2.html @@ -4,11 +4,13 @@ Test Page -
+ +

Příliš žluťoučký kůň úpěl ďábelksé ódy;

-
+
+ diff --git a/result/HTML/script2.html.sax b/result/HTML/script2.html.sax index 1607ccbf..19719917 100644 --- a/result/HTML/script2.html.sax +++ b/result/HTML/script2.html.sax @@ -20,7 +20,7 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(div, id='portal') SAX.characters( @@ -38,7 +38,7 @@ SAX.endElement(p) SAX.characters( , 1) SAX.endElement(div) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace( diff --git a/result/HTML/test2.html b/result/HTML/test2.html index ef62dc6c..98a27164 100644 --- a/result/HTML/test2.html +++ b/result/HTML/test2.html @@ -2,6 +2,7 @@ Linux Today +
diff --git a/result/HTML/test2.html.sax b/result/HTML/test2.html.sax index 03f7285d..78bb090a 100644 --- a/result/HTML/test2.html.sax +++ b/result/HTML/test2.html.sax @@ -11,7 +11,7 @@ SAX.endElement(head) SAX.ignorableWhitespace( , 1) SAX.startElement(body, bgcolor='White', link='Blue', text='Black', vlink='Black', alink='Red') -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(center) @@ -130,7 +130,7 @@ SAX.endElement(font) SAX.characters( , 1) SAX.endElement(center) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(p) SAX.characters( diff --git a/result/HTML/test3.html b/result/HTML/test3.html index 4437f4d5..903723c1 100644 --- a/result/HTML/test3.html +++ b/result/HTML/test3.html @@ -38,7 +38,7 @@

Class ProblemDomain.Note

-

Links

+

Links

diff --git a/result/HTML/test3.html.sax b/result/HTML/test3.html.sax index 1f76e50f..c9f66384 100644 --- a/result/HTML/test3.html.sax +++ b/result/HTML/test3.html.sax @@ -26,7 +26,7 @@ SAX.endElement(p) SAX.startElement(hr) SAX.endElement(hr) SAX.error: Unexpected end tag : p -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dl) SAX.characters( @@ -59,14 +59,14 @@ SAX.characters(Interface, thats stores and ma, 58) SAX.endElement(dd) SAX.endElement(dd) SAX.endElement(dl) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(p) SAX.endElement(p) SAX.startElement(hr) SAX.endElement(hr) SAX.error: Unexpected end tag : p -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dl) SAX.characters( @@ -163,7 +163,7 @@ SAX.endElement(dt) SAX.characters( , 2) SAX.endElement(dl) -SAX.ignorableWhitespace( +SAX.characters( , 4) SAX.startElement(h4) @@ -173,7 +173,7 @@ SAX.error: Opening and ending tag mismatch: h4 and b SAX.endElement(b) SAX.endElement(h4) SAX.error: Unexpected end tag : b -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(ul) SAX.startElement(li) @@ -185,11 +185,11 @@ SAX.characters(HumanInterface, 14) SAX.endElement(a) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dir) SAX.endElement(dir) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(ul) SAX.startElement(li) @@ -201,11 +201,11 @@ SAX.characters(DataManagement.FlatFile, 23) SAX.endElement(a) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dir) SAX.endElement(dir) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(ul) SAX.startElement(li) @@ -217,11 +217,11 @@ SAX.characters(DataManagement, 14) SAX.endElement(a) SAX.endElement(li) SAX.endElement(ul) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(dir) SAX.endElement(dir) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.endElement(body) SAX.endElement(html) diff --git a/result/HTML/wired.html b/result/HTML/wired.html index 674623fd..f7123e8c 100644 --- a/result/HTML/wired.html +++ b/result/HTML/wired.html @@ -2,6 +2,7 @@ Top Stories News from Wired News +
@@ -69,7 +70,8 @@
True to the Original
- + +
diff --git a/result/HTML/wired.html.sax b/result/HTML/wired.html.sax index 0edd57eb..3860c76f 100644 --- a/result/HTML/wired.html.sax +++ b/result/HTML/wired.html.sax @@ -8,7 +8,7 @@ SAX.characters(Top Stories News from Wired Ne, 32) SAX.endElement(title) SAX.endElement(head) SAX.startElement(body, bgcolor='#FFFFFF', text='#000000', link='#333399', vlink='#660066', alink='#666699') -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(table, border='0', width='600', cellspacing='0', cellpadding='0') @@ -315,19 +315,19 @@ SAX.endElement(tr) SAX.characters( , 1) SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.comment( WIRED NEWS header ) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.comment( CMD_HOST = scoop.hotwired.com ) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(a, name='#') SAX.endElement(a) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.startElement(table, border='0', width='600', cellspacing='0', cellpadding='0') SAX.characters( @@ -574,14 +574,14 @@ SAX.comment( SAX.characters( , 1) SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.comment( end WIRED NEWS header ) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.comment( begin upper left side Navigation ) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(table, border='0', cellpadding='3', cellspacing='0', align='LEFT', bgcolor='#FFFFFF') @@ -1463,14 +1463,14 @@ SAX.characters( , 2) SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.comment( end lower left side Navigation ) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.comment( CONTENT TABLE ) -SAX.ignorableWhitespace( +SAX.characters( , 2) SAX.startElement(table, border='0', width='447', cellspacing='0', cellpadding='0', bordercolor='#66FF00') @@ -2830,13 +2830,13 @@ SAX.endElement(tr) SAX.characters( , 1) SAX.endElement(table) -SAX.ignorableWhitespace( +SAX.characters( , 3) SAX.startElement(br) SAX.endElement(br) -SAX.ignorableWhitespace( +SAX.characters( , 1) SAX.endElement(body) SAX.ignorableWhitespace(