From f41fbbf6a9316873c948d7170391be184408ee3d Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Tue, 13 Feb 2001 17:05:35 +0000 Subject: [PATCH] testing and bug fixing related to XSLT: - xpath.c result/XPath/tests/chaptersprefol: bugfixes on order and on predicate - HTMLparser.[ch] HTMLtree.c result/HTML/doc3.htm.err result/HTML/doc3.htm.sax result/HTML/wired.html: sometimes one really want to have tags closed on output even if we accept unclosed ones on input Daniel --- ChangeLog | 9 ++ HTMLparser.c | 184 +++++++++++++++--------------- HTMLparser.h | 11 +- HTMLtree.c | 2 +- include/libxml/HTMLparser.h | 11 +- result/HTML/doc3.htm.err | 6 + result/HTML/doc3.htm.sax | 2 + result/HTML/wired.html | 4 +- result/XPath/tests/chaptersprefol | 20 ++-- xpath.c | 35 ++++++ 10 files changed, 169 insertions(+), 115 deletions(-) diff --git a/ChangeLog b/ChangeLog index 96a65927..2d5854b1 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Tue Feb 13 18:01:48 CET 2001 Daniel Veillard + + * xpath.c result/XPath/tests/chaptersprefol: bugfixes on order and + on predicate + * HTMLparser.[ch] HTMLtree.c result/HTML/doc3.htm.err + result/HTML/doc3.htm.sax result/HTML/wired.html: sometimes one + really want to have tags closed on output even if we accept + unclosed ones on input + Mon Feb 12 18:33:20 CET 2001 Daniel Veillard * xpath.c: ouch don't free NULL, rare case fixed diff --git a/HTMLparser.c b/HTMLparser.c index eedda554..4fb40df8 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -377,100 +377,100 @@ htmlSkipBlankChars(xmlParserCtxtPtr ctxt) { * DTD: 1 means that this element is valid only in the Loose DTD * 2 means that this element is valid only in the Frameset DTD * - * Name,Start Tag,End Tag, Empty, Depr., DTD, Description + * Name,Start Tag,End Tag,Save End, Empty, Depr., DTD, Description */ htmlElemDesc html40ElementTable[] = { -{ "a", 0, 0, 0, 0, 0, "anchor " }, -{ "abbr", 0, 0, 0, 0, 0, "abbreviated form" }, -{ "acronym", 0, 0, 0, 0, 0, "" }, -{ "address", 0, 0, 0, 0, 0, "information on author " }, -{ "applet", 0, 0, 0, 1, 1, "java applet " }, -{ "area", 0, 2, 1, 0, 0, "client-side image map area " }, -{ "b", 0, 0, 0, 0, 0, "bold text style" }, -{ "base", 0, 2, 1, 0, 0, "document base uri " }, -{ "basefont", 0, 2, 1, 1, 1, "base font size " }, -{ "bdo", 0, 0, 0, 0, 0, "i18n bidi over-ride " }, -{ "big", 0, 0, 0, 0, 0, "large text style" }, -{ "blockquote", 0, 0, 0, 0, 0, "long quotation " }, -{ "body", 1, 1, 0, 0, 0, "document body " }, -{ "br", 0, 2, 1, 0, 0, "forced line break " }, -{ "button", 0, 0, 0, 0, 0, "push button " }, -{ "caption", 0, 0, 0, 0, 0, "table caption " }, -{ "center", 0, 0, 0, 1, 1, "shorthand for div align=center " }, -{ "cite", 0, 0, 0, 0, 0, "citation" }, -{ "code", 0, 0, 0, 0, 0, "computer code fragment" }, -{ "col", 0, 2, 1, 0, 0, "table column " }, -{ "colgroup", 0, 1, 0, 0, 0, "table column group " }, -{ "dd", 0, 1, 0, 0, 0, "definition description " }, -{ "del", 0, 0, 0, 0, 0, "deleted text " }, -{ "dfn", 0, 0, 0, 0, 0, "instance definition" }, -{ "dir", 0, 0, 0, 1, 1, "directory list" }, -{ "div", 0, 0, 0, 0, 0, "generic language/style container"}, -{ "dl", 0, 0, 0, 0, 0, "definition list " }, -{ "dt", 0, 1, 0, 0, 0, "definition term " }, -{ "em", 0, 0, 0, 0, 0, "emphasis" }, -{ "fieldset", 0, 0, 0, 0, 0, "form control group " }, -{ "font", 0, 0, 0, 1, 1, "local change to font " }, -{ "form", 0, 0, 0, 0, 0, "interactive form " }, -{ "frame", 0, 2, 1, 0, 2, "subwindow " }, -{ "frameset", 0, 0, 0, 0, 2, "window subdivision" }, -{ "h1", 0, 0, 0, 0, 0, "heading " }, -{ "h2", 0, 0, 0, 0, 0, "heading " }, -{ "h3", 0, 0, 0, 0, 0, "heading " }, -{ "h4", 0, 0, 0, 0, 0, "heading " }, -{ "h5", 0, 0, 0, 0, 0, "heading " }, -{ "h6", 0, 0, 0, 0, 0, "heading " }, -{ "head", 1, 1, 0, 0, 0, "document head " }, -{ "hr", 0, 2, 1, 0, 0, "horizontal rule " }, -{ "html", 1, 1, 0, 0, 0, "document root element " }, -{ "i", 0, 0, 0, 0, 0, "italic text style" }, -{ "iframe", 0, 0, 0, 0, 1, "inline subwindow " }, -{ "img", 0, 2, 1, 0, 0, "embedded image " }, -{ "input", 0, 2, 1, 0, 0, "form control " }, -{ "ins", 0, 0, 0, 0, 0, "inserted text" }, -{ "isindex", 0, 2, 1, 1, 1, "single line prompt " }, -{ "kbd", 0, 0, 0, 0, 0, "text to be entered by the user" }, -{ "label", 0, 0, 0, 0, 0, "form field label text " }, -{ "legend", 0, 0, 0, 0, 0, "fieldset legend " }, -{ "li", 0, 1, 0, 0, 0, "list item " }, -{ "link", 0, 2, 1, 0, 0, "a media-independent link " }, -{ "map", 0, 0, 0, 0, 0, "client-side image map " }, -{ "menu", 0, 0, 0, 1, 1, "menu list " }, -{ "meta", 0, 2, 1, 0, 0, "generic metainformation " }, -{ "noframes", 0, 0, 0, 0, 2, "alternate content container for non frame-based rendering " }, -{ "noscript", 0, 0, 0, 0, 0, "alternate content container for non script-based rendering " }, -{ "object", 0, 0, 0, 0, 0, "generic embedded object " }, -{ "ol", 0, 0, 0, 0, 0, "ordered list " }, -{ "optgroup", 0, 0, 0, 0, 0, "option group " }, -{ "option", 0, 1, 0, 0, 0, "selectable choice " }, -{ "p", 0, 1, 0, 0, 0, "paragraph " }, -{ "param", 0, 2, 1, 0, 0, "named property value " }, -{ "pre", 0, 0, 0, 0, 0, "preformatted text " }, -{ "q", 0, 0, 0, 0, 0, "short inline quotation " }, -{ "s", 0, 0, 0, 1, 1, "strike-through text style" }, -{ "samp", 0, 0, 0, 0, 0, "sample program output, scripts, etc." }, -{ "script", 0, 0, 0, 0, 0, "script statements " }, -{ "select", 0, 0, 0, 0, 0, "option selector " }, -{ "small", 0, 0, 0, 0, 0, "small text style" }, -{ "span", 0, 0, 0, 0, 0, "generic language/style container " }, -{ "strike", 0, 0, 0, 1, 1, "strike-through text" }, -{ "strong", 0, 0, 0, 0, 0, "strong emphasis" }, -{ "style", 0, 0, 0, 0, 0, "style info " }, -{ "sub", 0, 0, 0, 0, 0, "subscript" }, -{ "sup", 0, 0, 0, 0, 0, "superscript " }, -{ "table", 0, 0, 0, 0, 0, " " }, -{ "tbody", 1, 1, 0, 0, 0, "table body " }, -{ "td", 0, 1, 0, 0, 0, "table data cell" }, -{ "textarea", 0, 0, 0, 0, 0, "multi-line text field " }, -{ "tfoot", 0, 1, 0, 0, 0, "table footer " }, -{ "th", 0, 1, 0, 0, 0, "table header cell" }, -{ "thead", 0, 1, 0, 0, 0, "table header " }, -{ "title", 0, 0, 0, 0, 0, "document title " }, -{ "tr", 0, 1, 0, 0, 0, "table row " }, -{ "tt", 0, 0, 0, 0, 0, "teletype or monospaced text style" }, -{ "u", 0, 0, 0, 1, 1, "underlined text style" }, -{ "ul", 0, 0, 0, 0, 0, "unordered list " }, -{ "var", 0, 0, 0, 0, 0, "instance of a variable or program argument" }, +{ "a", 0, 0, 0, 0, 0, 0, "anchor " }, +{ "abbr", 0, 0, 0, 0, 0, 0, "abbreviated form" }, +{ "acronym", 0, 0, 0, 0, 0, 0, "" }, +{ "address", 0, 0, 0, 0, 0, 0, "information on author " }, +{ "applet", 0, 0, 0, 0, 1, 1, "java applet " }, +{ "area", 0, 2, 2, 1, 0, 0, "client-side image map area " }, +{ "b", 0, 0, 0, 0, 0, 0, "bold text style" }, +{ "base", 0, 2, 2, 1, 0, 0, "document base uri " }, +{ "basefont", 0, 2, 2, 1, 1, 1, "base font size " }, +{ "bdo", 0, 0, 0, 0, 0, 0, "i18n bidi over-ride " }, +{ "big", 0, 0, 0, 0, 0, 0, "large text style" }, +{ "blockquote", 0, 0, 0, 0, 0, 0, "long quotation " }, +{ "body", 1, 1, 0, 0, 0, 0, "document body " }, +{ "br", 0, 2, 2, 1, 0, 0, "forced line break " }, +{ "button", 0, 0, 0, 0, 0, 0, "push button " }, +{ "caption", 0, 0, 0, 0, 0, 0, "table caption " }, +{ "center", 0, 0, 0, 0, 1, 1, "shorthand for div align=center " }, +{ "cite", 0, 0, 0, 0, 0, 0, "citation" }, +{ "code", 0, 0, 0, 0, 0, 0, "computer code fragment" }, +{ "col", 0, 2, 2, 1, 0, 0, "table column " }, +{ "colgroup", 0, 1, 0, 0, 0, 0, "table column group " }, +{ "dd", 0, 1, 0, 0, 0, 0, "definition description " }, +{ "del", 0, 0, 0, 0, 0, 0, "deleted text " }, +{ "dfn", 0, 0, 0, 0, 0, 0, "instance definition" }, +{ "dir", 0, 0, 0, 0, 1, 1, "directory list" }, +{ "div", 0, 0, 0, 0, 0, 0, "generic language/style container"}, +{ "dl", 0, 0, 0, 0, 0, 0, "definition list " }, +{ "dt", 0, 1, 0, 0, 0, 0, "definition term " }, +{ "em", 0, 0, 0, 0, 0, 0, "emphasis" }, +{ "fieldset", 0, 0, 0, 0, 0, 0, "form control group " }, +{ "font", 0, 0, 0, 0, 1, 1, "local change to font " }, +{ "form", 0, 0, 0, 0, 0, 0, "interactive form " }, +{ "frame", 0, 2, 2, 1, 0, 2, "subwindow " }, +{ "frameset", 0, 0, 0, 0, 0, 2, "window subdivision" }, +{ "h1", 0, 0, 0, 0, 0, 0, "heading " }, +{ "h2", 0, 0, 0, 0, 0, 0, "heading " }, +{ "h3", 0, 0, 0, 0, 0, 0, "heading " }, +{ "h4", 0, 0, 0, 0, 0, 0, "heading " }, +{ "h5", 0, 0, 0, 0, 0, 0, "heading " }, +{ "h6", 0, 0, 0, 0, 0, 0, "heading " }, +{ "head", 1, 1, 0, 0, 0, 0, "document head " }, +{ "hr", 0, 2, 2, 1, 0, 0, "horizontal rule " }, +{ "html", 1, 1, 0, 0, 0, 0, "document root element " }, +{ "i", 0, 0, 0, 0, 0, 0, "italic text style" }, +{ "iframe", 0, 0, 0, 0, 0, 1, "inline subwindow " }, +{ "img", 0, 2, 2, 1, 0, 0, "embedded image " }, +{ "input", 0, 2, 2, 1, 0, 0, "form control " }, +{ "ins", 0, 0, 0, 0, 0, 0, "inserted text" }, +{ "isindex", 0, 2, 2, 1, 1, 1, "single line prompt " }, +{ "kbd", 0, 0, 0, 0, 0, 0, "text to be entered by the user" }, +{ "label", 0, 0, 0, 0, 0, 0, "form field label text " }, +{ "legend", 0, 0, 0, 0, 0, 0, "fieldset legend " }, +{ "li", 0, 1, 1, 0, 0, 0, "list item " }, +{ "link", 0, 2, 2, 1, 0, 0, "a media-independent link " }, +{ "map", 0, 0, 0, 0, 0, 0, "client-side image map " }, +{ "menu", 0, 0, 0, 0, 1, 1, "menu list " }, +{ "meta", 0, 2, 2, 1, 0, 0, "generic metainformation " }, +{ "noframes", 0, 0, 0, 0, 0, 2, "alternate content container for non frame-based rendering " }, +{ "noscript", 0, 0, 0, 0, 0, 0, "alternate content container for non script-based rendering " }, +{ "object", 0, 0, 0, 0, 0, 0, "generic embedded object " }, +{ "ol", 0, 0, 0, 0, 0, 0, "ordered list " }, +{ "optgroup", 0, 0, 0, 0, 0, 0, "option group " }, +{ "option", 0, 1, 0, 0, 0, 0, "selectable choice " }, +{ "p", 0, 1, 1, 0, 0, 0, "paragraph " }, +{ "param", 0, 2, 2, 1, 0, 0, "named property value " }, +{ "pre", 0, 0, 0, 0, 0, 0, "preformatted text " }, +{ "q", 0, 0, 0, 0, 0, 0, "short inline quotation " }, +{ "s", 0, 0, 0, 0, 1, 1, "strike-through text style" }, +{ "samp", 0, 0, 0, 0, 0, 0, "sample program output, scripts, etc." }, +{ "script", 0, 0, 0, 0, 0, 0, "script statements " }, +{ "select", 0, 0, 0, 0, 0, 0, "option selector " }, +{ "small", 0, 0, 0, 0, 0, 0, "small text style" }, +{ "span", 0, 0, 0, 0, 0, 0, "generic language/style container " }, +{ "strike", 0, 0, 0, 0, 1, 1, "strike-through text" }, +{ "strong", 0, 0, 0, 0, 0, 0, "strong emphasis" }, +{ "style", 0, 0, 0, 0, 0, 0, "style info " }, +{ "sub", 0, 0, 0, 0, 0, 0, "subscript" }, +{ "sup", 0, 0, 0, 0, 0, 0, "superscript " }, +{ "table", 0, 0, 0, 0, 0, 0, " " }, +{ "tbody", 1, 0, 0, 0, 0, 0, "table body " }, +{ "td", 0, 0, 0, 0, 0, 0, "table data cell" }, +{ "textarea", 0, 0, 0, 0, 0, 0, "multi-line text field " }, +{ "tfoot", 0, 1, 0, 0, 0, 0, "table footer " }, +{ "th", 0, 1, 0, 0, 0, 0, "table header cell" }, +{ "thead", 0, 1, 0, 0, 0, 0, "table header " }, +{ "title", 0, 0, 0, 0, 0, 0, "document title " }, +{ "tr", 0, 1, 0, 0, 0, 0, "table row " }, +{ "tt", 0, 0, 0, 0, 0, 0, "teletype or monospaced text style" }, +{ "u", 0, 0, 0, 0, 1, 1, "underlined text style" }, +{ "ul", 0, 0, 0, 0, 0, 0, "unordered list " }, +{ "var", 0, 0, 0, 0, 0, 0, "instance of a variable or program argument" }, }; /* diff --git a/HTMLparser.h b/HTMLparser.h index b70c0a12..c79ad09c 100644 --- a/HTMLparser.h +++ b/HTMLparser.h @@ -34,11 +34,12 @@ typedef struct _htmlElemDesc htmlElemDesc; typedef htmlElemDesc *htmlElemDescPtr; struct _htmlElemDesc { const char *name; /* The tag name */ - int startTag; /* Whether the start tag can be implied */ - int endTag; /* Whether the end tag can be implied */ - int empty; /* Is this an empty element ? */ - int depr; /* Is this a deprecated element ? */ - int dtd; /* 1: only in Loose DTD, 2: only Frameset one */ + char startTag; /* Whether the start tag can be implied */ + char endTag; /* Whether the end tag can be implied */ + char saveEndTag; /* Whether the end tag should be saved */ + char empty; /* Is this an empty element ? */ + char depr; /* Is this a deprecated element ? */ + char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ const char *desc; /* the description */ }; diff --git a/HTMLtree.c b/HTMLtree.c index 081ab129..5dce1744 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -871,7 +871,7 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, const return; } if ((cur->content == NULL) && (cur->children == NULL)) { - if ((info != NULL) && (info->endTag != 0) && + if ((info != NULL) && (info->saveEndTag != 0) && (strcmp(info->name, "html")) && (strcmp(info->name, "body"))) { xmlOutputBufferWriteString(buf, ">"); } else { diff --git a/include/libxml/HTMLparser.h b/include/libxml/HTMLparser.h index b70c0a12..c79ad09c 100644 --- a/include/libxml/HTMLparser.h +++ b/include/libxml/HTMLparser.h @@ -34,11 +34,12 @@ typedef struct _htmlElemDesc htmlElemDesc; typedef htmlElemDesc *htmlElemDescPtr; struct _htmlElemDesc { const char *name; /* The tag name */ - int startTag; /* Whether the start tag can be implied */ - int endTag; /* Whether the end tag can be implied */ - int empty; /* Is this an empty element ? */ - int depr; /* Is this a deprecated element ? */ - int dtd; /* 1: only in Loose DTD, 2: only Frameset one */ + char startTag; /* Whether the start tag can be implied */ + char endTag; /* Whether the end tag can be implied */ + char saveEndTag; /* Whether the end tag should be saved */ + char empty; /* Is this an empty element ? */ + char depr; /* Is this a deprecated element ? */ + char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ const char *desc; /* the description */ }; diff --git a/result/HTML/doc3.htm.err b/result/HTML/doc3.htm.err index 73860c8c..46aae6f0 100644 --- a/result/HTML/doc3.htm.err +++ b/result/HTML/doc3.htm.err @@ -70,6 +70,12 @@ om/ad_static.asp?pid=2097&sid=1881&asid=7708">Code:BP6-hd

Code:BP6-hd

Code:BP6-hd

Code:BP6-hd

- @@ -625,7 +625,7 @@ Contruction workers in Berlin opened an old wound in the German psyche this week

-
+
+

diff --git a/result/XPath/tests/chaptersprefol b/result/XPath/tests/chaptersprefol index 6222eed9..a12c937c 100644 --- a/result/XPath/tests/chaptersprefol +++ b/result/XPath/tests/chaptersprefol @@ -23,25 +23,25 @@ Set contains 0 nodes: Expression: /child::EXAMPLE/child::chapter[3]/preceding::* Object is a Node Set : Set contains 10 nodes: -1 ELEMENT p +1 ELEMENT head 2 ELEMENT title 3 ELEMENT chapter ATTRIBUTE id TEXT - content=chapter2 -4 ELEMENT p -5 ELEMENT image + content=chapter1 +4 ELEMENT title +5 ELEMENT p +6 ELEMENT image ATTRIBUTE href TEXT content=linus.gif -6 ELEMENT p -7 ELEMENT title +7 ELEMENT p 8 ELEMENT chapter ATTRIBUTE id TEXT - content=chapter1 + content=chapter2 9 ELEMENT title -10 ELEMENT head +10 ELEMENT p ======================== Expression: /child::EXAMPLE/child::chapter[3]/following::* @@ -64,10 +64,10 @@ Set contains 6 nodes: Expression: /child::EXAMPLE/child::chapter[1]/image/preceding::* Object is a Node Set : Set contains 4 nodes: -1 ELEMENT p +1 ELEMENT head 2 ELEMENT title 3 ELEMENT title -4 ELEMENT head +4 ELEMENT p ======================== Expression: /child::EXAMPLE/child::chapter[1]/image/following::* diff --git a/xpath.c b/xpath.c index 619ec0e2..b8e9460c 100644 --- a/xpath.c +++ b/xpath.c @@ -584,6 +584,31 @@ xmlXPathCmpNodes(xmlNodePtr node1, xmlNodePtr node2) { return(-1); /* assume there is no sibling list corruption */ } +/** + * xmlXPathNodeSetSort: + * @set: the node set + * + * Sort the node set in document order + */ +void +xmlXPathNodeSetSort(xmlNodeSetPtr set) { + int i, j; + xmlNodePtr tmp; + + if (set == NULL) + return; + + for (i = 0;i < set->nodeNr -1;i++) { + for (j = i + 1; j < set->nodeNr; j++) { + if (xmlXPathCmpNodes(set->nodeTab[i], set->nodeTab[j]) == -1) { + tmp = set->nodeTab[i]; + set->nodeTab[i] = set->nodeTab[j]; + set->nodeTab[j] = tmp; + } + } + } +} + #define XML_NODESET_DEFAULT 10 /** * xmlXPathNodeSetCreate: @@ -5228,12 +5253,14 @@ xmlXPathEvalPathExpr(xmlXPathParserContextPtr ctxt) { void xmlXPathEvalUnionExpr(xmlXPathParserContextPtr ctxt) { + int sort = 0; xmlXPathEvalPathExpr(ctxt); CHECK_ERROR; SKIP_BLANKS; while (CUR == '|') { xmlXPathObjectPtr obj1,obj2, tmp; + sort = 1; CHECK_TYPE(XPATH_NODESET); obj1 = valuePop(ctxt); tmp = xmlXPathNewNodeSet(ctxt->context->node); @@ -5255,6 +5282,8 @@ xmlXPathEvalUnionExpr(xmlXPathParserContextPtr ctxt) { xmlXPathFreeObject(obj2); SKIP_BLANKS; } + if (sort) { + } } /** @@ -5515,6 +5544,9 @@ xmlXPathEvalExpr(xmlXPathParserContextPtr ctxt) { xmlXPathFreeObject(arg2); SKIP_BLANKS; } + if ((ctxt->value != NULL) && (ctxt->value->type == XPATH_NODESET) && + (ctxt->value->nodesetval != NULL)) + xmlXPathNodeSetSort(ctxt->value->nodesetval); } /** @@ -5581,6 +5613,7 @@ xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) { xmlXPathObjectPtr obj, tmp; xmlNodeSetPtr newset = NULL; xmlNodeSetPtr oldset; + xmlNodePtr oldnode; int i; SKIP_BLANKS; @@ -5598,6 +5631,7 @@ xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) { CHECK_TYPE(XPATH_NODESET); obj = valuePop(ctxt); oldset = obj->nodesetval; + oldnode = ctxt->context->node; ctxt->context->node = NULL; if ((oldset == NULL) || (oldset->nodeNr == 0)) { @@ -5675,6 +5709,7 @@ xmlXPathEvalPredicate(xmlXPathParserContextPtr ctxt) { xmlGenericErrorContextNodeSet(xmlGenericErrorContext, ctxt->value->nodesetval); #endif + ctxt->context->node = oldnode; } /**