From 944b5ff4f6187ff169dbe55043ed76c4955b7062 Mon Sep 17 00:00:00 2001 From: Daniel Veillard Date: Wed, 15 Dec 1999 19:08:24 +0000 Subject: [PATCH] A posteriori validation should now work, Added --postvalid to tester program Added xmlDocGetRootElement() to the API, Daniel --- ChangeLog | 9 + doc/html/gnome-xml-entities.html | 52 ++-- doc/html/gnome-xml-htmlparser.html | 44 +-- doc/html/gnome-xml-htmltree.html | 20 +- doc/html/gnome-xml-nanohttp.html | 22 +- doc/html/gnome-xml-parserinternals.html | 182 ++++++------- doc/html/gnome-xml-tree.html | 342 +++++++++++++++--------- doc/html/gnome-xml-valid.html | 124 ++++----- doc/html/gnome-xml-xml-error.html | 22 +- doc/html/gnome-xml-xmlmemory.html | 38 +-- doc/html/gnome-xml-xpath.html | 38 +-- doc/html/index.sgml | 1 + doc/html/libxml.html | 269 ------------------- doc/xml.html | Bin 35003 -> 35284 bytes include/libxml/parserInternals.h | 12 +- parserInternals.h | 12 +- tester.c | 126 ++------- tree.c | 23 ++ valid.c | 122 ++++++++- 19 files changed, 658 insertions(+), 800 deletions(-) delete mode 100644 doc/html/libxml.html diff --git a/ChangeLog b/ChangeLog index f27c8d1f..05a1c558 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Wed Dec 15 19:22:23 CET 1999 Daniel Veillard + + * valid.c: debugging a posteriori validation, except URI expansion + stuff this should be fixed now + * parserInternals.h: fixed a bug in IS_BASECHAR reported by + Carl Nygard + * tester.c: added --postvalid, cleaning of the code + * tree.[ch]: added xmlDocGetRootElement() + Tue Dec 14 20:30:34 PST 1999 Ramiro Estrugo * SAX.h, tree.h : changed 'namespace' to 'nameSpace' to workaround diff --git a/doc/html/gnome-xml-entities.html b/doc/html/gnome-xml-entities.html index 532fe281..0e6e2575 100644 --- a/doc/html/gnome-xml-entities.html +++ b/doc/html/gnome-xml-entities.html @@ -115,7 +115,7 @@ SIZE="3" >

Name

Synopsis

Description

Details






















Name

Synopsis

Description

Details


















Name

Synopsis

Description

Details






Name

Synopsis

Description

Details







Name

Synopsis

Description

Details























































































Name

Synopsis

xmlNodePtr xmlDocGetRootElement (xmlDocPtr doc); +xmlNodePtr xmlGetLastChild (xmlChar *namespace); +> *nameSpace); xmlNodePtr

Description

Details














































































xmlDocGetRootElement ()

xmlNodePtr  xmlDocGetRootElement            (xmlDocPtr doc);

Get the root element of the document (doc->root is a list +containing possibly comments, PIs, etc ...).

doc : the document
Returns :the xmlNodePtr for the root or NULL



















xmlChar *namespace); *nameSpace);namespacenameSpace :























Name

Synopsis

Description

Details













































Try to validate the dtd instance

Try to validate the document against the dtd instance

basically it does check all the definitions in the DtD.













Name

Synopsis

Description

Details







Name

Synopsis

Description

Details















Name

Synopsis

Description

Details















+ diff --git a/doc/html/libxml.html b/doc/html/libxml.html deleted file mode 100644 index e66d6f79..00000000 --- a/doc/html/libxml.html +++ /dev/null @@ -1,269 +0,0 @@ -Gnome XML Library
Gnome XML Library Reference Manual
<<< Previous PageHome Next Page >>>

Gnome XML Library

Table of Contents
parser
tree
entities
valid
xml-error
HTMLparser
HTMLtree
xpath
parserInternals
xmlmemory
nanohttp


<<< Previous PageHome Next Page >>>
Gnome XML Library Reference Manualparser
\ No newline at end of file diff --git a/doc/xml.html b/doc/xml.html index e0e286fc141b55e3bf85feaccbccf05457ead57d..0e74ab79f3197d1ff110b2665452fcc6eb412af5 100644 GIT binary patch delta 333 zcmZusu}T9$6vQZm95GEIVk3iCD5z(Q&4rLc#X|gvy}Ngp2bgK#@F#>c zf`z@6g`ePuxQC5}@ZinByqS4#kF77?dWk!I*gCbD;gsMs+>C9sy68Nww;P&LwEFJe zv^OwD=0AHoezV{6?@{+!={X%Q=Q6&W302aqF88LtQ0j6Lizy106cyUQkdX2YBFkzc z93iJBuX3!FKN&3UC$Z#d1Wdvzcu6?~Ds3GZ)`qbVl5^3haqtI=A_!0kV_B6)2s!B; z>V`lBGvLbQP};#(lySkqmR0zVf|N^ELL%2$J*Qpe4*#o=)@5_y*S7ns*TIL~8ey0u UNjy9YgW+g=IvSrR{^?==2T0RzV*mgE delta 82 zcmcaInQ8Y#rVaX>!d%6rNyW)UnMtV%CHV>^8JWcjo1HjkGqZC6xj@0qPk6l8C$APM k+q_rwUa_c=f}y3QrM{t|zM-Xpp}CcbnU#^*= 0x41) && ((c) <= 0x5a)) || \ - (((c) >= 0x61) && ((c) <= 0x7a)) || \ - (((c) >= 0xaa) && ((c) <= 0x5b)) || \ - (((c) >= 0xc0) && ((c) <= 0xd6)) || \ - (((c) >= 0xd8) && ((c) <= 0xf6)) || \ - (((c) >= 0xf8) && ((c) <= 0xff)) || \ - ((c) == 0xba)) + ((((c) >= 0x0041) && ((c) <= 0x005A)) || \ + (((c) >= 0x0061) && ((c) <= 0x007A)) || \ + (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \ + (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \ + (((c) >= 0x00F8) && ((c) <= 0x00FF))) /* * [88] Digit ::= ... long list see REC ... diff --git a/parserInternals.h b/parserInternals.h index 1ce25b07..9da48462 100644 --- a/parserInternals.h +++ b/parserInternals.h @@ -443,13 +443,11 @@ typedef unsigned char CHARVAL; * [85] BaseChar ::= ... long list see REC ... */ #define IS_BASECHAR(c) \ - ((((c) >= 0x41) && ((c) <= 0x5a)) || \ - (((c) >= 0x61) && ((c) <= 0x7a)) || \ - (((c) >= 0xaa) && ((c) <= 0x5b)) || \ - (((c) >= 0xc0) && ((c) <= 0xd6)) || \ - (((c) >= 0xd8) && ((c) <= 0xf6)) || \ - (((c) >= 0xf8) && ((c) <= 0xff)) || \ - ((c) == 0xba)) + ((((c) >= 0x0041) && ((c) <= 0x005A)) || \ + (((c) >= 0x0061) && ((c) <= 0x007A)) || \ + (((c) >= 0x00C0) && ((c) <= 0x00D6)) || \ + (((c) >= 0x00D8) && ((c) <= 0x00F6)) || \ + (((c) >= 0x00F8) && ((c) <= 0x00FF))) /* * [88] Digit ::= ... long list see REC ... diff --git a/tester.c b/tester.c index 29ae6b3c..11876275 100644 --- a/tester.c +++ b/tester.c @@ -44,84 +44,13 @@ static int recovery = 0; static int noent = 0; static int noout = 0; static int valid = 0; +static int postvalid = 0; static int repeat = 0; static int insert = 0; static int compress = 0; extern int xmlDoValidityCheckingDefaultValue; -/* - * Note: there is a couple of errors introduced on purpose. -static xmlChar buffer[] = -"\n\ -\n\ -\n\ -\n\ -\n\ - \n\ - \n\ - Jim Whitehead\n\ - Roy Fielding\n\ - \n\ - \n\ - \n\ - \n\ - \n\ - \n\ -\n\ -\n\ -"; - */ - -/************************************************************************ - * * - * Debug * - * * - ************************************************************************/ - -int treeTest(void) { - xmlDocPtr doc, tmp; - xmlNodePtr tree, subtree; - - /* - * build a fake XML document - */ - doc = xmlNewDoc(BAD_CAST "1.0"); - doc->root = xmlNewDocNode(doc, NULL, BAD_CAST "EXAMPLE", NULL); - xmlSetProp(doc->root, BAD_CAST "prop1", BAD_CAST "gnome is great"); - xmlSetProp(doc->root, BAD_CAST "prop2", BAD_CAST "&linux; too"); - xmlSetProp(doc->root, BAD_CAST "emptyprop", BAD_CAST ""); - tree = xmlNewChild(doc->root, NULL, BAD_CAST "head", NULL); - subtree = xmlNewChild(tree, NULL, BAD_CAST "title", - BAD_CAST "Welcome to Gnome"); - tree = xmlNewChild(doc->root, NULL, BAD_CAST "chapter", NULL); - subtree = xmlNewChild(tree, NULL, BAD_CAST "title", - BAD_CAST "The Linux adventure"); - subtree = xmlNewChild(tree, NULL, BAD_CAST "p", BAD_CAST "bla bla bla ..."); - subtree = xmlNewChild(tree, NULL, BAD_CAST "image", NULL); - xmlSetProp(subtree, BAD_CAST "href", BAD_CAST "linus.gif"); - - /* - * test intermediate copy if needed. - */ - if (copy) { - tmp = doc; - doc = xmlCopyDoc(doc, 1); - xmlFreeDoc(tmp); - } - - /* - * print it. - */ - if (noout == 0) - xmlDocDump(stdout, doc); - - /* - * free it. - */ - xmlFreeDoc(doc); - return(0); -} void parseAndPrintFile(char *filename) { xmlDocPtr doc, tmp; @@ -178,47 +107,18 @@ void parseAndPrintFile(char *filename) { } else xmlDebugDumpDocument(stdout, doc); } - if (debugent) - xmlDebugDumpEntities(stdout, doc); /* - * free it. + * A posteriori validation test */ - xmlFreeDoc(doc); -} - -void parseAndPrintBuffer(xmlChar *buf) { - xmlDocPtr doc, tmp; - - /* - * build an XML tree from a string; - */ - if (recovery) - doc = xmlRecoverDoc(buf); - else - doc = xmlParseDoc(buf); - - /* - * test intermediate copy if needed. - */ - if (copy) { - tmp = doc; - doc = xmlCopyDoc(doc, 1); - xmlFreeDoc(tmp); + if (postvalid) { + xmlValidCtxt cvp; + cvp.userData = (void *) stderr; cvp.error = (xmlValidityErrorFunc) fprintf; cvp.warning = (xmlValidityWarningFunc) fprintf; + xmlValidateDocument(&cvp, doc); } - /* - * print it. - */ - if (!debug) { - if (compress) - xmlSaveFile("-", doc); - else - xmlDocDump(stdout, doc); - } else - xmlDebugDumpDocument(stdout, doc); if (debugent) - xmlDebugDumpEntities(stdout, doc); + xmlDebugDumpEntities(stdout, doc); /* * free it. @@ -249,6 +149,9 @@ int main(int argc, char **argv) { else if ((!strcmp(argv[i], "-valid")) || (!strcmp(argv[i], "--valid"))) valid++; + else if ((!strcmp(argv[i], "-postvalid")) || + (!strcmp(argv[i], "--postvalid"))) + postvalid++; else if ((!strcmp(argv[i], "-insert")) || (!strcmp(argv[i], "--insert"))) insert++; @@ -280,12 +183,13 @@ int main(int argc, char **argv) { printf("\t--debug : dump a debug tree of the in-memory document\n"); printf("\t--debugent : debug the entities defined in the document\n"); printf("\t--copy : used to test the internal copy implementation\n"); - printf("\t--recover : output what is parsable on broken XmL documents\n"); + printf("\t--recover : output what was parsable on broken XML documents\n"); printf("\t--noent : substitute entity references by their value\n"); - printf("\t--noout : don't output the result\n"); + printf("\t--noout : don't output the result tree\n"); printf("\t--valid : validate the document in addition to std well-formed check\n"); - printf("\t--repeat : parse the file 100 times, for timing or profiling\n"); - printf("\t--insert : test for valid insertions\n"); + printf("\t--postvalid : do a posteriori validation, i.e after parsing\n"); + printf("\t--repeat : repeat 100 times, for timing or profiling\n"); + printf("\t--insert : ad-hoc test for valid insertions\n"); printf("\t--compress : turn on gzip compression of output\n"); } xmlCleanupParser(); diff --git a/tree.c b/tree.c index 5451f532..a920d400 100644 --- a/tree.c +++ b/tree.c @@ -2155,6 +2155,29 @@ xmlCopyDoc(xmlDocPtr doc, int recursive) { * * ************************************************************************/ +/** + * xmlDocGetRootElement: + * @doc: the document + * + * Get the root element of the document (doc->root is a list + * containing possibly comments, PIs, etc ...). + * + * Returns the xmlNodePtr for the root or NULL + */ +xmlNodePtr +xmlDocGetRootElement(xmlDocPtr doc) { + xmlNodePtr ret; + + if (doc == NULL) return(NULL); + ret = doc->root; + while (ret != NULL) { + if (ret->type == XML_ELEMENT_NODE) + return(ret); + ret = ret->next; + } + return(ret); +} + /** * xmlNodeSetLang: * @cur: the node being changed diff --git a/valid.c b/valid.c index bc6d2ab9..1efdbd06 100644 --- a/valid.c +++ b/valid.c @@ -2730,7 +2730,69 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, CHECK_DTD; - if ((elem == NULL) || (elem->name == NULL)) return(0); + if (elem == NULL) return(0); + if (elem->type == XML_TEXT_NODE) { + } + switch (elem->type) { + case XML_ATTRIBUTE_NODE: + VERROR(ctxt->userData, + "Attribute element not expected here\n"); + return(0); + case XML_TEXT_NODE: + if (elem->childs != NULL) { + VERROR(ctxt->userData, "Text element has childs !\n"); + return(0); + } + if (elem->properties != NULL) { + VERROR(ctxt->userData, "Text element has attributes !\n"); + return(0); + } + if (elem->ns != NULL) { + VERROR(ctxt->userData, "Text element has namespace !\n"); + return(0); + } + if (elem->ns != NULL) { + VERROR(ctxt->userData, + "Text element carries namespace definitions !\n"); + return(0); + } + if (elem->content == NULL) { + VERROR(ctxt->userData, + "Text element has no content !\n"); + return(0); + } + return(1); + case XML_CDATA_SECTION_NODE: + case XML_ENTITY_REF_NODE: + case XML_PI_NODE: + case XML_COMMENT_NODE: + return(1); + case XML_ENTITY_NODE: + VERROR(ctxt->userData, + "Entity element not expected here\n"); + return(0); + case XML_NOTATION_NODE: + VERROR(ctxt->userData, + "Notation element not expected here\n"); + return(0); + case XML_DOCUMENT_NODE: + case XML_DOCUMENT_TYPE_NODE: + case XML_DOCUMENT_FRAG_NODE: + VERROR(ctxt->userData, + "Document element not expected here\n"); + return(0); + case XML_HTML_DOCUMENT_NODE: + VERROR(ctxt->userData, + "\n"); + return(0); + case XML_ELEMENT_NODE: + break; + default: + VERROR(ctxt->userData, + "unknown element type %d\n", elem->type); + return(0); + } + if (elem->name == NULL) return(0); elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name); if ((elemDecl == NULL) && (doc->extSubset != NULL)) @@ -2828,6 +2890,7 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, int xmlValidateRoot(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { + xmlNodePtr root; if (doc == NULL) return(0); if ((doc->intSubset == NULL) || @@ -2835,14 +2898,15 @@ xmlValidateRoot(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { VERROR(ctxt->userData, "Not valid: no DtD found\n"); return(0); } - if ((doc->root == NULL) || (doc->root->name == NULL)) { + root = xmlDocGetRootElement(doc); + if ((root == NULL) || (root->name == NULL)) { VERROR(ctxt->userData, "Not valid: no root element\n"); return(0); } - if (xmlStrcmp(doc->intSubset->name, doc->root->name)) { + if (xmlStrcmp(doc->intSubset->name, root->name)) { VERROR(ctxt->userData, - "Not valid: root and DtD name do not match %s and %s\n", - doc->root->name, doc->intSubset->name); + "Not valid: root and DtD name do not match '%s' and '%s'\n", + root->name, doc->intSubset->name); return(0); } return(1); @@ -2876,7 +2940,7 @@ xmlValidateElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr elem) { value = xmlNodeListGetString(doc, attr->val, 0); ret &= xmlValidateOneAttribute(ctxt, doc, elem, attr, value); if (value != NULL) - free(value); + xmlFree(value); attr= attr->next; } child = elem->childs; @@ -2937,7 +3001,7 @@ xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { * @doc: a document instance * @dtd: a dtd instance * - * Try to validate the dtd instance + * Try to validate the document against the dtd instance * * basically it does check all the definitions in the DtD. * @@ -2946,8 +3010,24 @@ xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { int xmlValidateDtd(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlDtdPtr dtd) { - /* TODO xmlValidateDtd */ - return(1); + int ret; + xmlDtdPtr oldExt; + xmlNodePtr root; + + if (dtd == NULL) return(0); + if (doc == NULL) return(0); + oldExt = doc->extSubset; + doc->extSubset = dtd; + ret = xmlValidateRoot(ctxt, doc); + if (ret == 0) { + doc->extSubset = oldExt; + return(ret); + } + root = xmlDocGetRootElement(doc); + ret = xmlValidateElement(ctxt, doc, root); + ret &= xmlValidateDocumentFinal(ctxt, doc); + doc->extSubset = oldExt; + return(ret); } /** @@ -2967,10 +3047,32 @@ xmlValidateDtd(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlDtdPtr dtd) { int xmlValidateDocument(xmlValidCtxtPtr ctxt, xmlDocPtr doc) { int ret; + xmlNodePtr root; + + if ((doc->intSubset == NULL) && (doc->extSubset == NULL)) + return(0); + if ((doc->intSubset != NULL) && ((doc->intSubset->SystemID != NULL) || + (doc->intSubset->ExternalID != NULL)) && (doc->extSubset == NULL)) { + doc->extSubset = xmlParseDTD(doc->intSubset->ExternalID, + doc->intSubset->SystemID); + if (doc->extSubset == NULL) { + if (doc->intSubset->SystemID != NULL) { + VERROR(ctxt->userData, + "Could not load the external subset '%s'\n", + doc->intSubset->SystemID); + } else { + VERROR(ctxt->userData, + "Could not load the external subset '%s'\n", + doc->intSubset->ExternalID); + } + return(0); + } + } if (!xmlValidateRoot(ctxt, doc)) return(0); - ret = xmlValidateElement(ctxt, doc, doc->root); + root = xmlDocGetRootElement(doc); + ret = xmlValidateElement(ctxt, doc, root); ret &= xmlValidateDocumentFinal(ctxt, doc); return(ret); }