diff --git a/ChangeLog b/ChangeLog index c355fc9d..c82ea6f5 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +Wed Apr 16 01:28:15 CEST 2003 Daniel Veillard + + * relaxng.c xmlreader.c xmllint.c include/libxml/relaxng.h + include/libxml/xmlreader.h: implemented streaming of + RelaxNG (when possible) on top of the xmlReader interface, + provided it as xmllint --stream --relaxng .rng .xml + This seems to mostly work. + * Makefile.am: updated to test RelaxNG streaming + Mon Apr 14 18:08:33 CEST 2003 Daniel Veillard * relaxng.c include/libxml/relaxng.h: integrated the regexp diff --git a/Makefile.am b/Makefile.am index cb482f6b..216af1b7 100644 --- a/Makefile.am +++ b/Makefile.am @@ -733,6 +733,33 @@ Relaxtests: xmllint$(EXEEXT) rm res.$$name err.$$name ; \ fi ; fi ; \ done; done) + @echo "##" + @echo "## Relax-NG streaming regression tests" + @echo "##" + -@(for i in $(srcdir)/test/relaxng/*.rng ; do \ + name=`basename $$i | sed 's+\.rng++'`; \ + for j in $(srcdir)/test/relaxng/"$$name"_*.xml ; do \ + if [ -f $$j ] ; then \ + xno=`basename $$j | sed 's+.*_\(.*\).xml+\1+'`; \ + if [ ! 
-f $(srcdir)/result/relaxng/"$$name"_"$$xno" ]; \ + then \ + echo New test file "$$name"_"$$xno" ; \ + $(CHECKER) $(top_builddir)/xmllint$(EXEEXT) --noout --relaxng $$i $$j \ + > $(srcdir)/result/relaxng/"$$name"_"$$xno" \ + 2> $(srcdir)/result/relaxng/"$$name"_"$$xno".err; \ + else \ + echo Testing "$$name"_"$$xno" ; \ + $(CHECKER) $(top_builddir)/xmllint$(EXEEXT) --noout --stream --relaxng $$i $$j \ + > res.$$name 2> err.$$name;\ + grep "MORY ALLO" .memdump | grep -v "MEMORY ALLOCATED : 0";\ + diff $(srcdir)/result/relaxng/"$$name"_"$$xno" \ + res.$$name;\ + diff $(srcdir)/result/relaxng/"$$name"_"$$xno".err \ + err.$$name | grep -v "error detected at";\ + grep Unimplemented err.$$name; \ + rm res.$$name err.$$name ; \ + fi ; fi ; \ + done; done) dist-hook: libxml2.spec -cp libxml2.spec $(distdir) diff --git a/include/libxml/relaxng.h b/include/libxml/relaxng.h index 3e8de7ab..a21f92c2 100644 --- a/include/libxml/relaxng.h +++ b/include/libxml/relaxng.h @@ -104,4 +104,19 @@ void xmlRelaxNGFreeValidCtxt (xmlRelaxNGValidCtxtPtr ctxt); int xmlRelaxNGValidateDoc (xmlRelaxNGValidCtxtPtr ctxt, xmlDocPtr doc); void xmlRelaxNGCleanupTypes (void); +/* + * Interfaces for progressive validation when possible + */ +int xmlRelaxNGValidatePushElement (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlRelaxNGValidatePushCData (xmlRelaxNGValidCtxtPtr ctxt, + const xmlChar *data, + int len); +int xmlRelaxNGValidatePopElement (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +int xmlRelaxNGValidateFullElement (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); #endif /* __XML_RELAX_NG__ */ diff --git a/include/libxml/xmlreader.h b/include/libxml/xmlreader.h index de980616..1ced8b85 100644 --- a/include/libxml/xmlreader.h +++ b/include/libxml/xmlreader.h @@ -109,6 +109,10 @@ xmlDocPtr xmlTextReaderCurrentDoc (xmlTextReaderPtr reader); xmlNodePtr xmlTextReaderExpand (xmlTextReaderPtr reader); int xmlTextReaderNext 
(xmlTextReaderPtr reader); int xmlTextReaderIsValid (xmlTextReaderPtr reader); +#ifdef LIBXML_SCHEMAS_ENABLED +int xmlTextReaderRelaxNGValidate (xmlTextReaderPtr reader, + const char *rng); +#endif /* * Error handling extensions diff --git a/relaxng.c b/relaxng.c index 4a00024d..cb3c130c 100644 --- a/relaxng.c +++ b/relaxng.c @@ -57,8 +57,8 @@ static const xmlChar *xmlRelaxNGNs = (const xmlChar *) /* #define DEBUG_INCLUDE */ /* #define DEBUG_ERROR 1 */ /* #define DEBUG_COMPILE 1 */ +/* #define DEBUG_PROGRESSIVE 1 */ -#define UNBOUNDED (1 << 30) #define MAX_ERROR 5 #define TODO \ @@ -356,6 +356,17 @@ struct _xmlRelaxNGValidCtxt { int freeStatesNr; int freeStatesMax; xmlRelaxNGStatesPtr *freeStates; /* the pool of free state groups */ + + /* + * This is used for "progressive" validation + */ + xmlRegExecCtxtPtr elem; /* the current element regexp */ + int elemNr; /* the number of element validated */ + int elemMax; /* the max depth of elements */ + xmlRegExecCtxtPtr *elemTab; /* the stack of regexp runtime */ + int pstate; /* progressive state */ + xmlNodePtr pnode; /* the current node */ + xmlRelaxNGDefinePtr pdef; /* the non-streamable definition */ }; /** @@ -7095,8 +7106,19 @@ xmlRelaxNGParse(xmlRelaxNGParserCtxtPtr ctxt) /* * try to compile (parts of) the schemas */ - if (ctxt->grammar != NULL) + if ((ctxt->grammar != NULL) && (ctxt->grammar->start != NULL)) { + if (ctxt->grammar->start->type != XML_RELAXNG_START) { + xmlRelaxNGDefinePtr def; + + def = xmlRelaxNGNewDefine(ctxt, NULL); + if (def != NULL) { + def->type = XML_RELAXNG_START; + def->content = ctxt->grammar->start; + ctxt->grammar->start = def; + } + } xmlRelaxNGTryCompile(ctxt, ctxt->grammar->start); + } /* * Transfer the pointer for cleanup at the schema level. 
@@ -7485,7 +7507,7 @@ xmlRelaxNGValidateCompiledContent(xmlRelaxNGValidCtxtPtr ctxt, ctxt->state->seq = NULL; } else if (ret == 0) { /* - * TODO: get soem of the names needed to exit the current state of exec + * TODO: get some of the names needed to exit the current state of exec */ VALID_ERR2(XML_RELAXNG_ERR_NOELEM, BAD_CAST ""); ret = -1; @@ -7503,6 +7525,381 @@ xmlRelaxNGValidateCompiledContent(xmlRelaxNGValidCtxtPtr ctxt, * Progressive validation of when possible * * * ************************************************************************/ +static int xmlRelaxNGValidateAttributeList(xmlRelaxNGValidCtxtPtr ctxt, + xmlRelaxNGDefinePtr defines); +static int xmlRelaxNGValidateElementEnd(xmlRelaxNGValidCtxtPtr ctxt); + +/** + * xmlRelaxNGElemPush: + * @ctxt: the validation context + * @exec: the regexp runtime for the new content model + * + * Push a new regexp for the current node content model on the stack + * + * Returns 0 in case of success and -1 in case of error. + */ +static int +xmlRelaxNGElemPush(xmlRelaxNGValidCtxtPtr ctxt, xmlRegExecCtxtPtr exec) { + if (ctxt->elemTab == NULL) { + ctxt->elemMax = 10; + ctxt->elemTab = (xmlRegExecCtxtPtr *) xmlMalloc(ctxt->elemMax * + sizeof(xmlRegExecCtxtPtr)); + if (ctxt->elemTab == NULL) { + VALID_ERR(XML_RELAXNG_ERR_MEMORY); + return(-1); + } + } + if (ctxt->elemNr >= ctxt->elemMax) { + ctxt->elemMax *= 2; + ctxt->elemTab = (xmlRegExecCtxtPtr *) xmlRealloc(ctxt->elemTab, + ctxt->elemMax * sizeof(xmlRegExecCtxtPtr)); + if (ctxt->elemTab == NULL) { + VALID_ERR(XML_RELAXNG_ERR_MEMORY); + return(-1); + } + } + ctxt->elemTab[ctxt->elemNr++] = exec; + ctxt->elem = exec; + return(0); +} + +/** + * xmlRelaxNGElemPop: + * @ctxt: the validation context + * + * Pop the regexp of the current node content model from the stack + * + * Returns the exec or NULL if empty + */ +static xmlRegExecCtxtPtr +xmlRelaxNGElemPop(xmlRelaxNGValidCtxtPtr ctxt) { + xmlRegExecCtxtPtr ret; + + if (ctxt->elemNr <= 0) return(NULL); + ctxt->elemNr--; 
+ ret = ctxt->elemTab[ctxt->elemNr]; + ctxt->elemTab[ctxt->elemNr] = NULL; + if (ctxt->elemNr > 0) + ctxt->elem = ctxt->elemTab[ctxt->elemNr - 1]; + else + ctxt->elem = NULL; + return(ret); +} + +/** + * xmlRelaxNGValidateProgressiveCallback: + * @exec: the regular expression instance + * @token: the token which matched + * @transdata: callback data, the define for the subelement if available + * @inputdata: callback data, the Relax NG validation context + * + * Handle the callback and if needed validate the element children. + * Some of the in/out information is passed via the context in @inputdata. + */ +static void +xmlRelaxNGValidateProgressiveCallback(xmlRegExecCtxtPtr exec ATTRIBUTE_UNUSED, + const xmlChar *token, + void *transdata, + void *inputdata) { + xmlRelaxNGValidCtxtPtr ctxt = (xmlRelaxNGValidCtxtPtr) inputdata; + xmlRelaxNGDefinePtr define = (xmlRelaxNGDefinePtr) transdata; + xmlRelaxNGValidStatePtr state; + xmlNodePtr node = ctxt->pnode; + int ret; + +#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, + "Progressive callback for: '%s'\n", token); +#endif + if (ctxt == NULL) { + fprintf(stderr, "callback on %s missing context\n", token); + return; + } + ctxt->pstate = 1; + if (define == NULL) { + if (token[0] == '#') + return; + fprintf(stderr, "callback on %s missing define\n", token); + if ((ctxt != NULL) && (ctxt->errNo == XML_RELAXNG_OK)) + ctxt->errNo = XML_RELAXNG_ERR_INTERNAL; + ctxt->pstate = -1; + return; + } + if ((ctxt == NULL) || (define == NULL)) { + fprintf(stderr, "callback on %s missing info\n", token); + if ((ctxt != NULL) && (ctxt->errNo == XML_RELAXNG_OK)) + ctxt->errNo = XML_RELAXNG_ERR_INTERNAL; + ctxt->pstate = -1; + return; + } else if (define->type != XML_RELAXNG_ELEMENT) { + fprintf(stderr, "callback on %s define is not element\n", token); + if (ctxt->errNo == XML_RELAXNG_OK) + ctxt->errNo = XML_RELAXNG_ERR_INTERNAL; + ctxt->pstate = -1; + return; + } + if (node->type != XML_ELEMENT_NODE) { 
VALID_ERR(XML_RELAXNG_ERR_NOTELEM); + if ((ctxt->flags & FLAGS_IGNORABLE) == 0) + xmlRelaxNGDumpValidError(ctxt); + ctxt->pstate = -1; + return; + } + if (define->contModel == NULL) { + /* + * this node cannot be validated in a streamable fashion + */ +#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, + "Element '%s' validation is not streamable\n", token); +#endif + ctxt->pstate = 0; + ctxt->pdef = define; + return; + } + exec = xmlRegNewExecCtxt(define->contModel, + xmlRelaxNGValidateProgressiveCallback, + ctxt); + if (exec == NULL) { + ctxt->pstate = -1; + return; + } + xmlRelaxNGElemPush(ctxt, exec); + + /* + * Validate the attributes part of the content. + */ + state = xmlRelaxNGNewValidState(ctxt, node); + if (state == NULL) { + ctxt->pstate = -1; + return; + } + ctxt->state = state; + if (define->attrs != NULL) { + ret = xmlRelaxNGValidateAttributeList(ctxt, define->attrs); + if (ret != 0) { + ctxt->pstate = -1; + VALID_ERR2(XML_RELAXNG_ERR_ATTRVALID, node->name); + } + } + ctxt->state->seq = NULL; + ret = xmlRelaxNGValidateElementEnd(ctxt); + if (ret != 0) { + ctxt->pstate = -1; + } + xmlRelaxNGFreeValidState(ctxt, state); + ctxt->state = NULL; +} + +/** + * xmlRelaxNGValidatePushElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * + * Push a new element start on the RelaxNG validation stack. + * + * returns 1 if no validation problem was found or 0 if validating the + * element requires a full node, and -1 in case of error. 
+ */ +int +xmlRelaxNGValidatePushElement(xmlRelaxNGValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem) +{ + int ret = 1; + + if ((ctxt == NULL) || (elem == NULL)) + return (-1); + +#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, "PushElem %s\n", elem->name); +#endif + if (ctxt->elem == 0) { + xmlRelaxNGPtr schema; + xmlRelaxNGGrammarPtr grammar; + xmlRegExecCtxtPtr exec; + xmlRelaxNGDefinePtr define; + + schema = ctxt->schema; + if (schema == NULL) { + VALID_ERR(XML_RELAXNG_ERR_NOGRAMMAR); + return (-1); + } + grammar = schema->topgrammar; + if ((grammar == NULL) || (grammar->start == NULL)) { + VALID_ERR(XML_RELAXNG_ERR_NOGRAMMAR); + return (-1); + } + define = grammar->start; + if (define->contModel == NULL) { + ctxt->pdef = define; + return (0); + } + exec = xmlRegNewExecCtxt(define->contModel, + xmlRelaxNGValidateProgressiveCallback, + ctxt); + if (exec == NULL) { + return (-1); + } + xmlRelaxNGElemPush(ctxt, exec); + } + ctxt->pnode = elem; + ctxt->pstate = 0; + if (elem->ns != NULL) { + ret = + xmlRegExecPushString2(ctxt->elem, elem->name, elem->ns->href, + ctxt); + } else { + ret = xmlRegExecPushString(ctxt->elem, elem->name, ctxt); + } + if (ret < 0) { + VALID_ERR2(XML_RELAXNG_ERR_ELEMWRONG, elem->name); + } else { + if (ctxt->pstate == 0) + ret = 0; + else if (ctxt->pstate < 0) + ret = -1; + else + ret = 1; + } +#ifdef DEBUG_PROGRESSIVE + if (ret < 0) + xmlGenericError(xmlGenericErrorContext, "PushElem %s failed\n", + elem->name); +#endif + return (ret); +} + +/** + * xmlRelaxNGValidatePushCData: + * @ctxt: the RelaxNG validation context + * @data: some character data read + * @len: the length of the data + * + * check the CData parsed for validation in the current stack + * + * returns 1 if no validation problem was found or -1 otherwise + */ +int +xmlRelaxNGValidatePushCData(xmlRelaxNGValidCtxtPtr ctxt, + const xmlChar * data, int len) +{ + int ret = 1; + + if ((ctxt == NULL) || (ctxt->elem == NULL) || (data == NULL)) + return (-1); + 
+#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, "CDATA %s %d\n", data, len); +#endif + + while (*data != 0) { + if (!IS_BLANK(*data)) + break; + data++; + } + if (*data == 0) + return(1); + + ret = xmlRegExecPushString(ctxt->elem, BAD_CAST "#text", ctxt); + if (ret < 0) { + VALID_ERR2(XML_RELAXNG_ERR_TEXTWRONG, " TODO "); +#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, "CDATA failed\n"); +#endif + + return(-1); + } + return(1); +} + +/** + * xmlRelaxNGValidatePopElement: + * @ctxt: the RelaxNG validation context + * @doc: a document instance + * @elem: an element instance + * + * Pop the element end from the RelaxNG validation stack. + * + * returns 1 if no validation problem was found or -1 otherwise + */ +int +xmlRelaxNGValidatePopElement(xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc ATTRIBUTE_UNUSED, + xmlNodePtr elem) { + int ret; + xmlRegExecCtxtPtr exec; + + if ((ctxt == NULL) || (ctxt->elem == NULL) || (elem == NULL)) return(-1); +#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, "PopElem %s\n", elem->name); +#endif + /* + * verify that we reached a terminal state of the content model. + */ + exec = xmlRelaxNGElemPop(ctxt); + ret = xmlRegExecPushString(exec, NULL, NULL); + if (ret == 0) { + /* + * TODO: get some of the names needed to exit the current state of exec + */ + VALID_ERR2(XML_RELAXNG_ERR_NOELEM, BAD_CAST ""); + ret = -1; + } else if (ret < 0) { + ret = -1; + } else { + ret = 1; + } + xmlRegFreeExecCtxt(exec); +#ifdef DEBUG_PROGRESSIVE + if (ret < 0) + xmlGenericError(xmlGenericErrorContext, "PopElem %s failed\n", + elem->name); +#endif + return(ret); +} + +/** + * xmlRelaxNGValidateFullElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * + * Validate a full subtree when xmlRelaxNGValidatePushElement() returned + * 0 and the content of the node has been expanded. + * + * returns 1 if no validation problem was found or -1 in case of error. 
+ */ +int +xmlRelaxNGValidateFullElement(xmlRelaxNGValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem) { + int ret; + xmlRelaxNGValidStatePtr state; + + if ((ctxt == NULL) || (ctxt->pdef == NULL) || (elem == NULL)) return(-1); +#ifdef DEBUG_PROGRESSIVE + xmlGenericError(xmlGenericErrorContext, "FullElem %s\n", elem->name); +#endif + state = xmlRelaxNGNewValidState(ctxt, elem->parent); + if (state == NULL) { + return(-1); + } + state->seq = elem; + ctxt->state = state; + ctxt->errNo = XML_RELAXNG_OK; + ret = xmlRelaxNGValidateDefinition(ctxt, ctxt->pdef); + if ((ret != 0) || (ctxt->errNo != XML_RELAXNG_OK)) ret = -1; + else ret = 1; + xmlRelaxNGFreeValidState(ctxt, state); + ctxt->state = NULL; +#ifdef DEBUG_PROGRESSIVE + if (ret < 0) + xmlGenericError(xmlGenericErrorContext, "FullElem %s failed\n", + elem->name); +#endif + return(ret); +} + /************************************************************************ * * * Generic interpreted validation implementation * @@ -9158,11 +9555,10 @@ xmlRelaxNGValidateState(xmlRelaxNGValidCtxtPtr ctxt, case XML_RELAXNG_ATTRIBUTE: ret = xmlRelaxNGValidateAttribute(ctxt, define); break; + case XML_RELAXNG_START: case XML_RELAXNG_NOOP: case XML_RELAXNG_REF: case XML_RELAXNG_EXTERNALREF: - ret = xmlRelaxNGValidateDefinition(ctxt, define->content); - break; case XML_RELAXNG_PARENTREF: ret = xmlRelaxNGValidateDefinition(ctxt, define->content); break; @@ -9308,7 +9704,6 @@ xmlRelaxNGValidateState(xmlRelaxNGValidCtxtPtr ctxt, xmlFree(content); break; } - case XML_RELAXNG_START: case XML_RELAXNG_EXCEPT: case XML_RELAXNG_PARAM: TODO ret = -1; @@ -9617,6 +10012,16 @@ xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxtPtr ctxt) { } if (ctxt->errTab != NULL) xmlFree(ctxt->errTab); + if (ctxt->elemTab != NULL) { + xmlRegExecCtxtPtr exec; + + exec = xmlRelaxNGElemPop(ctxt); + while (exec != NULL) { + xmlRegFreeExecCtxt(exec); + exec = xmlRelaxNGElemPop(ctxt); + } + xmlFree(ctxt->elemTab); + } xmlFree(ctxt); } diff --git a/valid.c b/valid.c index 
4dbe1171..2b5e0517 100644 --- a/valid.c +++ b/valid.c @@ -250,39 +250,6 @@ nodeVPop(xmlValidCtxtPtr ctxt) return (ret); } -#if 0 -/** - * xmlFreeValidCtxt: - * @ctxt: a validation context - * - * Free the memory allocated for a validation context - */ -void -xmlFreeValidCtxt(xmlValidCtxtPtr ctxt) { - if (ctxt == NULL) - return; -#ifdef LIBXML_REGEXP_ENABLED - while (ctxt->vstateNr >= 0) - vstateVPop(ctxt); - if (ctxt->vstateNr <= 1) return(-1); - ctxt->vstateNr--; - elemDecl = ctxt->vstateTab[ctxt->vstateNr].elemDecl; - ctxt->vstateTab[ctxt->vstateNr].elemDecl = NULL; - ctxt->vstateTab[ctxt->vstateNr].node = NULL; - if ((elemDecl != NULL) && (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)) { - xmlRegFreeExecCtxt(ctxt->vstateTab[ctxt->vstateNr].exec); - } - ctxt->vstateTab[ctxt->vstateNr].exec = NULL; - if (ctxt->vstateNr >= 1) - ctxt->vstate = &ctxt->vstateTab[ctxt->vstateNr - 1]; - else - ctxt->vstate = NULL; - return(ctxt->vstateNr); -#else /* ! LIBXML_REGEXP_ENABLED */ -#endif /* LIBXML_REGEXP_ENABLED */ -} -#endif - #ifdef DEBUG_VALID_ALGO static void xmlValidPrintNode(xmlNodePtr cur) { diff --git a/xmllint.c b/xmllint.c index fb9141bc..2aac293c 100644 --- a/xmllint.c +++ b/xmllint.c @@ -624,6 +624,8 @@ static void streamFile(char *filename) { if (reader != NULL) { if (valid) xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1); + if (relaxng != NULL) + xmlTextReaderRelaxNGValidate(reader, relaxng); /* * Process all nodes in sequence @@ -642,6 +644,14 @@ static void streamFile(char *filename) { progresult = 3; } } + if (relaxng != NULL) { + if (xmlTextReaderIsValid(reader) != 1) { + printf("%s fails to validate\n", filename); + progresult = 3; + } else { + printf("%s validates\n", filename); + } + } /* * Done, cleanup and status */ diff --git a/xmlreader.c b/xmlreader.c index 4821b2a9..54a3b94e 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -12,7 +12,6 @@ /* * TODOs: - * - provide an API to expand part of the tree * - provide an API to preserve part of the 
tree * - Streaming XInclude support * - validation against a provided DTD @@ -36,6 +35,7 @@ #include #include #include +#include /* #define DEBUG_CALLBACKS */ /* #define DEBUG_READER */ @@ -85,8 +85,15 @@ typedef enum { XML_TEXTREADER_DONE= 5 } xmlTextReaderState; +typedef enum { + XML_TEXTREADER_NOT_VALIDATE = 0, + XML_TEXTREADER_VALIDATE_DTD = 1, + XML_TEXTREADER_VALIDATE_RNG = 2 +} xmlTextReaderValidate; + struct _xmlTextReader { int mode; /* the parsing mode */ + xmlTextReaderValidate validate;/* is there any validation */ int allocs; /* what structure were deallocated */ xmlTextReaderState state; xmlParserCtxtPtr ctxt; /* the parser context */ @@ -112,6 +119,14 @@ struct _xmlTextReader { /* error handling */ xmlTextReaderErrorFunc errorFunc; /* callback function */ void *errorFuncArg; /* callback function user argument */ + +#ifdef LIBXML_SCHEMAS_ENABLED + /* Handling of RelaxNG validation */ + xmlRelaxNGPtr rngSchemas; /* The Relax NG schemas */ + xmlRelaxNGValidCtxtPtr rngValidCtxt; /* The Relax NG validation context */ + int rngValidErrors; /* The number of errors detected */ + xmlNodePtr rngFullNode; /* the node if RNG not progressive */ +#endif }; static const char *xmlTextReaderIsEmpty = "This element is empty"; @@ -425,22 +440,84 @@ xmlTextReaderValidatePush(xmlTextReaderPtr reader) { #ifdef LIBXML_REGEXP_ENABLED xmlNodePtr node = reader->node; - if ((node->ns == NULL) || (node->ns->prefix == NULL)) { - reader->ctxt->valid &= xmlValidatePushElement(&reader->ctxt->vctxt, - reader->ctxt->myDoc, node, node->name); - } else { - xmlChar *qname; + if ((reader->validate == XML_TEXTREADER_VALIDATE_DTD) && + (reader->ctxt != NULL) && (reader->ctxt->validate == 1)) { + if ((node->ns == NULL) || (node->ns->prefix == NULL)) { + reader->ctxt->valid &= xmlValidatePushElement(&reader->ctxt->vctxt, + reader->ctxt->myDoc, node, node->name); + } else { + /* TODO use the BuildQName interface */ + xmlChar *qname; - qname = xmlStrdup(node->ns->prefix); - qname = 
xmlStrcat(qname, BAD_CAST ":"); - qname = xmlStrcat(qname, node->name); - reader->ctxt->valid &= xmlValidatePushElement(&reader->ctxt->vctxt, - reader->ctxt->myDoc, node, qname); - if (qname != NULL) - xmlFree(qname); + qname = xmlStrdup(node->ns->prefix); + qname = xmlStrcat(qname, BAD_CAST ":"); + qname = xmlStrcat(qname, node->name); + reader->ctxt->valid &= xmlValidatePushElement(&reader->ctxt->vctxt, + reader->ctxt->myDoc, node, qname); + if (qname != NULL) + xmlFree(qname); + } +#ifdef LIBXML_SCHEMAS_ENABLED + } else if ((reader->validate == XML_TEXTREADER_VALIDATE_RNG) && + (reader->rngValidCtxt != NULL)) { + int ret; + + if (reader->rngFullNode != NULL) return; + ret = xmlRelaxNGValidatePushElement(reader->rngValidCtxt, + reader->ctxt->myDoc, + node); + if (ret == 0) { + /* + * this element requires a full tree + */ + node = xmlTextReaderExpand(reader); + if (node == NULL) { +printf("Expand failed !\n"); + ret = -1; + } else { + ret = xmlRelaxNGValidateFullElement(reader->rngValidCtxt, + reader->ctxt->myDoc, + node); + reader->rngFullNode = node; + } + } + if (ret != 1) + reader->rngValidErrors++; +#endif } #endif /* LIBXML_REGEXP_ENABLED */ } + +/** + * xmlTextReaderValidateCData: + * @reader: the xmlTextReaderPtr used + * @data: pointer to the CData + * @len: length of the CData block in bytes. 
+ * + * Push some CData for validation + */ +static void +xmlTextReaderValidateCData(xmlTextReaderPtr reader, + const xmlChar *data, int len) { +#ifdef LIBXML_REGEXP_ENABLED + if ((reader->validate == XML_TEXTREADER_VALIDATE_DTD) && + (reader->ctxt != NULL) && (reader->ctxt->validate == 1)) { + reader->ctxt->valid &= xmlValidatePushCData(&reader->ctxt->vctxt, + data, len); +#ifdef LIBXML_SCHEMAS_ENABLED + } else if ((reader->validate == XML_TEXTREADER_VALIDATE_RNG) && + (reader->rngValidCtxt != NULL)) { + int ret; + + if (reader->rngFullNode != NULL) return; + ret = xmlRelaxNGValidatePushCData(reader->rngValidCtxt, data, len); + if (ret != 1) + reader->rngValidErrors++; +#endif + } +#endif /* LIBXML_REGEXP_ENABLED */ +} + /** * xmlTextReaderValidatePop: * @reader: the xmlTextReaderPtr used @@ -452,19 +529,39 @@ xmlTextReaderValidatePop(xmlTextReaderPtr reader) { #ifdef LIBXML_REGEXP_ENABLED xmlNodePtr node = reader->node; - if ((node->ns == NULL) || (node->ns->prefix == NULL)) { - reader->ctxt->valid &= xmlValidatePopElement(&reader->ctxt->vctxt, - reader->ctxt->myDoc, node, node->name); - } else { - xmlChar *qname; + if ((reader->validate == XML_TEXTREADER_VALIDATE_DTD) && + (reader->ctxt != NULL) && (reader->ctxt->validate == 1)) { + if ((node->ns == NULL) || (node->ns->prefix == NULL)) { + reader->ctxt->valid &= xmlValidatePopElement(&reader->ctxt->vctxt, + reader->ctxt->myDoc, node, node->name); + } else { + /* TODO use the BuildQName interface */ + xmlChar *qname; - qname = xmlStrdup(node->ns->prefix); - qname = xmlStrcat(qname, BAD_CAST ":"); - qname = xmlStrcat(qname, node->name); - reader->ctxt->valid &= xmlValidatePopElement(&reader->ctxt->vctxt, - reader->ctxt->myDoc, node, qname); - if (qname != NULL) - xmlFree(qname); + qname = xmlStrdup(node->ns->prefix); + qname = xmlStrcat(qname, BAD_CAST ":"); + qname = xmlStrcat(qname, node->name); + reader->ctxt->valid &= xmlValidatePopElement(&reader->ctxt->vctxt, + reader->ctxt->myDoc, node, qname); + if (qname 
!= NULL) + xmlFree(qname); + } +#ifdef LIBXML_SCHEMAS_ENABLED + } else if ((reader->validate == XML_TEXTREADER_VALIDATE_RNG) && + (reader->rngValidCtxt != NULL)) { + int ret; + + if (reader->rngFullNode != NULL) { + if (node == reader->rngFullNode) + reader->rngFullNode = NULL; + return; + } + ret = xmlRelaxNGValidatePopElement(reader->rngValidCtxt, + reader->ctxt->myDoc, + node); + if (ret != 1) + reader->rngValidErrors++; +#endif } #endif /* LIBXML_REGEXP_ENABLED */ } @@ -514,8 +611,8 @@ xmlTextReaderValidateEntity(xmlTextReaderPtr reader) { xmlTextReaderValidatePush(reader); } else if ((node->type == XML_TEXT_NODE) || (node->type == XML_CDATA_SECTION_NODE)) { - ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, - node->content, xmlStrlen(node->content)); + xmlTextReaderValidateCData(reader, node->content, + xmlStrlen(node->content)); } /* @@ -745,7 +842,7 @@ get_next_node: reader->state = XML_TEXTREADER_END; goto node_found; } - if ((reader->ctxt->validate) && + if ((reader->validate) && (reader->node->type == XML_ELEMENT_NODE)) xmlTextReaderValidatePop(reader); reader->node = reader->node->next; @@ -770,7 +867,7 @@ get_next_node: reader->state = XML_TEXTREADER_END; goto node_found; } - if ((reader->ctxt->validate) && (reader->node->type == XML_ELEMENT_NODE)) + if ((reader->validate) && (reader->node->type == XML_ELEMENT_NODE)) xmlTextReaderValidatePop(reader); reader->node = reader->node->parent; if ((reader->node == NULL) || @@ -826,7 +923,7 @@ node_found: } } else if ((reader->node != NULL) && (reader->node->type == XML_ENTITY_REF_NODE) && - (reader->ctxt != NULL) && (reader->ctxt->validate == 1)) { + (reader->ctxt != NULL) && (reader->validate)) { xmlTextReaderValidateEntity(reader); } if ((reader->node != NULL) && @@ -837,9 +934,8 @@ node_found: goto get_next_node; } #ifdef LIBXML_REGEXP_ENABLED - if ((reader->ctxt->validate) && (reader->node != NULL)) { + if ((reader->validate) && (reader->node != NULL)) { xmlNodePtr node = reader->node; - xmlParserCtxtPtr 
ctxt = reader->ctxt; if ((node->type == XML_ELEMENT_NODE) && ((reader->state != XML_TEXTREADER_END) && @@ -847,8 +943,8 @@ node_found: xmlTextReaderValidatePush(reader); } else if ((node->type == XML_TEXT_NODE) || (node->type == XML_CDATA_SECTION_NODE)) { - ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, - node->content, xmlStrlen(node->content)); + xmlTextReaderValidateCData(reader, node->content, + xmlStrlen(node->content)); } } #endif /* LIBXML_REGEXP_ENABLED */ @@ -1140,6 +1236,14 @@ void xmlFreeTextReader(xmlTextReaderPtr reader) { if (reader == NULL) return; + if (reader->rngSchemas != NULL) { + xmlRelaxNGFree(reader->rngSchemas); + reader->rngSchemas = NULL; + } + if (reader->rngValidCtxt != NULL) { + xmlRelaxNGFreeValidCtxt(reader->rngValidCtxt); + reader->rngValidCtxt = NULL; + } if (reader->ctxt != NULL) { if (reader->ctxt->myDoc != NULL) { xmlFreeDoc(reader->ctxt->myDoc); @@ -2308,6 +2412,7 @@ xmlTextReaderSetParserProp(xmlTextReaderPtr reader, int prop, int value) { case XML_PARSER_VALIDATE: if (value != 0) { ctxt->validate = 1; + reader->validate = XML_TEXTREADER_VALIDATE_DTD; } else { ctxt->validate = 0; } @@ -2351,7 +2456,7 @@ xmlTextReaderGetParserProp(xmlTextReaderPtr reader, int prop) { return(1); return(0); case XML_PARSER_VALIDATE: - return(ctxt->validate); + return(reader->validate); case XML_PARSER_SUBST_ENTITIES: return(ctxt->replaceEntities); } @@ -2396,6 +2501,64 @@ xmlTextReaderCurrentDoc(xmlTextReaderPtr reader) { return(reader->ctxt->myDoc); } +/** + * xmlTextReaderRelaxNGValidate: + * @reader: the xmlTextReaderPtr used + * @rng: the path to a RelaxNG schema or NULL + * + * Use RelaxNG to validate the document as it is processed. + * Activation is only possible before the first Read(). + * if @rng is NULL, then RelaxNG validation is deactivated. + * + * Returns 0 in case the RelaxNG validation could be (de)activated and + * -1 in case of error. 
+ */ +int +xmlTextReaderRelaxNGValidate(xmlTextReaderPtr reader, const char *rng) { + xmlRelaxNGParserCtxtPtr ctxt; + + if (reader == NULL) + return(-1); + + if (rng == NULL) { + if (reader->rngSchemas != NULL) { + xmlRelaxNGFree(reader->rngSchemas); + reader->rngSchemas = NULL; + } + if (reader->rngValidCtxt != NULL) { + xmlRelaxNGFreeValidCtxt(reader->rngValidCtxt); + reader->rngValidCtxt = NULL; + } + return(0); + } + if (reader->mode != XML_TEXTREADER_MODE_INITIAL) + return(-1); + ctxt = xmlRelaxNGNewParserCtxt(rng); + if (reader->errorFunc != NULL) { + xmlRelaxNGSetParserErrors(ctxt, + (xmlRelaxNGValidityErrorFunc) reader->errorFunc, + (xmlRelaxNGValidityWarningFunc) reader->errorFunc, + reader->errorFuncArg); + } + reader->rngSchemas = xmlRelaxNGParse(ctxt); + xmlRelaxNGFreeParserCtxt(ctxt); + if (reader->rngSchemas == NULL) + return(-1); + reader->rngValidCtxt = xmlRelaxNGNewValidCtxt(reader->rngSchemas); + if (reader->rngValidCtxt == NULL) + return(-1); + if (reader->errorFunc != NULL) { + xmlRelaxNGSetValidErrors(reader->rngValidCtxt, + (xmlRelaxNGValidityErrorFunc)reader->errorFunc, + (xmlRelaxNGValidityWarningFunc) reader->errorFunc, + reader->errorFuncArg); + } + reader->rngValidErrors = 0; + reader->rngFullNode = NULL; + reader->validate = XML_TEXTREADER_VALIDATE_RNG; + return(0); +} + /************************************************************************ * * * Error Handling Extensions * @@ -2621,8 +2784,15 @@ xmlTextReaderSetErrorHandler(xmlTextReaderPtr reader, */ int xmlTextReaderIsValid(xmlTextReaderPtr reader) { - if ((reader == NULL) || (reader->ctxt == NULL)) return(-1); - return(reader->ctxt->valid); + if (reader == NULL) return(-1); +#ifdef LIBXML_SCHEMAS_ENABLED + if (reader->validate == XML_TEXTREADER_VALIDATE_RNG) + return(reader->rngValidErrors == 0); +#endif + if ((reader->validate == XML_TEXTREADER_VALIDATE_DTD) && + (reader->ctxt != NULL)) + return(reader->ctxt->valid); + return(0); } /**