diff --git a/ChangeLog b/ChangeLog index 75ca7eef..092a4da8 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +Fri Dec 20 01:11:30 CET 2002 Daniel Veillard + + * testReader.c xmlreader.c valid.c include/libxml/tree.h + include/libxml/valid.h include/libxml/xmlreader.h: working on + DTD validation on top of xml reader interfaces. Allows to + validate arbitrary large instances. This required some extensions + to the valid module interface and augmenting the size of xmlID + and xmlRef structs a bit. + * uri.c xmlregexp.c: simple cleanup. + Wed Dec 18 15:51:22 CET 2002 Daniel Veillard * xmlreader.c include/libxml/xmlreader.h doc/libxml2-api.xml: more diff --git a/include/libxml/tree.h b/include/libxml/tree.h index 93d57e31..0f47268c 100644 --- a/include/libxml/tree.h +++ b/include/libxml/tree.h @@ -387,6 +387,8 @@ struct _xmlID { struct _xmlID *next; /* next ID */ const xmlChar *value; /* The ID name */ xmlAttrPtr attr; /* The attribute holding it */ + const xmlChar *name; /* The attribute if attr is not available */ + int lineno; /* The line number if attr is not available */ }; /** @@ -401,6 +403,8 @@ struct _xmlRef { struct _xmlRef *next; /* next Ref */ const xmlChar *value; /* The Ref name */ xmlAttrPtr attr; /* The attribute holding it */ + const xmlChar *name; /* The attribute if attr is not available */ + int lineno; /* The line number if attr is not available */ }; /** diff --git a/include/libxml/valid.h b/include/libxml/valid.h index 4cc5f978..36373e17 100644 --- a/include/libxml/valid.h +++ b/include/libxml/valid.h @@ -312,6 +312,17 @@ int xmlValidateNmtokensValue(const xmlChar *value); int xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, xmlElementPtr elem); +int xmlValidatePushElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *qname); +int xmlValidatePushCData (xmlValidCtxtPtr ctxt, + const xmlChar *data, + int len); +int xmlValidatePopElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *qname); #endif /* LIBXML_REGEXP_ENABLED */ #ifdef __cplusplus } diff --git a/include/libxml/xmlreader.h b/include/libxml/xmlreader.h index 27c8ba3b..fbe1de0f 100644 --- a/include/libxml/xmlreader.h +++ b/include/libxml/xmlreader.h @@ -28,7 +28,8 @@ typedef xmlTextReader *xmlTextReaderPtr; /* * Constructors & Destructor */ -xmlTextReaderPtr xmlNewTextReader (xmlParserInputBufferPtr input); +xmlTextReaderPtr xmlNewTextReader (xmlParserInputBufferPtr input, + const char *URI); xmlTextReaderPtr xmlNewTextReaderFilename(const char *URI); void xmlFreeTextReader (xmlTextReaderPtr reader); diff --git a/testReader.c b/testReader.c index b89992df..7b19bb2a 100644 --- a/testReader.c +++ b/testReader.c @@ -37,11 +37,13 @@ int debug = 0; int dump = 0; int noent = 0; int count = 0; +int valid = 0; static void usage(const char *progname) { printf("Usage : %s [options] XMLfiles ...\n", progname); printf("\tParse the XML files using the xmlTextReader API\n"); - printf("\tand output the result of the parsing\n"); + printf("\t --count: count the number of attribute and elements\n"); + printf("\t --valid: validate the document\n"); exit(1); } static int elem, attrs; @@ -55,17 +57,6 @@ static void processNode(xmlTextReaderPtr reader) { elem++; attrs += xmlTextReaderAttributeCount(reader); } - } else { - xmlChar *name = xmlTextReaderName(reader); - if (name != NULL) { - printf("%s : %d", name, xmlTextReaderNodeType(reader)); - xmlFree(name); - } else { - printf("NULL: %d", xmlTextReaderNodeType(reader)); - } - if (xmlTextReaderIsEmptyElement(reader)) - printf(" empty"); - printf("\n"); } } @@ -80,6 +71,9 @@ static void handleFile(const char *filename) { reader = xmlNewTextReaderFilename(filename); if (reader != NULL) { + if (valid) + xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1); + /* * Process all nodes in sequence */ @@ -118,6 +112,8 @@ int main(int argc, char **argv) { dump++; else if ((!strcmp(argv[i], "-count")) || (!strcmp(argv[i], "--count"))) count++; + else if ((!strcmp(argv[i], "-valid")) || (!strcmp(argv[i], "--valid"))) + valid++; else if ((!strcmp(argv[i], "-noent")) || (!strcmp(argv[i], "--noent"))) noent++; diff --git a/uri.c b/uri.c index 1f4ab600..e0c96ddd 100644 --- a/uri.c +++ b/uri.c @@ -540,7 +540,7 @@ xmlPrintURI(FILE *stream, xmlURIPtr uri) { out = xmlSaveUri(uri); if (out != NULL) { - fprintf(stream, "%s", out); + fprintf(stream, "%s", (char *) out); xmlFree(out); } } diff --git a/valid.c b/valid.c index 421a29aa..7fa634c7 100644 --- a/valid.c +++ b/valid.c @@ -33,8 +33,90 @@ "Unimplemented block at %s:%d\n", \ __FILE__, __LINE__); +#define VERROR \ + if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error -#ifndef LIBXML_REGEXP_ENABLED +#define VWARNING \ + if ((ctxt != NULL) && (ctxt->warning != NULL)) ctxt->warning + + +#ifdef LIBXML_REGEXP_ENABLED +/* + * If regexp are enabled we can do continuous validation without the + * need of a tree to validate the content model. this is done in each + * callbacks. + * Each xmlValidState represent the validation state associated to the + * set of nodes currently open from the document root to the current element. + */ + + +typedef struct _xmlValidState { + xmlElementPtr elemDecl; /* pointer to the content model */ + xmlNodePtr node; /* pointer to the current node */ + xmlRegExecCtxtPtr exec; /* regexp runtime */ +} _xmlValidState; + + +static int +vstateVPush(xmlValidCtxtPtr ctxt, xmlElementPtr elemDecl, xmlNodePtr node) { + if (ctxt->vstateMax == 0) { + ctxt->vstateMax = 10; + ctxt->vstateTab = (xmlValidState *) xmlMalloc(ctxt->vstateMax * + sizeof(ctxt->vstateTab[0])); + if (ctxt->vstateTab == NULL) { + VERROR(ctxt->userData, "realloc failed !n"); + return(-1); + } + } + + if (ctxt->vstateNr >= ctxt->vstateMax) { + ctxt->vstateMax *= 2; + ctxt->vstateTab = (xmlValidState *) xmlRealloc(ctxt->vstateTab, + ctxt->vstateMax * sizeof(ctxt->vstateTab[0])); + if (ctxt->vstateTab == NULL) { + VERROR(ctxt->userData, "realloc failed !n"); + return(-1); + } + } + ctxt->vstate = &ctxt->vstateTab[ctxt->vstateNr]; + ctxt->vstateTab[ctxt->vstateNr].elemDecl = elemDecl; + ctxt->vstateTab[ctxt->vstateNr].node = node; + if ((elemDecl != NULL) && (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)) { + if (elemDecl->contModel == NULL) + xmlValidBuildContentModel(ctxt, elemDecl); + if (elemDecl->contModel != NULL) { + ctxt->vstateTab[ctxt->vstateNr].exec = + xmlRegNewExecCtxt(elemDecl->contModel, NULL, NULL); + } else { + ctxt->vstateTab[ctxt->vstateNr].exec = NULL; + VERROR(ctxt->userData, + "Failed to build content model regexp for %s", node->name); + } + } + return(ctxt->vstateNr++); +} + +static int +vstateVPop(xmlValidCtxtPtr ctxt) { + xmlElementPtr elemDecl; + + if (ctxt->vstateNr <= 1) return(-1); + ctxt->vstateNr--; + elemDecl = ctxt->vstateTab[ctxt->vstateNr].elemDecl; + ctxt->vstateTab[ctxt->vstateNr].elemDecl = NULL; + ctxt->vstateTab[ctxt->vstateNr].node = NULL; + if ((elemDecl != NULL) && (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)) { + xmlRegFreeExecCtxt(ctxt->vstateTab[ctxt->vstateNr].exec); + } + ctxt->vstateTab[ctxt->vstateNr].exec = NULL; + if (ctxt->vstateNr >= 1) + ctxt->vstate = &ctxt->vstateTab[ctxt->vstateNr - 1]; + else + ctxt->vstate = NULL; + return(ctxt->vstateNr); +} + +#else /* not LIBXML_REGEXP_ENABLED */ /* * If regexp are not enabled, it uses a home made algorithm less * complex and easier to @@ -346,12 +428,6 @@ xmlValidStateDebug(xmlValidCtxtPtr ctxt) { (int) (long) node->content); \ } -#define VERROR \ - if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error - -#define VWARNING \ - if ((ctxt != NULL) && (ctxt->warning != NULL)) ctxt->warning - #define CHECK_DTD \ if (doc == NULL) return(0); \ else if ((doc->intSubset == NULL) && \ @@ -2079,6 +2155,8 @@ xmlFreeID(xmlIDPtr id) { if (id == NULL) return; if (id->value != NULL) xmlFree((xmlChar *) id->value); + if (id->name != NULL) + xmlFree((xmlChar *) id->name); xmlFree(id); } @@ -2138,7 +2216,17 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value, * fill the structure. */ ret->value = xmlStrdup(value); - ret->attr = attr; + if ((ctxt != NULL) && (ctxt->vstateNr != 0)) { + /* + * Operating in streaming mode, attr is gonna disapear + */ + ret->name = xmlStrdup(attr->name); + ret->attr = NULL; + } else { + ret->attr = attr; + ret->name = NULL; + } + ret->lineno = xmlGetLineNo(attr->parent); if (xmlHashAddEntry(table, value, ret) < 0) { /* @@ -2292,6 +2380,13 @@ xmlGetID(xmlDocPtr doc, const xmlChar *ID) { id = xmlHashLookup(table, ID); if (id == NULL) return(NULL); + if (id->attr == NULL) { + /* + * We are operating on a stream, return a well known reference + * since the attribute node doesn't exist anymore + */ + return((xmlAttrPtr) doc); + } return(id->attr); } @@ -2341,6 +2436,8 @@ xmlFreeRef(xmlLinkPtr lk) { if (ref == NULL) return; if (ref->value != NULL) xmlFree((xmlChar *)ref->value); + if (ref->name != NULL) + xmlFree((xmlChar *)ref->name); xmlFree(ref); } @@ -2434,7 +2531,17 @@ xmlAddRef(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlDocPtr doc, const xmlChar *v * fill the structure. */ ret->value = xmlStrdup(value); - ret->attr = attr; + if ((ctxt != NULL) && (ctxt->vstateNr != 0)) { + /* + * Operating in streaming mode, attr is gonna disapear + */ + ret->name = xmlStrdup(attr->name); + ret->attr = NULL; + } else { + ret->name = NULL; + ret->attr = attr; + } + ret->lineno = xmlGetLineNo(attr->parent); /* To add a reference :- * References are maintained as a list of references, @@ -4601,7 +4708,7 @@ xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child, ret = 0; goto fail; case XML_CDATA_SECTION_NODE: - TODO + /* TODO */ ret = 0; goto fail; case XML_ELEMENT_NODE: @@ -4896,6 +5003,311 @@ done: return(ret); } +/** + * xmlValidateCheckMixed: + * @ctxt: the validation context + * @cont: the mixed content model + * @qname: the qualified name as appearing in the serialization + * + * Check if the given node is part of the content model. + * + * Returns 1 if yes, 0 if no, -1 in case of error + */ +static int +xmlValidateCheckMixed(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, + xmlElementContentPtr cont, const xmlChar *qname) { + while (cont != NULL) { + if (cont->type == XML_ELEMENT_CONTENT_ELEMENT) { + if (xmlStrEqual(cont->name, qname)) + return(1); + } else if ((cont->type == XML_ELEMENT_CONTENT_OR) && + (cont->c1 != NULL) && + (cont->c1->type == XML_ELEMENT_CONTENT_ELEMENT)){ + if (xmlStrEqual(cont->c1->name, qname)) + return(1); + } else if ((cont->type != XML_ELEMENT_CONTENT_OR) || + (cont->c1 == NULL) || + (cont->c1->type != XML_ELEMENT_CONTENT_PCDATA)){ + /* Internal error !!! */ + xmlGenericError(xmlGenericErrorContext, + "Internal: MIXED struct bad\n"); + break; + } + cont = cont->c2; + } + return(0); +} + +/** + * xmlValidGetElemDecl: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * @extsubset: pointer, (out) indicate if the declaration was found + * in the external subset. + * + * Finds a declaration associated to an element in the document. + * + * returns the pointer to the declaration or NULL if not found. + */ +static xmlElementPtr +xmlValidGetElemDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, int *extsubset) { + xmlElementPtr elemDecl = NULL; + const xmlChar *prefix = NULL; + + if ((elem == NULL) || (elem->name == NULL)) return(NULL); + if (extsubset != NULL) + *extsubset = 0; + + /* + * Fetch the declaration for the qualified name + */ + if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) + prefix = elem->ns->prefix; + + if (prefix != NULL) { + elemDecl = xmlGetDtdQElementDesc(doc->intSubset, + elem->name, prefix); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) { + elemDecl = xmlGetDtdQElementDesc(doc->extSubset, + elem->name, prefix); + if ((elemDecl != NULL) && (extsubset != NULL)) + *extsubset = 1; + } + } + + /* + * Fetch the declaration for the non qualified name + * This is "non-strict" validation should be done on the + * full QName but in that case being flexible makes sense. + */ + if (elemDecl == NULL) { + elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name); + if ((elemDecl == NULL) && (doc->extSubset != NULL)) { + elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name); + if ((elemDecl != NULL) && (extsubset != NULL)) + *extsubset = 1; + } + } + if (elemDecl == NULL) { + VECTXT(ctxt, elem); + VERROR(ctxt->userData, "No declaration for element %s\n", + elem->name); + } + return(elemDecl); +} + +/** + * xmlValidatePushElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * @qname: the qualified name as appearing in the serialization + * + * Push a new element start on the validation stack. + * + * returns 1 if no validation problem was found or 0 otherwise + */ +int +xmlValidatePushElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, + xmlNodePtr elem, const xmlChar *qname) { + int ret = 1; + xmlElementPtr eDecl; + int extsubset = 0; + + if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) { + xmlValidStatePtr state = ctxt->vstate; + xmlElementPtr elemDecl; + + /* + * Check the new element agaisnt the content model of the new elem. + */ + if (state->elemDecl != NULL) { + elemDecl = state->elemDecl; + + switch(elemDecl->etype) { + case XML_ELEMENT_TYPE_UNDEFINED: + ret = 0; + break; + case XML_ELEMENT_TYPE_EMPTY: + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s was declared EMPTY this one has content\n", + state->node->name); + ret = 0; + break; + case XML_ELEMENT_TYPE_ANY: + /* I don't think anything is required then */ + break; + case XML_ELEMENT_TYPE_MIXED: + /* simple case of declared as #PCDATA */ + if ((elemDecl->content != NULL) && + (elemDecl->content->type == + XML_ELEMENT_CONTENT_PCDATA)) { + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s was declared #PCDATA but contains non text nodes\n", + state->node->name); + ret = 0; + } else { + ret = xmlValidateCheckMixed(ctxt, elemDecl->content, + qname); + if (ret != 1) { + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s is not declared in %s list of possible children\n", + qname, state->node->name); + } + } + break; + case XML_ELEMENT_TYPE_ELEMENT: + /* + * TODO: + * VC: Standalone Document Declaration + * - element types with element content, if white space + * occurs directly within any instance of those types. + */ + if (state->exec != NULL) { + ret = xmlRegExecPushString(state->exec, qname, NULL); + if (ret < 0) { + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s content does not follow the DTD\nMisplaced %s\n", + state->node->name, qname); + ret = 0; + } else { + ret = 1; + } + } + break; + } + } + } + eDecl = xmlValidGetElemDecl(ctxt, doc, elem, &extsubset); + vstateVPush(ctxt, eDecl, elem); + return(ret); +} + +/** + * xmlValidatePushCData: + * @ctxt: the validation context + * @data: some character data read + * @len: the lenght of the data + * + * check the CData parsed for validation in the current stack + * + * returns 1 if no validation problem was found or 0 otherwise + */ +int +xmlValidatePushCData(xmlValidCtxtPtr ctxt, const xmlChar *data, int len) { + int ret = 1; + + if (len <= 0) + return(ret); + if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) { + xmlValidStatePtr state = ctxt->vstate; + xmlElementPtr elemDecl; + + /* + * Check the new element agaisnt the content model of the new elem. + */ + if (state->elemDecl != NULL) { + elemDecl = state->elemDecl; + + switch(elemDecl->etype) { + case XML_ELEMENT_TYPE_UNDEFINED: + ret = 0; + break; + case XML_ELEMENT_TYPE_EMPTY: + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s was declared EMPTY this one has content\n", + state->node->name); + ret = 0; + break; + case XML_ELEMENT_TYPE_ANY: + break; + case XML_ELEMENT_TYPE_MIXED: + break; + case XML_ELEMENT_TYPE_ELEMENT: + if (len > 0) { + int i; + + for (i = 0;i < len;i++) { + if (!IS_BLANK(data[i])) { + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s content does not follow the DTD\nText not allowed\n", + state->node->name); + ret = 0; + goto done; + } + } + /* + * TODO: + * VC: Standalone Document Declaration + * element types with element content, if white space + * occurs directly within any instance of those types. + */ + } + break; + } + } + } +done: + return(ret); +} + +/** + * xmlValidatePopElement: + * @ctxt: the validation context + * @doc: a document instance + * @elem: an element instance + * @qname: the qualified name as appearing in the serialization + * + * Pop the element end from the validation stack. + * + * returns 1 if no validation problem was found or 0 otherwise + */ +int +xmlValidatePopElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc ATTRIBUTE_UNUSED, + xmlNodePtr elem, const xmlChar *qname ATTRIBUTE_UNUSED) { + int ret = 1; + + if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) { + xmlValidStatePtr state = ctxt->vstate; + xmlElementPtr elemDecl; + + /* + * Check the new element agaisnt the content model of the new elem. + */ + if (state->elemDecl != NULL) { + elemDecl = state->elemDecl; + + if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT) { + if (state->exec != NULL) { + ret = xmlRegExecPushString(state->exec, NULL, NULL); + if (ret == 0) { + VECTXT(ctxt, state->node); + VERROR(ctxt->userData, + "Element %s content does not follow the DTD\nExpecting more child\n", + state->node->name); + } else { + /* + * previous validation errors should not generate + * a new one here + */ + ret = 1; + } + } + } + } + vstateVPop(ctxt); + } + return(ret); +} + /** * xmlValidateOneElement: * @ctxt: the validation context @@ -4923,7 +5335,6 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, xmlNodePtr child; int ret = 1, tmp; const xmlChar *name; - const xmlChar *prefix = NULL; int extsubset = 0; CHECK_DTD; @@ -5002,45 +5413,19 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc, "unknown element type %d\n", elem->type); return(0); } - if (elem->name == NULL) return(0); /* - * Fetch the declaration for the qualified name + * Fetch the declaration */ - if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) - prefix = elem->ns->prefix; - - if (prefix != NULL) { - elemDecl = xmlGetDtdQElementDesc(doc->intSubset, - elem->name, prefix); - if ((elemDecl == NULL) && (doc->extSubset != NULL)) { - elemDecl = xmlGetDtdQElementDesc(doc->extSubset, - elem->name, prefix); - if (elemDecl != NULL) - extsubset = 1; - } - } - - /* - * Fetch the declaration for the non qualified name - * This is "non-strict" validation should be done on the - * full QName but in that case being flexible makes sense. - */ - if (elemDecl == NULL) { - elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name); - if ((elemDecl == NULL) && (doc->extSubset != NULL)) { - elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name); - if (elemDecl != NULL) - extsubset = 1; - } - } - if (elemDecl == NULL) { - VECTXT(ctxt, elem); - VERROR(ctxt->userData, "No declaration for element %s\n", - elem->name); + elemDecl = xmlValidGetElemDecl(ctxt, doc, elem, &extsubset); + if (elemDecl == NULL) return(0); - } + /* + * If vstateNr is not zero that means continuous validation is + * activated, do not try to check the content model at that level. + */ + if (ctxt->vstateNr == 0) { /* Check that the element content matches the definition */ switch (elemDecl->etype) { case XML_ELEMENT_TYPE_UNDEFINED: @@ -5168,6 +5553,7 @@ child_ok: ret = tmp; break; } + } /* not continuous */ /* [ VC: Required Attribute ] */ attr = elemDecl->attributes; @@ -5433,15 +5819,42 @@ xmlValidateRef(xmlRefPtr ref, xmlValidCtxtPtr ctxt, if (ref == NULL) return; - attr = ref->attr; - if (attr == NULL) + if ((ref->attr == NULL) && (ref->name == NULL)) return; - if (attr->atype == XML_ATTRIBUTE_IDREF) { + attr = ref->attr; + if (attr == NULL) { + xmlChar *dup, *str = NULL, *cur, save; + + dup = xmlStrdup(name); + if (dup == NULL) { + ctxt->valid = 0; + return; + } + cur = dup; + while (*cur != 0) { + str = cur; + while ((*cur != 0) && (!IS_BLANK(*cur))) cur++; + save = *cur; + *cur = 0; + id = xmlGetID(ctxt->doc, str); + if (id == NULL) { + VERROR(ctxt->userData, + "attribute %s line %d references an unknown ID \"%s\"\n", + ref->name, ref->lineno, str); + ctxt->valid = 0; + } + if (save == 0) + break; + *cur = save; + while (IS_BLANK(*cur)) cur++; + } + xmlFree(dup); + } else if (attr->atype == XML_ATTRIBUTE_IDREF) { id = xmlGetID(ctxt->doc, name); if (id == NULL) { VECTXT(ctxt, attr->parent); VERROR(ctxt->userData, - "IDREF attribute %s references an unknown ID \"%s\"\n", + "IDREF attribute %s references an unknown ID \"%s\"\n", attr->name, name); ctxt->valid = 0; } @@ -5463,7 +5876,7 @@ xmlValidateRef(xmlRefPtr ref, xmlValidCtxtPtr ctxt, if (id == NULL) { VECTXT(ctxt, attr->parent); VERROR(ctxt->userData, - "IDREFS attribute %s references an unknown ID \"%s\"\n", + "IDREFS attribute %s references an unknown ID \"%s\"\n", attr->name, str); ctxt->valid = 0; } diff --git a/xmllint.c b/xmllint.c index b5d8e0b3..5c166609 100644 --- a/xmllint.c +++ b/xmllint.c @@ -743,8 +743,29 @@ static void parseAndPrintFile(char *filename) { doc = xmlParseMemory((char *) base, info.st_size); munmap((char *) base, info.st_size); #endif - } else + } else if (valid) { + int ret; + xmlParserCtxtPtr ctxt; + + ctxt = xmlCreateFileParserCtxt(filename); + + if (ctxt == NULL) { + doc = NULL; + } else { + xmlParseDocument(ctxt); + if (ctxt->valid == 0) + progresult = 4; + ret = ctxt->wellFormed; + doc = ctxt->myDoc; + xmlFreeParserCtxt(ctxt); + if (!ret) { + xmlFreeDoc(doc); + doc = NULL; + } + } + } else { doc = xmlParseFile(filename); + } } /* diff --git a/xmlreader.c b/xmlreader.c index 254fd544..5649da9e 100644 --- a/xmlreader.c +++ b/xmlreader.c @@ -70,7 +70,8 @@ typedef enum { XML_TEXTREADER_ELEMENT= 1, XML_TEXTREADER_END= 2, XML_TEXTREADER_EMPTY= 3, - XML_TEXTREADER_BACKTRACK= 4 + XML_TEXTREADER_BACKTRACK= 4, + XML_TEXTREADER_DONE= 5 } xmlTextReaderState; struct _xmlTextReader { @@ -82,6 +83,8 @@ struct _xmlTextReader { xmlParserInputBufferPtr input; /* the input */ startElementSAXFunc startElement;/* initial SAX callbacks */ endElementSAXFunc endElement; /* idem */ + charactersSAXFunc characters; + cdataBlockSAXFunc cdataBlock; unsigned int base; /* base of the segment in the input */ unsigned int cur; /* current position in the input */ xmlNodePtr node; /* current node */ @@ -135,8 +138,13 @@ xmlTextReaderStartElement(void *ctx, const xmlChar *fullname, #ifdef DEBUG_CALLBACKS printf("xmlTextReaderStartElement(%s)\n", fullname); #endif - if ((reader != NULL) && (reader->startElement != NULL)) + if ((reader != NULL) && (reader->startElement != NULL)) { reader->startElement(ctx, fullname, atts); + if (ctxt->validate) { + ctxt->valid &= xmlValidatePushElement(&ctxt->vctxt, ctxt->myDoc, + ctxt->node, fullname); + } + } reader->state = XML_TEXTREADER_ELEMENT; } @@ -155,14 +163,74 @@ xmlTextReaderEndElement(void *ctx, const xmlChar *fullname) { #ifdef DEBUG_CALLBACKS printf("xmlTextReaderEndElement(%s)\n", fullname); #endif - if ((reader != NULL) && (reader->endElement != NULL)) + if ((reader != NULL) && (reader->endElement != NULL)) { + xmlNodePtr node = ctxt->node; + reader->endElement(ctx, fullname); + + if (ctxt->validate) { + ctxt->valid &= xmlValidatePopElement(&ctxt->vctxt, ctxt->myDoc, + node, fullname); + } + } if (reader->state == XML_TEXTREADER_ELEMENT) reader->state = XML_TEXTREADER_EMPTY; else reader->state = XML_TEXTREADER_END; } +/** + * xmlTextReaderCharacters: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * receiving some chars from the parser. + */ +static void +xmlTextReaderCharacters(void *ctx, const xmlChar *ch, int len) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlTextReaderPtr reader = ctxt->_private; + +#ifdef DEBUG_CALLBACKS + printf("xmlTextReaderCharacters()\n"); +#endif + if ((reader != NULL) && (reader->characters != NULL)) { + reader->characters(ctx, ch, len); + + if (ctxt->validate) { + ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len); + } + } +} + +/** + * xmlTextReaderCDataBlock: + * @ctx: the user data (XML parser context) + * @value: The pcdata content + * @len: the block length + * + * called when a pcdata block has been parsed + */ +static void +xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len) +{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx; + xmlTextReaderPtr reader = ctxt->_private; + +#ifdef DEBUG_CALLBACKS + printf("xmlTextReaderCDataBlock()\n"); +#endif + if ((reader != NULL) && (reader->cdataBlock != NULL)) { + reader->cdataBlock(ctx, ch, len); + + if (ctxt->validate) { + ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len); + } + } +} + /** * xmlTextReaderPushData: * @reader: the xmlTextReaderPtr used @@ -177,10 +245,12 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { unsigned int cur = reader->cur; xmlBufferPtr inbuf; int val; + int oldstate; if ((reader->input == NULL) || (reader->input->buffer == NULL)) return(-1); + oldstate = reader->state; reader->state = XML_TEXTREADER_NONE; inbuf = reader->input->buffer; while (reader->state == XML_TEXTREADER_NONE) { @@ -192,6 +262,7 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { val = xmlParserInputBufferRead(reader->input, 4096); if (val <= 0) { reader->mode = XML_TEXTREADER_MODE_EOF; + reader->state = oldstate; return(val); } } else @@ -241,10 +312,14 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) { * At the end of the stream signal that the work is done to the Push * parser. */ - if ((reader->mode == XML_TEXTREADER_MODE_EOF) && (cur >= inbuf->use)) { - val = xmlParseChunk(reader->ctxt, - (const char *) &inbuf->content[reader->cur], 0, 1); + if (reader->mode == XML_TEXTREADER_MODE_EOF) { + if (reader->mode != XML_TEXTREADER_DONE) { + val = xmlParseChunk(reader->ctxt, + (const char *) &inbuf->content[reader->cur], 0, 1); + reader->mode = XML_TEXTREADER_DONE; + } } + reader->state = oldstate; return(0); } @@ -303,19 +378,20 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { * that the parser didn't finished or that we arent at the end * of stream, continue processing. */ + while (((oldstate == XML_TEXTREADER_BACKTRACK) || + (reader->node->children == NULL) || + (reader->node->type == XML_ENTITY_REF_NODE) || + (reader->node->type == XML_DTD_NODE)) && + (reader->node->next == NULL) && + (reader->ctxt->nodeNr == olddepth) && + (reader->ctxt->instate != XML_PARSER_EOF)) { + val = xmlTextReaderPushData(reader); + if (val < 0) + return(-1); + if (reader->node == NULL) + return(0); + } if (oldstate != XML_TEXTREADER_BACKTRACK) { - while (((reader->node->children == NULL) || - (reader->node->type == XML_ENTITY_REF_NODE) || - (reader->node->type == XML_DTD_NODE)) && - (reader->node->next == NULL) && - (reader->ctxt->nodeNr == olddepth) && - (reader->ctxt->instate != XML_PARSER_EOF)) { - val = xmlTextReaderPushData(reader); - if (val < 0) - return(-1); - if (reader->node == NULL) - return(0); - } if ((reader->node->children != NULL) && (reader->node->type != XML_ENTITY_REF_NODE) && (reader->node->type != XML_DTD_NODE)) { @@ -348,6 +424,12 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { return(1); } + if ((oldstate == XML_TEXTREADER_ELEMENT) && + (reader->node->type == XML_ELEMENT_NODE)) { + reader->state = XML_TEXTREADER_END; + DUMP_READER + return(1); + } reader->node = reader->node->parent; if ((reader->node == NULL) || (reader->node->type == XML_DOCUMENT_NODE) || @@ -355,6 +437,10 @@ xmlTextReaderRead(xmlTextReaderPtr reader) { (reader->node->type == XML_DOCB_DOCUMENT_NODE) || #endif (reader->node->type == XML_HTML_DOCUMENT_NODE)) { + if (reader->mode != XML_TEXTREADER_DONE) { + val = xmlParseChunk(reader->ctxt, "", 0, 1); + reader->mode = XML_TEXTREADER_DONE; + } reader->node = NULL; reader->depth = 0; @@ -501,13 +587,14 @@ xmlTextReaderReadBinHex(xmlTextReaderPtr reader, unsigned char *array, /** * xmlNewTextReader: * @input: the xmlParserInputBufferPtr used to read data + * @URI: the URI information for the source if available * * Create an xmlTextReader structure fed with @input * * Returns the new xmlTextReaderPtr or NULL in case of error */ xmlTextReaderPtr -xmlNewTextReader(xmlParserInputBufferPtr input) { +xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) { xmlTextReaderPtr ret; int val; @@ -533,6 +620,10 @@ xmlNewTextReader(xmlParserInputBufferPtr input) { ret->sax->startElement = xmlTextReaderStartElement; ret->endElement = ret->sax->endElement; ret->sax->endElement = xmlTextReaderEndElement; + ret->characters = ret->sax->characters; + ret->sax->characters = xmlTextReaderCharacters; + ret->cdataBlock = ret->sax->cdataBlock; + ret->sax->cdataBlock = xmlTextReaderCDataBlock; ret->mode = XML_TEXTREADER_MODE_INITIAL; ret->node = NULL; @@ -540,15 +631,16 @@ xmlNewTextReader(xmlParserInputBufferPtr input) { val = xmlParserInputBufferRead(input, 4); if (val >= 4) { ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, - (const char *) ret->input->buffer->content, 4, NULL); + (const char *) ret->input->buffer->content, 4, URI); ret->base = 0; ret->cur = 4; } else { - ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, NULL, 0, NULL); + ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, NULL, 0, URI); ret->base = 0; ret->cur = 0; } ret->ctxt->_private = ret; + ret->ctxt->linenumbers = 1; ret->allocs = XML_TEXTREADER_CTXT; return(ret); @@ -566,16 +658,23 @@ xmlTextReaderPtr xmlNewTextReaderFilename(const char *URI) { xmlParserInputBufferPtr input; xmlTextReaderPtr ret; + char *directory = NULL; input = xmlParserInputBufferCreateFilename(URI, XML_CHAR_ENCODING_NONE); if (input == NULL) return(NULL); - ret = xmlNewTextReader(input); + ret = xmlNewTextReader(input, URI); if (ret == NULL) { xmlFreeParserInputBuffer(input); return(NULL); } ret->allocs |= XML_TEXTREADER_INPUT; + if (ret->ctxt->directory == NULL) + directory = xmlParserGetDirectory(URI); + if ((ret->ctxt->directory == NULL) && (directory != NULL)) + ret->ctxt->directory = (char *) xmlStrdup((xmlChar *) directory); + if (directory != NULL) + xmlFree(directory); return(ret); } diff --git a/xmlregexp.c b/xmlregexp.c index 129c1269..481837b4 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -2455,7 +2455,7 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec, /** * xmlRegExecPushString: - * @exec: a regexp execution context + * @exec: a regexp execution context or NULL to indicate the end * @value: a string token input * @data: data associated to the token to reuse in callbacks *