1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

working on DTD validation on top of xml reader interfaces. Allows to

* testReader.c xmlreader.c valid.c include/libxml/tree.h
  include/libxml/valid.h include/libxml/xmlreader.h: working on
  DTD validation on top of xml reader interfaces. Allows
  validating arbitrarily large instances. This required some extensions
  to the valid module interface and augmenting the size of xmlID
  and xmlRef structs a bit.
* uri.c xmlregexp.c: simple cleanup.
Daniel
This commit is contained in:
Daniel Veillard
2002-12-20 00:16:24 +00:00
parent c84f8b58e1
commit ea7751d53b
10 changed files with 643 additions and 88 deletions

View File

@ -1,3 +1,13 @@
Fri Dec 20 01:11:30 CET 2002 Daniel Veillard <daniel@veillard.com>
* testReader.c xmlreader.c valid.c include/libxml/tree.h
include/libxml/valid.h include/libxml/xmlreader.h: working on
DTD validation on top of xml reader interfaces. Allows
validating arbitrarily large instances. This required some extensions
to the valid module interface and augmenting the size of xmlID
and xmlRef structs a bit.
* uri.c xmlregexp.c: simple cleanup.
Wed Dec 18 15:51:22 CET 2002 Daniel Veillard <daniel@veillard.com> Wed Dec 18 15:51:22 CET 2002 Daniel Veillard <daniel@veillard.com>
* xmlreader.c include/libxml/xmlreader.h doc/libxml2-api.xml: more * xmlreader.c include/libxml/xmlreader.h doc/libxml2-api.xml: more

View File

@ -387,6 +387,8 @@ struct _xmlID {
struct _xmlID *next; /* next ID */ struct _xmlID *next; /* next ID */
const xmlChar *value; /* The ID name */ const xmlChar *value; /* The ID name */
xmlAttrPtr attr; /* The attribute holding it */ xmlAttrPtr attr; /* The attribute holding it */
const xmlChar *name; /* The attribute if attr is not available */
int lineno; /* The line number if attr is not available */
}; };
/** /**
@ -401,6 +403,8 @@ struct _xmlRef {
struct _xmlRef *next; /* next Ref */ struct _xmlRef *next; /* next Ref */
const xmlChar *value; /* The Ref name */ const xmlChar *value; /* The Ref name */
xmlAttrPtr attr; /* The attribute holding it */ xmlAttrPtr attr; /* The attribute holding it */
const xmlChar *name; /* The attribute if attr is not available */
int lineno; /* The line number if attr is not available */
}; };
/** /**

View File

@ -312,6 +312,17 @@ int xmlValidateNmtokensValue(const xmlChar *value);
int xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, int xmlValidBuildContentModel(xmlValidCtxtPtr ctxt,
xmlElementPtr elem); xmlElementPtr elem);
int xmlValidatePushElement (xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *qname);
int xmlValidatePushCData (xmlValidCtxtPtr ctxt,
const xmlChar *data,
int len);
int xmlValidatePopElement (xmlValidCtxtPtr ctxt,
xmlDocPtr doc,
xmlNodePtr elem,
const xmlChar *qname);
#endif /* LIBXML_REGEXP_ENABLED */ #endif /* LIBXML_REGEXP_ENABLED */
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@ -28,7 +28,8 @@ typedef xmlTextReader *xmlTextReaderPtr;
/* /*
* Constructors & Destructor * Constructors & Destructor
*/ */
xmlTextReaderPtr xmlNewTextReader (xmlParserInputBufferPtr input); xmlTextReaderPtr xmlNewTextReader (xmlParserInputBufferPtr input,
const char *URI);
xmlTextReaderPtr xmlNewTextReaderFilename(const char *URI); xmlTextReaderPtr xmlNewTextReaderFilename(const char *URI);
void xmlFreeTextReader (xmlTextReaderPtr reader); void xmlFreeTextReader (xmlTextReaderPtr reader);

View File

@ -37,11 +37,13 @@ int debug = 0;
int dump = 0; int dump = 0;
int noent = 0; int noent = 0;
int count = 0; int count = 0;
int valid = 0;
static void usage(const char *progname) { static void usage(const char *progname) {
printf("Usage : %s [options] XMLfiles ...\n", progname); printf("Usage : %s [options] XMLfiles ...\n", progname);
printf("\tParse the XML files using the xmlTextReader API\n"); printf("\tParse the XML files using the xmlTextReader API\n");
printf("\tand output the result of the parsing\n"); printf("\t --count: count the number of attribute and elements\n");
printf("\t --valid: validate the document\n");
exit(1); exit(1);
} }
static int elem, attrs; static int elem, attrs;
@ -55,17 +57,6 @@ static void processNode(xmlTextReaderPtr reader) {
elem++; elem++;
attrs += xmlTextReaderAttributeCount(reader); attrs += xmlTextReaderAttributeCount(reader);
} }
} else {
xmlChar *name = xmlTextReaderName(reader);
if (name != NULL) {
printf("%s : %d", name, xmlTextReaderNodeType(reader));
xmlFree(name);
} else {
printf("NULL: %d", xmlTextReaderNodeType(reader));
}
if (xmlTextReaderIsEmptyElement(reader))
printf(" empty");
printf("\n");
} }
} }
@ -80,6 +71,9 @@ static void handleFile(const char *filename) {
reader = xmlNewTextReaderFilename(filename); reader = xmlNewTextReaderFilename(filename);
if (reader != NULL) { if (reader != NULL) {
if (valid)
xmlTextReaderSetParserProp(reader, XML_PARSER_VALIDATE, 1);
/* /*
* Process all nodes in sequence * Process all nodes in sequence
*/ */
@ -118,6 +112,8 @@ int main(int argc, char **argv) {
dump++; dump++;
else if ((!strcmp(argv[i], "-count")) || (!strcmp(argv[i], "--count"))) else if ((!strcmp(argv[i], "-count")) || (!strcmp(argv[i], "--count")))
count++; count++;
else if ((!strcmp(argv[i], "-valid")) || (!strcmp(argv[i], "--valid")))
valid++;
else if ((!strcmp(argv[i], "-noent")) || else if ((!strcmp(argv[i], "-noent")) ||
(!strcmp(argv[i], "--noent"))) (!strcmp(argv[i], "--noent")))
noent++; noent++;

2
uri.c
View File

@ -540,7 +540,7 @@ xmlPrintURI(FILE *stream, xmlURIPtr uri) {
out = xmlSaveUri(uri); out = xmlSaveUri(uri);
if (out != NULL) { if (out != NULL) {
fprintf(stream, "%s", out); fprintf(stream, "%s", (char *) out);
xmlFree(out); xmlFree(out);
} }
} }

513
valid.c
View File

@ -33,8 +33,90 @@
"Unimplemented block at %s:%d\n", \ "Unimplemented block at %s:%d\n", \
__FILE__, __LINE__); __FILE__, __LINE__);
#define VERROR \
if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error
#ifndef LIBXML_REGEXP_ENABLED #define VWARNING \
if ((ctxt != NULL) && (ctxt->warning != NULL)) ctxt->warning
#ifdef LIBXML_REGEXP_ENABLED
/*
 * If regexps are enabled we can do continuous validation without
 * needing a tree to validate the content model; this is done in each
 * of the push/pop callbacks.
 * Each xmlValidState represents the validation state associated with one
 * of the nodes currently open from the document root to the current element.
 */
typedef struct _xmlValidState {
xmlElementPtr elemDecl; /* declaration of the element (holds its content model) */
xmlNodePtr node; /* the element node this state was pushed for */
xmlRegExecCtxtPtr exec; /* regexp runtime created from elemDecl->contModel */
} _xmlValidState;
/*
 * vstateVPush:
 * @ctxt:  the validation context
 * @elemDecl:  the declaration of the element being opened (may be NULL)
 * @node:  the element node being opened
 *
 * Push a new validation state on the stack of @ctxt, growing the stack
 * when needed. For declarations of type XML_ELEMENT_TYPE_ELEMENT a regexp
 * execution context is created from the (possibly freshly built) content
 * model; in every other case the exec field is set to NULL.
 *
 * Returns the index of the new state or -1 in case of allocation failure.
 */
static int
vstateVPush(xmlValidCtxtPtr ctxt, xmlElementPtr elemDecl, xmlNodePtr node) {
    if (ctxt->vstateMax == 0) {
        ctxt->vstateMax = 10;
        ctxt->vstateTab = (xmlValidState *) xmlMalloc(ctxt->vstateMax *
                              sizeof(ctxt->vstateTab[0]));
        if (ctxt->vstateTab == NULL) {
            /* keep the context consistent so a later push retries */
            ctxt->vstateMax = 0;
            VERROR(ctxt->userData, "malloc failed !\n");
            return(-1);
        }
    }

    if (ctxt->vstateNr >= ctxt->vstateMax) {
        xmlValidState *tmp;

        /*
         * Reallocate through a temporary: on failure the old table is
         * still valid and must neither be leaked nor have its recorded
         * capacity changed.
         */
        tmp = (xmlValidState *) xmlRealloc(ctxt->vstateTab,
                  2 * ctxt->vstateMax * sizeof(ctxt->vstateTab[0]));
        if (tmp == NULL) {
            VERROR(ctxt->userData, "realloc failed !\n");
            return(-1);
        }
        ctxt->vstateMax *= 2;
        ctxt->vstateTab = tmp;
    }

    ctxt->vstate = &ctxt->vstateTab[ctxt->vstateNr];
    ctxt->vstateTab[ctxt->vstateNr].elemDecl = elemDecl;
    ctxt->vstateTab[ctxt->vstateNr].node = node;
    /* never leave a stale or uninitialized regexp runtime in the slot */
    ctxt->vstateTab[ctxt->vstateNr].exec = NULL;
    if ((elemDecl != NULL) && (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)) {
        if (elemDecl->contModel == NULL)
            xmlValidBuildContentModel(ctxt, elemDecl);
        if (elemDecl->contModel != NULL) {
            ctxt->vstateTab[ctxt->vstateNr].exec =
                xmlRegNewExecCtxt(elemDecl->contModel, NULL, NULL);
        } else {
            VERROR(ctxt->userData,
                   "Failed to build content model regexp for %s\n",
                   node->name);
        }
    }
    return(ctxt->vstateNr++);
}
/*
 * vstateVPop:
 * @ctxt:  the validation context
 *
 * Pop the top validation state from the stack of @ctxt, releasing the
 * regexp execution context attached to it when the state was pushed for
 * a declaration of type XML_ELEMENT_TYPE_ELEMENT. ctxt->vstate is updated
 * to point to the new top of the stack, or set to NULL when the stack
 * becomes empty.
 *
 * Returns the number of states left on the stack, or -1 if the stack
 * was already empty.
 */
static int
vstateVPop(xmlValidCtxtPtr ctxt) {
    xmlElementPtr elemDecl;

    /*
     * Only refuse to pop an empty stack: the last (root) state has to be
     * popped too, otherwise its regexp runtime is never freed.
     */
    if (ctxt->vstateNr < 1) return(-1);
    ctxt->vstateNr--;
    elemDecl = ctxt->vstateTab[ctxt->vstateNr].elemDecl;
    ctxt->vstateTab[ctxt->vstateNr].elemDecl = NULL;
    ctxt->vstateTab[ctxt->vstateNr].node = NULL;
    if ((elemDecl != NULL) && (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT)) {
        xmlRegFreeExecCtxt(ctxt->vstateTab[ctxt->vstateNr].exec);
    }
    ctxt->vstateTab[ctxt->vstateNr].exec = NULL;
    if (ctxt->vstateNr >= 1)
        ctxt->vstate = &ctxt->vstateTab[ctxt->vstateNr - 1];
    else
        ctxt->vstate = NULL;
    return(ctxt->vstateNr);
}
#else /* not LIBXML_REGEXP_ENABLED */
/* /*
* If regexp are not enabled, it uses a home made algorithm less * If regexp are not enabled, it uses a home made algorithm less
* complex and easier to * complex and easier to
@ -346,12 +428,6 @@ xmlValidStateDebug(xmlValidCtxtPtr ctxt) {
(int) (long) node->content); \ (int) (long) node->content); \
} }
#define VERROR \
if ((ctxt != NULL) && (ctxt->error != NULL)) ctxt->error
#define VWARNING \
if ((ctxt != NULL) && (ctxt->warning != NULL)) ctxt->warning
#define CHECK_DTD \ #define CHECK_DTD \
if (doc == NULL) return(0); \ if (doc == NULL) return(0); \
else if ((doc->intSubset == NULL) && \ else if ((doc->intSubset == NULL) && \
@ -2079,6 +2155,8 @@ xmlFreeID(xmlIDPtr id) {
if (id == NULL) return; if (id == NULL) return;
if (id->value != NULL) if (id->value != NULL)
xmlFree((xmlChar *) id->value); xmlFree((xmlChar *) id->value);
if (id->name != NULL)
xmlFree((xmlChar *) id->name);
xmlFree(id); xmlFree(id);
} }
@ -2138,7 +2216,17 @@ xmlAddID(xmlValidCtxtPtr ctxt, xmlDocPtr doc, const xmlChar *value,
* fill the structure. * fill the structure.
*/ */
ret->value = xmlStrdup(value); ret->value = xmlStrdup(value);
ret->attr = attr; if ((ctxt != NULL) && (ctxt->vstateNr != 0)) {
/*
* Operating in streaming mode, attr is gonna disapear
*/
ret->name = xmlStrdup(attr->name);
ret->attr = NULL;
} else {
ret->attr = attr;
ret->name = NULL;
}
ret->lineno = xmlGetLineNo(attr->parent);
if (xmlHashAddEntry(table, value, ret) < 0) { if (xmlHashAddEntry(table, value, ret) < 0) {
/* /*
@ -2292,6 +2380,13 @@ xmlGetID(xmlDocPtr doc, const xmlChar *ID) {
id = xmlHashLookup(table, ID); id = xmlHashLookup(table, ID);
if (id == NULL) if (id == NULL)
return(NULL); return(NULL);
if (id->attr == NULL) {
/*
* We are operating on a stream, return a well known reference
* since the attribute node doesn't exist anymore
*/
return((xmlAttrPtr) doc);
}
return(id->attr); return(id->attr);
} }
@ -2341,6 +2436,8 @@ xmlFreeRef(xmlLinkPtr lk) {
if (ref == NULL) return; if (ref == NULL) return;
if (ref->value != NULL) if (ref->value != NULL)
xmlFree((xmlChar *)ref->value); xmlFree((xmlChar *)ref->value);
if (ref->name != NULL)
xmlFree((xmlChar *)ref->name);
xmlFree(ref); xmlFree(ref);
} }
@ -2434,7 +2531,17 @@ xmlAddRef(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED, xmlDocPtr doc, const xmlChar *v
* fill the structure. * fill the structure.
*/ */
ret->value = xmlStrdup(value); ret->value = xmlStrdup(value);
ret->attr = attr; if ((ctxt != NULL) && (ctxt->vstateNr != 0)) {
/*
* Operating in streaming mode, attr is gonna disapear
*/
ret->name = xmlStrdup(attr->name);
ret->attr = NULL;
} else {
ret->name = NULL;
ret->attr = attr;
}
ret->lineno = xmlGetLineNo(attr->parent);
/* To add a reference :- /* To add a reference :-
* References are maintained as a list of references, * References are maintained as a list of references,
@ -4601,7 +4708,7 @@ xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child,
ret = 0; ret = 0;
goto fail; goto fail;
case XML_CDATA_SECTION_NODE: case XML_CDATA_SECTION_NODE:
TODO /* TODO */
ret = 0; ret = 0;
goto fail; goto fail;
case XML_ELEMENT_NODE: case XML_ELEMENT_NODE:
@ -4896,6 +5003,311 @@ done:
return(ret); return(ret);
} }
/**
 * xmlValidateCheckMixed:
 * @ctxt:  the validation context (unused)
 * @cont:  the mixed content model
 * @qname:  the qualified name as appearing in the serialization
 *
 * Walk the chain of a mixed content model, following the c2 links, and
 * look for an element entry whose name equals @qname. The walk stops
 * with an internal error message if the chain does not have the
 * expected shape.
 *
 * Returns 1 if @qname is part of the model, 0 otherwise
 */
static int
xmlValidateCheckMixed(xmlValidCtxtPtr ctxt ATTRIBUTE_UNUSED,
                      xmlElementContentPtr cont, const xmlChar *qname) {
    xmlElementContentPtr cur;

    for (cur = cont; cur != NULL; cur = cur->c2) {
        xmlElementContentPtr leaf;

        if (cur->type == XML_ELEMENT_CONTENT_ELEMENT) {
            /* the chain entry is itself an element name */
            leaf = cur;
        } else if ((cur->type == XML_ELEMENT_CONTENT_OR) &&
                   (cur->c1 != NULL)) {
            if (cur->c1->type == XML_ELEMENT_CONTENT_ELEMENT) {
                /* one alternative of the choice, the rest hangs off c2 */
                leaf = cur->c1;
            } else if (cur->c1->type == XML_ELEMENT_CONTENT_PCDATA) {
                /* the #PCDATA alternative never matches an element */
                continue;
            } else {
                goto malformed;
            }
        } else {
            goto malformed;
        }
        if (xmlStrEqual(leaf->name, qname))
            return(1);
    }
    return(0);

malformed:
    /* Internal error !!! */
    xmlGenericError(xmlGenericErrorContext,
                    "Internal: MIXED struct bad\n");
    return(0);
}
/**
 * xmlValidGetElemDecl:
 * @ctxt:  the validation context
 * @doc:  a document instance
 * @elem:  an element instance
 * @extsubset:  pointer, (out) indicate if the declaration was found
 *              in the external subset.
 *
 * Finds a declaration associated to an element in the document. The
 * lookup is done first on the qualified name (when the element has a
 * namespace prefix) and then on the local name, each time in the
 * internal subset before the external one. An error is reported through
 * @ctxt when no declaration is found.
 *
 * returns the pointer to the declaration or NULL if not found.
 */
static xmlElementPtr
xmlValidGetElemDecl(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
                    xmlNodePtr elem, int *extsubset) {
    xmlElementPtr elemDecl = NULL;
    const xmlChar *prefix = NULL;

    /* doc is dereferenced below, refuse to work without one */
    if ((doc == NULL) || (elem == NULL) || (elem->name == NULL))
        return(NULL);
    if (extsubset != NULL)
        *extsubset = 0;

    /*
     * Fetch the declaration for the qualified name
     */
    if ((elem->ns != NULL) && (elem->ns->prefix != NULL))
        prefix = elem->ns->prefix;

    if (prefix != NULL) {
        elemDecl = xmlGetDtdQElementDesc(doc->intSubset,
                                         elem->name, prefix);
        if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
            elemDecl = xmlGetDtdQElementDesc(doc->extSubset,
                                             elem->name, prefix);
            if ((elemDecl != NULL) && (extsubset != NULL))
                *extsubset = 1;
        }
    }

    /*
     * Fetch the declaration for the non qualified name.
     * This is "non-strict" validation: it should be done on the
     * full QName but in that case being flexible makes sense.
     */
    if (elemDecl == NULL) {
        elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name);
        if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
            elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name);
            if ((elemDecl != NULL) && (extsubset != NULL))
                *extsubset = 1;
        }
    }
    if (elemDecl == NULL) {
        VECTXT(ctxt, elem);
        VERROR(ctxt->userData, "No declaration for element %s\n",
               elem->name);
    }
    return(elemDecl);
}
/**
 * xmlValidatePushElement:
 * @ctxt:  the validation context
 * @doc:  a document instance
 * @elem:  an element instance
 * @qname:  the qualified name as appearing in the serialization
 *
 * Push a new element start on the validation stack. If an element is
 * already open, @qname is first checked against the content model of
 * that enclosing element; then the declaration of @elem is looked up
 * and a new validation state is pushed for it.
 *
 * returns 1 if no validation problem was found or 0 otherwise
 */
int
xmlValidatePushElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
                       xmlNodePtr elem, const xmlChar *qname) {
    int ret = 1;
    xmlElementPtr eDecl;
    int extsubset = 0;

    /* public entry point: ctxt is dereferenced right below */
    if (ctxt == NULL)
        return(0);

    if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) {
        xmlValidStatePtr state = ctxt->vstate;
        xmlElementPtr elemDecl;

        /*
         * Check the new element against the content model of the
         * enclosing (currently open) element.
         */
        if (state->elemDecl != NULL) {
            elemDecl = state->elemDecl;

            switch(elemDecl->etype) {
                case XML_ELEMENT_TYPE_UNDEFINED:
                    ret = 0;
                    break;
                case XML_ELEMENT_TYPE_EMPTY:
                    VECTXT(ctxt, state->node);
                    VERROR(ctxt->userData,
       "Element %s was declared EMPTY this one has content\n",
                           state->node->name);
                    ret = 0;
                    break;
                case XML_ELEMENT_TYPE_ANY:
                    /* I don't think anything is required then */
                    break;
                case XML_ELEMENT_TYPE_MIXED:
                    /* simple case of declared as #PCDATA */
                    if ((elemDecl->content != NULL) &&
                        (elemDecl->content->type ==
                         XML_ELEMENT_CONTENT_PCDATA)) {
                        VECTXT(ctxt, state->node);
                        VERROR(ctxt->userData,
       "Element %s was declared #PCDATA but contains non text nodes\n",
                               state->node->name);
                        ret = 0;
                    } else {
                        ret = xmlValidateCheckMixed(ctxt, elemDecl->content,
                                                    qname);
                        if (ret != 1) {
                            VECTXT(ctxt, state->node);
                            VERROR(ctxt->userData,
       "Element %s is not declared in %s list of possible children\n",
                                   qname, state->node->name);
                        }
                    }
                    break;
                case XML_ELEMENT_TYPE_ELEMENT:
                    /*
                     * TODO:
                     * VC: Standalone Document Declaration
                     *     - element types with element content, if white space
                     *       occurs directly within any instance of those types.
                     */
                    if (state->exec != NULL) {
                        /* feed the child name to the parent's regexp */
                        ret = xmlRegExecPushString(state->exec, qname, NULL);
                        if (ret < 0) {
                            VECTXT(ctxt, state->node);
                            VERROR(ctxt->userData,
       "Element %s content does not follow the DTD\nMisplaced %s\n",
                                   state->node->name, qname);
                            ret = 0;
                        } else {
                            ret = 1;
                        }
                    }
                    break;
            }
        }
    }
    eDecl = xmlValidGetElemDecl(ctxt, doc, elem, &extsubset);
    vstateVPush(ctxt, eDecl, elem);
    return(ret);
}
/**
 * xmlValidatePushCData:
 * @ctxt:  the validation context
 * @data:  some character data read
 * @len:  the length of the data
 *
 * check the CData parsed for validation in the current stack: character
 * data is rejected inside an element declared EMPTY, and only blank
 * characters are accepted inside an element declared with element
 * content.
 *
 * returns 1 if no validation problem was found or 0 otherwise
 */
int
xmlValidatePushCData(xmlValidCtxtPtr ctxt, const xmlChar *data, int len) {
    int ret = 1;

    if (len <= 0)
        return(ret);
    /* public entry point: ctxt is dereferenced right below */
    if (ctxt == NULL)
        return(0);
    if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) {
        xmlValidStatePtr state = ctxt->vstate;
        xmlElementPtr elemDecl;

        /*
         * Check the character data against the content model of the
         * currently open element.
         */
        if (state->elemDecl != NULL) {
            elemDecl = state->elemDecl;

            switch(elemDecl->etype) {
                case XML_ELEMENT_TYPE_UNDEFINED:
                    ret = 0;
                    break;
                case XML_ELEMENT_TYPE_EMPTY:
                    VECTXT(ctxt, state->node);
                    VERROR(ctxt->userData,
       "Element %s was declared EMPTY this one has content\n",
                           state->node->name);
                    ret = 0;
                    break;
                case XML_ELEMENT_TYPE_ANY:
                    break;
                case XML_ELEMENT_TYPE_MIXED:
                    break;
                case XML_ELEMENT_TYPE_ELEMENT: {
                    int i;

                    /* len is known to be > 0 here */
                    for (i = 0;i < len;i++) {
                        if (!IS_BLANK(data[i])) {
                            VECTXT(ctxt, state->node);
                            VERROR(ctxt->userData,
       "Element %s content does not follow the DTD\nText not allowed\n",
                                   state->node->name);
                            ret = 0;
                            goto done;
                        }
                    }
                    /*
                     * TODO:
                     * VC: Standalone Document Declaration
                     *  element types with element content, if white space
                     *  occurs directly within any instance of those types.
                     */
                    break;
                }
            }
        }
    }
done:
    return(ret);
}
/**
 * xmlValidatePopElement:
 * @ctxt:  the validation context
 * @doc:  a document instance (unused)
 * @elem:  an element instance (unused)
 * @qname:  the qualified name as appearing in the serialization (unused)
 *
 * Pop the element end from the validation stack. For an element declared
 * with element content the end of input is signalled to its regexp
 * runtime to check that no further child was expected.
 *
 * returns 1 if no validation problem was found or 0 otherwise
 */
int
xmlValidatePopElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc ATTRIBUTE_UNUSED,
                      xmlNodePtr elem ATTRIBUTE_UNUSED,
                      const xmlChar *qname ATTRIBUTE_UNUSED) {
    int ret = 1;

    /* public entry point: ctxt is dereferenced right below */
    if (ctxt == NULL)
        return(0);

    if ((ctxt->vstateNr > 0) && (ctxt->vstate != NULL)) {
        xmlValidStatePtr state = ctxt->vstate;
        xmlElementPtr elemDecl;

        /*
         * Check that the content model of the element being closed
         * is complete.
         */
        if (state->elemDecl != NULL) {
            elemDecl = state->elemDecl;

            if (elemDecl->etype == XML_ELEMENT_TYPE_ELEMENT) {
                if (state->exec != NULL) {
                    /* a NULL token tells the regexp the input is over */
                    ret = xmlRegExecPushString(state->exec, NULL, NULL);
                    if (ret == 0) {
                        VECTXT(ctxt, state->node);
                        VERROR(ctxt->userData,
       "Element %s content does not follow the DTD\nExpecting more child\n",
                               state->node->name);
                    } else {
                        /*
                         * previous validation errors should not generate
                         * a new one here
                         */
                        ret = 1;
                    }
                }
            }
        }
        vstateVPop(ctxt);
    }
    return(ret);
}
/** /**
* xmlValidateOneElement: * xmlValidateOneElement:
* @ctxt: the validation context * @ctxt: the validation context
@ -4923,7 +5335,6 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
xmlNodePtr child; xmlNodePtr child;
int ret = 1, tmp; int ret = 1, tmp;
const xmlChar *name; const xmlChar *name;
const xmlChar *prefix = NULL;
int extsubset = 0; int extsubset = 0;
CHECK_DTD; CHECK_DTD;
@ -5002,45 +5413,19 @@ xmlValidateOneElement(xmlValidCtxtPtr ctxt, xmlDocPtr doc,
"unknown element type %d\n", elem->type); "unknown element type %d\n", elem->type);
return(0); return(0);
} }
if (elem->name == NULL) return(0);
/* /*
* Fetch the declaration for the qualified name * Fetch the declaration
*/ */
if ((elem->ns != NULL) && (elem->ns->prefix != NULL)) elemDecl = xmlValidGetElemDecl(ctxt, doc, elem, &extsubset);
prefix = elem->ns->prefix; if (elemDecl == NULL)
if (prefix != NULL) {
elemDecl = xmlGetDtdQElementDesc(doc->intSubset,
elem->name, prefix);
if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
elemDecl = xmlGetDtdQElementDesc(doc->extSubset,
elem->name, prefix);
if (elemDecl != NULL)
extsubset = 1;
}
}
/*
* Fetch the declaration for the non qualified name
* This is "non-strict" validation should be done on the
* full QName but in that case being flexible makes sense.
*/
if (elemDecl == NULL) {
elemDecl = xmlGetDtdElementDesc(doc->intSubset, elem->name);
if ((elemDecl == NULL) && (doc->extSubset != NULL)) {
elemDecl = xmlGetDtdElementDesc(doc->extSubset, elem->name);
if (elemDecl != NULL)
extsubset = 1;
}
}
if (elemDecl == NULL) {
VECTXT(ctxt, elem);
VERROR(ctxt->userData, "No declaration for element %s\n",
elem->name);
return(0); return(0);
}
/*
* If vstateNr is not zero that means continuous validation is
* activated, do not try to check the content model at that level.
*/
if (ctxt->vstateNr == 0) {
/* Check that the element content matches the definition */ /* Check that the element content matches the definition */
switch (elemDecl->etype) { switch (elemDecl->etype) {
case XML_ELEMENT_TYPE_UNDEFINED: case XML_ELEMENT_TYPE_UNDEFINED:
@ -5168,6 +5553,7 @@ child_ok:
ret = tmp; ret = tmp;
break; break;
} }
} /* not continuous */
/* [ VC: Required Attribute ] */ /* [ VC: Required Attribute ] */
attr = elemDecl->attributes; attr = elemDecl->attributes;
@ -5433,15 +5819,42 @@ xmlValidateRef(xmlRefPtr ref, xmlValidCtxtPtr ctxt,
if (ref == NULL) if (ref == NULL)
return; return;
attr = ref->attr; if ((ref->attr == NULL) && (ref->name == NULL))
if (attr == NULL)
return; return;
if (attr->atype == XML_ATTRIBUTE_IDREF) { attr = ref->attr;
if (attr == NULL) {
xmlChar *dup, *str = NULL, *cur, save;
dup = xmlStrdup(name);
if (dup == NULL) {
ctxt->valid = 0;
return;
}
cur = dup;
while (*cur != 0) {
str = cur;
while ((*cur != 0) && (!IS_BLANK(*cur))) cur++;
save = *cur;
*cur = 0;
id = xmlGetID(ctxt->doc, str);
if (id == NULL) {
VERROR(ctxt->userData,
"attribute %s line %d references an unknown ID \"%s\"\n",
ref->name, ref->lineno, str);
ctxt->valid = 0;
}
if (save == 0)
break;
*cur = save;
while (IS_BLANK(*cur)) cur++;
}
xmlFree(dup);
} else if (attr->atype == XML_ATTRIBUTE_IDREF) {
id = xmlGetID(ctxt->doc, name); id = xmlGetID(ctxt->doc, name);
if (id == NULL) { if (id == NULL) {
VECTXT(ctxt, attr->parent); VECTXT(ctxt, attr->parent);
VERROR(ctxt->userData, VERROR(ctxt->userData,
"IDREF attribute %s references an unknown ID \"%s\"\n", "IDREF attribute %s references an unknown ID \"%s\"\n",
attr->name, name); attr->name, name);
ctxt->valid = 0; ctxt->valid = 0;
} }
@ -5463,7 +5876,7 @@ xmlValidateRef(xmlRefPtr ref, xmlValidCtxtPtr ctxt,
if (id == NULL) { if (id == NULL) {
VECTXT(ctxt, attr->parent); VECTXT(ctxt, attr->parent);
VERROR(ctxt->userData, VERROR(ctxt->userData,
"IDREFS attribute %s references an unknown ID \"%s\"\n", "IDREFS attribute %s references an unknown ID \"%s\"\n",
attr->name, str); attr->name, str);
ctxt->valid = 0; ctxt->valid = 0;
} }

View File

@ -743,8 +743,29 @@ static void parseAndPrintFile(char *filename) {
doc = xmlParseMemory((char *) base, info.st_size); doc = xmlParseMemory((char *) base, info.st_size);
munmap((char *) base, info.st_size); munmap((char *) base, info.st_size);
#endif #endif
} else } else if (valid) {
int ret;
xmlParserCtxtPtr ctxt;
ctxt = xmlCreateFileParserCtxt(filename);
if (ctxt == NULL) {
doc = NULL;
} else {
xmlParseDocument(ctxt);
if (ctxt->valid == 0)
progresult = 4;
ret = ctxt->wellFormed;
doc = ctxt->myDoc;
xmlFreeParserCtxt(ctxt);
if (!ret) {
xmlFreeDoc(doc);
doc = NULL;
}
}
} else {
doc = xmlParseFile(filename); doc = xmlParseFile(filename);
}
} }
/* /*

View File

@ -70,7 +70,8 @@ typedef enum {
XML_TEXTREADER_ELEMENT= 1, XML_TEXTREADER_ELEMENT= 1,
XML_TEXTREADER_END= 2, XML_TEXTREADER_END= 2,
XML_TEXTREADER_EMPTY= 3, XML_TEXTREADER_EMPTY= 3,
XML_TEXTREADER_BACKTRACK= 4 XML_TEXTREADER_BACKTRACK= 4,
XML_TEXTREADER_DONE= 5
} xmlTextReaderState; } xmlTextReaderState;
struct _xmlTextReader { struct _xmlTextReader {
@ -82,6 +83,8 @@ struct _xmlTextReader {
xmlParserInputBufferPtr input; /* the input */ xmlParserInputBufferPtr input; /* the input */
startElementSAXFunc startElement;/* initial SAX callbacks */ startElementSAXFunc startElement;/* initial SAX callbacks */
endElementSAXFunc endElement; /* idem */ endElementSAXFunc endElement; /* idem */
charactersSAXFunc characters;
cdataBlockSAXFunc cdataBlock;
unsigned int base; /* base of the segment in the input */ unsigned int base; /* base of the segment in the input */
unsigned int cur; /* current position in the input */ unsigned int cur; /* current position in the input */
xmlNodePtr node; /* current node */ xmlNodePtr node; /* current node */
@ -135,8 +138,13 @@ xmlTextReaderStartElement(void *ctx, const xmlChar *fullname,
#ifdef DEBUG_CALLBACKS #ifdef DEBUG_CALLBACKS
printf("xmlTextReaderStartElement(%s)\n", fullname); printf("xmlTextReaderStartElement(%s)\n", fullname);
#endif #endif
if ((reader != NULL) && (reader->startElement != NULL)) if ((reader != NULL) && (reader->startElement != NULL)) {
reader->startElement(ctx, fullname, atts); reader->startElement(ctx, fullname, atts);
if (ctxt->validate) {
ctxt->valid &= xmlValidatePushElement(&ctxt->vctxt, ctxt->myDoc,
ctxt->node, fullname);
}
}
reader->state = XML_TEXTREADER_ELEMENT; reader->state = XML_TEXTREADER_ELEMENT;
} }
@ -155,14 +163,74 @@ xmlTextReaderEndElement(void *ctx, const xmlChar *fullname) {
#ifdef DEBUG_CALLBACKS #ifdef DEBUG_CALLBACKS
printf("xmlTextReaderEndElement(%s)\n", fullname); printf("xmlTextReaderEndElement(%s)\n", fullname);
#endif #endif
if ((reader != NULL) && (reader->endElement != NULL)) if ((reader != NULL) && (reader->endElement != NULL)) {
xmlNodePtr node = ctxt->node;
reader->endElement(ctx, fullname); reader->endElement(ctx, fullname);
if (ctxt->validate) {
ctxt->valid &= xmlValidatePopElement(&ctxt->vctxt, ctxt->myDoc,
node, fullname);
}
}
if (reader->state == XML_TEXTREADER_ELEMENT) if (reader->state == XML_TEXTREADER_ELEMENT)
reader->state = XML_TEXTREADER_EMPTY; reader->state = XML_TEXTREADER_EMPTY;
else else
reader->state = XML_TEXTREADER_END; reader->state = XML_TEXTREADER_END;
} }
/**
 * xmlTextReaderCharacters:
 * @ctx: the user data (XML parser context)
 * @ch:  a xmlChar string
 * @len: the number of xmlChar
 *
 * SAX characters callback installed by the reader: forwards the data to
 * the saved original handler and, when the parser context is validating,
 * pushes it to the streaming validation.
 */
static void
xmlTextReaderCharacters(void *ctx, const xmlChar *ch, int len)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlTextReaderPtr reader = ctxt->_private;

#ifdef DEBUG_CALLBACKS
    printf("xmlTextReaderCharacters()\n");
#endif
    /* nothing to chain to: neither forward nor validate */
    if (reader == NULL)
        return;
    if (reader->characters == NULL)
        return;

    reader->characters(ctx, ch, len);

    if (ctxt->validate) {
        ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len);
    }
}
/**
 * xmlTextReaderCDataBlock:
 * @ctx: the user data (XML parser context)
 * @ch:  the content of the CDATA block
 * @len: the block length
 *
 * SAX cdataBlock callback installed by the reader: forwards the block to
 * the saved original handler and, when the parser context is validating,
 * pushes it to the streaming validation.
 */
static void
xmlTextReaderCDataBlock(void *ctx, const xmlChar *ch, int len)
{
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlTextReaderPtr reader = ctxt->_private;

#ifdef DEBUG_CALLBACKS
    printf("xmlTextReaderCDataBlock()\n");
#endif
    /* nothing to chain to: neither forward nor validate */
    if (reader == NULL)
        return;
    if (reader->cdataBlock == NULL)
        return;

    reader->cdataBlock(ctx, ch, len);

    if (ctxt->validate) {
        ctxt->valid &= xmlValidatePushCData(&ctxt->vctxt, ch, len);
    }
}
/** /**
* xmlTextReaderPushData: * xmlTextReaderPushData:
* @reader: the xmlTextReaderPtr used * @reader: the xmlTextReaderPtr used
@ -177,10 +245,12 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
unsigned int cur = reader->cur; unsigned int cur = reader->cur;
xmlBufferPtr inbuf; xmlBufferPtr inbuf;
int val; int val;
int oldstate;
if ((reader->input == NULL) || (reader->input->buffer == NULL)) if ((reader->input == NULL) || (reader->input->buffer == NULL))
return(-1); return(-1);
oldstate = reader->state;
reader->state = XML_TEXTREADER_NONE; reader->state = XML_TEXTREADER_NONE;
inbuf = reader->input->buffer; inbuf = reader->input->buffer;
while (reader->state == XML_TEXTREADER_NONE) { while (reader->state == XML_TEXTREADER_NONE) {
@ -192,6 +262,7 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
val = xmlParserInputBufferRead(reader->input, 4096); val = xmlParserInputBufferRead(reader->input, 4096);
if (val <= 0) { if (val <= 0) {
reader->mode = XML_TEXTREADER_MODE_EOF; reader->mode = XML_TEXTREADER_MODE_EOF;
reader->state = oldstate;
return(val); return(val);
} }
} else } else
@ -241,10 +312,14 @@ xmlTextReaderPushData(xmlTextReaderPtr reader) {
* At the end of the stream signal that the work is done to the Push * At the end of the stream signal that the work is done to the Push
* parser. * parser.
*/ */
if ((reader->mode == XML_TEXTREADER_MODE_EOF) && (cur >= inbuf->use)) { if (reader->mode == XML_TEXTREADER_MODE_EOF) {
val = xmlParseChunk(reader->ctxt, if (reader->mode != XML_TEXTREADER_DONE) {
(const char *) &inbuf->content[reader->cur], 0, 1); val = xmlParseChunk(reader->ctxt,
(const char *) &inbuf->content[reader->cur], 0, 1);
reader->mode = XML_TEXTREADER_DONE;
}
} }
reader->state = oldstate;
return(0); return(0);
} }
@ -303,19 +378,20 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
* that the parser didn't finished or that we arent at the end * that the parser didn't finished or that we arent at the end
* of stream, continue processing. * of stream, continue processing.
*/ */
while (((oldstate == XML_TEXTREADER_BACKTRACK) ||
(reader->node->children == NULL) ||
(reader->node->type == XML_ENTITY_REF_NODE) ||
(reader->node->type == XML_DTD_NODE)) &&
(reader->node->next == NULL) &&
(reader->ctxt->nodeNr == olddepth) &&
(reader->ctxt->instate != XML_PARSER_EOF)) {
val = xmlTextReaderPushData(reader);
if (val < 0)
return(-1);
if (reader->node == NULL)
return(0);
}
if (oldstate != XML_TEXTREADER_BACKTRACK) { if (oldstate != XML_TEXTREADER_BACKTRACK) {
while (((reader->node->children == NULL) ||
(reader->node->type == XML_ENTITY_REF_NODE) ||
(reader->node->type == XML_DTD_NODE)) &&
(reader->node->next == NULL) &&
(reader->ctxt->nodeNr == olddepth) &&
(reader->ctxt->instate != XML_PARSER_EOF)) {
val = xmlTextReaderPushData(reader);
if (val < 0)
return(-1);
if (reader->node == NULL)
return(0);
}
if ((reader->node->children != NULL) && if ((reader->node->children != NULL) &&
(reader->node->type != XML_ENTITY_REF_NODE) && (reader->node->type != XML_ENTITY_REF_NODE) &&
(reader->node->type != XML_DTD_NODE)) { (reader->node->type != XML_DTD_NODE)) {
@ -348,6 +424,12 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
return(1); return(1);
} }
if ((oldstate == XML_TEXTREADER_ELEMENT) &&
(reader->node->type == XML_ELEMENT_NODE)) {
reader->state = XML_TEXTREADER_END;
DUMP_READER
return(1);
}
reader->node = reader->node->parent; reader->node = reader->node->parent;
if ((reader->node == NULL) || if ((reader->node == NULL) ||
(reader->node->type == XML_DOCUMENT_NODE) || (reader->node->type == XML_DOCUMENT_NODE) ||
@ -355,6 +437,10 @@ xmlTextReaderRead(xmlTextReaderPtr reader) {
(reader->node->type == XML_DOCB_DOCUMENT_NODE) || (reader->node->type == XML_DOCB_DOCUMENT_NODE) ||
#endif #endif
(reader->node->type == XML_HTML_DOCUMENT_NODE)) { (reader->node->type == XML_HTML_DOCUMENT_NODE)) {
if (reader->mode != XML_TEXTREADER_DONE) {
val = xmlParseChunk(reader->ctxt, "", 0, 1);
reader->mode = XML_TEXTREADER_DONE;
}
reader->node = NULL; reader->node = NULL;
reader->depth = 0; reader->depth = 0;
@ -501,13 +587,14 @@ xmlTextReaderReadBinHex(xmlTextReaderPtr reader, unsigned char *array,
/** /**
* xmlNewTextReader: * xmlNewTextReader:
* @input: the xmlParserInputBufferPtr used to read data * @input: the xmlParserInputBufferPtr used to read data
* @URI: the URI information for the source if available
* *
* Create an xmlTextReader structure fed with @input * Create an xmlTextReader structure fed with @input
* *
* Returns the new xmlTextReaderPtr or NULL in case of error * Returns the new xmlTextReaderPtr or NULL in case of error
*/ */
xmlTextReaderPtr xmlTextReaderPtr
xmlNewTextReader(xmlParserInputBufferPtr input) { xmlNewTextReader(xmlParserInputBufferPtr input, const char *URI) {
xmlTextReaderPtr ret; xmlTextReaderPtr ret;
int val; int val;
@ -533,6 +620,10 @@ xmlNewTextReader(xmlParserInputBufferPtr input) {
ret->sax->startElement = xmlTextReaderStartElement; ret->sax->startElement = xmlTextReaderStartElement;
ret->endElement = ret->sax->endElement; ret->endElement = ret->sax->endElement;
ret->sax->endElement = xmlTextReaderEndElement; ret->sax->endElement = xmlTextReaderEndElement;
ret->characters = ret->sax->characters;
ret->sax->characters = xmlTextReaderCharacters;
ret->cdataBlock = ret->sax->cdataBlock;
ret->sax->cdataBlock = xmlTextReaderCDataBlock;
ret->mode = XML_TEXTREADER_MODE_INITIAL; ret->mode = XML_TEXTREADER_MODE_INITIAL;
ret->node = NULL; ret->node = NULL;
@ -540,15 +631,16 @@ xmlNewTextReader(xmlParserInputBufferPtr input) {
val = xmlParserInputBufferRead(input, 4); val = xmlParserInputBufferRead(input, 4);
if (val >= 4) { if (val >= 4) {
ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL,
(const char *) ret->input->buffer->content, 4, NULL); (const char *) ret->input->buffer->content, 4, URI);
ret->base = 0; ret->base = 0;
ret->cur = 4; ret->cur = 4;
} else { } else {
ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, NULL, 0, NULL); ret->ctxt = xmlCreatePushParserCtxt(ret->sax, NULL, NULL, 0, URI);
ret->base = 0; ret->base = 0;
ret->cur = 0; ret->cur = 0;
} }
ret->ctxt->_private = ret; ret->ctxt->_private = ret;
ret->ctxt->linenumbers = 1;
ret->allocs = XML_TEXTREADER_CTXT; ret->allocs = XML_TEXTREADER_CTXT;
return(ret); return(ret);
@ -566,16 +658,23 @@ xmlTextReaderPtr
xmlNewTextReaderFilename(const char *URI) { xmlNewTextReaderFilename(const char *URI) {
xmlParserInputBufferPtr input; xmlParserInputBufferPtr input;
xmlTextReaderPtr ret; xmlTextReaderPtr ret;
char *directory = NULL;
input = xmlParserInputBufferCreateFilename(URI, XML_CHAR_ENCODING_NONE); input = xmlParserInputBufferCreateFilename(URI, XML_CHAR_ENCODING_NONE);
if (input == NULL) if (input == NULL)
return(NULL); return(NULL);
ret = xmlNewTextReader(input); ret = xmlNewTextReader(input, URI);
if (ret == NULL) { if (ret == NULL) {
xmlFreeParserInputBuffer(input); xmlFreeParserInputBuffer(input);
return(NULL); return(NULL);
} }
ret->allocs |= XML_TEXTREADER_INPUT; ret->allocs |= XML_TEXTREADER_INPUT;
if (ret->ctxt->directory == NULL)
directory = xmlParserGetDirectory(URI);
if ((ret->ctxt->directory == NULL) && (directory != NULL))
ret->ctxt->directory = (char *) xmlStrdup((xmlChar *) directory);
if (directory != NULL)
xmlFree(directory);
return(ret); return(ret);
} }

View File

@ -2455,7 +2455,7 @@ xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
/** /**
* xmlRegExecPushString: * xmlRegExecPushString:
* @exec: a regexp execution context * @exec: a regexp execution context or NULL to indicate the end
* @value: a string token input * @value: a string token input
* @data: data associated to the token to reuse in callbacks * @data: data associated to the token to reuse in callbacks
* *