From aeb258a9ca9206070e912d8a3ce201777333748e Mon Sep 17 00:00:00 2001
From: Daniel Veillard
Date: Fri, 13 Sep 2002 14:48:12 +0000
Subject: [PATCH] cosmetic cleanup started integrating a DTD validation layer
 based on the

* hash.c: cosmetic cleanup
* valid.c include/libxml/tree.h include/libxml/valid.h: started
  integrating a DTD validation layer based on the regexps
Daniel
---
 ChangeLog              |   6 ++
 hash.c                 |   4 +-
 include/libxml/tree.h  |  14 ++++
 include/libxml/valid.h |  10 +++
 valid.c                | 179 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 14d0bc1f..683ab2d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Sep 13 16:46:14 CEST 2002 Daniel Veillard
+
+        * hash.c: cosmetic cleanup
+        * valid.c include/libxml/tree.h include/libxml/valid.h: started
+          integrating a DTD validation layer based on the regexps
+
 Thu Sep 12 18:01:29 CEST 2002 Daniel Veillard
 
         * xmlregexp.c xmlschemas.c: fixed a bug reported by Jeff Goff,
diff --git a/hash.c b/hash.c
index 28bf014f..dbf634e2 100644
--- a/hash.c
+++ b/hash.c
@@ -552,8 +552,8 @@ typedef struct {
 
 static void
 stubHashScannerFull (void *payload, void *data, const xmlChar *name,
-                     const xmlChar *name2, const xmlChar *name3
-) {
+                     const xmlChar *name2 ATTRIBUTE_UNUSED,
+                     const xmlChar *name3 ATTRIBUTE_UNUSED) {
     stubData *stubdata = (stubData *) data;
     stubdata->hashscanner (payload, stubdata->data, (xmlChar *) name);
 }
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 19c437f4..efd902a5 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -262,6 +262,15 @@ typedef enum {
     XML_ELEMENT_TYPE_ELEMENT
 } xmlElementTypeVal;
 
+
+#ifdef __cplusplus
+}
+#endif
+#include <libxml/xmlregexp.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * xmlElement:
  *
@@ -285,6 +294,11 @@ struct _xmlElement {
     xmlElementContentPtr content;       /* the allowed element content */
     xmlAttributePtr   attributes;       /* List of the declared attributes */
     const xmlChar        *prefix;       /* the namespace prefix if any */
+#ifdef LIBXML_REGEXP_ENABLED
+    xmlRegexpPtr       contModel;       /* the validating regexp */
+#else
+    void              *contModel;
+#endif
 };
 
 
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index 2aedb836..3d2f5087 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -12,6 +12,8 @@
 
 #include <libxml/tree.h>
 #include <libxml/list.h>
+#include <libxml/xmlautomata.h>
+#include <libxml/xmlregexp.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -75,6 +77,14 @@ struct _xmlValidCtxt {
     int                vstateNr;      /* Depth of the validation stack */
     int                vstateMax;     /* Max depth of the validation stack */
     xmlValidState     *vstateTab;     /* array of validation states */
+
+#ifdef LIBXML_REGEXP_ENABLED
+    xmlAutomataPtr            am;     /* the automata */
+    xmlAutomataStatePtr    state;     /* used to build the automata */
+#else
+    void                     *am;
+    void                  *state;
+#endif
 };
 
 /*
diff --git a/valid.c b/valid.c
index ca0d29d6..4578e144 100644
--- a/valid.c
+++ b/valid.c
@@ -350,6 +350,185 @@ static xmlElementPtr xmlGetDtdElementDesc2(xmlDtdPtr dtd, const xmlChar *name,
                                    int create);
 xmlAttributePtr xmlScanAttributeDecl(xmlDtdPtr dtd, const xmlChar *elem);
 
+#ifdef LIBXML_REGEXP_ENABLED
+
+/************************************************************************
+ *                                                                      *
+ *              Content model validation based on the regexps           *
+ *                                                                      *
+ ************************************************************************/
+
+/**
+ * xmlValidBuildAContentModel:
+ * @content:  the content model
+ * @ctxt:  the validation context (holds the automata and current state)
+ * @name:  the element name whose content is being built
+ *
+ * Generate the automata sequence needed for that content (sub)model
+ *
+ * Returns 0 if successful or -1 in case of error.
+ */
+static int
+xmlValidBuildAContentModel(xmlElementContentPtr content,
+                           xmlValidCtxtPtr ctxt,
+                           const xmlChar *name) {
+    if (content == NULL) {
+        VERROR(ctxt->userData,
+               "Found unexpected type = NULL in %s content model\n", name);
+        return(-1);
+    }
+    switch (content->type) {
+        case XML_ELEMENT_CONTENT_PCDATA:
+            VERROR(ctxt->userData, "ContentModel found PCDATA for element %s\n",
+                   name);
+            return(-1);
+            break;
+        case XML_ELEMENT_CONTENT_ELEMENT: {
+            xmlAutomataStatePtr oldstate = ctxt->state;
+            switch (content->ocur) {
+                case XML_ELEMENT_CONTENT_ONCE:
+                    ctxt->state = xmlAutomataNewTransition(ctxt->am,
+                            ctxt->state, NULL, content->name, NULL);
+                    break;
+                case XML_ELEMENT_CONTENT_OPT:
+                    ctxt->state = xmlAutomataNewTransition(ctxt->am,
+                            ctxt->state, NULL, content->name, NULL);
+                    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+                    break;
+                case XML_ELEMENT_CONTENT_PLUS:
+                    ctxt->state = xmlAutomataNewTransition(ctxt->am,
+                            ctxt->state, NULL, content->name, NULL);
+                    xmlAutomataNewTransition(ctxt->am, ctxt->state,
+                                             ctxt->state, content->name, NULL);
+                    break;
+                case XML_ELEMENT_CONTENT_MULT:
+                    xmlAutomataNewTransition(ctxt->am, ctxt->state,
+                                             ctxt->state, content->name, NULL);
+                    break;
+            }
+            break;
+        }
+        case XML_ELEMENT_CONTENT_SEQ: {
+            xmlAutomataStatePtr oldstate;
+            xmlElementContentOccur ocur;
+
+            /*
+             * Chain every c1; the loop exits on the last, non-SEQ node itself
+             */
+            oldstate = ctxt->state;
+            ocur = content->ocur;
+            while (content->type == XML_ELEMENT_CONTENT_SEQ) {
+                xmlValidBuildAContentModel(content->c1, ctxt, name);
+                content = content->c2;
+            }
+            xmlValidBuildAContentModel(content, ctxt, name);
+            switch (ocur) {
+                case XML_ELEMENT_CONTENT_ONCE:
+                    break;
+                case XML_ELEMENT_CONTENT_OPT:
+                    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+                    break;
+                case XML_ELEMENT_CONTENT_MULT:
+                    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+                    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate);
+                    break;
+                case XML_ELEMENT_CONTENT_PLUS:
+                    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate);
+                    break;
+            }
+            break;
+        }
+        case XML_ELEMENT_CONTENT_OR: {
+            xmlAutomataStatePtr start, end;
+            xmlElementContentOccur ocur;
+
+            start = ctxt->state;
+            end = xmlAutomataNewState(ctxt->am);
+            ocur = content->ocur;
+
+            /*
+             * build each branch from start and merge it into end with an
+             * epsilon transition; the loop exits on the last, non-OR branch
+             */
+            while (content->type == XML_ELEMENT_CONTENT_OR) {
+                ctxt->state = start;
+                xmlValidBuildAContentModel(content->c1, ctxt, name);
+                xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end);
+                content = content->c2;
+            }
+            ctxt->state = start;
+            xmlValidBuildAContentModel(content, ctxt, name);
+            xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end);
+            ctxt->state = end;
+            switch (ocur) {
+                case XML_ELEMENT_CONTENT_ONCE:
+                    break;
+                case XML_ELEMENT_CONTENT_OPT:
+                    xmlAutomataNewEpsilon(ctxt->am, start, ctxt->state);
+                    break;
+                case XML_ELEMENT_CONTENT_MULT:
+                    xmlAutomataNewEpsilon(ctxt->am, start, ctxt->state);
+                    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, start);
+                    break;
+                case XML_ELEMENT_CONTENT_PLUS:
+                    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, start);
+                    break;
+            }
+            break;
+        }
+        default:
+            VERROR(ctxt->userData, "ContentModel broken for element %s\n",
+                   name);
+            return(-1);
+    }
+    return(0);
+}
+/**
+ * xmlValidBuildContentModel:
+ * @ctxt:  a validation context
+ * @elem:  an element declaration node
+ *
+ * (Re)Build the automata associated to the content model of this
+ * element
+ *
+ * Returns 0 in case of success, -1 in case of error
+ */
+int
+xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, xmlElementPtr elem) {
+    xmlAutomataStatePtr start;
+
+    if ((ctxt == NULL) || (elem == NULL))
+        return(-1);
+    if (elem->type != XML_ELEMENT_DECL)
+        return(-1);
+    if (elem->etype != XML_ELEMENT_TYPE_ELEMENT)
+        return(0);
+    /* TODO: should we rebuild in this case ? */
+    if (elem->contModel != NULL)
+        return(0);
+
+    ctxt->am = xmlNewAutomata();
+    if (ctxt->am == NULL) {
+        VERROR(ctxt->userData, "Cannot create automata for element %s\n",
+               elem->name);
+        return(-1);
+    }
+    start = ctxt->state = xmlAutomataGetInitState(ctxt->am);
+    xmlValidBuildAContentModel(elem->content, ctxt, elem->name);
+    xmlAutomataSetFinalState(ctxt->am, ctxt->state);
+    if (!xmlAutomataIsDeterminist(ctxt->am)) {
+        VERROR(ctxt->userData, "Content model of %s is not determinist:\n",
+               elem->name);
+        ctxt->valid = 0;
+    }
+    ctxt->state = NULL;
+    xmlFreeAutomata(ctxt->am);
+    ctxt->am = NULL;
+    return(0);
+}
+
+#endif /* LIBXML_REGEXP_ENABLED */
+
 /************************************************************************
  *                                                                      *
  *                        QName handling helper                         *