From aeb258a9ca9206070e912d8a3ce201777333748e Mon Sep 17 00:00:00 2001
From: Daniel Veillard <veillard@src.gnome.org>
Date: Fri, 13 Sep 2002 14:48:12 +0000
Subject: [PATCH] cosmetic cleanup started integrating a DTD validation layer
 based on the

* hash.c: cosmetic cleanup
* valid.c include/libxml/tree.h include/libxml/valid.h: started
  integrating a DTD validation layer based on the regexps
Daniel
---
 ChangeLog              |   6 ++
 hash.c                 |   4 +-
 include/libxml/tree.h  |  14 ++++
 include/libxml/valid.h |  10 +++
 valid.c                | 179 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 14d0bc1f..683ab2d7 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+Fri Sep 13 16:46:14 CEST 2002 Daniel Veillard <daniel@veillard.com>
+
+	* hash.c: cosmetic cleanup
+	* valid.c include/libxml/tree.h include/libxml/valid.h: started 
+	  integrating a DTD validation layer based on the regexps
+
 Thu Sep 12 18:01:29 CEST 2002 Daniel Veillard <daniel@veillard.com>
 
 	* xmlregexp.c xmlschemas.c: fixed a bug reported by Jeff Goff,
diff --git a/hash.c b/hash.c
index 28bf014f..dbf634e2 100644
--- a/hash.c
+++ b/hash.c
@@ -552,8 +552,8 @@ typedef struct {
 
 static void 
 stubHashScannerFull (void *payload, void *data, const xmlChar *name, 
-    const xmlChar *name2, const xmlChar *name3
-) {
+                     const xmlChar *name2 ATTRIBUTE_UNUSED,
+		     const xmlChar *name3 ATTRIBUTE_UNUSED) {
     stubData *stubdata = (stubData *) data;
     stubdata->hashscanner (payload, stubdata->data, (xmlChar *) name);
 }                                  
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 19c437f4..efd902a5 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -262,6 +262,15 @@ typedef enum {
     XML_ELEMENT_TYPE_ELEMENT
 } xmlElementTypeVal;
 
+
+#ifdef __cplusplus
+}
+#endif
+#include <libxml/xmlregexp.h>
+#ifdef __cplusplus
+extern "C" {
+#endif
+
 /**
  * xmlElement:
  *
@@ -285,6 +294,11 @@ struct _xmlElement {
     xmlElementContentPtr content;	/* the allowed element content */
     xmlAttributePtr   attributes;	/* List of the declared attributes */
     const xmlChar        *prefix;	/* the namespace prefix if any */
+#ifdef LIBXML_REGEXP_ENABLED
+    xmlRegexpPtr       contModel;	/* the validating regexp */
+#else
+    void	      *contModel;
+#endif
 };
 
 
diff --git a/include/libxml/valid.h b/include/libxml/valid.h
index 2aedb836..3d2f5087 100644
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@@ -12,6 +12,8 @@
 
 #include <libxml/tree.h>
 #include <libxml/list.h>
+#include <libxml/xmlautomata.h>
+#include <libxml/xmlregexp.h>
 
 #ifdef __cplusplus
 extern "C" {
@@ -75,6 +77,14 @@ struct _xmlValidCtxt {
     int                vstateNr;      /* Depth of the validation stack */
     int                vstateMax;     /* Max depth of the validation stack */
     xmlValidState     *vstateTab;     /* array of validation states */
+
+#ifdef LIBXML_REGEXP_ENABLED
+    xmlAutomataPtr            am;     /* the automata */
+    xmlAutomataStatePtr    state;     /* used to build the automata */
+#else
+    void                     *am;
+    void                  *state;
+#endif
 };
 
 /*
diff --git a/valid.c b/valid.c
index ca0d29d6..4578e144 100644
--- a/valid.c
+++ b/valid.c
@@ -350,6 +350,185 @@ static xmlElementPtr xmlGetDtdElementDesc2(xmlDtdPtr dtd, const xmlChar *name,
 	                           int create);
 xmlAttributePtr xmlScanAttributeDecl(xmlDtdPtr dtd, const xmlChar *elem);
 
+#ifdef LIBXML_REGEXP_ENABLED
+
+/************************************************************************
+ *									*
+ *		Content model validation based on the regexps		*
+ *									*
+ ************************************************************************/
+
+/**
+ * xmlValidBuildAContentModel:
+ * @content:  the content model
+ * @ctxt:  the validation context, providing the automata and its state
+ * @name:  the element name whose content is being built
+ *
+ * Generate the automata sequence needed for that type, starting from
+ * ctxt->state which is left on the state reached after @content
+ *
+ * Returns 0 if successful or -1 if @content is NULL, PCDATA or unknown.
+ */
+static int
+xmlValidBuildAContentModel(xmlElementContentPtr content,
+		           xmlValidCtxtPtr ctxt,
+		           const xmlChar *name) {
+    if (content == NULL) {
+	VERROR(ctxt->userData,
+	       "Found unexpected type = NULL in %s content model\n", name);
+	return(-1);
+    }
+    switch (content->type) {
+	case XML_ELEMENT_CONTENT_PCDATA:
+	    VERROR(ctxt->userData, "ContentModel found PCDATA for element %s\n",
+		   name);
+	    return(-1);
+	case XML_ELEMENT_CONTENT_ELEMENT: {
+	    xmlAutomataStatePtr oldstate = ctxt->state;
+	    switch (content->ocur) {
+		case XML_ELEMENT_CONTENT_ONCE:
+		    ctxt->state = xmlAutomataNewTransition(ctxt->am,
+			    ctxt->state, NULL, content->name, NULL);
+		    break;
+		case XML_ELEMENT_CONTENT_OPT: /* epsilon skips it */
+		    ctxt->state = xmlAutomataNewTransition(ctxt->am,
+			    ctxt->state, NULL, content->name, NULL);
+		    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+		    break;
+		case XML_ELEMENT_CONTENT_PLUS: /* one, then self-loop */
+		    ctxt->state = xmlAutomataNewTransition(ctxt->am,
+			    ctxt->state, NULL, content->name, NULL);
+		    xmlAutomataNewTransition(ctxt->am, ctxt->state,
+			                     ctxt->state, content->name, NULL);
+		    break;
+		case XML_ELEMENT_CONTENT_MULT: /* self-loop only */
+		    xmlAutomataNewTransition(ctxt->am, ctxt->state,
+			                     ctxt->state, content->name, NULL);
+		    break;
+	    }
+	    break;
+	}
+	case XML_ELEMENT_CONTENT_SEQ: {
+	    xmlAutomataStatePtr oldstate = ctxt->state;
+	    xmlElementContentOccur ocur = content->ocur;
+
+	    /*
+	     * Chain the c1 items along c2; a c2 group carrying its own
+	     * occurrence ends the chain and is built by the call below
+	     */
+	    do {
+		xmlValidBuildAContentModel(content->c1, ctxt, name);
+		content = content->c2;
+	    } while ((content != NULL) &&
+		     (content->type == XML_ELEMENT_CONTENT_SEQ) &&
+		     (content->ocur == XML_ELEMENT_CONTENT_ONCE));
+	    xmlValidBuildAContentModel(content, ctxt, name);
+	    switch (ocur) {
+		case XML_ELEMENT_CONTENT_ONCE:
+		    break;
+		case XML_ELEMENT_CONTENT_OPT:
+		    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+		    break;
+		case XML_ELEMENT_CONTENT_MULT:
+		    xmlAutomataNewEpsilon(ctxt->am, oldstate, ctxt->state);
+		    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate);
+		    break;
+		case XML_ELEMENT_CONTENT_PLUS:
+		    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, oldstate);
+		    break;
+	    }
+	    break;
+	}
+	case XML_ELEMENT_CONTENT_OR: {
+	    xmlAutomataStatePtr start = ctxt->state;
+	    xmlAutomataStatePtr end = xmlAutomataNewState(ctxt->am);
+	    xmlElementContentOccur ocur = content->ocur;
+
+	    /*
+	     * build each alternative from start and remerge it into end
+	     * with an epsilon; a c2 group with its own occurrence ends it
+	     */
+	    do {
+		ctxt->state = start;
+		xmlValidBuildAContentModel(content->c1, ctxt, name);
+		xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end);
+		content = content->c2;
+	    } while ((content != NULL) &&
+		     (content->type == XML_ELEMENT_CONTENT_OR) &&
+		     (content->ocur == XML_ELEMENT_CONTENT_ONCE));
+	    ctxt->state = start;
+	    xmlValidBuildAContentModel(content, ctxt, name);
+	    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, end);
+	    ctxt->state = end;
+	    switch (ocur) {
+		case XML_ELEMENT_CONTENT_ONCE:
+		    break;
+		case XML_ELEMENT_CONTENT_OPT:
+		    xmlAutomataNewEpsilon(ctxt->am, start, ctxt->state);
+		    break;
+		case XML_ELEMENT_CONTENT_MULT:
+		    xmlAutomataNewEpsilon(ctxt->am, start, ctxt->state);
+		    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, start);
+		    break;
+		case XML_ELEMENT_CONTENT_PLUS:
+		    xmlAutomataNewEpsilon(ctxt->am, ctxt->state, start);
+		    break;
+	    }
+	    break;
+	}
+	default:
+	    VERROR(ctxt->userData, "ContentModel broken for element %s\n",
+		   name);
+	    return(-1);
+    }
+    return(0);
+}
+/**
+ * xmlValidBuildContentModel:
+ * @ctxt:  a validation context
+ * @elem:  an element declaration node
+ *
+ * Build the automata for the content model of this element, clear
+ * ctxt->valid if it is not determinist, then free the automata
+ *
+ * Returns 0 in case of success, -1 in case of error
+ */
+int
+xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, xmlElementPtr elem) {
+    int ret;
+
+    if ((ctxt == NULL) || (elem == NULL))
+	return(-1);
+    if (elem->type != XML_ELEMENT_DECL)
+	return(-1);
+    if (elem->etype != XML_ELEMENT_TYPE_ELEMENT)
+	return(0);
+    /* TODO: should we rebuild in this case ? */
+    if (elem->contModel != NULL)
+	return(0);
+
+    ctxt->am = xmlNewAutomata();
+    if (ctxt->am == NULL) {
+	VERROR(ctxt->userData, "Cannot create automata for element %s\n",
+	       elem->name);
+	return(-1);
+    }
+    ctxt->state = xmlAutomataGetInitState(ctxt->am);
+    ret = xmlValidBuildAContentModel(elem->content, ctxt, elem->name);
+    xmlAutomataSetFinalState(ctxt->am, ctxt->state);
+    if (!xmlAutomataIsDeterminist(ctxt->am)) {
+	VERROR(ctxt->userData, "Content model of %s is not determinist:\n",
+	       elem->name);
+	ctxt->valid = 0;
+    }
+    ctxt->state = NULL;
+    xmlFreeAutomata(ctxt->am);
+    ctxt->am = NULL;
+    return(ret);
+}
+
+#endif /* LIBXML_REGEXP_ENABLED */
+
 /************************************************************************
  *									*
  *			QName handling helper				*