This is the 2.0.0-beta, lots and lots and lots of changes

Have a look at http://xmlsoft.org/upgrade.html Daniel
2025-07-28 00:21:53 +03:00 · 2000-03-14 18:30:20 +00:00
parent 76234da152
commit cf46199c5e
91 changed files with 9978 additions and 5547 deletions
--- a/8
+++ b/8
@ -1,3 +1,11 @@
+Tue Mar 14 19:11:29 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+	* all: tagged LIB_XML_1_X
+	* *.c *.h : updated from W3C CVS tree
+	* configure.in : 2.0.0-beta
+	* libxml.spec.in : libxml2 package name
+	* result/* : new version of the tests output
+
 Mon Mar  6 09:34:52 CET 2000 Daniel Veillard <Daniel.Veillard@w3.org>

 	* doc/xml.html, doc/update.html: updated docs, 1.8.7
--- a/HTMLparser.c
+++ b/HTMLparser.c
@ -121,36 +121,81 @@ PUSH_AND_POP(extern, xmlChar*, name)
 *   COPY(to) copy one char to *to, increment CUR_PTR and to accordingly
 */

-#define CUR (*ctxt->input->cur)
+#define CUR ((int) (*ctxt->input->cur))
+    
 #define UPPER (toupper(*ctxt->input->cur))
+
 #define SKIP(val) ctxt->nbChars += (val),ctxt->input->cur += (val)
+
 #define NXT(val) ctxt->input->cur[(val)]
+
 #define UPP(val) (toupper(ctxt->input->cur[(val)]))
+
 #define CUR_PTR ctxt->input->cur
+
 #define SHRINK  xmlParserInputShrink(ctxt->input)
+
 #define GROW  xmlParserInputGrow(ctxt->input, INPUT_CHUNK)

-#define SKIP_BLANKS 							\
-    while (IS_BLANK(*(ctxt->input->cur))) NEXT
+#define CURRENT ((int) (*ctxt->input->cur))

-#ifndef USE_UTF_8
-#define CURRENT (*ctxt->input->cur)
-#define NEXT {								\
-    if ((*ctxt->input->cur == 0) &&					\
-        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {		\
-	    xmlPopInput(ctxt);						\
-    } else {								\
-        if (*(ctxt->input->cur) == '\n') {				\
-	    ctxt->input->line++; ctxt->input->col = 1;			\
-	} else ctxt->input->col++;					\
-	ctxt->input->cur++;						\
-	ctxt->nbChars++;						\
-        if (*ctxt->input->cur == 0)					\
-	    xmlParserInputGrow(ctxt->input, INPUT_CHUNK);		\
-    }}
+#define NEXT htmlNextChar(ctxt);

-#else
-#endif
+#define SKIP_BLANKS htmlSkipBlankChars(ctxt);
+
+/**
+ * htmlNextChar:
+ * @ctxt:  the HTML parser context
+ *
+ * Move the parser past the current input character, updating the
+ * line/column position and the count of consumed characters.
+ */
+
+void
+htmlNextChar(htmlParserCtxtPtr ctxt) {
+    /*
+     * Sitting on a 0 byte and xmlParserInputGrow() could not bring in
+     * anything more: hand over to xmlPopInput() and stop here.
+     */
+    if ((*ctxt->input->cur == 0) &&
+        (xmlParserInputGrow(ctxt->input, INPUT_CHUNK) <= 0)) {
+	xmlPopInput(ctxt);
+	return;
+    }
+
+    /* Account for the character being consumed */
+    if (*(ctxt->input->cur) != '\n') {
+	ctxt->input->col++;
+    } else {
+	ctxt->input->line++;
+	ctxt->input->col = 1;
+    }
+    ctxt->nbChars++;
+    ctxt->input->cur++;
+
+    /* Landed on a 0 byte: try to fetch more data right away */
+    if (*ctxt->input->cur == 0)
+	xmlParserInputGrow(ctxt->input, INPUT_CHUNK);
+}
+
+/**
+ * htmlSkipBlankChars:
+ * @ctxt:  the HTML parser context
+ *
+ * Skip every blank character (as tested by IS_BLANK) found at the
+ * current position of the input stream.
+ *
+ * Returns the number of blank characters skipped
+ */
+
+int
+htmlSkipBlankChars(xmlParserCtxtPtr ctxt) {
+    int res = 0;
+
+    /*
+     * htmlNextChar() does the per-character work (input growing/popping,
+     * line/col and nbChars accounting); calling it here instead of
+     * carrying a second copy of that code keeps the two in sync.
+     */
+    while (IS_BLANK(*(ctxt->input->cur))) {
+	htmlNextChar(ctxt);
+	res++;
+    }
+    return(res);
+}



@ -475,7 +520,7 @@ htmlAutoCloseTag(htmlDocPtr doc, const xmlChar *name, htmlNodePtr elem) {
    if (elem == NULL) return(1);
    if (!xmlStrcmp(name, elem->name)) return(0);
    if (htmlCheckAutoClose(elem->name, name)) return(1);
-    child = elem->childs;
+    child = elem->children;
    while (child != NULL) {
        if (htmlAutoCloseTag(doc, name, child)) return(1);
 	child = child->next;
@ -499,7 +544,7 @@ htmlIsAutoClosed(htmlDocPtr doc, htmlNodePtr elem) {
    htmlNodePtr child;

    if (elem == NULL) return(1);
-    child = elem->childs;
+    child = elem->children;
    while (child != NULL) {
 	if (htmlAutoCloseTag(doc, elem->name, child)) return(1);
 	child = child->next;
@ -1275,7 +1320,7 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
    else
 	xmlCreateIntSubset(cur, BAD_CAST "HTML", ExternalID, URI);
    cur->name = NULL;
-    cur->root = NULL; 
+    cur->children = NULL; 
    cur->extSubset = NULL;
    cur->oldNs = NULL;
    cur->encoding = NULL;
@ -1285,7 +1330,6 @@ htmlNewDoc(const xmlChar *URI, const xmlChar *ExternalID) {
    cur->refs = NULL;
 #ifndef XML_WITHOUT_CORBA
    cur->_private = NULL;
-    cur->vepv = NULL;
 #endif
    return(cur);
 }
@ -1667,7 +1711,8 @@ htmlParseSystemLiteral(htmlParserCtxtPtr ctxt) {
        }
    } else {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-	    ctxt->sax->error(ctxt->userData, "SystemLiteral \" or ' expected\n");
+	    ctxt->sax->error(ctxt->userData,
+	                     "SystemLiteral \" or ' expected\n");
 	ctxt->wellFormed = 0;
    }
    
--- a/HTMLtree.c
+++ b/HTMLtree.c
@ -80,7 +80,7 @@ htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
    }
    xmlBufferWriteChar(buf, " ");
    xmlBufferWriteCHAR(buf, cur->name);
-    value = xmlNodeListGetString(doc, cur->val, 0);
+    value = xmlNodeListGetString(doc, cur->children, 0);
    if (value) {
 	xmlBufferWriteChar(buf, "=");
 	xmlBufferWriteQuotedString(buf, value);
@ -212,7 +212,7 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 	}
 	return;
    }
-    if ((cur->content == NULL) && (cur->childs == NULL)) {
+    if ((cur->content == NULL) && (cur->children == NULL)) {
        if ((info != NULL) && (info->endTag != 0))
 	    xmlBufferWriteChar(buf, ">");
 	else {
@ -242,15 +242,15 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 	    xmlFree(buffer);
 	}
    }
-    if (cur->childs != NULL) {
-        if ((cur->childs->type != HTML_TEXT_NODE) &&
-	    (cur->childs->type != HTML_ENTITY_REF_NODE) &&
-	    (cur->childs != cur->last))
+    if (cur->children != NULL) {
+        if ((cur->children->type != HTML_TEXT_NODE) &&
+	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
+	    (cur->children != cur->last))
 	    xmlBufferWriteChar(buf, "\n");
-	htmlNodeListDump(buf, doc, cur->childs);
+	htmlNodeListDump(buf, doc, cur->children);
        if ((cur->last->type != HTML_TEXT_NODE) &&
 	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
-	    (cur->childs != cur->last))
+	    (cur->children != cur->last))
 	    xmlBufferWriteChar(buf, "\n");
    }
    if (!htmlIsAutoClosed(doc, cur)) {
@ -307,8 +307,8 @@ htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur) {
 	xmlBufferWriteChar(buf, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\" \"http://www.w3.org/TR/REC-html40/loose.dtd\">");

    }
-    if (cur->root != NULL) {
-        htmlNodeListDump(buf, cur, cur->root);
+    if (cur->children != NULL) {
+        htmlNodeListDump(buf, cur, cur->children);
    }
    xmlBufferWriteChar(buf, "\n");
    cur->type = type;
--- a/SAX.c
+++ b/SAX.c
@ -158,66 +158,112 @@ internalSubset(void *ctx, const xmlChar *name,
            name, ExternalID, SystemID);
 #endif
    xmlCreateIntSubset(ctxt->myDoc, name, ExternalID, SystemID);
+}
+
+/**
+ * externalSubset:
+ * @ctx: the user data (XML parser context)
+ *
+ * Callback on external subset declaration.
+ */
+void
+externalSubset(void *ctx, const xmlChar *name,
+	       const xmlChar *ExternalID, const xmlChar *SystemID)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+#ifdef DEBUG_SAX
+    fprintf(stderr, "SAX.externalSubset(%s, %s, %s)\n",
+            name, ExternalID, SystemID);
+#endif
    if (((ExternalID != NULL) || (SystemID != NULL)) &&
        (ctxt->validate && ctxt->wellFormed && ctxt->myDoc)) {
 	/*
 	 * Try to fetch and parse the external subset.
 	 */
-	xmlDtdPtr ret = NULL;
-	xmlParserCtxtPtr dtdCtxt;
+	xmlParserInputPtr oldinput;
+	int oldinputNr;
+	int oldinputMax;
+	xmlParserInputPtr *oldinputTab;
+	int oldwellFormed;
 	xmlParserInputPtr input = NULL;
 	xmlCharEncoding enc;

-	dtdCtxt = xmlNewParserCtxt();
-	if (dtdCtxt == NULL) return;
-
 	/*
 	 * Ask the Entity resolver to load the damn thing
 	 */
-	if ((ctxt->directory != NULL) && (dtdCtxt->directory == NULL))
-	    dtdCtxt->directory = (char *) xmlStrdup(BAD_CAST ctxt->directory);
-
-	if ((dtdCtxt->sax != NULL) && (dtdCtxt->sax->resolveEntity != NULL))
-	    input = dtdCtxt->sax->resolveEntity(dtdCtxt->userData, ExternalID,
+	if ((ctxt->sax != NULL) && (ctxt->sax->resolveEntity != NULL))
+	    input = ctxt->sax->resolveEntity(ctxt->userData, ExternalID,
 	                                        SystemID);
 	if (input == NULL) {
-	    xmlFreeParserCtxt(dtdCtxt);
 	    return;
 	}

+	xmlNewDtd(ctxt->myDoc, name, ExternalID, SystemID);
+
 	/*
-	 * plug some encoding conversion routines here. !!!
+	 * make sure we won't destroy the main document context
 	 */
-	xmlPushInput(dtdCtxt, input);
-	enc = xmlDetectCharEncoding(dtdCtxt->input->cur);
-	xmlSwitchEncoding(dtdCtxt, enc);
+	oldinput = ctxt->input;
+	oldinputNr = ctxt->inputNr;
+	oldinputMax = ctxt->inputMax;
+	oldinputTab = ctxt->inputTab;
+	oldwellFormed = ctxt->wellFormed;
+
+	ctxt->inputTab = (xmlParserInputPtr *)
+	                 xmlMalloc(5 * sizeof(xmlParserInputPtr));
+	if (ctxt->inputTab == NULL) {
+	    ctxt->errNo = XML_ERR_NO_MEMORY;
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		ctxt->sax->error(ctxt->userData, 
+		     "externalSubset: out of memory\n");
+	    ctxt->errNo = XML_ERR_NO_MEMORY;
+	    ctxt->input = oldinput;
+	    ctxt->inputNr = oldinputNr;
+	    ctxt->inputMax = oldinputMax;
+	    ctxt->inputTab = oldinputTab;
+	    return;
+	}
+	ctxt->inputNr = 0;
+	ctxt->inputMax = 5;
+	ctxt->input = NULL;
+	xmlPushInput(ctxt, input);
+
+	/*
+	 * On the fly encoding conversion if needed
+	 */
+	enc = xmlDetectCharEncoding(ctxt->input->cur, 4);
+	xmlSwitchEncoding(ctxt, enc);

 	if (input->filename == NULL)
 	    input->filename = (char *) xmlStrdup(SystemID);
 	input->line = 1;
 	input->col = 1;
-	input->base = dtdCtxt->input->cur;
-	input->cur = dtdCtxt->input->cur;
+	input->base = ctxt->input->cur;
+	input->cur = ctxt->input->cur;
 	input->free = NULL;

 	/*
 	 * let's parse that entity knowing it's an external subset.
 	 */
-	xmlParseExternalSubset(dtdCtxt, ExternalID, SystemID);
+	xmlParseExternalSubset(ctxt, ExternalID, SystemID);

-	if (dtdCtxt->myDoc != NULL) {
-	    if (dtdCtxt->wellFormed) {
-		ret = dtdCtxt->myDoc->intSubset;
-		dtdCtxt->myDoc->intSubset = NULL;
-	    } else {
-		ret = NULL;
-	    }
-	    xmlFreeDoc(dtdCtxt->myDoc);
-	    dtdCtxt->myDoc = NULL;
-	}
-	xmlFreeParserCtxt(dtdCtxt);
+        /*
+	 * Free up the external entities
+	 */

-	ctxt->myDoc->extSubset = ret;
+	while (ctxt->inputNr > 1)
+	    xmlPopInput(ctxt);
+	xmlFreeInputStream(ctxt->input);
+        xmlFree(ctxt->inputTab);
+
+	/*
+	 * Restore the parsing context of the main entity
+	 */
+	ctxt->input = oldinput;
+	ctxt->inputNr = oldinputNr;
+	ctxt->inputMax = oldinputMax;
+	ctxt->inputTab = oldinputTab;
+	/* ctxt->wellFormed = oldwellFormed; */
    }
 }

@ -316,13 +362,23 @@ entityDecl(void *ctx, const xmlChar *name, int type,
    fprintf(stderr, "SAX.entityDecl(%s, %d, %s, %s, %s)\n",
            name, type, publicId, systemId, content);
 #endif
-    xmlAddDocEntity(ctxt->myDoc, name, type, publicId, systemId, content);
+    if (ctxt->inSubset == 1)
+	xmlAddDocEntity(ctxt->myDoc, name, type, publicId,
+		              systemId, content);
+    else if (ctxt->inSubset == 2)
+	xmlAddDtdEntity(ctxt->myDoc, name, type, publicId,
+		              systemId, content);
+    else {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt, 
+	     "SAX.entityDecl(%s) called while not in subset\n", name);
+    }
 }

 /**
 * attributeDecl:
 * @ctx: the user data (XML parser context)
- * @name:  the attribute name 
+ * @fullname:  the attribute name 
 * @type:  the attribute type 
 * @publicId: The public ID of the attribute
 * @systemId: The system ID of the attribute
@ -331,24 +387,40 @@ entityDecl(void *ctx, const xmlChar *name, int type,
 * An attribute definition has been parsed
 */
 void
-attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *name,
+attributeDecl(void *ctx, const xmlChar *elem, const xmlChar *fullname,
              int type, int def, const xmlChar *defaultValue,
 	      xmlEnumerationPtr tree)
 {
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
    xmlAttributePtr attr;
+    xmlChar *name = NULL, *prefix = NULL;

 #ifdef DEBUG_SAX
    fprintf(stderr, "SAX.attributeDecl(%s, %s, %d, %d, %s, ...)\n",
-            elem, name, type, def, defaultValue);
+            elem, fullname, type, def, defaultValue);
 #endif
+    /*
+     * Split the qualified name; name and prefix are allocated here and
+     * must be released with xmlFree() on every exit path.
+     */
+    name = xmlSplitQName(ctxt, fullname, &prefix);
+    if (ctxt->inSubset == 1)
 	attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, elem,
-                               name, type, def, defaultValue, tree);
+                               name, prefix, type, def, defaultValue, tree);
+    else if (ctxt->inSubset == 2)
+	attr = xmlAddAttributeDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, elem,
+                               name, prefix, type, def, defaultValue, tree);
+    else {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt, 
+	     "SAX.attributeDecl(%s) called while not in subset\n", name);
+	/* do not leak the strings from xmlSplitQName() on the error path */
+	if (prefix != NULL)
+	    xmlFree(prefix);
+	if (name != NULL)
+	    xmlFree(name);
+	return;
+    }
    if (attr == 0) ctxt->valid = 0;
    if (ctxt->validate && ctxt->wellFormed &&
        ctxt->myDoc && ctxt->myDoc->intSubset)
 	ctxt->valid &= xmlValidateAttributeDecl(&ctxt->vctxt, ctxt->myDoc,
 	                                        attr);
+    if (prefix != NULL)
+	xmlFree(prefix);
+    if (name != NULL)
+	xmlFree(name);
 }

 /**
@ -367,16 +439,26 @@ elementDecl(void *ctx, const xmlChar *name, int type,
 	    xmlElementContentPtr content)
 {
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
-    xmlElementPtr elem;
+    xmlElementPtr elem = NULL;

 #ifdef DEBUG_SAX
    fprintf(stderr, "SAX.elementDecl(%s, %d, ...)\n",
-            name, type);
+            fullname, type);
 #endif
    
+    if (ctxt->inSubset == 1)
 	elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->intSubset,
                             name, type, content);
-    if (elem == 0) ctxt->valid = 0;
+    else if (ctxt->inSubset == 2)
+	elem = xmlAddElementDecl(&ctxt->vctxt, ctxt->myDoc->extSubset,
+                             name, type, content);
+    else {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt, 
+	     "SAX.elementDecl(%s) called while not in subset\n", name);
+	return;
+    }
+    if (elem == NULL) ctxt->valid = 0;
    if (ctxt->validate && ctxt->wellFormed &&
        ctxt->myDoc && ctxt->myDoc->intSubset)
 	ctxt->valid &= xmlValidateElementDecl(&ctxt->vctxt, ctxt->myDoc, elem);
@ -396,15 +478,25 @@ notationDecl(void *ctx, const xmlChar *name,
 	     const xmlChar *publicId, const xmlChar *systemId)
 {
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
-    xmlNotationPtr nota;
+    xmlNotationPtr nota = NULL;

 #ifdef DEBUG_SAX
    fprintf(stderr, "SAX.notationDecl(%s, %s, %s)\n", name, publicId, systemId);
 #endif

+    /*
+     * Register the notation in the subset being parsed: inSubset == 1
+     * targets the internal subset, inSubset == 2 the external subset.
+     */
+    if (ctxt->inSubset == 1)
 	nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->intSubset, name,
                               publicId, systemId);
-    if (nota == 0) ctxt->valid = 0;
+    else if (ctxt->inSubset == 2)
+	nota = xmlAddNotationDecl(&ctxt->vctxt, ctxt->myDoc->extSubset, name,
+                              publicId, systemId);
+    else {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt, 
+	     "SAX.notationDecl(%s) called while not in subset\n", name);
+	return;
+    }
+    if (nota == NULL) ctxt->valid = 0;
    if (ctxt->validate && ctxt->wellFormed &&
        ctxt->myDoc && ctxt->myDoc->intSubset)
 	ctxt->valid &= xmlValidateNotationDecl(&ctxt->vctxt, ctxt->myDoc,
@ -518,6 +610,7 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
    xmlAttrPtr ret;
    xmlChar *name;
    xmlChar *ns;
+    xmlChar *nval;
    xmlNsPtr namespace;

 /****************
@ -528,7 +621,15 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
    /*
     * Split the full name into a namespace prefix and the tag name
     */
-    name = xmlSplitQName(fullname, &ns);
+    name = xmlSplitQName(ctxt, fullname, &ns);
+
+    /*
+     * Do the last stage of the attribute normalization
+     */
+    nval = xmlValidNormalizeAttributeValue(ctxt->myDoc,
+			       ctxt->node, fullname, value);
+    if (nval != NULL)
+	value = nval;

    /*
     * Check whether it's a namespace definition
@ -540,15 +641,28 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
 	xmlNewNs(ctxt->node, value, NULL);
 	if (name != NULL) 
 	    xmlFree(name);
+	if (nval != NULL)
+	    xmlFree(nval);
 	return;
    }
    if ((ns != NULL) && (ns[0] == 'x') && (ns[1] == 'm') && (ns[2] == 'l') &&
        (ns[3] == 'n') && (ns[4] == 's') && (ns[5] == 0)) {
+	/*
+	 * Validate also for namespace decls, they are attributes from
+	 * an XML-1.0 perspective
+	 TODO ... doesn't map well with current API
+        if (ctxt->validate && ctxt->wellFormed &&
+	    ctxt->myDoc && ctxt->myDoc->intSubset)
+	    ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
+					       ctxt->node, ret, value);
+	 */
 	/* a standard namespace definition */
 	xmlNewNs(ctxt->node, value, name);
 	xmlFree(ns);
 	if (name != NULL) 
 	    xmlFree(name);
+	if (nval != NULL)
+	    xmlFree(nval);
 	return;
    }

@ -562,17 +676,52 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
    ret = xmlNewNsProp(ctxt->node, namespace, name, NULL);

    if (ret != NULL) {
-        if ((ctxt->replaceEntities == 0) && (!ctxt->html))
-	    ret->val = xmlStringGetNodeList(ctxt->myDoc, value);
-	else
-	    ret->val = xmlNewDocText(ctxt->myDoc, value);
+        if ((ctxt->replaceEntities == 0) && (!ctxt->html)) {
+	    xmlNodePtr tmp;
+
+	    ret->children = xmlStringGetNodeList(ctxt->myDoc, value);
+	    tmp = ret->children;
+	    while (tmp != NULL) {
+		tmp->parent = (xmlNodePtr) ret;
+		if (tmp->next == NULL)
+		    ret->last = tmp;
+		tmp = tmp->next;
+	    }
+	} else {
+	    ret->children = xmlNewDocText(ctxt->myDoc, value);
+	    ret->last = ret->children;
+	    if (ret->children != NULL)
+		ret->children->parent = (xmlNodePtr) ret;
+	}
    }

    if (ctxt->validate && ctxt->wellFormed &&
-        ctxt->myDoc && ctxt->myDoc->intSubset)
+        ctxt->myDoc && ctxt->myDoc->intSubset) {
+	
+	/*
+	 * If we don't substitute entities, the validation should be
+	 * done on a value with replaced entities anyway.
+	 */
+        if (!ctxt->replaceEntities) {
+	    xmlChar *val;
+
+	    ctxt->depth++;
+	    val = xmlStringDecodeEntities(ctxt, value, XML_SUBSTITUTE_REF,
+		                          0,0,0);
+	    ctxt->depth--;
+	    if (val == NULL)
+		ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
+				ctxt->myDoc, ctxt->node, ret, value);
+	    else {
+		ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt,
+			        ctxt->myDoc, ctxt->node, ret, val);
+                xmlFree(val);
+	    }
+	} else {
 	    ctxt->valid &= xmlValidateOneAttribute(&ctxt->vctxt, ctxt->myDoc,
 					       ctxt->node, ret, value);
-    else {
+	}
+    } else {
        /*
 	 * when validating, the ID registration is done at the attribute
 	 * validation level. Otherwise we have to do specific handling here.
@ -583,6 +732,8 @@ attribute(void *ctx, const xmlChar *fullname, const xmlChar *value)
 	    xmlAddRef(&ctxt->vctxt, ctxt->myDoc, value, ret);
    }

+    if (nval != NULL)
+	xmlFree(nval);
    if (name != NULL) 
 	xmlFree(name);
    if (ns != NULL) 
@ -634,7 +785,7 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
    /*
     * Split the full name into a namespace prefix and the tag name
     */
-    name = xmlSplitQName(fullname, &prefix);
+    name = xmlSplitQName(ctxt, fullname, &prefix);


    /*
@ -644,13 +795,13 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
     */
    ret = xmlNewDocNode(ctxt->myDoc, NULL, name, NULL);
    if (ret == NULL) return;
-    if (ctxt->myDoc->root == NULL) {
+    if (ctxt->myDoc->children == NULL) {
 #ifdef DEBUG_SAX_TREE
 	fprintf(stderr, "Setting %s as root\n", name);
 #endif
-        ctxt->myDoc->root = ret;
+        xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
    } else if (parent == NULL) {
-        parent = ctxt->myDoc->root;
+        parent = ctxt->myDoc->children;
    }

    /*
@ -679,6 +830,15 @@ startElement(void *ctx, const xmlChar *fullname, const xmlChar **atts)
 	}
    }

+    /*
+     * If it's the Document root, finish the Dtd validation and
+     * check the document root element for validity
+     */
+    if ((ctxt->validate) && (ctxt->vctxt.finishDtd == 0)) {
+	ctxt->valid &= xmlValidateDtdFinal(&ctxt->vctxt, ctxt->myDoc);
+	ctxt->valid &= xmlValidateRoot(&ctxt->vctxt, ctxt->myDoc);
+	ctxt->vctxt.finishDtd = 1;
+    }
    /*
     * process all the attributes whose name start with "xml"
     */
@ -790,6 +950,9 @@ reference(void *ctx, const xmlChar *name)
 #ifdef DEBUG_SAX
    fprintf(stderr, "SAX.reference(%s)\n", name);
 #endif
+    if (name[0] == '#')
+	ret = xmlNewCharRef(ctxt->myDoc, name);
+    else
 	ret = xmlNewReference(ctxt->myDoc, name);
 #ifdef DEBUG_SAX_TREE
    fprintf(stderr, "add reference %s to %s \n", name, ctxt->node->name);
@ -884,32 +1047,36 @@ processingInstruction(void *ctx, const xmlChar *target,

    ret = xmlNewPI(target, data);
    if (ret == NULL) return;
-    ret->doc = ctxt->myDoc;
-    if (ctxt->myDoc->root == NULL) {
+    parent = ctxt->node;
+
+    if (ctxt->inSubset == 1) {
+	xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
+	return;
+    } else if (ctxt->inSubset == 2) {
+	xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
+	return;
+    }
+    if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
 #ifdef DEBUG_SAX_TREE
 	    fprintf(stderr, "Setting PI %s as root\n", target);
 #endif
-        ctxt->myDoc->root = ret;
-    } else if (parent == NULL) {
-        parent = ctxt->myDoc->root;
+        xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
+	return;
    }
-    if (parent != NULL) {
    if (parent->type == XML_ELEMENT_NODE) {
 #ifdef DEBUG_SAX_TREE
-	    fprintf(stderr, "adding PI child %s to %s\n", target, parent->name);
+	fprintf(stderr, "adding PI %s child to %s\n", target, parent->name);
 #endif
 	xmlAddChild(parent, ret);
    } else {
 #ifdef DEBUG_SAX_TREE
-	    fprintf(stderr, "adding PI sibling %s to ", target);
+	fprintf(stderr, "adding PI %s sibling to ", target);
 	xmlDebugDumpOneNode(stderr, parent, 0);
 #endif
 	xmlAddSibling(parent, ret);
    }
 }

-}
-
 /**
 * globalNamespace:
 * @ctx: the user data (XML parser context)
@ -1064,15 +1231,20 @@ comment(void *ctx, const xmlChar *value)
    ret = xmlNewDocComment(ctxt->myDoc, value);
    if (ret == NULL) return;

-    if (ctxt->myDoc->root == NULL) {
+    if (ctxt->inSubset == 1) {
+	xmlAddChild((xmlNodePtr) ctxt->myDoc->intSubset, ret);
+	return;
+    } else if (ctxt->inSubset == 2) {
+	xmlAddChild((xmlNodePtr) ctxt->myDoc->extSubset, ret);
+	return;
+    }
+    if ((ctxt->myDoc->children == NULL) || (parent == NULL)) {
 #ifdef DEBUG_SAX_TREE
 	    fprintf(stderr, "Setting comment as root\n");
 #endif
-        ctxt->myDoc->root = ret;
-    } else if (parent == NULL) {
-        parent = ctxt->myDoc->root;
+        xmlAddChild((xmlNodePtr) ctxt->myDoc, (xmlNodePtr) ret);
+	return;
    }
-    if (parent != NULL) {
    if (parent->type == XML_ELEMENT_NODE) {
 #ifdef DEBUG_SAX_TREE
 	fprintf(stderr, "adding comment child to %s\n", parent->name);
@ -1086,7 +1258,6 @@ comment(void *ctx, const xmlChar *value)
 	xmlAddSibling(parent, ret);
    }
 }
-}

 /**
 * cdataBlock:
@ -1148,6 +1319,7 @@ xmlSAXHandler xmlDefaultSAXHandler = {
    xmlParserError,
    getParameterEntity,
    cdataBlock,
+    externalSubset,
 };

 /**
@ -1159,6 +1331,7 @@ void
 xmlDefaultSAXHandlerInit(void)
 {
    xmlDefaultSAXHandler.internalSubset = internalSubset;
+    xmlDefaultSAXHandler.externalSubset = externalSubset;
    xmlDefaultSAXHandler.isStandalone = isStandalone;
    xmlDefaultSAXHandler.hasInternalSubset = hasInternalSubset;
    xmlDefaultSAXHandler.hasExternalSubset = hasExternalSubset;
@ -1181,6 +1354,9 @@ xmlDefaultSAXHandlerInit(void)
    xmlDefaultSAXHandler.ignorableWhitespace = ignorableWhitespace;
    xmlDefaultSAXHandler.processingInstruction = processingInstruction;
    xmlDefaultSAXHandler.comment = comment;
+    if (xmlGetWarningsDefaultValue == 0)
+	xmlDefaultSAXHandler.warning = NULL;
+    else
 	xmlDefaultSAXHandler.warning = xmlParserWarning;
    xmlDefaultSAXHandler.error = xmlParserError;
    xmlDefaultSAXHandler.fatalError = xmlParserError;
@ -1216,6 +1392,7 @@ xmlSAXHandler htmlDefaultSAXHandler = {
    xmlParserError,
    getParameterEntity,
    NULL,
+    NULL,
 };

 /**
@ -1227,6 +1404,7 @@ void
 htmlDefaultSAXHandlerInit(void)
 {
    htmlDefaultSAXHandler.internalSubset = NULL;
+    htmlDefaultSAXHandler.externalSubset = NULL;
    htmlDefaultSAXHandler.isStandalone = NULL;
    htmlDefaultSAXHandler.hasInternalSubset = NULL;
    htmlDefaultSAXHandler.hasExternalSubset = NULL;
--- a/configure.in
+++ b/configure.in
@ -3,9 +3,9 @@ AC_PREREQ(2.2)
 AC_INIT(entities.h)
 AM_CONFIG_HEADER(config.h)

-LIBXML_MAJOR_VERSION=1
-LIBXML_MINOR_VERSION=8
-LIBXML_MICRO_VERSION=7
+LIBXML_MAJOR_VERSION=2
+LIBXML_MINOR_VERSION=0
+LIBXML_MICRO_VERSION=0
 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
 LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION

@ -15,7 +15,7 @@ AC_SUBST(LIBXML_MICRO_VERSION)
 AC_SUBST(LIBXML_VERSION)
 AC_SUBST(LIBXML_VERSION_INFO)

-VERSION=$LIBXML_VERSION
+VERSION=$LIBXML_VERSION-beta

 AM_INIT_AUTOMAKE(libxml, $VERSION)

--- a/debugXML.c
+++ b/debugXML.c
@ -22,6 +22,7 @@
 #include "xmlmemory.h"
 #include "tree.h"
 #include "parser.h"
+#include "valid.h"
 #include "debugXML.h"
 #include "HTMLtree.h"
 #include "HTMLparser.h"
@ -38,6 +39,315 @@ void xmlDebugDumpString(FILE *output, const xmlChar *str) {
    fprintf(output, "...");
 }

+/**
+ * xmlDebugDumpDtd:
+ * @output:  the FILE * receiving the dump
+ * @dtd:  the DTD node to describe
+ * @depth:  nesting level; two spaces of indentation per level, at most 25
+ *
+ * Print a one-line description of a DTD node (name, PUBLIC and SYSTEM
+ * identifiers), then report as "PBM:" lines any inconsistency found in
+ * its parent/doc/prev/next links.
+ */
+void xmlDebugDumpDtd(FILE *output, xmlDtdPtr dtd, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix: two spaces per level, capped at 25 */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* shift is data: print it through "%s", never as the format itself */
+    fprintf(output, "%s", shift);
+
+    if (dtd->type != XML_DTD_NODE) {
+	fprintf(output, "PBM: not a DTD\n");
+	return;
+    }
+    if (dtd->name != NULL)
+	fprintf(output, "DTD(%s)", dtd->name);
+    else
+	fprintf(output, "DTD");
+    if (dtd->ExternalID != NULL)
+	fprintf(output, ", PUBLIC %s", dtd->ExternalID);
+    if (dtd->SystemID != NULL)
+	fprintf(output, ", SYSTEM %s", dtd->SystemID);
+    fprintf(output, "\n");
+    /*
+     * Do a bit of checking
+     */
+    if (dtd->parent == NULL)
+	fprintf(output, "PBM: Dtd has no parent\n");
+    if (dtd->doc == NULL)
+	fprintf(output, "PBM: Dtd has no doc\n");
+    if ((dtd->parent != NULL) && (dtd->doc != dtd->parent->doc))
+	fprintf(output, "PBM: Dtd doc differs from parent's one\n");
+    /* A node without prev must be its parent's first child */
+    if (dtd->prev == NULL) {
+	if ((dtd->parent != NULL) && (dtd->parent->children != (xmlNodePtr)dtd))
+	    fprintf(output, "PBM: Dtd has no prev and not first of list\n");
+    } else {
+	if (dtd->prev->next != (xmlNodePtr) dtd)
+	    fprintf(output, "PBM: Dtd prev->next : back link wrong\n");
+    }
+    /* A node without next must be its parent's last child */
+    if (dtd->next == NULL) {
+	if ((dtd->parent != NULL) && (dtd->parent->last != (xmlNodePtr) dtd))
+	    fprintf(output, "PBM: Dtd has no next and not last of list\n");
+    } else {
+	if (dtd->next->prev != (xmlNodePtr) dtd)
+	    fprintf(output, "PBM: Dtd next->prev : forward link wrong\n");
+    }
+}
+
+/**
+ * xmlDebugDumpAttrDecl:
+ * @output:  the FILE * receiving the dump
+ * @attr:  the attribute declaration to describe
+ * @depth:  nesting level; two spaces of indentation per level, at most 25
+ *
+ * Print a one-line description of an attribute declaration: its name,
+ * the element it is declared for, its type, up to five values of its
+ * enumeration, its default kind and its default value. Then report as
+ * "PBM:" lines any inconsistency found in its parent/doc/prev/next links.
+ */
+void xmlDebugDumpAttrDecl(FILE *output, xmlAttributePtr attr, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix: two spaces per level, capped at 25 */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* shift is data: print it through "%s", never as the format itself */
+    fprintf(output, "%s", shift);
+
+    if (attr->type != XML_ATTRIBUTE_DECL) {
+	fprintf(output, "PBM: not a Attr\n");
+	return;
+    }
+    if (attr->name != NULL)
+	fprintf(output, "ATTRDECL(%s)", attr->name);
+    else
+	fprintf(output, "PBM ATTRDECL noname!!!");
+    if (attr->elem != NULL)
+	fprintf(output, " for %s", attr->elem);
+    else
+	fprintf(output, " PBM noelem!!!");
+    switch (attr->atype) {
+        case XML_ATTRIBUTE_CDATA:
+	    fprintf(output, " CDATA");
+	    break;
+        case XML_ATTRIBUTE_ID:
+	    fprintf(output, " ID");
+	    break;
+        case XML_ATTRIBUTE_IDREF:
+	    fprintf(output, " IDREF");
+	    break;
+        case XML_ATTRIBUTE_IDREFS:
+	    fprintf(output, " IDREFS");
+	    break;
+        case XML_ATTRIBUTE_ENTITY:
+	    fprintf(output, " ENTITY");
+	    break;
+        case XML_ATTRIBUTE_ENTITIES:
+	    fprintf(output, " ENTITIES");
+	    break;
+        case XML_ATTRIBUTE_NMTOKEN:
+	    fprintf(output, " NMTOKEN");
+	    break;
+        case XML_ATTRIBUTE_NMTOKENS:
+	    fprintf(output, " NMTOKENS");
+	    break;
+        case XML_ATTRIBUTE_ENUMERATION:
+	    fprintf(output, " ENUMERATION");
+	    break;
+        case XML_ATTRIBUTE_NOTATION:
+	    fprintf(output, " NOTATION ");
+	    break;
+    }
+    if (attr->tree != NULL) {
+	xmlEnumerationPtr cur = attr->tree;
+
+	/*
+	 * Show at most five enumeration values; the indentation counter
+	 * is no longer needed, so it is reused rather than shadowed.
+	 */
+	for (i = 0;i < 5; i++) {
+	    if (i != 0)
+		fprintf(output, "|%s", cur->name);
+	    else
+		fprintf(output, " (%s", cur->name);
+	    cur = cur->next;
+	    if (cur == NULL) break;
+	}
+	/* cur still set means the list was cut short */
+	if (cur == NULL)
+	    fprintf(output, ")");
+	else
+	    fprintf(output, "...)");
+    }
+    switch (attr->def) {
+        case XML_ATTRIBUTE_NONE:
+	    break;
+        case XML_ATTRIBUTE_REQUIRED:
+	    fprintf(output, " REQUIRED");
+	    break;
+        case XML_ATTRIBUTE_IMPLIED:
+	    fprintf(output, " IMPLIED");
+	    break;
+        case XML_ATTRIBUTE_FIXED:
+	    fprintf(output, " FIXED");
+	    break;
+    }
+    if (attr->defaultValue != NULL) {
+	fprintf(output, "\"");
+	xmlDebugDumpString(output, attr->defaultValue);
+	fprintf(output, "\"");
+    }
+    /* end the line on the requested stream, not on stdout */
+    fprintf(output, "\n");
+
+    /*
+     * Do a bit of checking
+     */
+    if (attr->parent == NULL)
+	fprintf(output, "PBM: Attr has no parent\n");
+    if (attr->doc == NULL)
+	fprintf(output, "PBM: Attr has no doc\n");
+    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
+	fprintf(output, "PBM: Attr doc differs from parent's one\n");
+    if (attr->prev == NULL) {
+	if ((attr->parent != NULL) && (attr->parent->children != (xmlNodePtr)attr))
+	    fprintf(output, "PBM: Attr has no prev and not first of list\n");
+    } else {
+	if (attr->prev->next != (xmlNodePtr) attr)
+	    fprintf(output, "PBM: Attr prev->next : back link wrong\n");
+    }
+    if (attr->next == NULL) {
+	if ((attr->parent != NULL) && (attr->parent->last != (xmlNodePtr) attr))
+	    fprintf(output, "PBM: Attr has no next and not last of list\n");
+    } else {
+	if (attr->next->prev != (xmlNodePtr) attr)
+	    fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
+    }
+}
+
+/**
+ * xmlDebugDumpElemDecl:
+ * @output:  the FILE * receiving the dump
+ * @elem:  the element declaration to describe
+ * @depth:  nesting level; two spaces of indentation per level, at most 25
+ *
+ * Print a one-line description of an element declaration: its name,
+ * its content type and its content model. Then report as "PBM:" lines
+ * any inconsistency found in its parent/doc/prev/next links.
+ */
+void xmlDebugDumpElemDecl(FILE *output, xmlElementPtr elem, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix: two spaces per level, capped at 25 */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* shift is data: print it through "%s", never as the format itself */
+    fprintf(output, "%s", shift);
+
+    if (elem->type != XML_ELEMENT_DECL) {
+	fprintf(output, "PBM: not a Elem\n");
+	return;
+    }
+    if (elem->name != NULL)
+	fprintf(output, "ELEMDECL(%s)", elem->name);
+    else
+	fprintf(output, "PBM ELEMDECL noname!!!");
+    switch (elem->etype) {
+	case XML_ELEMENT_TYPE_EMPTY: 
+	    fprintf(output, ", EMPTY");
+	    break;
+	case XML_ELEMENT_TYPE_ANY: 
+	    fprintf(output, ", ANY");
+	    break;
+	case XML_ELEMENT_TYPE_MIXED: 
+	    fprintf(output, ", MIXED ");
+	    break;
+	case XML_ELEMENT_TYPE_ELEMENT: 
+	    /* element-only content: must not be reported as MIXED */
+	    fprintf(output, ", ELEMENT ");
+	    break;
+    }
+    if (elem->content != NULL) {
+	char buf[1001];
+
+	buf[0] = 0;
+	/*
+	 * NOTE(review): xmlSprintfElementContent() receives no buffer
+	 * size here; presumably a large content model can overrun buf
+	 * before the terminator below is written - confirm against its
+	 * implementation.
+	 */
+	xmlSprintfElementContent(buf, elem->content, 1);
+	buf[1000] = 0;
+	fprintf(output, "%s", buf);
+    }
+    /* end the line on the requested stream, not on stdout */
+    fprintf(output, "\n");
+
+    /*
+     * Do a bit of checking
+     */
+    if (elem->parent == NULL)
+	fprintf(output, "PBM: Elem has no parent\n");
+    if (elem->doc == NULL)
+	fprintf(output, "PBM: Elem has no doc\n");
+    if ((elem->parent != NULL) && (elem->doc != elem->parent->doc))
+	fprintf(output, "PBM: Elem doc differs from parent's one\n");
+    if (elem->prev == NULL) {
+	if ((elem->parent != NULL) && (elem->parent->children != (xmlNodePtr)elem))
+	    fprintf(output, "PBM: Elem has no prev and not first of list\n");
+    } else {
+	if (elem->prev->next != (xmlNodePtr) elem)
+	    fprintf(output, "PBM: Elem prev->next : back link wrong\n");
+    }
+    if (elem->next == NULL) {
+	if ((elem->parent != NULL) && (elem->parent->last != (xmlNodePtr) elem))
+	    fprintf(output, "PBM: Elem has no next and not last of list\n");
+    } else {
+	if (elem->next->prev != (xmlNodePtr) elem)
+	    fprintf(output, "PBM: Elem next->prev : forward link wrong\n");
+    }
+}
+
+/**
+ * xmlDebugDumpEntityDecl:
+ * @output:  the FILE * receiving the dump
+ * @ent:  the entity declaration to describe
+ * @depth:  nesting level; two spaces of indentation per level, at most 25
+ *
+ * Print a description of an entity declaration: its name and kind on
+ * one line, followed by one indented line each for its ExternalID,
+ * SystemID and content when present. Then report as "PBM:" lines any
+ * inconsistency found in its parent/doc/prev/next links.
+ */
+void xmlDebugDumpEntityDecl(FILE *output, xmlEntityPtr ent, int depth) {
+    int i;
+    char shift[100];
+
+    /* Build the indentation prefix: two spaces per level, capped at 25 */
+    for (i = 0;((i < depth) && (i < 25));i++)
+        shift[2 * i] = shift[2 * i + 1] = ' ';
+    shift[2 * i] = shift[2 * i + 1] = 0;
+
+    /* shift is data: print it through "%s", never as the format itself */
+    fprintf(output, "%s", shift);
+
+    if (ent->type != XML_ENTITY_DECL) {
+	fprintf(output, "PBM: not a Entity decl\n");
+	return;
+    }
+    if (ent->name != NULL)
+	fprintf(output, "ENTITYDECL(%s)", ent->name);
+    else
+	fprintf(output, "PBM ENTITYDECL noname!!!");
+    /* each case also terminates the header line */
+    switch (ent->etype) {
+	case XML_INTERNAL_GENERAL_ENTITY: 
+	    fprintf(output, ", internal\n");
+	    break;
+	case XML_EXTERNAL_GENERAL_PARSED_ENTITY: 
+	    fprintf(output, ", external parsed\n");
+	    break;
+	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY: 
+	    fprintf(output, ", unparsed\n");
+	    break;
+	case XML_INTERNAL_PARAMETER_ENTITY: 
+	    fprintf(output, ", parameter\n");
+	    break;
+	case XML_EXTERNAL_PARAMETER_ENTITY: 
+	    fprintf(output, ", external parameter\n");
+	    break;
+	case XML_INTERNAL_PREDEFINED_ENTITY: 
+	    fprintf(output, ", predefined\n");
+	    break;
+    }
+    if (ent->ExternalID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "ExternalID=%s\n", ent->ExternalID);
+    }
+    if (ent->SystemID) {
+        fprintf(output, "%s", shift);
+        fprintf(output, "SystemID=%s\n", ent->SystemID);
+    }
+    if (ent->content) {
+        fprintf(output, "%s", shift);
+	fprintf(output, "content=");
+	xmlDebugDumpString(output, ent->content);
+	fprintf(output, "\n");
+    }
+
+    /*
+     * Do a bit of checking
+     */
+    if (ent->parent == NULL)
+	fprintf(output, "PBM: Ent has no parent\n");
+    if (ent->doc == NULL)
+	fprintf(output, "PBM: Ent has no doc\n");
+    if ((ent->parent != NULL) && (ent->doc != ent->parent->doc))
+	fprintf(output, "PBM: Ent doc differs from parent's one\n");
+    if (ent->prev == NULL) {
+	if ((ent->parent != NULL) && (ent->parent->children != (xmlNodePtr)ent))
+	    fprintf(output, "PBM: Ent has no prev and not first of list\n");
+    } else {
+	if (ent->prev->next != (xmlNodePtr) ent)
+	    fprintf(output, "PBM: Ent prev->next : back link wrong\n");
+    }
+    if (ent->next == NULL) {
+	if ((ent->parent != NULL) && (ent->parent->last != (xmlNodePtr) ent))
+	    fprintf(output, "PBM: Ent has no next and not last of list\n");
+    } else {
+	if (ent->next->prev != (xmlNodePtr) ent)
+	    fprintf(output, "PBM: Ent next->prev : forward link wrong\n");
+    }
+}
+
 void xmlDebugDumpNamespace(FILE *output, xmlNsPtr ns, int depth) {
    int i;
    char shift[100];
@ -74,7 +384,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
    shift[2 * i] = shift[2 * i + 1] = 0;

    fprintf(output, shift);
-    switch (ent->type) {
+    switch (ent->etype) {
        case XML_INTERNAL_GENERAL_ENTITY:
 	    fprintf(output, "INTERNAL_GENERAL_ENTITY ");
 	    break;
@ -91,7 +401,7 @@ void xmlDebugDumpEntity(FILE *output, xmlEntityPtr ent, int depth) {
 	    fprintf(output, "EXTERNAL_PARAMETER_ENTITY ");
 	    break;
 	default:
-	    fprintf(output, "ENTITY_%d ! ", ent->type);
+	    fprintf(output, "ENTITY_%d ! ", ent->etype);
    }
    fprintf(output, "%s\n", ent->name);
    if (ent->ExternalID) {
@ -119,9 +429,31 @@ void xmlDebugDumpAttr(FILE *output, xmlAttrPtr attr, int depth) {
    shift[2 * i] = shift[2 * i + 1] = 0;

    fprintf(output, shift);
+
    fprintf(output, "ATTRIBUTE %s\n", attr->name);
-    if (attr->val != NULL) 
-        xmlDebugDumpNodeList(output, attr->val, depth + 1);
+    if (attr->children != NULL) 
+        xmlDebugDumpNodeList(output, attr->children, depth + 1);
+
+    /*
+     * Do a bit of checking
+     */
+    if (attr->parent == NULL)
+	fprintf(output, "PBM: Attr has no parent\n");
+    if (attr->doc == NULL)
+	fprintf(output, "PBM: Attr has no doc\n");
+    if ((attr->parent != NULL) && (attr->doc != attr->parent->doc))
+	fprintf(output, "PBM: Attr doc differs from parent's one\n");
+    if (attr->prev == NULL) {
+	if ((attr->parent != NULL) && (attr->parent->properties != attr))
+	    fprintf(output, "PBM: Attr has no prev and not first of list\n");
+    } else {
+	if (attr->prev->next != attr)
+	    fprintf(output, "PBM: Attr prev->next : back link wrong\n");
+    }
+    if (attr->next != NULL) {
+	if (attr->next->prev != attr)
+	    fprintf(output, "PBM: Attr next->prev : forward link wrong\n");
+    }
 }

 void xmlDebugDumpAttrList(FILE *output, xmlAttrPtr attr, int depth) {
@ -139,9 +471,9 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
        shift[2 * i] = shift[2 * i + 1] = ' ';
    shift[2 * i] = shift[2 * i + 1] = 0;

-    fprintf(output, shift);
    switch (node->type) {
 	case XML_ELEMENT_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "ELEMENT ");
 	    if (node->ns != NULL)
 	        fprintf(output, "%s:%s\n", node->ns->prefix, node->name);
@ -149,40 +481,63 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
 	        fprintf(output, "%s\n", node->name);
 	    break;
 	case XML_ATTRIBUTE_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "Error, ATTRIBUTE found here\n");
 	    break;
 	case XML_TEXT_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "TEXT\n");
 	    break;
 	case XML_CDATA_SECTION_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "CDATA_SECTION\n");
 	    break;
 	case XML_ENTITY_REF_NODE:
-	    fprintf(output, "ENTITY_REF\n");
+	    fprintf(output, shift);
+	    fprintf(output, "ENTITY_REF(%s)\n", node->name);
 	    break;
 	case XML_ENTITY_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "ENTITY\n");
 	    break;
 	case XML_PI_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "PI %s\n", node->name);
 	    break;
 	case XML_COMMENT_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "COMMENT\n");
 	    break;
 	case XML_DOCUMENT_NODE:
 	case XML_HTML_DOCUMENT_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "Error, DOCUMENT found here\n");
 	    break;
 	case XML_DOCUMENT_TYPE_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "DOCUMENT_TYPE\n");
 	    break;
 	case XML_DOCUMENT_FRAG_NODE:
+	    fprintf(output, shift);
 	    fprintf(output, "DOCUMENT_FRAG\n");
 	    break;
 	case XML_NOTATION_NODE:
 	    fprintf(output, "NOTATION\n");
 	    break;
+	case XML_DTD_NODE:
+	    xmlDebugDumpDtd(output, (xmlDtdPtr) node, depth);
+	    return;
+	case XML_ELEMENT_DECL:
+	    xmlDebugDumpElemDecl(output, (xmlElementPtr) node, depth);
+	    return;
+	case XML_ATTRIBUTE_DECL:
+	    xmlDebugDumpAttrDecl(output, (xmlAttributePtr) node, depth);
+	    return;
+        case XML_ENTITY_DECL:
+	    xmlDebugDumpEntityDecl(output, (xmlEntityPtr) node, depth);
+	    return;
 	default:
+	    fprintf(output, shift);
 	    fprintf(output, "NODE_%d\n", node->type);
    }
    if (node->doc == NULL) {
@ -210,12 +565,35 @@ void xmlDebugDumpOneNode(FILE *output, xmlNodePtr node, int depth) {
 	if (ent != NULL)
 	    xmlDebugDumpEntity(output, ent, depth + 1);
    }
+    /*
+     * Do a bit of checking
+     */
+    if (node->parent == NULL)
+	fprintf(output, "PBM: Node has no parent\n");
+    if (node->doc == NULL)
+	fprintf(output, "PBM: Node has no doc\n");
+    if ((node->parent != NULL) && (node->doc != node->parent->doc))
+	fprintf(output, "PBM: Node doc differs from parent's one\n");
+    if (node->prev == NULL) {
+	if ((node->parent != NULL) && (node->parent->children != node))
+	    fprintf(output, "PBM: Node has no prev and not first of list\n");
+    } else {
+	if (node->prev->next != node)
+	    fprintf(output, "PBM: Node prev->next : back link wrong\n");
+    }
+    if (node->next == NULL) {
+	if ((node->parent != NULL) && (node->parent->last != node))
+	    fprintf(output, "PBM: Node has no next and not last of list\n");
+    } else {
+	if (node->next->prev != node)
+	    fprintf(output, "PBM: Node next->prev : forward link wrong\n");
+    }
 }

 void xmlDebugDumpNode(FILE *output, xmlNodePtr node, int depth) {
    xmlDebugDumpOneNode(output, node, depth);
-    if (node->childs != NULL)
-	xmlDebugDumpNodeList(output, node->childs, depth + 1);
+    if (node->children != NULL)
+	xmlDebugDumpNodeList(output, node->children, depth + 1);
 }

 void xmlDebugDumpNodeList(FILE *output, xmlNodePtr node, int depth) {
@ -306,8 +684,8 @@ void xmlDebugDumpDocument(FILE *output, xmlDocPtr doc) {
    xmlDebugDumpDocumentHead(output, doc);
    if (((doc->type == XML_DOCUMENT_NODE) ||
         (doc->type == XML_HTML_DOCUMENT_NODE)) &&
-        (doc->root != NULL))
-        xmlDebugDumpNodeList(output, doc->root, 1);
+        (doc->children != NULL))
+        xmlDebugDumpNodeList(output, doc->children, 1);
 }    

 void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
@ -368,27 +746,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
 	                            doc->intSubset->entities;
 	fprintf(output, "Entities in internal subset\n");
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
+	    cur = table->table[i];
 	    fprintf(output, "%d : %s : ", i, cur->name);
-	    switch (cur->type) {
+	    switch (cur->etype) {
 		case XML_INTERNAL_GENERAL_ENTITY:
-		    fprintf(output, "INTERNAL GENERAL");
+		    fprintf(output, "INTERNAL GENERAL, ");
 		    break;
 		case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
-		    fprintf(output, "EXTERNAL PARSED");
+		    fprintf(output, "EXTERNAL PARSED, ");
 		    break;
 		case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
-		    fprintf(output, "EXTERNAL UNPARSED");
+		    fprintf(output, "EXTERNAL UNPARSED, ");
 		    break;
 		case XML_INTERNAL_PARAMETER_ENTITY:
-		    fprintf(output, "INTERNAL PARAMETER");
+		    fprintf(output, "INTERNAL PARAMETER, ");
 		    break;
 		case XML_EXTERNAL_PARAMETER_ENTITY:
-		    fprintf(output, "EXTERNAL PARAMETER");
+		    fprintf(output, "EXTERNAL PARAMETER, ");
 		    break;
 		default:
 		    fprintf(output, "UNKNOWN TYPE %d",
-			    cur->type);
+			    cur->etype);
 	    }
 	    if (cur->ExternalID != NULL) 
 	        fprintf(output, "ID \"%s\"", cur->ExternalID);
@ -407,27 +785,27 @@ void xmlDebugDumpEntities(FILE *output, xmlDocPtr doc) {
 	                            doc->extSubset->entities;
 	fprintf(output, "Entities in external subset\n");
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
+	    cur = table->table[i];
 	    fprintf(output, "%d : %s : ", i, cur->name);
-	    switch (cur->type) {
+	    switch (cur->etype) {
 		case XML_INTERNAL_GENERAL_ENTITY:
-		    fprintf(output, "INTERNAL GENERAL");
+		    fprintf(output, "INTERNAL GENERAL, ");
 		    break;
 		case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
-		    fprintf(output, "EXTERNAL PARSED");
+		    fprintf(output, "EXTERNAL PARSED, ");
 		    break;
 		case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
-		    fprintf(output, "EXTERNAL UNPARSED");
+		    fprintf(output, "EXTERNAL UNPARSED, ");
 		    break;
 		case XML_INTERNAL_PARAMETER_ENTITY:
-		    fprintf(output, "INTERNAL PARAMETER");
+		    fprintf(output, "INTERNAL PARAMETER, ");
 		    break;
 		case XML_EXTERNAL_PARAMETER_ENTITY:
-		    fprintf(output, "EXTERNAL PARAMETER");
+		    fprintf(output, "EXTERNAL PARAMETER, ");
 		    break;
 		default:
 		    fprintf(output, "UNKNOWN TYPE %d",
-			    cur->type);
+			    cur->etype);
 	    }
 	    if (cur->ExternalID != NULL) 
 	        fprintf(output, "ID \"%s\"", cur->ExternalID);
@ -449,14 +827,14 @@ static int xmlLsCountNode(xmlNodePtr node) {

    switch (node->type) {
 	case XML_ELEMENT_NODE:
-	    list = node->childs;
+	    list = node->children;
 	    break;
 	case XML_DOCUMENT_NODE:
 	case XML_HTML_DOCUMENT_NODE:
-	    list = ((xmlDocPtr) node)->root;
+	    list = ((xmlDocPtr) node)->children;
 	    break;
 	case XML_ATTRIBUTE_NODE:
-	    list = ((xmlAttrPtr) node)->val;
+	    list = ((xmlAttrPtr) node)->children;
 	    break;
 	case XML_TEXT_NODE:
 	case XML_CDATA_SECTION_NODE:
@ -475,6 +853,10 @@ static int xmlLsCountNode(xmlNodePtr node) {
 	case XML_ENTITY_NODE:
 	case XML_DOCUMENT_FRAG_NODE:
 	case XML_NOTATION_NODE:
+	case XML_DTD_NODE:
+        case XML_ELEMENT_DECL:
+        case XML_ATTRIBUTE_DECL:
+        case XML_ENTITY_DECL:
 	    ret = 1;
 	    break;
    }
@ -621,9 +1003,9 @@ xmlShellList(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr node,

    if ((node->type == XML_DOCUMENT_NODE) ||
        (node->type == XML_HTML_DOCUMENT_NODE)) {
-        cur = ((xmlDocPtr) node)->root;
-    } else if (node->childs != NULL) {
-        cur = node->childs;
+        cur = ((xmlDocPtr) node)->children;
+    } else if (node->children != NULL) {
+        cur = node->children;
    } else {
 	xmlLsOneNode(stdout, node);
        return(0);
@ -910,10 +1292,10 @@ xmlShellDu(xmlShellCtxtPtr ctxt, char *arg, xmlNodePtr tree,

        if ((node->type == XML_DOCUMENT_NODE) ||
            (node->type == XML_HTML_DOCUMENT_NODE)) {
-	    node = ((xmlDocPtr) node)->root;
-        } else if (node->childs != NULL) {
+	    node = ((xmlDocPtr) node)->children;
+        } else if (node->children != NULL) {
 	    /* deep first */
-	    node = node->childs;
+	    node = node->children;
 	    indent++;
 	} else if ((node != tree) && (node->next != NULL)) {
 	    /* then siblings */
@ -1008,7 +1390,7 @@ xmlShellPwd(xmlShellCtxtPtr ctxt, char *buffer, xmlNodePtr node,
 	} else if (cur->type == XML_ATTRIBUTE_NODE) {
 	    sep = '@';
 	    name = (const char *) (((xmlAttrPtr) cur)->name);
-	    next = ((xmlAttrPtr) cur)->node;
+	    next = ((xmlAttrPtr) cur)->parent;
 	} else {
 	    next = cur->parent;
 	}
--- a/encoding.c
+++ b/encoding.c
@ -35,14 +35,11 @@
 #include <stdlib.h>
 #endif
 #include "encoding.h"
-#ifdef HAVE_UNICODE_H
-#include <unicode.h>
-#endif
 #include "xmlmemory.h"

-#ifdef HAVE_UNICODE_H
+xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
+xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;

-#else /* ! HAVE_UNICODE_H */
 /*
 * From rfc2044: encoding of the Unicode values on UTF-8:
 *
@ -54,6 +51,50 @@
 * I hope we won't use values > 0xFFFF anytime soon !
 */

+/**
+ * xmlCheckUTF8: Check utf-8 string for legality.
+ * @utf: Pointer to putative utf-8 encoded string.
+ *
+ * Checks @utf for being valid utf-8. @utf is assumed to be
+ * null-terminated. This function is not super-strict, as it will
+ * allow longer utf-8 sequences than necessary. Note that Java is
+ * capable of producing these sequences if provoked. Also note, this
+ * routine checks for the 4-byte maximum size, but does not check for
+ * 0x10ffff maximum value. A NULL @utf, or a continuation byte
+ * (10xxxxxx) found where a lead byte is expected, is rejected.
+ *
+ * Return value: true (1) if @utf is valid, 0 otherwise.
+ **/
+int
+xmlCheckUTF8(const unsigned char *utf)
+{
+    int ix = 0;
+    unsigned char c;
+
+    if (utf == NULL)
+        return(0);
+
+    /*
+     * Dispatch on the lead byte. The || chains below short-circuit, so a
+     * trailing byte is only read once every byte before it is known to be
+     * a non-zero continuation byte: the terminating 0 is never passed.
+     */
+    while ((c = utf[ix]) != 0) {
+        if ((c & 0x80) == 0x00) {
+            /* 1-byte code: 0xxxxxxx */
+            ix++;
+        } else if ((c & 0xe0) == 0xc0) {
+            /* 2-byte code: 110xxxxx 10xxxxxx */
+            if ((utf[ix + 1] & 0xc0) != 0x80)
+                return(0);
+            ix += 2;
+        } else if ((c & 0xf0) == 0xe0) {
+            /* 3-byte code: 1110xxxx 10xxxxxx 10xxxxxx */
+            if (((utf[ix + 1] & 0xc0) != 0x80) ||
+                ((utf[ix + 2] & 0xc0) != 0x80))
+                return(0);
+            ix += 3;
+        } else if ((c & 0xf8) == 0xf0) {
+            /* 4-byte code: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+            if (((utf[ix + 1] & 0xc0) != 0x80) ||
+                ((utf[ix + 2] & 0xc0) != 0x80) ||
+                ((utf[ix + 3] & 0xc0) != 0x80))
+                return(0);
+            ix += 4;
+        } else {
+            /* stray continuation byte (10xxxxxx) or 5+ byte lead (11111xxx) */
+            return(0);
+        }
+    }
+    return(1);
+}
+
+
 /**
 * isolat1ToUTF8:
 * @out:  a pointer to an array of bytes to store the result
@ -66,27 +107,27 @@
 * Returns the number of byte written, or -1 by lack of space.
 */
 int
-isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
-{
+isolat1ToUTF8(unsigned char* out, int outlen,
+              const unsigned char* in, int *inlen) {
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
-    unsigned char* inend= in+inlen;
+    const unsigned char* inend= in+*inlen;
    unsigned char c;

    while (in < inend) {
        c= *in++;
        if (c < 0x80) {
-            if (out >= outend)  return -1;
+            if (out >= outend)  return(-1);
            *out++ = c;
        }
        else {
-            if (out >= outend)  return -1;
+            if (out >= outend)  return(-1);
            *out++ = 0xC0 | (c >> 6);
-            if (out >= outend)  return -1;
+            if (out >= outend)  return(-1);
            *out++ = 0x80 | (0x3F & c);
        }
    }
-    return out-outstart;
+    return(out-outstart);
 }

 /**
@ -101,138 +142,398 @@ isolat1ToUTF8(unsigned char* out, int outlen, unsigned char* in, int inlen)
 * TODO: UTF8Toisolat1 need a fallback mechanism ...
 *
 * Returns the number of byte written, or -1 by lack of space, or -2
- *     if the transcoding failed.
+ *     if the transcoding fails (because @in is not a valid UTF-8 string or
+ *     the result of the transformation can't fit into the target encoding).
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is positive, otherwise it is unpredictable.
 */
 int
-UTF8Toisolat1(unsigned char* out, int outlen, unsigned char* in, int inlen)
-{
+UTF8Toisolat1(unsigned char* out, int outlen,
+              const unsigned char* in, int *inlen) {
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
-    unsigned char* inend= in+inlen;
+    const unsigned char* inend= in+*inlen;
    unsigned char c;

    while (in < inend) {
        c= *in++;
        if (c < 0x80) {
-            if (out >= outend)  return -1;
+            if (out >= outend)  return(-1);
            *out++= c;
        }
-        else if (((c & 0xFE) == 0xC2) && in<inend) {
-            if (out >= outend)  return -1;
+	else if (in == inend) {
+	    /* lead byte is the last input byte: report it as not consumed */
+            *inlen -= 1;
+            break;
+	}
+	else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
+	    /* a two-byte UTF-8 sequence that can be encoded as ISO Latin-1 */
+	    /* never write past the end of the output buffer */
+            if (out >= outend)  return(-1);
            *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
 	}
-        else  return -2;
+	else
+	    return(-2);
+	/* TODO : some should be represented as "&#x____;" */
    }
-    return out-outstart;
+    return(out-outstart);
 }

 /**
- * UTF16ToUTF8:
+ * UTF16LEToUTF8:
 * @out:  a pointer to an array of bytes to store the result
 * @outlen:  the length of @out
- * @in:  a pointer to an array of UTF-16 chars (array of unsigned shorts)
- * @inlen:  the length of @in
+ * @inb:  a pointer to an array of UTF-16LE chars, passed as a byte array
+ * @inlenb:  pointer to the length of @inb, in bytes
 *
- * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
- * block of chars out.
- * Returns the number of byte written, or -1 by lack of space.
+ * Take a block of UTF-16LE ushorts in and try to convert it to an UTF-8
+ * block of chars out. This function assumes the endian property
+ * is the same between the native type of this machine and the
+ * input one.
+ *
+ * Returns the number of bytes written, or -1 for lack of space, or -2
+ *     if the transcoding fails (because @inb is not a valid UTF-16 string).
+ *     The value of *inlenb after return is the number of octets consumed
+ *     if the return value is positive, otherwise it is unpredictable.
 */
 int
-UTF16ToUTF8(unsigned char* out, int outlen, unsigned short* in, int inlen)
+UTF16LEToUTF8(unsigned char* out, int outlen,
+            const unsigned char* inb, int *inlenb)
 {
    unsigned char* outstart= out;
    unsigned char* outend= out+outlen;
-    unsigned short* inend= in+inlen;
-    unsigned int c, d;
+    unsigned short* in = (unsigned short*) inb;
+    unsigned short* inend;
+    unsigned int c, d, inlen;
+    unsigned char *tmp;
    int bits;

+    if ((*inlenb % 2) == 1)
+        (*inlenb)--;
+    inlen = *inlenb / 2;
+    inend= in + inlen;
    while (in < inend) {
+#ifdef BIG_ENDIAN
+	tmp = (unsigned char *) in;
+	c = *tmp++;
+	c = c | (((unsigned int)*tmp) << 8);
+	in++;
+#else /* BIG_ENDIAN */
        c= *in++;
+#endif /* BIG_ENDIAN */
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
-            if ((in<inend) && (((d=*in++) & 0xFC00) == 0xDC00)) {
+            if (in >= inend) {           /* (in > inend) shouldn't happens */
+                (*inlenb) -= 2;
+                break;
+            }
+#ifdef BIG_ENDIAN
+            tmp = (unsigned char *) in;
+            d = *tmp++;
+	    d = d | (((unsigned int)*tmp) << 8);
+	    in++;
+#else /* BIG_ENDIAN */
+            d = *in++;
+#endif /* BIG_ENDIAN */
+            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
-            else  return -1;
+            else
+	        return(-2);
        }

 	/* assertion: c is a single UTF-4 value */
-
-        if (out >= outend)  return -1;
+        if (out >= outend)
+	    return(-1);
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
-        else if (c <   0x800) {  *out++= (c >>  6) | 0xC0;  bits=  0; }
-        else if (c < 0x10000) {  *out++= (c >> 12) | 0xE0;  bits=  6; }
-        else                  {  *out++= (c >> 18) | 0xF0;  bits= 12; }
+        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
 
-        for ( ; bits > 0; bits-= 6) {
-            if (out >= outend)  return -1;
-            *out++= (c >> bits) & 0x3F;
+        for ( ; bits >= 0; bits-= 6) {
+            if (out >= outend)
+	        return(-1);
+            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
    }
-    return out-outstart;
+    return(out-outstart);
 }

 /**
- * UTF8ToUTF16:
- * @out:  a pointer to an array of shorts to store the result
- * @outlen:  the length of @out (number of shorts)
+ * UTF8ToUTF16LE:
+ * @outb:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @outb
 * @in:  a pointer to an array of UTF-8 chars
 * @inlen:  the length of @in
 *
- * Take a block of UTF-8 chars in and try to convert it to an UTF-16
+ * Take a block of UTF-8 chars in and try to convert it to an UTF-16LE
 * block of chars out.
- * TODO: UTF8ToUTF16 need a fallback mechanism ...
+ * TODO: UTF8ToUTF16LE need a fallback mechanism ...
 *
 * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding failed. 
 */
 int
-UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
+UTF8ToUTF16LE(unsigned char* outb, int outlen,
+            const unsigned char* in, int *inlen)
 {
+    unsigned short* out = (unsigned short*) outb;
    unsigned short* outstart= out;
-    unsigned short* outend= out+outlen;
-    unsigned char* inend= in+inlen;
+    unsigned short* outend;
+    const unsigned char* inend= in+*inlen;
    unsigned int c, d, trailing;
+#ifdef BIG_ENDIAN
+    unsigned char *tmp;
+    unsigned short tmp1, tmp2;
+#endif /* BIG_ENDIAN */

+    outlen /= 2; /* convert in short length */
+    outend = out + outlen;
    while (in < inend) {
      d= *in++;
      if      (d < 0x80)  { c= d; trailing= 0; }
-      else if (d < 0xC0)  return -2;    /* trailing byte in leading position */
+      else if (d < 0xC0)
+          return(-2);    /* trailing byte in leading position */
      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
-      else return -2;    /* no chance for this in UTF-16 */
+      else
+          return(-2);    /* no chance for this in UTF-16 */
+
+      if (inend - in < trailing) {
+          *inlen -= (inend - in);
+          break;
+      } 

      for ( ; trailing; trailing--) {
-          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  return -1;
+          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
+	      return(-1);
          c <<= 6;
          c |= d & 0x3F;
      }

      /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
-            if (out >= outend)  return -1;
+            if (out >= outend)
+	        return(-1);
+#ifdef BIG_ENDIAN
+            tmp = (unsigned char *) out;
+            *tmp = c ;
+            *(tmp + 1) = c >> 8 ;
+            out++;
+#else /* BIG_ENDIAN */
            *out++ = c;
+#endif /* BIG_ENDIAN */
        }
        else if (c < 0x110000) {
-            if (out+1 >= outend)  return -1;
+            if (out+1 >= outend)
+	        return(-1);
            c -= 0x10000;
+#ifdef BIG_ENDIAN
+            tmp1 = 0xD800 | (c >> 10);
+            tmp = (unsigned char *) out;
+            *tmp = tmp1;
+            *(tmp + 1) = tmp1 >> 8;
+            out++;
+
+            tmp2 = 0xDC00 | (c & 0x03FF);
+            tmp = (unsigned char *) out;
+            *tmp  = tmp2;
+            *(tmp + 1) = tmp2 >> 8;
+            out++;
+#else /* BIG_ENDIAN */
            *out++ = 0xD800 | (c >> 10);
            *out++ = 0xDC00 | (c & 0x03FF);
+#endif /* BIG_ENDIAN */
        }
-        else  return -1;
+        else
+	    return(-1);
    }
-    return out-outstart;
+    return(out-outstart);
 }

-#endif /* ! HAVE_UNICODE_H */
+/**
+ * UTF16BEToUTF8:
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @inb:  a pointer to an array of UTF-16 chars, passed as a byte array
+ * @inlenb:  pointer to the length of @inb, in bytes
+ *
+ * Take a block of UTF-16 ushorts in and try to convert it to an UTF-8
+ * block of chars out. This function assumes the endian property
+ * is the same between the native type of this machine and the
+ * input one.
+ *
+ * Returns the number of bytes written, or -1 for lack of space, or -2
+ *     if the transcoding fails (because @inb is not a valid UTF-16 string).
+ * The value of *inlenb after return is the number of octets consumed
+ *     if the return value is positive, otherwise it is unpredictable.
+ */
+int
+UTF16BEToUTF8(unsigned char* out, int outlen,
+            const unsigned char* inb, int *inlenb)
+{
+    unsigned char* outstart= out;
+    unsigned char* outend= out+outlen;
+    unsigned short* in = (unsigned short*) inb;
+    unsigned short* inend;
+    unsigned int c, d, inlen;
+#ifdef BIG_ENDIAN
+#else /* BIG_ENDIAN */
+    unsigned char *tmp;
+#endif /* BIG_ENDIAN */    
+    int bits;
+
+    if ((*inlenb % 2) == 1)
+        (*inlenb)--;
+    inlen = *inlenb / 2;
+    inend= in + inlen;
+    while (in < inend) {
+#ifdef BIG_ENDIAN    
+        c= *in++;
+#else
+        tmp = (unsigned char *) in;
+	c = *tmp++;
+	c = c << 8;
+	c = c | (unsigned int) *tmp;
+	in++;
+#endif	
+        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
+	    if (in >= inend) {           /* (in > inend) shouldn't happens */
+	        (*inlenb) -= 2;
+		break;
+	    }
+
+#ifdef BIG_ENDIAN
+            d= *in++;
+#else
+            tmp = (unsigned char *) in;
+	    d = *tmp++;
+	    d = d << 8;
+	    d = d | (unsigned int) *tmp;
+	    in++;
+#endif	    
+            if ((d & 0xFC00) == 0xDC00) {
+                c &= 0x03FF;
+                c <<= 10;
+                c |= d & 0x03FF;
+                c += 0x10000;
+            }
+            else 
+	        return(-2);
+        }
+
+	/* assertion: c is a single UTF-4 value */
+        if (out >= outend) 
+	    return(-1);
+        if      (c <    0x80) {  *out++=  c;                bits= -6; }
+        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
+        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
+        else                  {  *out++= ((c >> 18) & 0x07) | 0xF0;  bits= 12; }
+ 
+        for ( ; bits >= 0; bits-= 6) {
+            if (out >= outend) 
+	        return(-1);
+            *out++= ((c >> bits) & 0x3F) | 0x80;
+        }
+    }
+    return(out-outstart);
+}
+
+/**
+ * UTF8ToUTF16BE:
+ * @outb:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @outb
+ * @in:  a pointer to an array of UTF-8 chars
+ * @inlen:  the length of @in
+ *
+ * Take a block of UTF-8 chars in and try to convert it to an UTF-16BE
+ * block of chars out.
+ * TODO: UTF8ToUTF16BE need a fallback mechanism ...
+ *
+ * Returns the number of byte written, or -1 by lack of space, or -2
+ *     if the transcoding failed. 
+ */
+int
+UTF8ToUTF16BE(unsigned char* outb, int outlen,
+            const unsigned char* in, int *inlen)
+{
+    unsigned short* out = (unsigned short*) outb;
+    unsigned short* outstart= out;
+    unsigned short* outend;
+    const unsigned char* inend= in+*inlen;
+    unsigned int c, d, trailing;
+#ifdef BIG_ENDIAN
+#else
+    unsigned char *tmp;
+    unsigned short tmp1, tmp2;
+#endif /* BIG_ENDIAN */    
+
+    outlen /= 2; /* convert in short length */
+    outend = out + outlen;
+    while (in < inend) {
+      d= *in++;
+      if      (d < 0x80)  { c= d; trailing= 0; }
+      else if (d < 0xC0)
+          return(-2);    /* trailing byte in leading position */
+      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
+      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
+      else
+          return(-2);    /* no chance for this in UTF-16 */
+
+      if (inend - in < trailing) {
+          *inlen -= (inend - in);
+          break;
+      } 
+
+      for ( ; trailing; trailing--) {
+          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  return(-1);
+          c <<= 6;
+          c |= d & 0x3F;
+      }
+
+      /* assertion: c is a single UTF-4 value */
+        if (c < 0x10000) {
+            if (out >= outend)  return(-1);
+#ifdef BIG_ENDIAN
+            *out++ = c;
+#else
+            tmp = (unsigned char *) out;
+            *tmp = c >> 8;
+            *(tmp + 1) = c;
+            out++;
+#endif /* BIG_ENDIAN */
+        }
+        else if (c < 0x110000) {
+            if (out+1 >= outend)  return(-1);
+            c -= 0x10000;
+#ifdef BIG_ENDIAN
+            *out++ = 0xD800 | (c >> 10);
+            *out++ = 0xDC00 | (c & 0x03FF);
+#else
+            tmp1 = 0xD800 | (c >> 10);
+            tmp = (unsigned char *) out;
+            *tmp = tmp1 >> 8;
+            *(tmp + 1) = tmp1;
+            out++;
+
+            tmp2 = 0xDC00 | (c & 0x03FF);
+            tmp = (unsigned char *) out;
+            *tmp = tmp2 >> 8;
+            *(tmp + 1) = tmp2;
+            out++;
+#endif
+        }
+        else  return(-1);
+    }
+    return(out-outstart);
+}

 /**
 * xmlDetectCharEncoding:
 * @in:  a pointer to the first bytes of the XML entity, must be at least
 *       4 bytes long.
+ * @len:  the length of the buffer, in bytes
 *
 * Guess the encoding of the entity using the first bytes of the entity content
 * accordingly of the non-normative appendix F of the XML-1.0 recommendation.
@ -240,8 +541,9 @@ UTF8ToUTF16(unsigned short* out, int outlen, unsigned char* in, int inlen)
 * Returns one of the XML_CHAR_ENCODING_... values.
 */
 xmlCharEncoding
-xmlDetectCharEncoding(const unsigned char* in)
+xmlDetectCharEncoding(const unsigned char* in, int len)
 {
+    if (len >= 4) {
 	if ((in[0] == 0x00) && (in[1] == 0x00) &&
 	    (in[2] == 0x00) && (in[3] == 0x3C))
 	    return(XML_CHAR_ENCODING_UCS4BE);
@ -254,16 +556,19 @@ xmlDetectCharEncoding(const unsigned char* in)
 	if ((in[0] == 0x00) && (in[1] == 0x3C) &&
 	    (in[2] == 0x00) && (in[3] == 0x00))
 	    return(XML_CHAR_ENCODING_UCS4_3412);
-    if ((in[0] == 0xFE) && (in[1] == 0xFF))
-	return(XML_CHAR_ENCODING_UTF16BE);
-    if ((in[0] == 0xFF) && (in[1] == 0xFE))
-	return(XML_CHAR_ENCODING_UTF16LE);
 	if ((in[0] == 0x4C) && (in[1] == 0x6F) &&
 	    (in[2] == 0xA7) && (in[3] == 0x94))
 	    return(XML_CHAR_ENCODING_EBCDIC);
 	if ((in[0] == 0x3C) && (in[1] == 0x3F) &&
 	    (in[2] == 0x78) && (in[3] == 0x6D))
 	    return(XML_CHAR_ENCODING_UTF8);
+    }
+    if (len >= 2) {
+	if ((in[0] == 0xFE) && (in[1] == 0xFF))
+	    return(XML_CHAR_ENCODING_UTF16BE);
+	if ((in[0] == 0xFF) && (in[1] == 0xFE))
+	    return(XML_CHAR_ENCODING_UTF16LE);
+    }
    return(XML_CHAR_ENCODING_NONE);
 }

@ -364,7 +669,8 @@ static xmlCharEncodingHandlerPtr xmlDefaultCharEncodingHandler = NULL;
 * Returns the xmlCharEncodingHandlerPtr created (or NULL in case of error).
 */
 xmlCharEncodingHandlerPtr
-xmlNewCharEncodingHandler(const char *name, xmlCharEncodingInputFunc input,
+xmlNewCharEncodingHandler(const char *name, 
+                          xmlCharEncodingInputFunc input,
                          xmlCharEncodingOutputFunc output) {
    xmlCharEncodingHandlerPtr handler;
    char upper[500];
@ -429,11 +735,11 @@ xmlInitCharEncodingHandlers(void) {
 	return;
    }
    xmlNewCharEncodingHandler("UTF-8", NULL, NULL);
-#ifdef HAVE_UNICODE_H
-#else
-    /* xmlNewCharEncodingHandler("UTF-16", UTF16ToUTF8, UTF8ToUTF16); */
+    xmlUTF16LEHandler = 
+          xmlNewCharEncodingHandler("UTF-16LE", UTF16LEToUTF8, UTF8ToUTF16LE);
+    xmlUTF16BEHandler = 
+          xmlNewCharEncodingHandler("UTF-16BE", UTF16BEToUTF8, UTF8ToUTF16BE);
    xmlNewCharEncodingHandler("ISO-8859-1", isolat1ToUTF8, UTF8Toisolat1);
-#endif
 }

 /**
@ -493,7 +799,52 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
 xmlCharEncodingHandlerPtr
 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
    if (handlers == NULL) xmlInitCharEncodingHandlers();
-    /* TODO xmlGetCharEncodingHandler !!!!!!! */
+    switch (enc) {
+        case XML_CHAR_ENCODING_ERROR:
+	    return(NULL);
+        case XML_CHAR_ENCODING_NONE:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UTF8:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UTF16LE:
+	    return(xmlUTF16LEHandler);
+        case XML_CHAR_ENCODING_UTF16BE:
+	    return(xmlUTF16BEHandler);
+        case XML_CHAR_ENCODING_EBCDIC:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UCS4LE:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UCS4BE:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UCS4_2143:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UCS4_3412:
+	    return(NULL);
+        case XML_CHAR_ENCODING_UCS2:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_1:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_2:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_3:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_4:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_5:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_6:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_7:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_8:
+	    return(NULL);
+        case XML_CHAR_ENCODING_8859_9:
+	    return(NULL);
+        case XML_CHAR_ENCODING_2022_JP:
+        case XML_CHAR_ENCODING_SHIFT_JIS:
+        case XML_CHAR_ENCODING_EUC_JP:
+	    return(NULL);
+    }
    return(NULL);
 }

--- a/encoding.h
+++ b/encoding.h
@ -67,11 +67,11 @@ typedef enum {
 * Returns the number of byte written, or -1 by lack of space.
 */
 typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
-                                         unsigned char* in, int inlen);
+                                         const unsigned char* in, int *inlen);


 /**
- * xmlCharEncodingInputFunc:
+ * xmlCharEncodingOutputFunc:
 * @out:  a pointer ot an array of bytes to store the result
 * @outlen:  the lenght of @out
 * @in:  a pointer ot an array of UTF-8 chars
@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
 *     if the transcoding failed.
 */
 typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
-                                          unsigned char* in, int inlen);
+                                          const unsigned char* in, int *inlen);

 /*
 * Block defining the handlers for non UTF-8 encodings.
@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
 void	xmlInitCharEncodingHandlers	(void);
 void	xmlCleanupCharEncodingHandlers	(void);
 void	xmlRegisterCharEncodingHandler	(xmlCharEncodingHandlerPtr handler);
-xmlCharEncoding xmlDetectCharEncoding	(const unsigned char* in);
+xmlCharEncoding xmlDetectCharEncoding	(const unsigned char* in,
+					 int len);
 xmlCharEncoding xmlParseCharEncoding	(const char* name);
 xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
 xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
+int	xmlCheckUTF8			(const unsigned char *utf);


 #ifdef __cplusplus
--- a/entities.c
+++ b/entities.c
@ -21,6 +21,8 @@
 #include "entities.h"
 #include "parser.h"

+#define DEBUG_ENT_REF /* debugging of cross entities dependancies */
+
 /*
 * The XML predefined entities.
 */
@ -45,6 +47,8 @@ xmlEntitiesTablePtr xmlPredefinedEntities = NULL;
 void xmlFreeEntity(xmlEntityPtr entity) {
    if (entity == NULL) return;

+    if (entity->children)
+	xmlFreeNodeList(entity->children);
    if (entity->name != NULL)
 	xmlFree((char *) entity->name);
    if (entity->ExternalID != NULL)
@ -55,22 +59,31 @@ void xmlFreeEntity(xmlEntityPtr entity) {
        xmlFree((char *) entity->content);
    if (entity->orig != NULL)
        xmlFree((char *) entity->orig);
+#ifdef WITH_EXTRA_ENT_DETECT
+    if (entity->entTab != NULL) {
+	int i;
+
+	for (i = 0; i < entity->entNr; i++)
+	    xmlFree(entity->entTab[i]);
+	xmlFree(entity->entTab);
+    }
+#endif
    memset(entity, -1, sizeof(xmlEntity));
+    xmlFree(entity);
 }

 /*
 * xmlAddEntity : register a new entity for an entities table.
 */
-static void
+static xmlEntityPtr
 xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
 	  const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
    int i;
-    xmlEntityPtr cur;
-    int len;
+    xmlEntityPtr ret;

    for (i = 0;i < table->nb_entities;i++) {
-        cur = &table->table[i];
-	if (!xmlStrcmp(cur->name, name)) {
+        ret = table->table[i];
+	if (!xmlStrcmp(ret->name, name)) {
 	    /*
 	     * The entity is already defined in this Dtd, the spec says to NOT
 	     * override it ... Is it worth a Warning ??? !!!
@ -78,15 +91,15 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
 	     */
 	    if (((type == XML_INTERNAL_PARAMETER_ENTITY) ||
 	         (type == XML_EXTERNAL_PARAMETER_ENTITY)) &&
-	        ((cur->type == XML_INTERNAL_PARAMETER_ENTITY) ||
-	         (cur->type == XML_EXTERNAL_PARAMETER_ENTITY)))
-		return;
+	        ((ret->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
+	         (ret->etype == XML_EXTERNAL_PARAMETER_ENTITY)))
+		return(NULL);
 	    else
 	    if (((type != XML_INTERNAL_PARAMETER_ENTITY) &&
 	         (type != XML_EXTERNAL_PARAMETER_ENTITY)) &&
-	        ((cur->type != XML_INTERNAL_PARAMETER_ENTITY) &&
-	         (cur->type != XML_EXTERNAL_PARAMETER_ENTITY)))
-		return;
+	        ((ret->etype != XML_INTERNAL_PARAMETER_ENTITY) &&
+	         (ret->etype != XML_EXTERNAL_PARAMETER_ENTITY)))
+		return(NULL);
 	}
    }
    if (table->nb_entities >= table->max_entities) {
@ -94,35 +107,43 @@ xmlAddEntity(xmlEntitiesTablePtr table, const xmlChar *name, int type,
 	 * need more elements.
 	 */
 	table->max_entities *= 2;
-	table->table = (xmlEntityPtr) 
-	    xmlRealloc(table->table, table->max_entities * sizeof(xmlEntity));
+	table->table = (xmlEntityPtr *) 
+	    xmlRealloc(table->table,
+		       table->max_entities * sizeof(xmlEntityPtr));
 	if (table->table == NULL) {
 	    perror("realloc failed");
-	    return;
+	    return(NULL);
 	}
    }
-    cur = &table->table[table->nb_entities];
-    cur->name = xmlStrdup(name);
-    for (len = 0;name[0] != 0;name++)len++;
-    cur->len = len;
-    cur->type = type;
+    ret = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
+    if (ret == NULL) {
+	fprintf(stderr, "xmlAddEntity: out of memory\n");
+	return(NULL);
+    }
+    memset(ret, 0, sizeof(xmlEntity));
+    ret->type = XML_ENTITY_DECL;
+    table->table[table->nb_entities] = ret;
+
+    /*
+     * fill the structure.
+     */
+    ret->name = xmlStrdup(name);
+    ret->etype = type;
    if (ExternalID != NULL)
-	cur->ExternalID = xmlStrdup(ExternalID);
-    else
-        cur->ExternalID = NULL;
+	ret->ExternalID = xmlStrdup(ExternalID);
    if (SystemID != NULL)
-	cur->SystemID = xmlStrdup(SystemID);
-    else
-        cur->SystemID = NULL;
+	ret->SystemID = xmlStrdup(SystemID);
    if (content != NULL) {
-        cur->length = xmlStrlen(content);
-	cur->content = xmlStrndup(content, cur->length);
+        ret->length = xmlStrlen(content);
+	ret->content = xmlStrndup(content, ret->length);
     } else {
-        cur->length = 0;
-        cur->content = NULL;
+        ret->length = 0;
+        ret->content = NULL;
    }
-    cur->orig = NULL;
+    ret->orig = NULL;
    table->nb_entities++;
+
+    return(ret);
 }

 /**
@ -182,7 +203,7 @@ xmlGetPredefinedEntity(const xmlChar *name) {
    if (xmlPredefinedEntities == NULL)
        xmlInitializePredefinedEntities();
    for (i = 0;i < xmlPredefinedEntities->nb_entities;i++) {
-	cur = &xmlPredefinedEntities->table[i];
+	cur = xmlPredefinedEntities->table[i];
 	if (!xmlStrcmp(cur->name, name)) return(cur);
    }
    return(NULL);
@ -197,24 +218,50 @@ xmlGetPredefinedEntity(const xmlChar *name) {
 * @SystemID:  the entity system ID if available
 * @content:  the entity content
 *
- * Register a new entity for this document DTD.
+ * Register a new entity for this document DTD external subset.
+ *
+ * Returns a pointer to the entity or NULL in case of error
 */
-void
+xmlEntityPtr
 xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
-	  const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
+	        const xmlChar *ExternalID, const xmlChar *SystemID,
+		const xmlChar *content) {
    xmlEntitiesTablePtr table;
+    xmlEntityPtr ret;	/* the entity registered by xmlAddEntity(), handed back to the caller */
+    xmlDtdPtr dtd;	/* the external subset that receives the entity */

+    if (doc == NULL) {
+        fprintf(stderr,
+	        "xmlAddDtdEntity: doc == NULL !\n");
+	return(NULL);
+    }
    if (doc->extSubset == NULL) {
        fprintf(stderr,
 	        "xmlAddDtdEntity: document without external subset !\n");
-	return;
+	return(NULL);
    }
-    table = (xmlEntitiesTablePtr) doc->extSubset->entities;
+    dtd = doc->extSubset;
+    table = (xmlEntitiesTablePtr) dtd->entities;
    if (table == NULL) {
        table = xmlCreateEntitiesTable();
-	doc->extSubset->entities = table;
+	dtd->entities = table;	/* NOTE(review): table may be NULL if xmlCreateEntitiesTable() failed; xmlAddEntity() dereferences it */
    }
-    xmlAddEntity(table, name, type, ExternalID, SystemID, content);
+    ret = xmlAddEntity(table, name, type, ExternalID, SystemID, content);
+    if (ret == NULL) return(NULL);	/* allocation failed, or xmlAddEntity() declined a redefinition */
+
+    /*
+     * Link it to the Dtd: the entity is appended at the tail of the Dtd's list of children
+     */
+    ret->parent = dtd;
+    ret->doc = dtd->doc;
+    if (dtd->last == NULL) {	/* no node linked under this Dtd yet */
+	dtd->children = dtd->last = (xmlNodePtr) ret;
+    } else {
+        dtd->last->next = (xmlNodePtr) ret;
+	ret->prev = dtd->last;
+	dtd->last = (xmlNodePtr) ret;
+    }
+    return(ret);
 }

 /**
@ -227,29 +274,186 @@ xmlAddDtdEntity(xmlDocPtr doc, const xmlChar *name, int type,
 * @content:  the entity content
 *
 * Register a new entity for this document.
+ *
+ * Returns a pointer to the entity or NULL in case of error
 */
-void
+xmlEntityPtr
 xmlAddDocEntity(xmlDocPtr doc, const xmlChar *name, int type,
-	  const xmlChar *ExternalID, const xmlChar *SystemID, const xmlChar *content) {
+	        const xmlChar *ExternalID, const xmlChar *SystemID,
+	        const xmlChar *content) {
    xmlEntitiesTablePtr table;
+    xmlEntityPtr ret;	/* the entity registered by xmlAddEntity(), handed back to the caller */
+    xmlDtdPtr dtd;	/* the internal subset that receives the entity */

    if (doc == NULL) {
        fprintf(stderr,
 	        "xmlAddDocEntity: document is NULL !\n");
-	return;
+	return(NULL);
    }
    if (doc->intSubset == NULL) {
        fprintf(stderr,
 	        "xmlAddDtdEntity: document without internal subset !\n");
-	return;
+	return(NULL);	/* NOTE(review): the message just above names xmlAddDtdEntity, not this function */
    }
+    dtd = doc->intSubset;
    table = (xmlEntitiesTablePtr) doc->intSubset->entities;
    if (table == NULL) {
        table = xmlCreateEntitiesTable();
 	doc->intSubset->entities = table;
    }
-    xmlAddEntity(table, name, type, ExternalID, SystemID, content);
+    ret = xmlAddEntity(table, name, type, ExternalID, SystemID, content);
+    if (ret == NULL) return(NULL);	/* allocation failed, or xmlAddEntity() declined a redefinition */
+
+    /*
+     * Link it to the Dtd: the entity is appended at the tail of the Dtd's list of children
+     */
+    ret->parent = dtd;
+    ret->doc = dtd->doc;
+    if (dtd->last == NULL) {	/* no node linked under this Dtd yet */
+	dtd->children = dtd->last = (xmlNodePtr) ret;
+    } else {
+	dtd->last->next = (xmlNodePtr) ret;
+	ret->prev = dtd->last;
+	dtd->last = (xmlNodePtr) ret;
    }
+    return(ret);
+}
+
+#ifdef WITH_EXTRA_ENT_DETECT
+/**
+ * xmlEntityCheckReference:
+ * @ent:  the entity whose recorded references are explored
+ * @to:  the entity name looked for among those references
+ *
+ * Searches the names recorded in @ent's reference table, and recursively
+ * the tables of the entities those names resolve to, for @to.  Names are
+ * resolved with xmlGetDocEntity() when @ent is a general entity and with
+ * xmlGetDtdEntity() when it is a parameter entity; for any other entity
+ * type they are not followed further.
+ *
+ * Function to keep track of references and detect cycles (well formedness
+ * errors !).
+ *
+ * Returns: 1 if @to was found, 0 if not, -1 when @ent, @to or the
+ *      document of @ent is NULL.
+ */
+int
+xmlEntityCheckReference(xmlEntityPtr ent, const xmlChar *to) {
+    int idx;
+    xmlDocPtr owner;
+    xmlEntityPtr next;
+
+    if ((ent == NULL) || (to == NULL))
+	return(-1);
+    owner = ent->doc;
+    if (owner == NULL)
+	return(-1);
+
+#ifdef DEBUG_ENT_REF
+    printf("xmlEntityCheckReference(%s to %s)\n", ent->name, to);
+#endif
+
+    for (idx = 0; idx < ent->entNr; idx++) {
+	/* direct hit: @to is one of the names @ent refers to */
+	if (xmlStrcmp(to, ent->entTab[idx]) == 0)
+	    return(1);
+
+	/* resolve the recorded name according to the kind of @ent */
+	if ((ent->etype == XML_INTERNAL_GENERAL_ENTITY) ||
+	    (ent->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY))
+	    next = xmlGetDocEntity(owner, ent->entTab[idx]);
+	else if ((ent->etype == XML_INTERNAL_PARAMETER_ENTITY) ||
+		 (ent->etype == XML_EXTERNAL_PARAMETER_ENTITY))
+	    next = xmlGetDtdEntity(owner, ent->entTab[idx]);
+	else
+	    next = NULL;
+
+	/* a NULL @next yields -1 from the recursive call: not a loop */
+	if (xmlEntityCheckReference(next, to) == 1)
+	    return(1);
+    }
+    return(0);
+}
+
+/**
+ * xmlEntityAddReference:
+ * @ent:  an existing entity
+ * @to:  the entity name it's referencing
+ *
+ * Function to register reuse of an existing entity from a (new) one
+ * Used to keep track of references and detect cycles (well formedness
+ * errors !).  An allocation failure leaves earlier references intact.
+ *
+ * Returns: 0 if Okay, -1 in case of general error, 1 in case of loop 
+ *      detection.
+ */
+int
+xmlEntityAddReference(xmlEntityPtr ent, const xmlChar *to) {
+    int i;
+    xmlDocPtr doc;
+    xmlEntityPtr indir = NULL;
+
+    if (ent == NULL) return(-1);
+    if (to == NULL) return(-1);
+
+    doc = ent->doc;
+    if (doc == NULL) return(-1);
+
+#ifdef DEBUG_ENT_REF
+    printf("xmlEntityAddReference(%s to %s)\n", ent->name, to);
+#endif
+    if (ent->entTab == NULL) {
+	ent->entNr = 0;
+	ent->entMax = 5;
+	ent->entTab = (xmlChar **) xmlMalloc(ent->entMax * sizeof(xmlChar *));
+	if (ent->entTab == NULL) {
+	    fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
+	    return(-1);
+	}
+    }
+
+    /* nothing to do when the reference is already registered */
+    for (i = 0;i < ent->entNr;i++) {
+	if (!xmlStrcmp(to, ent->entTab[i]))
+	    return(0);
+    }
+
+    /* would @to, directly or not, lead back to @ent ? */
+    switch (ent->etype) {
+	case XML_INTERNAL_GENERAL_ENTITY:
+	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+	    indir = xmlGetDocEntity(doc, to);
+	    break;
+	case XML_INTERNAL_PARAMETER_ENTITY:
+	case XML_EXTERNAL_PARAMETER_ENTITY:
+	    indir = xmlGetDtdEntity(doc, to);
+	    break;
+	case XML_INTERNAL_PREDEFINED_ENTITY:
+	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+	    break;
+    }
+    if ((indir != NULL) &&
+	(xmlEntityCheckReference(indir, ent->name) == 1))
+	return(1);
+
+    /*
+     * Grow via a temporary: if xmlRealloc() fails the old table stays
+     * reachable and entNr/entMax keep describing it.
+     */
+    if (ent->entMax <= ent->entNr) {
+	xmlChar **grown = (xmlChar **) xmlRealloc(ent->entTab,
+				   2 * ent->entMax * sizeof(xmlChar *));
+	if (grown == NULL) {
+	    fprintf(stderr, "xmlEntityAddReference: out of memory !\n");
+	    return(-1);
+	}
+	ent->entTab = grown;
+	ent->entMax *= 2;
+    }
+    ent->entTab[ent->entNr++] = xmlStrdup(to);
+    return(0);
+}
+#endif

 /**
 * xmlGetParameterEntity:
@ -270,27 +474,27 @@ xmlGetParameterEntity(xmlDocPtr doc, const xmlChar *name) {
    if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
 	table = (xmlEntitiesTablePtr) doc->intSubset->entities;
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
-	    if (((cur->type ==  XML_INTERNAL_PARAMETER_ENTITY) ||
-	         (cur->type ==  XML_EXTERNAL_PARAMETER_ENTITY)) &&
+	    cur = table->table[i];
+	    if (((cur->etype ==  XML_INTERNAL_PARAMETER_ENTITY) ||
+	         (cur->etype ==  XML_EXTERNAL_PARAMETER_ENTITY)) &&
 		(!xmlStrcmp(cur->name, name))) return(cur);
 	}
    }
    if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
-	    if (((cur->type ==  XML_INTERNAL_PARAMETER_ENTITY) ||
-	         (cur->type ==  XML_EXTERNAL_PARAMETER_ENTITY)) &&
+	    cur = table->table[i];
+	    if (((cur->etype ==  XML_INTERNAL_PARAMETER_ENTITY) ||
+	         (cur->etype ==  XML_EXTERNAL_PARAMETER_ENTITY)) &&
 		(!xmlStrcmp(cur->name, name))) return(cur);
 	}
    }
    if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
-	    if (((cur->type ==  XML_INTERNAL_PARAMETER_ENTITY) ||
-	         (cur->type ==  XML_EXTERNAL_PARAMETER_ENTITY)) &&
+	    cur = table->table[i];
+	    if (((cur->etype ==  XML_INTERNAL_PARAMETER_ENTITY) ||
+	         (cur->etype ==  XML_EXTERNAL_PARAMETER_ENTITY)) &&
 		(!xmlStrcmp(cur->name, name))) return(cur);
 	}
    }
@ -316,9 +520,9 @@ xmlGetDtdEntity(xmlDocPtr doc, const xmlChar *name) {
    if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
-	    if ((cur->type !=  XML_INTERNAL_PARAMETER_ENTITY) &&
-	        (cur->type !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
+	    cur = table->table[i];
+	    if ((cur->etype !=  XML_INTERNAL_PARAMETER_ENTITY) &&
+	        (cur->etype !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
 	        (!xmlStrcmp(cur->name, name))) return(cur);
 	}
    }
@ -345,18 +549,18 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
    if ((doc->intSubset != NULL) && (doc->intSubset->entities != NULL)) {
 	table = (xmlEntitiesTablePtr) doc->intSubset->entities;
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
-	    if ((cur->type !=  XML_INTERNAL_PARAMETER_ENTITY) &&
-	        (cur->type !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
+	    cur = table->table[i];
+	    if ((cur->etype !=  XML_INTERNAL_PARAMETER_ENTITY) &&
+	        (cur->etype !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
 	        (!xmlStrcmp(cur->name, name))) return(cur);
 	}
    }
    if ((doc->extSubset != NULL) && (doc->extSubset->entities != NULL)) {
 	table = (xmlEntitiesTablePtr) doc->extSubset->entities;
 	for (i = 0;i < table->nb_entities;i++) {
-	    cur = &table->table[i];
-	    if ((cur->type !=  XML_INTERNAL_PARAMETER_ENTITY) &&
-	        (cur->type !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
+	    cur = table->table[i];
+	    if ((cur->etype !=  XML_INTERNAL_PARAMETER_ENTITY) &&
+	        (cur->etype !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
 	        (!xmlStrcmp(cur->name, name))) return(cur);
 	}
    }
@ -364,9 +568,9 @@ xmlGetDocEntity(xmlDocPtr doc, const xmlChar *name) {
        xmlInitializePredefinedEntities();
    table = xmlPredefinedEntities;
    for (i = 0;i < table->nb_entities;i++) {
-	cur = &table->table[i];
-	if ((cur->type !=  XML_INTERNAL_PARAMETER_ENTITY) &&
-	    (cur->type !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
+	cur = table->table[i];
+	if ((cur->etype !=  XML_INTERNAL_PARAMETER_ENTITY) &&
+	    (cur->etype !=  XML_EXTERNAL_PARAMETER_ENTITY) &&
 	    (!xmlStrcmp(cur->name, name))) return(cur);
    }

@ -612,6 +816,7 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 	    *out++ = 'o';
 	    *out++ = 't';
 	    *out++ = ';';
+#if 0
 	} else if ((*cur == '\'') && (!html)) {
 	    *out++ = '&';
 	    *out++ = 'a';
@ -619,15 +824,20 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 	    *out++ = 'o';
 	    *out++ = 's';
 	    *out++ = ';';
+#endif
 	} else if (((*cur >= 0x20) && (*cur < 0x80)) ||
 	    (*cur == '\n') || (*cur == '\r') || (*cur == '\t')) {
 	    /*
 	     * default case, just copy !
 	     */
 	    *out++ = *cur;
-#ifndef USE_UTF_8
-	} else if ((sizeof(xmlChar) == 1) && (*cur >= 0x80)) {
-	    char buf[10], *ptr;
+	} else if (*cur >= 0x80) {
+	    if (html) {
+		char buf[15], *ptr;
+
+		/*
+		 * TODO: improve by searching in html40EntitiesTable
+		 */
 #ifdef HAVE_SNPRINTF
 		snprintf(buf, 9, "&#%d;", *cur);
 #else
@ -635,7 +845,80 @@ xmlEncodeEntitiesReentrant(xmlDocPtr doc, const xmlChar *input) {
 #endif
 		ptr = buf;
 		while (*ptr != 0) *out++ = *ptr++;
+	    } else if (doc->encoding != NULL) {
+		/*
+		 * TODO !!!
+		 */
+		*out++ = *cur;
+	    } else {
+		/*
+		 * We assume we have UTF-8 input.
+		 */
+		char buf[10], *ptr;
+		int val = 0, l = 1;
+
+		if (*cur < 0xC0) {
+		    fprintf(stderr,
+			    "xmlEncodeEntitiesReentrant : input not UTF-8\n");
+		    doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
+#ifdef HAVE_SNPRINTF
+		    snprintf(buf, 9, "&#%d;", *cur);
+#else
+		    sprintf(buf, "&#%d;", *cur);
 #endif
+		    ptr = buf;
+		    while (*ptr != 0) *out++ = *ptr++;
+		    continue;
+		} else if (*cur < 0xE0) {
+                    val = (cur[0]) & 0x1F;
+		    val <<= 6;
+		    val |= (cur[1]) & 0x3F;
+		    l = 2;
+		} else if (*cur < 0xF0) {
+                    val = (cur[0]) & 0x0F;
+		    val <<= 6;
+		    val |= (cur[1]) & 0x3F;
+		    val <<= 6;
+		    val |= (cur[2]) & 0x3F;
+		    l = 3;
+		} else if (*cur < 0xF8) {
+                    val = (cur[0]) & 0x07;
+		    val <<= 6;
+		    val |= (cur[1]) & 0x3F;
+		    val <<= 6;
+		    val |= (cur[2]) & 0x3F;
+		    val <<= 6;
+		    val |= (cur[3]) & 0x3F;
+		    l = 4;
+		}
+		if ((l == 1) || (!IS_CHAR(val))) {
+		    fprintf(stderr,
+			"xmlEncodeEntitiesReentrant : char out of range\n");
+		    doc->encoding = xmlStrdup(BAD_CAST "ISO-8859-1");
+#ifdef HAVE_SNPRINTF
+		    snprintf(buf, 9, "&#%d;", *cur);
+#else
+		    sprintf(buf, "&#%d;", *cur);
+#endif
+		    ptr = buf;
+		    while (*ptr != 0) *out++ = *ptr++;
+		    cur++;
+		    continue;
+		}
+		/*
+		 * We could do multiple things here. Just save as a char ref
+		 */
+#ifdef HAVE_SNPRINTF
+		snprintf(buf, 14, "&#x%X;", val);
+#else
+		sprintf(buf, "&#x%X;", val);
+#endif
+		buf[14] = 0;
+		ptr = buf;
+		while (*ptr != 0) *out++ = *ptr++;
+		cur += l;
+		continue;
+	    }
 	} else if (IS_CHAR(*cur)) {
 	    char buf[10], *ptr;

@ -682,11 +965,11 @@ xmlCreateEntitiesTable(void) {
    }
    ret->max_entities = XML_MIN_ENTITIES_TABLE;
    ret->nb_entities = 0;
-    ret->table = (xmlEntityPtr ) 
-         xmlMalloc(ret->max_entities * sizeof(xmlEntity));
+    ret->table = (xmlEntityPtr *) 
+         xmlMalloc(ret->max_entities * sizeof(xmlEntityPtr));
    if (ret == NULL) {
        fprintf(stderr, "xmlCreateEntitiesTable : xmlMalloc(%ld) failed\n",
-	        ret->max_entities * (long)sizeof(xmlEntity));
+	        ret->max_entities * (long)sizeof(xmlEntityPtr));
 	xmlFree(ret);
        return(NULL);
    }
@ -706,7 +989,7 @@ xmlFreeEntitiesTable(xmlEntitiesTablePtr table) {
    if (table == NULL) return;

    for (i = 0;i < table->nb_entities;i++) {
-        xmlFreeEntity(&table->table[i]);
+        xmlFreeEntity(table->table[i]);
    }
    xmlFree(table->table);
    xmlFree(table);
@ -731,8 +1014,8 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
        fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
 	return(NULL);
    }
-    ret->table = (xmlEntityPtr) xmlMalloc(table->max_entities *
-                                         sizeof(xmlEntity));
+    ret->table = (xmlEntityPtr *) xmlMalloc(table->max_entities *
+                                            sizeof(xmlEntityPtr));
    if (ret->table == NULL) {
        fprintf(stderr, "xmlCopyEntitiesTable: out of memory !\n");
 	xmlFree(ret);
@ -741,34 +1024,119 @@ xmlCopyEntitiesTable(xmlEntitiesTablePtr table) {
    ret->max_entities = table->max_entities;
    ret->nb_entities = table->nb_entities;
    for (i = 0;i < ret->nb_entities;i++) {
-	cur = &ret->table[i];
-	ent = &table->table[i];
-	cur->len = ent->len;
-	cur->type = ent->type;
+	cur = (xmlEntityPtr) xmlMalloc(sizeof(xmlEntity));
+	if (cur == NULL) {
+	    fprintf(stderr, "xmlCopyEntityTable: out of memory !\n");
+	    xmlFree(ret);
+	    xmlFree(ret->table);
+	    return(NULL);
+	}
+	memset(cur, 0, sizeof(xmlEntity));
+	cur->type = XML_ELEMENT_DECL;
+	ret->table[i] = cur;
+	ent = table->table[i];
+
+	cur->etype = ent->etype;
 	if (ent->name != NULL)
 	    cur->name = xmlStrdup(ent->name);
-	else
-	    cur->name = NULL;
 	if (ent->ExternalID != NULL)
 	    cur->ExternalID = xmlStrdup(ent->ExternalID);
-	else
-	    cur->ExternalID = NULL;
 	if (ent->SystemID != NULL)
 	    cur->SystemID = xmlStrdup(ent->SystemID);
-	else
-	    cur->SystemID = NULL;
 	if (ent->content != NULL)
 	    cur->content = xmlStrdup(ent->content);
-	else
-	    cur->content = NULL;
 	if (ent->orig != NULL)
 	    cur->orig = xmlStrdup(ent->orig);
-	else
-	    cur->orig = NULL;
    }
    return(ret);
 }

+/*
+ * xmlDumpEntityExternalID: writes the external identifier part of an entity
+ * declaration, " PUBLIC " followed by both identifiers when @ent has an
+ * ExternalID and " SYSTEM " followed by the SystemID otherwise.
+ */
+static void
+xmlDumpEntityExternalID(xmlBufferPtr buf, xmlEntityPtr ent) {
+    if (ent->ExternalID != NULL) {
+	xmlBufferWriteChar(buf, " PUBLIC ");
+	xmlBufferWriteQuotedString(buf, ent->ExternalID);
+	xmlBufferWriteChar(buf, " ");
+    } else {
+	xmlBufferWriteChar(buf, " SYSTEM ");
+    }
+    xmlBufferWriteQuotedString(buf, ent->SystemID);
+}
+
+/**
+ * xmlDumpEntityDecl:
+ * @buf:  An XML buffer.
+ * @ent:  An entity, ignored when NULL
+ *
+ * This will dump the content of the entity as an XML DTD definition, i.e.
+ * one "<!ENTITY ...>" declaration followed by a newline.  For internal
+ * entities the unsubstituted text (orig) is preferred over the content.
+ * Entity types without a declaration form (e.g. predefined entities) are
+ * reported on stderr and produce no output in @buf.
+ */
+void
+xmlDumpEntityDecl(xmlBufferPtr buf, xmlEntityPtr ent) {
+    if (ent == NULL) return;
+
+    switch (ent->etype) {
+	case XML_INTERNAL_GENERAL_ENTITY:
+	    xmlBufferWriteChar(buf, "<!ENTITY ");
+	    xmlBufferWriteCHAR(buf, ent->name);
+	    xmlBufferWriteChar(buf, " ");
+	    if (ent->orig != NULL)
+		xmlBufferWriteQuotedString(buf, ent->orig);
+	    else
+		xmlBufferWriteQuotedString(buf, ent->content);
+	    xmlBufferWriteChar(buf, ">\n");
+	    break;
+	case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
+	    xmlBufferWriteChar(buf, "<!ENTITY ");
+	    xmlBufferWriteCHAR(buf, ent->name);
+	    xmlDumpEntityExternalID(buf, ent);
+	    xmlBufferWriteChar(buf, ">\n");
+	    break;
+	case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
+	    xmlBufferWriteChar(buf, "<!ENTITY ");
+	    xmlBufferWriteCHAR(buf, ent->name);
+	    xmlDumpEntityExternalID(buf, ent);
+	    /* for unparsed entities the NDATA name is kept in orig/content */
+	    if (ent->content != NULL) { /* Should be true ! */
+		xmlBufferWriteChar(buf, " NDATA ");
+		if (ent->orig != NULL)
+		    xmlBufferWriteCHAR(buf, ent->orig);
+		else
+		    xmlBufferWriteCHAR(buf, ent->content);
+	    }
+	    xmlBufferWriteChar(buf, ">\n");
+	    break;
+	case XML_INTERNAL_PARAMETER_ENTITY:
+	    xmlBufferWriteChar(buf, "<!ENTITY % ");
+	    xmlBufferWriteCHAR(buf, ent->name);
+	    xmlBufferWriteChar(buf, " ");
+	    if (ent->orig == NULL)
+		xmlBufferWriteQuotedString(buf, ent->content);
+	    else
+		xmlBufferWriteQuotedString(buf, ent->orig);
+	    xmlBufferWriteChar(buf, ">\n");
+	    break;
+	case XML_EXTERNAL_PARAMETER_ENTITY:
+	    xmlBufferWriteChar(buf, "<!ENTITY % ");
+	    xmlBufferWriteCHAR(buf, ent->name);
+	    xmlDumpEntityExternalID(buf, ent);
+	    xmlBufferWriteChar(buf, ">\n");
+	    break;
+	default:
+	    fprintf(stderr,
+		"xmlDumpEntityDecl: internal: unknown type %d\n",
+		    ent->etype);
+    }
+}
+
 /**
 * xmlDumpEntitiesTable:
 * @buf:  An XML buffer.
@ -784,81 +1152,7 @@ xmlDumpEntitiesTable(xmlBufferPtr buf, xmlEntitiesTablePtr table) {
    if (table == NULL) return;

    for (i = 0;i < table->nb_entities;i++) {
-        cur = &table->table[i];
-        switch (cur->type) {
-	    case XML_INTERNAL_GENERAL_ENTITY:
-	        xmlBufferWriteChar(buf, "<!ENTITY ");
-		xmlBufferWriteCHAR(buf, cur->name);
-		xmlBufferWriteChar(buf, " ");
-		if (cur->orig != NULL)
-		    xmlBufferWriteQuotedString(buf, cur->orig);
-		else
-		    xmlBufferWriteQuotedString(buf, cur->content);
-		xmlBufferWriteChar(buf, ">\n");
-	        break;
-	    case XML_EXTERNAL_GENERAL_PARSED_ENTITY:
-	        xmlBufferWriteChar(buf, "<!ENTITY ");
-		xmlBufferWriteCHAR(buf, cur->name);
-		if (cur->ExternalID != NULL) {
-		     xmlBufferWriteChar(buf, " PUBLIC ");
-		     xmlBufferWriteQuotedString(buf, cur->ExternalID);
-		     xmlBufferWriteChar(buf, " ");
-		     xmlBufferWriteQuotedString(buf, cur->SystemID);
-		} else {
-		     xmlBufferWriteChar(buf, " SYSTEM ");
-		     xmlBufferWriteQuotedString(buf, cur->SystemID);
-		}
-		xmlBufferWriteChar(buf, ">\n");
-	        break;
-	    case XML_EXTERNAL_GENERAL_UNPARSED_ENTITY:
-	        xmlBufferWriteChar(buf, "<!ENTITY ");
-		xmlBufferWriteCHAR(buf, cur->name);
-		if (cur->ExternalID != NULL) {
-		     xmlBufferWriteChar(buf, " PUBLIC ");
-		     xmlBufferWriteQuotedString(buf, cur->ExternalID);
-		     xmlBufferWriteChar(buf, " ");
-		     xmlBufferWriteQuotedString(buf, cur->SystemID);
-		} else {
-		     xmlBufferWriteChar(buf, " SYSTEM ");
-		     xmlBufferWriteQuotedString(buf, cur->SystemID);
-		}
-		if (cur->content != NULL) { /* Should be true ! */
-		    xmlBufferWriteChar(buf, " NDATA ");
-		    if (cur->orig != NULL)
-			xmlBufferWriteCHAR(buf, cur->orig);
-		    else
-			xmlBufferWriteCHAR(buf, cur->content);
-		}
-		xmlBufferWriteChar(buf, ">\n");
-	        break;
-	    case XML_INTERNAL_PARAMETER_ENTITY:
-	        xmlBufferWriteChar(buf, "<!ENTITY % ");
-		xmlBufferWriteCHAR(buf, cur->name);
-		xmlBufferWriteChar(buf, " ");
-		if (cur->orig == NULL)
-		    xmlBufferWriteQuotedString(buf, cur->content);
-		else
-		    xmlBufferWriteQuotedString(buf, cur->orig);
-		xmlBufferWriteChar(buf, ">\n");
-	        break;
-	    case XML_EXTERNAL_PARAMETER_ENTITY:
-	        xmlBufferWriteChar(buf, "<!ENTITY % ");
-		xmlBufferWriteCHAR(buf, cur->name);
-		if (cur->ExternalID != NULL) {
-		     xmlBufferWriteChar(buf, " PUBLIC ");
-		     xmlBufferWriteQuotedString(buf, cur->ExternalID);
-		     xmlBufferWriteChar(buf, " ");
-		     xmlBufferWriteQuotedString(buf, cur->SystemID);
-		} else {
-		     xmlBufferWriteChar(buf, " SYSTEM ");
-		     xmlBufferWriteQuotedString(buf, cur->SystemID);
-		}
-		xmlBufferWriteChar(buf, ">\n");
-	        break;
-	    default:
-	        fprintf(stderr,
-		    "xmlDumpEntitiesTable: internal: unknown type %d\n",
-		        cur->type);
-	}
+        cur = table->table[i];
+	xmlDumpEntityDecl(buf, cur);
    }
 }
--- a/entities.h
+++ b/entities.h
@ -15,12 +15,17 @@
 extern "C" {
 #endif

-#define XML_INTERNAL_GENERAL_ENTITY		1
-#define XML_EXTERNAL_GENERAL_PARSED_ENTITY	2
-#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY	3
-#define XML_INTERNAL_PARAMETER_ENTITY		4
-#define XML_EXTERNAL_PARAMETER_ENTITY		5
-#define XML_INTERNAL_PREDEFINED_ENTITY		6
+/*
+ * The different valid entity types; same numeric values as the #define constants this enum replaces
+ */
+typedef enum {
+    XML_INTERNAL_GENERAL_ENTITY = 1,	/* dumped as <!ENTITY name "value"> */
+    XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,	/* dumped as <!ENTITY name PUBLIC|SYSTEM ...> */
+    XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,	/* as above, followed by NDATA and a name */
+    XML_INTERNAL_PARAMETER_ENTITY = 4,	/* dumped as <!ENTITY % name "value"> */
+    XML_EXTERNAL_PARAMETER_ENTITY = 5,	/* dumped as <!ENTITY % name PUBLIC|SYSTEM ...> */
+    XML_INTERNAL_PREDEFINED_ENTITY = 6	/* no declaration is written by xmlDumpEntityDecl() */
+} xmlEntityType;

 /*
 * An unit of storage for an entity, contains the string, the value
@ -30,14 +35,32 @@ extern "C" {
 typedef struct _xmlEntity xmlEntity;
 typedef xmlEntity *xmlEntityPtr;
 struct _xmlEntity {
-    int type;			/* The entity type */
-    int len;			/* The lenght of the name */
-    const xmlChar  *name;	/* Name of the entity */
-    const xmlChar  *ExternalID;	/* External identifier for PUBLIC Entity */
-    const xmlChar  *SystemID;	/* URI for a SYSTEM or PUBLIC Entity */
-    xmlChar *content;		/* The entity content or ndata if unparsed */
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ENTITY_DECL, must be second ! */
+    const xmlChar          *name;	/* Entity name */
+    struct _xmlNode    *children;	/* child nodes if any, released by xmlFreeEntity() */
+    struct _xmlNode        *last;	/* NULL (NOTE(review): never set in the visible code) */
+    struct _xmlDtd       *parent;	/* -> DTD the entity is linked under */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    xmlChar                *orig;	/* content without ref substitution */
+    xmlChar             *content;	/* content or ndata if unparsed */
    int                   length;	/* the content length */
-    xmlChar *orig;		/* The entity cont without ref substitution */
+    xmlEntityType          etype;	/* The entity type */
+    const xmlChar    *ExternalID;	/* External identifier for PUBLIC */
+    const xmlChar      *SystemID;	/* URI for a SYSTEM or PUBLIC Entity */
+
+#ifdef WITH_EXTRA_ENT_DETECT
+    /* Names of the entities this one references, used for loop detection */
+    xmlChar           *ent;             /* NOTE(review): not used in the visible code */
+    int                entNr;           /* number of names stored in entTab */
+    int                entMax;          /* number of slots allocated in entTab */
+    xmlChar *         *entTab;          /* array of referenced entity names */
+#endif
 };

 /*
@ -52,7 +75,7 @@ typedef xmlEntitiesTable *xmlEntitiesTablePtr;
 struct _xmlEntitiesTable {
    int nb_entities;		/* number of elements stored */
    int max_entities;		/* maximum number of elements */
-    xmlEntityPtr table;	        /* the table of entities */
+    xmlEntityPtr *table;	/* array of pointers to separately allocated entities */
 };


@ -60,13 +83,13 @@ struct _xmlEntitiesTable {
 * External functions :
 */

-void			xmlAddDocEntity		(xmlDocPtr doc,
+xmlEntityPtr		xmlAddDocEntity		(xmlDocPtr doc,
 						 const xmlChar *name,
 						 int type,
 						 const xmlChar *ExternalID,
 						 const xmlChar *SystemID,
 						 const xmlChar *content);
-void			xmlAddDtdEntity		(xmlDocPtr doc,
+xmlEntityPtr		xmlAddDtdEntity		(xmlDocPtr doc,
 						 const xmlChar *name,
 						 int type,
 						 const xmlChar *ExternalID,
@ -88,9 +111,16 @@ xmlEntitiesTablePtr	xmlCopyEntitiesTable	(xmlEntitiesTablePtr table);
 void			xmlFreeEntitiesTable	(xmlEntitiesTablePtr table);
 void			xmlDumpEntitiesTable	(xmlBufferPtr buf,
 						 xmlEntitiesTablePtr table);
+void			xmlDumpEntityDecl	(xmlBufferPtr buf,
+						 xmlEntityPtr ent);
 xmlEntitiesTablePtr	xmlCopyEntitiesTable	(xmlEntitiesTablePtr table);
 void			xmlCleanupPredefinedEntities(void);

+#ifdef WITH_EXTRA_ENT_DETECT
+int			xmlEntityAddReference	(xmlEntityPtr ent,
+						 const xmlChar *to);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@ -67,11 +67,11 @@ typedef enum {
 * Returns the number of byte written, or -1 by lack of space.
 */
 typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
-                                         unsigned char* in, int inlen);
+                                         const unsigned char* in, int *inlen);


 /**
- * xmlCharEncodingInputFunc:
+ * xmlCharEncodingOutputFunc:
 * @out:  a pointer ot an array of bytes to store the result
 * @outlen:  the lenght of @out
 * @in:  a pointer ot an array of UTF-8 chars
@ -84,7 +84,7 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
 *     if the transcoding failed.
 */
 typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
-                                          unsigned char* in, int inlen);
+                                          const unsigned char* in, int *inlen);

 /*
 * Block defining the handlers for non UTF-8 encodings.
@ -101,10 +101,12 @@ struct _xmlCharEncodingHandler {
 void	xmlInitCharEncodingHandlers	(void);
 void	xmlCleanupCharEncodingHandlers	(void);
 void	xmlRegisterCharEncodingHandler	(xmlCharEncodingHandlerPtr handler);
-xmlCharEncoding xmlDetectCharEncoding	(const unsigned char* in);
+xmlCharEncoding xmlDetectCharEncoding	(const unsigned char* in,
+					 int len);
 xmlCharEncoding xmlParseCharEncoding	(const char* name);
 xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
 xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
+int	xmlCheckUTF8			(const unsigned char *utf);


 #ifdef __cplusplus
--- a/include/libxml/entities.h
+++ b/include/libxml/entities.h
@ -15,12 +15,17 @@
 extern "C" {
 #endif

-#define XML_INTERNAL_GENERAL_ENTITY		1
-#define XML_EXTERNAL_GENERAL_PARSED_ENTITY	2
-#define XML_EXTERNAL_GENERAL_UNPARSED_ENTITY	3
-#define XML_INTERNAL_PARAMETER_ENTITY		4
-#define XML_EXTERNAL_PARAMETER_ENTITY		5
-#define XML_INTERNAL_PREDEFINED_ENTITY		6
+/*
+ * The different valid entity types
+ */
+typedef enum {
+    XML_INTERNAL_GENERAL_ENTITY = 1,
+    XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2,
+    XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3,
+    XML_INTERNAL_PARAMETER_ENTITY = 4,
+    XML_EXTERNAL_PARAMETER_ENTITY = 5,
+    XML_INTERNAL_PREDEFINED_ENTITY = 6
+} xmlEntityType;

 /*
 * An unit of storage for an entity, contains the string, the value
@ -30,14 +35,32 @@ extern "C" {
 typedef struct _xmlEntity xmlEntity;
 typedef xmlEntity *xmlEntityPtr;
 struct _xmlEntity {
-    int type;			/* The entity type */
-    int len;			/* The lenght of the name */
-    const xmlChar  *name;	/* Name of the entity */
-    const xmlChar  *ExternalID;	/* External identifier for PUBLIC Entity */
-    const xmlChar  *SystemID;	/* URI for a SYSTEM or PUBLIC Entity */
-    xmlChar *content;		/* The entity content or ndata if unparsed */
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ENTITY_DECL, must be second ! */
+    const xmlChar          *name;	/* Entity name */
+    struct _xmlNode    *children;	/* NULL */
+    struct _xmlNode        *last;	/* NULL */
+    struct _xmlDtd       *parent;	/* -> DTD */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    xmlChar                *orig;	/* content without ref substitution */
+    xmlChar             *content;	/* content or ndata if unparsed */
    int                   length;	/* the content length */
-    xmlChar *orig;		/* The entity cont without ref substitution */
+    xmlEntityType          etype;	/* The entity type */
+    const xmlChar    *ExternalID;	/* External identifier for PUBLIC */
+    const xmlChar      *SystemID;	/* URI for a SYSTEM or PUBLIC Entity */
+
+#ifdef WITH_EXTRA_ENT_DETECT
+    /* Referenced entities name stack */
+    xmlChar           *ent;             /* Current parsed Node */
+    int                entNr;           /* Depth of the parsing stack */
+    int                entMax;          /* Max depth of the parsing stack */
+    xmlChar *         *entTab;          /* array of nodes */
+#endif
 };

 /*
@ -52,7 +75,7 @@ typedef xmlEntitiesTable *xmlEntitiesTablePtr;
 struct _xmlEntitiesTable {
    int nb_entities;		/* number of elements stored */
    int max_entities;		/* maximum number of elements */
-    xmlEntityPtr table;	        /* the table of entities */
+    xmlEntityPtr *table;	/* the table of entities */
 };


@ -60,13 +83,13 @@ struct _xmlEntitiesTable {
 * External functions :
 */

-void			xmlAddDocEntity		(xmlDocPtr doc,
+xmlEntityPtr		xmlAddDocEntity		(xmlDocPtr doc,
 						 const xmlChar *name,
 						 int type,
 						 const xmlChar *ExternalID,
 						 const xmlChar *SystemID,
 						 const xmlChar *content);
-void			xmlAddDtdEntity		(xmlDocPtr doc,
+xmlEntityPtr		xmlAddDtdEntity		(xmlDocPtr doc,
 						 const xmlChar *name,
 						 int type,
 						 const xmlChar *ExternalID,
@ -88,9 +111,16 @@ xmlEntitiesTablePtr	xmlCopyEntitiesTable	(xmlEntitiesTablePtr table);
 void			xmlFreeEntitiesTable	(xmlEntitiesTablePtr table);
 void			xmlDumpEntitiesTable	(xmlBufferPtr buf,
 						 xmlEntitiesTablePtr table);
+void			xmlDumpEntityDecl	(xmlBufferPtr buf,
+						 xmlEntityPtr ent);
 xmlEntitiesTablePtr	xmlCopyEntitiesTable	(xmlEntitiesTablePtr table);
 void			xmlCleanupPredefinedEntities(void);

+#ifdef WITH_EXTRA_ENT_DETECT
+int			xmlEntityAddReference	(xmlEntityPtr ent,
+						 const xmlChar *to);
+#endif
+
 #ifdef __cplusplus
 }
 #endif
--- a/include/libxml/parser.h
+++ b/include/libxml/parser.h
@ -49,6 +49,9 @@ struct _xmlParserInput {
    int col;                          /* Current column */
    int consumed;                     /* How many xmlChars already consumed */
    xmlParserInputDeallocate free;    /* function to deallocate the base */
+    const xmlChar *encoding;          /* the encoding string for entity */
+    const xmlChar *version;           /* the version string for entity */
+    int standalone;                   /* Was that entity marked standalone */
 };

 /**
@ -95,6 +98,7 @@ typedef enum {
    XML_PARSER_ENTITY_DECL,	/* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,	/* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE,	/* within an attribute value */
+    XML_PARSER_SYSTEM_LITERAL,	/* within a SYSTEM value */
    XML_PARSER_EPILOG 		/* the Misc* after the last end tag */
 } xmlParserInputState;

@ -151,7 +155,7 @@ struct _xmlParserCtxt {

    char           *directory;        /* the data directory */

-    /* Node name stack only used for HTML parsing */
+    /* Node name stack */
    xmlChar           *name;          /* Current parsed Node */
    int                nameNr;        /* Depth of the parsing stack */
    int                nameMax;       /* Max depth of the parsing stack */
@ -160,6 +164,20 @@ struct _xmlParserCtxt {
    long               nbChars;       /* number of xmlChar processed */
    long            checkIndex;       /* used by progressive parsing lookup */
    int             keepBlanks;       /* ugly but ... */
+    int             disableSAX;       /* SAX callbacks are disabled */
+    int               inSubset;       /* Parsing is in int 1/ext 2 subset */
+    xmlChar *          intSubName;    /* name of subset */
+    xmlChar *          extSubURI;     /* URI of external subset */
+    xmlChar *          extSubSystem;  /* SYSTEM ID of external subset */
+
+    /* xml:space values */
+    int *              space;         /* Should the parser preserve spaces */
+    int                spaceNr;       /* Depth of the parsing stack */
+    int                spaceMax;      /* Max depth of the parsing stack */
+    int *              spaceTab;      /* array of space infos */
+
+    int                depth;         /* to prevent entity substitution loops */
+    xmlParserInputPtr  entity;      /* used to check entities boundaries */
 };

 /**
@ -183,6 +201,8 @@ typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
 			    const xmlChar *publicId, const xmlChar *systemId);
 typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
                            const xmlChar *ExternalID, const xmlChar *SystemID);
+typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
+                            const xmlChar *ExternalID, const xmlChar *SystemID);
 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
                            const xmlChar *name);
 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
@ -254,6 +274,7 @@ struct _xmlSAXHandler {
    fatalErrorSAXFunc fatalError;
    getParameterEntitySAXFunc getParameterEntity;
    cdataBlockSAXFunc cdataBlock;
+    externalSubsetSAXFunc externalSubset;
 };

 /**
@ -278,7 +299,7 @@ extern xmlSAXHandler htmlDefaultSAXHandler;
 */

 extern int xmlSubstituteEntitiesDefaultValue;
-
+extern int xmlGetWarningsDefaultValue;


 /**
@ -363,6 +384,20 @@ xmlDtdPtr	xmlParseDTD		(const xmlChar *ExternalID,
 xmlDtdPtr	xmlSAXParseDTD		(xmlSAXHandlerPtr sax,
 					 const xmlChar *ExternalID,
 					 const xmlChar *SystemID);
+int		xmlParseBalancedChunkMemory(xmlDocPtr doc,
+					 xmlSAXHandlerPtr sax,
+					 void *user_data,
+					 int depth,
+					 const xmlChar *string,
+					 xmlNodePtr *list);
+int		xmlParseExternalEntity	(xmlDocPtr doc,
+					 xmlSAXHandlerPtr sax,
+					 void *user_data,
+					 int depth,
+					 const xmlChar *URL,
+					 const xmlChar *ID,
+					 xmlNodePtr *list);
+
 /**
 * SAX initialization routines
 */
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@ -17,31 +17,6 @@ extern "C" {

 #define XML_MAX_NAMELEN 1000

-/**
- * A few macros needed to help building the parser.
- */
-/* #define UNICODE */
-
-#ifdef UNICODE
-typedef unsigned long CHARVAL;
-
-#define NEXTCHARVAL(p) (unsigned long)					\
-   ((*(p) == 0) ? (unsigned long) 0 :							\
-    ((*(p) < 0x80) ? (unsigned long) (*(p)++) :						\
-      (*(p) < 0xC0) ? (unsigned long) 0 :						\
-       (*(p) < 0xE0) ? ((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) :		\
-        (*(p) < 0xF0) ? (((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 +	\
-	              (*(p)++ & 0x3F)) :				\
-         (*(p) < 0xF8) ? ((((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 +	\
-	                (*(p)++ & 0x3F)) << 6 + (*(p)++ & 0x3F)) : 0))
-#else
-typedef unsigned char CHARVAL;
-
-#define NEXTCHARVAL(p) (unsigned long) *(p);
-#define SKIPCHARVAL(p) (p)++;
-#endif
-
-#ifdef UNICODE
 /************************************************************************
 *									*
 * UNICODE version of the macros.      					*
@ -404,7 +379,7 @@ typedef unsigned char CHARVAL;
 #define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
-     ((c) == 0xec6) || ((c) == 0x3005)					\
+     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309b) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
@ -423,65 +398,6 @@ typedef unsigned char CHARVAL;
 */
 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

-#else
-/************************************************************************
- *									*
- * 8bits / ISO-Latin version of the macros.				*
- *									*
- ************************************************************************/
-/*
- * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
- *                  | [#x10000-#x10FFFF]
- * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
- */
-#define IS_CHAR(c)							\
-    ((((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
-     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
-     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
-     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
-
-/*
- * [85] BaseChar ::= ... long list see REC ...
- */
-#define IS_BASECHAR(c)							\
-     ((((c) >= 0x0041) && ((c) <= 0x005A)) ||				\
-      (((c) >= 0x0061) && ((c) <= 0x007A)) ||				\
-      (((c) >= 0x00C0) && ((c) <= 0x00D6)) ||				\
-      (((c) >= 0x00D8) && ((c) <= 0x00F6)) ||				\
-      (((c) >= 0x00F8) && ((c) <= 0x00FF)))
-
-/*
- * [88] Digit ::= ... long list see REC ...
- */
-#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
-
-/*
- * [84] Letter ::= BaseChar | Ideographic 
- */
-#define IS_LETTER(c) IS_BASECHAR(c)
-
-
-/*
- * [87] CombiningChar ::= ... long list see REC ...
- */
-#define IS_COMBINING(c) 0
-
-/*
- * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
- *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
- *                   [#x309D-#x309E] | [#x30FC-#x30FE]
- */
-#define IS_EXTENDER(c) ((c) == 0xb7)
-
-#endif /* !UNICODE */
-
-/*
- * Blank chars.
- *
- * [3] S ::= (#x20 | #x9 | #xD | #xA)+
- */
-#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||	\
-                     ((c) == 0x0D))

 /*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
@ -502,10 +418,10 @@ typedef unsigned char CHARVAL;
    if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }

 #define MOVETO_ENDTAG(p)						\
-    while (IS_CHAR(*p) && (*(p) != '>')) (p)++
+    while ((*p) && (*(p) != '>')) (p)++

 #define MOVETO_STARTTAG(p)						\
-    while (IS_CHAR(*p) && (*(p) != '<')) (p)++
+    while ((*p) && (*(p) != '<')) (p)++

 /**
 * Parser context
@ -514,10 +430,13 @@ xmlParserCtxtPtr	xmlCreateDocParserCtxt	(xmlChar *cur);
 xmlParserCtxtPtr	xmlCreateFileParserCtxt	(const char *filename);
 xmlParserCtxtPtr	xmlCreateMemoryParserCtxt(char *buffer,
 						 int size);
-void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
 xmlParserCtxtPtr	xmlNewParserCtxt	(void);
+xmlParserCtxtPtr	xmlCreateEntityParserCtxt(const xmlChar *URL,
+						 const xmlChar *ID,
+						 const xmlChar *base);
 void			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
 						 xmlCharEncoding enc);
+void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);

 /**
 * Entities
@ -540,7 +459,8 @@ xmlParserInputPtr	xmlNewInputFromFile	(xmlParserCtxtPtr ctxt,
 /**
 * Namespaces.
 */
-xmlChar *			xmlSplitQName		(const xmlChar *name,
+xmlChar *		xmlSplitQName		(xmlParserCtxtPtr ctxt,
+						 const xmlChar *name,
 						 xmlChar **prefix);
 xmlChar *		xmlNamespaceParseNCName	(xmlParserCtxtPtr ctxt);
 xmlChar *		xmlNamespaceParseQName	(xmlParserCtxtPtr ctxt,
@ -606,6 +526,7 @@ xmlChar *			xmlParseEncName		(xmlParserCtxtPtr ctxt);
 xmlChar *		xmlParseEncodingDecl	(xmlParserCtxtPtr ctxt);
 int			xmlParseSDDecl		(xmlParserCtxtPtr ctxt);
 void			xmlParseXMLDecl		(xmlParserCtxtPtr ctxt);
+void			xmlParseTextDecl	(xmlParserCtxtPtr ctxt);
 void			xmlParseMisc		(xmlParserCtxtPtr ctxt);
 void			xmlParseExternalSubset	(xmlParserCtxtPtr ctxt,
 						 const xmlChar *ExternalID,
@ -624,6 +545,12 @@ xmlChar *			xmlDecodeEntities	(xmlParserCtxtPtr ctxt,
 						 xmlChar end,
 						 xmlChar  end2,
 						 xmlChar end3);
+xmlChar *		xmlStringDecodeEntities	(xmlParserCtxtPtr ctxt,
+						 const xmlChar *str,
+						 int what,
+						 xmlChar end,
+						 xmlChar  end2,
+						 xmlChar end3);

 /*
 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@ -36,24 +36,22 @@ typedef enum {
    XML_DOCUMENT_TYPE_NODE=	10,
    XML_DOCUMENT_FRAG_NODE=	11,
    XML_NOTATION_NODE=		12,
-    XML_HTML_DOCUMENT_NODE=	13
+    XML_HTML_DOCUMENT_NODE=	13,
+    XML_DTD_NODE=		14,
+    XML_ELEMENT_DECL=		15,
+    XML_ATTRIBUTE_DECL=		16,
+    XML_ENTITY_DECL=		17
 } xmlElementType;

 /*
 * Size of an internal character representation.
 *
- * Currently we use 8bit chars internal representation for memory efficiency,
- * but the parser is not tied to that, just define UNICODE to switch to
- * a 16 bits internal representation. Note that with 8 bits wide
- * xmlChars one can still use UTF-8 to handle correctly non ISO-Latin
- * input.
+ * We use 8bit chars internal representation for memory efficiency,
+ * Note that with 8 bits wide xmlChars one can still use UTF-8 to handle
+ * correctly non ISO-Latin input.
 */

-#ifdef UNICODE
-typedef unsigned short xmlChar;
-#else
 typedef unsigned char xmlChar;
-#endif

 #ifndef WIN32
 #ifndef CHAR
@ -109,14 +107,25 @@ struct _xmlEnumeration {
 typedef struct _xmlAttribute xmlAttribute;
 typedef xmlAttribute *xmlAttributePtr;
 struct _xmlAttribute {
-    const xmlChar         *elem;	/* Element holding the attribute */
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ATTRIBUTE_DECL, must be second ! */
    const xmlChar          *name;	/* Attribute name */
-    struct _xmlAttribute   *next;       /* list of attributes of an element */
-    xmlAttributeType       type;	/* The type */
+    struct _xmlNode    *children;	/* NULL */
+    struct _xmlNode        *last;	/* NULL */
+    struct _xmlDtd       *parent;	/* -> DTD */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    struct _xmlAttribute  *nexth;	/* next in hash table */
+    xmlAttributeType       atype;	/* The attribute type */
    xmlAttributeDefault      def;	/* the default */
    const xmlChar  *defaultValue;	/* or the default value */
    xmlEnumerationPtr       tree;       /* or the enumeration tree if any */
    const xmlChar        *prefix;	/* the namespace prefix if any */
+    const xmlChar          *elem;	/* Element holding the attribute */
 };

 /*
@ -156,8 +165,19 @@ typedef enum {
 typedef struct _xmlElement xmlElement;
 typedef xmlElement *xmlElementPtr;
 struct _xmlElement {
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ELEMENT_DECL, must be second ! */
    const xmlChar          *name;	/* Element name */
-    xmlElementTypeVal       type;	/* The type */
+    struct _xmlNode    *children;	/* NULL */
+    struct _xmlNode        *last;	/* NULL */
+    struct _xmlDtd       *parent;	/* -> DTD */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    xmlElementTypeVal      etype;	/* The type */
    xmlElementContentPtr content;	/* the allowed element content */
    xmlAttributePtr   attributes;	/* List of the declared attributes */
 };
@ -188,14 +208,25 @@ struct _xmlNs {
 typedef struct _xmlDtd xmlDtd;
 typedef xmlDtd *xmlDtdPtr;
 struct _xmlDtd {
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	/* for Corba, must be first ! */
+#endif
+    xmlElementType  type;       /* XML_DTD_NODE, must be second ! */
    const xmlChar *name;	/* Name of the DTD */
-    const xmlChar *ExternalID;	/* External identifier for PUBLIC DTD */
-    const xmlChar *SystemID;	/* URI for a SYSTEM or PUBLIC DTD */
+    struct _xmlNode *children;	/* the value of the property link */
+    struct _xmlNode *last;	/* last child link */
+    struct _xmlDoc  *parent;	/* child->parent link */
+    struct _xmlNode *next;	/* next sibling link  */
+    struct _xmlNode *prev;	/* previous sibling link  */
+    struct _xmlDoc  *doc;	/* the containing document */
+
+    /* End of common part */
    void          *notations;   /* Hash table for notations if any */
    void          *elements;    /* Hash table for elements if any */
    void          *attributes;  /* Hash table for attributes if any */
    void          *entities;    /* Hash table for entities if any */
-    /* struct xmlDtd *next;	 * next  link for this document  */
+    const xmlChar *ExternalID;	/* External identifier for PUBLIC DTD */
+    const xmlChar *SystemID;	/* URI for a SYSTEM or PUBLIC DTD */
 };

 /*
@ -206,14 +237,17 @@ typedef xmlAttr *xmlAttrPtr;
 struct _xmlAttr {
 #ifndef XML_WITHOUT_CORBA
    void           *_private;	/* for Corba, must be first ! */
-    void           *vepv;	/* for Corba, must be next ! */
 #endif
-    xmlElementType  type;       /* XML_ATTRIBUTE_NODE, must be third ! */
-    struct _xmlNode *node;	/* attr->node link */
-    struct _xmlAttr *next;	/* attribute list link */
+    xmlElementType   type;      /* XML_ATTRIBUTE_NODE, must be second ! */
    const xmlChar   *name;      /* the name of the property */
-    struct _xmlNode *val;       /* the value of the property */
+    struct _xmlNode *children;	/* the value of the property */
+    struct _xmlNode *last;	/* NULL */
+    struct _xmlNode *parent;	/* child->parent link */
+    struct _xmlAttr *next;	/* next sibling link  */
+    struct _xmlAttr *prev;	/* previous sibling link  */
+    struct _xmlDoc  *doc;	/* the containing document */
    xmlNs           *ns;        /* pointer to the associated namespace */
+    xmlAttributeType atype;     /* the attribute type if validating */
 };

 /*
@ -266,24 +300,25 @@ typedef xmlNode *xmlNodePtr;
 struct _xmlNode {
 #ifndef XML_WITHOUT_CORBA
    void           *_private;	/* for Corba, must be first ! */
-    void           *vepv;	/* for Corba, must be next ! */
 #endif
-    xmlElementType  type;	/* type number in the DTD, must be third ! */
-    struct _xmlDoc  *doc;	/* the containing document */
+    xmlElementType   type;	/* type number, must be second ! */
+    const xmlChar   *name;      /* the name of the node, or the entity */
+    struct _xmlNode *children;	/* parent->children link */
+    struct _xmlNode *last;	/* last child link */
    struct _xmlNode *parent;	/* child->parent link */
    struct _xmlNode *next;	/* next sibling link  */
    struct _xmlNode *prev;	/* previous sibling link  */
-    struct _xmlNode *childs;	/* parent->childs link */
-    struct _xmlNode *last;	/* last child link */
-    struct _xmlAttr *properties;/* properties list */
-    const xmlChar  *name;       /* the name of the node, or the entity */
+    struct _xmlDoc  *doc;	/* the containing document */
    xmlNs           *ns;        /* pointer to the associated namespace */
-    xmlNs          *nsDef;      /* namespace definitions on this node */
 #ifndef XML_USE_BUFFER_CONTENT    
    xmlChar         *content;   /* the content */
 #else
    xmlBufferPtr     content;   /* the content in a buffer */
 #endif
+
+    /* End of common part */
+    struct _xmlAttr *properties;/* properties list */
+    xmlNs           *nsDef;     /* namespace definitions on this node */
 };

 /*
@ -294,20 +329,27 @@ typedef xmlDoc *xmlDocPtr;
 struct _xmlDoc {
 #ifndef XML_WITHOUT_CORBA
    void           *_private;	/* for Corba, must be first ! */
-    void           *vepv;	/* for Corba, must be next ! */
 #endif
    xmlElementType  type;       /* XML_DOCUMENT_NODE, must be second ! */
    char           *name;	/* name/filename/URI of the document */
-    const xmlChar  *version;	/* the XML version string */
-    const xmlChar  *encoding;   /* encoding, if any */
+    struct _xmlNode *children;	/* the document tree */
+    struct _xmlNode *last;	/* last child link */
+    struct _xmlNode *parent;	/* child->parent link */
+    struct _xmlNode *next;	/* next sibling link  */
+    struct _xmlNode *prev;	/* previous sibling link  */
+    struct _xmlDoc  *doc;	/* autoreference to itself */
+
+    /* End of common part */
    int             compression;/* level of zlib compression */
    int             standalone; /* standalone document (no external refs) */
    struct _xmlDtd  *intSubset;	/* the document internal subset */
    struct _xmlDtd  *extSubset;	/* the document external subset */
    struct _xmlNs   *oldNs;	/* Global namespace, the old way */
-    struct _xmlNode *root;	/* the document tree */
+    const xmlChar  *version;	/* the XML version string */
+    const xmlChar  *encoding;   /* encoding, if any */
    void           *ids;        /* Hash table for ID attributes if any */
    void           *refs;       /* Hash table for IDREFs attributes if any */
+    const xmlChar  *URL;	/* The URI for that document */
 };

 /*
@ -422,6 +464,8 @@ xmlNodePtr	xmlNewComment		(const xmlChar *content);
 xmlNodePtr	xmlNewCDataBlock	(xmlDocPtr doc,
 					 const xmlChar *content,
 					 int len);
+xmlNodePtr	xmlNewCharRef		(xmlDocPtr doc,
+					 const xmlChar *name);
 xmlNodePtr	xmlNewReference		(xmlDocPtr doc,
 					 const xmlChar *name);
 xmlNodePtr	xmlCopyNode		(xmlNodePtr node,
@ -513,13 +557,14 @@ xmlChar *	xmlNodeGetContent	(xmlNodePtr cur);
 xmlChar *	xmlNodeGetLang		(xmlNodePtr cur);
 void		xmlNodeSetLang		(xmlNodePtr cur,
 					 const xmlChar *lang);
+int		xmlNodeGetSpacePreserve	(xmlNodePtr cur);
 xmlChar *	xmlNodeGetBase		(xmlDocPtr doc,
 					 xmlNodePtr cur);

 /*
 * Removing content.
 */
-int		xmlRemoveProp		(xmlAttrPtr attr); /* TODO */
+int		xmlRemoveProp		(xmlAttrPtr attr);
 int		xmlRemoveNode		(xmlNodePtr node); /* TODO */

 /*
@ -532,6 +577,12 @@ void		xmlBufferWriteChar	(xmlBufferPtr buf,
 void		xmlBufferWriteQuotedString(xmlBufferPtr buf,
 					 const xmlChar *string);

+/*
+ * Namespace handling
+ */
+int		xmlReconciliateNs	(xmlDocPtr doc,
+					 xmlNodePtr tree);
+
 /*
 * Saving
 */
--- a/include/libxml/valid.h
+++ b/include/libxml/valid.h
@ -29,6 +29,14 @@ struct _xmlValidCtxt {
    void *userData;			/* user specific data block */
    xmlValidityErrorFunc error;		/* the callback in case of errors */
    xmlValidityWarningFunc warning;	/* the callback in case of warning */
+
+    /* Node analysis stack used when validating within entities */
+    xmlNodePtr         node;          /* Current parsed Node */
+    int                nodeNr;        /* Depth of the parsing stack */
+    int                nodeMax;       /* Max depth of the parsing stack */
+    xmlNodePtr        *nodeTab;       /* array of nodes */
+
+    int              finishDtd;       /* finished validating the Dtd ? */
 };

 /*
@ -114,6 +122,8 @@ xmlNotationPtr	    xmlAddNotationDecl	(xmlValidCtxtPtr ctxt,
 					 const xmlChar *SystemID);
 xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
 void		    xmlFreeNotationTable(xmlNotationTablePtr table);
+void		    xmlDumpNotationDecl	(xmlBufferPtr buf,
+					 xmlNotationPtr nota);
 void		    xmlDumpNotationTable(xmlBufferPtr buf,
 					 xmlNotationTablePtr table);

@ -122,6 +132,9 @@ xmlElementContentPtr xmlNewElementContent (xmlChar *name,
 					   xmlElementContentType type);
 xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content);
 void		     xmlFreeElementContent(xmlElementContentPtr cur);
+void		     xmlSprintfElementContent(char *buf,
+	                                   xmlElementContentPtr content,
+					   int glob);

 /* Element */
 xmlElementPtr	   xmlAddElementDecl	(xmlValidCtxtPtr ctxt,
@ -133,6 +146,8 @@ xmlElementTablePtr xmlCopyElementTable	(xmlElementTablePtr table);
 void		   xmlFreeElementTable	(xmlElementTablePtr table);
 void		   xmlDumpElementTable	(xmlBufferPtr buf,
 					 xmlElementTablePtr table);
+void		   xmlDumpElementDecl	(xmlBufferPtr buf,
+					 xmlElementPtr elem);

 /* Enumeration */
 xmlEnumerationPtr  xmlCreateEnumeration	(xmlChar *name);
@ -144,6 +159,7 @@ xmlAttributePtr	    xmlAddAttributeDecl	    (xmlValidCtxtPtr ctxt,
 					     xmlDtdPtr dtd,
 					     const xmlChar *elem,
 					     const xmlChar *name,
+					     const xmlChar *prefix,
 					     xmlAttributeType type,
 					     xmlAttributeDefault def,
 					     const xmlChar *defaultValue,
@ -152,6 +168,8 @@ xmlAttributeTablePtr xmlCopyAttributeTable  (xmlAttributeTablePtr table);
 void		     xmlFreeAttributeTable  (xmlAttributeTablePtr table);
 void		     xmlDumpAttributeTable  (xmlBufferPtr buf,
 					     xmlAttributeTablePtr table);
+void		     xmlDumpAttributeDecl   (xmlBufferPtr buf,
+					     xmlAttributePtr attr);

 /* IDs */
 xmlIDPtr	xmlAddID	(xmlValidCtxtPtr ctxt,
@ -188,6 +206,10 @@ int		xmlValidateRoot		(xmlValidCtxtPtr ctxt,
 int		xmlValidateElementDecl	(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 		                         xmlElementPtr elem);
+xmlChar *	xmlValidNormalizeAttributeValue(xmlDocPtr doc,
+					 xmlNodePtr elem,
+					 const xmlChar *name,
+					 const xmlChar *value);
 int		xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 		                         xmlAttributePtr attr);
@ -199,6 +221,8 @@ int		xmlValidateNotationDecl	(xmlValidCtxtPtr ctxt,
 int		xmlValidateDtd		(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 					 xmlDtdPtr dtd);
+int		xmlValidateDtdFinal	(xmlValidCtxtPtr ctxt,
+					 xmlDocPtr doc);
 int		xmlValidateDocument	(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc);
 int		xmlValidateElement	(xmlValidCtxtPtr ctxt,
--- a/include/libxml/xmlmemory.h
+++ b/include/libxml/xmlmemory.h
@ -8,7 +8,7 @@
 #ifndef _DEBUG_MEMORY_ALLOC_
 #define _DEBUG_MEMORY_ALLOC_

-#define NO_DEBUG_MEMORY
+/* #define NO_DEBUG_MEMORY */

 #ifdef NO_DEBUG_MEMORY
 #ifdef HAVE_MALLOC_H
--- a/libxml.spec.in
+++ b/libxml.spec.in
@ -3,14 +3,13 @@
 %define prefix   /usr

 Summary: libXML library
-Name: libxml
+Name: libxml2
 Version: %ver
 Release: 1
 Copyright: LGPL
 Group: X11/Libraries
 Source: ftp://ftp.gnome.org/pub/GNOME/sources/libxml/libxml-%{ver}.tar.gz
 BuildRoot: /var/tmp/libxml-%{PACKAGE_VERSION}-root
-Provides: libxml.so.0

 URL: http://rpmfind.net/veillard/XML/
 Prereq: /sbin/install-info
--- a/nanoftp.c
+++ b/nanoftp.c
@ -869,10 +869,11 @@ xmlNanoFTPConnect(void *ctx) {
 		    else
 #ifndef HAVE_SNPRINTF
 			len = sprintf(buf, "PASS libxml@%s\r\n",
+			               hostname);
 #else /* HAVE_SNPRINTF */
 			len = snprintf(buf, sizeof(buf), "PASS libxml@%s\r\n",
-#endif /* HAVE_SNPRINTF */
 			               hostname);
+#endif /* HAVE_SNPRINTF */
 #ifdef DEBUG_FTP
 		    printf(buf);
 #endif
@ -1226,11 +1227,13 @@ xmlNanoFTPGetConnection(void *ctx) {
 	portp = (unsigned char *) &dataAddr.sin_port;
 #ifndef HAVE_SNPRINTF
 	len = sprintf(buf, "PORT %d,%d,%d,%d,%d,%d\r\n",
-#else /* HAVE_SNPRINTF */
-	len = snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n",
-#endif /* HAVE_SNPRINTF */
 	       adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff,
 	       portp[0] & 0xff, portp[1] & 0xff);
+#else /* HAVE_SNPRINTF */
+	len = snprintf(buf, sizeof(buf), "PORT %d,%d,%d,%d,%d,%d\r\n",
+	       adp[0] & 0xff, adp[1] & 0xff, adp[2] & 0xff, adp[3] & 0xff,
+	       portp[0] & 0xff, portp[1] & 0xff);
+#endif /* HAVE_SNPRINTF */
        buf[sizeof(buf) - 1] = 0;
 #ifdef DEBUG_FTP
 	printf(buf);
@ -1264,13 +1267,34 @@ int
 xmlNanoFTPCloseConnection(void *ctx) {
    xmlNanoFTPCtxtPtr ctxt = (xmlNanoFTPCtxtPtr) ctx;
    int res;
+    fd_set rfd, efd;
+    struct timeval tv;

    close(ctxt->dataFd); ctxt->dataFd = -1;
+    tv.tv_sec = 15;
+    tv.tv_usec = 0;
+    FD_ZERO(&rfd);
+    FD_SET(ctxt->controlFd, &rfd);
+    FD_ZERO(&efd);
+    FD_SET(ctxt->controlFd, &efd);
+    res = select(ctxt->controlFd + 1, &rfd, NULL, &efd, &tv);
+    if (res < 0) {
+#ifdef DEBUG_FTP
+	perror("select");
+#endif
+	close(ctxt->controlFd); ctxt->controlFd = -1;
+	return(-1);
+    }
+    if (res == 0) {
+	fprintf(stderr, "xmlNanoFTPCloseConnection: timeout\n");
+	close(ctxt->controlFd); ctxt->controlFd = -1;
+    } else {
 	res = xmlNanoFTPGetResponse(ctxt);
 	if (res != 2) {
 	    close(ctxt->controlFd); ctxt->controlFd = -1;
 	    return(-1);
 	}
+    }
    return(0);
 }

--- a/nanohttp.c
+++ b/nanohttp.c
@ -753,7 +753,7 @@ retry:
    }
    ctxt->fd = ret;
    if (proxy) {
-#ifdef have_snprintf
+#ifdef HAVE_SNPRINTF
 	if (ctxt->port != 80)
 	    snprintf(buf, sizeof(buf),
 		     "GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",
--- a/parser.c
+++ b/parser.c
--- a/parser.h
+++ b/parser.h
@ -49,6 +49,9 @@ struct _xmlParserInput {
    int col;                          /* Current column */
    int consumed;                     /* How many xmlChars already consumed */
    xmlParserInputDeallocate free;    /* function to deallocate the base */
+    const xmlChar *encoding;          /* the encoding string for entity */
+    const xmlChar *version;           /* the version string for entity */
+    int standalone;                   /* Was that entity marked standalone */
 };

 /**
@ -95,6 +98,7 @@ typedef enum {
    XML_PARSER_ENTITY_DECL,	/* within an entity declaration */
    XML_PARSER_ENTITY_VALUE,	/* within an entity value in a decl */
    XML_PARSER_ATTRIBUTE_VALUE,	/* within an attribute value */
+    XML_PARSER_SYSTEM_LITERAL,	/* within a SYSTEM value */
    XML_PARSER_EPILOG 		/* the Misc* after the last end tag */
 } xmlParserInputState;

@ -151,7 +155,7 @@ struct _xmlParserCtxt {

    char           *directory;        /* the data directory */

-    /* Node name stack only used for HTML parsing */
+    /* Node name stack */
    xmlChar           *name;          /* Current parsed Node */
    int                nameNr;        /* Depth of the parsing stack */
    int                nameMax;       /* Max depth of the parsing stack */
@ -160,6 +164,20 @@ struct _xmlParserCtxt {
    long               nbChars;       /* number of xmlChar processed */
    long            checkIndex;       /* used by progressive parsing lookup */
    int             keepBlanks;       /* ugly but ... */
+    int             disableSAX;       /* SAX callbacks are disabled */
+    int               inSubset;       /* Parsing is in int 1/ext 2 subset */
+    xmlChar *          intSubName;    /* name of subset */
+    xmlChar *          extSubURI;     /* URI of external subset */
+    xmlChar *          extSubSystem;  /* SYSTEM ID of external subset */
+
+    /* xml:space values */
+    int *              space;         /* Should the parser preserve spaces */
+    int                spaceNr;       /* Depth of the parsing stack */
+    int                spaceMax;      /* Max depth of the parsing stack */
+    int *              spaceTab;      /* array of space infos */
+
+    int                depth;         /* to prevent entity substitution loops */
+    xmlParserInputPtr  entity;      /* used to check entities boundaries */
 };

 /**
@ -183,6 +201,8 @@ typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx,
 			    const xmlChar *publicId, const xmlChar *systemId);
 typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
                            const xmlChar *ExternalID, const xmlChar *SystemID);
+typedef void (*externalSubsetSAXFunc) (void *ctx, const xmlChar *name,
+                            const xmlChar *ExternalID, const xmlChar *SystemID);
 typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
                            const xmlChar *name);
 typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
@ -254,6 +274,7 @@ struct _xmlSAXHandler {
    fatalErrorSAXFunc fatalError;
    getParameterEntitySAXFunc getParameterEntity;
    cdataBlockSAXFunc cdataBlock;
+    externalSubsetSAXFunc externalSubset;
 };

 /**
@ -278,7 +299,7 @@ extern xmlSAXHandler htmlDefaultSAXHandler;
 */

 extern int xmlSubstituteEntitiesDefaultValue;
-
+extern int xmlGetWarningsDefaultValue;


 /**
@ -363,6 +384,20 @@ xmlDtdPtr	xmlParseDTD		(const xmlChar *ExternalID,
 xmlDtdPtr	xmlSAXParseDTD		(xmlSAXHandlerPtr sax,
 					 const xmlChar *ExternalID,
 					 const xmlChar *SystemID);
+int		xmlParseBalancedChunkMemory(xmlDocPtr doc,
+					 xmlSAXHandlerPtr sax,
+					 void *user_data,
+					 int depth,
+					 const xmlChar *string,
+					 xmlNodePtr *list);
+int		xmlParseExternalEntity	(xmlDocPtr doc,
+					 xmlSAXHandlerPtr sax,
+					 void *user_data,
+					 int depth,
+					 const xmlChar *URL,
+					 const xmlChar *ID,
+					 xmlNodePtr *list);
+
 /**
 * SAX initialization routines
 */
--- a/parserInternals.h
+++ b/parserInternals.h
@ -17,31 +17,6 @@ extern "C" {

 #define XML_MAX_NAMELEN 1000

-/**
- * A few macros needed to help building the parser.
- */
-/* #define UNICODE */
-
-#ifdef UNICODE
-typedef unsigned long CHARVAL;
-
-#define NEXTCHARVAL(p) (unsigned long)					\
-   ((*(p) == 0) ? (unsigned long) 0 :							\
-    ((*(p) < 0x80) ? (unsigned long) (*(p)++) :						\
-      (*(p) < 0xC0) ? (unsigned long) 0 :						\
-       (*(p) < 0xE0) ? ((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) :		\
-        (*(p) < 0xF0) ? (((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 +	\
-	              (*(p)++ & 0x3F)) :				\
-         (*(p) < 0xF8) ? ((((((unsigned long) *(p)++) << 6) + (*(p)++ & 0x3F)) << 6 +	\
-	                (*(p)++ & 0x3F)) << 6 + (*(p)++ & 0x3F)) : 0))
-#else
-typedef unsigned char CHARVAL;
-
-#define NEXTCHARVAL(p) (unsigned long) *(p);
-#define SKIPCHARVAL(p) (p)++;
-#endif
-
-#ifdef UNICODE
 /************************************************************************
 *									*
 * UNICODE version of the macros.      					*
@ -404,7 +379,7 @@ typedef unsigned char CHARVAL;
 #define IS_EXTENDER(c)							\
    (((c) == 0xb7) || ((c) == 0x2d0) || ((c) == 0x2d1) ||		\
     ((c) == 0x387) || ((c) == 0x640) || ((c) == 0xe46) ||		\
-     ((c) == 0xec6) || ((c) == 0x3005)					\
+     ((c) == 0xec6) || ((c) == 0x3005) ||				\
     (((c) >= 0x3031) && ((c) <= 0x3035)) ||				\
     (((c) >= 0x309b) && ((c) <= 0x309e)) ||				\
     (((c) >= 0x30fc) && ((c) <= 0x30fe)))
@ -423,65 +398,6 @@ typedef unsigned char CHARVAL;
 */
 #define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c))

-#else
-/************************************************************************
- *									*
- * 8bits / ISO-Latin version of the macros.				*
- *									*
- ************************************************************************/
-/*
- * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD]
- *                  | [#x10000-#x10FFFF]
- * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
- */
-#define IS_CHAR(c)							\
-    ((((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
-     ((c) == 0x09) || ((c) == 0x0a) || ((c) == 0x0d) ||			\
-     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
-     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
-
-/*
- * [85] BaseChar ::= ... long list see REC ...
- */
-#define IS_BASECHAR(c)							\
-     ((((c) >= 0x0041) && ((c) <= 0x005A)) ||				\
-      (((c) >= 0x0061) && ((c) <= 0x007A)) ||				\
-      (((c) >= 0x00C0) && ((c) <= 0x00D6)) ||				\
-      (((c) >= 0x00D8) && ((c) <= 0x00F6)) ||				\
-      (((c) >= 0x00F8) && ((c) <= 0x00FF)))
-
-/*
- * [88] Digit ::= ... long list see REC ...
- */
-#define IS_DIGIT(c) (((c) >= 0x30) && ((c) <= 0x39))
-
-/*
- * [84] Letter ::= BaseChar | Ideographic 
- */
-#define IS_LETTER(c) IS_BASECHAR(c)
-
-
-/*
- * [87] CombiningChar ::= ... long list see REC ...
- */
-#define IS_COMBINING(c) 0
-
-/*
- * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 |
- *                   #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] |
- *                   [#x309D-#x309E] | [#x30FC-#x30FE]
- */
-#define IS_EXTENDER(c) ((c) == 0xb7)
-
-#endif /* !UNICODE */
-
-/*
- * Blank chars.
- *
- * [3] S ::= (#x20 | #x9 | #xD | #xA)+
- */
-#define IS_BLANK(c) (((c) == 0x20) || ((c) == 0x09) || ((c) == 0xa) ||	\
-                     ((c) == 0x0D))

 /*
 * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
@ -502,10 +418,10 @@ typedef unsigned char CHARVAL;
    if (*(p) == 0x10) { p++ ; if (*(p) == 0x13) p++; }

 #define MOVETO_ENDTAG(p)						\
-    while (IS_CHAR(*p) && (*(p) != '>')) (p)++
+    while ((*p) && (*(p) != '>')) (p)++

 #define MOVETO_STARTTAG(p)						\
-    while (IS_CHAR(*p) && (*(p) != '<')) (p)++
+    while ((*p) && (*(p) != '<')) (p)++

 /**
 * Parser context
@ -514,10 +430,13 @@ xmlParserCtxtPtr	xmlCreateDocParserCtxt	(xmlChar *cur);
 xmlParserCtxtPtr	xmlCreateFileParserCtxt	(const char *filename);
 xmlParserCtxtPtr	xmlCreateMemoryParserCtxt(char *buffer,
 						 int size);
-void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
 xmlParserCtxtPtr	xmlNewParserCtxt	(void);
+xmlParserCtxtPtr	xmlCreateEntityParserCtxt(const xmlChar *URL,
+						 const xmlChar *ID,
+						 const xmlChar *base);
 void			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
 						 xmlCharEncoding enc);
+void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);

 /**
 * Entities
@ -540,7 +459,8 @@ xmlParserInputPtr	xmlNewInputFromFile	(xmlParserCtxtPtr ctxt,
 /**
 * Namespaces.
 */
-xmlChar *			xmlSplitQName		(const xmlChar *name,
+xmlChar *		xmlSplitQName		(xmlParserCtxtPtr ctxt,
+						 const xmlChar *name,
 						 xmlChar **prefix);
 xmlChar *		xmlNamespaceParseNCName	(xmlParserCtxtPtr ctxt);
 xmlChar *		xmlNamespaceParseQName	(xmlParserCtxtPtr ctxt,
@ -606,6 +526,7 @@ xmlChar *			xmlParseEncName		(xmlParserCtxtPtr ctxt);
 xmlChar *		xmlParseEncodingDecl	(xmlParserCtxtPtr ctxt);
 int			xmlParseSDDecl		(xmlParserCtxtPtr ctxt);
 void			xmlParseXMLDecl		(xmlParserCtxtPtr ctxt);
+void			xmlParseTextDecl	(xmlParserCtxtPtr ctxt);
 void			xmlParseMisc		(xmlParserCtxtPtr ctxt);
 void			xmlParseExternalSubset	(xmlParserCtxtPtr ctxt,
 						 const xmlChar *ExternalID,
@ -624,6 +545,12 @@ xmlChar *			xmlDecodeEntities	(xmlParserCtxtPtr ctxt,
 						 xmlChar end,
 						 xmlChar  end2,
 						 xmlChar end3);
+xmlChar *		xmlStringDecodeEntities	(xmlParserCtxtPtr ctxt,
+						 const xmlChar *str,
+						 int what,
+						 xmlChar end,
+						 xmlChar  end2,
+						 xmlChar end3);

 /*
 * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP
--- a/result/SVG/bike.xml
+++ b/result/SVG/bike.xml
@ -1,6 +1,5 @@
 <?xml version="1.0"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
-<!--DOCTYPE svg SYSTEM "svg-19990412.dtd"-->
 <svg width="4in" height="3in">
  <title>Kona Lavadome mountain bike
  </title>
--- a/result/VC/OneID
+++ b/result/VC/OneID
@ -1,3 +1,6 @@
 ./test/VC/OneID:4: validity error: Element doc has too may ID attributes defined : id
 <!ATTLIST doc id ID #IMPLIED>
                            ^
+./test/VC/OneID:4: validity error: Element doc has 2 ID attribute defined in the internal subset : id
+<!ATTLIST doc id ID #IMPLIED>
+                            ^
--- a/result/VC/OneID2
+++ b/result/VC/OneID2
@ -1,3 +1,6 @@
+./test/VC/OneID2:3: validity error: Element doc has 2 ID attribute defined in the internal subset : id
+<!ATTLIST doc id ID #IMPLIED>
+                            ^
 ./test/VC/OneID2:4: validity error: Element doc has too may ID attributes defined : val
 <!ELEMENT doc (#PCDATA)>
                       ^
--- a/result/VC/OneID3
+++ b/result/VC/OneID3
@ -1,3 +1,3 @@
-./test/VC/OneID3:2: validity error: Element doc has ID attribute defined in the external subset : id
-<!ATTLIST doc id ID #IMPLIED>
+dtds/doc.dtd:2: validity error: Element doc has ID attributes defined in the internal and external subset : val
+<!ATTLIST doc val ID #IMPLIED>
                             ^
--- a/result/VC/UniqueElementTypeDeclaration
+++ b/result/VC/UniqueElementTypeDeclaration
@ -1,3 +1,3 @@
-./test/VC/UniqueElementTypeDeclaration:3: validity error: Redefinition of element a
+dtds/a.dtd:1: validity error: Redefinition of element a
 <!ELEMENT a (#PCDATA | b | c)*>
                              ^
--- a/result/dtd1
+++ b/result/dtd1
@ -1,3 +1,4 @@
 <?xml version="1.0"?>
 <!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd">
-<MEMO/>
+<MEMO>
+</MEMO>
--- a/result/dtd12
+++ b/result/dtd12
@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <!DOCTYPE doc [
-<!ENTITY % YN '"Yes"'>
-<!ENTITY WhatHeSaid "He said %YN;">
+<!ENTITY YN '"Yes"'>
+<!ENTITY WhatHeSaid "He said &YN;">
 ]>
 <doc>&WhatHeSaid;</doc>
--- a/result/dtd13
+++ b/result/dtd13
@ -0,0 +1,7 @@
+<?xml version="1.0"?>
+<!-- comment before the DTD -->
+<!DOCTYPE doc [
+<!ELEMENT doc ANY>
+]>
+<!-- comment after the DTD -->
+<doc/>
--- a/result/ent5
+++ b/result/ent5
@ -1,5 +1,5 @@
 <?xml version="1.0"?>
 <EXAMPLE>
-    This is an inverted exclamation sign &#161;
+    This is an inverted exclamation sign &#xA1;
    This is a space   
 </EXAMPLE>
--- a/result/ent7
+++ b/result/ent7
@ -6,5 +6,5 @@
 <!ELEMENT para (#PCDATA)>
 ]>
 <item>
-  <para>&apos;they called me &sampleEnt;&apos;</para>
+<para>'they called me &sampleEnt;'</para>
 </item>
--- a/result/ent8
+++ b/result/ent8
@ -4,7 +4,7 @@
 <!ENTITY test2 "test 2">
 ]>
 <doc>
-  <Content>Reten&#231;&#227;o</Content>
+   <Content>Reten&#xE7;&#xE3;o</Content>
   <Content>&lt;&gt;</Content>
   <Content>&test1;&test2;</Content>
 </doc>
--- a/result/eve.xml
+++ b/result/eve.xml
@ -2,4 +2,5 @@
 <!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [
 <!ENTITY iso6.doc.date "29-May-1999">
 ]>
-<spec/>
+<spec>
+</spec>
--- a/result/noent/dtd1
+++ b/result/noent/dtd1
@ -1,3 +1,4 @@
 <?xml version="1.0"?>
 <!DOCTYPE MEMO PUBLIC "-//SGMLSOURCE//DTD MEMO//EN" "http://www.sgmlsource.com/dtds/memo.dtd">
-<MEMO/>
+<MEMO>
+</MEMO>
--- a/result/noent/dtd12
+++ b/result/noent/dtd12
@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <!DOCTYPE doc [
-<!ENTITY % YN '"Yes"'>
-<!ENTITY WhatHeSaid "He said %YN;">
+<!ENTITY YN '"Yes"'>
+<!ENTITY WhatHeSaid "He said &YN;">
 ]>
-<doc>He said &quot;Yes&quot;</doc>
+<doc>He said &amp;YN;</doc>
--- a/result/noent/ent5
+++ b/result/noent/ent5
@ -1,5 +1,5 @@
 <?xml version="1.0"?>
 <EXAMPLE>
-    This is an inverted exclamation sign &#161;
+    This is an inverted exclamation sign &#xA1;
    This is a space   
 </EXAMPLE>
--- a/result/noent/ent7
+++ b/result/noent/ent7
@ -6,5 +6,5 @@
 <!ELEMENT para (#PCDATA)>
 ]>
 <item>
-  <para>&apos;they called me the hyacinth girl&apos;</para>
+<para>'they called me the hyacinth girl'</para>
 </item>
--- a/result/noent/ent8
+++ b/result/noent/ent8
@ -4,7 +4,7 @@
 <!ENTITY test2 "test 2">
 ]>
 <doc>
-  <Content>Reten&#231;&#227;o</Content>
+   <Content>Reten&#xE7;&#xE3;o</Content>
   <Content>&lt;&gt;</Content>
   <Content>test 1test 2</Content>
 </doc>
--- a/result/noent/eve.xml
+++ b/result/noent/eve.xml
@ -2,4 +2,5 @@
 <!DOCTYPE spec PUBLIC "-//testspec//" "dtds/eve.dtd" [
 <!ENTITY iso6.doc.date "29-May-1999">
 ]>
-<spec/>
+<spec>
+</spec>
--- a/result/noent/p3p
+++ b/result/noent/p3p
@ -2,14 +2,12 @@
 <RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD">
 <PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org">
  <USES>
-      <STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you&apos;d appreciate.">
-        <WITH>
-          <PREFIX name="User.">
+  <STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you'd appreciate.">
+    <WITH><PREFIX name="User.">
     <REF name="Name.First"/>
     <REF name="Bdate.Year" optional="1"/>
     <REF name="Gender"/>
-          </PREFIX>
-        </WITH>
+    </PREFIX></WITH>
  </STATEMENT>
  </USES>
  <USES>
@ -18,5 +16,4 @@
  </STATEMENT>
  </USES>
  <DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/>
-  </PROP>
-</RDF:RDF>
+</PROP></RDF:RDF>
--- a/result/noent/rdf2
+++ b/result/noent/rdf2
@ -11,11 +11,11 @@
    <RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager>
    <RPM:Group>Libraries</RPM:Group>
    <RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary>
-    <RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#228;ngige
-Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#252;gung, die
+    <RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#xE4;ngige
+Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#xFC;gung, die
 speziell optimiert sind.
-Diese Version ist die &apos;new curses&apos; (ncurses) Variante und ist der
-anerkannte Ersatz f&#252;r die klassische Curses-Library, die nicht mehr
+Diese Version ist die 'new curses' (ncurses) Variante und ist der
+anerkannte Ersatz f&#xFC;r die klassische Curses-Library, die nicht mehr
 weiterentwickelt wird.</RPM:Description>
    <RPM:Copyright>GPL</RPM:Copyright>
    <RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources>
--- a/result/noent/slashdot.rdf
+++ b/result/noent/slashdot.rdf
@ -1,51 +1,63 @@
 <?xml version="1.0"?>
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
+
  <channel>
    <title>Slashdot:News for Nerds. Stuff that Matters.</title>
    <link>http://slashdot.org/</link>
    <description>News for Nerds.  Stuff that Matters</description>
  </channel>
+
  <image>
    <title>Slashdot</title>
    <url>http://slashdot.org/images/slashdotlg.gif</url>
    <link>http://slashdot.org</link>
  </image>
+  
  <item>
    <title>100 Mbit/s on Fibre to the home</title>
    <link>http://slashdot.org/articles/99/06/06/1440211.shtml</link>
  </item>
+  
  <item>
    <title>Gimp 1.2 Preview</title>
    <link>http://slashdot.org/articles/99/06/06/1438246.shtml</link>
  </item>
+  
  <item>
-    <title>Sony&apos;s AIBO robot Sold Out</title>
+    <title>Sony's AIBO robot Sold Out</title>
    <link>http://slashdot.org/articles/99/06/06/1432256.shtml</link>
  </item>
+  
  <item>
    <title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title>
    <link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link>
  </item>
+  
  <item>
    <title>Corel Linux FAQ</title>
    <link>http://slashdot.org/articles/99/06/05/1842218.shtml</link>
  </item>
+  
  <item>
    <title>Upside downsides MP3.COM.</title>
    <link>http://slashdot.org/articles/99/06/05/1558210.shtml</link>
  </item>
+  
  <item>
    <title>2 Terabits of Bandwidth</title>
    <link>http://slashdot.org/articles/99/06/05/1554258.shtml</link>
  </item>
+  
  <item>
    <title>Suppression of cold fusion research?</title>
    <link>http://slashdot.org/articles/99/06/04/2313200.shtml</link>
  </item>
+  
  <item>
    <title>California Gov. Halts Wage Info Sale</title>
    <link>http://slashdot.org/articles/99/06/04/235256.shtml</link>
  </item>
+  
  <item>
    <title>Red Hat Announces IPO</title>
    <link>http://slashdot.org/articles/99/06/04/0849207.shtml</link>
--- a/result/noent/slashdot.xml
+++ b/result/noent/slashdot.xml
@ -5,7 +5,7 @@
    <url>http://slashdot.org/articles/99/06/06/1440211.shtml</url>
    <time>1999-06-06 14:39:59</time>
    <author>CmdrTaco</author>
-    <department>wouldn&apos;t-it-be-nice</department>
+    <department>wouldn't-it-be-nice</department>
    <topic>internet</topic>
    <comments>20</comments>
    <section>articles</section>
@ -23,7 +23,7 @@
    <image>topicgimp.gif</image>
  </story>
 <story>
-    <title>Sony&apos;s AIBO robot Sold Out</title>
+    <title>Sony's AIBO robot Sold Out</title>
    <url>http://slashdot.org/articles/99/06/06/1432256.shtml</url>
    <time>1999-06-06 14:32:51</time>
    <author>CmdrTaco</author>
--- a/result/noent/svg1
+++ b/result/noent/svg1
@ -1,7 +1,8 @@
 <?xml version="1.0" standalone="yes"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
 <svg width="242px" height="383px">
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="fill: #f2cc99"> 
   <polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/> 
 </g> 
@ -157,5 +158,4 @@
   <polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/> 
   <polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/> 
   <polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/> 
-  </g>
-</svg>
+</g></svg>
--- a/result/noent/svg2
+++ b/result/noent/svg2
@ -8,7 +8,8 @@
 <g style="stroke: #800040"> 
   <polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/> 
 </g> 
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="stroke: #0000ff"> 
   <rect x="30" y="101" width="51" height="33"/>
 </g> 
@ -38,11 +39,13 @@
 <g style="stroke: #008080"> 
   <text x="176" y="85">sadfsadfsad</text>
 </g> 
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="fill: #800040"> 
   <ellipse cx="208" cy="180" major="45" minor="31" angle="0"/>
 </g> 
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="fill: #ffffff"> 
   <g>
     <desc> Java Font definition:Dialog 700</desc> 
@ -50,5 +53,4 @@
   <g>
     <desc> Java Font definition:Dialog 700</desc> 
   </g>
-  </g>
-</svg>
+</g></svg>
--- a/result/noent/xml2
+++ b/result/noent/xml2
@ -1,8 +1,8 @@
 <?xml version="1.0"?>
 <!DOCTYPE test [
+<!ELEMENT test (#PCDATA)>
 <!ENTITY % xx "&#37;zz;">
 <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'>
 <!ENTITY tricky "error-prone">
-<!ELEMENT test (#PCDATA)>
 ]>
 <test>This sample shows a error-prone method.</test>
--- a/result/p3p
+++ b/result/p3p
@ -2,14 +2,12 @@
 <RDF:RDF xmlns:RDF="http://www.w3.org/TR/WD-rdf-syntax#" p3p="http//www.w3.org/TR/1998/WD-P3P10-syntax#proposal.DTD">
 <PROP realm="http://www.CoolCatalog.com/catalogue/" entity="CoolCatalog" agreeID="94df1293a3e519bb" assurance="http://www.TrustUs.org">
  <USES>
-      <STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you&apos;d appreciate.">
-        <WITH>
-          <PREFIX name="User.">
+  <STATEMENT purp="2,3" recpnt="0" id="0" consq="a site with clothes you'd appreciate.">
+    <WITH><PREFIX name="User.">
     <REF name="Name.First"/>
     <REF name="Bdate.Year" optional="1"/>
     <REF name="Gender"/>
-          </PREFIX>
-        </WITH>
+    </PREFIX></WITH>
  </STATEMENT>
  </USES>
  <USES>
@ -18,5 +16,4 @@
  </STATEMENT>
  </USES>
  <DISCLOSURE discURI="http://www.CoolCatalog.com/PrivacyPractice.html" access="3" other="0,1"/>
-  </PROP>
-</RDF:RDF>
+</PROP></RDF:RDF>
--- a/result/rdf2
+++ b/result/rdf2
@ -11,11 +11,11 @@
    <RPM:Packager>Till Bubeck &lt;bubeck@delix.de&gt;, Ngo Than &lt;than@delix.de&gt;</RPM:Packager>
    <RPM:Group>Libraries</RPM:Group>
    <RPM:Summary>Bibliothek zur Ansteuerung von Terminals</RPM:Summary>
-    <RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#228;ngige
-Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#252;gung, die
+    <RPM:Description>Diese Library stellt dem Programmierer vom Terminal unabh&#xE4;ngige
+Routinen zur Ansteuerung Ihres Bildschirms zur Verf&#xFC;gung, die
 speziell optimiert sind.
-Diese Version ist die &apos;new curses&apos; (ncurses) Variante und ist der
-anerkannte Ersatz f&#252;r die klassische Curses-Library, die nicht mehr
+Diese Version ist die 'new curses' (ncurses) Variante und ist der
+anerkannte Ersatz f&#xFC;r die klassische Curses-Library, die nicht mehr
 weiterentwickelt wird.</RPM:Description>
    <RPM:Copyright>GPL</RPM:Copyright>
    <RPM:Sources>ncurses4-4.2-3.src.rpm</RPM:Sources>
--- a/result/slashdot.rdf
+++ b/result/slashdot.rdf
@ -1,51 +1,63 @@
 <?xml version="1.0"?>
 <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns="http://my.netscape.com/rdf/simple/0.9/">
+
  <channel>
    <title>Slashdot:News for Nerds. Stuff that Matters.</title>
    <link>http://slashdot.org/</link>
    <description>News for Nerds.  Stuff that Matters</description>
  </channel>
+
  <image>
    <title>Slashdot</title>
    <url>http://slashdot.org/images/slashdotlg.gif</url>
    <link>http://slashdot.org</link>
  </image>
+  
  <item>
    <title>100 Mbit/s on Fibre to the home</title>
    <link>http://slashdot.org/articles/99/06/06/1440211.shtml</link>
  </item>
+  
  <item>
    <title>Gimp 1.2 Preview</title>
    <link>http://slashdot.org/articles/99/06/06/1438246.shtml</link>
  </item>
+  
  <item>
-    <title>Sony&apos;s AIBO robot Sold Out</title>
+    <title>Sony's AIBO robot Sold Out</title>
    <link>http://slashdot.org/articles/99/06/06/1432256.shtml</link>
  </item>
+  
  <item>
    <title>Ask Slashdot: Another Word for &quot;Hacker&quot;?</title>
    <link>http://slashdot.org/askslashdot/99/06/05/1815225.shtml</link>
  </item>
+  
  <item>
    <title>Corel Linux FAQ</title>
    <link>http://slashdot.org/articles/99/06/05/1842218.shtml</link>
  </item>
+  
  <item>
    <title>Upside downsides MP3.COM.</title>
    <link>http://slashdot.org/articles/99/06/05/1558210.shtml</link>
  </item>
+  
  <item>
    <title>2 Terabits of Bandwidth</title>
    <link>http://slashdot.org/articles/99/06/05/1554258.shtml</link>
  </item>
+  
  <item>
    <title>Suppression of cold fusion research?</title>
    <link>http://slashdot.org/articles/99/06/04/2313200.shtml</link>
  </item>
+  
  <item>
    <title>California Gov. Halts Wage Info Sale</title>
    <link>http://slashdot.org/articles/99/06/04/235256.shtml</link>
  </item>
+  
  <item>
    <title>Red Hat Announces IPO</title>
    <link>http://slashdot.org/articles/99/06/04/0849207.shtml</link>
--- a/result/slashdot.xml
+++ b/result/slashdot.xml
@ -5,7 +5,7 @@
    <url>http://slashdot.org/articles/99/06/06/1440211.shtml</url>
    <time>1999-06-06 14:39:59</time>
    <author>CmdrTaco</author>
-    <department>wouldn&apos;t-it-be-nice</department>
+    <department>wouldn't-it-be-nice</department>
    <topic>internet</topic>
    <comments>20</comments>
    <section>articles</section>
@ -23,7 +23,7 @@
    <image>topicgimp.gif</image>
  </story>
 <story>
-    <title>Sony&apos;s AIBO robot Sold Out</title>
+    <title>Sony's AIBO robot Sold Out</title>
    <url>http://slashdot.org/articles/99/06/06/1432256.shtml</url>
    <time>1999-06-06 14:32:51</time>
    <author>CmdrTaco</author>
--- a/result/svg1
+++ b/result/svg1
@ -1,7 +1,8 @@
 <?xml version="1.0" standalone="yes"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG April 1999//EN" "http://www.w3.org/Graphics/SVG/svg-19990412.dtd">
 <svg width="242px" height="383px">
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="fill: #f2cc99"> 
   <polyline verts=" 69,18 82,8 99,3 118,5 135,12 149,21 156,13 165,9 177,13 183,28 180,50 164,91 155,107 154,114 151,121 141,127 139,136 155,206 157,251 126,342 133,357 128,376 83,376 75,368 67,350 61,350 53,369 4,369 2,361 5,354 12,342 16,321 4,257 4,244 7,218 9,179 26,127 43,93 32,77 30,70 24,67 16,49 17,35 18,23 30,12 40,7 53,7 62,12 69,18 69,18 69,18"/> 
 </g> 
@ -157,5 +158,4 @@
   <polyline verts=" 147,338 142,341 143,345 141,354 147,343 147,338 147,338 147,338"/> 
   <polyline verts=" 157,342 156,349 150,356 157,353 163,346 162,342 157,342 157,342 157,342"/> 
   <polyline verts=" 99,265 96,284 92,299 73,339 73,333 87,300 99,265 99,265 99,265"/> 
-  </g>
-</svg>
+</g></svg>
--- a/result/svg2
+++ b/result/svg2
@ -8,7 +8,8 @@
 <g style="stroke: #800040"> 
   <polyline verts=" 32,100 72,50 90,82 73,16 120,64 152,9 177,107"/> 
 </g> 
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="stroke: #0000ff"> 
   <rect x="30" y="101" width="51" height="33"/>
 </g> 
@ -38,11 +39,13 @@
 <g style="stroke: #008080"> 
   <text x="176" y="85">sadfsadfsad</text>
 </g> 
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="fill: #800040"> 
   <ellipse cx="208" cy="180" major="45" minor="31" angle="0"/>
 </g> 
-  <g style="stroke: #000000"/>
+<g style="stroke: #000000"> 
+</g> 
 <g style="fill: #ffffff"> 
   <g>
     <desc> Java Font definition:Dialog 700</desc> 
@ -50,5 +53,4 @@
   <g>
     <desc> Java Font definition:Dialog 700</desc> 
   </g>
-  </g>
-</svg>
+</g></svg>
--- a/result/valid/REC-xml-19980210.xml
+++ b/result/valid/REC-xml-19980210.xml
@ -1,6 +1,8 @@
 <?xml version="1.0" encoding="ISO-8859-1" standalone="no"?>
 <!DOCTYPE spec SYSTEM "dtds/spec.dtd" [
-<!ENTITY XML.version "1.0">
+<!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 --><!-- The words 'FINAL EDIT' in comments mark places where changes
+need to be made after approval of the document by the ERB, before
+publication.  --><!ENTITY XML.version "1.0">
 <!ENTITY doc.date "10 February 1998">
 <!ENTITY iso6.doc.date "19980210">
 <!ENTITY w3c.doc.date "02-Feb-1998">
@ -15,27 +17,20 @@
 <!ENTITY br "\n">
 <!ENTITY cellback "#c0d9c0">
 <!ENTITY mdash "--">
-<!ENTITY com "--">
+<!-- &#x2014, but nsgmls doesn't grok hex --><!ENTITY com "--">
 <!ENTITY como "--">
 <!ENTITY comc "--">
 <!ENTITY hcro "&amp;#x">
-<!ENTITY nbsp "&#160;">
+<!-- <!ENTITY nbsp "<22>"> --><!ENTITY nbsp "&#160;">
 <!ENTITY magicents "<code>amp</code>,
 <code>lt</code>,
 <code>gt</code>,
 <code>apos</code>,
 <code>quot</code>">
-<!ENTITY doc.audience "public review and discussion">
+<!-- audience and distribution status:  for use at publication time --><!ENTITY doc.audience "public review and discussion">
 <!ENTITY doc.distribution "may be distributed freely, as long as
 all text and legal notices remain intact">
 ]>
-<!-- LAST TOUCHED BY: Tim Bray, 8 February 1997 -->
-<!-- The words 'FINAL EDIT' in comments mark places where changes
-need to be made after approval of the document by the ERB, before
-publication.  -->
-<!-- &#x2014, but nsgmls doesn't grok hex -->
-<!-- <!ENTITY nbsp "<22>"> -->
-<!-- audience and distribution status:  for use at publication time -->
 <!-- for Panorama *-->
 <?VERBATIM "eg" ?>
 <spec>
@ -110,7 +105,7 @@ HTML.</p>
 other interested parties and has been endorsed by the
 Director as a W3C Recommendation. It is a stable
 document and may be used as reference material or cited
-as a normative reference from another document. W3C&apos;s
+as a normative reference from another document. W3C's
 role in making the Recommendation is to draw attention
 to the specification and to promote its widespread
 deployment. This enhances the functionality and
@ -155,24 +150,24 @@ entify hard-coded document date in pubdate element,
 change expansion of entity WebSGML,
 update status description as per Dan Connolly (am not sure
 about refernece to Berners-Lee et al.),
-add &apos;The&apos; to abstract as per WG decision,
+add 'The' to abstract as per WG decision,
 move Relationship to Existing Standards to back matter and
 combine with References,
 re-order back matter so normative appendices come first,
 re-tag back matter so informative appendices are tagged informdiv1,
-remove XXX XXX from list of &apos;normative&apos; specs in prose,
+remove XXX XXX from list of 'normative' specs in prose,
 move some references from Other References to Normative References,
 add RFC 1738, 1808, and 2141 to Other References (they are not
 normative since we do not require the processor to enforce any 
 rules based on them),
-add reference to &apos;Fielding draft&apos; (Berners-Lee et al.),
+add reference to 'Fielding draft' (Berners-Lee et al.),
 move notation section to end of body,
 drop URIchar non-terminal and use SkipLit instead,
-lose stray reference to defunct nonterminal &apos;markupdecls&apos;,
-move reference to Aho et al. into appendix (Tim&apos;s right),
+lose stray reference to defunct nonterminal 'markupdecls',
+move reference to Aho et al. into appendix (Tim's right),
 add prose note saying that hash marks and fragment identifiers are
 NOT part of the URI formally speaking, and are NOT legal in 
-system identifiers (processor &apos;may&apos; signal an error).
+system identifiers (processor 'may' signal an error).
 Work through:
 Tim Bray reacting to James Clark,
 Tim Bray on his own,
@ -180,7 +175,7 @@ Eve Maler,

 NOT DONE YET:
 change binary / text to unparsed / parsed.
-handle James&apos;s suggestion about &lt; in attriubte values
+handle James's suggestion about &lt; in attriubte values
 uppercase hex characters,
 namechar list,
 </sitem>
@ -193,7 +188,7 @@ drop SDD from EncodingDecl,
 change text at version number 1.0,
 drop misleading (wrong!) sentence about ignorables and extenders,
 modify definition of PCData to make bar on msc grammatical,
-change grammar&apos;s handling of internal subset (drop non-terminal markupdecls),
+change grammar's handling of internal subset (drop non-terminal markupdecls),
 change definition of includeSect to allow conditional sections,
 add integral-declaration constraint on internal subset,
 drop misleading / dangerous sentence about relationship of
@ -207,14 +202,14 @@ Unicode character database (needs further work!).
 for PE appearance.</sitem>
 <sitem>1997-10-01 : TB : Case-sensitive markup; cleaned up
 element-type defs, lotsa little edits for style</sitem>
-        <sitem>1997-09-25 : TB : Change to elm&apos;s new DTD, with
+<sitem>1997-09-25 : TB : Change to elm's new DTD, with
 substantial detail cleanup as a side-effect</sitem>
 <sitem>1997-07-24 : CMSMcQ : correct error (lost *) in definition 
 of ignoreSectContents (thanks to Makoto Murata)</sitem>
 <sitem>Allow all empty elements to have end-tags, consistent with
 SGML TC (as per JJC).</sitem>
 <sitem>1997-07-23 : CMSMcQ : pre-emptive strike on pending corrections:
-introduce the term &apos;empty-element tag&apos;, note that all empty elements
+introduce the term 'empty-element tag', note that all empty elements
 may use it, and elements declared EMPTY must use it.
 Add WFC requiring encoding decl to come first in an entity.
 Redefine notations to point to PIs as well as binary entities.
@ -225,15 +220,15 @@ mixed and element content.
 </sitem>
 <sitem>1997-06-30 : CMSMcQ : change date, some cosmetic changes,
 changes to productions for choice, seq, Mixed, NotationType,
-Enumeration.  Follow James Clark&apos;s suggestion and prohibit 
+Enumeration.  Follow James Clark's suggestion and prohibit 
 conditional sections in internal subset.  TO DO:  simplify
-production for ignored sections as a result, since we don&apos;t 
-need to worry about parsers which don&apos;t expand PErefs finding
+production for ignored sections as a result, since we don't 
+need to worry about parsers which don't expand PErefs finding
 a conditional section.</sitem>
 <sitem>1997-06-29 : TB : various edits</sitem>
 <sitem>1997-06-29 : CMSMcQ : further changes:
 Suppress old FINAL EDIT comments and some dead material.
-Revise occurrences of % in grammar to exploit Henry Thompson&apos;s pun,
+Revise occurrences of % in grammar to exploit Henry Thompson's pun,
 especially markupdecl and attdef.
 Remove RMD requirement relating to element content (?).
 </sitem>
@ -249,8 +244,8 @@ Change def of %operator.
 Change standard definitions of lt, gt, amp.
 Strip leading zeros from #x00nn forms.</sitem>
 <sitem>1997-04-02 : CMSMcQ : final corrections of editorial errors
-found in last night&apos;s proofreading.  Reverse course once more on
-well-formed:   Webster&apos;s Second hyphenates it, and that&apos;s enough
+found in last night's proofreading.  Reverse course once more on
+well-formed:   Webster's Second hyphenates it, and that's enough
 for me.</sitem>
 <sitem>1997-04-01 : CMSMcQ : corrections from JJC, EM, HT, and self</sitem>
 <sitem>1997-03-31 : Tim Bray : many changes</sitem>
@ -265,11 +260,11 @@ Paul Grosso, and self.  Among other things:  give in on &quot;well formed&quot;
 (Terry is right), tentatively rename QuotedCData as AttValue
 and Literal as EntityValue to be more informative, since attribute
 values are the <emph>only</emph> place QuotedCData was used, and
-vice versa for entity text and Literal. (I&apos;d call it Entity Text, 
+vice versa for entity text and Literal. (I'd call it Entity Text, 
 but 8879 uses that name for both internal and external entities.)</sitem>
 <sitem>1997-03-26 : CMSMcQ : resynch the two forks of this draft, reapply
-my changes dated 03-20 and 03-21.  Normalize old &apos;may not&apos; to &apos;must not&apos;
-except in the one case where it meant &apos;may or may not&apos;.</sitem>
+my changes dated 03-20 and 03-21.  Normalize old 'may not' to 'must not'
+except in the one case where it meant 'may or may not'.</sitem>
 <sitem>1997-03-21 : TB : massive changes on plane flight from Chicago
 to Vancouver</sitem>
 <sitem>1997-03-21 : CMSMcQ : correct as many reported errors as possible.
@ -280,12 +275,12 @@ WWW conference April 1997:  restore some of the internal entity
 references (e.g. to docdate, etc.), change character xA0 to &amp;nbsp;
 and define nbsp as &amp;#160;, and refill a lot of paragraphs for
 legibility.</sitem>
-        <sitem>1996-11-12 : CMSMcQ : revise using Tim&apos;s edits:
+<sitem>1996-11-12 : CMSMcQ : revise using Tim's edits:
 Add list type of NUMBERED and change most lists either to
 BULLETS or to NUMBERED.
 Suppress QuotedNames, Names (not used).
 Correct trivial-grammar doc type decl.
-Rename &apos;marked section&apos; as &apos;CDATA section&apos; passim.
+Rename 'marked section' as 'CDATA section' passim.
 Also edits from James Clark:
 Define the set of characters from which [^abc] subtracts.
 Charref should use just [0-9] not Digit.
@ -293,9 +288,9 @@ Location info needs cleaner treatment:  remove?  (ERB
 question).
 One example of a PI has wrong pic.
 Clarify discussion of encoding names.
-Encoding failure should lead to unspecified results; don&apos;t
+Encoding failure should lead to unspecified results; don't
 prescribe error recovery.
-Don&apos;t require exposure of entity boundaries.
+Don't require exposure of entity boundaries.
 Ignore white space in element content.
 Reserve entity names of the form u-NNNN.
 Clarify relative URLs.
@ -313,17 +308,17 @@ Finish stylistic revision.</sitem>
 <sitem>1996-10-31 : TB : Add Entity Handling section</sitem>
 <sitem>1996-10-30 : TB : Clean up term &amp; termdef.  Slip in
 ERB decision re EMPTY.</sitem>
-        <sitem>1996-10-28 : TB : Change DTD.  Implement some of Michael&apos;s
+<sitem>1996-10-28 : TB : Change DTD.  Implement some of Michael's
 suggestions.  Change comments back to //.  Introduce language for
 XML namespace reservation.  Add section on white-space handling.
 Lots more cleanup.</sitem>
 <sitem>1996-10-24 : CMSMcQ : quick tweaks, implement some ERB
 decisions.  Characters are not integers.  Comments are /* */ not //.
 Add bibliographic refs to 10646, HyTime, Unicode.
-Rename old Cdata as MsData since it&apos;s <emph>only</emph> seen
+Rename old Cdata as MsData since it's <emph>only</emph> seen
 in marked sections.  Call them attribute-value pairs not
 name-value pairs, except once.  Internal subset is optional, needs
-&apos;?&apos;.  Implied attributes should be signaled to the app, not
+'?'.  Implied attributes should be signaled to the app, not
 have values supplied by processor.</sitem>
 <sitem>1996-10-16 : TB : track down &amp; excise all DSD references;
 introduce some EBNF for entity declarations.</sitem>
@ -340,10 +335,10 @@ Move old 2.2 XML Processors and Apps into intro.
 Mention comments, PIs, and marked sections in discussion of
 delimiter escaping.
 Streamline discussion of doctype decl syntax.
-Drop old section of &apos;PI syntax&apos; for doctype decl, and add
+Drop old section of 'PI syntax' for doctype decl, and add
 section on partial-DTD summary PIs to end of Logical Structures
 section.
-Revise DSD syntax section to use Tim&apos;s subset-in-a-PI
+Revise DSD syntax section to use Tim's subset-in-a-PI
 mechanism.</sitem>
 <sitem>1996-10-10 : TB : eliminate name recognizers (and more?)</sitem>
 <sitem>1996-10-09 : CMSMcQ : revise for style, consistency through 2.3
@ -381,7 +376,7 @@ Parsed data is made up of <termref def="dt-character">characters</termref>,
 some 
 of which form <termref def="dt-chardata">character data</termref>, 
 and some of which form <termref def="dt-markup">markup</termref>.
-Markup encodes a description of the document&apos;s storage layout and
+Markup encodes a description of the document's storage layout and
 logical structure. XML provides a mechanism to impose constraints on
 the storage layout and logical structure.</p>
 <p><termdef id="dt-xml-proc" term="XML Processor">A software module
@ -400,7 +395,7 @@ It was chaired by Jon Bosak of Sun
 Microsystems with the active participation of an XML Special
 Interest Group (previously known as the SGML Working Group) also
 organized by the W3C. The membership of the XML Working Group is given
-in an appendix. Dan Connolly served as the WG&apos;s contact with the W3C.
+in an appendix. Dan Connolly served as the WG's contact with the W3C.
 </p>
 <p>The design goals for XML are:<olist><item><p>XML shall be straightforwardly usable over the
 Internet.</p></item><item><p>XML shall support a wide variety of applications.</p></item><item><p>XML shall be compatible with SGML.</p></item><item><p>It shall be easy to write programs which process XML
@ -447,7 +442,7 @@ the processor may make unprocessed data from the document (with
 intermingled character data and markup) available to the application.
 Once a fatal error is detected, however, the processor must not
 continue normal processing (i.e., it must not
-continue to pass character data and information about the document&apos;s
+continue to pass character data and information about the document's
 logical structure to the application in the normal way).
 </termdef></p></def></gitem><gitem><label>at user option</label><def><p>Conforming software may or must (depending on the modal verb in the
 sentence) behave as described; if it does, it must
@ -608,7 +603,7 @@ beginning with a letter or one of a few punctuation characters, and continuing
 with letters, digits, hyphens, underscores, colons, or full stops, together
 known as name characters.</termdef>
 Names beginning with the string &quot;<code>xml</code>&quot;, or any string
-which would match <code>((&apos;X&apos;|&apos;x&apos;) (&apos;M&apos;|&apos;m&apos;) (&apos;L&apos;|&apos;l&apos;))</code>, are
+which would match <code>(('X'|'x') ('M'|'m') ('L'|'l'))</code>, are
 reserved for standardization in this or future versions of this
 specification.
 </p>
@ -629,9 +624,9 @@ should accept the colon as a name character.</p>
 name characters.
 <scrap lang="ebnf"><head>Names and Tokens</head><prod id="NT-NameChar"><lhs>NameChar</lhs><rhs><nt def="NT-Letter">Letter</nt> 
 | <nt def="NT-Digit">Digit</nt> 
-| &apos;.&apos; | &apos;-&apos; | &apos;_&apos; | &apos;:&apos;
+| '.' | '-' | '_' | ':'
 | <nt def="NT-CombiningChar">CombiningChar</nt> 
-| <nt def="NT-Extender">Extender</nt></rhs></prod><prod id="NT-Name"><lhs>Name</lhs><rhs>(<nt def="NT-Letter">Letter</nt> | &apos;_&apos; | &apos;:&apos;)
+| <nt def="NT-Extender">Extender</nt></rhs></prod><prod id="NT-Name"><lhs>Name</lhs><rhs>(<nt def="NT-Letter">Letter</nt> | '_' | ':')
 (<nt def="NT-NameChar">NameChar</nt>)*</rhs></prod><prod id="NT-Names"><lhs>Names</lhs><rhs><nt def="NT-Name">Name</nt> 
 (<nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt>)*</rhs></prod><prod id="NT-Nmtoken"><lhs>Nmtoken</lhs><rhs>(<nt def="NT-NameChar">NameChar</nt>)+</rhs></prod><prod id="NT-Nmtokens"><lhs>Nmtokens</lhs><rhs><nt def="NT-Nmtoken">Nmtoken</nt> (<nt def="NT-S">S</nt> <nt def="NT-Nmtoken">Nmtoken</nt>)*</rhs></prod></scrap>
 </p>
@ -645,30 +640,30 @@ and external identifiers
 (<nt def="NT-SystemLiteral">SystemLiteral</nt>).  
 Note that a <nt def="NT-SystemLiteral">SystemLiteral</nt>
 can be parsed without scanning for markup.
-<scrap lang="ebnf"><head>Literals</head><prod id="NT-EntityValue"><lhs>EntityValue</lhs><rhs>&apos;&quot;&apos; 
+<scrap lang="ebnf"><head>Literals</head><prod id="NT-EntityValue"><lhs>EntityValue</lhs><rhs>'&quot;' 
 ([^%&amp;&quot;] 
 | <nt def="NT-PEReference">PEReference</nt> 
 | <nt def="NT-Reference">Reference</nt>)*
-&apos;&quot;&apos; 
+'&quot;' 
 </rhs><rhs>|&nbsp; 
-&quot;&apos;&quot; 
-([^%&amp;&apos;] 
+&quot;'&quot; 
+([^%&amp;'] 
 | <nt def="NT-PEReference">PEReference</nt> 
 | <nt def="NT-Reference">Reference</nt>)* 
-&quot;&apos;&quot;</rhs></prod><prod id="NT-AttValue"><lhs>AttValue</lhs><rhs>&apos;&quot;&apos; 
+&quot;'&quot;</rhs></prod><prod id="NT-AttValue"><lhs>AttValue</lhs><rhs>'&quot;' 
 ([^&lt;&amp;&quot;] 
 | <nt def="NT-Reference">Reference</nt>)* 
-&apos;&quot;&apos; 
+'&quot;' 
 </rhs><rhs>|&nbsp; 
-&quot;&apos;&quot; 
-([^&lt;&amp;&apos;] 
+&quot;'&quot; 
+([^&lt;&amp;'] 
 | <nt def="NT-Reference">Reference</nt>)* 
-&quot;&apos;&quot;</rhs></prod><prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs><rhs>(&apos;&quot;&apos; [^&quot;]* &apos;&quot;&apos;) |&nbsp;(&quot;&apos;&quot; [^&apos;]* &quot;&apos;&quot;)
-</rhs></prod><prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs><rhs>&apos;&quot;&apos; <nt def="NT-PubidChar">PubidChar</nt>* 
-&apos;&quot;&apos; 
-| &quot;&apos;&quot; (<nt def="NT-PubidChar">PubidChar</nt> - &quot;&apos;&quot;)* &quot;&apos;&quot;</rhs></prod><prod id="NT-PubidChar"><lhs>PubidChar</lhs><rhs>#x20 | #xD | #xA 
+&quot;'&quot;</rhs></prod><prod id="NT-SystemLiteral"><lhs>SystemLiteral</lhs><rhs>('&quot;' [^&quot;]* '&quot;') |&nbsp;(&quot;'&quot; [^']* &quot;'&quot;)
+</rhs></prod><prod id="NT-PubidLiteral"><lhs>PubidLiteral</lhs><rhs>'&quot;' <nt def="NT-PubidChar">PubidChar</nt>* 
+'&quot;' 
+| &quot;'&quot; (<nt def="NT-PubidChar">PubidChar</nt> - &quot;'&quot;)* &quot;'&quot;</rhs></prod><prod id="NT-PubidChar"><lhs>PubidChar</lhs><rhs>#x20 | #xD | #xA 
 |&nbsp;[a-zA-Z0-9]
-|&nbsp;[-&apos;()+,./:=?;!*#@$_%]</rhs></prod></scrap>
+|&nbsp;[-'()+,./:=?;!*#@$_%]</rhs></prod></scrap>
 </p>
 </div2>
 <div2 id="syntax">
@ -729,10 +724,10 @@ is any string of characters not including the CDATA-section-close
 delimiter, &quot;<code>]]&gt;</code>&quot;.</p>
 <p>
 To allow attribute values to contain both single and double quotes, the
-apostrophe or single-quote character (&apos;) may be represented as
+apostrophe or single-quote character (') may be represented as
 &quot;<code>&amp;apos;</code>&quot;, and the double-quote character (&quot;) as
 &quot;<code>&amp;quot;</code>&quot;.
-<scrap lang="ebnf"><head>Character Data</head><prod id="NT-CharData"><lhs>CharData</lhs><rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* &apos;]]&gt;&apos; [^&lt;&amp;]*)</rhs></prod></scrap>
+<scrap lang="ebnf"><head>Character Data</head><prod id="NT-CharData"><lhs>CharData</lhs><rhs>[^&lt;&amp;]* - ([^&lt;&amp;]* ']]&gt;' [^&lt;&amp;]*)</rhs></prod></scrap>
 </p>
 </div2>
 <div2 id="sec-comments">
@ -743,17 +738,17 @@ appear anywhere in a document outside other
 <termref def="dt-markup">markup</termref>; in addition,
 they may appear within the document type declaration
 at places allowed by the grammar.
-They are not part of the document&apos;s <termref def="dt-chardata">character
+They are not part of the document's <termref def="dt-chardata">character
 data</termref>; an XML
 processor may, but need not, make it possible for an application to
 retrieve the text of comments.
 <termref def="dt-compat">For compatibility</termref>, the string
 &quot;<code>--</code>&quot; (double-hyphen) must not occur within
 comments.
-<scrap lang="ebnf"><head>Comments</head><prod id="NT-Comment"><lhs>Comment</lhs><rhs>&apos;&lt;!--&apos;
-((<nt def="NT-Char">Char</nt> - &apos;-&apos;) 
-| (&apos;-&apos; (<nt def="NT-Char">Char</nt> - &apos;-&apos;)))* 
-&apos;--&gt;&apos;</rhs></prod></scrap>
+<scrap lang="ebnf"><head>Comments</head><prod id="NT-Comment"><lhs>Comment</lhs><rhs>'&lt;!--'
+((<nt def="NT-Char">Char</nt> - '-') 
+| ('-' (<nt def="NT-Char">Char</nt> - '-')))* 
+'--&gt;'</rhs></prod></scrap>
 </termdef>
 </p>
 <p>An example of a comment:
@ -766,13 +761,13 @@ comments.
 instructions</term> (PIs) allow documents to contain instructions
 for applications.
 
-<scrap lang="ebnf"><head>Processing Instructions</head><prod id="NT-PI"><lhs>PI</lhs><rhs>&apos;&lt;?&apos; <nt def="NT-PITarget">PITarget</nt> 
+<scrap lang="ebnf"><head>Processing Instructions</head><prod id="NT-PI"><lhs>PI</lhs><rhs>'&lt;?' <nt def="NT-PITarget">PITarget</nt> 
 (<nt def="NT-S">S</nt> 
 (<nt def="NT-Char">Char</nt>* - 
 (<nt def="NT-Char">Char</nt>* &pic; <nt def="NT-Char">Char</nt>*)))?
 &pic;</rhs></prod><prod id="NT-PITarget"><lhs>PITarget</lhs><rhs><nt def="NT-Name">Name</nt> - 
-((&apos;X&apos; | &apos;x&apos;) (&apos;M&apos; | &apos;m&apos;) (&apos;L&apos; | &apos;l&apos;))</rhs></prod></scrap></termdef>
-PIs are not part of the document&apos;s <termref def="dt-chardata">character
+(('X' | 'x') ('M' | 'm') ('L' | 'l'))</rhs></prod></scrap></termdef>
+PIs are not part of the document's <termref def="dt-chardata">character
 data</termref>, but must be passed through to the application. The
 PI begins with a target (<nt def="NT-PITarget">PITarget</nt>) used
 to identify the application to which the instruction is directed.  
@ -796,9 +791,9 @@ string &quot;<code>&lt;![CDATA[</code>&quot; and end with the string
 &quot;<code>]]&gt;</code>&quot;:
 <scrap lang="ebnf"><head>CDATA Sections</head><prod id="NT-CDSect"><lhs>CDSect</lhs><rhs><nt def="NT-CDStart">CDStart</nt> 
 <nt def="NT-CData">CData</nt> 
-<nt def="NT-CDEnd">CDEnd</nt></rhs></prod><prod id="NT-CDStart"><lhs>CDStart</lhs><rhs>&apos;&lt;![CDATA[&apos;</rhs></prod><prod id="NT-CData"><lhs>CData</lhs><rhs>(<nt def="NT-Char">Char</nt>* - 
-(<nt def="NT-Char">Char</nt>* &apos;]]&gt;&apos; <nt def="NT-Char">Char</nt>*))
-</rhs></prod><prod id="NT-CDEnd"><lhs>CDEnd</lhs><rhs>&apos;]]&gt;&apos;</rhs></prod></scrap>
+<nt def="NT-CDEnd">CDEnd</nt></rhs></prod><prod id="NT-CDStart"><lhs>CDStart</lhs><rhs>'&lt;![CDATA['</rhs></prod><prod id="NT-CData"><lhs>CData</lhs><rhs>(<nt def="NT-Char">Char</nt>* - 
+(<nt def="NT-Char">Char</nt>* ']]&gt;' <nt def="NT-Char">Char</nt>*))
+</rhs></prod><prod id="NT-CDEnd"><lhs>CDEnd</lhs><rhs>']]&gt;'</rhs></prod></scrap>

 Within a CDATA section, only the <nt def="NT-CDEnd">CDEnd</nt> string is
 recognized as markup, so that left angle brackets and ampersands may occur in
@ -865,9 +860,9 @@ the first <termref def="dt-element">element</termref> in the document.
 <nt def="NT-EncodingDecl">EncodingDecl</nt>? 
 <nt def="NT-SDDecl">SDDecl</nt>? 
 <nt def="NT-S">S</nt>? 
-&pic;</rhs></prod><prod id="NT-VersionInfo"><lhs>VersionInfo</lhs><rhs><nt def="NT-S">S</nt> &apos;version&apos; <nt def="NT-Eq">Eq</nt> 
-(&apos; <nt def="NT-VersionNum">VersionNum</nt> &apos; 
-| &quot; <nt def="NT-VersionNum">VersionNum</nt> &quot;)</rhs></prod><prod id="NT-Eq"><lhs>Eq</lhs><rhs><nt def="NT-S">S</nt>? &apos;=&apos; <nt def="NT-S">S</nt>?</rhs></prod><prod id="NT-VersionNum"><lhs>VersionNum</lhs><rhs>([a-zA-Z0-9_.:] | &apos;-&apos;)+</rhs></prod><prod id="NT-Misc"><lhs>Misc</lhs><rhs><nt def="NT-Comment">Comment</nt> | <nt def="NT-PI">PI</nt> | 
+&pic;</rhs></prod><prod id="NT-VersionInfo"><lhs>VersionInfo</lhs><rhs><nt def="NT-S">S</nt> 'version' <nt def="NT-Eq">Eq</nt> 
+(' <nt def="NT-VersionNum">VersionNum</nt> ' 
+| &quot; <nt def="NT-VersionNum">VersionNum</nt> &quot;)</rhs></prod><prod id="NT-Eq"><lhs>Eq</lhs><rhs><nt def="NT-S">S</nt>? '=' <nt def="NT-S">S</nt>?</rhs></prod><prod id="NT-VersionNum"><lhs>VersionNum</lhs><rhs>([a-zA-Z0-9_.:] | '-')+</rhs></prod><prod id="NT-Misc"><lhs>Misc</lhs><rhs><nt def="NT-Comment">Comment</nt> | <nt def="NT-PI">PI</nt> | 
 <nt def="NT-S">S</nt></rhs></prod></prodgroup></scrap></p>
 <p><termdef id="dt-doctype" term="Document Type Declaration">The XML
 <term>document type declaration</term> 
@ -903,15 +898,15 @@ For fuller information, see
 <prodgroup pcw2="6" pcw4="17.5" pcw5="9">
 <prod id="NT-doctypedecl">
 <lhs>doctypedecl</lhs>
-              <rhs>&apos;&lt;!DOCTYPE&apos; <nt def="NT-S">S</nt> 
+<rhs>'&lt;!DOCTYPE' <nt def="NT-S">S</nt> 
 <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt> 
 <nt def="NT-ExternalID">ExternalID</nt>)? 
-<nt def="NT-S">S</nt>? (&apos;[&apos; 
+<nt def="NT-S">S</nt>? ('[' 
 (<nt def="NT-markupdecl">markupdecl</nt> 
 | <nt def="NT-PEReference">PEReference</nt> 
 | <nt def="NT-S">S</nt>)*
-&apos;]&apos; 
-<nt def="NT-S">S</nt>?)? &apos;&gt;&apos;</rhs>
+']' 
+<nt def="NT-S">S</nt>?)? '&gt;'</rhs>
 <vc def="vc-roottype"/>
 </prod>
 <prod id="NT-markupdecl">
@ -1025,8 +1020,8 @@ whether or not there are such declarations which appear external to
 the <termref def="dt-docent">document entity</termref>.
 <scrap lang="ebnf" id="fulldtd"><head>Standalone Document Declaration</head><prodgroup pcw2="4" pcw4="19.5" pcw5="9"><prod id="NT-SDDecl"><lhs>SDDecl</lhs><rhs>
 <nt def="NT-S">S</nt> 
-&apos;standalone&apos; <nt def="NT-Eq">Eq</nt> 
-((&quot;&apos;&quot; (&apos;yes&apos; | &apos;no&apos;) &quot;&apos;&quot;) | (&apos;&quot;&apos; (&apos;yes&apos; | &apos;no&apos;) &apos;&quot;&apos;))
+'standalone' <nt def="NT-Eq">Eq</nt> 
+((&quot;'&quot; ('yes' | 'no') &quot;'&quot;) | ('&quot;' ('yes' | 'no') '&quot;'))
 </rhs><vc def="vc-check-rmd"/></prod></prodgroup></scrap></p>
 <p>
 In a standalone document declaration, the value &quot;<code>yes</code>&quot; indicates
@ -1082,7 +1077,7 @@ directly within any instance of those types.
 </item>
 </ulist>
 </vcnote>
-        <p>An example XML declaration with a standalone document declaration:<eg>&lt;?xml version=&quot;&XML.version;&quot; standalone=&apos;yes&apos;?&gt;</eg></p>
+<p>An example XML declaration with a standalone document declaration:<eg>&lt;?xml version=&quot;&XML.version;&quot; standalone='yes'?&gt;</eg></p>
 </div2>
 <div2 id="sec-white-space">
 <head>White Space Handling</head>
@ -1111,7 +1106,7 @@ When declared, it must be given as an
 <termref def="dt-enumerated">enumerated type</termref> whose only
 possible values are &quot;<code>default</code>&quot; and &quot;<code>preserve</code>&quot;.
 For example:<eg><![CDATA[    <!ATTLIST poem   xml:space (default|preserve) 'preserve'>]]></eg></p>
-        <p>The value &quot;<code>default</code>&quot; signals that applications&apos;
+<p>The value &quot;<code>default</code>&quot; signals that applications'
 default white-space processing modes are acceptable for this element; the
 value &quot;<code>preserve</code>&quot; indicates the intent that applications preserve
 all the white space.
@ -1158,9 +1153,9 @@ In valid documents, this attribute, like any other, must be
 The values of the attribute are language identifiers as defined
 by <bibref ref="RFC1766"/>, &quot;Tags for the Identification of Languages&quot;:
 <scrap lang="ebnf"><head>Language Identification</head><prod id="NT-LanguageID"><lhs>LanguageID</lhs><rhs><nt def="NT-Langcode">Langcode</nt> 
-(&apos;-&apos; <nt def="NT-Subcode">Subcode</nt>)*</rhs></prod><prod id="NT-Langcode"><lhs>Langcode</lhs><rhs><nt def="NT-ISO639Code">ISO639Code</nt> | 
+('-' <nt def="NT-Subcode">Subcode</nt>)*</rhs></prod><prod id="NT-Langcode"><lhs>Langcode</lhs><rhs><nt def="NT-ISO639Code">ISO639Code</nt> | 
 <nt def="NT-IanaCode">IanaCode</nt> | 
-<nt def="NT-UserCode">UserCode</nt></rhs></prod><prod id="NT-ISO639Code"><lhs>ISO639Code</lhs><rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod><prod id="NT-IanaCode"><lhs>IanaCode</lhs><rhs>(&apos;i&apos; | &apos;I&apos;) &apos;-&apos; ([a-z] | [A-Z])+</rhs></prod><prod id="NT-UserCode"><lhs>UserCode</lhs><rhs>(&apos;x&apos; | &apos;X&apos;) &apos;-&apos; ([a-z] | [A-Z])+</rhs></prod><prod id="NT-Subcode"><lhs>Subcode</lhs><rhs>([a-z] | [A-Z])+</rhs></prod></scrap>
+<nt def="NT-UserCode">UserCode</nt></rhs></prod><prod id="NT-ISO639Code"><lhs>ISO639Code</lhs><rhs>([a-z] | [A-Z]) ([a-z] | [A-Z])</rhs></prod><prod id="NT-IanaCode"><lhs>IanaCode</lhs><rhs>('i' | 'I') '-' ([a-z] | [A-Z])+</rhs></prod><prod id="NT-UserCode"><lhs>UserCode</lhs><rhs>('x' | 'X') '-' ([a-z] | [A-Z])+</rhs></prod><prod id="NT-Subcode"><lhs>Subcode</lhs><rhs>([a-z] | [A-Z])+</rhs></prod></scrap>
 The <nt def="NT-Langcode">Langcode</nt> may be any of the following:
 <ulist><item><p>a two-letter language code as defined by 
 <bibref ref="ISO639"/>, &quot;Codes
@ -1258,14 +1253,14 @@ has a <termref def="dt-attrname">name</termref> and a <termref def="dt-attrval">
 </scrap>
 <p>This specification does not constrain the semantics, use, or (beyond
 syntax) names of the element types and attributes, except that names
-beginning with a match to <code>((&apos;X&apos;|&apos;x&apos;)(&apos;M&apos;|&apos;m&apos;)(&apos;L&apos;|&apos;l&apos;))</code>
+beginning with a match to <code>(('X'|'x')('M'|'m')('L'|'l'))</code>
 are reserved for standardization in this or future versions of this
 specification.
 </p>
 <wfcnote id="GIMatch">
 <head>Element Type Match</head>
 <p>
-The <nt def="NT-Name">Name</nt> in an element&apos;s end-tag must match 
+The <nt def="NT-Name">Name</nt> in an element's end-tag must match 
 the element type in
 the start-tag.
 </p>
@ -1309,13 +1304,13 @@ been declared.</p>
 <head>Start-Tags, End-Tags, and Empty-Element Tags</head>
 <p><termdef id="dt-stag" term="Start-Tag">The beginning of every
 non-empty XML element is marked by a <term>start-tag</term>.
-<scrap lang="ebnf"><head>Start-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-STag"><lhs>STag</lhs><rhs>&apos;&lt;&apos; <nt def="NT-Name">Name</nt> 
+<scrap lang="ebnf"><head>Start-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-STag"><lhs>STag</lhs><rhs>'&lt;' <nt def="NT-Name">Name</nt> 
 (<nt def="NT-S">S</nt> <nt def="NT-Attribute">Attribute</nt>)* 
-<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><wfc def="uniqattspec"/></prod><prod id="NT-Attribute"><lhs>Attribute</lhs><rhs><nt def="NT-Name">Name</nt> <nt def="NT-Eq">Eq</nt> 
+<nt def="NT-S">S</nt>? '&gt;'</rhs><wfc def="uniqattspec"/></prod><prod id="NT-Attribute"><lhs>Attribute</lhs><rhs><nt def="NT-Name">Name</nt> <nt def="NT-Eq">Eq</nt> 
 <nt def="NT-AttValue">AttValue</nt></rhs><vc def="ValueType"/><wfc def="NoExternalRefs"/><wfc def="CleanAttrVals"/></prod></prodgroup></scrap>
 The <nt def="NT-Name">Name</nt> in
 the start- and end-tags gives the 
-element&apos;s <term>type</term>.</termdef>
+element's <term>type</term>.</termdef>
 <termdef id="dt-attr" term="Attribute">
 The <nt def="NT-Name">Name</nt>-<nt def="NT-AttValue">AttValue</nt> pairs are
 referred to as 
@ -1325,7 +1320,7 @@ the <term>attribute specifications</term> of the element</termdef>,
 referred to as the <term>attribute name</term></termdef> and
 <termdef id="dt-attrval" term="Attribute Value">the content of the
 <nt def="NT-AttValue">AttValue</nt> (the text between the
-<code>&apos;</code> or <code>&quot;</code> delimiters)
+<code>'</code> or <code>&quot;</code> delimiters)
 as the <term>attribute value</term>.</termdef>
 </p>
 <wfcnote id="uniqattspec">
@ -1364,17 +1359,17 @@ a <code>&lt;</code>.
 <termdef id="dt-etag" term="End Tag">The end of every element 
 that begins with a start-tag must
 be marked by an <term>end-tag</term>
-containing a name that echoes the element&apos;s type as given in the
+containing a name that echoes the element's type as given in the
 start-tag:
-<scrap lang="ebnf"><head>End-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-ETag"><lhs>ETag</lhs><rhs>&apos;&lt;/&apos; <nt def="NT-Name">Name</nt> 
-<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod></prodgroup></scrap>
+<scrap lang="ebnf"><head>End-tag</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-ETag"><lhs>ETag</lhs><rhs>'&lt;/' <nt def="NT-Name">Name</nt> 
+<nt def="NT-S">S</nt>? '&gt;'</rhs></prod></prodgroup></scrap>
 </termdef>
 </p>
 <p>An example of an end-tag:<eg>&lt;/termdef&gt;</eg></p>
 <p>
 <termdef id="dt-content" term="Content">The 
 <termref def="dt-text">text</termref> between the start-tag and
-end-tag is called the element&apos;s
+end-tag is called the element's
 <term>content</term>:
 <scrap lang="ebnf"><head>Content of Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-content"><lhs>content</lhs><rhs>(<nt def="NT-element">element</nt> | <nt def="NT-CharData">CharData</nt> 
 | <nt def="NT-Reference">Reference</nt> | <nt def="NT-CDSect">CDSect</nt> 
@ -1386,9 +1381,9 @@ it must be represented either by a start-tag immediately followed
 by an end-tag or by an empty-element tag.</termdef>
 <termdef id="dt-eetag" term="empty-element tag">An 
 <term>empty-element tag</term> takes a special form:
-<scrap lang="ebnf"><head>Tags for Empty Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-EmptyElemTag"><lhs>EmptyElemTag</lhs><rhs>&apos;&lt;&apos; <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt> 
+<scrap lang="ebnf"><head>Tags for Empty Elements</head><prodgroup pcw2="6" pcw4="15" pcw5="11.5"><prod id="NT-EmptyElemTag"><lhs>EmptyElemTag</lhs><rhs>'&lt;' <nt def="NT-Name">Name</nt> (<nt def="NT-S">S</nt> 
 <nt def="NT-Attribute">Attribute</nt>)* <nt def="NT-S">S</nt>? 
-&apos;/&gt;&apos;</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap>
+'/&gt;'</rhs><wfc def="uniqattspec"/></prod></prodgroup></scrap>
 </termdef></p>
 <p>Empty-element tags may be used for any element which has no
 content, whether or not it is declared using the keyword
@ -1409,7 +1404,7 @@ tag must be used, and can only be used, for elements which are
 <termref def="dt-valid">validation</termref> purposes, 
 be constrained
 using element type and attribute-list declarations.
-An element type declaration constrains the element&apos;s
+An element type declaration constrains the element's
 <termref def="dt-content">content</termref>.
 </p>
 <p>Element type declarations often constrain which element types can
@ -1419,12 +1414,12 @@ when a declaration mentions an element type for which no declaration
 is provided, but this is not an error.</p>
 <p><termdef id="dt-eldecl" term="Element Type declaration">An <term>element
 type declaration</term> takes the form:
-<scrap lang="ebnf"><head>Element Type Declaration</head><prodgroup pcw2="5.5" pcw4="18" pcw5="9"><prod id="NT-elementdecl"><lhs>elementdecl</lhs><rhs>&apos;&lt;!ELEMENT&apos; <nt def="NT-S">S</nt> 
+<scrap lang="ebnf"><head>Element Type Declaration</head><prodgroup pcw2="5.5" pcw4="18" pcw5="9"><prod id="NT-elementdecl"><lhs>elementdecl</lhs><rhs>'&lt;!ELEMENT' <nt def="NT-S">S</nt> 
 <nt def="NT-Name">Name</nt> 
 <nt def="NT-S">S</nt> 
 <nt def="NT-contentspec">contentspec</nt>
-<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><vc def="EDUnique"/></prod><prod id="NT-contentspec"><lhs>contentspec</lhs><rhs>&apos;EMPTY&apos; 
-| &apos;ANY&apos; 
+<nt def="NT-S">S</nt>? '&gt;'</rhs><vc def="EDUnique"/></prod><prod id="NT-contentspec"><lhs>contentspec</lhs><rhs>'EMPTY' 
+| 'ANY' 
 | <nt def="NT-Mixed">Mixed</nt> 
 | <nt def="NT-children">children</nt>
 </rhs></prod></prodgroup></scrap>
@ -1461,14 +1456,14 @@ choice lists of content particles, or
 sequence lists of content particles:
 <scrap lang="ebnf"><head>Element-content Models</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-children"><lhs>children</lhs><rhs>(<nt def="NT-choice">choice</nt> 
 | <nt def="NT-seq">seq</nt>) 
-(&apos;?&apos; | &apos;*&apos; | &apos;+&apos;)?</rhs></prod><prod id="NT-cp"><lhs>cp</lhs><rhs>(<nt def="NT-Name">Name</nt> 
+('?' | '*' | '+')?</rhs></prod><prod id="NT-cp"><lhs>cp</lhs><rhs>(<nt def="NT-Name">Name</nt> 
 | <nt def="NT-choice">choice</nt> 
 | <nt def="NT-seq">seq</nt>) 
-(&apos;?&apos; | &apos;*&apos; | &apos;+&apos;)?</rhs></prod><prod id="NT-choice"><lhs>choice</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? cp 
-( <nt def="NT-S">S</nt>? &apos;|&apos; <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
-<nt def="NT-S">S</nt>? &apos;)&apos;</rhs><vc def="vc-PEinGroup"/></prod><prod id="NT-seq"><lhs>seq</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? cp 
-( <nt def="NT-S">S</nt>? &apos;,&apos; <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
-<nt def="NT-S">S</nt>? &apos;)&apos;</rhs><vc def="vc-PEinGroup"/></prod></prodgroup></scrap>
+('?' | '*' | '+')?</rhs></prod><prod id="NT-choice"><lhs>choice</lhs><rhs>'(' <nt def="NT-S">S</nt>? cp 
+( <nt def="NT-S">S</nt>? '|' <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
+<nt def="NT-S">S</nt>? ')'</rhs><vc def="vc-PEinGroup"/></prod><prod id="NT-seq"><lhs>seq</lhs><rhs>'(' <nt def="NT-S">S</nt>? cp 
+( <nt def="NT-S">S</nt>? ',' <nt def="NT-S">S</nt>? <nt def="NT-cp">cp</nt> )*
+<nt def="NT-S">S</nt>? ')'</rhs><vc def="vc-PEinGroup"/></prod></prodgroup></scrap>
 where each <nt def="NT-Name">Name</nt> is the type of an element which may
 appear as a <termref def="dt-parentchild">child</termref>.  
 Any content
@ -1532,14 +1527,14 @@ character data, optionally interspersed with
 <termref def="dt-parentchild">child</termref> elements.</termdef>
 In this case, the types of the child elements
 may be constrained, but not their order or their number of occurrences:
-<scrap lang="ebnf"><head>Mixed-content Declaration</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-Mixed"><lhs>Mixed</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>? 
-&apos;#PCDATA&apos;
+<scrap lang="ebnf"><head>Mixed-content Declaration</head><prodgroup pcw2="5.5" pcw4="16" pcw5="11"><prod id="NT-Mixed"><lhs>Mixed</lhs><rhs>'(' <nt def="NT-S">S</nt>? 
+'#PCDATA'
 (<nt def="NT-S">S</nt>? 
-&apos;|&apos; 
+'|' 
 <nt def="NT-S">S</nt>? 
 <nt def="NT-Name">Name</nt>)* 
 <nt def="NT-S">S</nt>? 
-&apos;)*&apos; </rhs><rhs>| &apos;(&apos; <nt def="NT-S">S</nt>? &apos;#PCDATA&apos; <nt def="NT-S">S</nt>? &apos;)&apos;
+')*' </rhs><rhs>| '(' <nt def="NT-S">S</nt>? '#PCDATA' <nt def="NT-S">S</nt>? ')'
 </rhs><vc def="vc-PEinGroup"/><vc def="vc-MixedChildrenUnique"/></prod></prodgroup></scrap>
 where the <nt def="NT-Name">Name</nt>s give the types of elements
 that may appear as children.
@ -1575,10 +1570,10 @@ for attributes.</p></item></ulist>
 <termdef id="dt-attdecl" term="Attribute-List Declaration">
 <term>Attribute-list declarations</term> specify the name, data type, and default
 value (if any) of each attribute associated with a given element type:
-<scrap lang="ebnf"><head>Attribute-list Declaration</head><prod id="NT-AttlistDecl"><lhs>AttlistDecl</lhs><rhs>&apos;&lt;!ATTLIST&apos; <nt def="NT-S">S</nt> 
+<scrap lang="ebnf"><head>Attribute-list Declaration</head><prod id="NT-AttlistDecl"><lhs>AttlistDecl</lhs><rhs>'&lt;!ATTLIST' <nt def="NT-S">S</nt> 
 <nt def="NT-Name">Name</nt> 
 <nt def="NT-AttDef">AttDef</nt>*
-<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-AttDef"><lhs>AttDef</lhs><rhs><nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> 
+<nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-AttDef"><lhs>AttDef</lhs><rhs><nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> 
 <nt def="NT-S">S</nt> <nt def="NT-AttType">AttType</nt> 
 <nt def="NT-S">S</nt> <nt def="NT-DefaultDecl">DefaultDecl</nt></rhs></prod></scrap>
 The <nt def="NT-Name">Name</nt> in the
@ -1615,7 +1610,7 @@ and semantic constraints, as noted:
 <scrap lang="ebnf"><head>Attribute Types</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-AttType"><lhs>AttType</lhs><rhs><nt def="NT-StringType">StringType</nt> 
 | <nt def="NT-TokenizedType">TokenizedType</nt> 
 | <nt def="NT-EnumeratedType">EnumeratedType</nt>
-</rhs></prod><prod id="NT-StringType"><lhs>StringType</lhs><rhs>&apos;CDATA&apos;</rhs></prod><prod id="NT-TokenizedType"><lhs>TokenizedType</lhs><rhs>&apos;ID&apos;</rhs><vc def="id"/><vc def="one-id-per-el"/><vc def="id-default"/><rhs>| &apos;IDREF&apos;</rhs><vc def="idref"/><rhs>| &apos;IDREFS&apos;</rhs><vc def="idref"/><rhs>| &apos;ENTITY&apos;</rhs><vc def="entname"/><rhs>| &apos;ENTITIES&apos;</rhs><vc def="entname"/><rhs>| &apos;NMTOKEN&apos;</rhs><vc def="nmtok"/><rhs>| &apos;NMTOKENS&apos;</rhs><vc def="nmtok"/></prod></prodgroup></scrap>
+</rhs></prod><prod id="NT-StringType"><lhs>StringType</lhs><rhs>'CDATA'</rhs></prod><prod id="NT-TokenizedType"><lhs>TokenizedType</lhs><rhs>'ID'</rhs><vc def="id"/><vc def="one-id-per-el"/><vc def="id-default"/><rhs>| 'IDREF'</rhs><vc def="idref"/><rhs>| 'IDREFS'</rhs><vc def="idref"/><rhs>| 'ENTITY'</rhs><vc def="entname"/><rhs>| 'ENTITIES'</rhs><vc def="entname"/><rhs>| 'NMTOKEN'</rhs><vc def="nmtok"/><rhs>| 'NMTOKENS'</rhs><vc def="nmtok"/></prod></prodgroup></scrap>
 </p>
 <vcnote id="id">
 <head>ID</head>
@ -1679,21 +1674,21 @@ of a list of values provided in the declaration</termdef>. There are two
 kinds of enumerated types:
 <scrap lang="ebnf"><head>Enumerated Attribute Types</head><prod id="NT-EnumeratedType"><lhs>EnumeratedType</lhs><rhs><nt def="NT-NotationType">NotationType</nt> 
 | <nt def="NT-Enumeration">Enumeration</nt>
-</rhs></prod><prod id="NT-NotationType"><lhs>NotationType</lhs><rhs>&apos;NOTATION&apos; 
+</rhs></prod><prod id="NT-NotationType"><lhs>NotationType</lhs><rhs>'NOTATION' 
 <nt def="NT-S">S</nt> 
-&apos;(&apos; 
+'(' 
 <nt def="NT-S">S</nt>?  
 <nt def="NT-Name">Name</nt> 
-(<nt def="NT-S">S</nt>? &apos;|&apos; <nt def="NT-S">S</nt>?  
+(<nt def="NT-S">S</nt>? '|' <nt def="NT-S">S</nt>?  
 <nt def="NT-Name">Name</nt>)*
-<nt def="NT-S">S</nt>? &apos;)&apos;
-</rhs><vc def="notatn"/></prod><prod id="NT-Enumeration"><lhs>Enumeration</lhs><rhs>&apos;(&apos; <nt def="NT-S">S</nt>?
+<nt def="NT-S">S</nt>? ')'
+</rhs><vc def="notatn"/></prod><prod id="NT-Enumeration"><lhs>Enumeration</lhs><rhs>'(' <nt def="NT-S">S</nt>?
 <nt def="NT-Nmtoken">Nmtoken</nt> 
-(<nt def="NT-S">S</nt>? &apos;|&apos; 
+(<nt def="NT-S">S</nt>? '|' 
 <nt def="NT-S">S</nt>?  
 <nt def="NT-Nmtoken">Nmtoken</nt>)* 
 <nt def="NT-S">S</nt>? 
-&apos;)&apos;</rhs><vc def="enum"/></prod></scrap>
+')'</rhs><vc def="enum"/></prod></scrap>
 A <kw>NOTATION</kw> attribute identifies a 
 <termref def="dt-notation">notation</termref>, declared in the 
 DTD with associated system and/or public identifiers, to
@ -1726,10 +1721,10 @@ enumerated attribute types of a single element type.
 <head>Attribute Defaults</head>
 <p>An <termref def="dt-attdecl">attribute declaration</termref> provides
 information on whether
-the attribute&apos;s presence is required, and if not, how an XML processor should
+the attribute's presence is required, and if not, how an XML processor should
 react if a declared attribute is absent in a document.
-<scrap lang="ebnf"><head>Attribute Defaults</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-DefaultDecl"><lhs>DefaultDecl</lhs><rhs>&apos;#REQUIRED&apos; 
-|&nbsp;&apos;#IMPLIED&apos; </rhs><rhs>| ((&apos;#FIXED&apos; S)? <nt def="NT-AttValue">AttValue</nt>)</rhs><vc def="RequiredAttr"/><vc def="defattrvalid"/><wfc def="CleanAttrVals"/><vc def="FixedAttr"/></prod></prodgroup></scrap>
+<scrap lang="ebnf"><head>Attribute Defaults</head><prodgroup pcw4="14" pcw5="11.5"><prod id="NT-DefaultDecl"><lhs>DefaultDecl</lhs><rhs>'#REQUIRED' 
+|&nbsp;'#IMPLIED' </rhs><rhs>| (('#FIXED' S)? <nt def="NT-AttValue">AttValue</nt>)</rhs><vc def="RequiredAttr"/><vc def="defattrvalid"/><wfc def="CleanAttrVals"/><vc def="FixedAttr"/></prod></prodgroup></scrap>

 </p>
 <p>In an attribute declaration, <kw>#REQUIRED</kw> means that the
@ -1817,16 +1812,16 @@ included in, or excluded from, the logical structure of the DTD based on
 the keyword which governs them.</termdef>
 <scrap lang="ebnf"><head>Conditional Section</head><prodgroup pcw2="9" pcw4="14.5"><prod id="NT-conditionalSect"><lhs>conditionalSect</lhs><rhs><nt def="NT-includeSect">includeSect</nt>
 | <nt def="NT-ignoreSect">ignoreSect</nt>
-</rhs></prod><prod id="NT-includeSect"><lhs>includeSect</lhs><rhs>&apos;&lt;![&apos; S? &apos;INCLUDE&apos; S? &apos;[&apos; 
+</rhs></prod><prod id="NT-includeSect"><lhs>includeSect</lhs><rhs>'&lt;![' S? 'INCLUDE' S? '[' 

 <nt def="NT-extSubsetDecl">extSubsetDecl</nt>
-&apos;]]&gt;&apos;
-</rhs></prod><prod id="NT-ignoreSect"><lhs>ignoreSect</lhs><rhs>&apos;&lt;![&apos; S? &apos;IGNORE&apos; S? &apos;[&apos; 
+']]&gt;'
+</rhs></prod><prod id="NT-ignoreSect"><lhs>ignoreSect</lhs><rhs>'&lt;![' S? 'IGNORE' S? '[' 
 <nt def="NT-ignoreSectContents">ignoreSectContents</nt>*
-&apos;]]&gt;&apos;</rhs></prod><prod id="NT-ignoreSectContents"><lhs>ignoreSectContents</lhs><rhs><nt def="NT-Ignore">Ignore</nt>
-(&apos;&lt;![&apos; <nt def="NT-ignoreSectContents">ignoreSectContents</nt> &apos;]]&gt;&apos; 
+']]&gt;'</rhs></prod><prod id="NT-ignoreSectContents"><lhs>ignoreSectContents</lhs><rhs><nt def="NT-Ignore">Ignore</nt>
+('&lt;![' <nt def="NT-ignoreSectContents">ignoreSectContents</nt> ']]&gt;' 
 <nt def="NT-Ignore">Ignore</nt>)*</rhs></prod><prod id="NT-Ignore"><lhs>Ignore</lhs><rhs><nt def="NT-Char">Char</nt>* - 
-(<nt def="NT-Char">Char</nt>* (&apos;&lt;![&apos; | &apos;]]&gt;&apos;) 
+(<nt def="NT-Char">Char</nt>* ('&lt;![' | ']]&gt;') 
 <nt def="NT-Char">Char</nt>*)
 </rhs></prod></prodgroup></scrap>
 </p>
@ -1854,8 +1849,8 @@ parameter-entity reference, the parameter entity must be replaced by its
 content before the processor decides whether to
 include or ignore the conditional section.</p>
 <p>An example:
-<eg>&lt;!ENTITY % draft &apos;INCLUDE&apos; &gt;
-&lt;!ENTITY % final &apos;IGNORE&apos; &gt;
+<eg>&lt;!ENTITY % draft 'INCLUDE' &gt;
+&lt;!ENTITY % final 'IGNORE' &gt;
 
 &lt;![%draft;[
 &lt;!ELEMENT book (comments*, title, body, supplements?)&gt;
@ -1903,7 +1898,7 @@ called the <termref def="dt-docent">document entity</termref>, which serves
 as the starting point for the <termref def="dt-xml-proc">XML
 processor</termref> and may contain the whole document.</p>
 <p>Entities may be either parsed or unparsed.
-<termdef id="dt-parsedent" term="Text Entity">A <term>parsed entity&apos;s</term>
+<termdef id="dt-parsedent" term="Text Entity">A <term>parsed entity's</term>
 contents are referred to as its 
 <termref def="dt-repltext">replacement text</termref>;
 this <termref def="dt-text">text</termref> is considered an
@ -1942,15 +1937,15 @@ a general entity with the same name are two distinct entities.
 A <term>character reference</term> refers to a specific character in the
 ISO/IEC 10646 character set, for example one not directly accessible from
 available input devices.
-<scrap lang="ebnf"><head>Character Reference</head><prod id="NT-CharRef"><lhs>CharRef</lhs><rhs>&apos;&amp;#&apos; [0-9]+ &apos;;&apos; </rhs><rhs>| &apos;&hcro;&apos; [0-9a-fA-F]+ &apos;;&apos;</rhs><wfc def="wf-Legalchar"/></prod></scrap>
+<scrap lang="ebnf"><head>Character Reference</head><prod id="NT-CharRef"><lhs>CharRef</lhs><rhs>'&amp;#' [0-9]+ ';' </rhs><rhs>| '&hcro;' [0-9a-fA-F]+ ';'</rhs><wfc def="wf-Legalchar"/></prod></scrap>
 <wfcnote id="wf-Legalchar"><head>Legal Character</head><p>Characters referred to using character references must
 match the production for
 <termref def="NT-Char">Char</termref>.</p></wfcnote>
 If the character reference begins with &quot;<code>&amp;#x</code>&quot;, the digits and
 letters up to the terminating <code>;</code> provide a hexadecimal
-representation of the character&apos;s code point in ISO/IEC 10646.
+representation of the character's code point in ISO/IEC 10646.
 If it begins just with &quot;<code>&amp;#</code>&quot;, the digits up to the terminating
-<code>;</code> provide a decimal representation of the character&apos;s 
+<code>;</code> provide a decimal representation of the character's 
 code point.
 </termdef>
 </p>
@ -1974,7 +1969,7 @@ semicolon
 </prod>
 <prod id="NT-EntityRef">
 <lhs>EntityRef</lhs>
-            <rhs>&apos;&amp;&apos; <nt def="NT-Name">Name</nt> &apos;;&apos;</rhs>
+<rhs>'&amp;' <nt def="NT-Name">Name</nt> ';'</rhs>
 <wfc def="wf-entdeclared"/>
 <vc def="vc-entdeclared"/>
 <wfc def="textent"/>
@ -1982,7 +1977,7 @@ semicolon
 </prod>
 <prod id="NT-PEReference">
 <lhs>PEReference</lhs>
-            <rhs>&apos;%&apos; <nt def="NT-Name">Name</nt> &apos;;&apos;</rhs>
+<rhs>'%' <nt def="NT-Name">Name</nt> ';'</rhs>
 <vc def="vc-entdeclared"/>
 <wfc def="norecursion"/>
 <wfc def="indtd"/>
@ -1992,7 +1987,7 @@ semicolon
 <head>Entity Declared</head>
 <p>In a document without any DTD, a document with only an internal
 DTD subset which contains no parameter entity references, or a document with
-&quot;<code>standalone=&apos;yes&apos;</code>&quot;, 
+&quot;<code>standalone='yes'</code>&quot;, 
 the <nt def="NT-Name">Name</nt> given in the entity reference must 
 <termref def="dt-match">match</termref> that in an 
 <titleref href="sec-entity-decl">entity declaration</titleref>, except that
@ -2007,12 +2002,12 @@ external parameter entities, a non-validating processor is
 <titleref href="include-if-valid">not obligated to</titleref> read
 and process their declarations; for such documents, the rule that
 an entity must be declared is a well-formedness constraint only
-if <titleref href="sec-rmd">standalone=&apos;yes&apos;</titleref>.</p>
+if <titleref href="sec-rmd">standalone='yes'</titleref>.</p>
 </wfcnote>
 <vcnote id="vc-entdeclared">
 <head>Entity Declared</head>
 <p>In a document with an external subset or external parameter
-entities with &quot;<code>standalone=&apos;no&apos;</code>&quot;,
+entities with &quot;<code>standalone='no'</code>&quot;,
 the <nt def="NT-Name">Name</nt> given in the entity reference must <termref def="dt-match">match</termref> that in an 
 <titleref href="sec-entity-decl">entity declaration</titleref>.
 For interoperability, valid documents should declare the entities 
@ -2062,11 +2057,11 @@ is classified &amp;security-level;.</eg></p>
 <p><termdef id="dt-entdecl" term="entity declaration">
 Entities are declared thus:
 <scrap lang="ebnf"><head>Entity Declaration</head><prodgroup pcw2="5" pcw4="18.5"><prod id="NT-EntityDecl"><lhs>EntityDecl</lhs><rhs><nt def="NT-GEDecl">GEDecl</nt><!--</rhs><com>General entities</com>
-<rhs>--> | <nt def="NT-PEDecl">PEDecl</nt></rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-GEDecl"><lhs>GEDecl</lhs><rhs>&apos;&lt;!ENTITY&apos; <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> 
+<rhs>--> | <nt def="NT-PEDecl">PEDecl</nt></rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-GEDecl"><lhs>GEDecl</lhs><rhs>'&lt;!ENTITY' <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> 
 <nt def="NT-S">S</nt> <nt def="NT-EntityDef">EntityDef</nt> 
-<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-PEDecl"><lhs>PEDecl</lhs><rhs>&apos;&lt;!ENTITY&apos; <nt def="NT-S">S</nt> &apos;%&apos; <nt def="NT-S">S</nt> 
+<nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-PEDecl"><lhs>PEDecl</lhs><rhs>'&lt;!ENTITY' <nt def="NT-S">S</nt> '%' <nt def="NT-S">S</nt> 
 <nt def="NT-Name">Name</nt> <nt def="NT-S">S</nt> 
-<nt def="NT-PEDef">PEDef</nt> <nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-EntityDef"><lhs>EntityDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
+<nt def="NT-PEDef">PEDef</nt> <nt def="NT-S">S</nt>? '&gt;'</rhs><!--<com>Parameter entities</com>--></prod><prod id="NT-EntityDef"><lhs>EntityDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt>
 <!--</rhs>
 <rhs>-->| (<nt def="NT-ExternalID">ExternalID</nt> 
 <nt def="NT-NDataDecl">NDataDecl</nt>?)</rhs><!-- <nt def='NT-ExternalDef'>ExternalDef</nt></rhs> --></prod><!-- FINAL EDIT: what happened to WFs here? --><prod id="NT-PEDef"><lhs>PEDef</lhs><rhs><nt def="NT-EntityValue">EntityValue</nt> 
@ -2107,12 +2102,12 @@ internal, it is an <term>external
 entity</term>, declared as follows:
 <scrap lang="ebnf"><head>External Entity Declaration</head><!--
 <prod id='NT-ExternalDef'><lhs>ExternalDef</lhs>
-<rhs></prod> --><prod id="NT-ExternalID"><lhs>ExternalID</lhs><rhs>&apos;SYSTEM&apos; <nt def="NT-S">S</nt> 
-<nt def="NT-SystemLiteral">SystemLiteral</nt></rhs><rhs>| &apos;PUBLIC&apos; <nt def="NT-S">S</nt> 
+<rhs></prod> --><prod id="NT-ExternalID"><lhs>ExternalID</lhs><rhs>'SYSTEM' <nt def="NT-S">S</nt> 
+<nt def="NT-SystemLiteral">SystemLiteral</nt></rhs><rhs>| 'PUBLIC' <nt def="NT-S">S</nt> 
 <nt def="NT-PubidLiteral">PubidLiteral</nt> 
 <nt def="NT-S">S</nt> 
 <nt def="NT-SystemLiteral">SystemLiteral</nt>
-</rhs></prod><prod id="NT-NDataDecl"><lhs>NDataDecl</lhs><rhs><nt def="NT-S">S</nt> &apos;NDATA&apos; <nt def="NT-S">S</nt> 
+</rhs></prod><prod id="NT-NDataDecl"><lhs>NDataDecl</lhs><rhs><nt def="NT-S">S</nt> 'NDATA' <nt def="NT-S">S</nt> 
 <nt def="NT-Name">Name</nt></rhs><vc def="not-declared"/></prod></scrap>
 If the <nt def="NT-NDataDecl">NDataDecl</nt> is present, this is a
 general <termref def="dt-unparsed">unparsed
@ -2127,7 +2122,7 @@ The <nt def="NT-Name">Name</nt> must match the declared name of a
 </vcnote>
 <p><termdef id="dt-sysid" term="System Identifier">The
 <nt def="NT-SystemLiteral">SystemLiteral</nt> 
-is called the entity&apos;s <term>system identifier</term>. It is a URI,
+is called the entity's <term>system identifier</term>. It is a URI,
 which may be used to retrieve the entity.</termdef>
 Note that the hash mark (<code>#</code>) and fragment identifier 
 frequently used with URIs are not, formally, part of the URI itself; 
@ -2151,7 +2146,7 @@ byte value).</p>
 <p><termdef id="dt-pubid" term="Public identifier">
 In addition to a system identifier, an external identifier may
 include a <term>public identifier</term>.</termdef>  
-An XML processor attempting to retrieve the entity&apos;s content may use the public
+An XML processor attempting to retrieve the entity's content may use the public
 identifier to try to generate an alternative URI.  If the processor
 is unable to do so, it must use the URI specified in the system
 literal.  Before a match is attempted, all strings
@ -2237,10 +2232,10 @@ Parsed entities which are stored in an encoding other than
 UTF-8 or UTF-16 must begin with a <titleref href="TextDecl">text
 declaration</titleref> containing an encoding declaration:
 <scrap lang="ebnf"><head>Encoding Declaration</head><prod id="NT-EncodingDecl"><lhs>EncodingDecl</lhs><rhs><nt def="NT-S">S</nt>
-&apos;encoding&apos; <nt def="NT-Eq">Eq</nt> 
-(&apos;&quot;&apos; <nt def="NT-EncName">EncName</nt> &apos;&quot;&apos; | 
-&quot;&apos;&quot; <nt def="NT-EncName">EncName</nt> &quot;&apos;&quot; )
-</rhs></prod><prod id="NT-EncName"><lhs>EncName</lhs><rhs>[A-Za-z] ([A-Za-z0-9._] | &apos;-&apos;)*</rhs><com>Encoding name contains only Latin characters</com></prod></scrap>
+'encoding' <nt def="NT-Eq">Eq</nt> 
+('&quot;' <nt def="NT-EncName">EncName</nt> '&quot;' | 
+&quot;'&quot; <nt def="NT-EncName">EncName</nt> &quot;'&quot; )
+</rhs></prod><prod id="NT-EncName"><lhs>EncName</lhs><rhs>[A-Za-z] ([A-Za-z0-9._] | '-')*</rhs><com>Encoding name contains only Latin characters</com></prod></scrap>
 In the <termref def="dt-docent">document entity</termref>, the encoding
 declaration is part of the <termref def="dt-xmldecl">XML declaration</termref>.
 The <nt def="NT-EncName">EncName</nt> is the name of the encoding used.
@ -2286,8 +2281,8 @@ an encoding declaration.</p>
 <p>It is a <termref def="dt-fatal">fatal error</termref> when an XML processor
 encounters an entity with an encoding that it is unable to process.</p>
 <p>Examples of encoding declarations:
-<eg>&lt;?xml encoding=&apos;UTF-8&apos;?&gt;
-&lt;?xml encoding=&apos;EUC-JP&apos;?&gt;</eg></p>
+<eg>&lt;?xml encoding='UTF-8'?&gt;
+&lt;?xml encoding='EUC-JP'?&gt;</eg></p>
 </div3>
 </div2>
 <div2 id="entproc">
@ -2309,9 +2304,9 @@ the value of an
 attribute which has been declared as type <kw>ENTITY</kw>, or as one of
 the space-separated tokens in the value of an attribute which has been
 declared as type <kw>ENTITIES</kw>.</p></def></gitem><gitem><label>Reference in Entity Value</label><def><p>as a reference
-within a parameter or internal entity&apos;s 
+within a parameter or internal entity's 
 <termref def="dt-litentval">literal entity value</termref> in
-the entity&apos;s declaration; corresponds to the nonterminal 
+the entity's declaration; corresponds to the nonterminal 
 <nt def="NT-EntityValue">EntityValue</nt>.</p></def></gitem><gitem><label>Reference in DTD</label><def><p>as a reference within either the internal or external subsets of the 
 <termref def="dt-doctype">DTD</termref>, but outside
 of an <nt def="NT-EntityValue">EntityValue</nt> or
@ -2470,7 +2465,7 @@ replacement text.
 If the entity is external, and the processor is not
 attempting to validate the XML document, the
 processor <termref def="dt-may">may</termref>, but need not, 
-include the entity&apos;s replacement text.
+include the entity's replacement text.
 If a non-validating parser does not include the replacement text,
 it must inform the application that it recognized, but did not
 read, the entity.</p>
@ -2479,7 +2474,7 @@ provided by the SGML and XML entity mechanism, primarily designed
 to support modularity in authoring, is not necessarily 
 appropriate for other applications, in particular document browsing.
 Browsers, for example, when encountering an external parsed entity reference,
-might choose to provide a visual indication of the entity&apos;s
+might choose to provide a visual indication of the entity's
 presence and retrieve it for display only on demand.
 </p>
 </div3>
@ -2508,8 +2503,8 @@ For example, this is well-formed:
 <eg><![CDATA[<!ENTITY % YN '"Yes"' >
 <!ENTITY WhatHeSaid "He said &YN;" >]]></eg>
 while this is not:
-<eg>&lt;!ENTITY EndAttr &quot;27&apos;&quot; &gt;
-&lt;element attribute=&apos;a-&amp;EndAttr;&gt;</eg>
+<eg>&lt;!ENTITY EndAttr &quot;27'&quot; &gt;
+&lt;element attribute='a-&amp;EndAttr;&gt;</eg>
 </p>
 </div3>
 <div3 id="notify">
@ -2548,7 +2543,7 @@ entities to contain an integral number of grammatical tokens in the DTD.
 <head>Construction of Internal Entity Replacement Text</head>
 <p>In discussing the treatment
 of internal entities, it is  
-useful to distinguish two forms of the entity&apos;s value.
+useful to distinguish two forms of the entity's value.
 <termdef id="dt-litentval" term="Literal Entity Value">The <term>literal
 entity value</term> is the quoted string actually
 present in the entity declaration, corresponding to the
@ -2579,9 +2574,9 @@ For example, given the following declarations:
 &#xA9; 1947 %pub;. &rights;" >]]></eg>
 then the replacement text for the entity &quot;<code>book</code>&quot; is:
 <eg>La Peste: Albert Camus, 
-&#169; 1947 &#201;ditions Gallimard. &amp;rights;</eg>
+© 1947 Éditions Gallimard. &amp;rights;</eg>
 The general-entity reference &quot;<code>&amp;rights;</code>&quot; would be expanded
-should the reference &quot;<code>&amp;book;</code>&quot; appear in the document&apos;s
+should the reference &quot;<code>&amp;book;</code>&quot; appear in the document's
 content or an attribute value.</p>
 <p>These simple rules may have complex interactions; for a detailed
 discussion of a difficult example, see
@ -2642,11 +2637,11 @@ entity and attribute-list declarations and in attribute specifications,
 and an external identifier for the notation which may allow an XML
 processor or its client application to locate a helper application
 capable of processing data in the given notation.
-<scrap lang="ebnf"><head>Notation Declarations</head><prod id="NT-NotationDecl"><lhs>NotationDecl</lhs><rhs>&apos;&lt;!NOTATION&apos; <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> 
+<scrap lang="ebnf"><head>Notation Declarations</head><prod id="NT-NotationDecl"><lhs>NotationDecl</lhs><rhs>'&lt;!NOTATION' <nt def="NT-S">S</nt> <nt def="NT-Name">Name</nt> 
 <nt def="NT-S">S</nt> 
 (<nt def="NT-ExternalID">ExternalID</nt> | 
 <nt def="NT-PublicID">PublicID</nt>)
-<nt def="NT-S">S</nt>? &apos;&gt;&apos;</rhs></prod><prod id="NT-PublicID"><lhs>PublicID</lhs><rhs>&apos;PUBLIC&apos; <nt def="NT-S">S</nt> 
+<nt def="NT-S">S</nt>? '&gt;'</rhs></prod><prod id="NT-PublicID"><lhs>PublicID</lhs><rhs>'PUBLIC' <nt def="NT-S">S</nt> 
 <nt def="NT-PubidLiteral">PubidLiteral</nt> 
 </rhs></prod></scrap>
 </termdef>
@ -2683,7 +2678,7 @@ without any identification at all.</p>
 <p>Conforming <termref def="dt-xml-proc">XML processors</termref> fall into two
 classes: validating and non-validating.</p>
 <p>Validating and non-validating processors alike must report
-violations of this specification&apos;s well-formedness constraints
+violations of this specification's well-formedness constraints
 in the content of the
 <termref def="dt-docent">document entity</termref> and any 
 other <termref def="dt-parsedent">parsed entities</termref> that 
@ -2784,7 +2779,7 @@ with a value in the range(s) indicated (inclusive).</p></def></gitem><gitem><lab
 with a value <emph>outside</emph> the
 range indicated.</p></def></gitem><gitem><label><code>[^abc]</code>, <code>[^#xN#xN#xN]</code></label><def><p>matches any <termref def="dt-character">character</termref>
 with a value not among the characters given.</p></def></gitem><gitem><label><code>&quot;string&quot;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
-that given inside the double quotes.</p></def></gitem><gitem><label><code>&apos;string&apos;</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
+that given inside the double quotes.</p></def></gitem><gitem><label><code>'string'</code></label><def><p>matches a literal string <termref def="dt-match">matching</termref>
 that given inside the single quotes.</p></def></gitem></glist>
 These symbols may be combined to match more complex patterns as follows,
 where <code>A</code> and <code>B</code> represent simple expressions:
@ -2861,17 +2856,17 @@ Berners-Lee, T., R. Fielding, and L. Masinter.
 Semantics</emph>.
 1997.
 (Work in progress; see updates to RFC1738.)</bibl>
-          <bibl id="ABK" key="Br&#252;ggemann-Klein">Br&#252;ggemann-Klein, Anne.
+<bibl id="ABK" key="Brüggemann-Klein">Brüggemann-Klein, Anne.
 <emph>Regular Expressions into Finite Automata</emph>.
 Extended abstract in I. Simon, Hrsg., LATIN 1992, 
 S. 97-98. Springer-Verlag, Berlin 1992. 
 Full Version in Theoretical Computer Science 120: 197-213, 1993.

 </bibl>
-          <bibl id="ABKDW" key="Br&#252;ggemann-Klein and Wood">Br&#252;ggemann-Klein, Anne,
+<bibl id="ABKDW" key="Brüggemann-Klein and Wood">Brüggemann-Klein, Anne,
 and Derick Wood.
 <emph>Deterministic Regular Languages</emph>.
-Universit&#228;t Freiburg, Institut f&#252;r Informatik,
+Universität Freiburg, Institut für Informatik,
 Bericht 38, Oktober 1991.
 </bibl>
 <bibl id="Clark" key="Clark">James Clark.
@ -3268,7 +3263,7 @@ rather than name characters, because the property file classifies
 them as Alphabetic:  [#x02BB-#x02C1], #x0559, #x06E5, #x06E6.</p></item><item><p>Characters #x20DD-#x20E0 are excluded (in accordance with 
 Unicode, section 5.14).</p></item><item><p>Character #x00B7 is classified as an extender, because the
 property list so identifies it.</p></item><item><p>Character #x0387 is added as a name character, because #x00B7
-is its canonical equivalent.</p></item><item><p>Characters &apos;:&apos; and &apos;_&apos; are allowed as name-start characters.</p></item><item><p>Characters &apos;-&apos; and &apos;.&apos; are allowed as name characters.</p></item></ulist>
+is its canonical equivalent.</p></item><item><p>Characters ':' and '_' are allowed as name-start characters.</p></item><item><p>Characters '-' and '.' are allowed as name characters.</p></item></ulist>
 </p>
 </div1>
 <inform-div1 id="sec-xml-and-sgml">
@ -3365,7 +3360,7 @@ In this case, the two references to
 <code>b</code> can be collapsed 
 into a single reference, making the model read
 <code>(b, (c | d))</code>.  An initial <code>b</code> now clearly
-matches only a single name in the content model.  The parser doesn&apos;t
+matches only a single name in the content model.  The parser doesn't
 need to look ahead to see what follows; either <code>c</code> or
 <code>d</code> would be accepted.</p>
 <p>More formally:  a finite state automaton may be constructed from the
@ -3384,7 +3379,7 @@ and may be reported as an error.
 </p>
 <p>Algorithms exist which allow many but not all non-deterministic
 content models to be reduced automatically to equivalent deterministic
-models; see Br&#252;ggemann-Klein 1991 <bibref ref="ABK"/>.</p>
+models; see Brüggemann-Klein 1991 <bibref ref="ABK"/>.</p>
 </inform-div1>
 <inform-div1 id="sec-guessing">
 <head>Autodetection of Character Encodings</head>
@ -3408,10 +3403,10 @@ processor without, or with, any accompanying
 <p>
 Because each XML entity not in UTF-8 or UTF-16 format <emph>must</emph>
 begin with an XML encoding declaration, in which the first  characters
-must be &apos;<code>&lt;?xml</code>&apos;, any conforming processor can detect,
+must be '<code>&lt;?xml</code>', any conforming processor can detect,
 after two to four octets of input, which of the following cases apply. 
-In reading this list, it may help to know that in UCS-4, &apos;&lt;&apos; is
-&quot;<code>#x0000003C</code>&quot; and &apos;?&apos; is &quot;<code>#x0000003F</code>&quot;, and the Byte
+In reading this list, it may help to know that in UCS-4, '&lt;' is
+&quot;<code>#x0000003C</code>&quot; and '?' is &quot;<code>#x0000003F</code>&quot;, and the Byte
 Order Mark required of UTF-16 data streams is &quot;<code>#xFEFF</code>&quot;.</p>
 <p>
 <ulist><item><p><code>00 00 00 3C</code>: UCS-4, big-endian machine (1234 order)</p></item><item><p><code>3C 00 00 00</code>: UCS-4, little-endian machine (4321 order)</p></item><item><p><code>00 00 3C 00</code>: UCS-4, unusual octet order (2143)</p></item><item><p><code>00 3C 00 00</code>: UCS-4, unusual octet order (3412)</p></item><item><p><code>FE FF</code>: UTF-16, big-endian</p></item><item><p><code>FF FE</code>: UTF-16, little-endian</p></item><item><p><code>00 3C 00 3F</code>: UTF-16, big-endian, no Byte Order Mark
@ -3456,7 +3451,7 @@ character of input.
 </p>
 <p>
 Like any self-labeling system, the XML encoding declaration will not
-work if any software changes the entity&apos;s character set or encoding
+work if any software changes the entity's character set or encoding
 without updating the encoding declaration.  Implementors of
 character-encoding routines should be careful to ensure the accuracy
 of the internal and external information used to label the entity.
@ -3556,7 +3551,7 @@ Co-editor</role>
 <name>Joel Nava, Adobe</name>
 </member>
 <member>
-          <name>Conleth O&apos;Connell, Vignette</name>
+<name>Conleth O'Connell, Vignette</name>
 </member>
 <member>
 <name>Peter Sharpe, SoftQuad</name>
--- a/result/valid/dia.xml
+++ b/result/valid/dia.xml
@ -3,39 +3,39 @@
 <!ELEMENT diagram (diagramdata , layer*)>
 <!ELEMENT diagramdata (attribute)*>
 <!ELEMENT layer (object | group)*>
-<!ELEMENT object (attribute* , connections?)>
-<!ELEMENT connections (connection)*>
-<!ELEMENT connection EMPTY>
-<!ELEMENT group (object | group)*>
-<!ELEMENT attribute (composite | int | enum | real | boolean | color | point | rectangle | string | font)*>
-<!ELEMENT composite (attribute)*>
-<!ELEMENT int EMPTY>
-<!ELEMENT enum EMPTY>
-<!ELEMENT real EMPTY>
-<!ELEMENT boolean EMPTY>
-<!ELEMENT color EMPTY>
-<!ELEMENT point EMPTY>
-<!ELEMENT rectangle EMPTY>
-<!ELEMENT string EMPTY>
-<!ELEMENT font EMPTY>
 <!ATTLIST layer name CDATA #REQUIRED>
 <!ATTLIST layer visible (true | false) #REQUIRED>
+<!ELEMENT object (attribute* , connections?)>
 <!ATTLIST object type CDATA #REQUIRED>
 <!ATTLIST object version NMTOKEN #REQUIRED>
 <!ATTLIST object id ID #REQUIRED>
+<!ELEMENT connections (connection)*>
+<!ELEMENT connection EMPTY>
 <!ATTLIST connection handle NMTOKEN #REQUIRED>
 <!ATTLIST connection to IDREF #REQUIRED>
 <!ATTLIST connection connection NMTOKEN #REQUIRED>
+<!ELEMENT group (object | group)*>
+<!ELEMENT attribute (composite | int | enum | real | boolean | color | point | rectangle | string | font)*>
 <!ATTLIST attribute name CDATA #REQUIRED>
+<!ELEMENT composite (attribute)*>
 <!ATTLIST composite type CDATA #IMPLIED>
+<!ELEMENT int EMPTY>
 <!ATTLIST int val NMTOKEN #REQUIRED>
+<!ELEMENT enum EMPTY>
 <!ATTLIST enum val NMTOKEN #REQUIRED>
+<!ELEMENT real EMPTY>
 <!ATTLIST real val CDATA #REQUIRED>
+<!ELEMENT boolean EMPTY>
 <!ATTLIST boolean val (true | false) #REQUIRED>
+<!ELEMENT color EMPTY>
 <!ATTLIST color val CDATA #REQUIRED>
+<!ELEMENT point EMPTY>
 <!ATTLIST point val CDATA #REQUIRED>
+<!ELEMENT rectangle EMPTY>
 <!ATTLIST rectangle val CDATA #REQUIRED>
+<!ELEMENT string EMPTY>
 <!ATTLIST string val CDATA #IMPLIED>
+<!ELEMENT font EMPTY>
 <!ATTLIST font name CDATA #REQUIRED>
 ]>
 <dia:diagram xmlns:dia="http://www.lysator.liu.se/~alla/dia/">
--- a/result/valid/xlink.xml
+++ b/result/valid/xlink.xml
@ -68,7 +68,7 @@ type="text/css"?>
 </status>
 <abstract>
 <!-- edited the abstract for further clarity - bent -->
-      <p>This specification defines constructs that may be inserted into XML DTDs, schemas and document instances to describe links between objects. It uses XML syntax to create structures that can describe the simple unidirectional hyperlinks of today&apos;s HTML as well as more sophisticated links.</p>
+<p>This specification defines constructs that may be inserted into XML DTDs, schemas and document instances to describe links between objects. It uses XML syntax to create structures that can describe the simple unidirectional hyperlinks of today's HTML as well as more sophisticated links.</p>
 </abstract>
 <pubstmt>
 <p>Burlington, Seekonk, et al.: World-Wide Web Consortium, XML Working Group, 1998.</p>
@ -99,7 +99,7 @@ type="text/css"?>
 <sitem>1999-05-12: Prose/organization work. Re-organized some of the sections, removed XML constructs from the document, added descriptive prose, edited document text for clarity. Rewrote the link recognition section. bent</sitem>
 <sitem>1999-05-17: Further prose work. Added non-normative examples. Clarified arcs. bent</sitem>
 <sitem>1999-05-23: Edited for grammar and clarity. bent</sitem>
-        <sitem>1999-05-27: Final once-over before sending to group. Fixed sjd&apos;s email address. bent</sitem>
+<sitem>1999-05-27: Final once-over before sending to group. Fixed sjd's email address. bent</sitem>
 </slist>
 </revisiondesc>
 </header>
@ -109,7 +109,7 @@ type="text/css"?>
 <head>Introduction</head>
 <p>This specification defines constructs that may be inserted into XML DTDs, schemas, and document instances to describe links between objects. A <termref def="dt-link">link</termref>, as the term is used here, is an explicit relationship between two or more data objects or portions of data objects. This specification is concerned with the syntax used to assert link existence and describe link characteristics. Implicit (unasserted) relationships, for example that of one word to the next or that of a word in a text to its entry in an on-line dictionary are obviously important, but outside its scope.</p>
 <p>Links are asserted by <xtermref href="WD-xml-lang.html#dt-element">elements </xtermref> contained in <xtermref href="WD-xml-lang.html#dt-xml-doc">XML document instances</xtermref>. The simplest case is very like an HTML <code>A</code> link, and has these characteristics:
-			<ulist><item><p>The link is expressed at one of its ends (similar to the <code>A</code> element in some document)</p></item><item><p>Users can only initiate travel from that end to the other</p></item><item><p>The link&apos;s effect on windows, frames, go-back lists, stylesheets in use, and so on is mainly determined by browsers, not by the link itself. For example, traveral of <code>A</code> links normally replaces the current view, perhaps with a user option to open a new window.</p></item><item><p>The link goes to only one destination (although a server may have great freedom in finding or dynamically creating that destination).</p></item></ulist>
+			<ulist><item><p>The link is expressed at one of its ends (similar to the <code>A</code> element in some document)</p></item><item><p>Users can only initiate travel from that end to the other</p></item><item><p>The link's effect on windows, frames, go-back lists, stylesheets in use, and so on is mainly determined by browsers, not by the link itself. For example, traveral of <code>A</code> links normally replaces the current view, perhaps with a user option to open a new window.</p></item><item><p>The link goes to only one destination (although a server may have great freedom in finding or dynamically creating that destination).</p></item></ulist>
 		</p>
 <p>While this set of characteristics is already very powerful and obviously has proven itself highly useful and effective, each of these assumptions also limits the range of hypertext functionality. The linking model defined here provides ways to create links that go beyond each of these specific characteristics, thus providing features previously available mostly in dedicated hypermedia systems.
 		</p>
@ -137,7 +137,7 @@ document. bent-->
 		<glist><gitem><label><termdef id="dt-arc" term="Arc">arc</termdef></label><def><p>A symbolic representation of traversal behavior in links, especially the direction, context and timing of traversal.</p></def></gitem><gitem><label><termdef id="dt-eltree" term="Element Tree">element tree</termdef></label><def><p>A representation of the relevant structure specified by the tags and attributes in an XML document, based on &quot;groves&quot; as defined in the ISO DSSSL standard. </p></def></gitem><gitem><label><termdef id="dt-inline" term="In-Line Link">inline link</termdef></label><def><p>Abstractly, a <termref def="dt-link">link</termref> which serves as one of its own <termref def="dt-resource">resources</termref>. Concretely, a link where the content of the <termref def="dt-linkel">linking	element</termref> serves as a <termref def="dt-particip-resource">participating resource</termref>.
 				HTML <code>A</code>, HyTime <code>clink</code>, and TEI	<code>XREF</code>
 				are all inline links.</p></def></gitem><gitem><label><termdef id="dt-link" term="Link">link</termdef></label><def><p>An explicit relationship between two or more data objects or portions of data objects.</p></def></gitem><gitem><label><termdef id="dt-linkel" term="Linking Element">linking element </termdef></label><def><p>An <xtermref href="WD-xml-lang.html#dt-element">element</xtermref> that asserts the existence and describes the characteristics of a <termref def="dt-link"> link</termref>.</p></def></gitem><gitem><label><termdef id="dt-local-resource" term="Local Resource">local resource</termdef></label><def><p>The content of an <termref def="dt-inline">inline</termref>linking element. Note that the content of the linking element could be explicitly pointed to by means of a regular <termref def="dt-locator">locator</termref> in the same linking element, in which case the resource is considered <termref def="dt-remote-resource"> remote</termref>, not local.</p></def></gitem><gitem><label><termdef id="dt-locator" term="Locator">locator</termdef> </label><def><p>Data, provided as part of a link, which identifies a
-				<termref def="dt-resource">resource</termref>.</p></def></gitem><gitem><label><termdef id="dt-multidir" term="Multi-Directional Link">multidirectional link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose <termref def="dt-traversal"> traversal</termref> can be initiated from more than one of its <termref def="dt-particip-resource"> participating resources</termref>. Note that being able to &quot;go back&quot; after following a one-directional link does not make the link multidirectional.</p></def></gitem><gitem><label><termdef id="dt-outofline" term="Out-of-line Link">out-of-line link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose content does not serve as one of the link&apos;s <termref def="dt-particip-resource">participating resources </termref>. Such links 						presuppose a notion like <termref def="dt-xlg">extended link groups</termref>, which instruct application software where to look for links. Out-of-line links are generally required for supporting multidirectional <termref def="dt-traversal">traversal</termref> and for allowing read-only resources to have outgoing links.</p></def></gitem><gitem><label><termdef id="dt-parsedq" term="Parsed">parsed</termdef></label><def><p>In the context of link behavior, a parsed link is any link			whose content is transcluded into the document where the link originated. The	use of the term &quot;parsed&quot; directly refers to the concept in XML of a
+				<termref def="dt-resource">resource</termref>.</p></def></gitem><gitem><label><termdef id="dt-multidir" term="Multi-Directional Link">multidirectional link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose <termref def="dt-traversal"> traversal</termref> can be initiated from more than one of its <termref def="dt-particip-resource"> participating resources</termref>. Note that being able to &quot;go back&quot; after following a one-directional link does not make the link multidirectional.</p></def></gitem><gitem><label><termdef id="dt-outofline" term="Out-of-line Link">out-of-line link</termdef></label><def><p>A <termref def="dt-link">link</termref> whose content does not serve as one of the link's <termref def="dt-particip-resource">participating resources </termref>. Such links 						presuppose a notion like <termref def="dt-xlg">extended link groups</termref>, which instruct application software where to look for links. Out-of-line links are generally required for supporting multidirectional <termref def="dt-traversal">traversal</termref> and for allowing read-only resources to have outgoing links.</p></def></gitem><gitem><label><termdef id="dt-parsedq" term="Parsed">parsed</termdef></label><def><p>In the context of link behavior, a parsed link is any link			whose content is transcluded into the document where the link originated. The	use of the term &quot;parsed&quot; directly refers to the concept in XML of a
 				parsed entity.</p></def></gitem><gitem><label><termdef id="dt-particip-resource" term="Participating Resource"> participating resource</termdef></label><def><p>A <termref def="dt-resource">resource</termref> that belongs to a link. All resources are potential contributors to a link; participating	resources are the actual contributors to a particular link.</p></def></gitem><gitem><label><termdef id="dt-remote-resource" term="Remote Resource">remote resource</termdef></label><def><p>Any participating resource of a link that is pointed to with a locator. </p></def></gitem><gitem><label><termdef id="dt-resource" term="Resource">resource</termdef></label><def><p>In the abstract sense, an addressable unit of information or service that is participating in a <termref def="dt-link">link</termref>. Examples include files, images, documents, programs, and query results. Concretely, anything reachable by the use of a <termref def="dt-locator">locator</termref> in some <termref def="dt-linkel">linking	element</termref>. Note that this term and its definition are taken from the basic specifications governing the World Wide Web. <!--Joel notes: need link here. bent asks: A link?-->
 				 </p></def></gitem><gitem><label><termdef id="dt-subresource" term="sub-Resource">sub-resource</termdef></label><def><p>A portion of a resource, pointed to as the precise	destination of a link. As one example, a link might specify that an entire	document be retrieved and displayed, but that some specific part(s) of it is the specific linked data, to be treated in an application-appropriate manner such as indication by highlighting, scrolling, etc.</p></def></gitem><gitem><label><termdef id="dt-traversal" term="Traversal">traversal</termdef></label><def><p>The action of using a <termref def="dt-link">link</termref>; that is, of accessing a <termref def="dt-resource">resource</termref>. Traversal may be initiated by a user action (for example, clicking on the displayed content of a <termref def="dt-linkel">linking element</termref>) or occur under program control.</p></def></gitem></glist>
 	</p>
@ -156,8 +156,8 @@ document. bent-->
 <p>A locator generally contains a URI, as described in IETF RFCs <bibref ref="rfc1738"/> and <bibref ref="rfc1808"/>. As these RFCs state, the URI may include a trailing <emph>query</emph> (marked by a leading &quot;<code>?</code>&quot;), and be followed by a &quot;<code>#</code>&quot; and a <emph>fragment identifier</emph>, with the query interpreted by the host providing the indicated resource, and the interpretation of the fragment identifier dependent on the data type of the indicated resource.</p>
 <!--Is there some restriction on URNs having queries and/or fragment identifiers?  Since these RFCs don't mention URIs explicitly, should the wording here lead from URLs to URIs more explicitly? -elm-->
 <p>In order to locate XML documents and portions of documents, a locator value may contain either a <xtermref href="http://www.w3.org/Addressing/rfc1738.txt"> URI</xtermref> or a fragment identifier, or both. Any fragment identifier for pointing into XML must be an <xtermref href="http://www.w3.org/TR/WD-xptr#dt-xpointer"> XPointer</xtermref>.</p>
-      <p>Special syntax may be used to request the use of particular processing models in accessing the locator&apos;s resource. This is designed to reflect the realities of network operation, where it may or may not be desirable to exercise fine control over the distribution of work between local and remote processors. 
-		<scrap id="locator" lang="ebnf"><head>Locator</head><prod id="nt-locator"><lhs>Locator</lhs><rhs><nt def="nt-uri">URI</nt></rhs><rhs>| <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs><rhs>| <nt def="nt-uri">URI</nt> <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs></prod><prod id="nt-connector"><lhs>Connector</lhs><rhs>&apos;#&apos; | &apos;|&apos;</rhs></prod><prod id="nt-uri"><lhs>URI</lhs><rhs><xnt href="WD-xml-lang.html#NT-URLchar">URIchar*</xnt></rhs></prod></scrap>
+<p>Special syntax may be used to request the use of particular processing models in accessing the locator's resource. This is designed to reflect the realities of network operation, where it may or may not be desirable to exercise fine control over the distribution of work between local and remote processors. 
+		<scrap id="locator" lang="ebnf"><head>Locator</head><prod id="nt-locator"><lhs>Locator</lhs><rhs><nt def="nt-uri">URI</nt></rhs><rhs>| <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs><rhs>| <nt def="nt-uri">URI</nt> <nt def="nt-connector">Connector</nt> (<xnt href="http://www.w3.org/TR/WD-xptr">XPointer</xnt> | <xnt href="WD-xml-lang.html#NT-Name">Name</xnt>)</rhs></prod><prod id="nt-connector"><lhs>Connector</lhs><rhs>'#' | '|'</rhs></prod><prod id="nt-uri"><lhs>URI</lhs><rhs><xnt href="WD-xml-lang.html#NT-URLchar">URIchar*</xnt></rhs></prod></scrap>
 	</p>
 <p><termdef id="dt-designated" term="Designated Resource">In this discussion, the term <term>designated resource</term> refers to the resource which an entire locator serves to locate.</termdef> The following rules apply:
 		<ulist><item><p><termdef id="dt-containing-resource" term="Containing Resource">		  The URI, if provided, locates a resource called the <term>containing resource</term>.</termdef></p></item><item><p>If the URI is not provided, the containing resource is considered to be the document in which the linking element is contained. 
@ -169,7 +169,7 @@ document. bent-->
 	</p>
 <p>Note that the definition of a URI includes an optional query component. </p>
 <p>In the case where the URI contains a query (to be interpreted by the server), information providers and authors of server software are urged to use queries as follows: 
-		<scrap id="querysyntax" lang="ebnf"><head>Query</head><prod id="nt-query"><lhs>Query</lhs><rhs>&apos;XML-XPTR=&apos; (<xnt href="http://www.w3.org/TR/WD-xptr"> XPointer</xnt> | <xnt href="http://www.w3.org/TR/REC-xml#NT-Name">Name</xnt>)</rhs></prod></scrap>
+		<scrap id="querysyntax" lang="ebnf"><head>Query</head><prod id="nt-query"><lhs>Query</lhs><rhs>'XML-XPTR=' (<xnt href="http://www.w3.org/TR/WD-xptr"> XPointer</xnt> | <xnt href="http://www.w3.org/TR/REC-xml#NT-Name">Name</xnt>)</rhs></prod></scrap>
 	</p>
 <!-- fixed link to XML recommendation - bent -->
 </div1>
@ -177,7 +177,7 @@ document. bent-->
 <?Pub Dtl?>
 <head>Link Recognition</head>
 <p>The existence of a <termref def="dt-link">link</termref> is asserted by a <termref def="dt-linkel">linking element</termref>. Linking elements must be recognized reliably by application software in order to provide appropriate display and behavior. There are several ways link recognition could be accomplished: for example, reserving element type names, reserving attributes names, leaving the matter of recognition entirely up to stylesheets and application software, or using the XLink <xtermref href="http://www.w3.org/TR/REC-xml-names/">namespace</xtermref> to specify element names and attribute names that would be recognized by namespace and XLink-aware processors. Using element and attribute names within the XLink namespace provides a balance between giving users control of their own markup language design and keeping the identification of linking elements simple and unambiguous.</p>
-      <p>The two approaches to identifying linking elements are relatively simple to implement. For example, here&apos;s how the HTML <code>A</code> element would be declared using attributes within the XLink namespace, and then how an element within the XLink namespace might do the same:
+<p>The two approaches to identifying linking elements are relatively simple to implement. For example, here's how the HTML <code>A</code> element would be declared using attributes within the XLink namespace, and then how an element within the XLink namespace might do the same:
 		<eg>&lt;A xlink:type=&quot;simple&quot; xlink:href=&quot;http://www.w3.org/TR/wd-xlink/&quot;
 xlink:title=&quot;The Xlink Working Draft&quot;&gt;The XLink Working Draft.&lt;/A&gt;</eg>
 		<eg>&lt;xlink:simple href=&quot;http://www.w3.org/TR/wd-xlink/&quot;
@ -208,8 +208,8 @@ title=&quot;The XLink Working Draft&quot;&gt;The XLink Working Draft&lt;/xlink:s
 </div2>
 <div2 id="link-semantics">
 <head>Semantic Attributes</head>
-        <p>There are two attributes associated with semantics, <code>role</code> and <code>title</code>. The <code>role</code> attribute is a generic string used to describe the function of the link&apos;s content. For example, a poem might have a link with a <code>role=&quot;stanza&quot;</code>. The <code>role</code> is also used as an identifier for the <code>from</code> and <code>to</code> attributes of arcs.</p>
-        <p>The <code>title</code> attribute is designed to provide human-readable text describing the link. It is very useful for those who have text-based applications, whether that be due to a constricted device that cannot display the link&apos;s content, or if it&apos;s being read by an application to a visually-impaired user, or if it&apos;s being used to create a table of links. The <code>title</code> attribute contains a simple, descriptive string.</p>
+<p>There are two attributes associated with semantics, <code>role</code> and <code>title</code>. The <code>role</code> attribute is a generic string used to describe the function of the link's content. For example, a poem might have a link with a <code>role=&quot;stanza&quot;</code>. The <code>role</code> is also used as an identifier for the <code>from</code> and <code>to</code> attributes of arcs.</p>
+<p>The <code>title</code> attribute is designed to provide human-readable text describing the link. It is very useful for those who have text-based applications, whether that be due to a constricted device that cannot display the link's content, or if it's being read by an application to a visually-impaired user, or if it's being used to create a table of links. The <code>title</code> attribute contains a simple, descriptive string.</p>
 </div2>
 </div1>
 <div1 id="linking-elements">
@ -257,7 +257,7 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
 <p>Note that it is meaningful to have an out-of-line simple link, although
 	such links are uncommon. They are called &quot;one-ended&quot; and are typically used
 	to associate discrete semantic properties with locations. The properties might
-	be expressed by attributes on the link, the link&apos;s element type name, or in
+	be expressed by attributes on the link, the link's element type name, or in
 	some other way, and are not considered full-fledged resources of the link.
 	Most out-of-line links are extended links, as these have a far wider range
 	of uses.</p>
@ -270,7 +270,7 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
 <p>These additional capabilities of extended links are required for:  
 		<ulist><item><p>Enabling outgoing links in documents that cannot be modified to add an inline link</p></item><item><p>Creating links to and from resources in formats with no native support for embedded links (such as most multimedia formats)</p></item><item><p>Applying and filtering sets of relevant links on demand</p></item><item><p>Enabling other advanced hypermedia capabilities</p></item></ulist>
 	</p>
-        <p>Application software might be expected to provide traversal among all of a link&apos;s participating resources (subject to semantic constraints outside the scope of this specification) and to signal the fact that a given resource or sub-resource participates in one or more links when it is displayed (even though there is no markup at exactly that point to signal it).</p>
+<p>Application software might be expected to provide traversal among all of a link's participating resources (subject to semantic constraints outside the scope of this specification) and to signal the fact that a given resource or sub-resource participates in one or more links when it is displayed (even though there is no markup at exactly that point to signal it).</p>
 <p>A linking element for an extended link contains a series of <xtermref href="http://www.w3.org/TR/REC-xml/#dt-parentchild">child elements</xtermref> that serve as locators and arcs. Because an extended link can have more than one remote resource, it separates out linking itself from the mechanisms used to locate each resource (whereas a simple link combines the two).</p>
 <p>The <code>xlink:type</code> attribute value for an extended link must be <code> extended</code>, if the link is being instantiated on an arbitrary element. Note that extended links introduce variants of the <code>show</code> and <code>actuate</code> behavior attributes. These attributes, the <code>showdefault</code> and <code>actuatedefault</code> define the same behavior as their counterparts. However, in this case, they are considered to define the default behavior for all the linking elements that they contain.</p>
 <p>However, when a linking element within an extended link has a <code>show</code> or <code>actuate</code> attribute of its own, that attribute overrides the defaults set on the extended linking element.</p>
@ -293,13 +293,13 @@ The XLink Working Draft.&lt;/foo&gt;</eg>
 	xlink:showdefault    (new|parsed|replace)    	#IMPLIED 
    xlink:actuatedefault (user|auto) 				#IMPLIED &gt;</eg>

-	The following two examples demonstrate how each of the above might appear within a document instance. Note that the content of these examples would be other elements. For brevity&apos;s sake, they&apos;ve been left blank. The first example shows how the link might appear, using an explicit XLink extended link:
+	The following two examples demonstrate how each of the above might appear within a document instance. Note that the content of these examples would be other elements. For brevity's sake, they've been left blank. The first example shows how the link might appear, using an explicit XLink extended link:

-<eg>&lt;xlink:extended role=&quot;address book&quot; title=&quot;Ben&apos;s Address Book&quot; showdefault=&quot;replace&quot; actuatedefault=&quot;user&quot;&gt; ... &lt;/xlink:extended&gt;</eg>
+<eg>&lt;xlink:extended role=&quot;address book&quot; title=&quot;Ben's Address Book&quot; showdefault=&quot;replace&quot; actuatedefault=&quot;user&quot;&gt; ... &lt;/xlink:extended&gt;</eg>

 	And the second shows how the link might appear, using an arbitrary element:

-<eg>&lt;foo xlink:type=&quot;extended&quot; xlink:role=&quot;address book&quot; xlink:title=&quot;Ben&apos;s Address Book&quot; xlink:showdefault=&quot;replace&quot; xlink:actuatedefault=&quot;user&quot;&gt; ... &lt;/foo&gt;</eg>
+<eg>&lt;foo xlink:type=&quot;extended&quot; xlink:role=&quot;address book&quot; xlink:title=&quot;Ben's Address Book&quot; xlink:showdefault=&quot;replace&quot; xlink:actuatedefault=&quot;user&quot;&gt; ... &lt;/foo&gt;</eg>
 	</p>
 </div2>
 <div2 id="xlink-arcs">
--- a/result/valid/xlink.xml.err
+++ b/result/valid/xlink.xml.err
@ -1,6 +1,6 @@
 ./test/valid/xlink.xml:450: validity error: ID dt-arc already defined
 	<p><termdef id="dt-arc" term="Arc">An <term>arc</term> is contained within an 
                                   ^
-./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID 'dt-xlg'
+./test/valid/xlink.xml:530: validity error: IDREF attribute def reference an unknown ID "dt-xlg"

 ^
--- a/result/xml2
+++ b/result/xml2
@ -1,8 +1,8 @@
 <?xml version="1.0"?>
 <!DOCTYPE test [
+<!ELEMENT test (#PCDATA)>
 <!ENTITY % xx "&#37;zz;">
 <!ENTITY % zz '&#60;!ENTITY tricky "error-prone" >'>
 <!ENTITY tricky "error-prone">
-<!ELEMENT test (#PCDATA)>
 ]>
 <test>This sample shows a &tricky; method.</test>
--- a/test/dtd12
+++ b/test/dtd12
@ -1,5 +1,5 @@
 <!DOCTYPE doc [
-<!ENTITY % YN '"Yes"' >
-<!ENTITY WhatHeSaid "He said %YN;" >
+<!ENTITY YN '"Yes"' >
+<!ENTITY WhatHeSaid "He said &YN;" >
 ]>
 <doc>&WhatHeSaid;</doc>
--- a/testSAX.c
+++ b/testSAX.c
@ -73,6 +73,7 @@ xmlSAXHandler emptySAXHandlerStruct = {
    NULL, /* xmlParserError */
    NULL, /* xmlParserError */
    NULL, /* getParameterEntity */
+    NULL, /* cdataBlock; */
 };

 xmlSAXHandlerPtr emptySAXHandler = &emptySAXHandlerStruct;
@ -454,6 +455,21 @@ processingInstructionDebug(void *ctx, const xmlChar *target,
            (char *) target, (char *) data);
 }

+/**
+ * cdataBlockDebug:
+ * @ctx: the user data (XML parser context), not used here
+ * @value:  start of the block content
+ * @len:  the block length
+ *
+ * Debugging callback installed in the cdataBlock slot of the debug SAX
+ * handler: traces the call on stderr, showing at most the first 20
+ * characters found at @value followed by @len.
+ */
+void
+cdataBlockDebug(void *ctx, const xmlChar *value, int len)
+{
+    const char *text = (const char *) value;
+
+    fprintf(stderr, "SAX.pcdata(%.20s, %d)\n", text, len);
+}
+
 /**
 * commentDebug:
 * @ctxt:  An XML parser context
@ -553,6 +569,7 @@ xmlSAXHandler debugSAXHandlerStruct = {
    errorDebug,
    fatalErrorDebug,
    getParameterEntityDebug,
+    cdataBlockDebug
 };

 xmlSAXHandlerPtr debugSAXHandler = &debugSAXHandlerStruct;
--- a/tester.c
+++ b/tester.c
@ -14,6 +14,8 @@

 #include <stdio.h>
 #include <string.h>
+#include <stdio.h>
+#include <stdarg.h>

 #ifdef HAVE_SYS_TYPES_H
 #include <sys/types.h>
@ -39,6 +41,7 @@

 #include "xmlmemory.h"
 #include "parser.h"
+#include "parserInternals.h"
 #include "HTMLparser.h"
 #include "HTMLtree.h"
 #include "tree.h"
@ -51,18 +54,252 @@ static int copy = 0;
 static int recovery = 0;
 static int noent = 0;
 static int noout = 0;
+static int nowrap = 0;
 static int valid = 0;
 static int postvalid = 0;
 static int repeat = 0;
 static int insert = 0;
 static int compress = 0;
 static int html = 0;
+static int htmlout = 0;
 static int shell = 0;
 static int push = 0;
-static int blanks = 0;
+static int noblanks = 0;

 extern int xmlDoValidityCheckingDefaultValue;
+extern int xmlGetWarningsDefaultValue;

+/************************************************************************
+ * 									*
+ * 			HTML output					*
+ * 									*
+ ************************************************************************/
+char buffer[50000];
+
+/**
+ * xmlHTMLEncodeSend:
+ *
+ * Runs the text pending in the global message buffer through
+ * xmlEncodeEntitiesReentrant(), writes the result (if any) to stderr and
+ * resets the buffer to the empty string so the next message starts clean.
+ */
+void
+xmlHTMLEncodeSend(void) {
+    char *encoded = (char *) xmlEncodeEntitiesReentrant(NULL, BAD_CAST buffer);
+
+    if (encoded != NULL) {
+	fputs(encoded, stderr);
+	xmlFree(encoded);
+    }
+    buffer[0] = 0;
+}
+
+/**
+ * xmlHTMLPrintFileInfo:
+ * @input:  an xmlParserInputPtr input, may be NULL
+ *
+ * Displays the associated file and line informations for the current input.
+ * Opens an HTML paragraph ("<p>") on stderr, appends "file:line: " (or
+ * "Entity: line N: " when @input has no file name) to the global message
+ * buffer and flushes it with xmlHTMLEncodeSend().  A NULL @input adds
+ * nothing to the buffer.
+ */
+
+void
+xmlHTMLPrintFileInfo(xmlParserInputPtr input) {
+    size_t len;
+
+    fprintf(stderr, "<p>");
+    if (input != NULL) {
+	/* append after the pending text, truncating at the buffer end */
+	len = strlen(buffer);
+	if (input->filename) {
+	    snprintf(&buffer[len], sizeof(buffer) - len, "%s:%d: ",
+		     input->filename, input->line);
+	} else {
+	    snprintf(&buffer[len], sizeof(buffer) - len, "Entity: line %d: ",
+		     input->line);
+	}
+    }
+    xmlHTMLEncodeSend();
+}
+
+/**
+ * xmlHTMLPrintFileContext:
+ * @input:  an xmlParserInputPtr input, may be NULL
+ *
+ * Displays current context within the input content for error tracking:
+ * a "<pre>" block on stderr holding the line around the current position
+ * (at most 79 characters of it) and, on the following line, a '^' marker
+ * padded with spaces up to that position.  Does nothing when @input is
+ * NULL.
+ */
+
+void
+xmlHTMLPrintFileContext(xmlParserInputPtr input) {
+    const xmlChar *cur, *base;
+    size_t len;
+    int n;
+
+    if (input == NULL) return;
+    fprintf(stderr, "<pre>\n");
+    cur = input->cur;
+    base = input->base;
+    /* step back over line terminators under the current position */
+    while ((cur > base) && ((*cur == '\n') || (*cur == '\r'))) {
+	cur--;
+    }
+    /* rewind to the start of that line, 80 characters at most */
+    n = 0;
+    while ((n++ < 80) && (cur > base) && (*cur != '\n') && (*cur != '\r'))
+        cur--;
+    if ((*cur == '\n') || (*cur == '\r')) cur++;
+    base = cur;
+    /* copy the line into the message buffer, truncating at its end */
+    n = 0;
+    while ((*cur != 0) && (*cur != '\n') && (*cur != '\r') && (n < 79)) {
+	len = strlen(buffer);
+        snprintf(&buffer[len], sizeof(buffer) - len, "%c",
+		 (unsigned char) *cur++);
+	n++;
+    }
+    len = strlen(buffer);
+    snprintf(&buffer[len], sizeof(buffer) - len, "\n");
+    cur = input->cur;
+    /* never walk back past the start of the displayed line */
+    while ((cur > base) && ((*cur == '\n') || (*cur == '\r')))
+	cur--;
+    /* pad up to the marker column; no padding if cur is before base */
+    n = 0;
+    while ((cur > base) && (n++ < 80)) {
+	len = strlen(buffer);
+        snprintf(&buffer[len], sizeof(buffer) - len, " ");
+        base++;
+    }
+    len = strlen(buffer);
+    snprintf(&buffer[len], sizeof(buffer) - len, "^\n");
+    xmlHTMLEncodeSend();
+    fprintf(stderr, "</pre>");
+}
+
+/**
+ * xmlHTMLError:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format an error message as HTML on stderr: a paragraph with
+ * the file and line, a bold "error" label and the formatted message,
+ * followed by the input context printed by xmlHTMLPrintFileContext().
+ * The message is formatted into the global buffer and truncated if it
+ * does not fit.
+ */
+void
+xmlHTMLError(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input;
+    va_list args;
+    size_t len;
+
+    buffer[0] = 0;
+    input = ctxt->input;
+    /* input without a file name: use the one below it in inputTab */
+    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
+        input = ctxt->inputTab[ctxt->inputNr - 2];
+    }
+
+    xmlHTMLPrintFileInfo(input);
+
+    fprintf(stderr, "<b>error</b>: ");
+    va_start(args, msg);
+    len = strlen(buffer);
+    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
+    va_end(args);
+    xmlHTMLEncodeSend();
+    fprintf(stderr, "</p>\n");
+
+    xmlHTMLPrintFileContext(input);
+    xmlHTMLEncodeSend();
+}
+
+/**
+ * xmlHTMLWarning:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format a warning message as HTML on stderr: a paragraph with
+ * the file and line, a bold "warning" label and the formatted message,
+ * followed by the input context printed by xmlHTMLPrintFileContext().
+ * The message is formatted into the global buffer and truncated if it
+ * does not fit.
+ */
+void
+xmlHTMLWarning(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input;
+    va_list args;
+    size_t len;
+
+    buffer[0] = 0;
+    input = ctxt->input;
+    /* input without a file name: use the one below it in inputTab */
+    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1)) {
+        input = ctxt->inputTab[ctxt->inputNr - 2];
+    }
+
+    xmlHTMLPrintFileInfo(input);
+
+    fprintf(stderr, "<b>warning</b>: ");
+    va_start(args, msg);
+    len = strlen(buffer);
+    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
+    va_end(args);
+    xmlHTMLEncodeSend();
+    fprintf(stderr, "</p>\n");
+
+    xmlHTMLPrintFileContext(input);
+    xmlHTMLEncodeSend();
+}
+
+/**
+ * xmlHTMLValidityError:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format a validity error message as HTML on stderr: a
+ * paragraph with the file and line, a bold "validity error" label and the
+ * formatted message, followed by the input context printed by
+ * xmlHTMLPrintFileContext().  The message is formatted into the global
+ * buffer and truncated if it does not fit.
+ */
+void
+xmlHTMLValidityError(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input;
+    va_list args;
+    size_t len;
+
+    buffer[0] = 0;
+    input = ctxt->input;
+    /*
+     * input without a file name: use the one below it in inputTab.
+     * A NULL input is passed on as is, the print helpers accept it.
+     */
+    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
+        input = ctxt->inputTab[ctxt->inputNr - 2];
+
+    xmlHTMLPrintFileInfo(input);
+
+    fprintf(stderr, "<b>validity error</b>: ");
+    va_start(args, msg);
+    len = strlen(buffer);
+    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
+    va_end(args);
+    xmlHTMLEncodeSend();
+    fprintf(stderr, "</p>\n");
+
+    xmlHTMLPrintFileContext(input);
+    xmlHTMLEncodeSend();
+}
+
+/**
+ * xmlHTMLValidityWarning:
+ * @ctx:  an XML parser context
+ * @msg:  the message to display/transmit
+ * @...:  extra parameters for the message display
+ *
+ * Display and format a validity warning message as HTML on stderr: a
+ * paragraph with the file and line, a bold "validity warning" label and
+ * the formatted message, followed by the input context printed by
+ * xmlHTMLPrintFileContext().  The message is formatted into the global
+ * buffer and truncated if it does not fit.
+ */
+void
+xmlHTMLValidityWarning(void *ctx, const char *msg, ...)
+{
+    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) ctx;
+    xmlParserInputPtr input;
+    va_list args;
+    size_t len;
+
+    buffer[0] = 0;
+    input = ctxt->input;
+    /*
+     * input without a file name: use the one below it in inputTab.
+     * A NULL input is passed on as is, the print helpers accept it.
+     */
+    if ((input != NULL) && (input->filename == NULL) && (ctxt->inputNr > 1))
+        input = ctxt->inputTab[ctxt->inputNr - 2];
+
+    xmlHTMLPrintFileInfo(input);
+
+    fprintf(stderr, "<b>validity warning</b>: ");
+    va_start(args, msg);
+    len = strlen(buffer);
+    vsnprintf(&buffer[len], sizeof(buffer) - len, msg, args);
+    va_end(args);
+    xmlHTMLEncodeSend();
+    fprintf(stderr, "</p>\n");
+
+    xmlHTMLPrintFileContext(input);
+    xmlHTMLEncodeSend();
+}
+
+/************************************************************************
+ * 									*
+ * 			Shell Interface					*
+ * 									*
+ ************************************************************************/
 /**
 * xmlShellReadline:
 * @prompt:  the prompt value
@ -97,6 +334,11 @@ xmlShellReadline(char *prompt) {
 #endif
 }

+/************************************************************************
+ * 									*
+ * 			Test processing					*
+ * 									*
+ ************************************************************************/
 void parseAndPrintFile(char *filename) {
    xmlDocPtr doc = NULL, tmp;

@ -129,9 +371,40 @@ void parseAndPrintFile(char *filename) {
 		    xmlFreeParserCtxt(ctxt);
 	        }
 	    }
-	} else if (recovery)
+	} else if (recovery) {
 	    doc = xmlRecoverFile(filename);
+	} else if (htmlout) {
+	    int ret;
+	    xmlParserCtxtPtr ctxt;
+	    xmlSAXHandler silent, *old;
+
+	    ctxt = xmlCreateFileParserCtxt(filename);
+	    memcpy(&silent, ctxt->sax, sizeof(silent));
+	    old = ctxt->sax;
+	    silent.error = xmlHTMLError;
+	    if (xmlGetWarningsDefaultValue)
+		silent.warning = xmlHTMLWarning;
 	    else 
+		silent.warning = NULL;
+	    silent.fatalError = xmlHTMLError;
+            ctxt->sax = &silent;
+	    ctxt->vctxt.error = xmlHTMLValidityError;
+	    if (xmlGetWarningsDefaultValue)
+		ctxt->vctxt.warning = xmlHTMLValidityWarning;
+	    else 
+		ctxt->vctxt.warning = NULL;
+
+	    xmlParseDocument(ctxt);
+
+	    ret = ctxt->wellFormed;
+	    doc = ctxt->myDoc;
+	    ctxt->sax = old;
+	    xmlFreeParserCtxt(ctxt);
+	    if (!ret) {
+		xmlFreeDoc(doc);
+		doc = NULL;
+	    }
+	} else
 	    doc = xmlParseFile(filename);
    }

@ -155,8 +428,8 @@ void parseAndPrintFile(char *filename) {
 	int nb, i;
 	xmlNodePtr node;

-	if (doc->root != NULL) {
-	    node = doc->root;
+	if (doc->children != NULL) {
+	    node = doc->children;
 	    while ((node != NULL) && (node->last == NULL)) node = node->next;
 	    if (node != NULL) {
 		nb = xmlValidGetValidElements(node->last, NULL, list, 256);
@ -224,6 +497,12 @@ int main(int argc, char **argv) {
 	else if ((!strcmp(argv[i], "-noout")) ||
 	         (!strcmp(argv[i], "--noout")))
 	    noout++;
+	else if ((!strcmp(argv[i], "-htmlout")) ||
+	         (!strcmp(argv[i], "--htmlout")))
+	    htmlout++;
+	else if ((!strcmp(argv[i], "-nowrap")) ||
+	         (!strcmp(argv[i], "--nowrap")))
+	    nowrap++;
 	else if ((!strcmp(argv[i], "-valid")) ||
 	         (!strcmp(argv[i], "--valid")))
 	    valid++;
@ -244,15 +523,19 @@ int main(int argc, char **argv) {
 	    compress++;
 	    xmlSetCompressMode(9);
        }
-	else if ((!strcmp(argv[i], "-blanks")) ||
-	         (!strcmp(argv[i], "--blanks"))) {
-	    blanks++;
-	    xmlKeepBlanksDefault(1);
-        }
 	else if ((!strcmp(argv[i], "-html")) ||
 	         (!strcmp(argv[i], "--html"))) {
 	    html++;
        }
+	else if ((!strcmp(argv[i], "-nowarning")) ||
+	         (!strcmp(argv[i], "--nowarning"))) {
+	    xmlGetWarningsDefaultValue = 0;
+        }
+	else if ((!strcmp(argv[i], "-noblanks")) ||
+	         (!strcmp(argv[i], "--noblanks"))) {
+	     noblanks++;
+	     xmlKeepBlanksDefault(0);
+        }
 	else if ((!strcmp(argv[i], "-shell")) ||
 	         (!strcmp(argv[i], "--shell"))) {
 	    shell++;
@ -261,6 +544,17 @@ int main(int argc, char **argv) {
    }
    if (noent != 0) xmlSubstituteEntitiesDefault(1);
    if (valid != 0) xmlDoValidityCheckingDefaultValue = 1;
+    if ((htmlout) && (!nowrap)) {
+	fprintf(stderr,
+         "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\"\n");
+	fprintf(stderr, "\t\"http://www.w3.org/TR/REC-html40/loose.dtd\">\n");
+	fprintf(stderr,
+	 "<html><head><title>%s output</title></head>\n",
+		argv[0]);
+	fprintf(stderr, 
+	 "<body bgcolor=\"#ffffff\"><h1 align=\"center\">%s output</h1>\n",
+		argv[0]);
+    }
    for (i = 1; i < argc ; i++) {
 	if (argv[i][0] != '-') {
 	    if (repeat) {
@ -271,8 +565,11 @@ int main(int argc, char **argv) {
 	    files ++;
 	}
    }
+    if ((htmlout) && (!nowrap)) {
+	fprintf(stderr, "</body></html>\n");
+    }
    if (files == 0) {
-	printf("Usage : %s [--debug] [--shell] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
+	printf("Usage : %s [--debug] [--debugent] [--copy] [--recover] [--noent] [--noout] [--valid] [--repeat] XMLfiles ...\n",
 	       argv[0]);
 	printf("\tParse the XML files and output the result of the parsing\n");
 	printf("\t--debug : dump a debug tree of the in-memory document\n");
@ -281,6 +578,8 @@ int main(int argc, char **argv) {
 	printf("\t--recover : output what was parsable on broken XML documents\n");
 	printf("\t--noent : substitute entity references by their value\n");
 	printf("\t--noout : don't output the result tree\n");
+	printf("\t--htmlout : output results as HTML\n");
+	printf("\t--nowarp : do not put HTML doc wrapper\n");
 	printf("\t--valid : validate the document in addition to std well-formed check\n");
 	printf("\t--postvalid : do a posteriori validation, i.e after parsing\n");
 	printf("\t--repeat : repeat 100 times, for timing or profiling\n");
@ -288,8 +587,9 @@ int main(int argc, char **argv) {
 	printf("\t--compress : turn on gzip compression of output\n");
 	printf("\t--html : use the HTML parser\n");
 	printf("\t--shell : run a navigating shell\n");
-	printf("\t--blanks : keep blank text node\n");
 	printf("\t--push : use the push mode of the parser\n");
+	printf("\t--nowarning : do not emit warnings from parser/validator\n");
+	printf("\t--noblanks : drop (ignorable?) blanks spaces\n");
    }
    xmlCleanupParser();
    xmlMemoryDump();
--- a/tree.c
+++ b/tree.c
--- a/tree.h
+++ b/tree.h
@ -36,24 +36,22 @@ typedef enum {
    XML_DOCUMENT_TYPE_NODE=	10,
    XML_DOCUMENT_FRAG_NODE=	11,
    XML_NOTATION_NODE=		12,
-    XML_HTML_DOCUMENT_NODE=	13
+    XML_HTML_DOCUMENT_NODE=	13,
+    XML_DTD_NODE=		14,
+    XML_ELEMENT_DECL=		15,
+    XML_ATTRIBUTE_DECL=		16,
+    XML_ENTITY_DECL=		17
 } xmlElementType;

 /*
 * Size of an internal character representation.
 *
- * Currently we use 8bit chars internal representation for memory efficiency,
- * but the parser is not tied to that, just define UNICODE to switch to
- * a 16 bits internal representation. Note that with 8 bits wide
- * xmlChars one can still use UTF-8 to handle correctly non ISO-Latin
- * input.
+ * We use an 8-bit char internal representation for memory efficiency.
+ * Note that with 8-bit wide xmlChars one can still use UTF-8 to
+ * correctly handle non ISO-Latin input.
 */

-#ifdef UNICODE
-typedef unsigned short xmlChar;
-#else
 typedef unsigned char xmlChar;
-#endif

 #ifndef WIN32
 #ifndef CHAR
@ -109,14 +107,25 @@ struct _xmlEnumeration {
 typedef struct _xmlAttribute xmlAttribute;
 typedef xmlAttribute *xmlAttributePtr;
 struct _xmlAttribute {
-    const xmlChar         *elem;	/* Element holding the attribute */
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ATTRIBUTE_DECL, must be second ! */
    const xmlChar          *name;	/* Attribute name */
-    struct _xmlAttribute   *next;       /* list of attributes of an element */
-    xmlAttributeType       type;	/* The type */
+    struct _xmlNode    *children;	/* NULL */
+    struct _xmlNode        *last;	/* NULL */
+    struct _xmlDtd       *parent;	/* -> DTD */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    struct _xmlAttribute  *nexth;	/* next in hash table */
+    xmlAttributeType       atype;	/* The attribute type */
    xmlAttributeDefault      def;	/* the default */
    const xmlChar  *defaultValue;	/* or the default value */
    xmlEnumerationPtr       tree;       /* or the enumeration tree if any */
    const xmlChar        *prefix;	/* the namespace prefix if any */
+    const xmlChar          *elem;	/* Element holding the attribute */
 };

 /*
@ -156,8 +165,19 @@ typedef enum {
 typedef struct _xmlElement xmlElement;
 typedef xmlElement *xmlElementPtr;
 struct _xmlElement {
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	        /* for Corba, must be first ! */
+#endif
+    xmlElementType          type;       /* XML_ELEMENT_DECL, must be second ! */
    const xmlChar          *name;	/* Element name */
-    xmlElementTypeVal       type;	/* The type */
+    struct _xmlNode    *children;	/* NULL */
+    struct _xmlNode        *last;	/* NULL */
+    struct _xmlDtd       *parent;	/* -> DTD */
+    struct _xmlNode        *next;	/* next sibling link  */
+    struct _xmlNode        *prev;	/* previous sibling link  */
+    struct _xmlDoc          *doc;       /* the containing document */
+
+    xmlElementTypeVal      etype;	/* The type */
    xmlElementContentPtr content;	/* the allowed element content */
    xmlAttributePtr   attributes;	/* List of the declared attributes */
 };
@ -188,14 +208,25 @@ struct _xmlNs {
 typedef struct _xmlDtd xmlDtd;
 typedef xmlDtd *xmlDtdPtr;
 struct _xmlDtd {
+#ifndef XML_WITHOUT_CORBA
+    void           *_private;	/* for Corba, must be first ! */
+#endif
+    xmlElementType  type;       /* XML_DTD_NODE, must be second ! */
    const xmlChar *name;	/* Name of the DTD */
-    const xmlChar *ExternalID;	/* External identifier for PUBLIC DTD */
-    const xmlChar *SystemID;	/* URI for a SYSTEM or PUBLIC DTD */
+    struct _xmlNode *children;	/* the value of the property link */
+    struct _xmlNode *last;	/* last child link */
+    struct _xmlDoc  *parent;	/* child->parent link */
+    struct _xmlNode *next;	/* next sibling link  */
+    struct _xmlNode *prev;	/* previous sibling link  */
+    struct _xmlDoc  *doc;	/* the containing document */
+
+    /* End of common part */
    void          *notations;   /* Hash table for notations if any */
    void          *elements;    /* Hash table for elements if any */
    void          *attributes;  /* Hash table for attributes if any */
    void          *entities;    /* Hash table for entities if any */
-    /* struct xmlDtd *next;	 * next  link for this document  */
+    const xmlChar *ExternalID;	/* External identifier for PUBLIC DTD */
+    const xmlChar *SystemID;	/* URI for a SYSTEM or PUBLIC DTD */
 };

 /*
@ -206,14 +237,17 @@ typedef xmlAttr *xmlAttrPtr;
 struct _xmlAttr {
 #ifndef XML_WITHOUT_CORBA
    void           *_private;	/* for Corba, must be first ! */
-    void           *vepv;	/* for Corba, must be next ! */
 #endif
-    xmlElementType  type;       /* XML_ATTRIBUTE_NODE, must be third ! */
-    struct _xmlNode *node;	/* attr->node link */
-    struct _xmlAttr *next;	/* attribute list link */
+    xmlElementType   type;      /* XML_ATTRIBUTE_NODE, must be second ! */
    const xmlChar   *name;      /* the name of the property */
-    struct _xmlNode *val;       /* the value of the property */
+    struct _xmlNode *children;	/* the value of the property */
+    struct _xmlNode *last;	/* NULL */
+    struct _xmlNode *parent;	/* child->parent link */
+    struct _xmlAttr *next;	/* next sibling link  */
+    struct _xmlAttr *prev;	/* previous sibling link  */
+    struct _xmlDoc  *doc;	/* the containing document */
    xmlNs           *ns;        /* pointer to the associated namespace */
+    xmlAttributeType atype;     /* the attribute type if validating */
 };

 /*
@ -266,24 +300,25 @@ typedef xmlNode *xmlNodePtr;
 struct _xmlNode {
 #ifndef XML_WITHOUT_CORBA
    void           *_private;	/* for Corba, must be first ! */
-    void           *vepv;	/* for Corba, must be next ! */
 #endif
-    xmlElementType  type;	/* type number in the DTD, must be third ! */
-    struct _xmlDoc  *doc;	/* the containing document */
+    xmlElementType   type;	/* type number, must be second ! */
+    const xmlChar   *name;      /* the name of the node, or the entity */
+    struct _xmlNode *children;	/* parent->childs link */
+    struct _xmlNode *last;	/* last child link */
    struct _xmlNode *parent;	/* child->parent link */
    struct _xmlNode *next;	/* next sibling link  */
    struct _xmlNode *prev;	/* previous sibling link  */
-    struct _xmlNode *childs;	/* parent->childs link */
-    struct _xmlNode *last;	/* last child link */
-    struct _xmlAttr *properties;/* properties list */
-    const xmlChar  *name;       /* the name of the node, or the entity */
+    struct _xmlDoc  *doc;	/* the containing document */
    xmlNs           *ns;        /* pointer to the associated namespace */
-    xmlNs          *nsDef;      /* namespace definitions on this node */
 #ifndef XML_USE_BUFFER_CONTENT    
    xmlChar         *content;   /* the content */
 #else
    xmlBufferPtr     content;   /* the content in a buffer */
 #endif
+
+    /* End of common part */
+    struct _xmlAttr *properties;/* properties list */
+    xmlNs           *nsDef;     /* namespace definitions on this node */
 };

 /*
@ -294,20 +329,27 @@ typedef xmlDoc *xmlDocPtr;
 struct _xmlDoc {
 #ifndef XML_WITHOUT_CORBA
    void           *_private;	/* for Corba, must be first ! */
-    void           *vepv;	/* for Corba, must be next ! */
 #endif
    xmlElementType  type;       /* XML_DOCUMENT_NODE, must be second ! */
    char           *name;	/* name/filename/URI of the document */
-    const xmlChar  *version;	/* the XML version string */
-    const xmlChar  *encoding;   /* encoding, if any */
+    struct _xmlNode *children;	/* the document tree */
+    struct _xmlNode *last;	/* last child link */
+    struct _xmlNode *parent;	/* child->parent link */
+    struct _xmlNode *next;	/* next sibling link  */
+    struct _xmlNode *prev;	/* previous sibling link  */
+    struct _xmlDoc  *doc;	/* autoreference to itself */
+
+    /* End of common part */
    int             compression;/* level of zlib compression */
    int             standalone; /* standalone document (no external refs) */
    struct _xmlDtd  *intSubset;	/* the document internal subset */
    struct _xmlDtd  *extSubset;	/* the document external subset */
    struct _xmlNs   *oldNs;	/* Global namespace, the old way */
-    struct _xmlNode *root;	/* the document tree */
+    const xmlChar  *version;	/* the XML version string */
+    const xmlChar  *encoding;   /* encoding, if any */
    void           *ids;        /* Hash table for ID attributes if any */
    void           *refs;       /* Hash table for IDREFs attributes if any */
+    const xmlChar  *URL;	/* The URI for that document */
 };

 /*
@ -422,6 +464,8 @@ xmlNodePtr	xmlNewComment		(const xmlChar *content);
 xmlNodePtr	xmlNewCDataBlock	(xmlDocPtr doc,
 					 const xmlChar *content,
 					 int len);
+xmlNodePtr	xmlNewCharRef		(xmlDocPtr doc,
+					 const xmlChar *name);
 xmlNodePtr	xmlNewReference		(xmlDocPtr doc,
 					 const xmlChar *name);
 xmlNodePtr	xmlCopyNode		(xmlNodePtr node,
@ -513,13 +557,14 @@ xmlChar *	xmlNodeGetContent	(xmlNodePtr cur);
 xmlChar *	xmlNodeGetLang		(xmlNodePtr cur);
 void		xmlNodeSetLang		(xmlNodePtr cur,
 					 const xmlChar *lang);
+int		xmlNodeGetSpacePreserve	(xmlNodePtr cur);
 xmlChar *	xmlNodeGetBase		(xmlDocPtr doc,
 					 xmlNodePtr cur);

 /*
 * Removing content.
 */
-int		xmlRemoveProp		(xmlAttrPtr attr); /* TODO */
+int		xmlRemoveProp		(xmlAttrPtr attr);
 int		xmlRemoveNode		(xmlNodePtr node); /* TODO */

 /*
@ -532,6 +577,12 @@ void		xmlBufferWriteChar	(xmlBufferPtr buf,
 void		xmlBufferWriteQuotedString(xmlBufferPtr buf,
 					 const xmlChar *string);

+/*
+ * Namespace handling
+ */
+int		xmlReconciliateNs	(xmlDocPtr doc,
+					 xmlNodePtr tree);
+
 /*
 * Saving
 */
--- a/valid.c
+++ b/valid.c
--- a/valid.h
+++ b/valid.h
@ -29,6 +29,14 @@ struct _xmlValidCtxt {
    void *userData;			/* user specific data block */
    xmlValidityErrorFunc error;		/* the callback in case of errors */
    xmlValidityWarningFunc warning;	/* the callback in case of warning */
+
+    /* Node analysis stack used when validating within entities */
+    xmlNodePtr         node;          /* Current parsed Node */
+    int                nodeNr;        /* Depth of the parsing stack */
+    int                nodeMax;       /* Max depth of the parsing stack */
+    xmlNodePtr        *nodeTab;       /* array of nodes */
+
+    int              finishDtd;       /* finished validating the Dtd ? */
 };

 /*
@ -114,6 +122,8 @@ xmlNotationPtr	    xmlAddNotationDecl	(xmlValidCtxtPtr ctxt,
 					 const xmlChar *SystemID);
 xmlNotationTablePtr xmlCopyNotationTable(xmlNotationTablePtr table);
 void		    xmlFreeNotationTable(xmlNotationTablePtr table);
+void		    xmlDumpNotationDecl	(xmlBufferPtr buf,
+					 xmlNotationPtr nota);
 void		    xmlDumpNotationTable(xmlBufferPtr buf,
 					 xmlNotationTablePtr table);

@ -122,6 +132,9 @@ xmlElementContentPtr xmlNewElementContent (xmlChar *name,
 					   xmlElementContentType type);
 xmlElementContentPtr xmlCopyElementContent(xmlElementContentPtr content);
 void		     xmlFreeElementContent(xmlElementContentPtr cur);
+void		     xmlSprintfElementContent(char *buf,
+	                                   xmlElementContentPtr content,
+					   int glob);

 /* Element */
 xmlElementPtr	   xmlAddElementDecl	(xmlValidCtxtPtr ctxt,
@ -133,6 +146,8 @@ xmlElementTablePtr xmlCopyElementTable	(xmlElementTablePtr table);
 void		   xmlFreeElementTable	(xmlElementTablePtr table);
 void		   xmlDumpElementTable	(xmlBufferPtr buf,
 					 xmlElementTablePtr table);
+void		   xmlDumpElementDecl	(xmlBufferPtr buf,
+					 xmlElementPtr elem);

 /* Enumeration */
 xmlEnumerationPtr  xmlCreateEnumeration	(xmlChar *name);
@ -144,6 +159,7 @@ xmlAttributePtr	    xmlAddAttributeDecl	    (xmlValidCtxtPtr ctxt,
 					     xmlDtdPtr dtd,
 					     const xmlChar *elem,
 					     const xmlChar *name,
+					     const xmlChar *prefix,
 					     xmlAttributeType type,
 					     xmlAttributeDefault def,
 					     const xmlChar *defaultValue,
@ -152,6 +168,8 @@ xmlAttributeTablePtr xmlCopyAttributeTable  (xmlAttributeTablePtr table);
 void		     xmlFreeAttributeTable  (xmlAttributeTablePtr table);
 void		     xmlDumpAttributeTable  (xmlBufferPtr buf,
 					     xmlAttributeTablePtr table);
+void		     xmlDumpAttributeDecl   (xmlBufferPtr buf,
+					     xmlAttributePtr attr);

 /* IDs */
 xmlIDPtr	xmlAddID	(xmlValidCtxtPtr ctxt,
@ -188,6 +206,10 @@ int		xmlValidateRoot		(xmlValidCtxtPtr ctxt,
 int		xmlValidateElementDecl	(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 		                         xmlElementPtr elem);
+xmlChar *	xmlValidNormalizeAttributeValue(xmlDocPtr doc,
+					 xmlNodePtr elem,
+					 const xmlChar *name,
+					 const xmlChar *value);
 int		xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 		                         xmlAttributePtr attr);
@ -199,6 +221,8 @@ int		xmlValidateNotationDecl	(xmlValidCtxtPtr ctxt,
 int		xmlValidateDtd		(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc,
 					 xmlDtdPtr dtd);
+int		xmlValidateDtdFinal	(xmlValidCtxtPtr ctxt,
+					 xmlDocPtr doc);
 int		xmlValidateDocument	(xmlValidCtxtPtr ctxt,
 					 xmlDocPtr doc);
 int		xmlValidateElement	(xmlValidCtxtPtr ctxt,
--- a/xml-error.h
+++ b/xml-error.h
@ -115,8 +115,22 @@ typedef enum {

    XML_ERR_ENCODING_NAME, /* 80 */

-    XML_ERR_HYPHEN_IN_COMMENT /* 81 */
+    XML_ERR_HYPHEN_IN_COMMENT, /* 81 */

+    XML_ERR_INVALID_ENCODING, /* 82 */
+
+    XML_ERR_EXT_ENTITY_STANDALONE, /* 83 */
+
+    XML_ERR_CONDSEC_INVALID, /* 84 */
+
+    XML_ERR_VALUE_REQUIRED, /* 85 */
+
+    XML_ERR_NOT_WELL_BALANCED, /* 86 */
+    XML_ERR_EXTRA_CONTENT, /* 87 */
+    XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
+    XML_ERR_ENTITY_PE_INTERNAL, /* 89 */
+    XML_ERR_ENTITY_LOOP, /* 90 */
+    XML_ERR_ENTITY_BOUNDARY /* 91 */
 }xmlParserErrors;

 void	xmlParserError		(void *ctx,
--- a/xmlIO.c
+++ b/xmlIO.c
@ -118,6 +118,7 @@ xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
 * If filename is "-' then we use stdin as the input.
 * Automatic support for ZLIB/Compress compressed document is provided
 * by default if found at compile-time.
+ * Do an encoding check if enc == XML_CHAR_ENCODING_NONE
 *
 * Returns the new parser input or NULL
 */
@ -201,13 +202,10 @@ xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
 	}
 #endif
    }
-    /* 
-     * TODO : get the 4 first bytes and decode the charset
-     * if enc == XML_CHAR_ENCODING_NONE
-     * plug some encoding conversion routines here. !!!
-     * enc = xmlDetectCharEncoding(buffer);
-     */

+    /*
+     * Allocate the Input buffer front-end.
+     */
    ret = xmlAllocParserInputBuffer(enc);
    if (ret != NULL) {
 #ifdef HAVE_ZLIB_H
@ -218,7 +216,6 @@ xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
        ret->httpIO = httpIO;
        ret->ftpIO = ftpIO;
    }
-    xmlParserInputBufferRead(ret, 4);

    return(ret);
 }
@ -289,19 +286,30 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
    if (len < 0) return(0);
    if (in->encoder != NULL) {
        xmlChar *buffer;
+	int processed = len;

 	buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
 	if (buffer == NULL) {
 	    fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
-	    xmlFree(buffer);
 	    return(-1);
 	}
 	nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
-	                             (xmlChar *) buf, len);
+	                             (xmlChar *) buf, &processed);
 	/*
 	 * TODO : we really need to have something atomic or the 
 	 *        encoder must report the number of bytes read
 	 */
+	if (nbchars < 0) {
+	    fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
+	    xmlFree(buffer);
+	    return(-1);
+	}
+	if (processed  != len) {
+	    fprintf(stderr,
+	            "TODO xmlParserInputBufferPush: processed  != len\n");
+	    xmlFree(buffer);
+	    return(-1);
+	}
        buffer[nbchars] = 0;
        xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
 	xmlFree(buffer);
@ -382,6 +390,7 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
    }
    if (in->encoder != NULL) {
        xmlChar *buf;
+	int wrote = res;

 	buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
 	if (buf == NULL) {
@ -390,10 +399,24 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
 	    return(-1);
 	}
 	nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
-	                             BAD_CAST buffer, res);
+	                             BAD_CAST buffer, &wrote);
        buf[nbchars] = 0;
        xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
 	xmlFree(buf);
+
+	/*
+	 * Check that the encoder was able to process the full input
+	 */
+	if (wrote != res) {
+	    fprintf(stderr, 
+	        "TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
+		wrote, res);
+	    /*
+	     * TODO !!!
+	     * Need to keep the unprocessed input in a buffer in->unprocessed
+	     */
+	}
+
    } else {
 	nbchars = res;
        buffer[nbchars] = 0;
--- a/xmlmemory.h
+++ b/xmlmemory.h
@ -8,7 +8,7 @@
 #ifndef _DEBUG_MEMORY_ALLOC_
 #define _DEBUG_MEMORY_ALLOC_

-#define NO_DEBUG_MEMORY
+/* #define NO_DEBUG_MEMORY */

 #ifdef NO_DEBUG_MEMORY
 #ifdef HAVE_MALLOC_H
--- a/xpath.c
+++ b/xpath.c
@ -213,9 +213,9 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
 * Dirty macros, i.e. one need to make assumption on the context to use them
 *
 *   CUR_PTR return the current pointer to the xmlChar to be parsed.
- *   CUR     returns the current xmlChar value, i.e. a 8 bit value if compiled
- *           in ISO-Latin or UTF-8, and the current 16 bit value if compiled
- *           in UNICODE mode. This should be used internally by the parser
+ *   CUR     returns the current xmlChar value, i.e. an 8-bit value
+ *           in ISO-Latin or UTF-8.
+ *           This should be used internally by the parser
 *           only to compare to ASCII values otherwise it would break when
 *           running with UTF-8 encoding.
 *   NXT(n)  returns the n'th next xmlChar. Same as CUR is should be used only
@ -237,11 +237,8 @@ PUSH_AND_POP(xmlXPathObjectPtr, value)
 #define SKIP_BLANKS 							\
    while (IS_BLANK(*(ctxt->cur))) NEXT

-#ifndef USE_UTF_8
 #define CURRENT (*ctxt->cur)
 #define NEXT ((*ctxt->cur) ?  ctxt->cur++: ctxt->cur)
-#else
-#endif

 /************************************************************************
 *									*
@ -877,7 +874,7 @@ xmlXPathFreeContext(xmlXPathContextPtr ctxt) {
        fprintf(xmlXPathDebug, "%s:%d Internal error: no document\n",	\
 	        __FILE__, __LINE__);					\
    }									\
-    if (ctxt->doc->root == NULL) { 					\
+    if (ctxt->doc->children == NULL) { 					\
        fprintf(xmlXPathDebug,						\
 	        "%s:%d Internal error: document without root\n",	\
 	        __FILE__, __LINE__);					\
@ -1496,14 +1493,18 @@ xmlXPathNextChild(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_PI_NODE:
            case XML_COMMENT_NODE:
            case XML_NOTATION_NODE:
-		return(ctxt->context->node->childs);
-            case XML_ATTRIBUTE_NODE:
-		return(NULL);
+            case XML_DTD_NODE:
+		return(ctxt->context->node->children);
            case XML_DOCUMENT_NODE:
            case XML_DOCUMENT_TYPE_NODE:
            case XML_DOCUMENT_FRAG_NODE:
            case XML_HTML_DOCUMENT_NODE:
-		return(((xmlDocPtr) ctxt->context->node)->root);
+		return(((xmlDocPtr) ctxt->context->node)->children);
+	    case XML_ELEMENT_DECL:
+	    case XML_ATTRIBUTE_DECL:
+	    case XML_ENTITY_DECL:
+            case XML_ATTRIBUTE_NODE:
+		return(NULL);
 	}
 	return(NULL);
    }
@ -1533,11 +1534,11 @@ xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
 	    return(NULL);

        if (ctxt->context->node == (xmlNodePtr) ctxt->context->doc)
-	    return(ctxt->context->doc->root);
-        return(ctxt->context->node->childs);
+	    return(ctxt->context->doc->children);
+        return(ctxt->context->node->children);
    }

-    if (cur->childs != NULL) return(cur->childs);
+    if (cur->children != NULL) return(cur->children);
    if (cur->next != NULL) return(cur->next);
    
    do {
@ -1606,13 +1607,17 @@ xmlXPathNextParent(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_PI_NODE:
            case XML_COMMENT_NODE:
            case XML_NOTATION_NODE:
+            case XML_DTD_NODE:
+	    case XML_ELEMENT_DECL:
+	    case XML_ATTRIBUTE_DECL:
+	    case XML_ENTITY_DECL:
 		if (ctxt->context->node->parent == NULL)
 		    return((xmlNodePtr) ctxt->context->doc);
 		return(ctxt->context->node->parent);
            case XML_ATTRIBUTE_NODE: {
 		xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node;

-		return(att->node);
+		return(att->parent);
 	    }
            case XML_DOCUMENT_NODE:
            case XML_DOCUMENT_TYPE_NODE:
@ -1655,6 +1660,10 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_ENTITY_NODE:
            case XML_PI_NODE:
            case XML_COMMENT_NODE:
+	    case XML_DTD_NODE:
+	    case XML_ELEMENT_DECL:
+	    case XML_ATTRIBUTE_DECL:
+	    case XML_ENTITY_DECL:
            case XML_NOTATION_NODE:
 		if (ctxt->context->node->parent == NULL)
 		    return((xmlNodePtr) ctxt->context->doc);
@ -1662,7 +1671,7 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
            case XML_ATTRIBUTE_NODE: {
 		xmlAttrPtr cur = (xmlAttrPtr) ctxt->context->node;

-		return(cur->node);
+		return(cur->parent);
 	    }
            case XML_DOCUMENT_NODE:
            case XML_DOCUMENT_TYPE_NODE:
@ -1672,7 +1681,7 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
 	}
 	return(NULL);
    }
-    if (cur == ctxt->context->doc->root)
+    if (cur == ctxt->context->doc->children)
 	return((xmlNodePtr) ctxt->context->doc);
    if (cur == (xmlNodePtr) ctxt->context->doc)
 	return(NULL);
@ -1685,11 +1694,15 @@ xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
 	case XML_PI_NODE:
 	case XML_COMMENT_NODE:
 	case XML_NOTATION_NODE:
+	case XML_DTD_NODE:
+        case XML_ELEMENT_DECL:
+        case XML_ATTRIBUTE_DECL:
+        case XML_ENTITY_DECL:
 	    return(cur->parent);
 	case XML_ATTRIBUTE_NODE: {
 	    xmlAttrPtr att = (xmlAttrPtr) ctxt->context->node;

-	    return(att->node);
+	    return(att->parent);
 	}
 	case XML_DOCUMENT_NODE:
 	case XML_DOCUMENT_TYPE_NODE:
@ -1780,13 +1793,13 @@ xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
        return(NULL);
    if (cur == NULL)
        return(ctxt->context->node->next);; /* !!!!!!!!! */
-    if (cur->childs != NULL) return(cur->childs);
+    if (cur->children != NULL) return(cur->children);
    if (cur->next != NULL) return(cur->next);
    
    do {
        cur = cur->parent;
 	if (cur == NULL) return(NULL);
-	if (cur == ctxt->context->doc->root) return(NULL);
+	if (cur == ctxt->context->doc->children) return(NULL);
 	if (cur->next != NULL) {
 	    cur = cur->next;
 	    return(cur);
@ -1820,7 +1833,7 @@ xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, xmlNodePtr cur) {
    do {
        cur = cur->parent;
 	if (cur == NULL) return(NULL);
-	if (cur == ctxt->context->doc->root) return(NULL);
+	if (cur == ctxt->context->doc->children) return(NULL);
 	if (cur->prev != NULL) {
 	    cur = cur->prev;
 	    return(cur);
@ -2278,7 +2291,7 @@ xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs) {
        ID = xmlStrndup(tokens, cur - tokens);
 	attr = xmlGetID(ctxt->context->doc, ID);
 	if (attr != NULL) {
-	    elem = attr->node;
+	    elem = attr->parent;
            xmlXPathNodeSetAdd(ret->nodesetval, elem);
        }
 	if (ID != NULL)
@ -3677,6 +3690,8 @@ xmlXPathEvalPathExpr(xmlXPathParserContextPtr ctxt) {
 	if (name != NULL)
 	    xmlFree(name);
    }
+    if (ctxt->context->nodelist != NULL)
+	valuePush(ctxt, xmlXPathNewNodeSetList(ctxt->context->nodelist));
 }

 /**