revamped the encoding support, added iconv support, so now libxml if

* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped the encoding support, added iconv support, so now libxml if compiled with iconv automatically support japanese encodings among others. Work based on initial patch from Yuan-Chen Cheng I may have broken binary compat in the encoding handler registration scheme, but that was so utterly broken I don't expect anybody to have used this feature until now. * parserInternals.h: fixup on the CHAR range macro * xml-error.h, parser.c: catch URL/URI errors using the uri.c code. * tree.[ch]: added xmlBufferGrow(), was needed for iconv * uri.c: added xmlParseURI() I can't believe I forgot to implement this one in 2.0 !!! * SAX.c: moved doc->encoding update in the endDocument() call. * TODO: updated. Iconv rules :-) Daniel
2025-08-07 06:43:02 +03:00 · 2000-05-03 14:20:55 +00:00
parent 06047432eb
commit 496a1cf592
18 changed files with 1163 additions and 487 deletions
--- a/18
+++ b/18
@@ -1,3 +1,21 @@
 Wed May  3 14:21:25 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
 	* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped
 	  the encoding support, added iconv support, so now libxml if
 	  compiled with iconv automatically support japanese encodings
 	  among others. Work based on initial patch from Yuan-Chen Cheng
 	  I may have broken binary compat in the encoding handler
 	  registration scheme, but that was so utterly broken I don't
 	  expect anybody to have used this feature until now.
 	* parserInternals.h: fixup on the CHAR range macro
 	* xml-error.h, parser.c: catch URL/URI errors using the uri.c
 	  code.
 	* tree.[ch]: added xmlBufferGrow(), was needed for iconv
 	* uri.c: added xmlParseURI() I can't believe I forgot to
 	  implement this one in 2.0 !!!
 	* SAX.c: moved doc->encoding update in the endDocument() call.
 	* TODO: updated.
 Mon Apr 24 13:30:13 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
 	* tree.h: removed extraneous xmlRemoveProp definition
--- a/SAX.c
+++ b/SAX.c
@@ -595,6 +595,15 @@ endDocument(void *ctx)
    if (ctxt->validate && ctxt->wellFormed &&
        ctxt->myDoc && ctxt->myDoc->intSubset)
 	ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc);
    /*
     * Grab the encoding if it was added on-the-fly
     */
    if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
 	(ctxt->myDoc->encoding == NULL)) {
 	ctxt->myDoc->encoding = ctxt->encoding;
 	ctxt->encoding = NULL;
    }
 }
 /**
--- a/9
+++ b/9
@@ -6,6 +6,8 @@
 TODO:
 =====
 - xmlSwitchToEncoding() need a rewrite for correct handling of conversion
  error code conditions.
 - DOM needs
  xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
  int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr);
@@ -14,7 +16,6 @@ TODO:
 - add support for the trick from Henry conf/sun/valid/empty.xml
 - Correct standalone checking/emitting (hard)
  2.9 Standalone Document Declaration
 - URI checkings (no fragments) rfc2396.txt
 - Better checking of external parsed entities TAG 1234
 - Find way of representing PERefs in the Dtd so that %entity; can
  be saved back.
@@ -22,6 +23,7 @@ TODO:
  http://www.w3.org/XML/xml-19980210-errata ... bummmer 
 - Handle undefined namespaces in entity contents better ... at least
  issue a warning
 - Issue warning when using non-absolute namespaces URI.
 - General checking of DTD validation in presence of namespaces ... hairy
 - fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
  not WITHOUT_CORBA flag
@@ -30,7 +32,7 @@ TODO:
 =====
 - Get OASIS testsuite to a more friendly result, check all the results
-  once stable.
+  once stable. Current state at:
  http://xmlsoft.org/conf/result.html
 - Optimization of tag strings allocation ?
@@ -55,11 +57,13 @@ EXTENSIONS:
 - Add Xlink recognition/API
  => started adding an xlink.[ch] with a unified API for XML and HTML.
     it's crap :-(
 - Implement XSLT
  => seems that someone volunteered ?!?
 - Implement XSchemas
  => Really need to be done <grin/>
 - O2K parsing;
  => this is a somewhat ugly mix of HTML and XML, adding a specific
@@ -88,6 +92,7 @@ EXTENSIONS:
 Done:
 =====
 - URI checkings (no fragments) rfc2396.txt
 - Added a clean mechanism for overload or added input methods:
  xmlRegisterInputCallbacks()
 - dynamically adapt the alloc entry point to use g_alloc()/g_free()
--- a/configure.in
+++ b/configure.in
@@ -4,7 +4,7 @@ AC_INIT(entities.h)
 AM_CONFIG_HEADER(config.h)
 LIBXML_MAJOR_VERSION=2
-LIBXML_MINOR_VERSION=0
+LIBXML_MINOR_VERSION=1
 LIBXML_MICRO_VERSION=0
 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
 LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@@ -203,6 +203,20 @@ fi
 AC_SUBST(WITH_XPATH)
 AC_SUBST(XPATH_OBJ)
 AC_ARG_WITH(iconv, [  --with-iconv            Add the ICONV support (on)])
 if test "$with_iconv" = "no" ; then
    echo Disabling ICONV support
    WITH_ICONV=0
 else    
    if test "$have_iconv" != "" ; then
        echo Iconv support not found
        WITH_ICONV=0
    else
        WITH_ICONV=1
    fi
 fi  
 AC_SUBST(WITH_ICONV)
 AC_ARG_WITH(debug, [  --with-debug            Add the debugging module (on)])
 if test "$with_debug" = "no" ; then
    echo Disabling DEBUG support
--- a/encoding.c
+++ b/encoding.c
@@ -34,12 +34,26 @@
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
 #include <libxml/xmlversion.h>
 #ifdef LIBXML_ICONV_ENABLED
 #ifdef HAVE_ERRNO_H
 #include <errno.h>
 #endif
 #endif
 #include <libxml/encoding.h>
 #include <libxml/xmlmemory.h>
 xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
 xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
 #ifdef LIBXML_ICONV_ENABLED
 #if 0
 #define DEBUG_ENCODING  /* Define this to get encoding traces */
 #endif
 #endif
 static int xmlLittleEndian = 1;
 /*
 * From rfc2044: encoding of the Unicode values on UTF-8:
 *
@@ -104,30 +118,38 @@ xmlCheckUTF8(const unsigned char *utf)
 *
 * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
 * block of chars out.
- * Returns the number of byte written, or -1 by lack of space.
+ * Returns 0 if success, or -1 otherwise
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictiable.
 * The value of @outlen after return is the number of ocetes consumed.
 */
 int
-isolat1ToUTF8(unsigned char* out, int outlen,
+isolat1ToUTF8(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
-    unsigned char* outend= out+outlen;
+    const unsigned char* processed = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + *inlen;
    unsigned char c;
    while (in < inend) {
        c= *in++;
        if (c < 0x80) {
-            if (out >= outend)  return(-1);
+            if (out >= outend)
 		break;
            *out++ = c;
        }
        else {
-            if (out >= outend)  return(-1);
+            if (out + 1 >= outend)  break;
            *out++ = 0xC0 | (c >> 6);
            if (out >= outend)  return(-1);
            *out++ = 0x80 | (0x3F & c);
        }
 	processed = in;
    }
-    return(out-outstart);
+    *outlen = out - outstart;
    *inlen = processed - in;
    return(0);
 }
 /**
@@ -141,17 +163,17 @@ isolat1ToUTF8(unsigned char* out, int outlen,
 * block of chars out.
 * TODO: UTF8Toisolat1 need a fallback mechanism ...
 *
- * Returns the number of byte written, or -1 by lack of space, or -2
+ * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
 *     if the transcoding fails (for *in is not valid utf8 string or
 *     the result of transformation can't fit into the encoding we want)
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictiable.
 * The value of @outlen after return is the number of ocetes consumed.
 */
 int
-UTF8Toisolat1(unsigned char* out, int outlen,
+UTF8Toisolat1(unsigned char* out, int *outlen,
              const unsigned char* in, int *inlen) {
    unsigned char* outstart = out;
-    unsigned char* outend= out+outlen;
+    const unsigned char* processed = in;
    unsigned char* outend = out + *outlen;
    const unsigned char* inend = in + *inlen;
    unsigned char c;
@@ -162,18 +184,22 @@ UTF8Toisolat1(unsigned char* out, int outlen,
            *out++= c;
        }
 	else if (in == inend) {
            *inlen -= 1;
            break;
 	}
 	else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
 	    /* a two byte utf-8 and can be encoding as isolate1 */
            *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
 	}
-	else
+	else {
 	    *outlen = out - outstart;
 	    *inlen = processed - in;
 	    return(-2);
 	/* TODO : some should be represent as "&#x____;" */
 	}
-    return(out-outstart);
+	processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - in;
    return(0);
 }
 /**
@@ -194,11 +220,12 @@ UTF8Toisolat1(unsigned char* out, int outlen,
 *     as the return value is positive, else unpredictiable.
 */
 int
-UTF16LEToUTF8(unsigned char* out, int outlen,
+UTF16LEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
 {
    unsigned char* outstart = out;
-    unsigned char* outend= out+outlen;
+    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    unsigned short* in = (unsigned short*) inb;
    unsigned short* inend;
    unsigned int c, d, inlen;
@@ -210,40 +237,42 @@ UTF16LEToUTF8(unsigned char* out, int outlen,
    inlen = *inlenb / 2;
    inend = in + inlen;
    while (in < inend) {
-#ifdef BIG_ENDIAN
+        if (xmlLittleEndian) {
 	    c= *in++;
 	} else {
 	    tmp = (unsigned char *) in;
 	    c = *tmp++;
 	    c = c | (((unsigned int)*tmp) << 8);
 	    in++;
-#else /* BIG_ENDIAN */
+	}
        c= *in++;
 #endif /* BIG_ENDIAN */
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
            if (in >= inend) {           /* (in > inend) shouldn't happens */
                (*inlenb) -= 2;
                break;
            }
-#ifdef BIG_ENDIAN
+	    if (xmlLittleEndian) {
 		d = *in++;
 	    } else {
 		tmp = (unsigned char *) in;
 		d = *tmp++;
 		d = d | (((unsigned int)*tmp) << 8);
 		in++;
-#else /* BIG_ENDIAN */
+	    }
            d = *in++;
 #endif /* BIG_ENDIAN */
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
-            else
+            else {
 		*outlen = out - outstart;
 		*inlenb = processed - inb;
 	        return(-2);
 	    }
        }
 	/* assertion: c is a single UTF-4 value */
        if (out >= outend)
-	    return(-1);
+	    break;
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
@@ -251,11 +280,14 @@ UTF16LEToUTF8(unsigned char* out, int outlen,
        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend)
-	        return(-1);
+	        break;
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
 	processed = (const unsigned char*) in;
    }
-    return(out-outstart);
+    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
 }
 /**
@@ -273,40 +305,44 @@ UTF16LEToUTF8(unsigned char* out, int outlen,
 *     if the transcoding failed. 
 */
 int
-UTF8ToUTF16LE(unsigned char* outb, int outlen,
+UTF8ToUTF16LE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
 {
    unsigned short* out = (unsigned short*) outb;
    const unsigned char* processed = in;
    unsigned short* outstart= out;
    unsigned short* outend;
    const unsigned char* inend= in+*inlen;
    unsigned int c, d, trailing;
 #ifdef BIG_ENDIAN
    unsigned char *tmp;
    unsigned short tmp1, tmp2;
 #endif /* BIG_ENDIAN */
-    outlen /= 2; /* convert in short length */
+    outend = out + (*outlen / 2);
    outend = out + outlen;
    while (in < inend) {
      d= *in++;
      if      (d < 0x80)  { c= d; trailing= 0; }
-      else if (d < 0xC0)
+      else if (d < 0xC0) {
-          return(-2);    /* trailing byte in leading position */
+          /* trailing byte in leading position */
-      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+	  *outlen = out - outstart;
 	  *inlen = processed - in;
 	  return(-2);
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
-      else
+      else {
-          return(-2);    /* no chance for this in UTF-16 */
+	/* no chance for this in UTF-16 */
 	*outlen = out - outstart;
 	*inlen = processed - in;
 	return(-2);
      }
      if (inend - in < trailing) {
          *inlen -= (inend - in);
          break;
      } 
      for ( ; trailing; trailing--) {
          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
-	      return(-1);
+	      break;
          c <<= 6;
          c |= d & 0x3F;
      }
@@ -314,21 +350,24 @@ UTF8ToUTF16LE(unsigned char* outb, int outlen,
      /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
            if (out >= outend)
-	        return(-1);
+	        break;
-#ifdef BIG_ENDIAN
+	    if (xmlLittleEndian) {
 		*out++ = c;
 	    } else {
 		tmp = (unsigned char *) out;
 		*tmp = c ;
 		*(tmp + 1) = c >> 8 ;
 		out++;
-#else /* BIG_ENDIAN */
+	    }
            *out++ = c;
 #endif /* BIG_ENDIAN */
        }
        else if (c < 0x110000) {
            if (out+1 >= outend)
-	        return(-1);
+	        break;
            c -= 0x10000;
-#ifdef BIG_ENDIAN
+	    if (xmlLittleEndian) {
 		*out++ = 0xD800 | (c >> 10);
 		*out++ = 0xDC00 | (c & 0x03FF);
 	    } else {
 		tmp1 = 0xD800 | (c >> 10);
 		tmp = (unsigned char *) out;
 		*tmp = tmp1;
@@ -340,15 +379,15 @@ UTF8ToUTF16LE(unsigned char* outb, int outlen,
 		*tmp  = tmp2;
 		*(tmp + 1) = tmp2 >> 8;
 		out++;
-#else /* BIG_ENDIAN */
+	    }
            *out++ = 0xD800 | (c >> 10);
            *out++ = 0xDC00 | (c & 0x03FF);
 #endif /* BIG_ENDIAN */
        }
        else
-	    return(-1);
+	    break;
 	processed = in;
    }
-    return(out-outstart);
+    *outlen = out - outstart;
    *inlen = processed - in;
    return(0);
 }
 /**
@@ -369,18 +408,16 @@ UTF8ToUTF16LE(unsigned char* outb, int outlen,
 *     as the return value is positive, else unpredictiable.
 */
 int
-UTF16BEToUTF8(unsigned char* out, int outlen,
+UTF16BEToUTF8(unsigned char* out, int *outlen,
            const unsigned char* inb, int *inlenb)
 {
    unsigned char* outstart = out;
-    unsigned char* outend= out+outlen;
+    const unsigned char* processed = inb;
    unsigned char* outend = out + *outlen;
    unsigned short* in = (unsigned short*) inb;
    unsigned short* inend;
    unsigned int c, d, inlen;
 #ifdef BIG_ENDIAN
 #else /* BIG_ENDIAN */
    unsigned char *tmp;
 #endif /* BIG_ENDIAN */    
    int bits;
    if ((*inlenb % 2) == 1)
@@ -388,43 +425,46 @@ UTF16BEToUTF8(unsigned char* out, int outlen,
    inlen = *inlenb / 2;
    inend= in + inlen;
    while (in < inend) {
-#ifdef BIG_ENDIAN    
+	if (xmlLittleEndian) {
        c= *in++;
 #else
 	    tmp = (unsigned char *) in;
 	    c = *tmp++;
 	    c = c << 8;
 	    c = c | (unsigned int) *tmp;
 	    in++;
-#endif	
+	} else {
 	    c= *in++;
 	} 
        if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
-	        (*inlenb) -= 2;
+		*outlen = out - outstart;
-		break;
+		*inlenb = processed - inb;
 	        return(-2);
 	    }
-
+	    if (xmlLittleEndian) {
 #ifdef BIG_ENDIAN
            d= *in++;
 #else
 		tmp = (unsigned char *) in;
 		d = *tmp++;
 		d = d << 8;
 		d = d | (unsigned int) *tmp;
 		in++;
-#endif	    
+	    } else {
 		d= *in++;
 	    }
            if ((d & 0xFC00) == 0xDC00) {
                c &= 0x03FF;
                c <<= 10;
                c |= d & 0x03FF;
                c += 0x10000;
            }
-            else 
+            else {
 		*outlen = out - outstart;
 		*inlenb = processed - inb;
 	        return(-2);
 	    }
        }
 	/* assertion: c is a single UTF-4 value */
        if (out >= outend) 
-	    return(-1);
+	    break;
        if      (c <    0x80) {  *out++=  c;                bits= -6; }
        else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
        else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
@@ -432,11 +472,14 @@ UTF16BEToUTF8(unsigned char* out, int outlen,
        for ( ; bits >= 0; bits-= 6) {
            if (out >= outend) 
-	        return(-1);
+	        break;
            *out++= ((c >> bits) & 0x3F) | 0x80;
        }
 	processed = (const unsigned char*) in;
    }
-    return(out-outstart);
+    *outlen = out - outstart;
    *inlenb = processed - inb;
    return(0);
 }
 /**
@@ -454,63 +497,63 @@ UTF16BEToUTF8(unsigned char* out, int outlen,
 *     if the transcoding failed. 
 */
 int
-UTF8ToUTF16BE(unsigned char* outb, int outlen,
+UTF8ToUTF16BE(unsigned char* outb, int *outlen,
            const unsigned char* in, int *inlen)
 {
    unsigned short* out = (unsigned short*) outb;
    const unsigned char* processed = in;
    unsigned short* outstart= out;
    unsigned short* outend;
    const unsigned char* inend= in+*inlen;
    unsigned int c, d, trailing;
 #ifdef BIG_ENDIAN
 #else
    unsigned char *tmp;
    unsigned short tmp1, tmp2;
 #endif /* BIG_ENDIAN */    
-    outlen /= 2; /* convert in short length */
+    outend = out + (*outlen / 2);
    outend = out + outlen;
    while (in < inend) {
      d= *in++;
      if      (d < 0x80)  { c= d; trailing= 0; }
-      else if (d < 0xC0)
+      else if (d < 0xC0)  {
-          return(-2);    /* trailing byte in leading position */
+          /* trailing byte in leading position */
-      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+	  *outlen = out - outstart;
 	  *inlen = processed - in;
 	  return(-2);
      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
      else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
      else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
-      else
+      else {
-          return(-2);    /* no chance for this in UTF-16 */
+          /* no chance for this in UTF-16 */
 	  *outlen = out - outstart;
 	  *inlen = processed - in;
 	  return(-2);
      }
      if (inend - in < trailing) {
          *inlen -= (inend - in);
          break;
      } 
      for ( ; trailing; trailing--) {
-          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  return(-1);
+          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
          c <<= 6;
          c |= d & 0x3F;
      }
      /* assertion: c is a single UTF-4 value */
        if (c < 0x10000) {
-            if (out >= outend)  return(-1);
+            if (out >= outend)  break;
-#ifdef BIG_ENDIAN
+	    if (xmlLittleEndian) {
            *out++ = c;
 #else
 		tmp = (unsigned char *) out;
 		*tmp = c >> 8;
 		*(tmp + 1) = c;
 		out++;
-#endif /* BIG_ENDIAN */
+	    } else {
 		*out++ = c;
 	    }
        }
        else if (c < 0x110000) {
-            if (out+1 >= outend)  return(-1);
+            if (out+1 >= outend)  break;
            c -= 0x10000;
-#ifdef BIG_ENDIAN
+	    if (xmlLittleEndian) {
            *out++ = 0xD800 | (c >> 10);
            *out++ = 0xDC00 | (c & 0x03FF);
 #else
 		tmp1 = 0xD800 | (c >> 10);
 		tmp = (unsigned char *) out;
 		*tmp = tmp1 >> 8;
@@ -522,11 +565,18 @@ UTF8ToUTF16BE(unsigned char* outb, int outlen,
 		*tmp = tmp2 >> 8;
 		*(tmp + 1) = tmp2;
 		out++;
-#endif
+	    } else {
 		*out++ = 0xD800 | (c >> 10);
 		*out++ = 0xDC00 | (c & 0x03FF);
 	    }
        else  return(-1);
        }
-    return(out-outstart);
+        else
 	    break;
 	processed = in;
    }
    *outlen = out - outstart;
    *inlen = processed - in;
    return(0);
 }
 /**
@@ -636,8 +686,12 @@ xmlParseCharEncoding(const char* name)
    if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
    if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
-    if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
+    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
    if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
 #ifdef DEBUG_ENCODING
    fprintf(stderr, "Unknown encoding %s\n", name);
 #endif
    return(XML_CHAR_ENCODING_ERROR);
 }
@@ -712,6 +766,9 @@ xmlNewCharEncodingHandler(const char *name,
     * registers and returns the handler.
     */
    xmlRegisterCharEncodingHandler(handler);
 #ifdef DEBUG_ENCODING
    fprintf(stderr, "Registered encoding handler for %s\n", name);
 #endif
    return(handler);
 }
@@ -725,11 +782,18 @@ xmlNewCharEncodingHandler(const char *name,
 */
 void
 xmlInitCharEncodingHandlers(void) {
    unsigned short int tst = 0x1234;
    unsigned char *ptr = (unsigned char *) &tst; 
    if (handlers != NULL) return;
    handlers = (xmlCharEncodingHandlerPtr *)
        xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
    if (*ptr == 0x12) xmlLittleEndian = 0;
    else if (*ptr == 0x34) xmlLittleEndian = 1;
    else fprintf(stderr, "Odd problem at endianness detection\n");
    if (handlers == NULL) {
        fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
 	return;
@@ -755,6 +819,7 @@ xmlCleanupCharEncodingHandlers(void) {
    for (;nbCharEncodingHandler > 0;) {
        nbCharEncodingHandler--;
 	if (handlers[nbCharEncodingHandler] != NULL) {
 	    if (handlers[nbCharEncodingHandler]->name != NULL)
 		xmlFree(handlers[nbCharEncodingHandler]->name);
 	    xmlFree(handlers[nbCharEncodingHandler]);
 	}
@@ -798,6 +863,8 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
 */
 xmlCharEncodingHandlerPtr
 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
    xmlCharEncodingHandlerPtr handler;
    if (handlers == NULL) xmlInitCharEncodingHandlers();
    switch (enc) {
        case XML_CHAR_ENCODING_ERROR:
@@ -811,40 +878,68 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
        case XML_CHAR_ENCODING_UTF16BE:
 	    return(xmlUTF16BEHandler);
        case XML_CHAR_ENCODING_EBCDIC:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("EBCDIC");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("ebcdic");
            if (handler != NULL) return(handler);
 	    break;
        case XML_CHAR_ENCODING_UCS4LE:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("UCS-4");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("UCS4");
            if (handler != NULL) return(handler);
 	    break;
        case XML_CHAR_ENCODING_UCS4BE:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("UCS4BE");
            if (handler != NULL) return(handler);
 	    break;
        case XML_CHAR_ENCODING_UCS4_2143:
-	    return(NULL);
+	    break;
        case XML_CHAR_ENCODING_UCS4_3412:
-	    return(NULL);
+	    break;
        case XML_CHAR_ENCODING_UCS2:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("UCS-2");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("UCS2");
            if (handler != NULL) return(handler);
 	    break;
        case XML_CHAR_ENCODING_8859_1:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_2:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_3:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_4:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_5:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_6:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_7:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_8:
 	    return(NULL);
        case XML_CHAR_ENCODING_8859_9:
 	    return(NULL);
        case XML_CHAR_ENCODING_2022_JP:
            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
            if (handler != NULL) return(handler);
 	    break;
        case XML_CHAR_ENCODING_SHIFT_JIS:
            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
            if (handler != NULL) return(handler);
            handler = xmlFindCharEncodingHandler("Shift_JIS");
            if (handler != NULL) return(handler);
 	    break;
        case XML_CHAR_ENCODING_EUC_JP:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("EUC-JP");
            if (handler != NULL) return(handler);
 	    break;
 	default: 
 	    break;
    }
 #ifdef DEBUG_ENCODING
    fprintf(stderr, "No handler found for encoding %d\n", enc);
 #endif
    return(NULL);
 }
@@ -858,23 +953,306 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
 */
 xmlCharEncodingHandlerPtr
 xmlFindCharEncodingHandler(const char *name) {
-    char upper[500];
+#ifdef LIBXML_ICONV_ENABLED
    iconv_t icv_in, icv_out;
    xmlCharEncodingHandlerPtr enc;
 #endif /* LIBXML_ICONV_ENABLED */
    char upper[100];
    int i;
    if (handlers == NULL) xmlInitCharEncodingHandlers();
    if (name == NULL) return(xmlDefaultCharEncodingHandler);
    if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
-    for (i = 0;i < 499;i++) {
+    for (i = 0;i < 99;i++) {
        upper[i] = toupper(name[i]);
 	if (upper[i] == 0) break;
    }
    upper[i] = 0;
    for (i = 0;i < nbCharEncodingHandler; i++)
-        if (!strcmp(name, handlers[i]->name))
+        if (!strcmp(upper, handlers[i]->name)) {
 #ifdef DEBUG_ENCODING
            fprintf(stderr, "Found registered handler for encoding %s\n", name);
 #endif
 	    return(handlers[i]);
 	}
 #ifdef LIBXML_ICONV_ENABLED
    /* check whether iconv can handle this */
    icv_in = iconv_open("UTF-8", name);
    icv_out = iconv_open(name, "UTF-8");
    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
 	    enc = xmlMalloc(sizeof(xmlCharEncodingHandler));
 	    if (enc == NULL) {
 	        iconv_close(icv_in);
 	        iconv_close(icv_out);
 		return(NULL);
 	    }
 	    enc->name = NULL;
 	    enc->input = NULL;
 	    enc->output = NULL;
 	    enc->iconv_in = icv_in;
 	    enc->iconv_out = icv_out;
 #ifdef DEBUG_ENCODING
            fprintf(stderr, "Found iconv handler for encoding %s\n", name);
 #endif
 	    return enc;
    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
 	    fprintf(stderr, "iconv : problems with filters for '%s'\n", name);
    }
 #endif /* LIBXML_ICONV_ENABLED */
 #ifdef DEBUG_ENCODING
    fprintf(stderr, "No handler found for encoding %s\n", name);
 #endif
    return(NULL);
 }
 #ifdef LIBXML_ICONV_ENABLED
 /**
  * xmlIconvWrapper:
  * @cd:		iconv converter data structure
  * @out:  a pointer to an array of bytes to store the result
  * @outlen:  the length of @out
  * @in:  a pointer to an array of bytes in the source encoding of @cd
  * @inlen:  the length of @in
  *
  * Runs one iconv() conversion step from @in to @out and maps the
  * iconv error codes to the return values used by the encoding handlers.
  *
  * Returns 0 if success, or
  *     -1 by lack of space (E2BIG), or
  *     -2 if the transcoding fails (EILSEQ: the input is not valid in the
  *        source encoding or can't be represented in the target one), or
  *     -3 if the input ends with an incomplete sequence (EINVAL) or for
  *        any other failure.
  *
  * The value of @inlen after return is the number of octets consumed.
  * The value of @outlen after return is the number of octets produced.
  */
 static int
 xmlIconvWrapper(iconv_t cd,
                 unsigned char *out, int *outlen,
                 const unsigned char *in, int *inlen) {
     size_t icv_inlen = (size_t) *inlen;
     size_t icv_outlen = (size_t) *outlen;
     /* iconv() is declared with char ** but only reads the input bytes */
     char *icv_in = (char *) in;
     char *icv_out = (char *) out;
     size_t ret;
     int err = 0;
     ret = iconv(cd, &icv_in, &icv_inlen, &icv_out, &icv_outlen);
     /* errno is only meaningful when iconv() reported a failure */
     if (ret == (size_t) -1)
         err = errno;
     /* what is left is subtracted to report consumed/produced counts */
     *inlen -= (int) icv_inlen;
     *outlen -= (int) icv_outlen;
     if ((icv_inlen != 0) || (ret == (size_t) -1)) {
 #ifdef EILSEQ
         if (err == EILSEQ)
             return -2;
 #endif
 #ifdef E2BIG
         if (err == E2BIG)
             return -1;
 #endif
         /* EINVAL (truncated input sequence) and anything else */
         return -3;
     }
     return 0;
 }
 #endif /* LIBXML_ICONV_ENABLED */
 /**
  * xmlCharEncInFunc:
  * @handler:	char encoding transformation data structure
  * @out:  an xmlBuffer for the output.
  * @in:  an xmlBuffer for the input
  *
  * Generic front-end for the encoding handler input function.
  * The content of @in is converted with handler->input or, when libxml
  * is compiled with iconv and no input function is set, with the
  * handler->iconv_in converter. The converted bytes are appended to
  * @out, which is kept zero terminated, and the consumed bytes are
  * removed from @in.
  *
  * Returns the status reported by the converter:
  *     0 if success, if @in is empty, or if the conversion stopped on an
  *       incomplete input sequence (-3) or, for iconv, by lack of space
  *     -1 general error
  *     -2 if the transcoding fails (for *in is not valid utf8 string or
  *        the result of transformation can't fit into the encoding we
  *        want), or if the handler has no usable converter
  */
 int
 xmlCharEncInFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                  xmlBufferPtr in) {
     int ret = -2;
     int written;
     int toconv;
     if (handler == NULL) return(-1);
     if (out == NULL) return(-1);
     if (in == NULL) return(-1);
     toconv = in->use;
     if (toconv == 0) return(0);
     /*
      * One byte of the free space is always kept for the terminating 0
      * written after the conversion, even when the buffer isn't grown.
      */
     written = out->size - out->use - 1;
     if (toconv * 2 >= written) {
         xmlBufferGrow(out, toconv * 2);
         written = out->size - out->use - 1;
     }
     /* no room left, not even for the terminating 0 */
     if (written < 0) return(-1);
     if (handler->input != NULL) {
         ret = handler->input(&out->content[out->use], &written,
                              in->content, &toconv);
         xmlBufferShrink(in, toconv);
         out->use += written;
         out->content[out->use] = 0;
     }
 #ifdef LIBXML_ICONV_ENABLED
     else if (handler->iconv_in != NULL) {
         ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
                               &written, in->content, &toconv);
         xmlBufferShrink(in, toconv);
         out->use += written;
         out->content[out->use] = 0;
         /* lack of space is handled like a partial conversion */
         if (ret == -1) ret = -3;
     }
 #endif /* LIBXML_ICONV_ENABLED */
 #ifdef DEBUG_ENCODING
     switch (ret) {
         case 0:
             fprintf(stderr, "converted %d bytes to %d bytes of input\n",
                     toconv, written);
             break;
         case -1:
             fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n",
                     toconv, written, in->use);
             break;
         case -2:
             fprintf(stderr, "input conversion failed due to input error\n");
             break;
         case -3:
             fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n",
                     toconv, written, in->use);
             break;
         default:
             fprintf(stderr,"Unknown input conversion failed %d\n", ret);
     }
 #endif
     /*
      * Ignore when input buffer is not on a boundary
      */
     if (ret == -3) ret = 0;
     return(ret);
 }
 /**
  * xmlCharEncOutFunc:
  * @handler:	char encoding transformation data structure
  * @out:  an xmlBuffer for the output.
  * @in:  an xmlBuffer for the input
  *
  * Generic front-end for the encoding handler output function.
  * The content of @in is converted with handler->output or, when libxml
  * is compiled with iconv and no output function is set, with the
  * handler->iconv_out converter. The converted bytes are appended to
  * @out, which is kept zero terminated, and the consumed bytes are
  * removed from @in.
  *
  * Returns the status reported by the converter:
  *     0 if success or if @in is empty
  *     -1 general error
  *     -2 if the transcoding fails (for *in is not valid utf8 string or
  *        the result of transformation can't fit into the encoding we
  *        want), or if the handler has no usable converter
  *     -3 if the iconv conversion stopped by lack of space or on an
  *        incomplete input sequence
  */
 int
 xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
                   xmlBufferPtr in) {
     int ret = -2;
     int written;
     int toconv;
     if (handler == NULL) return(-1);
     if (out == NULL) return(-1);
     if (in == NULL) return(-1);
     toconv = in->use;
     if (toconv == 0) return(0);
     /*
      * One byte of the free space is always kept for the terminating 0
      * written after the conversion, even when the buffer isn't grown.
      */
     written = out->size - out->use - 1;
     if (toconv * 2 >= written) {
         xmlBufferGrow(out, toconv * 2);
         written = out->size - out->use - 1;
     }
     /* no room left, not even for the terminating 0 */
     if (written < 0) return(-1);
     if (handler->output != NULL) {
         ret = handler->output(&out->content[out->use], &written,
                               in->content, &toconv);
         xmlBufferShrink(in, toconv);
         out->use += written;
         out->content[out->use] = 0;
     }
 #ifdef LIBXML_ICONV_ENABLED
     else if (handler->iconv_out != NULL) {
         ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
                               &written, in->content, &toconv);
         xmlBufferShrink(in, toconv);
         out->use += written;
         out->content[out->use] = 0;
         /* lack of space is reported like a partial conversion */
         if (ret == -1) ret = -3;
     }
 #endif /* LIBXML_ICONV_ENABLED */
 #ifdef DEBUG_ENCODING
     switch (ret) {
         case 0:
             fprintf(stderr, "converted %d bytes to %d bytes of output\n",
                     toconv, written);
             break;
         case -1:
             fprintf(stderr, "output conversion failed by lack of space\n");
             break;
         case -2:
             fprintf(stderr, "output conversion failed due to output error\n");
             break;
         case -3:
             fprintf(stderr,"converted %d bytes to %d bytes of output %d left\n",
                     toconv, written, in->use);
             break;
         default:
             fprintf(stderr,"Unknown output conversion failed %d\n", ret);
     }
 #endif
     return(ret);
 }
 /**
  * xmlCharEncCloseFunc:
  * @handler:	char encoding transformation data structure
  *
  * Generic front-end for the encoding handler close function.
  * A handler carrying iconv converters is released completely: its name,
  * both iconv descriptors and the handler block itself, so @handler must
  * not be used after the call. Other handlers are left untouched.
  *
  * Returns 0 if success, or -1 in case of error (NULL @handler, a
  *     handler without iconv converters and without name, or a failure
  *     of iconv_close())
  */
 int
 xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
     int ret = 0;
     if (handler == NULL) return(-1);
 #ifdef LIBXML_ICONV_ENABLED
     /*
      * Iconv handlers can be used only once, free the whole block
      * and the associated iconv resources. This is done before looking
      * at the name since such handlers may have been built without one.
      */
     if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
         if (handler->name != NULL)
             xmlFree(handler->name);
         handler->name = NULL;
         if (handler->iconv_out != NULL) {
             if (iconv_close(handler->iconv_out))
                 ret = -1;
             handler->iconv_out = NULL;
         }
         if (handler->iconv_in != NULL) {
             if (iconv_close(handler->iconv_in))
                 ret = -1;
             handler->iconv_in = NULL;
         }
         xmlFree(handler);
     } else
 #endif /* LIBXML_ICONV_ENABLED */
     /* the "else" above, when compiled in, applies to this test */
     if (handler->name == NULL) return(-1);
 #ifdef DEBUG_ENCODING
     if (ret)
         fprintf(stderr, "failed to close the encoding handler\n");
     else
         fprintf(stderr, "closed the encoding handler\n");
 #endif
     return(ret);
 }
--- a/encoding.h
+++ b/encoding.h
@@ -22,12 +22,30 @@
 #define __XML_CHAR_ENCODING_H__
 #include <libxml/xmlversion.h>
 #ifdef LIBXML_ICONV_ENABLED
 #include <iconv.h>
 #endif
 #include <libxml/tree.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Predefined values for some standard encodings.
 * Libxml doesn't do beforehand translation on UTF8, ISOLatinX.
 * It also supports UTF16 (LE and BE) by default.
 *
 * Anything else would have to be translated to UTF8 before being
 * given to the parser itself. The BOM for UTF16 and the encoding
 * declaration are looked at and a converter is looked for at that
 * point. If not found the parser stops here as asked by the XML REC.
 * Converters can be registered by the user using xmlRegisterCharEncodingHandler
 * but the current form doesn't allow stateful transcoding (a serious
 * problem agreed !). If iconv has been found it will be used
 * automatically and allows stateful transcoding; the simplest is then
 * to be sure to enable iconv and to provide iconv libs for the encoding
 * support needed.
 */
 typedef enum {
    XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
@@ -65,9 +83,13 @@ typedef enum {
 * Take a block of chars in the original encoding and try to convert
 * it to an UTF-8 block of chars out.
 *
- * Returns the number of byte written, or -1 by lack of space.
+ * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding failed.
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets consumed.
 */
-typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
                                         const unsigned char* in, int *inlen);
@@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
 *
 * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding failed.
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets consumed.
 */
-typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
                                          const unsigned char* in, int *inlen);
 /*
 * Block defining the handlers for non UTF-8 encodings.
 * If iconv is supported, there is two extra fields 
 */
 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
@@ -97,6 +124,10 @@ struct _xmlCharEncodingHandler {
    char                       *name;
    xmlCharEncodingInputFunc   input;
    xmlCharEncodingOutputFunc  output;
 #ifdef LIBXML_ICONV_ENABLED
    iconv_t                    iconv_in;
    iconv_t                    iconv_out;
 #endif /* LIBXML_ICONV_ENABLED */
 };
 void	xmlInitCharEncodingHandlers	(void);
@@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
 xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
 int	xmlCheckUTF8			(const unsigned char *utf);
 int	xmlCharEncOutFunc		(xmlCharEncodingHandler *handler,
 					 xmlBufferPtr out,
 					 xmlBufferPtr in);
 int	xmlCharEncInFunc		(xmlCharEncodingHandler *handler,
 					 xmlBufferPtr out,
 					 xmlBufferPtr in);
 int	xmlCharEncCloseFunc		(xmlCharEncodingHandler *handler);
 #ifdef __cplusplus
 }
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -22,12 +22,30 @@
 #define __XML_CHAR_ENCODING_H__
 #include <libxml/xmlversion.h>
 #ifdef LIBXML_ICONV_ENABLED
 #include <iconv.h>
 #endif
 #include <libxml/tree.h>
 #ifdef __cplusplus
 extern "C" {
 #endif
 /**
 * Predefined values for some standard encodings.
 * Libxml doesn't do beforehand translation on UTF8, ISOLatinX.
 * It also supports UTF16 (LE and BE) by default.
 *
 * Anything else would have to be translated to UTF8 before being
 * given to the parser itself. The BOM for UTF16 and the encoding
 * declaration are looked at and a converter is looked for at that
 * point. If not found the parser stops here as asked by the XML REC.
 * Converters can be registered by the user using xmlRegisterCharEncodingHandler
 * but the current form doesn't allow stateful transcoding (a serious
 * problem agreed !). If iconv has been found it will be used
 * automatically and allows stateful transcoding; the simplest is then
 * to be sure to enable iconv and to provide iconv libs for the encoding
 * support needed.
 */
 typedef enum {
    XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
@@ -65,9 +83,13 @@ typedef enum {
 * Take a block of chars in the original encoding and try to convert
 * it to an UTF-8 block of chars out.
 *
- * Returns the number of byte written, or -1 by lack of space.
+ * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding failed.
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets consumed.
 */
-typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
                                         const unsigned char* in, int *inlen);
@@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
 *
 * Returns the number of byte written, or -1 by lack of space, or -2
 *     if the transcoding failed.
 * The value of @inlen after return is the number of octets consumed
 *     as the return value is positive, else unpredictable.
 * The value of @outlen after return is the number of octets consumed.
 */
-typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
                                          const unsigned char* in, int *inlen);
 /*
 * Block defining the handlers for non UTF-8 encodings.
 * If iconv is supported, there is two extra fields 
 */
 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
@@ -97,6 +124,10 @@ struct _xmlCharEncodingHandler {
    char                       *name;
    xmlCharEncodingInputFunc   input;
    xmlCharEncodingOutputFunc  output;
 #ifdef LIBXML_ICONV_ENABLED
    iconv_t                    iconv_in;
    iconv_t                    iconv_out;
 #endif /* LIBXML_ICONV_ENABLED */
 };
 void	xmlInitCharEncodingHandlers	(void);
@@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
 xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
 int	xmlCheckUTF8			(const unsigned char *utf);
 int	xmlCharEncOutFunc		(xmlCharEncodingHandler *handler,
 					 xmlBufferPtr out,
 					 xmlBufferPtr in);
 int	xmlCharEncInFunc		(xmlCharEncodingHandler *handler,
 					 xmlBufferPtr out,
 					 xmlBufferPtr in);
 int	xmlCharEncCloseFunc		(xmlCharEncodingHandler *handler);
 #ifdef __cplusplus
 }
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -28,10 +28,10 @@ extern "C" {
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 */
 #define IS_CHAR(c)							\
-    ((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||		\
+    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
-      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) &&		\
+     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
-      (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) &&		\
+     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
-      ((c) <= 0x10FFFF))
+     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
 /*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
@@ -442,8 +442,10 @@ xmlParserCtxtPtr	xmlNewParserCtxt	(void);
 xmlParserCtxtPtr	xmlCreateEntityParserCtxt(const xmlChar *URL,
 						 const xmlChar *ID,
 						 const xmlChar *base);
-void			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
+int			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
 						 xmlCharEncoding enc);
 int			xmlSwitchToEncoding	(xmlParserCtxtPtr ctxt,
 					     xmlCharEncodingHandlerPtr handler);
 void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
 /**
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -380,6 +380,8 @@ void		xmlBufferCCat		(xmlBufferPtr buf,
 					 const char *str);
 int		xmlBufferShrink		(xmlBufferPtr buf,
 					 int len);
 int		xmlBufferGrow		(xmlBufferPtr buf,
 					 int len);
 void		xmlBufferEmpty		(xmlBufferPtr buf);
 const xmlChar*	xmlBufferContent	(const xmlBufferPtr buf);
 int		xmlBufferUse		(const xmlBufferPtr buf);
--- a/include/libxml/xmlIO.h
+++ b/include/libxml/xmlIO.h
@@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
    xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
    xmlBufferPtr buffer;    /* Local buffer encoded in  UTF-8 */
    xmlBufferPtr raw;       /* if encoder != NULL buffer for raw input */
 };
--- a/parser.c
+++ b/parser.c
@@ -41,6 +41,7 @@
 #include <libxml/valid.h>
 #include <libxml/parserInternals.h>
 #include <libxml/xmlIO.h>
 #include <libxml/uri.h>
 #include "xml-error.h"
 #define XML_PARSER_BIG_BUFFER_SIZE 1000
@@ -483,7 +484,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
 			    if ((ctxt->sax != NULL) &&
 				(ctxt->sax->error != NULL))
 				ctxt->sax->error(ctxt->userData, 
-				 "Char out of allowed range\n");
+				 "Char 0x%X out of allowed range\n", val);
 			    ctxt->errNo = XML_ERR_INVALID_ENCODING;
 			    ctxt->wellFormed = 0;
 			    ctxt->disableSAX = 1;
@@ -612,7 +613,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
 		if ((ctxt->sax != NULL) &&
 		    (ctxt->sax->error != NULL))
 		    ctxt->sax->error(ctxt->userData, 
-				     "Char out of allowed range\n");
+				     "Char 0x%X out of allowed range\n", val);
 		ctxt->errNo = XML_ERR_INVALID_ENCODING;
 		ctxt->wellFormed = 0;
 		ctxt->disableSAX = 1;
@@ -727,7 +728,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
 		if ((ctxt->sax != NULL) &&
 		    (ctxt->sax->error != NULL))
 		    ctxt->sax->error(ctxt->userData, 
-				     "Char out of allowed range\n");
+				     "Char 0x%X out of allowed range\n", val);
 		ctxt->errNo = XML_ERR_INVALID_ENCODING;
 		ctxt->wellFormed = 0;
 		ctxt->disableSAX = 1;
@@ -2278,155 +2279,14 @@ xmlCheckLanguageID(const xmlChar *lang) {
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * Returns 0 in case of success, -1 otherwise
 */
-void
+int
 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 {
    xmlCharEncodingHandlerPtr handler;
    handler = xmlGetCharEncodingHandler(enc);
    if (handler != NULL) {
        if (ctxt->input != NULL) {
 	    if (ctxt->input->buf != NULL) {
 	        if (ctxt->input->buf->encoder != NULL) {
 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			ctxt->sax->error(ctxt->userData,
 			     "xmlSwitchEncoding : encoder already regitered\n");
 		    return;
 		}
 		ctxt->input->buf->encoder = handler;
 	        /*
 		 * Is there already some content down the pipe to convert
 		 */
 	        if ((ctxt->input->buf->buffer != NULL) &&
 		    (ctxt->input->buf->buffer->use > 0)) {
 		    xmlChar *buf;
 		    int res, len, size;
 		    int processed;
 		    /*
 		     * Specific handling of the Byte Order Mark for 
 		     * UTF-16
 		     */
 		    if ((enc == XML_CHAR_ENCODING_UTF16LE) && 
 		        (ctxt->input->cur[0] == 0xFF) &&
 		        (ctxt->input->cur[1] == 0xFE)) {
 			SKIP(2);
 		    }
 		    if ((enc == XML_CHAR_ENCODING_UTF16BE) && 
 		        (ctxt->input->cur[0] == 0xFE) &&
 		        (ctxt->input->cur[1] == 0xFF)) {
 			SKIP(2);
 		    }
 		    /*
 		     * convert the non processed part
 		     */
 		    processed = ctxt->input->cur - ctxt->input->base;
                    len = ctxt->input->buf->buffer->use - processed;
 		    if (len <= 0) {
 		        return;
 		    }
 		    size = ctxt->input->buf->buffer->use * 4;
 		    if (size < 4000)
 		        size = 4000;
 retry_larger:			
 		    buf = (xmlChar *) xmlMalloc(size + 1);
 		    if (buf == NULL) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				 "xmlSwitchEncoding : out of memory\n");
 		        return;
 		    }
 		    /* TODO !!! Handling of buf too small */
 		    res = handler->input(buf, size, ctxt->input->cur, &len);
 		    if (res == -1) {
 		        size *= 2;
 			xmlFree(buf);
 			goto retry_larger;
 		    }
 		    if ((res < 0) ||
 		        (len != ctxt->input->buf->buffer->use - processed)) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				 "xmlSwitchEncoding : conversion failed\n");
                        xmlFree(buf);
 		        return;
 		    }
 		    /*
 		     * Conversion succeeded, get rid of the old buffer
 		     */
 		    xmlFree(ctxt->input->buf->buffer->content);
 		    ctxt->input->buf->buffer->content = buf;
 		    ctxt->input->base = buf;
 		    ctxt->input->cur = buf;
 		    ctxt->input->buf->buffer->size = size;
 		    ctxt->input->buf->buffer->use = res;
                    buf[res] = 0;
 		}
 		return;
 	    } else {
 	        if (ctxt->input->length == 0) {
 		    /*
 		     * When parsing a static memory array one must know the
 		     * size to be able to convert the buffer.
 		     */
 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			ctxt->sax->error(ctxt->userData,
 					 "xmlSwitchEncoding : no input\n");
 		    return;
 		} else {
 		    xmlChar *buf;
 		    int res, len;
 		    int processed = ctxt->input->cur - ctxt->input->base;
 		    /*
 		     * convert the non processed part
 		     */
                    len = ctxt->input->length - processed;
 		    if (len <= 0) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				 "xmlSwitchEncoding : input fully consumed?\n");
 		        return;
 		    }
 		    buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
 		    if (buf == NULL) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				 "xmlSwitchEncoding : out of memory\n");
 		        return;
 		    }
 		    res = handler->input(buf, ctxt->input->length * 4,
 		                         ctxt->input->cur, &len);
 		    if ((res < 0) ||
 		        (len != ctxt->input->length - processed)) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				 "xmlSwitchEncoding : conversion failed\n");
                        xmlFree(buf);
 		        return;
 		    }
 		    /*
 		     * Conversion succeeded, get rid of the old buffer
 		     */
 		    if ((ctxt->input->free != NULL) &&
 		        (ctxt->input->base != NULL))
 			ctxt->input->free((xmlChar *) ctxt->input->base);
 		    ctxt->input->base = ctxt->input->cur = buf;
 		    ctxt->input->length = res;
 		}
 	    }
 	} else {
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	        ctxt->sax->error(ctxt->userData,
 		                 "xmlSwitchEncoding : no input\n");
 	}
    }
    switch (enc) {
 	case XML_CHAR_ENCODING_ERROR:
 	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
@@ -2437,21 +2297,35 @@ retry_larger:
 	    break;
 	case XML_CHAR_ENCODING_NONE:
 	    /* let's assume it's UTF-8 without the XML decl */
-            return;
+	    return(0);
 	case XML_CHAR_ENCODING_UTF8:
 	    /* default encoding, no conversion should be needed */
-            return;
+	    return(0);
-        case XML_CHAR_ENCODING_UTF16LE:
+	default:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+	    break;
    }
    handler = xmlGetCharEncodingHandler(enc);
    if (handler == NULL) {
 	/*
 	 * Default handlers.
 	 */
 	switch (enc) {
 	    case XML_CHAR_ENCODING_ERROR:
 		ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
+		    ctxt->sax->error(ctxt->userData, "encoding unknown\n");
-		  "char encoding UTF16 little endian not supported\n");
+		ctxt->wellFormed = 0;
 		ctxt->disableSAX = 1;
 		break;
 	    case XML_CHAR_ENCODING_NONE:
 		/* let's assume it's UTF-8 without the XML decl */
 		return(0);
 	    case XML_CHAR_ENCODING_UTF8:
 		/* default encoding, no conversion should be needed */
 		return(0);
 	    case XML_CHAR_ENCODING_UTF16LE:
 		break;
 	    case XML_CHAR_ENCODING_UTF16BE:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding UTF16 big endian not supported\n");
 		break;
 	    case XML_CHAR_ENCODING_UCS4LE:
 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
@@ -2490,59 +2364,23 @@ retry_larger:
 		      "char encoding UCS2 not supported\n");
 		break;
 	    case XML_CHAR_ENCODING_8859_1:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_2:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_3:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_3 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_4:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_4 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_5:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_5 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_6:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_6 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_7:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_7 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_8:
 	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
                ctxt->sax->error(ctxt->userData,
 		  "char encoding ISO_8859_8 not supported\n");
            break;
 	    case XML_CHAR_ENCODING_8859_9:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		/*
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		 * Keep the internal content in the document encoding
-                ctxt->sax->error(ctxt->userData,
+		 */
-		  "char encoding ISO_8859_9 not supported\n");
+		if ((ctxt->inputNr == 1) &&
-            break;
+		    (ctxt->encoding == NULL) &&
 		    (ctxt->input->encoding != NULL)) {
 		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
 		}
 		return(0);
 	    case XML_CHAR_ENCODING_2022_JP:
 		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@@ -2563,6 +2401,150 @@ retry_larger:
 		break;
 	}
    }
    if (handler == NULL)
 	return(-1);
    return(xmlSwitchToEncoding(ctxt, handler));
 }
 /**
 * xmlSwitchToEncoding:
 * @ctxt:  the parser context
 * @handler:  the encoding handler
 *
 * change the input functions when discovering the character encoding
 * of a given entity.
 *
 * The handler is installed as the encoder of the current input buffer.
 * If that buffer already holds content, the part not yet consumed by
 * the parser becomes the raw buffer and is converted into a freshly
 * created parser buffer. Asking again for the encoder which is already
 * installed is a no-op. ctxt->encoding is not modified.
 *
 * An input without an xmlParserInputBuffer (ctxt->input->buf == NULL)
 * has nowhere to store the encoder nor the raw/converted buffers, so
 * the switch is refused with an error instead of being attempted.
 *
 * Returns 0 in case of success, -1 otherwise
 */
 int
 xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
 {
    int nbchars;
    if ((ctxt == NULL) || (handler == NULL))
 	return(-1);
    if (ctxt->input == NULL) {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt->userData,
 		             "xmlSwitchEncoding : no input\n");
 	return(-1);
    }
    if (ctxt->input->buf == NULL) {
 	/*
 	 * No input buffer: nothing below can work since the encoder
 	 * and the raw data both live in ctxt->input->buf. Fail cleanly
 	 * rather than dereferencing the NULL buffer.
 	 */
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL)) {
 	    if (ctxt->input->length == 0)
 		/*
 		 * When parsing a static memory array one must know the
 		 * size to be able to convert the buffer.
 		 */
 		ctxt->sax->error(ctxt->userData,
 				 "xmlSwitchEncoding : no input\n");
 	    else
 		ctxt->sax->error(ctxt->userData,
 		     "xmlSwitchEncoding : no input buffer to convert\n");
 	}
 	return(-1);
    }
    if (ctxt->input->buf->encoder != NULL) {
 	/* switching to the encoder already in place is harmless */
 	if (ctxt->input->buf->encoder == handler)
 	    return(0);
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt->userData,
 		 "xmlSwitchEncoding : encoder already regitered\n");
 	return(-1);
    }
    ctxt->input->buf->encoder = handler;
    /*
     * Is there already some content down the pipe to convert ?
     */
    if ((ctxt->input->buf->buffer != NULL) &&
 	(ctxt->input->buf->buffer->use > 0)) {
 	int processed;
 	/*
 	 * Specific handling of the Byte Order Mark for 
 	 * UTF-16: skip it so it is not fed to the converter.
 	 */
 	if ((handler->name != NULL) &&
 	    (!strcmp(handler->name, "UTF-16LE")) && 
 	    (ctxt->input->cur[0] == 0xFF) &&
 	    (ctxt->input->cur[1] == 0xFE)) {
 	    ctxt->input->cur += 2;
 	}
 	if ((handler->name != NULL) &&
 	    (!strcmp(handler->name, "UTF-16BE")) && 
 	    (ctxt->input->cur[0] == 0xFE) &&
 	    (ctxt->input->cur[1] == 0xFF)) {
 	    ctxt->input->cur += 2;
 	}
 	/*
 	 * Shrink the current input buffer by what was already parsed.
 	 * Move it as the raw buffer and create a new input buffer
 	 */
 	processed = ctxt->input->cur - ctxt->input->base;
 	xmlBufferShrink(ctxt->input->buf->buffer, processed);
 	ctxt->input->buf->raw = ctxt->input->buf->buffer;
 	ctxt->input->buf->buffer = xmlBufferCreate();
 	/*
 	 * convert as much as possible of the raw input
 	 * to the parser reading buffer.
 	 */
 	nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
 	                           ctxt->input->buf->buffer,
 				   ctxt->input->buf->raw);
 	if (nbchars < 0) {
 	    fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
 	    return(-1);
 	}
 	/* the parser now reads from the converted buffer */
 	ctxt->input->base =
 	ctxt->input->cur = ctxt->input->buf->buffer->content;
    }
    return(0);
 }
 /************************************************************************
 *									*
@@ -4253,7 +4235,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
 void
 xmlParseComment(xmlParserCtxtPtr ctxt) {
    xmlChar *buf = NULL;
-    int len = 0;
+    int len;
    int size = XML_PARSER_BUFFER_SIZE;
    int q, ql;
    int r, rl;
@@ -4282,10 +4264,11 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
    r = CUR_CHAR(rl);
    NEXTL(rl);
    cur = CUR_CHAR(l);
    len = 0;
    while (IS_CHAR(cur) &&
           ((cur != '>') ||
 	    (r != '-') || (q != '-'))) {
-	if ((r == '-') && (q == '-')) {
+	if ((r == '-') && (q == '-') && (len > 1)) {
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	        ctxt->sax->error(ctxt->userData,
 	       "Comment must not contain '--' (double-hyphen)`\n");
@@ -4732,12 +4715,37 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
 		    ctxt->disableSAX = 1;
 		}
 		if (URI) {
 		    xmlURIPtr uri;
 		    uri = xmlParseURI((const char *) URI);
 		    if (uri == NULL) {
 			if ((ctxt->sax != NULL) &&
-			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
+			    (!ctxt->disableSAX) &&
 			    (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				        "Invalid URI: %s\n", URI);
 			ctxt->wellFormed = 0;
 			ctxt->errNo = XML_ERR_INVALID_URI;
 		    } else {
 			if (uri->fragment != NULL) {
 			    if ((ctxt->sax != NULL) &&
 				(!ctxt->disableSAX) &&
 				(ctxt->sax->error != NULL))
 				ctxt->sax->error(ctxt->userData,
 					    "Fragment not allowed: %s\n", URI);
 			    ctxt->wellFormed = 0;
 			    ctxt->errNo = XML_ERR_URI_FRAGMENT;
 			} else {
 			    if ((ctxt->sax != NULL) &&
 				(!ctxt->disableSAX) &&
 				(ctxt->sax->entityDecl != NULL))
 				ctxt->sax->entityDecl(ctxt->userData, name,
 					    XML_EXTERNAL_PARAMETER_ENTITY,
 					    literal, URI, NULL);
 			}
 			xmlFreeURI(uri);
 		    }
 		}
 	    }
 	} else {
 	    if ((RAW == '"') || (RAW == '\'')) {
@@ -4757,6 +4765,31 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
 		    ctxt->wellFormed = 0;
 		    ctxt->disableSAX = 1;
 		}
 		if (URI) {
 		    xmlURIPtr uri;
 		    uri = xmlParseURI((const char *)URI);
 		    if (uri == NULL) {
 			if ((ctxt->sax != NULL) &&
 			    (!ctxt->disableSAX) &&
 			    (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
 				        "Invalid URI: %s\n", URI);
 			ctxt->wellFormed = 0;
 			ctxt->errNo = XML_ERR_INVALID_URI;
 		    } else {
 			if (uri->fragment != NULL) {
 			    if ((ctxt->sax != NULL) &&
 				(!ctxt->disableSAX) &&
 				(ctxt->sax->error != NULL))
 				ctxt->sax->error(ctxt->userData,
 					    "Fragment not allowed: %s\n", URI);
 			    ctxt->wellFormed = 0;
 			    ctxt->errNo = XML_ERR_URI_FRAGMENT;
 			}
 			xmlFreeURI(uri);
 		    }
 		}
 		if ((RAW != '>') && (!IS_BLANK(CUR))) {
 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			ctxt->sax->error(ctxt->userData,
@@ -5973,7 +6006,20 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
    /*
     * We know that '<?xml' is here.
     */
    if ((RAW == '<') && (NXT(1) == '?') &&
 	(NXT(2) == 'x') && (NXT(3) == 'm') &&
 	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
 	SKIP(5);
    } else {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	    ctxt->sax->error(ctxt->userData,
 	                     "Text declaration '<?xml' required\n");
 	ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
 	ctxt->wellFormed = 0;
 	ctxt->disableSAX = 1;
 	return;
    }
    if (!IS_BLANK(CUR)) {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@@ -6003,7 +6049,13 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
 	ctxt->wellFormed = 0;
 	ctxt->disableSAX = 1;
    }
-    ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
+    xmlParseEncodingDecl(ctxt);
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 	/*
 	 * The XML REC instructs us to stop parsing right here
 	 */
        return;
    }
    SKIP_BLANKS;
    if ((RAW == '?') && (NXT(1) == '>')) {
@@ -6192,6 +6244,13 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
        (NXT(2) == 'x') && (NXT(3) == 'm') &&
 	(NXT(4) == 'l')) {
 	xmlParseTextDecl(ctxt);
 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 	    /*
 	     * The XML REC instructs us to stop parsing right here
 	     */
 	    ctxt->instate = XML_PARSER_EOF;
 	    return;
 	}
    }
    if (ctxt->myDoc == NULL) {
        ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
@@ -6441,6 +6500,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
 		    (NXT(2) == 'x') && (NXT(3) == 'm') &&
 		    (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
 		    xmlParseTextDecl(ctxt);
 		    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 			/*
 			 * The XML REC instructs us to stop parsing right here
 			 */
 			ctxt->instate = XML_PARSER_EOF;
 			return;
 		    }
 		    if (input->standalone) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
@@ -6947,6 +7013,15 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) {
 			    (NXT(2) == 'x') && (NXT(3) == 'm') &&
 			    (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
 			    xmlParseTextDecl(ctxt);
 			    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 				/*
 				 * The XML REC instructs us to stop parsing
 				 * right here
 				 */
 				ctxt->instate = XML_PARSER_EOF;
 				xmlFree(name);
 				return;
 			    }
 			}
 			if (ctxt->token == 0)
 			    ctxt->token = ' ';
@@ -8197,6 +8272,38 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
 	    ctxt->disableSAX = 1;
 	    ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
 	}
 	if (encoding != NULL) {
 	    xmlCharEncoding enc;
 	    xmlCharEncodingHandlerPtr handler;
 	    if (ctxt->input->encoding != NULL)
 		xmlFree((xmlChar *) ctxt->input->encoding);
 	    ctxt->input->encoding = encoding;
 	    enc = xmlParseCharEncoding((const char *) encoding);
 	    /*
 	     * registered set of known encodings
 	     */
 	    if (enc != XML_CHAR_ENCODING_ERROR) {
 		xmlSwitchEncoding(ctxt, enc);
 		if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 		    xmlFree(encoding);
 		    return(NULL);
 		}
 	    } else {
 	        /*
 		 * fallback for unknown encodings
 		 */
                handler = xmlFindCharEncodingHandler((const char *) encoding);
 		if (handler != NULL) {
 		    xmlSwitchToEncoding(ctxt, handler);
 		} else {
 		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
 		    xmlFree(encoding);
 		    return(NULL);
 		}
 	    }
 	}
    }
    return(encoding);
 }
@@ -8362,7 +8469,13 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
 	ctxt->wellFormed = 0;
 	ctxt->disableSAX = 1;
    }
-    ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
+    xmlParseEncodingDecl(ctxt);
    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 	/*
 	 * The XML REC instructs us to stop parsing right here
 	 */
        return;
    }
    /*
     * We may have the standalone status.
@@ -8489,12 +8602,19 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
    if ((RAW == '<') && (NXT(1) == '?') &&
        (NXT(2) == 'x') && (NXT(3) == 'm') &&
 	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
 	/*
 	 * Note that we will switch encoding on the fly.
 	 */
 	xmlParseXMLDecl(ctxt);
 	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 	    /*
 	     * The XML REC instructs us to stop parsing right here
 	     */
 	    return(-1);
 	}
 	ctxt->standalone = ctxt->input->standalone;
 	SKIP_BLANKS;
 	if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
 	    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
    } else {
 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
    }
@@ -8581,14 +8701,6 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
 	(!ctxt->disableSAX))
        ctxt->sax->endDocument(ctxt->userData);
    /*
     * Grab the encoding if it was added on-the-fly
     */
    if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
 	(ctxt->myDoc->encoding == NULL)) {
 	ctxt->myDoc->encoding = ctxt->encoding;
 	ctxt->encoding = NULL;
    }
    if (! ctxt->wellFormed) return(-1);
    return(0);
 }
@@ -8805,6 +8917,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
 			fprintf(stderr, "PP: Parsing XML Decl\n");
 #endif
 			xmlParseXMLDecl(ctxt);
 			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
 			    /*
 			     * The XML REC instructs us to stop parsing right
 			     * here
 			     */
 			    ctxt->instate = XML_PARSER_EOF;
 			    return(0);
 			}
 			ctxt->standalone = ctxt->input->standalone;
 			if ((ctxt->encoding == NULL) &&
 			    (ctxt->input->encoding != NULL))
--- a/parserInternals.h
+++ b/parserInternals.h
@@ -28,10 +28,10 @@ extern "C" {
 * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
 */
 /*
 * In this hunk the '-' lines are the previous body of IS_CHAR and the '+'
 * lines its replacement. The replacement lists the accepted values
 * explicitly: 0x09, 0x0A, 0x0D, 0x20..0xD7FF, 0xE000..0xFFFD and
 * 0x10000..0x10FFFF. The argument is expanded several times, so it should
 * not have side effects.
 */
 #define IS_CHAR(c)							\
-    ((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||		\
+    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
-      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) &&		\
+     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
-      (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) &&		\
+     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
-      ((c) <= 0x10FFFF))
+     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
 /*
 * [3] S ::= (#x20 | #x9 | #xD | #xA)+
@@ -442,8 +442,10 @@ xmlParserCtxtPtr	xmlNewParserCtxt	(void);
 xmlParserCtxtPtr	xmlCreateEntityParserCtxt(const xmlChar *URL,
 						 const xmlChar *ID,
 						 const xmlChar *base);
-void			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
+int			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
 						 xmlCharEncoding enc);
 int			xmlSwitchToEncoding	(xmlParserCtxtPtr ctxt,
 					     xmlCharEncodingHandlerPtr handler);
 void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
 /**
--- a/tree.c
+++ b/tree.c
@@ -3771,6 +3771,31 @@ xmlBufferShrink(xmlBufferPtr buf, int len) {
    return(len);
 }
 /**
 * xmlBufferGrow:
 * @buf:  the buffer
 * @len:  the minimum free size to allocate
 *
 * Grow the available space of an XML buffer so that at least @len more
 * bytes can be stored after the current content.
 *
 * Returns the new available space, 0 if @len is not positive or the
 *         buffer already has enough room (nothing is reallocated),
 *         or -1 in case of error (NULL buffer or allocation failure)
 */
 int
 xmlBufferGrow(xmlBufferPtr buf, int len) {
    int size;
    xmlChar *newbuf;
    if (buf == NULL) return(-1);
    if (len <= 0) return(0);
    /*
     * The room left is size - use, so the request must be checked against
     * that and not against the number of bytes already used. The strict
     * comparison keeps one byte spare beyond the requested length
     * (presumably for a terminating 0 - confirm against xmlBufferAdd).
     */
    if (buf->use + len < buf->size) return(0);
    size = buf->size + buf->use + len + 100;
    newbuf = xmlRealloc(buf->content, size);
    /* on failure buf->content is left untouched and still valid */
    if (newbuf == NULL) return(-1);
    buf->content = newbuf;
    buf->size = size;
    return(buf->size - buf->use);
 }
 /**
 * xmlBufferDump:
 * @file:  the file output
--- a/tree.h
+++ b/tree.h
@@ -380,6 +380,8 @@ void		xmlBufferCCat		(xmlBufferPtr buf,
 					 const char *str);
 int		xmlBufferShrink		(xmlBufferPtr buf,
 					 int len);
 int		xmlBufferGrow		(xmlBufferPtr buf,
 					 int len);
 void		xmlBufferEmpty		(xmlBufferPtr buf);
 const xmlChar*	xmlBufferContent	(const xmlBufferPtr buf);
 int		xmlBufferUse		(const xmlBufferPtr buf);
--- a/uri.c
+++ b/uri.c
@@ -1283,6 +1283,34 @@ xmlParseURIReference(xmlURIPtr uri, const char *str) {
    return(0);
 }
 /**
 * xmlParseURI:
 * @str:  the URI string to analyze
 *
 * Parse a URI reference held in @str into a freshly created URI structure.
 * 
 * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
 *
 * Returns a newly built xmlURIPtr, or NULL if @str is NULL, if
 *         xmlCreateURI() returns NULL, or if xmlParseURIReference()
 *         reports a non-zero status (the structure is freed in that case)
 */
 xmlURIPtr
 xmlParseURI(const char *str) {
    xmlURIPtr parsed;
    if (str == NULL)
 	return(NULL);
    parsed = xmlCreateURI();
    if (parsed == NULL)
 	return(NULL);
    /* a non-zero status means the string could not be parsed */
    if (xmlParseURIReference(parsed, str) != 0) {
 	xmlFreeURI(parsed);
 	return(NULL);
    }
    return(parsed);
 }
 /**
 * xmlNormalizeURIPath:
 * @path:  pointer to the path string
--- a/xml-error.h
+++ b/xml-error.h
@@ -130,7 +130,9 @@ typedef enum {
    XML_ERR_ENTITY_CHAR_ERROR, /* 87 */
    XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
    XML_ERR_ENTITY_LOOP, /* 89 */
-    XML_ERR_ENTITY_BOUNDARY /* 90 */
+    XML_ERR_ENTITY_BOUNDARY, /* 90 */
    XML_ERR_INVALID_URI, /* 91 */
    XML_ERR_URI_FRAGMENT /* 92 */
 }xmlParserErrors;
 void	xmlParserError		(void *ctx,
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -498,6 +498,10 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
    }
    ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
    ret->encoder = xmlGetCharEncodingHandler(enc);
    if (ret->encoder != NULL)
        ret->raw = xmlBufferCreate();
    else
        ret->raw = NULL;
    ret->readcallback = NULL;
    ret->closecallback = NULL;
    ret->context = NULL;
@@ -513,13 +517,20 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
 */
 void
 xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
-    if (in->buffer != NULL) {
+    if (in->raw) {
-        xmlBufferFree(in->buffer);
+        xmlBufferFree(in->raw);
-	in->buffer = NULL;
+	in->raw = NULL;
    }
    if (in->encoder != NULL) {
        xmlCharEncCloseFunc(in->encoder);
    }
    if (in->closecallback != NULL) {
 	in->closecallback(in->context);
    }
    if (in->buffer != NULL) {
        xmlBufferFree(in->buffer);
 	in->buffer = NULL;
    }
    memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
    xmlFree(in);
@@ -683,34 +694,22 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
    if (len < 0) return(0);
    if (in->encoder != NULL) {
        xmlChar *buffer;
 	int processed = len;
 	buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
 	if (buffer == NULL) {
 	    fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
 	    return(-1);
 	}
 	nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
 	                             (xmlChar *) buf, &processed);
        /*
-	 * TODO : we really need to have something atomic or the 
+	 * Store the data in the incoming raw buffer
 	 *        encoder must report the number of bytes read
 	 */
        if (in->raw == NULL) {
 	    in->raw = xmlBufferCreate();
 	}
 	xmlBufferAdd(in->raw, (const xmlChar *) buf, len);
 	/*
 	 * convert as much as possible to the parser reading buffer.
 	 */
 	nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
 	if (nbchars < 0) {
 	    fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
 	    xmlFree(buffer);
 	    return(-1);
 	}
 	if (processed  != len) {
 	    fprintf(stderr,
 	            "TODO xmlParserInputBufferPush: processed  != len\n");
 	    xmlFree(buffer);
 	    return(-1);
 	}
        buffer[nbchars] = 0;
        xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
 	xmlFree(buffer);
    } else {
 	nbchars = len;
        xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
@@ -730,7 +729,9 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
 * Grow up the content of the input buffer, the old data are preserved
 * This routine handle the I18N transcoding to internal UTF-8
 * This routine is used when operating the parser in normal (pull) mode
- * TODO: one should be able to remove one extra copy
+ *
 * TODO: one should be able to remove one extra copy by copying directly
 *       onto in->buffer or in->raw
 *
 * Returns the number of chars read and stored in the buffer, or -1
 *         in case of error.
@@ -779,34 +780,22 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
 	return(-1);
    }
    if (in->encoder != NULL) {
-        xmlChar *buf;
+        /*
-	int wrote = res;
+	 * Store the data in the incoming raw buffer
 	 */
        if (in->raw == NULL) {
 	    in->raw = xmlBufferCreate();
 	}
 	xmlBufferAdd(in->raw, (const xmlChar *) buffer, len);
-	buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
+	/*
-	if (buf == NULL) {
+	 * convert as much as possible to the parser reading buffer.
-	    fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
+	 */
-	    xmlFree(buffer);
+	nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
 	if (nbchars < 0) {
 	    fprintf(stderr, "xmlParserInputBufferGrow: encoder error\n");
 	    return(-1);
 	}
 	nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
 	                             BAD_CAST buffer, &wrote);
        buf[nbchars] = 0;
        xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
 	xmlFree(buf);
 	/*
 	 * Check that the encoder was able to process the full input
 	 */
 	if (wrote != res) {
 	    fprintf(stderr, 
 	        "TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
 		wrote, res);
 	    /*
 	     * TODO !!!
 	     * Need to keep the unprocessed input in a buffer in->unprocessed
 	     */
 	}
    } else {
 	nbchars = res;
        buffer[nbchars] = 0;
--- a/xmlIO.h
+++ b/xmlIO.h
@@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
    xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
    xmlBufferPtr buffer;    /* Local buffer encoded in  UTF-8 */
    xmlBufferPtr raw;       /* if encoder != NULL buffer for raw input */
 };