From 496a1cf59284292275cc5643e6078748dc79340e Mon Sep 17 00:00:00 2001
From: Daniel Veillard <veillard@src.gnome.org>
Date: Wed, 3 May 2000 14:20:55 +0000
Subject: [PATCH] revamped the encoding support, added iconv support, so now
 libxml if

* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped
  the encoding support, added iconv support, so now libxml, if
  compiled with iconv, automatically supports Japanese encodings
  among others. Work based on initial patch from Yuan-Chen Cheng.
  I may have broken binary compat in the encoding handler
  registration scheme, but that was so utterly broken I don't
  expect anybody to have used this feature until now.
* parserInternals.h: fixup on the CHAR range macro
* xml-error.h, parser.c: catch URL/URI errors using the uri.c
  code.
* tree.[ch]: added xmlBufferGrow(), was needed for iconv
* uri.c: added xmlParseURI() I can't believe I forgot to
  implement this one in 2.0 !!!
* SAX.c: moved doc->encoding update in the endDocument() call.
* TODO: updated.

  Iconv rules :-)

Daniel
---
 ChangeLog                        |  18 +
 SAX.c                            |   9 +
 TODO                             |   9 +-
 configure.in                     |  16 +-
 encoding.c                       | 728 +++++++++++++++++++++++--------
 encoding.h                       |  47 +-
 include/libxml/encoding.h        |  47 +-
 include/libxml/parserInternals.h |  12 +-
 include/libxml/tree.h            |   2 +
 include/libxml/xmlIO.h           |   1 +
 parser.c                         | 600 +++++++++++++++----------
 parserInternals.h                |  12 +-
 tree.c                           |  25 ++
 tree.h                           |   2 +
 uri.c                            |  28 ++
 xml-error.h                      |   4 +-
 xmlIO.c                          |  89 ++--
 xmlIO.h                          |   1 +
 18 files changed, 1163 insertions(+), 487 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 5ccb9e5e..5361b29c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+Wed May  3 14:21:25 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
+
+	* encoding.[ch], xmlIO.[ch], parser.c, configure.in : revamped
+	  the encoding support, added iconv support, so now libxml, if
+	  compiled with iconv, automatically supports Japanese encodings
+	  among others. Work based on initial patch from Yuan-Chen Cheng.
+	  I may have broken binary compat in the encoding handler
+	  registration scheme, but that was so utterly broken I don't
+	  expect anybody to have used this feature until now.
+	* parserInternals.h: fixup on the CHAR range macro
+	* xml-error.h, parser.c: catch URL/URI errors using the uri.c
+	  code.
+	* tree.[ch]: added xmlBufferGrow(), was needed for iconv
+	* uri.c: added xmlParseURI() I can't believe I forgot to
+	  implement this one in 2.0 !!!
+	* SAX.c: moved doc->encoding update in the endDocument() call.
+	* TODO: updated.
+
 Mon Apr 24 13:30:13 CEST 2000 Daniel Veillard <Daniel.Veillard@w3.org>
 
 	* tree.h: removed extraneous xmlRemoveProp definition
diff --git a/SAX.c b/SAX.c
index 5293df72..dace3058 100644
--- a/SAX.c
+++ b/SAX.c
@@ -595,6 +595,15 @@ endDocument(void *ctx)
     if (ctxt->validate && ctxt->wellFormed &&
         ctxt->myDoc && ctxt->myDoc->intSubset)
 	ctxt->valid &= xmlValidateDocumentFinal(&ctxt->vctxt, ctxt->myDoc);
+
+    /*
+     * Grab the encoding if it was added on-the-fly
+     */
+    if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
+	(ctxt->myDoc->encoding == NULL)) {
+	ctxt->myDoc->encoding = ctxt->encoding;
+	ctxt->encoding = NULL;
+    }
 }
 
 /**
diff --git a/TODO b/TODO
index 51ea18b5..2b4ae5ee 100644
--- a/TODO
+++ b/TODO
@@ -6,6 +6,8 @@
 TODO:
 =====
 
+- xmlSwitchToEncoding() needs a rewrite for correct handling of conversion
+  error code conditions.
 - DOM needs
   xmlAttrPtr xmlNewDocProp(xmlDocPtr doc, const xmlChar *name, const xmlChar *value)
   int xmlPruneProp(xmlNodePtr node, xmlAtttrPtr attr);
@@ -14,7 +16,6 @@ TODO:
 - add support for the trick from Henry conf/sun/valid/empty.xml
 - Correct standalone checking/emitting (hard)
   2.9 Standalone Document Declaration
-- URI checkings (no fragments) rfc2396.txt
 - Better checking of external parsed entities TAG 1234
 - Find way of representing PERefs in the Dtd so that %entity; can
   be saved back.
@@ -22,6 +23,7 @@ TODO:
   http://www.w3.org/XML/xml-19980210-errata ... bummmer 
 - Handle undefined namespaces in entity contents better ... at least
   issue a warning
+- Issue a warning when using non-absolute namespace URIs.
 - General checking of DTD validation in presence of namespaces ... hairy
 - fix --disable-corba configure switch handling, and use XML_WITHOUT_CORBA
   not WITHOUT_CORBA flag
@@ -30,7 +32,7 @@ TODO:
 =====
 
 - Get OASIS testsuite to a more friendly result, check all the results
-  once stable.
+  once stable. Current state at:
   http://xmlsoft.org/conf/result.html
 
 - Optimization of tag strings allocation ?
@@ -55,11 +57,13 @@ EXTENSIONS:
 
 - Add Xlink recognition/API
   => started adding an xlink.[ch] with a unified API for XML and HTML.
+     it's crap :-(
 
 - Implement XSLT
   => seems that someone volunteered ?!?
 
 - Implement XSchemas
+  => Really needs to be done <grin/>
 
 - O2K parsing;
   => this is a somewhat ugly mix of HTML and XML, adding a specific
@@ -88,6 +92,7 @@ EXTENSIONS:
 Done:
 =====
 
+- URI checkings (no fragments) rfc2396.txt
 - Added a clean mechanism for overload or added input methods:
   xmlRegisterInputCallbacks()
 - dynamically adapt the alloc entry point to use g_alloc()/g_free()
diff --git a/configure.in b/configure.in
index baea9330..3ef84fb7 100644
--- a/configure.in
+++ b/configure.in
@@ -4,7 +4,7 @@ AC_INIT(entities.h)
 AM_CONFIG_HEADER(config.h)
 
 LIBXML_MAJOR_VERSION=2
-LIBXML_MINOR_VERSION=0
+LIBXML_MINOR_VERSION=1
 LIBXML_MICRO_VERSION=0
 LIBXML_VERSION=$LIBXML_MAJOR_VERSION.$LIBXML_MINOR_VERSION.$LIBXML_MICRO_VERSION
 LIBXML_VERSION_INFO=`expr $LIBXML_MAJOR_VERSION + $LIBXML_MINOR_VERSION`:$LIBXML_MICRO_VERSION:$LIBXML_MINOR_VERSION
@@ -203,6 +203,20 @@ fi
 AC_SUBST(WITH_XPATH)
 AC_SUBST(XPATH_OBJ)
 
+AC_ARG_WITH(iconv, [  --with-iconv            Add the ICONV support (on)])
+if test "$with_iconv" = "no" ; then
+    echo Disabling ICONV support
+    WITH_ICONV=0
+else    
+    if test "$have_iconv" != "" ; then
+        echo Iconv support not found
+        WITH_ICONV=0
+    else
+        WITH_ICONV=1
+    fi
+fi  
+AC_SUBST(WITH_ICONV)
+
 AC_ARG_WITH(debug, [  --with-debug            Add the debugging module (on)])
 if test "$with_debug" = "no" ; then
     echo Disabling DEBUG support
diff --git a/encoding.c b/encoding.c
index 42009291..1a4c157f 100644
--- a/encoding.c
+++ b/encoding.c
@@ -34,12 +34,26 @@
 #ifdef HAVE_STDLIB_H
 #include <stdlib.h>
 #endif
+#include <libxml/xmlversion.h>
+#ifdef LIBXML_ICONV_ENABLED
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#endif
 #include <libxml/encoding.h>
 #include <libxml/xmlmemory.h>
 
 xmlCharEncodingHandlerPtr xmlUTF16LEHandler = NULL;
 xmlCharEncodingHandlerPtr xmlUTF16BEHandler = NULL;
 
+#ifdef LIBXML_ICONV_ENABLED
+#if 0
+#define DEBUG_ENCODING  /* Define this to get encoding traces */
+#endif
+#endif
+
+static int xmlLittleEndian = 1;
+
 /*
  * From rfc2044: encoding of the Unicode values on UTF-8:
  *
@@ -104,30 +118,38 @@ xmlCheckUTF8(const unsigned char *utf)
  *
  * Take a block of ISO Latin 1 chars in and try to convert it to an UTF-8
  * block of chars out.
- * Returns the number of byte written, or -1 by lack of space.
+ * Returns 0 if success, or -1 otherwise
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is 0, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
  */
 int
-isolat1ToUTF8(unsigned char* out, int outlen,
+isolat1ToUTF8(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen) {
-    unsigned char* outstart= out;
-    unsigned char* outend= out+outlen;
-    const unsigned char* inend= in+*inlen;
+    unsigned char* outstart = out;
+    const unsigned char* instart = in;   /* kept because 'in' is advanced by the loop */
+    const unsigned char* processed = in; /* end of the last fully converted input byte */
+    unsigned char* outend = out + *outlen;
+    const unsigned char* inend = in + *inlen;
     unsigned char c;
 
     while (in < inend) {
         c= *in++;
         if (c < 0x80) {
-            if (out >= outend)  return(-1);
+            if (out >= outend)  break;	/* output full: stop, report partial progress */
             *out++ = c;
         }
         else {
-            if (out >= outend)  return(-1);
+            if (out + 1 >= outend)  break;	/* need room for both bytes of the sequence */
             *out++ = 0xC0 | (c >> 6);
-            if (out >= outend)  return(-1);
             *out++ = 0x80 | (0x3F & c);
         }
+	processed = in;
     }
-    return(out-outstart);
+    *outlen = out - outstart;	/* octets written to @out */
+    *inlen = processed - instart;	/* octets consumed, measured from the input start */
+
+    return(0);
 }
 
 /**
@@ -141,18 +163,18 @@ isolat1ToUTF8(unsigned char* out, int outlen,
  * block of chars out.
  * TODO: UTF8Toisolat1 need a fallback mechanism ...
  *
- * Returns the number of byte written, or -1 by lack of space, or -2
- *     if the transcoding fails (for *in is not valid utf8 string or
- *     the result of transformation can't fit into the encoding we want)
+ * Returns 0 if success, -2 if the transcoding fails, or -1 otherwise
  * The value of @inlen after return is the number of octets consumed
  *     as the return value is positive, else unpredictiable.
+ * The value of @outlen after return is the number of octets produced.
  */
 int
-UTF8Toisolat1(unsigned char* out, int outlen,
+UTF8Toisolat1(unsigned char* out, int *outlen,
               const unsigned char* in, int *inlen) {
-    unsigned char* outstart= out;
-    unsigned char* outend= out+outlen;
-    const unsigned char* inend= in+*inlen;
+    unsigned char* outstart = out;
+    const unsigned char* processed = in;
+    unsigned char* outend = out + *outlen;
+    const unsigned char* inend = in + *inlen;
     unsigned char c;
 
     while (in < inend) {
@@ -162,18 +184,22 @@ UTF8Toisolat1(unsigned char* out, int outlen,
             *out++= c;
         }
 	else if (in == inend) {
-            *inlen -= 1;
             break;
 	}
 	else if (((c & 0xFC) == 0xC0) && ((*in & 0xC0) == 0x80)) {
 	    /* a two byte utf-8 and can be encoding as isolate1 */
             *out++= ((c & 0x03) << 6) | (*in++ & 0x3F);
 	}
-	else
+	else {
+	    *outlen = out - outstart;
+	    *inlen = processed - in;
 	    return(-2);
-	/* TODO : some should be represent as "&#x____;" */
+	}
+	processed = in;
     }
-    return(out-outstart);
+    *outlen = out - outstart;
+    *inlen = processed - in;
+    return(0);
 }
 
 /**
@@ -194,11 +220,12 @@ UTF8Toisolat1(unsigned char* out, int outlen,
  *     as the return value is positive, else unpredictiable.
  */
 int
-UTF16LEToUTF8(unsigned char* out, int outlen,
+UTF16LEToUTF8(unsigned char* out, int *outlen,
             const unsigned char* inb, int *inlenb)
 {
-    unsigned char* outstart= out;
-    unsigned char* outend= out+outlen;
+    unsigned char* outstart = out;
+    const unsigned char* processed = inb;
+    unsigned char* outend = out + *outlen;
     unsigned short* in = (unsigned short*) inb;
     unsigned short* inend;
     unsigned int c, d, inlen;
@@ -208,42 +235,44 @@ UTF16LEToUTF8(unsigned char* out, int outlen,
     if ((*inlenb % 2) == 1)
         (*inlenb)--;
     inlen = *inlenb / 2;
-    inend= in + inlen;
+    inend = in + inlen;
     while (in < inend) {
-#ifdef BIG_ENDIAN
-	tmp = (unsigned char *) in;
-	c = *tmp++;
-	c = c | (((unsigned int)*tmp) << 8);
-	in++;
-#else /* BIG_ENDIAN */
-        c= *in++;
-#endif /* BIG_ENDIAN */
+        if (xmlLittleEndian) {
+	    c= *in++;
+	} else {
+	    tmp = (unsigned char *) in;
+	    c = *tmp++;
+	    c = c | (((unsigned int)*tmp) << 8);
+	    in++;
+	}
         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
             if (in >= inend) {           /* (in > inend) shouldn't happens */
-                (*inlenb) -= 2;
                 break;
             }
-#ifdef BIG_ENDIAN
-            tmp = (unsigned char *) in;
-            d = *tmp++;
-	    d = d | (((unsigned int)*tmp) << 8);
-	    in++;
-#else /* BIG_ENDIAN */
-            d = *in++;
-#endif /* BIG_ENDIAN */
+	    if (xmlLittleEndian) {
+		d = *in++;
+	    } else {
+		tmp = (unsigned char *) in;
+		d = *tmp++;
+		d = d | (((unsigned int)*tmp) << 8);
+		in++;
+	    }
             if ((d & 0xFC00) == 0xDC00) {
                 c &= 0x03FF;
                 c <<= 10;
                 c |= d & 0x03FF;
                 c += 0x10000;
             }
-            else
+            else {
+		*outlen = out - outstart;
+		*inlenb = processed - inb;
 	        return(-2);
+	    }
         }
 
 	/* assertion: c is a single UTF-4 value */
         if (out >= outend)
-	    return(-1);
+	    break;
         if      (c <    0x80) {  *out++=  c;                bits= -6; }
         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
@@ -251,11 +280,14 @@ UTF16LEToUTF8(unsigned char* out, int outlen,
  
         for ( ; bits >= 0; bits-= 6) {
             if (out >= outend)
-	        return(-1);
+	        break;
             *out++= ((c >> bits) & 0x3F) | 0x80;
         }
+	processed = (const unsigned char*) in;
     }
-    return(out-outstart);
+    *outlen = out - outstart;
+    *inlenb = processed - inb;
+    return(0);
 }
 
 /**
@@ -273,40 +305,44 @@ UTF16LEToUTF8(unsigned char* out, int outlen,
  *     if the transcoding failed. 
  */
 int
-UTF8ToUTF16LE(unsigned char* outb, int outlen,
+UTF8ToUTF16LE(unsigned char* outb, int *outlen,
             const unsigned char* in, int *inlen)
 {
     unsigned short* out = (unsigned short*) outb;
+    const unsigned char* processed = in;
     unsigned short* outstart= out;
     unsigned short* outend;
     const unsigned char* inend= in+*inlen;
     unsigned int c, d, trailing;
-#ifdef BIG_ENDIAN
     unsigned char *tmp;
     unsigned short tmp1, tmp2;
-#endif /* BIG_ENDIAN */
 
-    outlen /= 2; /* convert in short length */
-    outend = out + outlen;
+    outend = out + (*outlen / 2);
     while (in < inend) {
       d= *in++;
       if      (d < 0x80)  { c= d; trailing= 0; }
-      else if (d < 0xC0)
-          return(-2);    /* trailing byte in leading position */
-      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+      else if (d < 0xC0) {
+          /* trailing byte in leading position */
+	  *outlen = out - outstart;
+	  *inlen = processed - in;
+	  return(-2);
+      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
-      else
-          return(-2);    /* no chance for this in UTF-16 */
+      else {
+	/* no chance for this in UTF-16 */
+	*outlen = out - outstart;
+	*inlen = processed - in;
+	return(-2);
+      }
 
       if (inend - in < trailing) {
-          *inlen -= (inend - in);
           break;
       } 
 
       for ( ; trailing; trailing--) {
           if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))
-	      return(-1);
+	      break;
           c <<= 6;
           c |= d & 0x3F;
       }
@@ -314,41 +350,44 @@ UTF8ToUTF16LE(unsigned char* outb, int outlen,
       /* assertion: c is a single UTF-4 value */
         if (c < 0x10000) {
             if (out >= outend)
-	        return(-1);
-#ifdef BIG_ENDIAN
-            tmp = (unsigned char *) out;
-            *tmp = c ;
-            *(tmp + 1) = c >> 8 ;
-            out++;
-#else /* BIG_ENDIAN */
-            *out++ = c;
-#endif /* BIG_ENDIAN */
+	        break;
+	    if (xmlLittleEndian) {
+		*out++ = c;
+	    } else {
+		tmp = (unsigned char *) out;
+		*tmp = c ;
+		*(tmp + 1) = c >> 8 ;
+		out++;
+	    }
         }
         else if (c < 0x110000) {
             if (out+1 >= outend)
-	        return(-1);
+	        break;
             c -= 0x10000;
-#ifdef BIG_ENDIAN
-            tmp1 = 0xD800 | (c >> 10);
-            tmp = (unsigned char *) out;
-            *tmp = tmp1;
-            *(tmp + 1) = tmp1 >> 8;
-            out++;
+	    if (xmlLittleEndian) {
+		*out++ = 0xD800 | (c >> 10);
+		*out++ = 0xDC00 | (c & 0x03FF);
+	    } else {
+		tmp1 = 0xD800 | (c >> 10);
+		tmp = (unsigned char *) out;
+		*tmp = tmp1;
+		*(tmp + 1) = tmp1 >> 8;
+		out++;
 
-            tmp2 = 0xDC00 | (c & 0x03FF);
-            tmp = (unsigned char *) out;
-            *tmp  = tmp2;
-            *(tmp + 1) = tmp2 >> 8;
-            out++;
-#else /* BIG_ENDIAN */
-            *out++ = 0xD800 | (c >> 10);
-            *out++ = 0xDC00 | (c & 0x03FF);
-#endif /* BIG_ENDIAN */
+		tmp2 = 0xDC00 | (c & 0x03FF);
+		tmp = (unsigned char *) out;
+		*tmp  = tmp2;
+		*(tmp + 1) = tmp2 >> 8;
+		out++;
+	    }
         }
         else
-	    return(-1);
+	    break;
+	processed = in;
     }
-    return(out-outstart);
+    *outlen = out - outstart;
+    *inlen = processed - in;
+    return(0);
 }
 
 /**
@@ -369,18 +408,16 @@ UTF8ToUTF16LE(unsigned char* outb, int outlen,
  *     as the return value is positive, else unpredictiable.
  */
 int
-UTF16BEToUTF8(unsigned char* out, int outlen,
+UTF16BEToUTF8(unsigned char* out, int *outlen,
             const unsigned char* inb, int *inlenb)
 {
-    unsigned char* outstart= out;
-    unsigned char* outend= out+outlen;
+    unsigned char* outstart = out;
+    const unsigned char* processed = inb;
+    unsigned char* outend = out + *outlen;
     unsigned short* in = (unsigned short*) inb;
     unsigned short* inend;
     unsigned int c, d, inlen;
-#ifdef BIG_ENDIAN
-#else /* BIG_ENDIAN */
     unsigned char *tmp;
-#endif /* BIG_ENDIAN */    
     int bits;
 
     if ((*inlenb % 2) == 1)
@@ -388,43 +425,46 @@ UTF16BEToUTF8(unsigned char* out, int outlen,
     inlen = *inlenb / 2;
     inend= in + inlen;
     while (in < inend) {
-#ifdef BIG_ENDIAN    
-        c= *in++;
-#else
-        tmp = (unsigned char *) in;
-	c = *tmp++;
-	c = c << 8;
-	c = c | (unsigned int) *tmp;
-	in++;
-#endif	
+	if (xmlLittleEndian) {
+	    tmp = (unsigned char *) in;
+	    c = *tmp++;
+	    c = c << 8;
+	    c = c | (unsigned int) *tmp;
+	    in++;
+	} else {
+	    c= *in++;
+	} 
         if ((c & 0xFC00) == 0xD800) {    /* surrogates */
 	    if (in >= inend) {           /* (in > inend) shouldn't happens */
-	        (*inlenb) -= 2;
-		break;
+		*outlen = out - outstart;
+		*inlenb = processed - inb;
+	        return(-2);
+	    }
+	    if (xmlLittleEndian) {
+		tmp = (unsigned char *) in;
+		d = *tmp++;
+		d = d << 8;
+		d = d | (unsigned int) *tmp;
+		in++;
+	    } else {
+		d= *in++;
 	    }
-
-#ifdef BIG_ENDIAN
-            d= *in++;
-#else
-            tmp = (unsigned char *) in;
-	    d = *tmp++;
-	    d = d << 8;
-	    d = d | (unsigned int) *tmp;
-	    in++;
-#endif	    
             if ((d & 0xFC00) == 0xDC00) {
                 c &= 0x03FF;
                 c <<= 10;
                 c |= d & 0x03FF;
                 c += 0x10000;
             }
-            else 
+            else {
+		*outlen = out - outstart;
+		*inlenb = processed - inb;
 	        return(-2);
+	    }
         }
 
 	/* assertion: c is a single UTF-4 value */
         if (out >= outend) 
-	    return(-1);
+	    break;
         if      (c <    0x80) {  *out++=  c;                bits= -6; }
         else if (c <   0x800) {  *out++= ((c >>  6) & 0x1F) | 0xC0;  bits=  0; }
         else if (c < 0x10000) {  *out++= ((c >> 12) & 0x0F) | 0xE0;  bits=  6; }
@@ -432,11 +472,14 @@ UTF16BEToUTF8(unsigned char* out, int outlen,
  
         for ( ; bits >= 0; bits-= 6) {
             if (out >= outend) 
-	        return(-1);
+	        break;
             *out++= ((c >> bits) & 0x3F) | 0x80;
         }
+	processed = (const unsigned char*) in;
     }
-    return(out-outstart);
+    *outlen = out - outstart;
+    *inlenb = processed - inb;
+    return(0);
 }
 
 /**
@@ -454,79 +497,86 @@ UTF16BEToUTF8(unsigned char* out, int outlen,
  *     if the transcoding failed. 
  */
 int
-UTF8ToUTF16BE(unsigned char* outb, int outlen,
+UTF8ToUTF16BE(unsigned char* outb, int *outlen,
             const unsigned char* in, int *inlen)
 {
     unsigned short* out = (unsigned short*) outb;
+    const unsigned char* processed = in;
     unsigned short* outstart= out;
     unsigned short* outend;
     const unsigned char* inend= in+*inlen;
     unsigned int c, d, trailing;
-#ifdef BIG_ENDIAN
-#else
     unsigned char *tmp;
     unsigned short tmp1, tmp2;
-#endif /* BIG_ENDIAN */    
 
-    outlen /= 2; /* convert in short length */
-    outend = out + outlen;
+    outend = out + (*outlen / 2);
     while (in < inend) {
       d= *in++;
       if      (d < 0x80)  { c= d; trailing= 0; }
-      else if (d < 0xC0)
-          return(-2);    /* trailing byte in leading position */
-      else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
+      else if (d < 0xC0)  {
+          /* trailing byte in leading position */
+	  *outlen = out - outstart;
+	  *inlen = processed - in;
+	  return(-2);
+      } else if (d < 0xE0)  { c= d & 0x1F; trailing= 1; }
       else if (d < 0xF0)  { c= d & 0x0F; trailing= 2; }
       else if (d < 0xF8)  { c= d & 0x07; trailing= 3; }
-      else
-          return(-2);    /* no chance for this in UTF-16 */
+      else {
+          /* no chance for this in UTF-16 */
+	  *outlen = out - outstart;
+	  *inlen = processed - in;
+	  return(-2);
+      }
 
       if (inend - in < trailing) {
-          *inlen -= (inend - in);
           break;
       } 
 
       for ( ; trailing; trailing--) {
-          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  return(-1);
+          if ((in >= inend) || (((d= *in++) & 0xC0) != 0x80))  break;
           c <<= 6;
           c |= d & 0x3F;
       }
 
       /* assertion: c is a single UTF-4 value */
         if (c < 0x10000) {
-            if (out >= outend)  return(-1);
-#ifdef BIG_ENDIAN
-            *out++ = c;
-#else
-            tmp = (unsigned char *) out;
-            *tmp = c >> 8;
-            *(tmp + 1) = c;
-            out++;
-#endif /* BIG_ENDIAN */
+            if (out >= outend)  break;
+	    if (xmlLittleEndian) {
+		tmp = (unsigned char *) out;
+		*tmp = c >> 8;
+		*(tmp + 1) = c;
+		out++;
+	    } else {
+		*out++ = c;
+	    }
         }
         else if (c < 0x110000) {
-            if (out+1 >= outend)  return(-1);
+            if (out+1 >= outend)  break;
             c -= 0x10000;
-#ifdef BIG_ENDIAN
-            *out++ = 0xD800 | (c >> 10);
-            *out++ = 0xDC00 | (c & 0x03FF);
-#else
-            tmp1 = 0xD800 | (c >> 10);
-            tmp = (unsigned char *) out;
-            *tmp = tmp1 >> 8;
-            *(tmp + 1) = tmp1;
-            out++;
+	    if (xmlLittleEndian) {
+		tmp1 = 0xD800 | (c >> 10);
+		tmp = (unsigned char *) out;
+		*tmp = tmp1 >> 8;
+		*(tmp + 1) = tmp1;
+		out++;
 
-            tmp2 = 0xDC00 | (c & 0x03FF);
-            tmp = (unsigned char *) out;
-            *tmp = tmp2 >> 8;
-            *(tmp + 1) = tmp2;
-            out++;
-#endif
+		tmp2 = 0xDC00 | (c & 0x03FF);
+		tmp = (unsigned char *) out;
+		*tmp = tmp2 >> 8;
+		*(tmp + 1) = tmp2;
+		out++;
+	    } else {
+		*out++ = 0xD800 | (c >> 10);
+		*out++ = 0xDC00 | (c & 0x03FF);
+	    }
         }
-        else  return(-1);
+        else
+	    break;
+	processed = in;
     }
-    return(out-outstart);
+    *outlen = out - outstart;
+    *inlen = processed - in;
+    return(0);
 }
 
 /**
@@ -636,8 +686,12 @@ xmlParseCharEncoding(const char* name)
     if (!strcmp(upper,  "ISO-8859-9")) return(XML_CHAR_ENCODING_8859_9);
 
     if (!strcmp(upper, "ISO-2022-JP")) return(XML_CHAR_ENCODING_2022_JP);
-    if (!strcmp(upper, "Shift_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
+    if (!strcmp(upper, "SHIFT_JIS")) return(XML_CHAR_ENCODING_SHIFT_JIS);
     if (!strcmp(upper, "EUC-JP")) return(XML_CHAR_ENCODING_EUC_JP);
+
+#ifdef DEBUG_ENCODING
+    fprintf(stderr, "Unknown encoding %s\n", name);
+#endif
     return(XML_CHAR_ENCODING_ERROR);
 }
 
@@ -712,6 +766,9 @@ xmlNewCharEncodingHandler(const char *name,
      * registers and returns the handler.
      */
     xmlRegisterCharEncodingHandler(handler);
+#ifdef DEBUG_ENCODING
+    fprintf(stderr, "Registered encoding handler for %s\n", name);
+#endif
     return(handler);
 }
 
@@ -725,11 +782,18 @@ xmlNewCharEncodingHandler(const char *name,
  */
 void
 xmlInitCharEncodingHandlers(void) {
+    unsigned short int tst = 0x1234;
+    unsigned char *ptr = (unsigned char *) &tst; 
+
     if (handlers != NULL) return;
 
     handlers = (xmlCharEncodingHandlerPtr *)
         xmlMalloc(MAX_ENCODING_HANDLERS * sizeof(xmlCharEncodingHandlerPtr));
 
+    if (*ptr == 0x12) xmlLittleEndian = 0;
+    else if (*ptr == 0x34) xmlLittleEndian = 1;
+    else fprintf(stderr, "Odd problem at endianness detection\n");
+
     if (handlers == NULL) {
         fprintf(stderr, "xmlInitCharEncodingHandlers : out of memory !\n");
 	return;
@@ -755,7 +819,8 @@ xmlCleanupCharEncodingHandlers(void) {
     for (;nbCharEncodingHandler > 0;) {
         nbCharEncodingHandler--;
 	if (handlers[nbCharEncodingHandler] != NULL) {
-	    xmlFree(handlers[nbCharEncodingHandler]->name);
+	    if (handlers[nbCharEncodingHandler]->name != NULL)
+		xmlFree(handlers[nbCharEncodingHandler]->name);
 	    xmlFree(handlers[nbCharEncodingHandler]);
 	}
     }
@@ -798,6 +863,8 @@ xmlRegisterCharEncodingHandler(xmlCharEncodingHandlerPtr handler) {
  */
 xmlCharEncodingHandlerPtr
 xmlGetCharEncodingHandler(xmlCharEncoding enc) {
+    xmlCharEncodingHandlerPtr handler;
+
     if (handlers == NULL) xmlInitCharEncodingHandlers();
     switch (enc) {
         case XML_CHAR_ENCODING_ERROR:
@@ -811,40 +878,68 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
         case XML_CHAR_ENCODING_UTF16BE:
 	    return(xmlUTF16BEHandler);
         case XML_CHAR_ENCODING_EBCDIC:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("EBCDIC");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("ebcdic");
+            if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_UCS4LE:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-4");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("UCS-4");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("UCS4");
+            if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_UCS4BE:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("UCS4BE");
+            if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_UCS4_2143:
-	    return(NULL);
+	    break;
         case XML_CHAR_ENCODING_UCS4_3412:
-	    return(NULL);
+	    break;
         case XML_CHAR_ENCODING_UCS2:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("ISO-10646-UCS-2");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("UCS-2");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("UCS2");
+            if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_8859_1:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_2:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_3:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_4:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_5:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_6:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_7:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_8:
-	    return(NULL);
         case XML_CHAR_ENCODING_8859_9:
 	    return(NULL);
         case XML_CHAR_ENCODING_2022_JP:
+            handler = xmlFindCharEncodingHandler("ISO-2022-JP");
+            if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_SHIFT_JIS:
+            handler = xmlFindCharEncodingHandler("SHIFT-JIS");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("SHIFT_JIS");
+            if (handler != NULL) return(handler);
+            handler = xmlFindCharEncodingHandler("Shift_JIS");
+            if (handler != NULL) return(handler);
+	    break;
         case XML_CHAR_ENCODING_EUC_JP:
-	    return(NULL);
+            handler = xmlFindCharEncodingHandler("EUC-JP");
+            if (handler != NULL) return(handler);
+	    break;
+	default: 
+	    break;
     }
+    
+#ifdef DEBUG_ENCODING
+    fprintf(stderr, "No handler found for encoding %d\n", enc);
+#endif
     return(NULL);
 }
 
@@ -858,23 +953,306 @@ xmlGetCharEncodingHandler(xmlCharEncoding enc) {
  */
 xmlCharEncodingHandlerPtr
 xmlFindCharEncodingHandler(const char *name) {
-    char upper[500];
+#ifdef LIBXML_ICONV_ENABLED
+    iconv_t icv_in, icv_out;
+    xmlCharEncodingHandlerPtr enc;
+#endif /* LIBXML_ICONV_ENABLED */
+    char upper[100];
     int i;
 
     if (handlers == NULL) xmlInitCharEncodingHandlers();
     if (name == NULL) return(xmlDefaultCharEncodingHandler);
     if (name[0] == 0) return(xmlDefaultCharEncodingHandler);
 
-    for (i = 0;i < 499;i++) {
+    for (i = 0;i < 99;i++) {
         upper[i] = toupper(name[i]);
 	if (upper[i] == 0) break;
     }
     upper[i] = 0;
 
     for (i = 0;i < nbCharEncodingHandler; i++)
-        if (!strcmp(name, handlers[i]->name))
+        if (!strcmp(upper, handlers[i]->name)) {
+#ifdef DEBUG_ENCODING
+            fprintf(stderr, "Found registered handler for encoding %s\n", name);
+#endif
 	    return(handlers[i]);
+	}
 
+#ifdef LIBXML_ICONV_ENABLED
+    /* check whether iconv can handle this */
+    icv_in = iconv_open("UTF-8", name);
+    icv_out = iconv_open(name, "UTF-8");
+    if ((icv_in != (iconv_t) -1) && (icv_out != (iconv_t) -1)) {
+	    enc = xmlMalloc(sizeof(xmlCharEncodingHandler));
+	    if (enc != NULL) {
+		enc->name = NULL;
+		enc->input = NULL;
+		enc->output = NULL;
+		enc->iconv_in = icv_in;
+		enc->iconv_out = icv_out;
+#ifdef DEBUG_ENCODING
+		fprintf(stderr, "Found iconv handler for encoding %s\n", name);
+#endif
+		return enc;
+	    }
+    } else if ((icv_in != (iconv_t) -1) || icv_out != (iconv_t) -1) {
+	    fprintf(stderr, "iconv : problems with filters for '%s'\n", name);
+    }
+    /* no handler was built: close whichever converter did open */
+    if (icv_in != (iconv_t) -1) iconv_close(icv_in);
+    if (icv_out != (iconv_t) -1) iconv_close(icv_out);
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+    fprintf(stderr, "No handler found for encoding %s\n", name);
+#endif
     return(NULL);
 }
 
+#ifdef LIBXML_ICONV_ENABLED
+/**
+ * xmlIconvWrapper:
+ * @cd:		iconv converter data structure
+ * @out:  a pointer to an array of bytes to store the result
+ * @outlen:  the length of @out
+ * @in:  a pointer to an array of bytes in the encoding @cd converts from
+ * @inlen:  the length of @in
+ *
+ * Returns 0 if success, or 
+ *     -1 by lack of space, or
+ *     -2 if the transcoding fails (the input is not valid in its encoding
+ *        or the result of transformation can't fit into the encoding we want), or
+ *     -3 if the last bytes of @in can't form a single output char.
+ *     
+ * The value of @inlen after return is the number of octets consumed.
+ * The value of @outlen after return is the number of octets produced.
+ * Both are updated even when an error is returned.
+ */
+static int
+xmlIconvWrapper(iconv_t cd,
+	unsigned char *out, int *outlen,
+	const unsigned char *in, int *inlen) {
+
+	size_t icv_inlen = *inlen, icv_outlen = *outlen;
+	const char *icv_in = (const char *) in;
+	char *icv_out = (char *) out;
+	size_t ret;
+
+	ret = iconv(cd,
+		&icv_in, &icv_inlen,
+		&icv_out, &icv_outlen);
+	*inlen -= icv_inlen;
+	*outlen -= icv_outlen;
+	if (icv_inlen != 0 || ret == (size_t) -1) {
+		/*
+		 * Standalone tests: a missing errno macro can't break the chain.
+		 */
+#ifdef EILSEQ
+		if (errno == EILSEQ) {
+			return -2;
+		}
+#endif
+#ifdef E2BIG
+		if (errno == E2BIG) {
+			return -1;
+		}
+#endif
+		/*
+		 * EINVAL (input truncated inside a multibyte sequence)
+		 * and any other errno are reported the same way.
+		 */
+		return -3;
+	}
+	return 0;
+}
+#endif /* LIBXML_ICONV_ENABLED */
+
+/**
+ * xmlCharEncInFunc:
+ * @handler:	char encoding transformation data structure
+ * @out:  an xmlBuffer for the output.
+ * @in:  an xmlBuffer for the input
+ *     
+ * Generic front-end for the encoding handler input function
+ *     
+ * Returns the converter's result (0 or more) if success, or 
+ *     -1 general error
+ *     -2 if the transcoding fails (the input is not valid in its encoding
+ *        or @handler has no input converter)
+ */
+int
+xmlCharEncInFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+                 xmlBufferPtr in) {
+    int ret = -2;
+    int written;
+    int toconv;
+
+    if (handler == NULL) return(-1);
+    if (out == NULL) return(-1);
+    if (in == NULL) return(-1);
+
+    written = out->size - out->use;
+    toconv = in->use;
+    if (toconv * 2 >= written)
+        xmlBufferGrow(out, toconv * 2);
+    /* always keep one byte free for the trailing 0 stored below */
+    written = out->size - out->use - 1;
+    if (handler->input != NULL) {
+	ret = handler->input(&out->content[out->use], &written,
+	                     in->content, &toconv);
+	xmlBufferShrink(in, toconv);
+	out->use += written;
+	out->content[out->use] = 0;
+    }
+#ifdef LIBXML_ICONV_ENABLED
+    else if (handler->iconv_in != NULL) {
+	ret = xmlIconvWrapper(handler->iconv_in, &out->content[out->use],
+	                      &written, in->content, &toconv);
+	xmlBufferShrink(in, toconv);
+	out->use += written;
+	out->content[out->use] = 0;
+	if (ret == -1) ret = -3;
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+    switch (ret) {
+        case 0:
+	    fprintf(stderr, "converted %d bytes to %d bytes of input\n",
+	            toconv, written);
+	    break;
+        case -1:
+	    fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n",
+	            toconv, written, in->use);
+	    break;
+        case -2:
+	    fprintf(stderr, "input conversion failed due to input error\n");
+	    break;
+        case -3:
+	    fprintf(stderr,"converted %d bytes to %d bytes of input, %d left\n",
+	            toconv, written, in->use);
+	    break;
+	default:
+	    fprintf(stderr,"Unknown input conversion failed %d\n", ret);
+    }
+#endif
+    /*
+     * Ignore when input buffer is not on a boundary
+     */
+    if (ret == -3) ret = 0;
+    return(ret);
+}
+
+/**
+ * xmlCharEncOutFunc:
+ * @handler:	char encoding transformation data structure
+ * @out:  an xmlBuffer for the output.
+ * @in:  an xmlBuffer for the input
+ *     
+ * Generic front-end for the encoding handler output function
+ *     
+ * Returns the converter's result (0 or more) if success, or 
+ *     -1 general error
+ *     -2 if the transcoding fails or @handler has no output converter
+ *     -3 if part of the input is left unconverted (iconv converters)
+ */
+int
+xmlCharEncOutFunc(xmlCharEncodingHandler *handler, xmlBufferPtr out,
+                  xmlBufferPtr in) {
+    int ret = -2;
+    int written;
+    int toconv;
+
+    if (handler == NULL) return(-1);
+    if (out == NULL) return(-1);
+    if (in == NULL) return(-1);
+
+    written = out->size - out->use;
+    toconv = in->use;
+    if (toconv * 2 >= written)
+        xmlBufferGrow(out, toconv * 2);
+    /* always keep one byte free for the trailing 0 stored below */
+    written = out->size - out->use - 1;
+    if (handler->output != NULL) {
+	ret = handler->output(&out->content[out->use], &written,
+	                     in->content, &toconv);
+	xmlBufferShrink(in, toconv);
+	out->use += written;
+	out->content[out->use] = 0;
+    }
+#ifdef LIBXML_ICONV_ENABLED
+    else if (handler->iconv_out != NULL) {
+	ret = xmlIconvWrapper(handler->iconv_out, &out->content[out->use],
+	                      &written, in->content, &toconv);
+	xmlBufferShrink(in, toconv);
+	out->use += written;
+	out->content[out->use] = 0;
+	if (ret == -1) ret = -3;
+    }
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+    switch (ret) {
+        case 0:
+	    fprintf(stderr, "converted %d bytes to %d bytes of output\n",
+	            toconv, written);
+	    break;
+        case -1:
+	    fprintf(stderr, "output conversion failed by lack of space\n");
+	    break;
+        case -2:
+	    fprintf(stderr, "output conversion failed due to output error\n");
+	    break;
+        case -3:
+	    fprintf(stderr,"converted %d bytes to %d bytes of output %d left\n",
+	            toconv, written, in->use);
+	    break;
+	default:
+	    fprintf(stderr,"Unknown output conversion failed %d\n", ret);
+    }
+#endif
+    return(ret);
+}
+
+/**
+ * xmlCharEncCloseFunc:
+ * @handler:	char encoding transformation data structure
+ *     
+ * Generic front-end for the encoding handler close function
+ *
+ * Returns 0 if success, or -1 in case of error
+ */
+int
+xmlCharEncCloseFunc(xmlCharEncodingHandler *handler) {
+    int ret = 0;
+    if (handler == NULL) return(-1);
+#ifdef LIBXML_ICONV_ENABLED
+    /*
+     * Iconv handlers can be used only once, free the whole block and the
+     * associated iconv resources; they carry no name, so test them first.
+     */
+    if ((handler->iconv_out != NULL) || (handler->iconv_in != NULL)) {
+	if (handler->name != NULL)
+	    xmlFree(handler->name);
+	if (handler->iconv_out != NULL) {
+	    if (iconv_close(handler->iconv_out))
+		ret = -1;
+	}
+	if (handler->iconv_in != NULL) {
+	    if (iconv_close(handler->iconv_in))
+		ret = -1;
+	}
+	xmlFree(handler);
+    } else if (handler->name == NULL) {
+	return(-1);
+    }
+#else
+    if (handler->name == NULL) return(-1);
+#endif /* LIBXML_ICONV_ENABLED */
+#ifdef DEBUG_ENCODING
+    if (ret)
+        fprintf(stderr, "failed to close the encoding handler\n");
+    else
+        fprintf(stderr, "closed the encoding handler\n");
+
+#endif
+    return(ret);
+}
+
diff --git a/encoding.h b/encoding.h
index 1b1c92e3..f6edbf29 100644
--- a/encoding.h
+++ b/encoding.h
@@ -22,12 +22,30 @@
 #define __XML_CHAR_ENCODING_H__
 
 #include <libxml/xmlversion.h>
+#ifdef LIBXML_ICONV_ENABLED
+#include <iconv.h>
+#endif
+#include <libxml/tree.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /**
  * Predefined values for some standard encodings
+ * Libxml doesn't do beforehand translation on UTF8, ISOLatinX
+ * It also supports UTF16 (LE and BE) by default.
+ *
+ * Anything else would have to be translated to UTF8 before being
+ * given to the parser itself. The BOM for UTF16 and the encoding
+ * declaration are looked at and a converter is looked for at that
+ * point. If not found the parser stops here as asked by the XML REC
+ * Converter can be registered by the user using xmlRegisterCharEncodingHandler
+ * but the current form doesn't allow stateful transcoding (a serious
+ * problem agreed !). If iconv has been found it will be used
+ * automatically and allow stateful transcoding, the simplest is then
+ * to be sure to enable iconv and to provide iconv libs for the encoding
+ * support needed.
  */
 typedef enum {
     XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
@@ -65,9 +83,13 @@ typedef enum {
  * Take a block of chars in the original encoding and try to convert
  * it to an UTF-8 block of chars out.
  *
- * Returns the number of byte written, or -1 by lack of space.
+ * Returns the number of byte written, or -1 by lack of space, or -2
+ *     if the transcoding failed.
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
  */
-typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
                                          const unsigned char* in, int *inlen);
 
 
@@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
  *
  * Returns the number of byte written, or -1 by lack of space, or -2
  *     if the transcoding failed.
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
  */
-typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
                                           const unsigned char* in, int *inlen);
 
+
 /*
  * Block defining the handlers for non UTF-8 encodings.
+ * If iconv is supported, there are two extra fields
  */
 
 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
@@ -96,7 +123,11 @@ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
 struct _xmlCharEncodingHandler {
     char                       *name;
     xmlCharEncodingInputFunc   input;
-    xmlCharEncodingOutputFunc output;
+    xmlCharEncodingOutputFunc  output;
+#ifdef LIBXML_ICONV_ENABLED
+    iconv_t                    iconv_in;
+    iconv_t                    iconv_out;
+#endif /* LIBXML_ICONV_ENABLED */
 };
 
 void	xmlInitCharEncodingHandlers	(void);
@@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
 xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
 int	xmlCheckUTF8			(const unsigned char *utf);
 
+int	xmlCharEncOutFunc		(xmlCharEncodingHandler *handler,
+					 xmlBufferPtr out,
+					 xmlBufferPtr in);
+
+int	xmlCharEncInFunc		(xmlCharEncodingHandler *handler,
+					 xmlBufferPtr out,
+					 xmlBufferPtr in);
+int	xmlCharEncCloseFunc		(xmlCharEncodingHandler *handler);
 
 #ifdef __cplusplus
 }
diff --git a/include/libxml/encoding.h b/include/libxml/encoding.h
index 1b1c92e3..f6edbf29 100644
--- a/include/libxml/encoding.h
+++ b/include/libxml/encoding.h
@@ -22,12 +22,30 @@
 #define __XML_CHAR_ENCODING_H__
 
 #include <libxml/xmlversion.h>
+#ifdef LIBXML_ICONV_ENABLED
+#include <iconv.h>
+#endif
+#include <libxml/tree.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 /**
  * Predefined values for some standard encodings
+ * Libxml doesn't do beforehand translation on UTF8, ISOLatinX
+ * It also supports UTF16 (LE and BE) by default.
+ *
+ * Anything else would have to be translated to UTF8 before being
+ * given to the parser itself. The BOM for UTF16 and the encoding
+ * declaration are looked at and a converter is looked for at that
+ * point. If not found the parser stops here as asked by the XML REC
+ * Converter can be registered by the user using xmlRegisterCharEncodingHandler
+ * but the current form doesn't allow stateful transcoding (a serious
+ * problem agreed !). If iconv has been found it will be used
+ * automatically and allow stateful transcoding, the simplest is then
+ * to be sure to enable iconv and to provide iconv libs for the encoding
+ * support needed.
  */
 typedef enum {
     XML_CHAR_ENCODING_ERROR=   -1, /* No char encoding detected */
@@ -65,9 +83,13 @@ typedef enum {
  * Take a block of chars in the original encoding and try to convert
  * it to an UTF-8 block of chars out.
  *
- * Returns the number of byte written, or -1 by lack of space.
+ * Returns the number of byte written, or -1 by lack of space, or -2
+ *     if the transcoding failed.
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
  */
-typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int *outlen,
                                          const unsigned char* in, int *inlen);
 
 
@@ -83,12 +105,17 @@ typedef int (* xmlCharEncodingInputFunc)(unsigned char* out, int outlen,
  *
  * Returns the number of byte written, or -1 by lack of space, or -2
  *     if the transcoding failed.
+ * The value of @inlen after return is the number of octets consumed
+ *     if the return value is positive, else unpredictable.
+ * The value of @outlen after return is the number of octets produced.
  */
-typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int outlen,
+typedef int (* xmlCharEncodingOutputFunc)(unsigned char* out, int *outlen,
                                           const unsigned char* in, int *inlen);
 
+
 /*
  * Block defining the handlers for non UTF-8 encodings.
+ * If iconv is supported, there are two extra fields
  */
 
 typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler;
@@ -96,7 +123,11 @@ typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr;
 struct _xmlCharEncodingHandler {
     char                       *name;
     xmlCharEncodingInputFunc   input;
-    xmlCharEncodingOutputFunc output;
+    xmlCharEncodingOutputFunc  output;
+#ifdef LIBXML_ICONV_ENABLED
+    iconv_t                    iconv_in;
+    iconv_t                    iconv_out;
+#endif /* LIBXML_ICONV_ENABLED */
 };
 
 void	xmlInitCharEncodingHandlers	(void);
@@ -109,6 +140,14 @@ xmlCharEncodingHandlerPtr xmlGetCharEncodingHandler(xmlCharEncoding enc);
 xmlCharEncodingHandlerPtr xmlFindCharEncodingHandler(const char *name);
 int	xmlCheckUTF8			(const unsigned char *utf);
 
+int	xmlCharEncOutFunc		(xmlCharEncodingHandler *handler,
+					 xmlBufferPtr out,
+					 xmlBufferPtr in);
+
+int	xmlCharEncInFunc		(xmlCharEncodingHandler *handler,
+					 xmlBufferPtr out,
+					 xmlBufferPtr in);
+int	xmlCharEncCloseFunc		(xmlCharEncodingHandler *handler);
 
 #ifdef __cplusplus
 }
diff --git a/include/libxml/parserInternals.h b/include/libxml/parserInternals.h
index c3597348..e7e6fa0a 100644
--- a/include/libxml/parserInternals.h
+++ b/include/libxml/parserInternals.h
@@ -28,10 +28,10 @@ extern "C" {
  * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
  */
 #define IS_CHAR(c)							\
-    ((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||		\
-      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) &&		\
-      (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) &&		\
-      ((c) <= 0x10FFFF))
+    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
+     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
+     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
+     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
 
 /*
  * [3] S ::= (#x20 | #x9 | #xD | #xA)+
@@ -442,8 +442,10 @@ xmlParserCtxtPtr	xmlNewParserCtxt	(void);
 xmlParserCtxtPtr	xmlCreateEntityParserCtxt(const xmlChar *URL,
 						 const xmlChar *ID,
 						 const xmlChar *base);
-void			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
+int			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
 						 xmlCharEncoding enc);
+int			xmlSwitchToEncoding	(xmlParserCtxtPtr ctxt,
+					     xmlCharEncodingHandlerPtr handler);
 void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
 
 /**
diff --git a/include/libxml/tree.h b/include/libxml/tree.h
index 1cb12e24..35ea5256 100644
--- a/include/libxml/tree.h
+++ b/include/libxml/tree.h
@@ -380,6 +380,8 @@ void		xmlBufferCCat		(xmlBufferPtr buf,
 					 const char *str);
 int		xmlBufferShrink		(xmlBufferPtr buf,
 					 int len);
+int		xmlBufferGrow		(xmlBufferPtr buf,
+					 int len);
 void		xmlBufferEmpty		(xmlBufferPtr buf);
 const xmlChar*	xmlBufferContent	(const xmlBufferPtr buf);
 int		xmlBufferUse		(const xmlBufferPtr buf);
diff --git a/include/libxml/xmlIO.h b/include/libxml/xmlIO.h
index 8f9b7e02..2d14ebeb 100644
--- a/include/libxml/xmlIO.h
+++ b/include/libxml/xmlIO.h
@@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
     xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
     
     xmlBufferPtr buffer;    /* Local buffer encoded in  UTF-8 */
+    xmlBufferPtr raw;       /* if encoder != NULL buffer for raw input */
 };
 
 
diff --git a/parser.c b/parser.c
index a8e6ff4d..6714d3cf 100644
--- a/parser.c
+++ b/parser.c
@@ -41,6 +41,7 @@
 #include <libxml/valid.h>
 #include <libxml/parserInternals.h>
 #include <libxml/xmlIO.h>
+#include <libxml/uri.h>
 #include "xml-error.h"
 
 #define XML_PARSER_BIG_BUFFER_SIZE 1000
@@ -483,7 +484,7 @@ xmlNextChar(xmlParserCtxtPtr ctxt) {
 			    if ((ctxt->sax != NULL) &&
 				(ctxt->sax->error != NULL))
 				ctxt->sax->error(ctxt->userData, 
-				 "Char out of allowed range\n");
+				 "Char 0x%X out of allowed range\n", val);
 			    ctxt->errNo = XML_ERR_INVALID_ENCODING;
 			    ctxt->wellFormed = 0;
 			    ctxt->disableSAX = 1;
@@ -612,7 +613,7 @@ xmlCurrentChar(xmlParserCtxtPtr ctxt, int *len) {
 		if ((ctxt->sax != NULL) &&
 		    (ctxt->sax->error != NULL))
 		    ctxt->sax->error(ctxt->userData, 
-				     "Char out of allowed range\n");
+				     "Char 0x%X out of allowed range\n", val);
 		ctxt->errNo = XML_ERR_INVALID_ENCODING;
 		ctxt->wellFormed = 0;
 		ctxt->disableSAX = 1;
@@ -727,7 +728,7 @@ xmlStringCurrentChar(xmlParserCtxtPtr ctxt, const xmlChar *cur, int *len) {
 		if ((ctxt->sax != NULL) &&
 		    (ctxt->sax->error != NULL))
 		    ctxt->sax->error(ctxt->userData, 
-				     "Char out of allowed range\n");
+				     "Char 0x%X out of allowed range\n", val);
 		ctxt->errNo = XML_ERR_INVALID_ENCODING;
 		ctxt->wellFormed = 0;
 		ctxt->disableSAX = 1;
@@ -2278,96 +2279,209 @@ xmlCheckLanguageID(const xmlChar *lang) {
  *
  * change the input functions when discovering the character encoding
  * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
  */
-void
+int
 xmlSwitchEncoding(xmlParserCtxtPtr ctxt, xmlCharEncoding enc)
 {
     xmlCharEncodingHandlerPtr handler;
 
+    /*
+     * Settle the encodings which need no converter lookup: an unknown
+     * encoding is reported as an error, NONE and UTF8 need no conversion.
+     */
+    switch (enc) {
+	case XML_CHAR_ENCODING_ERROR:
+	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
+	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		ctxt->sax->error(ctxt->userData, "encoding unknown\n");
+	    ctxt->wellFormed = 0;
+	    ctxt->disableSAX = 1;
+	    /* reported once here, no point in looking for a handler */
+	    return(-1);
+	case XML_CHAR_ENCODING_NONE:
+	    /* let's assume it's UTF-8 without the XML decl */
+	    return(0);
+	case XML_CHAR_ENCODING_UTF8:
+	    /* default encoding, no conversion should be needed */
+	    return(0);
+	default:
+	    break;
+    }
     handler = xmlGetCharEncodingHandler(enc);
+    if (handler == NULL) {
+	/*
+	 * Default handlers.
+	 * No converter is available for this encoding. The ISO Latin
+	 * family is accepted as-is (the content is kept in the document
+	 * encoding) and 0 is returned; the other encodings fail with -1,
+	 * most of them after reporting through the SAX error callback.
+	 * ERROR, NONE and UTF8 were already settled by the switch above.
+	 */
+	switch (enc) {
+	    case XML_CHAR_ENCODING_UTF16LE:
+	    case XML_CHAR_ENCODING_UTF16BE:
+		/* fails below without any error message */
+		break;
+	    case XML_CHAR_ENCODING_UCS4LE:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 little endian not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4BE:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 big endian not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_EBCDIC:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding EBCDIC not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4_2143:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 2143 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS4_3412:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS4 3412 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_UCS2:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding UCS2 not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_8859_1:
+	    case XML_CHAR_ENCODING_8859_2:
+	    case XML_CHAR_ENCODING_8859_3:
+	    case XML_CHAR_ENCODING_8859_4:
+	    case XML_CHAR_ENCODING_8859_5:
+	    case XML_CHAR_ENCODING_8859_6:
+	    case XML_CHAR_ENCODING_8859_7:
+	    case XML_CHAR_ENCODING_8859_8:
+	    case XML_CHAR_ENCODING_8859_9:
+		/*
+		 * Keep the internal content in the document encoding
+		 */
+		if ((ctxt->inputNr == 1) &&
+		    (ctxt->encoding == NULL) &&
+		    (ctxt->input->encoding != NULL)) {
+		    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
+		}
+		return(0);
+	    case XML_CHAR_ENCODING_2022_JP:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding ISO-2022-JP not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_SHIFT_JIS:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding Shift_JIS not supported\n");
+		break;
+	    case XML_CHAR_ENCODING_EUC_JP:
+		ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+		    ctxt->sax->error(ctxt->userData,
+		      "char encoding EUC-JP not supported\n");
+		break;
+	    default:
+		break;
+	}
+    }
+    /* still no handler: the encoding is not supported */
+    if (handler == NULL)
+	return(-1);
+    return(xmlSwitchToEncoding(ctxt, handler));
+}
+
+/**
+ * xmlSwitchToEncoding:
+ * @ctxt:  the parser context
+ * @handler:  the encoding handler
+ *
+ * change the input functions when discovering the character encoding
+ * of a given entity.
+ *
+ * Returns 0 in case of success, -1 otherwise
+ */
+int
+xmlSwitchToEncoding(xmlParserCtxtPtr ctxt, xmlCharEncodingHandlerPtr handler) 
+{
+    int nbchars;
+
     if (handler != NULL) {
         if (ctxt->input != NULL) {
 	    if (ctxt->input->buf != NULL) {
 	        if (ctxt->input->buf->encoder != NULL) {
+		    if (ctxt->input->buf->encoder == handler)
+			return(0);
 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			ctxt->sax->error(ctxt->userData,
 			     "xmlSwitchEncoding : encoder already regitered\n");
-		    return;
+		    return(-1);
 		}
 		ctxt->input->buf->encoder = handler;
 
 	        /*
-		 * Is there already some content down the pipe to convert
+		 * Is there already some content down the pipe to convert ?
 		 */
 	        if ((ctxt->input->buf->buffer != NULL) &&
 		    (ctxt->input->buf->buffer->use > 0)) {
-		    xmlChar *buf;
-		    int res, len, size;
 		    int processed;
 
 		    /*
 		     * Specific handling of the Byte Order Mark for 
 		     * UTF-16
 		     */
-		    if ((enc == XML_CHAR_ENCODING_UTF16LE) && 
+		    if ((handler->name != NULL) &&
+			(!strcmp(handler->name, "UTF-16LE")) && 
 		        (ctxt->input->cur[0] == 0xFF) &&
 		        (ctxt->input->cur[1] == 0xFE)) {
-			SKIP(2);
+			ctxt->input->cur += 2;
 		    }
-		    if ((enc == XML_CHAR_ENCODING_UTF16BE) && 
+		    if ((handler->name != NULL) &&
+			(!strcmp(handler->name, "UTF-16BE")) && 
 		        (ctxt->input->cur[0] == 0xFE) &&
 		        (ctxt->input->cur[1] == 0xFF)) {
-			SKIP(2);
+			ctxt->input->cur += 2;
 		    }
 
 		    /*
-		     * convert the non processed part
+		     * Shrink the current input buffer.
+		     * Move it as the raw buffer and create a new input buffer
 		     */
 		    processed = ctxt->input->cur - ctxt->input->base;
-                    len = ctxt->input->buf->buffer->use - processed;
-
-		    if (len <= 0) {
-		        return;
-		    }
-		    size = ctxt->input->buf->buffer->use * 4;
-		    if (size < 4000)
-		        size = 4000;
-retry_larger:			
-		    buf = (xmlChar *) xmlMalloc(size + 1);
-		    if (buf == NULL) {
-			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			    ctxt->sax->error(ctxt->userData,
-				 "xmlSwitchEncoding : out of memory\n");
-		        return;
-		    }
-		    /* TODO !!! Handling of buf too small */
-		    res = handler->input(buf, size, ctxt->input->cur, &len);
-		    if (res == -1) {
-		        size *= 2;
-			xmlFree(buf);
-			goto retry_larger;
-		    }
-		    if ((res < 0) ||
-		        (len != ctxt->input->buf->buffer->use - processed)) {
-			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			    ctxt->sax->error(ctxt->userData,
-				 "xmlSwitchEncoding : conversion failed\n");
-                        xmlFree(buf);
-		        return;
-		    }
+		    xmlBufferShrink(ctxt->input->buf->buffer, processed);
+		    ctxt->input->buf->raw = ctxt->input->buf->buffer;
+		    ctxt->input->buf->buffer = xmlBufferCreate();
 
 		    /*
-		     * Conversion succeeded, get rid of the old buffer
+		     * convert as much as possible of the raw input
+		     * to the parser reading buffer.
 		     */
-		    xmlFree(ctxt->input->buf->buffer->content);
-		    ctxt->input->buf->buffer->content = buf;
-		    ctxt->input->base = buf;
-		    ctxt->input->cur = buf;
-		    ctxt->input->buf->buffer->size = size;
-		    ctxt->input->buf->buffer->use = res;
-                    buf[res] = 0;
+		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+		                               ctxt->input->buf->buffer,
+					       ctxt->input->buf->raw);
+		    if (nbchars < 0) {
+			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+			return(-1);
+		    }
+		    ctxt->input->base =
+		    ctxt->input->cur = ctxt->input->buf->buffer->content;
 		}
-		return;
+		return(0);
 	    } else {
 	        if (ctxt->input->length == 0) {
 		    /*
@@ -2377,191 +2491,59 @@ retry_larger:
 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			ctxt->sax->error(ctxt->userData,
 					 "xmlSwitchEncoding : no input\n");
-		    return;
+		    return(-1);
 		} else {
-		    xmlChar *buf;
-		    int res, len;
-		    int processed = ctxt->input->cur - ctxt->input->base;
+		    int processed;
 
 		    /*
-		     * convert the non processed part
+		     * Shrink the current input buffer.
+		     * Move it as the raw buffer and create a new input buffer
 		     */
-                    len = ctxt->input->length - processed;
-		    if (len <= 0) {
-			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			    ctxt->sax->error(ctxt->userData,
-				 "xmlSwitchEncoding : input fully consumed?\n");
-		        return;
-		    }
-		    buf = (xmlChar *) xmlMalloc(ctxt->input->length * 4);
-		    if (buf == NULL) {
-			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			    ctxt->sax->error(ctxt->userData,
-				 "xmlSwitchEncoding : out of memory\n");
-		        return;
-		    }
-		    res = handler->input(buf, ctxt->input->length * 4,
-		                         ctxt->input->cur, &len);
-		    if ((res < 0) ||
-		        (len != ctxt->input->length - processed)) {
-			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-			    ctxt->sax->error(ctxt->userData,
-				 "xmlSwitchEncoding : conversion failed\n");
-                        xmlFree(buf);
-		        return;
+		    processed = ctxt->input->cur - ctxt->input->base;
+		    ctxt->input->buf->raw = xmlBufferCreate();
+		    xmlBufferAdd(ctxt->input->buf->raw, ctxt->input->cur,
+		                 ctxt->input->length - processed);
+		    ctxt->input->buf->buffer = xmlBufferCreate();
+
+		    /*
+		     * convert as much as possible of the raw input
+		     * to the parser reading buffer.
+		     */
+		    nbchars = xmlCharEncInFunc(ctxt->input->buf->encoder,
+		                               ctxt->input->buf->buffer,
+					       ctxt->input->buf->raw);
+		    if (nbchars < 0) {
+			fprintf(stderr, "xmlSwitchToEncoding: encoder error\n");
+			return(-1);
 		    }
+
 		    /*
 		     * Conversion succeeded, get rid of the old buffer
 		     */
 		    if ((ctxt->input->free != NULL) &&
 		        (ctxt->input->base != NULL))
 			ctxt->input->free((xmlChar *) ctxt->input->base);
-		    ctxt->input->base = ctxt->input->cur = buf;
-		    ctxt->input->length = res;
+		    ctxt->input->base =
+		    ctxt->input->cur = ctxt->input->buf->buffer->content;
 		}
 	    }
 	} else {
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	        ctxt->sax->error(ctxt->userData,
 		                 "xmlSwitchEncoding : no input\n");
+	    return(-1);
 	}
-    }
+	/*
+	 * The parsing is now done in UTF8 natively
+	 */
+	if (ctxt->encoding != NULL) {
+	    xmlFree((xmlChar *) ctxt->encoding);
+	    ctxt->encoding = NULL;
+	}
+    } else 
+	return(-1);
+    return(0);
 
-    switch (enc) {
-        case XML_CHAR_ENCODING_ERROR:
-	    ctxt->errNo = XML_ERR_UNKNOWN_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-	        ctxt->sax->error(ctxt->userData, "encoding unknown\n");
-	    ctxt->wellFormed = 0;
-	    ctxt->disableSAX = 1;
-            break;
-        case XML_CHAR_ENCODING_NONE:
-	    /* let's assume it's UTF-8 without the XML decl */
-            return;
-        case XML_CHAR_ENCODING_UTF8:
-	    /* default encoding, no conversion should be needed */
-            return;
-        case XML_CHAR_ENCODING_UTF16LE:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding UTF16 little endian not supported\n");
-            break;
-        case XML_CHAR_ENCODING_UTF16BE:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding UTF16 big endian not supported\n");
-            break;
-        case XML_CHAR_ENCODING_UCS4LE:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding USC4 little endian not supported\n");
-            break;
-        case XML_CHAR_ENCODING_UCS4BE:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding USC4 big endian not supported\n");
-            break;
-        case XML_CHAR_ENCODING_EBCDIC:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding EBCDIC not supported\n");
-            break;
-        case XML_CHAR_ENCODING_UCS4_2143:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding UCS4 2143 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_UCS4_3412:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding UCS4 3412 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_UCS2:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding UCS2 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_1:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_1 ISO Latin 1 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_2:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_2 ISO Latin 2 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_3:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_3 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_4:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_4 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_5:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_5 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_6:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_6 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_7:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_7 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_8:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_8 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_8859_9:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-		  "char encoding ISO_8859_9 not supported\n");
-            break;
-        case XML_CHAR_ENCODING_2022_JP:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-                  "char encoding ISO-2022-JPnot supported\n");
-            break;
-        case XML_CHAR_ENCODING_SHIFT_JIS:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-                  "char encoding Shift_JISnot supported\n");
-            break;
-        case XML_CHAR_ENCODING_EUC_JP:
-	    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
-            if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
-                ctxt->sax->error(ctxt->userData,
-                  "char encoding EUC-JPnot supported\n");
-            break;
-    }
 }
 
 /************************************************************************
@@ -4253,7 +4235,7 @@ xmlParseExternalID(xmlParserCtxtPtr ctxt, xmlChar **publicID, int strict) {
 void
 xmlParseComment(xmlParserCtxtPtr ctxt) {
     xmlChar *buf = NULL;
-    int len = 0;
+    int len;
     int size = XML_PARSER_BUFFER_SIZE;
     int q, ql;
     int r, rl;
@@ -4282,10 +4264,11 @@ xmlParseComment(xmlParserCtxtPtr ctxt) {
     r = CUR_CHAR(rl);
     NEXTL(rl);
     cur = CUR_CHAR(l);
+    len = 0;
     while (IS_CHAR(cur) &&
            ((cur != '>') ||
 	    (r != '-') || (q != '-'))) {
-	if ((r == '-') && (q == '-')) {
+	if ((r == '-') && (q == '-') && (len > 1)) {
 	    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 	        ctxt->sax->error(ctxt->userData,
 	       "Comment must not contain '--' (double-hyphen)`\n");
@@ -4732,11 +4715,36 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
 		    ctxt->disableSAX = 1;
 		}
 		if (URI) {
-		    if ((ctxt->sax != NULL) &&
-			(!ctxt->disableSAX) && (ctxt->sax->entityDecl != NULL))
-			ctxt->sax->entityDecl(ctxt->userData, name,
-		                    XML_EXTERNAL_PARAMETER_ENTITY,
-				    literal, URI, NULL);
+		    xmlURIPtr uri;
+
+		    uri = xmlParseURI((const char *) URI);
+		    if (uri == NULL) {
+			if ((ctxt->sax != NULL) &&
+			    (!ctxt->disableSAX) &&
+			    (ctxt->sax->error != NULL))
+			    ctxt->sax->error(ctxt->userData,
+				        "Invalid URI: %s\n", URI);
+			ctxt->wellFormed = 0;
+			ctxt->errNo = XML_ERR_INVALID_URI;
+		    } else {
+			if (uri->fragment != NULL) {
+			    if ((ctxt->sax != NULL) &&
+				(!ctxt->disableSAX) &&
+				(ctxt->sax->error != NULL))
+				ctxt->sax->error(ctxt->userData,
+					    "Fragment not allowed: %s\n", URI);
+			    ctxt->wellFormed = 0;
+			    ctxt->errNo = XML_ERR_URI_FRAGMENT;
+			} else {
+			    if ((ctxt->sax != NULL) &&
+				(!ctxt->disableSAX) &&
+				(ctxt->sax->entityDecl != NULL))
+				ctxt->sax->entityDecl(ctxt->userData, name,
+					    XML_EXTERNAL_PARAMETER_ENTITY,
+					    literal, URI, NULL);
+			}
+			xmlFreeURI(uri);
+		    }
 		}
 	    }
 	} else {
@@ -4757,6 +4765,31 @@ xmlParseEntityDecl(xmlParserCtxtPtr ctxt) {
 		    ctxt->wellFormed = 0;
 		    ctxt->disableSAX = 1;
 		}
+		if (URI) {
+		    xmlURIPtr uri;
+
+		    uri = xmlParseURI((const char *)URI);
+		    if (uri == NULL) {
+			if ((ctxt->sax != NULL) &&
+			    (!ctxt->disableSAX) &&
+			    (ctxt->sax->error != NULL))
+			    ctxt->sax->error(ctxt->userData,
+				        "Invalid URI: %s\n", URI);
+			ctxt->wellFormed = 0;
+			ctxt->errNo = XML_ERR_INVALID_URI;
+		    } else {
+			if (uri->fragment != NULL) {
+			    if ((ctxt->sax != NULL) &&
+				(!ctxt->disableSAX) &&
+				(ctxt->sax->error != NULL))
+				ctxt->sax->error(ctxt->userData,
+					    "Fragment not allowed: %s\n", URI);
+			    ctxt->wellFormed = 0;
+			    ctxt->errNo = XML_ERR_URI_FRAGMENT;
+			}
+			xmlFreeURI(uri);
+		    }
+		}
 		if ((RAW != '>') && (!IS_BLANK(CUR))) {
 		    if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			ctxt->sax->error(ctxt->userData,
@@ -5973,7 +6006,20 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
     /*
      * We know that '<?xml' is here.
      */
-    SKIP(5);
+    if ((RAW == '<') && (NXT(1) == '?') &&
+	(NXT(2) == 'x') && (NXT(3) == 'm') &&
+	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
+	SKIP(5);
+    } else {
+	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
+	    ctxt->sax->error(ctxt->userData,
+	                     "Text declaration '<?xml' required\n");
+	ctxt->errNo = XML_ERR_XMLDECL_NOT_STARTED;
+	ctxt->wellFormed = 0;
+	ctxt->disableSAX = 1;
+
+	return;
+    }
 
     if (!IS_BLANK(CUR)) {
 	if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
@@ -6003,7 +6049,13 @@ xmlParseTextDecl(xmlParserCtxtPtr ctxt) {
 	ctxt->wellFormed = 0;
 	ctxt->disableSAX = 1;
     }
-    ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
+    xmlParseEncodingDecl(ctxt);
+    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+	/*
+	 * The XML REC instructs us to stop parsing right here
+	 */
+        return;
+    }
 
     SKIP_BLANKS;
     if ((RAW == '?') && (NXT(1) == '>')) {
@@ -6192,6 +6244,13 @@ xmlParseExternalSubset(xmlParserCtxtPtr ctxt, const xmlChar *ExternalID,
         (NXT(2) == 'x') && (NXT(3) == 'm') &&
 	(NXT(4) == 'l')) {
 	xmlParseTextDecl(ctxt);
+	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+	    /*
+	     * The XML REC instructs us to stop parsing right here
+	     */
+	    ctxt->instate = XML_PARSER_EOF;
+	    return;
+	}
     }
     if (ctxt->myDoc == NULL) {
         ctxt->myDoc = xmlNewDoc(BAD_CAST "1.0");
@@ -6441,6 +6500,13 @@ xmlParseReference(xmlParserCtxtPtr ctxt) {
 		    (NXT(2) == 'x') && (NXT(3) == 'm') &&
 		    (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
 		    xmlParseTextDecl(ctxt);
+		    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+			/*
+			 * The XML REC instructs us to stop parsing right here
+			 */
+			ctxt->instate = XML_PARSER_EOF;
+			return;
+		    }
 		    if (input->standalone) {
 			if ((ctxt->sax != NULL) && (ctxt->sax->error != NULL))
 			    ctxt->sax->error(ctxt->userData,
@@ -6947,6 +7013,15 @@ xmlParsePEReference(xmlParserCtxtPtr ctxt) {
 			    (NXT(2) == 'x') && (NXT(3) == 'm') &&
 			    (NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
 			    xmlParseTextDecl(ctxt);
+			    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+				/*
+				 * The XML REC instructs us to stop parsing
+				 * right here
+				 */
+				ctxt->instate = XML_PARSER_EOF;
+				xmlFree(name);
+				return;
+			    }
 			}
 			if (ctxt->token == 0)
 			    ctxt->token = ' ';
@@ -8197,6 +8272,38 @@ xmlParseEncodingDecl(xmlParserCtxtPtr ctxt) {
 	    ctxt->disableSAX = 1;
 	    ctxt->errNo = XML_ERR_STRING_NOT_STARTED;
 	}
+	if (encoding != NULL) {
+	    xmlCharEncoding enc;
+	    xmlCharEncodingHandlerPtr handler;
+
+	    if (ctxt->input->encoding != NULL)
+		xmlFree((xmlChar *) ctxt->input->encoding);
+	    ctxt->input->encoding = encoding;
+
+	    enc = xmlParseCharEncoding((const char *) encoding);
+	    /* registered set of known encodings */
+	    if (enc != XML_CHAR_ENCODING_ERROR) {
+		xmlSwitchEncoding(ctxt, enc);
+		if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+		    /* the input must not keep a pointer to freed memory */
+		    ctxt->input->encoding = NULL;
+		    xmlFree(encoding);
+		    return(NULL);
+		}
+	    } else {
+	        /* fallback for unknown encodings */
+                handler = xmlFindCharEncodingHandler((const char *) encoding);
+		if (handler != NULL) {
+		    xmlSwitchToEncoding(ctxt, handler);
+		} else {
+		    ctxt->errNo = XML_ERR_UNSUPPORTED_ENCODING;
+		    /* the input must not keep a pointer to freed memory */
+		    ctxt->input->encoding = NULL;
+		    xmlFree(encoding);
+		    return(NULL);
+		}
+	    }
+	}
     }
     return(encoding);
 }
@@ -8362,7 +8469,13 @@ xmlParseXMLDecl(xmlParserCtxtPtr ctxt) {
 	ctxt->wellFormed = 0;
 	ctxt->disableSAX = 1;
     }
-    ctxt->input->encoding = xmlParseEncodingDecl(ctxt);
+    xmlParseEncodingDecl(ctxt);
+    if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+	/*
+	 * The XML REC instructs us to stop parsing right here
+	 */
+        return;
+    }
 
     /*
      * We may have the standalone status.
@@ -8489,12 +8602,19 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
     if ((RAW == '<') && (NXT(1) == '?') &&
         (NXT(2) == 'x') && (NXT(3) == 'm') &&
 	(NXT(4) == 'l') && (IS_BLANK(NXT(5)))) {
+
+	/*
+	 * Note that we will switch encoding on the fly.
+	 */
 	xmlParseXMLDecl(ctxt);
+	if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+	    /*
+	     * The XML REC instructs us to stop parsing right here
+	     */
+	    return(-1);
+	}
 	ctxt->standalone = ctxt->input->standalone;
 	SKIP_BLANKS;
-	if ((ctxt->encoding == NULL) && (ctxt->input->encoding != NULL))
-	    ctxt->encoding = xmlStrdup(ctxt->input->encoding);
-
     } else {
 	ctxt->version = xmlCharStrdup(XML_DEFAULT_VERSION);
     }
@@ -8581,14 +8701,6 @@ xmlParseDocument(xmlParserCtxtPtr ctxt) {
 	(!ctxt->disableSAX))
         ctxt->sax->endDocument(ctxt->userData);
 
-    /*
-     * Grab the encoding if it was added on-the-fly
-     */
-    if ((ctxt->encoding != NULL) && (ctxt->myDoc != NULL) &&
-	(ctxt->myDoc->encoding == NULL)) {
-	ctxt->myDoc->encoding = ctxt->encoding;
-	ctxt->encoding = NULL;
-    }
     if (! ctxt->wellFormed) return(-1);
     return(0);
 }
@@ -8805,6 +8917,14 @@ xmlParseTryOrFinish(xmlParserCtxtPtr ctxt, int terminate) {
 			fprintf(stderr, "PP: Parsing XML Decl\n");
 #endif
 			xmlParseXMLDecl(ctxt);
+			if (ctxt->errNo == XML_ERR_UNSUPPORTED_ENCODING) {
+			    /*
+			     * The XML REC instructs us to stop parsing right
+			     * here
+			     */
+			    ctxt->instate = XML_PARSER_EOF;
+			    return(0);
+			}
 			ctxt->standalone = ctxt->input->standalone;
 			if ((ctxt->encoding == NULL) &&
 			    (ctxt->input->encoding != NULL))
diff --git a/parserInternals.h b/parserInternals.h
index c3597348..e7e6fa0a 100644
--- a/parserInternals.h
+++ b/parserInternals.h
@@ -28,10 +28,10 @@ extern "C" {
  * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF.
  */
 #define IS_CHAR(c)							\
-    ((((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||		\
-      (((c) >= 0x20) && ((c) != 0xFFFE) && ((c) != 0xFFFF))) &&		\
-      (((c) <= 0xD7FF) || ((c) >= 0xE000)) && ((c) >= 0) &&		\
-      ((c) <= 0x10FFFF))
+    (((c) == 0x09) || ((c) == 0x0A) || ((c) == 0x0D) ||			\
+     (((c) >= 0x20) && ((c) <= 0xD7FF)) ||				\
+     (((c) >= 0xE000) && ((c) <= 0xFFFD)) ||				\
+     (((c) >= 0x10000) && ((c) <= 0x10FFFF)))
 
 /*
  * [3] S ::= (#x20 | #x9 | #xD | #xA)+
@@ -442,8 +442,10 @@ xmlParserCtxtPtr	xmlNewParserCtxt	(void);
 xmlParserCtxtPtr	xmlCreateEntityParserCtxt(const xmlChar *URL,
 						 const xmlChar *ID,
 						 const xmlChar *base);
-void			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
+int			xmlSwitchEncoding	(xmlParserCtxtPtr ctxt,
 						 xmlCharEncoding enc);
+int			xmlSwitchToEncoding	(xmlParserCtxtPtr ctxt,
+					     xmlCharEncodingHandlerPtr handler);
 void			xmlFreeParserCtxt	(xmlParserCtxtPtr ctxt);
 
 /**
diff --git a/tree.c b/tree.c
index 2cc4b51d..74b5321f 100644
--- a/tree.c
+++ b/tree.c
@@ -3771,6 +3771,31 @@ xmlBufferShrink(xmlBufferPtr buf, int len) {
     return(len);
 }
 
+/**
+ * xmlBufferGrow:
+ * @buf:  the buffer
+ * @len:  the minimum free size to allocate
+ *
+ * Grow the available space of an XML buffer.
+ *
+ * Returns 0 if there was already room for @len more bytes, the new
+ *         available space after reallocation, or -1 in case of error
+ */
+int
+xmlBufferGrow(xmlBufferPtr buf, int len) {
+    int size;
+    xmlChar *newbuf;
+
+    /* nothing to do when the unused tail already holds len bytes */
+    if (buf->use + len < buf->size) return(0);
+
+    size = buf->size + buf->use + len + 100;
+    newbuf = xmlRealloc(buf->content, size);
+    if (newbuf == NULL) return(-1);
+    buf->content = newbuf;
+    buf->size = size;
+    return(buf->size - buf->use);
+}
+
 /**
  * xmlBufferDump:
  * @file:  the file output
diff --git a/tree.h b/tree.h
index 1cb12e24..35ea5256 100644
--- a/tree.h
+++ b/tree.h
@@ -380,6 +380,8 @@ void		xmlBufferCCat		(xmlBufferPtr buf,
 					 const char *str);
 int		xmlBufferShrink		(xmlBufferPtr buf,
 					 int len);
+int		xmlBufferGrow		(xmlBufferPtr buf,
+					 int len);
 void		xmlBufferEmpty		(xmlBufferPtr buf);
 const xmlChar*	xmlBufferContent	(const xmlBufferPtr buf);
 int		xmlBufferUse		(const xmlBufferPtr buf);
diff --git a/uri.c b/uri.c
index 1a481130..6000d39e 100644
--- a/uri.c
+++ b/uri.c
@@ -1283,6 +1283,34 @@ xmlParseURIReference(xmlURIPtr uri, const char *str) {
     return(0);
 }
 
+/**
+ * xmlParseURI:
+ * @str:  the URI string to analyze
+ *
+ * Build a new URI structure from a string, which is parsed as
+ *
+ * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
+ *
+ * Returns the newly built xmlURIPtr, or NULL if @str is NULL, if the
+ *         structure could not be created or if the parsing failed
+ */
+xmlURIPtr
+xmlParseURI(const char *str) {
+    xmlURIPtr parsed;
+
+    if (str == NULL)
+	return(NULL);
+    parsed = xmlCreateURI();
+    if (parsed == NULL)
+	return(NULL);
+    /* any non-zero status from the reference parser is a failure */
+    if (xmlParseURIReference(parsed, str) != 0) {
+	xmlFreeURI(parsed);
+	return(NULL);
+    }
+    return(parsed);
+}
+
 /**
  * xmlNormalizeURIPath:
  * @path:  pointer to the path string
diff --git a/xml-error.h b/xml-error.h
index 34f4e668..25d9db09 100644
--- a/xml-error.h
+++ b/xml-error.h
@@ -130,7 +130,9 @@ typedef enum {
     XML_ERR_ENTITY_CHAR_ERROR, /* 88 */
     XML_ERR_ENTITY_PE_INTERNAL, /* 88 */
     XML_ERR_ENTITY_LOOP, /* 89 */
-    XML_ERR_ENTITY_BOUNDARY /* 90 */
+    XML_ERR_ENTITY_BOUNDARY, /* 90 */
+    XML_ERR_INVALID_URI, /* 91 */
+    XML_ERR_URI_FRAGMENT /* 92 */
 }xmlParserErrors;
 
 void	xmlParserError		(void *ctx,
diff --git a/xmlIO.c b/xmlIO.c
index 65f5632b..841a6b6d 100644
--- a/xmlIO.c
+++ b/xmlIO.c
@@ -498,6 +498,10 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
     }
     ret->buffer->alloc = XML_BUFFER_ALLOC_DOUBLEIT;
     ret->encoder = xmlGetCharEncodingHandler(enc);
+    if (ret->encoder != NULL)
+        ret->raw = xmlBufferCreate();
+    else
+        ret->raw = NULL;
     ret->readcallback = NULL;
     ret->closecallback = NULL;
     ret->context = NULL;
@@ -513,13 +517,20 @@ xmlAllocParserInputBuffer(xmlCharEncoding enc) {
  */
 void
 xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
-    if (in->buffer != NULL) {
-        xmlBufferFree(in->buffer);
-	in->buffer = NULL;
+    if (in->raw) {
+        xmlBufferFree(in->raw);
+	in->raw = NULL;
+    }
+    if (in->encoder != NULL) {
+        xmlCharEncCloseFunc(in->encoder);
     }
     if (in->closecallback != NULL) {
 	in->closecallback(in->context);
     }
+    if (in->buffer != NULL) {
+        xmlBufferFree(in->buffer);
+	in->buffer = NULL;
+    }
 
     memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
     xmlFree(in);
@@ -683,34 +694,22 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
 
     if (len < 0) return(0);
     if (in->encoder != NULL) {
-        xmlChar *buffer;
-	int processed = len;
-
-	buffer = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
-	if (buffer == NULL) {
-	    fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
-	    return(-1);
-	}
-	nbchars = in->encoder->input(buffer, (len + 1) * 2 * sizeof(xmlChar),
-	                             (xmlChar *) buf, &processed);
-	/*
-	 * TODO : we really need to have something atomic or the 
-	 *        encoder must report the number of bytes read
+        /*
+	 * Store the data in the incoming raw buffer
 	 */
+        if (in->raw == NULL) {
+	    in->raw = xmlBufferCreate();
+	}
+	xmlBufferAdd(in->raw, (const xmlChar *) buf, len);
+
+	/*
+	 * convert as much as possible to the parser reading buffer.
+	 */
+	nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
 	if (nbchars < 0) {
 	    fprintf(stderr, "xmlParserInputBufferPush: encoder error\n");
-	    xmlFree(buffer);
 	    return(-1);
 	}
-	if (processed  != len) {
-	    fprintf(stderr,
-	            "TODO xmlParserInputBufferPush: processed  != len\n");
-	    xmlFree(buffer);
-	    return(-1);
-	}
-        buffer[nbchars] = 0;
-        xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
-	xmlFree(buffer);
     } else {
 	nbchars = len;
         xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
@@ -730,7 +729,9 @@ xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
  * Grow up the content of the input buffer, the old data are preserved
  * This routine handle the I18N transcoding to internal UTF-8
  * This routine is used when operating the parser in normal (pull) mode
- * TODO: one should be able to remove one extra copy
+ *
+ * TODO: one should be able to remove one extra copy by copying directly
+ *       onto in->buffer or in->raw
  *
  * Returns the number of chars read and stored in the buffer, or -1
  *         in case of error.
@@ -779,34 +780,22 @@ xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
 	return(-1);
     }
     if (in->encoder != NULL) {
-        xmlChar *buf;
-	int wrote = res;
-
-	buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
-	if (buf == NULL) {
-	    fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
-	    xmlFree(buffer);
-	    return(-1);
+        /*
+	 * Store the data in the incoming raw buffer
+	 */
+        if (in->raw == NULL) {
+	    in->raw = xmlBufferCreate();
 	}
-	nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
-	                             BAD_CAST buffer, &wrote);
-        buf[nbchars] = 0;
-        xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
-	xmlFree(buf);
+	xmlBufferAdd(in->raw, (const xmlChar *) buffer, len);
 
 	/*
-	 * Check that the encoder was able to process the full input
+	 * convert as much as possible to the parser reading buffer.
 	 */
-	if (wrote != res) {
-	    fprintf(stderr, 
-	        "TODO : xmlParserInputBufferGrow wrote %d != res %d\n",
-		wrote, res);
-	    /*
-	     * TODO !!!
-	     * Need to keep the unprocessed input in a buffer in->unprocessed
-	     */
+	nbchars = xmlCharEncInFunc(in->encoder, in->buffer, in->raw);
+	if (nbchars < 0) {
+	    fprintf(stderr, "xmlParserInputBufferGrow: encoder error\n");
+	    return(-1);
 	}
-
     } else {
 	nbchars = res;
         buffer[nbchars] = 0;
diff --git a/xmlIO.h b/xmlIO.h
index 8f9b7e02..2d14ebeb 100644
--- a/xmlIO.h
+++ b/xmlIO.h
@@ -33,6 +33,7 @@ struct _xmlParserInputBuffer {
     xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */
     
     xmlBufferPtr buffer;    /* Local buffer encoded in  UTF-8 */
+    xmlBufferPtr raw;       /* if encoder != NULL buffer for raw input */
 };