diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 6f36fc1c..f12b2806 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -40,7 +40,7 @@ gcc:c89:
   extends: .test
   variables:
     CONFIG: "--without-python"
-    CFLAGS: "-O2 -std=c89 -D_XOPEN_SOURCE=600 -Wno-error=unused-function"
+    CFLAGS: "-O2 -std=c89 -D_XOPEN_SOURCE=600 -Wno-error=unused-function -Wno-error=overlength-strings"
 
 gcc:minimum:
   extends: .test
diff --git a/HTMLparser.c b/HTMLparser.c
index 0107b12d..f3e04834 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -50,10 +50,6 @@
 #define HTML_PARSER_BIG_BUFFER_SIZE 1000
 #define HTML_PARSER_BUFFER_SIZE 100
 
-#define IS_WS_HTML(c) \
-    (((c) == 0x20) || \
-     (((c) >= 0x09) && ((c) <= 0x0D) && ((c) != 0x0B)))
-
 #define IS_HEX_DIGIT(c) \
     ((IS_ASCII_DIGIT(c)) || \
      ((((c) | 0x20) >= 'a') && (((c) | 0x20) <= 'f')))
@@ -314,17 +310,15 @@ htmlNodeInfoPop(htmlParserCtxtPtr ctxt)
 #define CUR (*ctxt->input->cur)
 
 /**
- * `the` HTML parser context
+ * Prescan to find encoding.
  *
- * Ty to find and encoding in the current data available in the input
- * buffer this is needed to try to switch to the proper encoding when
- * one face a character error.
- * That's an heuristic, since it's operating outside of parsing it could
- * try to use a meta which had been commented out, that's the reason it
- * should only be used in case of error, not as a default.
+ * Try to find an encoding in the current data available in the input
+ * buffer.
  *
- * @returns an encoding string or NULL if not found, the string need to
- *   be freed
+ * TODO: Implement HTML5 prescan algorithm.
+ *
+ * @param ctxt  the HTML parser context
+ * @returns  an encoding string or NULL if not found
  */
 static xmlChar *
 htmlFindEncoding(xmlParserCtxtPtr ctxt) {
@@ -3606,42 +3600,7 @@ htmlParseAttribute(htmlParserCtxtPtr ctxt, xmlChar **value) {
 }
 
 /**
- * Checks an http-equiv attribute from a Meta tag to detect
- * the encoding
- * If a new encoding is detected the parser is switched to decode
- * it and pass UTF8
- *
- * @param ctxt  an HTML parser context
- * @param attvalue  the attribute value
- */
-static void
-htmlCheckEncoding(htmlParserCtxtPtr ctxt, const xmlChar *attvalue) {
-    const xmlChar *encoding;
-    xmlChar *copy;
-
-    if (!attvalue)
-	return;
-
-    encoding = xmlStrcasestr(attvalue, BAD_CAST"charset");
-    if (encoding != NULL) {
-	encoding += 7;
-    }
-    /*
-     * skip blank
-     */
-    if (encoding && IS_WS_HTML(*encoding))
-	encoding = xmlStrcasestr(attvalue, BAD_CAST"=");
-    if (encoding && *encoding == '=') {
-	encoding ++;
-        copy = xmlStrdup(encoding);
-        if (copy == NULL)
-            htmlErrMemory(ctxt);
-	xmlSetDeclaredEncoding(ctxt, copy);
-    }
-}
-
-/**
- * Checks an attributes from a Meta tag
+ * Handle charset encoding in meta tag.
  *
  * @param ctxt  an HTML parser context
  * @param atts  the attributes values
@@ -3650,7 +3609,7 @@ static void
 htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
     int i;
     const xmlChar *att, *value;
-    int http = 0;
+    int isContentType = 0;
     const xmlChar *content = NULL;
 
     if ((ctxt == NULL) || (atts == NULL))
@@ -3663,23 +3622,33 @@ htmlCheckMeta(htmlParserCtxtPtr ctxt, const xmlChar **atts) {
         if (value != NULL) {
             if ((!xmlStrcasecmp(att, BAD_CAST "http-equiv")) &&
                 (!xmlStrcasecmp(value, BAD_CAST "Content-Type"))) {
-                http = 1;
+                isContentType = 1;
             } else if (!xmlStrcasecmp(att, BAD_CAST "charset")) {
-                xmlChar *copy;
+                xmlChar *encoding;
 
-                copy = xmlStrdup(value);
-                if (copy == NULL)
+                encoding = xmlStrdup(value);
+                if (encoding == NULL)
                     htmlErrMemory(ctxt);
-                xmlSetDeclaredEncoding(ctxt, copy);
+                xmlSetDeclaredEncoding(ctxt, encoding);
             } else if (!xmlStrcasecmp(att, BAD_CAST "content")) {
                 content = value;
             }
         }
 	att = atts[i++];
     }
-    if ((http) && (content != NULL))
-	htmlCheckEncoding(ctxt, content);
 
+    if ((isContentType) && (content != NULL)) {
+        htmlMetaEncodingOffsets off;
+
+        if (htmlParseContentType(content, &off)) {
+            xmlChar *encoding;
+
+            encoding = xmlStrndup(content + off.start, off.end - off.start);
+            if (encoding == NULL)
+                htmlErrMemory(ctxt);
+            xmlSetDeclaredEncoding(ctxt, encoding);
+        }
+    }
 }
 
 /**
@@ -3748,7 +3717,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
     const xmlChar **atts;
     int nbatts = 0;
     int maxatts;
-    int meta = 0;
     int i;
     int discardtag = 0;
 
@@ -3763,8 +3731,6 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
     name = htmlParseHTMLName(ctxt, 0).name;
     if (name == NULL)
         return;
-    if (xmlStrEqual(name, BAD_CAST"meta"))
-	meta = 1;
 
     if ((ctxt->options & HTML_PARSE_HTML5) == 0) {
         /*
@@ -3960,8 +3926,10 @@ failed:
         /*
          * Handle specific association to the META tag
          */
-        if (meta)
+        if (((ctxt->input->flags & XML_INPUT_HAS_ENCODING) == 0) &&
+            (strcmp((char *) name, "meta") == 0)) {
             htmlCheckMeta(ctxt, atts);
+        }
 #endif
     }
 
diff --git a/HTMLtree.c b/HTMLtree.c
index 2e579b8e..8b79c12b 100644
--- a/HTMLtree.c
+++ b/HTMLtree.c
@@ -25,6 +25,7 @@
 
 #include "private/buf.h"
 #include "private/error.h"
+#include "private/html.h"
 #include "private/io.h"
 #include "private/save.h"
 
@@ -34,265 +35,315 @@
  *									*
  ************************************************************************/
 
+typedef struct { /* where a meta tag declares its encoding */
+    xmlAttrPtr attr; /* charset or content */
+    const xmlChar *attrValue; /* text of attr, "" if unavailable */
+    htmlMetaEncodingOffsets off; /* encoding span inside attrValue */
+} htmlMetaEncoding;
+
+/* Return the first namespace-less element child of `parent` whose name
+ * equals `name` ignoring case, or NULL if there is none. */
+static htmlNodePtr
+htmlFindFirstChild(htmlNodePtr parent, const char *name) {
+    htmlNodePtr node = parent->children;
+    while (node != NULL) {
+        if ((node->type == XML_ELEMENT_NODE) && (node->ns == NULL) &&
+            (xmlStrcasecmp(node->name, BAD_CAST name) == 0))
+            break;
+        node = node->next;
+    }
+    return(node);
+}
+
+/* Return the first head child of the document's first html child, or
+ * NULL when `doc` is NULL or either element is missing. */
+static htmlNodePtr
+htmlFindHead(htmlDocPtr doc) {
+    htmlNodePtr root = NULL;
+    htmlNodePtr head = NULL;
+
+    if (doc != NULL)
+        root = htmlFindFirstChild((htmlNodePtr) doc, "html");
+    if (root != NULL)
+        head = htmlFindFirstChild(root, "head");
+    return(head);
+}
+
+int
+htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off) {
+    const xmlChar *p = val;
+    /* Scan val for "charset" [ws] "=" [ws] value; return 1 if found. */
+    while (1) {
+        size_t start, end;
+        /* Advance to the next 'c' or 'C'; give up at the end of val. */
+        while ((*p != 'c') && (*p != 'C')) {
+            if (*p == 0)
+                return(0);
+            p += 1;
+        }
+        p += 1;
+        /* Not followed by "harset" (any case): resume the search from p. */
+        if (xmlStrncasecmp(p, BAD_CAST "harset", 6) != 0)
+            continue;
+
+        p += 6;
+        while (IS_WS_HTML(*p)) p += 1;
+
+        if (*p != '=')
+            continue;
+
+        p += 1;
+        while (IS_WS_HTML(*p)) p += 1;
+
+        if (*p == 0)
+            return(0);
+        /* Quoted value: blanks next to either quote are left out. */
+        if ((*p == '"') || (*p == '\'')) {
+            int quote = *p;
+
+            p += 1;
+            while (IS_WS_HTML(*p)) p += 1;
+
+            start = p - val;
+            end = start;
+            /* end is one past the last non-whitespace character seen. */
+            while (*p != quote) {
+                if (*p == 0)
+                    return(0); /* closing quote is missing */
+                if (!IS_WS_HTML(*p))
+                    end = p + 1 - val;
+                p += 1;
+            }
+        } else {
+            start = p - val;
+            /* Unquoted value: runs up to NUL, ';' or whitespace. */
+            while ((*p != 0) && (*p != ';') && (!IS_WS_HTML(*p)))
+                p += 1;
+
+            end = p - val;
+        }
+
+        off->start = start;
+        off->end = end;
+        off->size = p - val + strlen((char *) p); /* full length of val */
+
+        return(1);
+    }
+
+    return(0);
+}
+
+static xmlAttrPtr
+htmlFindMetaEncodingAttr(htmlNodePtr elem, int *outIsContentType) {
+    xmlAttrPtr attr, contentAttr = NULL;
+    int isContentType = 0;
+    /* Find the attribute of a meta element that can carry the encoding. */
+    if (xmlStrcasecmp(elem->name, BAD_CAST "meta") != 0)
+        return(NULL);
+    /* *outIsContentType is only written when an attribute is returned. */
+    for (attr = elem->properties; attr != NULL; attr = attr->next) {
+        if (attr->ns != NULL)
+            continue;
+        if (xmlStrcasecmp(attr->name, BAD_CAST "charset") == 0) {
+            *outIsContentType = 0;
+            return(attr); /* charset wins as soon as it is seen */
+        }
+        if (xmlStrcasecmp(attr->name, BAD_CAST "content") == 0)
+            contentAttr = attr;
+        if ((xmlStrcasecmp(attr->name, BAD_CAST "http-equiv") == 0) &&
+            (attr->children != NULL) &&
+            (attr->children->type == XML_TEXT_NODE) &&
+            (attr->children->next == NULL) &&
+            (xmlStrcasecmp(attr->children->content,
+                           BAD_CAST "Content-Type") == 0))
+            isContentType = 1;
+    }
+    /* content only counts alongside http-equiv="Content-Type". */
+    if ((isContentType) && (contentAttr != NULL)) {
+        *outIsContentType = 1;
+        return(contentAttr);
+    }
+
+    return(NULL);
+}
+
+/*
+ * Check whether `elem` is a meta element declaring an encoding and, if
+ * so, store the attribute, its value and the encoding offsets in `menc`.
+ * Returns 1 if a declaration was found, 0 otherwise.
+ */
+static int
+htmlParseMetaEncoding(htmlNodePtr elem, htmlMetaEncoding *menc) {
+    xmlAttrPtr attr;
+    const xmlChar *val = NULL;
+    int isContentType = 0;
+
+    if ((elem->type != XML_ELEMENT_NODE) ||
+        (elem->ns != NULL) ||
+        (xmlStrcasecmp(elem->name, BAD_CAST "meta") != 0))
+        return(0);
+
+    attr = htmlFindMetaEncodingAttr(elem, &isContentType);
+    if (attr == NULL)
+        return(0);
+
+    /* A value that is not a single text child is treated as empty. */
+    if ((attr->children != NULL) &&
+        (attr->children->type == XML_TEXT_NODE) &&
+        (attr->children->next == NULL) &&
+        (attr->children->content != NULL))
+        val = attr->children->content;
+    else
+        val = BAD_CAST "";
+
+    if (isContentType) {
+        /* content="...charset=..." of an http-equiv Content-Type tag */
+        if (!htmlParseContentType(val, &menc->off))
+            return(0);
+    } else {
+        /* charset="...": the encoding is the whitespace-trimmed value */
+        size_t size = strlen((char *) val);
+        size_t start = 0;
+        size_t end = size;
+
+        while ((start < size) && (IS_WS_HTML(val[start])))
+            start += 1;
+        /* Stop at start so that end >= start for all-blank values. */
+        while ((end > start) && (IS_WS_HTML(val[end-1])))
+            end -= 1;
+        menc->off.start = start;
+        menc->off.end = end;
+        menc->off.size = size;
+    }
+
+    menc->attr = attr;
+    menc->attrValue = val;
+    return(1);
+}
+
+static xmlChar *
+htmlUpdateMetaEncoding(htmlMetaEncoding *menc, const char *encoding) {
+    xmlChar *newVal, *p;
+    size_t size, oldEncSize, newEncSize;
+    /* Return a copy of menc->attrValue with the encoding replaced. */
+    /*
+     * The pseudo "HTML" encoding only produces ASCII.
+     */
+    if (xmlStrcasecmp(BAD_CAST encoding, BAD_CAST "HTML") == 0)
+        encoding = "ASCII";
+    /* New length = old value - old encoding + new encoding. */
+    oldEncSize = menc->off.end - menc->off.start;
+    newEncSize = strlen((char *) encoding);
+    size = menc->off.size - oldEncSize + newEncSize;
+    newVal = xmlMalloc(size + 1);
+    if (newVal == NULL)
+        return(NULL); /* allocation failed */
+    /* Copy the prefix, the new encoding, then the rest of the old value. */
+    p = newVal;
+    memcpy(p, menc->attrValue, menc->off.start);
+    p += menc->off.start;
+    memcpy(p, encoding, newEncSize);
+    p += newEncSize;
+    memcpy(p, menc->attrValue + menc->off.end, menc->off.size - menc->off.end);
+    newVal[size] = 0;
+
+    return(newVal); /* callers in this file release it with xmlFree() */
+}
+
 /**
  * Look up and encoding declaration in the meta tags.
  *
- * Does not support `<meta charset="">` yet. Only supports deprecated
- * `<meta http-equiv="Content-Type" content="">`.
- *
- * The returned string points into attribute content. It should be
- * copied before modifying or freeing nodes.
+ * The returned string points into attribute content and can contain
+ * trailing garbage. It should be copied before modifying or freeing
+ * nodes.
  *
  * @param doc  the document
  * @returns the encoding ot NULL if not found.
  */
 const xmlChar *
 htmlGetMetaEncoding(htmlDocPtr doc) {
-    htmlNodePtr cur;
-    const xmlChar *content;
-    const xmlChar *encoding;
+    htmlNodePtr head, node;
 
-    if (doc == NULL)
-	return(NULL);
-    cur = doc->children;
+    head = htmlFindHead(doc);
+    if (head == NULL)
+        return(NULL);
 
-    /*
-     * Search the html
-     */
-    while (cur != NULL) {
-	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
-	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
-		break;
-	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
-		goto found_head;
-	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
-		goto found_meta;
-	}
-	cur = cur->next;
+    for (node = head->children; node != NULL; node = node->next) {
+        htmlMetaEncoding menc;
+
+        if (htmlParseMetaEncoding(node, &menc)) {
+            /*
+             * Returning a `const xmlChar *` only allows to return
+             * a suffix. In http-equiv meta tags, there could be
+             * more data after the charset, although it's probably
+             * rare in practice.
+             */
+            return(menc.attrValue + menc.off.start);
+        }
     }
-    if (cur == NULL)
-	return(NULL);
-    cur = cur->children;
 
-    /*
-     * Search the head
-     */
-    while (cur != NULL) {
-	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
-	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
-		break;
-	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
-		goto found_meta;
-	}
-	cur = cur->next;
-    }
-    if (cur == NULL)
-	return(NULL);
-found_head:
-    cur = cur->children;
-
-    /*
-     * Search the meta elements
-     */
-found_meta:
-    while (cur != NULL) {
-	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
-	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
-		xmlAttrPtr attr = cur->properties;
-		int http;
-		const xmlChar *value;
-
-		content = NULL;
-		http = 0;
-		while (attr != NULL) {
-		    if ((attr->children != NULL) &&
-		        (attr->children->type == XML_TEXT_NODE) &&
-		        (attr->children->next == NULL)) {
-			value = attr->children->content;
-			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
-			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
-			    http = 1;
-			else if ((value != NULL)
-			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
-			    content = value;
-			if ((http != 0) && (content != NULL))
-			    goto found_content;
-		    }
-		    attr = attr->next;
-		}
-	    }
-	}
-	cur = cur->next;
-    }
     return(NULL);
-
-found_content:
-    encoding = xmlStrstr(content, BAD_CAST"charset=");
-    if (encoding == NULL)
-	encoding = xmlStrstr(content, BAD_CAST"Charset=");
-    if (encoding == NULL)
-	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
-    if (encoding != NULL) {
-	encoding += 8;
-    } else {
-	encoding = xmlStrstr(content, BAD_CAST"charset =");
-	if (encoding == NULL)
-	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
-	if (encoding == NULL)
-	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
-	if (encoding != NULL)
-	    encoding += 9;
-    }
-    if (encoding != NULL) {
-	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
-    }
-    return(encoding);
 }
 
 /**
  * Creates or updates a meta tag with an encoding declaration.
  *
- * Does not support `<meta charset="">` yet. Only supports deprecated
- * `<meta http-equiv="Content-Type" content="">`.
- *
  * NOTE: This will not change the document content encoding.
  *
  * @param doc  the document
  * @param encoding  the encoding string
- * @returns 0 in case of success and -1 in case of error
+ * @returns 0 in case of success, 1 if no head element was found or
+ * arguments are invalid and -1 if memory allocation failed.
  */
 int
 htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
-    htmlNodePtr cur, meta = NULL, head = NULL;
-    const xmlChar *content = NULL;
-    char newcontent[100];
+    htmlNodePtr head, meta;
+    int found = 0;
 
-    newcontent[0] = 0;
+    if (encoding == NULL)
+        return(1);
 
-    if (doc == NULL)
-	return(-1);
+    head = htmlFindHead(doc);
+    if (head == NULL)
+        return(1);
 
-    /* html isn't a real encoding it's just libxml2 way to get entities */
-    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
+    for (meta = head->children; meta != NULL; meta = meta->next) {
+        htmlMetaEncoding menc;
+
+        if (htmlParseMetaEncoding(meta, &menc)) {
+            xmlChar *newVal;
+            int ret;
+
+            found = 1;
+
+            newVal = htmlUpdateMetaEncoding(&menc, (char *) encoding);
+            if (newVal == NULL)
+                return(-1);
+            xmlNodeSetContent((xmlNodePtr) menc.attr, NULL);
+            ret = xmlNodeAddContent((xmlNodePtr) menc.attr, newVal);
+            xmlFree(newVal);
+
+            if (ret < 0)
+                return(-1);
+        }
+    }
+
+    if (found)
+        return(0);
+
+    meta = xmlNewDocNode(head->doc, NULL, BAD_CAST "meta", NULL);
+    if (meta == NULL)
         return(-1);
 
-    if (encoding != NULL) {
-	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
-                (char *)encoding);
-	newcontent[sizeof(newcontent) - 1] = 0;
-    }
-
-    cur = doc->children;
-
-    /*
-     * Search the html
-     */
-    while (cur != NULL) {
-	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
-	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
-		break;
-	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
-		goto found_head;
-	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
-		goto found_meta;
-	}
-	cur = cur->next;
-    }
-    if (cur == NULL)
-	return(-1);
-    cur = cur->children;
-
-    /*
-     * Search the head
-     */
-    while (cur != NULL) {
-	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
-	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
-		break;
-	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
-                head = cur->parent;
-		goto found_meta;
-            }
-	}
-	cur = cur->next;
-    }
-    if (cur == NULL)
-	return(-1);
-found_head:
-    head = cur;
-    if (cur->children == NULL)
-        goto create;
-    cur = cur->children;
-
-found_meta:
-    /*
-     * Search and update all the remaining the meta elements carrying
-     * encoding information
-     */
-    while (cur != NULL) {
-	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
-	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
-		xmlAttrPtr attr = cur->properties;
-		int http;
-		const xmlChar *value;
-
-		content = NULL;
-		http = 0;
-		while (attr != NULL) {
-		    if ((attr->children != NULL) &&
-		        (attr->children->type == XML_TEXT_NODE) &&
-		        (attr->children->next == NULL)) {
-			value = attr->children->content;
-			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
-			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
-			    http = 1;
-			else
-                        {
-                           if ((value != NULL) &&
-                               (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
-			       content = value;
-                        }
-		        if ((http != 0) && (content != NULL))
-			    break;
-		    }
-		    attr = attr->next;
-		}
-		if ((http != 0) && (content != NULL)) {
-		    meta = cur;
-		    break;
-		}
-
-	    }
-	}
-	cur = cur->next;
-    }
-create:
-    if (meta == NULL) {
-        if ((encoding != NULL) && (head != NULL)) {
-            /*
-             * Create a new Meta element with the right attributes
-             */
-
-            meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
-            if (head->children == NULL)
-                xmlAddChild(head, meta);
-            else
-                xmlAddPrevSibling(head->children, meta);
-            xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
-            xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
-        }
-    } else {
-        /* remove the meta tag if NULL is passed */
-        if (encoding == NULL) {
-            xmlUnlinkNode(meta);
-            xmlFreeNode(meta);
-        }
-        /* change the document only if there is a real encoding change */
-        else if (xmlStrcasestr(content, encoding) == NULL) {
-            xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
-        }
+    if (xmlNewProp(meta, BAD_CAST "charset", encoding) == NULL) {
+        xmlFreeNode(meta);
+        return(-1);
     }
 
+    if (head->children == NULL)
+        xmlAddChild(head, meta);
+    else
+        xmlAddPrevSibling(head->children, meta);
 
     return(0);
 }
@@ -383,7 +434,7 @@ htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur,
     outbuf->written = 0;
 
     use = xmlBufUse(buf);
-    htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
+    htmlNodeDumpInternal(outbuf, doc, cur, NULL, format);
     if (outbuf->error)
         ret = (size_t) -1;
     else
@@ -455,7 +506,7 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
     if (buf == NULL)
         return(-1);
 
-    htmlNodeDumpFormatOutput(buf, doc, cur, NULL, format);
+    htmlNodeDumpInternal(buf, doc, cur, NULL, format);
 
     ret = xmlOutputBufferClose(buf);
     return(ret);
@@ -479,14 +530,11 @@ htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
  * Serialize an HTML node to a memory, also returning the size of
  * the result. It's up to the caller to free the memory.
  *
- * WARNING: Uses the encoding from a deprecated meta tag, see
- * htmlGetMetaEncoding(). This is typically undesired. If no such
- * tag was found, ASCII with HTML 4.0 named character entities will
+ * Uses the encoding of the document. If the document has no
+ * encoding, ASCII with HTML 4.0 named character entities will
  * be used. This is inefficient compared to UTF-8 and might be
  * changed in a future version.
  *
- * Use of this function is therefore DISCOURAGED in favor of
- * htmlDocContentDumpFormatOutput().
  * @param cur  the document
  * @param mem  OUT: the memory pointer
  * @param size  OUT: the memory length
@@ -496,7 +544,6 @@ void
 htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
     xmlOutputBufferPtr buf;
     xmlCharEncodingHandlerPtr handler = NULL;
-    const char *encoding;
 
     xmlInitParser();
 
@@ -507,8 +554,7 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
     if (cur == NULL)
 	return;
 
-    encoding = (const char *) htmlGetMetaEncoding(cur);
-    if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK)
+    if (htmlFindOutputEncoder((char *) cur->encoding, &handler) != XML_ERR_OK)
         return;
     buf = xmlAllocOutputBuffer(handler);
     if (buf == NULL)
@@ -657,18 +703,19 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) {
 /**
  * Serialize an HTML node to an output buffer.
  *
- * Ignores `encoding` and uses the encoding of the output buffer.
+ * If `encoding` is specified, it is used to create or update meta
+ * tags containing the character encoding.
+ *
  * @param buf  the HTML buffer output
  * @param doc  the document
  * @param cur  the current node
- * @param encoding  the encoding string (unused)
+ * @param encoding  the encoding string (optional)
  * @param format  should formatting newlines been added
  */
 void
-htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
-	                 xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED,
-                         int format) {
-    xmlNodePtr root, parent;
+htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
+                     const char *encoding, int format) {
+    xmlNodePtr root, parent, metaHead = NULL;
     xmlAttrPtr attr;
     const htmlElemDesc * info;
 
@@ -699,24 +746,61 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
             }
             break;
 
-        case XML_ELEMENT_NODE:
+        case XML_ELEMENT_NODE: {
+            htmlMetaEncoding menc;
+            int isMeta = 0;
+            int addMeta = 0;
+
             /*
              * Some users like lxml are known to pass nodes with a corrupted
              * tree structure. Fall back to a recursive call to handle this
              * case.
              */
             if ((cur->parent != parent) && (cur->children != NULL)) {
-                htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
+                htmlNodeDumpInternal(buf, doc, cur, encoding, format);
                 break;
             }
 
             /*
              * Get specific HTML info for that node.
              */
-            if (cur->ns == NULL)
+            if (cur->ns == NULL) {
                 info = htmlTagLookup(cur->name);
-            else
+
+                if (encoding != NULL) {
+                    isMeta = htmlParseMetaEncoding(cur, &menc);
+
+                    /*
+                     * Don't add meta tag for "HTML" encoding.
+                     */
+                    if ((xmlStrcasecmp(BAD_CAST encoding,
+                                       BAD_CAST "HTML") != 0) &&
+                        (xmlStrcasecmp(cur->name, BAD_CAST "head") == 0) &&
+                        (parent != NULL) &&
+                        (parent->ns == NULL) &&
+                        (xmlStrcasecmp(parent->name, BAD_CAST "html") == 0) &&
+                        (parent->parent != NULL) &&
+                        (parent->parent->parent == NULL) &&
+                        (metaHead == NULL)) {
+                        xmlNodePtr n;
+
+                        metaHead = cur;
+                        addMeta = 1;
+
+                        for (n = cur->children; n != NULL; n = n->next) {
+                            int unused;
+
+                            if (htmlFindMetaEncodingAttr(n, &unused) != NULL) {
+                                metaHead = NULL;
+                                addMeta = 0;
+                                break;
+                            }
+                        }
+                    }
+                }
+            } else {
                 info = NULL;
+            }
 
             xmlOutputBufferWriteString(buf, "<");
             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
@@ -728,7 +812,23 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                 xmlNsListDumpOutput(buf, cur->nsDef);
             attr = cur->properties;
             while (attr != NULL) {
-                htmlAttrDumpOutput(buf, doc, attr);
+                if ((!isMeta) || (attr != menc.attr)) {
+                    htmlAttrDumpOutput(buf, doc, attr);
+                } else {
+                    xmlChar *newVal;
+
+                    xmlOutputBufferWriteString(buf, " ");
+                    xmlOutputBufferWriteString(buf, (char *) attr->name);
+
+                    newVal = htmlUpdateMetaEncoding(&menc, encoding);
+                    if (newVal == NULL) {
+                        buf->error = XML_ERR_NO_MEMORY;
+                        return;
+                    }
+                    xmlOutputBufferWriteString(buf, "=");
+                    xmlOutputBufferWriteQuotedString(buf, newVal);
+                    xmlFree(newVal);
+                }
                 attr = attr->next;
             }
 
@@ -740,7 +840,14 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                     (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
                     xmlOutputBufferWriteString(buf, ">");
                 } else {
-                    xmlOutputBufferWriteString(buf, "></");
+                    if (addMeta) {
+                        xmlOutputBufferWriteString(buf, "><meta charset=\"");
+                        /* TODO: Escape */
+                        xmlOutputBufferWriteString(buf, encoding);
+                        xmlOutputBufferWriteString(buf, "\"></");
+                    } else {
+                        xmlOutputBufferWriteString(buf, "></");
+                    }
                     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
                         xmlOutputBufferWriteString(buf,
                                 (const char *)cur->ns->prefix);
@@ -751,13 +858,25 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                 }
             } else {
                 xmlOutputBufferWriteString(buf, ">");
-                if ((format) && (info != NULL) && (!info->isinline) &&
-                    (cur->children->type != HTML_TEXT_NODE) &&
-                    (cur->children->type != HTML_ENTITY_REF_NODE) &&
-                    (cur->children != cur->last) &&
-                    (cur->name != NULL) &&
-                    (cur->name[0] != 'p')) /* p, pre, param */
+                if ((format) &&
+                    ((addMeta) ||
+                     ((info != NULL) && (!info->isinline) &&
+                      (cur->children->type != HTML_TEXT_NODE) &&
+                      (cur->children->type != HTML_ENTITY_REF_NODE) &&
+                      (cur->children != cur->last) &&
+                      (cur->name != NULL) &&
+                      (cur->name[0] != 'p')))) /* p, pre, param */
                     xmlOutputBufferWriteString(buf, "\n");
+                if (addMeta) {
+                    xmlOutputBufferWriteString(buf, "<meta charset=\"");
+                    /* TODO: Escape */
+                    xmlOutputBufferWriteString(buf, encoding);
+                    xmlOutputBufferWriteString(buf, "\">");
+                    if ((format) &&
+                        (cur->children->type != HTML_TEXT_NODE) &&
+                        (cur->children->type != HTML_ENTITY_REF_NODE))
+                        xmlOutputBufferWriteString(buf, "\n");
+                }
                 parent = cur;
                 cur = cur->children;
                 continue;
@@ -774,6 +893,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
             }
 
             break;
+        }
 
         case XML_ATTRIBUTE_NODE:
             htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur);
@@ -862,7 +982,7 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                 if ((format) && (info != NULL) && (!info->isinline) &&
                     (cur->last->type != HTML_TEXT_NODE) &&
                     (cur->last->type != HTML_ENTITY_REF_NODE) &&
-                    (cur->children != cur->last) &&
+                    ((cur->children != cur->last) || (cur == metaHead)) &&
                     (cur->name != NULL) &&
                     (cur->name[0] != 'p')) /* p, pre, param */
                     xmlOutputBufferWriteString(buf, "\n");
@@ -884,32 +1004,48 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
                         (parent->name[0] != 'p')) /* p, pre, param */
                         xmlOutputBufferWriteString(buf, "\n");
                 }
+
+                if (cur == metaHead)
+                    metaHead = NULL;
             }
         }
     }
 }
 
+/**
+ * Serialize an HTML node to an output buffer. Meta tags are not updated
+ * since a NULL encoding is forwarded to htmlNodeDumpInternal().
+ * @param buf  the HTML buffer output
+ * @param doc  the document
+ * @param cur  the current node
+ * @param encoding  the encoding string (unused)
+ * @param format  whether formatting newlines should be added
+ */
+void
+htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
+                         const char *encoding ATTRIBUTE_UNUSED, int format) {
+    htmlNodeDumpInternal(buf, doc, cur, NULL, format);
+}
+
 /**
  * Same as htmlNodeDumpFormatOutput() with `format` set to 1 which is
  * typically undesired. Use of this function is DISCOURAGED in favor
  * of htmlNodeDumpFormatOutput().
  *
- * Ignores `encoding` and uses the encoding of the output buffer.
  * @param buf  the HTML buffer output
  * @param doc  the document
  * @param cur  the current node
  * @param encoding  the encoding string (unused)
  */
 void
-htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
-	           xmlNodePtr cur, const char *encoding ATTRIBUTE_UNUSED) {
-    htmlNodeDumpFormatOutput(buf, doc, cur, NULL, 1);
+htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
+                   const char *encoding ATTRIBUTE_UNUSED) {
+    htmlNodeDumpInternal(buf, doc, cur, NULL, 1);
 }
 
 /**
  * Serialize an HTML document to an output buffer.
  *
- * Ignores `encoding` and uses the encoding of the output buffer.
  * @param buf  the HTML buffer output
  * @param cur  the document
  * @param encoding  the encoding string (unused)
@@ -919,31 +1055,14 @@ void
 htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 	                       const char *encoding ATTRIBUTE_UNUSED,
                                int format) {
-    int type = 0;
-
-    /*
-     * This is needed when serializing XML documents as HTML.
-     * xmlEncodeEntitiesReentrant uses the document type to
-     * determine the serialization mode.
-     *
-     * Once we call more low-level functions directly with
-     * HTML flags, this hack can be removed.
-     */
-    if (cur) {
-        type = cur->type;
-        cur->type = XML_HTML_DOCUMENT_NODE;
-    }
-    htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, format);
-    if (cur)
-        cur->type = (xmlElementType) type;
+    htmlNodeDumpInternal(buf, cur, (xmlNodePtr) cur, NULL, format);
 }
 
 /**
- * Same as htmlNodeDumpFormatOutput() with `format` set to 1 which is
- * typically undesired. Use of this function is DISCOURAGED in favor
- * of htmlDocContentDumpFormatOutput().
+ * Same as htmlDocContentDumpFormatOutput() with `format` set to 1
+ * which is typically undesired. Use of this function is DISCOURAGED
+ * in favor of htmlDocContentDumpFormatOutput().
  *
- * Ignores `encoding` and uses the encoding of the output buffer.
  * @param buf  the HTML buffer output
  * @param cur  the document
  * @param encoding  the encoding string (unused)
@@ -951,7 +1070,7 @@ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 void
 htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 	                 const char *encoding ATTRIBUTE_UNUSED) {
-    htmlNodeDumpFormatOutput(buf, cur, (xmlNodePtr) cur, NULL, 1);
+    htmlNodeDumpInternal(buf, cur, (xmlNodePtr) cur, NULL, 1);
 }
 
 /************************************************************************
@@ -963,13 +1082,12 @@ htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 /**
  * Serialize an HTML document to an open `FILE`.
  *
- * WARNING: Uses the encoding from a deprecated meta tag, see
- * htmlGetMetaEncoding(). This is typically undesired. If no such
- * tag was found, ASCII with HTML 4.0 named character entities will
+ * Uses the encoding of the document. If the document has no
+ * encoding, ASCII with HTML 4.0 named character entities will
  * be used. This is inefficient compared to UTF-8 and might be
  * changed in a future version.
  *
- * Also enables "formatting" unconditionally which is typically
+ * Enables "formatting" unconditionally which is typically
  * undesired.
  *
  * Use of this function is DISCOURAGED in favor of
@@ -983,7 +1101,6 @@ int
 htmlDocDump(FILE *f, xmlDocPtr cur) {
     xmlOutputBufferPtr buf;
     xmlCharEncodingHandlerPtr handler = NULL;
-    const char *encoding;
     int ret;
 
     xmlInitParser();
@@ -992,8 +1109,7 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
 	return(-1);
     }
 
-    encoding = (const char *) htmlGetMetaEncoding(cur);
-    if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK)
+    if (htmlFindOutputEncoder((char *) cur->encoding, &handler) != XML_ERR_OK)
         return(-1);
     buf = xmlOutputBufferCreateFile(f, handler);
     if (buf == NULL)
@@ -1005,18 +1121,10 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
 }
 
 /**
- * Serialize an HTML document to a file. If `filename` is `"-"`,
- * stdout is used. This is potentially insecure and might be
- * changed in a future version.
+ * Serialize an HTML document to a file.
  *
- * WARNING: Uses the encoding from a deprecated meta tag, see
- * htmlGetMetaEncoding(). This is typically undesired. If no such
- * tag was found, ASCII with HTML 4.0 named character entities will
- * be used. This is inefficient compared to UTF-8 and might be
- * changed in a future version.
- *
- * Also enables "formatting" unconditionally which is typically
- * undesired.
+ * Same as htmlSaveFileFormat() with `encoding` set to NULL and
+ * `format` set to 1 which is typically undesired.
  *
  * Use of this function is DISCOURAGED in favor of
  * htmlSaveFileFormat().
@@ -1027,31 +1135,12 @@ htmlDocDump(FILE *f, xmlDocPtr cur) {
  */
 int
 htmlSaveFile(const char *filename, xmlDocPtr cur) {
-    xmlOutputBufferPtr buf;
-    xmlCharEncodingHandlerPtr handler = NULL;
-    const char *encoding;
-    int ret;
-
-    if ((cur == NULL) || (filename == NULL))
-        return(-1);
-
-    xmlInitParser();
-
-    encoding = (const char *) htmlGetMetaEncoding(cur);
-    if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK)
-        return(-1);
-    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
-    if (buf == NULL)
-        return(-1);
-
-    htmlDocContentDumpOutput(buf, cur, NULL);
-
-    ret = xmlOutputBufferClose(buf);
-    return(ret);
+    return(htmlSaveFileFormat(filename, cur, NULL, 1));
 }
 
 /**
  * Serialize an HTML document to a file using a given encoding.
+ *
  * If `filename` is `"-"`, stdout is used. This is potentially
  * insecure and might be changed in a future version.
  *
@@ -1059,6 +1148,8 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) {
  * will be used. This is inefficient compared to UTF-8 and might be
  * changed in a future version.
  *
+ * Sets or updates meta tags containing the character encoding.
+ *
  * @param filename  the filename
  * @param cur  the document
  * @param format  should formatting newlines been added
@@ -1079,15 +1170,11 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
 
     if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK)
         return(-1);
-    if (handler != NULL)
-        htmlSetMetaEncoding(cur, (const xmlChar *) handler->name);
-    else
-	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
 
     /*
      * save the content to a temp buffer.
      */
-    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
+    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
     if (buf == NULL)
         return(0);
 
@@ -1098,6 +1185,8 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
 }
 
 /**
+ * Serialize an HTML document to a file.
+ *
  * Same as htmlSaveFileFormat() with `format` set to 1 which is
  * typically undesired. Also see the warnings there. Use of this
  * function is DISCOURAGED in favor of htmlSaveFileFormat().
diff --git a/include/private/html.h b/include/private/html.h
index 415be221..e5590edc 100644
--- a/include/private/html.h
+++ b/include/private/html.h
@@ -5,9 +5,26 @@
 
 #ifdef LIBXML_HTML_ENABLED
 
+#define IS_WS_HTML(c) \
+    (((c) == 0x20) || \
+     (((c) >= 0x09) && ((c) <= 0x0D) && ((c) != 0x0B)))
+
+typedef struct {
+    size_t start;
+    size_t end;
+    size_t size;
+} htmlMetaEncodingOffsets;
+
 XML_HIDDEN xmlNodePtr
 htmlCtxtParseContentInternal(xmlParserCtxtPtr ctxt, xmlParserInputPtr input);
 
+XML_HIDDEN int
+htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off);
+
+XML_HIDDEN void
+htmlNodeDumpInternal(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
+                     const char *encoding, int format);
+
 #endif /* LIBXML_HTML_ENABLED */
 
 #endif /* XML_HTML_H_PRIVATE__ */
diff --git a/python/tests/serialize.py b/python/tests/serialize.py
index 4666ec46..11db04c0 100755
--- a/python/tests/serialize.py
+++ b/python/tests/serialize.py
@@ -77,17 +77,14 @@ if str != """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http
    sys.exit(1)
 str = doc.serialize("ISO-8859-1")
 if str != """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>Hello</title></head><body><p>hello</p></body></html>
+<html><head><meta charset="ISO-8859-1"><title>Hello</title></head><body><p>hello</p></body></html>
 """:
    print("error serializing HTML document 2")
    sys.exit(1)
 str = doc.serialize(format=1)
 if str != """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>Hello</title>
-</head>
+<head><title>Hello</title></head>
 <body><p>hello</p></body>
 </html>
 """:
@@ -97,13 +94,13 @@ str = doc.serialize("iso-8859-1", 1)
 if str != """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <html>
 <head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<meta charset="iso-8859-1">
 <title>Hello</title>
 </head>
 <body><p>hello</p></body>
 </html>
 """:
-   print("error serializing HTML document 4")
+   print("error serializing HTML document 4", str)
    sys.exit(1)
 
 #
@@ -116,15 +113,12 @@ if str != """<html><head><title>Hello</title></head><body><p>hello</p></body></h
    print("error serializing HTML root 1")
    sys.exit(1)
 str = root.serialize("ISO-8859-1")
-if str != """<html><head><meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"><title>Hello</title></head><body><p>hello</p></body></html>""":
+if str != """<html><head><meta charset="ISO-8859-1"><title>Hello</title></head><body><p>hello</p></body></html>""":
    print("error serializing HTML root 2")
    sys.exit(1)
 str = root.serialize(format=1)
 if str != """<html>
-<head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
-<title>Hello</title>
-</head>
+<head><title>Hello</title></head>
 <body><p>hello</p></body>
 </html>""":
    print("error serializing HTML root 3")
@@ -132,7 +126,7 @@ if str != """<html>
 str = root.serialize("iso-8859-1", 1)
 if str != """<html>
 <head>
-<meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+<meta charset="iso-8859-1">
 <title>Hello</title>
 </head>
 <body><p>hello</p></body>
diff --git a/result/HTML/758518-entity.html b/result/HTML/758518-entity.html
index 7dde7c6c..bfc2b664 100644
--- a/result/HTML/758518-entity.html
+++ b/result/HTML/758518-entity.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>&amp;j&Ugrave;</body></html>
+<html><body>&amp;j�</body></html>
diff --git a/result/HTML/758518-tag.html b/result/HTML/758518-tag.html
index aa0ad342..de8ff580 100644
--- a/result/HTML/758518-tag.html
+++ b/result/HTML/758518-tag.html
@@ -1,2 +1,2 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<!--?a&#147;-->
+<!--?a�-->
diff --git a/result/HTML/758605.html b/result/HTML/758605.html
index 77f70a00..97ea22a4 100644
--- a/result/HTML/758605.html
+++ b/result/HTML/758605.html
@@ -1,3 +1,3 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<html><body>&amp;:&ecirc;
+<html><body>&amp;:�
 </body></html>
diff --git a/result/HTML/758606_2.html b/result/HTML/758606_2.html
index 1258fccd..f9b69b4d 100644
--- a/result/HTML/758606_2.html
+++ b/result/HTML/758606_2.html
@@ -1,3 +1,3 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
-<!--&#145;<!dOctYPE
+<!--�<!dOctYPE
 -->
diff --git a/result/HTML/fp40.htm b/result/HTML/fp40.htm
index 8affc19d..42ce90aa 100644
--- a/result/HTML/fp40.htm
+++ b/result/HTML/fp40.htm
@@ -11,7 +11,7 @@
 <font face="Verdana">
 <h1><a name="top">Microsoft FrontPage 2000 Server Extensions, UNIX</a></h1>
 
-<font size="2"><i>&copy; Copyright Microsoft Corporation, 1999&nbsp;</i></font>
+<font size="2"><i>� Copyright Microsoft Corporation, 1999�</i></font>
 
 
 <p>The FrontPage Server Extensions are a set of programs on the Web server that support: 
@@ -24,11 +24,11 @@
 </ul>
 
 
-<h2>Contents&nbsp;</h2>
+<h2>Contents�</h2>
 
 <a href="#relnotes">Release Notes</a><br>
 <a href="#moreinfo">Resources for More Information</a>
-<p>&nbsp;</p>
+<p>�</p>
 <hr>
 <h2><a name="relnotes">Release Notes</a></h2>
 
@@ -54,7 +54,7 @@ configuration files (access.conf, srm.conf), add the following lines to http.con
 </font>
 <blockquote>
   <font face="Courier New">
-ResourceConfig /dev/null&nbsp;<br>
+ResourceConfig /dev/null�<br>
 AccessConfig /dev/null</font>
 </blockquote>
 <font face="Verdana">
@@ -160,7 +160,7 @@ answering inquiries, so you can write your question in your own words. To begin,
 <p align="right"><font size="1"><a href="#moreinfo">Top of Section</a></font></p>
 
 
-<p>&nbsp;</p>  
+<p>�</p>  
 
 </font>
 </body>
diff --git a/result/HTML/html5_enc.html b/result/HTML/html5_enc.html
index 44ceebca..30edf290 100644
--- a/result/HTML/html5_enc.html
+++ b/result/HTML/html5_enc.html
@@ -4,6 +4,6 @@
 <meta charset="iso-8859-1">
 </head>
 <body>
-  <p>tr&egrave;s</p>
+  <p>tr�s</p>
 </body>
 </html>
diff --git a/result/HTML/wired.html b/result/HTML/wired.html
index 7441fc81..f9c4018d 100644
--- a/result/HTML/wired.html
+++ b/result/HTML/wired.html
@@ -91,7 +91,7 @@
       <td bgcolor="#FF0000" valign="BOTTOM" align="LEFT" width="97"><a href="/news/0,1287,,00.html"><img src="http://static.wired.com/news/images/news_ffffff.gif" width="103" height="30" border="0"></a></td>
 
 
-<td bgcolor="#FF0000" align="left" valign="center"><nobr><img src="http://static.wired.com/news/images/spacer.gif" width="344" height="1"><br><font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FFFFFF">&nbsp;&nbsp;&nbsp;<b>updated 10:15 a.m.&nbsp;&nbsp;15.Oct.99.PDT</b></font></nobr></td>
+<td bgcolor="#FF0000" align="left" valign="center"><nobr><img src="http://static.wired.com/news/images/spacer.gif" width="344" height="1"><br><font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FFFFFF">���<b>updated 10:15 a.m.��15.Oct.99.PDT</b></font></nobr></td>
 
 
         </tr>
@@ -190,7 +190,7 @@
 <input type="hidden" name="SOURCE" value="other">
  <input type="hidden" name="ACTION" value="subscribe">
  
-<input type="TEXT" name="from" size="10" value="enter email">&nbsp;
+<input type="TEXT" name="from" size="10" value="enter email">�
 </form>
 </td> 
 		<td valign="top" bgcolor="#99FF99">
@@ -210,7 +210,7 @@
   </tr>
   <tr>
     <td bgcolor="#99FF99" marginwidth="0" marginheight="0"><form method="get" action="http://r.wired.com/r/10020/http://stocks.wired.com/stocks_quotes.asp">
-<input type="TEXT" name="Symbol" size="12">&nbsp;<input type="SUBMIT" name="submit" value="GO">
+<input type="TEXT" name="Symbol" size="12">�<input type="SUBMIT" name="submit" value="GO">
 </form></td>
   </tr>
 <!-- BEGIN BUTTON ADS -->
@@ -418,14 +418,14 @@ or <a href="/news/pointcast/0,1366,,00.html">PointCast</a></font><br>
 
 <!-- SQL query for Package here -->
 
-<font face="Verdana, Arial, Geneva, sans-serif" size="2"><b><i>Nomad's Land</i></b></font><br><img src="http://static.wired.com/news/images/pix155.gif" height="10" width="155" alt=""><br><!-- IBD_SUBJECT: Homeless, but ID'd, in Seattle --><font face="Arial, Helvetica, sans-serif" size="5"><b><a href="/news/politics/0,1283,31911,00.html">Homeless, but ID'd, in Seattle</a></b></font><br><font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FF0000">8:15 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The city council approves a plan to track the homeless by a numbering system, saying it'll improve services. The implications worry privacy advocates, naturally. By Craig Bicknell.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in&nbsp;Politics</a></i></font><br><table bgcolor="#F0F0F0" cellpadding="0" cellspacing="0" border="0" width="147" align="RIGHT">
+<font face="Verdana, Arial, Geneva, sans-serif" size="2"><b><i>Nomad's Land</i></b></font><br><img src="http://static.wired.com/news/images/pix155.gif" height="10" width="155" alt=""><br><!-- IBD_SUBJECT: Homeless, but ID'd, in Seattle --><font face="Arial, Helvetica, sans-serif" size="5"><b><a href="/news/politics/0,1283,31911,00.html">Homeless, but ID'd, in Seattle</a></b></font><br><font size="1" face="Verdana, Arial, Geneva, sans-serif" color="#FF0000">8:15 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">The city council approves a plan to track the homeless by a numbering system, saying it'll improve services. The implications worry privacy advocates, naturally. By Craig Bicknell.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in�Politics</a></i></font><br><table bgcolor="#F0F0F0" cellpadding="0" cellspacing="0" border="0" width="147" align="RIGHT">
  <!-- Commentary Frag Begin -->
         <tr>
-          <td bgcolor="#000000">&nbsp;</td>
+          <td bgcolor="#000000">�</td>
           <td bgcolor="#000000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>HITS &amp; MISC.</b></font></td>
         </tr>
         <tr>
-          <td>&nbsp;</td>
+          <td>�</td>
           <td>
 <img src="http://static.wired.com/news/images/spacer.gif" height="5" width="5" alt=""><br>
 
@@ -434,12 +434,12 @@ or <a href="/news/pointcast/0,1366,,00.html">PointCast</a></font><br>
         </tr>
 <!-- Commentary Frag End -->
 <tr> 
-<td align="left" bgcolor="#000000">&nbsp;</td> 
+<td align="left" bgcolor="#000000">�</td> 
 <td bgcolor="#000000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>CURRENT HOO-HA</b></font></td>
 </tr>
 
 <tr> 
-<td>&nbsp;</td>
+<td>�</td>
 <td>
 <img src="http://static.wired.com/news/images/spacer.gif" height="5" width="5" alt="">
 <br>
@@ -466,26 +466,26 @@ or <a href="/news/pointcast/0,1366,,00.html">PointCast</a></font><br>
 
 <font size="2" face="Arial,Helvetica, sans-serif"><b><a href="/news/y2k/0,1360,,00.html">Y2K Watch</a></b></font><br><font size="2" face="Arial, Helvetica, sans-serif"><font size="1" face="Arial, Geneva, sans-serif" color="#000000">Tick... Tick... Tick...</font><br><br> 
 
-<font face="Arial, Helvetica, sans-serif" size="2"><b><i><a href="/news/special_reports/1,1293,,00.html">More Hoo-Ha</a></i></b></font><br>&nbsp;<br>
+<font face="Arial, Helvetica, sans-serif" size="2"><b><i><a href="/news/special_reports/1,1293,,00.html">More Hoo-Ha</a></i></b></font><br>�<br>
 
 </font></font></font></font></font></font></font></font></font></font>
 </td>
 </tr>
 <!-- start of Gen News -->
                 <tr> 
-                  <td bgcolor="#000000">&nbsp;</td>
+                  <td bgcolor="#000000">�</td>
           <td bgcolor="#000000"><font size="1" face="Verdana, Arial, Helvetica, sans-serif" color="#FFFFFF"><b>MEANWHILE...</b></font></td>
         </tr>
 
         <tr> 
-          <td>&nbsp;</td>
+          <td>�</td>
           <td align="left" valign="top">
           <img src="http://static.wired.com/news/images/spacer.gif" height="5" width="5" alt=""><br>
 
 
 <!-- 31942 -->
-<font size="2" face="Arial, Helvetica, sans-serif" color="#000000"><b>F&uuml;hrer Furor</b></font><br><font size="1" face="Arial, Geneva, sans-serif" color="#000000"><p>
-Contruction workers in Berlin opened an old wound in the German psyche this week when they accidentally stumbled across Adolf Hitler's bunker while excavating near the Brandenburg Gate. The bunker, just south of the Gate, was where Hitler and his closest associates barricaded themselves as the Red Army approached Berlin in the waning days of World War II. It is also where the F&uuml;hrer and his bride, Eva Braun, committed suicide rather than fall into the hands of the Russians. Although the bunker's location has never been a mystery, it has been sealed off since the end of the war to keep neo-Nazis from turning it into a shrine.
+<font size="2" face="Arial, Helvetica, sans-serif" color="#000000"><b>F�hrer Furor</b></font><br><font size="1" face="Arial, Geneva, sans-serif" color="#000000"><p>
+Contruction workers in Berlin opened an old wound in the German psyche this week when they accidentally stumbled across Adolf Hitler's bunker while excavating near the Brandenburg Gate. The bunker, just south of the Gate, was where Hitler and his closest associates barricaded themselves as the Red Army approached Berlin in the waning days of World War II. It is also where the F�hrer and his bride, Eva Braun, committed suicide rather than fall into the hands of the Russians. Although the bunker's location has never been a mystery, it has been sealed off since the end of the war to keep neo-Nazis from turning it into a shrine.
 <br></p>
 <li>More from <a href="http://www.lycos.com/news/flash/hitlerbunker.html?v=wn1015&amp;lpv=1">Lycos</a>
 </li></font><br><br>
@@ -495,7 +495,7 @@ Contruction workers in Berlin opened an old wound in the German psyche this week
 </table>
 
 
-<font size="1">&nbsp;<br></font>
+<font size="1">�<br></font>
 
 <br>
 
@@ -503,7 +503,7 @@ Contruction workers in Berlin opened an old wound in the German psyche this week
 <img src="http://static.wired.com/news/images/pix155.gif" height="10" width="155" alt=""><br>
 
 <!-- SQL query here -->
-<!-- IBD_SUBJECT:Wall Street Keeps Reeling --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/reuters/0,1349,31934,00.html">Wall Street Keeps Reeling</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">10:15 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Dow and Nasdaq suffer sizeable losses during the first half of Friday trading. Why? Wholesale prices are the highest this decade, and Greenspan is concerned about stock prices.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in&nbsp;Reuters</a></i></font><br><br><!-- IBD_SUBJECT:The Market's Madness --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/reuters/0,1349,31935,00.html">The Market's Madness</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">9:10 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The bulls and the bears are in the midst of a Battle Royale, and all this turbulence is not a healthy thing. So say the experts.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in&nbsp;Reuters</a></i></font><br><br><!-- IBD_SUBJECT:'Want a Loan? What's Your Race?' --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/politics/0,1283,31533,00.html">'Want a Loan? What's Your Race?'</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Federal Reserve is in the middle of changing banking regulations to let banks collect data on the race, sex, religion, and national origin of their customers. By Declan McCullagh. 
</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in&nbsp;Politics</a></i></font><br><br><!-- IBD_SUBJECT:Music Regs: A Bagful of Noise --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31832,00.html">Music Regs: A Bagful of Noise</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The struggle to come up with a digital music standard that would minimize download piracy is pushing right up against the holiday gift-giving season. By Jennifer Sullivan.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&nbsp;Business</a></i></font><br><br><!-- IBD_SUBJECT:Can't Beat 'Em? Green 'Em --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31927,00.html">Can't Beat 'Em? Green 'Em</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">High-tech companies are notoriously environmentally unfriendly, and a growing number of "Greenies" are trying to change things from the inside ... with varying results. By Chris Gaither.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&nbsp;Technology</a></i></font><br><br><!-- IBD_SUBJECT:Y2K Cloud Over MS Office --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31932,00.html">Y2K Cloud Over MS Office</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">Windows NT sales remain strong, but corporate clients are wary of upgrading to MS Office 2000. 
Analysts say that means strong, but not stunning, Microsoft earnings. </font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&nbsp;Business</a></i></font><br><br><font color="#FF0000" face="Verdana, Arial, Geneva, sans-serif" size="1">Med-Tech</font><br><!-- IBD_SUBJECT:Biochips for Custom Chemo --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31914,00.html">Biochips for Custom Chemo</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">Different cancer patients need different medicine, but doctors can rarely determine the best match. New biochip technology promises chemotherapy tailored to a tumor's genetic make-up. By Kristen Philipkoski.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&nbsp;Technology</a></i></font><br><br><!-- IBD_SUBJECT:High Stakes in Priceline Suit --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31916,00.html">High Stakes in Priceline Suit</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">It's not just another round of Redmond-bashing. A Priceline.com lawsuit against Microsoft's Expedia.com may have a big impact on how Net companies protect their business models. 
By Joanna Glasner.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in&nbsp;Business</a></i></font><br><br><!-- IBD_SUBJECT:Biodiversity Merges Online --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31918,00.html">Biodiversity Merges Online</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>&nbsp;<font face="Verdana, Arial, Geneva, sans-serif" size="2">The far-flung databases on global biodiversity get together to form one monster database. Soon the red-eyed tree frog will be eyeing those Swedish lingonberries. From the Environment News Service.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in&nbsp;Technology</a></i></font><br><br><!-- SQL above -->
+<!-- IBD_SUBJECT:Wall Street Keeps Reeling --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/reuters/0,1349,31934,00.html">Wall Street Keeps Reeling</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">10:15 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Dow and Nasdaq suffer sizeable losses during the first half of Friday trading. Why? Wholesale prices are the highest this decade, and Greenspan is concerned about stock prices.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in�Reuters</a></i></font><br><br><!-- IBD_SUBJECT:The Market's Madness --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/reuters/0,1349,31935,00.html">The Market's Madness</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">9:10 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">The bulls and the bears are in the midst of a Battle Royale, and all this turbulence is not a healthy thing. So say the experts.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/reuters/0,1349,,00.html">in�Reuters</a></i></font><br><br><!-- IBD_SUBJECT:'Want a Loan? What's Your Race?' --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/politics/0,1283,31533,00.html">'Want a Loan? What's Your Race?'</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">The Federal Reserve is in the middle of changing banking regulations to let banks collect data on the race, sex, religion, and national origin of their customers. By Declan McCullagh. 
</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/politics/0,1283,,00.html">in�Politics</a></i></font><br><br><!-- IBD_SUBJECT:Music Regs: A Bagful of Noise --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31832,00.html">Music Regs: A Bagful of Noise</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">The struggle to come up with a digital music standard that would minimize download piracy is pushing right up against the holiday gift-giving season. By Jennifer Sullivan.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in�Business</a></i></font><br><br><!-- IBD_SUBJECT:Can't Beat 'Em? Green 'Em --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31927,00.html">Can't Beat 'Em? Green 'Em</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">High-tech companies are notoriously environmentally unfriendly, and a growing number of "Greenies" are trying to change things from the inside ... with varying results. By Chris Gaither.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in�Technology</a></i></font><br><br><!-- IBD_SUBJECT:Y2K Cloud Over MS Office --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31932,00.html">Y2K Cloud Over MS Office</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">Windows NT sales remain strong, but corporate clients are wary of upgrading to MS Office 2000. Analysts say that means strong, but not stunning, Microsoft earnings. 
</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in�Business</a></i></font><br><br><font color="#FF0000" face="Verdana, Arial, Geneva, sans-serif" size="1">Med-Tech</font><br><!-- IBD_SUBJECT:Biochips for Custom Chemo --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31914,00.html">Biochips for Custom Chemo</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">Different cancer patients need different medicine, but doctors can rarely determine the best match. New biochip technology promises chemotherapy tailored to a tumor's genetic make-up. By Kristen Philipkoski.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in�Technology</a></i></font><br><br><!-- IBD_SUBJECT:High Stakes in Priceline Suit --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/business/0,1367,31916,00.html">High Stakes in Priceline Suit</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">It's not just another round of Redmond-bashing. A Priceline.com lawsuit against Microsoft's Expedia.com may have a big impact on how Net companies protect their business models. 
By Joanna Glasner.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/business/0,1367,,00.html">in�Business</a></i></font><br><br><!-- IBD_SUBJECT:Biodiversity Merges Online --><font face="Arial, Helvetica, sans-serif" size="3"><b><a href="/news/technology/0,1282,31918,00.html">Biodiversity Merges Online</a></b></font><br><font color="#ff0000" face="Verdana, Arial, Geneva, sans-serif" size="1">3:00 a.m.</font>�<font face="Verdana, Arial, Geneva, sans-serif" size="2">The far-flung databases on global biodiversity get together to form one monster database. Soon the red-eyed tree frog will be eyeing those Swedish lingonberries. From the Environment News Service.</font><br><font face="Verdana, Arial, Helvetica, sans-serif" size="1"><i><a href="/news/technology/0,1282,,00.html">in�Technology</a></i></font><br><br><!-- SQL above -->
 
 
         
@@ -536,18 +536,18 @@ Contruction workers in Berlin opened an old wound in the German psyche this week
 
 <p><font face="Verdana, Arial, Geneva, sans-serif" size="1">
 <a href="http://www.wired.com/news/feedback.html">Send us feedback</a>
-&nbsp;|&nbsp;
+�|�
 <a href="http://www.hotwired.com/jobs/">Work at Wired Digital</a>
-&nbsp;|&nbsp;
+�|�
 <a href="http://home.wired.com/advertising/">Advertise with us</a>
 <br>
 <a href="http://home.wired.com/">About Wired Digital</a>
-&nbsp;|&nbsp;
+�|�
 <a href="http://www.wired.com/home/digital/privacy/">Our Privacy Policy</a></font>
 
 
 </p>
-<p><font face="Verdana, Arial, Geneva" size="1"><a href="http://www.wired.com/home/copyright.html">Copyright</a> &copy; 1994-99 Wired Digital Inc. All rights reserved.</font>
+<p><font face="Verdana, Arial, Geneva" size="1"><a href="http://www.wired.com/home/copyright.html">Copyright</a> � 1994-99 Wired Digital Inc. All rights reserved.</font>
 
 <br>
 <!-- TRACKING -->
diff --git a/result/HTML/xml-declaration-1.html b/result/HTML/xml-declaration-1.html
index 1d0ca6c0..e4e9c35a 100644
--- a/result/HTML/xml-declaration-1.html
+++ b/result/HTML/xml-declaration-1.html
@@ -1,4 +1,4 @@
 <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
 <!--?xml encoding="UTF-8"--><html><body>
-<p>&ouml;&auml;&uuml;&szlig;</p>
+<p>öäüß</p>
 </body></html>
diff --git a/testparser.c b/testparser.c
index aacdd679..24264bcc 100644
--- a/testparser.c
+++ b/testparser.c
@@ -14,6 +14,7 @@
 #include <libxml/xmlsave.h>
 #include <libxml/xmlwriter.h>
 #include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
 
 #include <string.h>
 
@@ -611,6 +612,129 @@ testHtmlIds(void) {
     return 0;
 }
 
+#define MHE "meta http-equiv=\"Content-Type\"" /* shared meta tag prefix */
+
+static int
+testHtmlInsertMetaEncoding(void) {
+    /* Tests meta charset insertion; a head element is currently required. */
+    const char *html =
+        "<html>"
+        "<head></head>"
+        "<body>text</body>"
+        "</html>\n";
+    const char *expect =
+        "<html>"
+        "<head><meta charset=\"utf-8\"></head>"
+        "<body>text</body>"
+        "</html>\n";
+    htmlDocPtr doc;
+    xmlBufferPtr buf;
+    xmlSaveCtxtPtr save;
+    xmlChar *out;
+    int size, err = 0; /* err is the return value: 0 = pass, 1 = mismatch */
+
+    /* NOTE(review): doc, buf and save are used without NULL checks. */
+    doc = htmlReadDoc(BAD_CAST html, NULL, NULL, HTML_PARSE_NODEFDTD);
+
+    /* Saving through xmlSave as "utf-8" is expected to add the meta tag. */
+    buf = xmlBufferCreate();
+    save = xmlSaveToBuffer(buf, "utf-8", 0);
+    xmlSaveDoc(save, doc);
+    xmlSaveClose(save);
+    if (!xmlStrEqual(xmlBufferContent(buf), BAD_CAST expect)) {
+        fprintf(stderr, "meta tag insertion failed when serializing\n");
+        err = 1;
+    }
+    xmlBufferFree(buf);
+
+    htmlSetMetaEncoding(doc, BAD_CAST "utf-8");
+    /* htmlDocDumpMemoryFormat doesn't update meta tags, hence the call above */
+    htmlDocDumpMemoryFormat(doc, &out, &size, 0);
+    if (!xmlStrEqual(out, BAD_CAST expect)) {
+        fprintf(stderr, "htmlSetMetaEncoding insertion failed\n");
+        err = 1;
+    }
+    xmlFree(out);
+
+    xmlFreeDoc(doc);
+    return err;
+}
+
+static int
+testHtmlUpdateMetaEncoding(void) {
+    /* We rely on the implementation adjusting all meta tags, not just one. */
+    const char *html =
+        "<html>\n"
+        "    <head>\n"
+        "        <meta charset=\"utf-8\">\n"
+        "        <meta charset=\"  foo  \">\n"
+        "        <meta charset=\"\">\n"
+        "        <" MHE " content=\"text/html; ChArSeT=foo\">\n"
+        "        <" MHE " content=\"text/html; charset = \">\n"
+        "        <" MHE " content=\"text/html; charset = '  foo  '\">\n"
+        "        <" MHE " content=\"text/html; charset = '  foo  \">\n"
+        "        <" MHE " content='text/html; charset = \"  foo  \"'>\n"
+        "        <" MHE " content='text/html; charset = \"  foo  '>\n"
+        "        <" MHE " content=\"charset ; charset = bar; baz\">\n"
+        "        <" MHE " content=\"text/html\">\n"
+        "        <" MHE " content=\"\">\n"
+        "        <" MHE ">\n"
+        "    </head>\n"
+        "    <body></body>\n"
+        "</html>\n";
+    const char *expect =
+        "<html>\n"
+        "    <head>\n"
+        "        <meta charset=\"utf-8\">\n"
+        "        <meta charset=\"  utf-8  \">\n" /* padding kept */
+        "        <meta charset=\"utf-8\">\n" /* was empty in html */
+        "        <" MHE " content=\"text/html; ChArSeT=utf-8\">\n"
+        "        <" MHE " content=\"text/html; charset = \">\n"
+        "        <" MHE " content=\"text/html; charset = '  utf-8  '\">\n"
+        "        <" MHE " content=\"text/html; charset = '  foo  \">\n"
+        "        <" MHE " content='text/html; charset = \"  utf-8  \"'>\n"
+        "        <" MHE " content='text/html; charset = \"  foo  '>\n"
+        "        <" MHE " content=\"charset ; charset = utf-8; baz\">\n"
+        "        <" MHE " content=\"text/html\">\n"
+        "        <" MHE " content=\"\">\n"
+        "        <" MHE ">\n"
+        "    </head>\n"
+        "    <body></body>\n"
+        "</html>\n";
+    htmlDocPtr doc;
+    xmlBufferPtr buf;
+    xmlSaveCtxtPtr save;
+    xmlChar *out;
+    int size, err = 0; /* err is the return value: 0 = pass, 1 = mismatch */
+
+    doc = htmlReadDoc(BAD_CAST html, NULL, NULL, HTML_PARSE_NODEFDTD);
+
+    /* Saving through xmlSave is expected to rewrite the meta tags. */
+    buf = xmlBufferCreate();
+    save = xmlSaveToBuffer(buf, NULL, 0); /* no explicit output encoding */
+    xmlSaveDoc(save, doc);
+    xmlSaveClose(save);
+    if (!xmlStrEqual(xmlBufferContent(buf), BAD_CAST expect)) {
+        fprintf(stderr, "meta tag update failed when serializing\n");
+        err = 1;
+    }
+    xmlBufferFree(buf);
+
+    xmlFree((xmlChar *) doc->encoding);
+    doc->encoding = NULL; /* avoid a dangling pointer after the free above */
+    htmlSetMetaEncoding(doc, BAD_CAST "utf-8");
+    /* htmlDocDumpMemoryFormat doesn't update meta tags, hence the call above */
+    htmlDocDumpMemoryFormat(doc, &out, &size, 0);
+    if (!xmlStrEqual(out, BAD_CAST expect)) {
+        fprintf(stderr, "htmlSetMetaEncoding update failed\n");
+        err = 1;
+    }
+    xmlFree(out);
+
+    xmlFreeDoc(doc);
+    return err;
+}
+
 #ifdef LIBXML_PUSH_ENABLED
 static int
 testHtmlPushWithEncoding(void) {
@@ -1293,6 +1417,8 @@ main(void) {
 #endif
 #ifdef LIBXML_HTML_ENABLED
     err |= testHtmlIds();
+    err |= testHtmlInsertMetaEncoding();
+    err |= testHtmlUpdateMetaEncoding();
 #ifdef LIBXML_PUSH_ENABLED
     err |= testHtmlPushWithEncoding();
 #endif
diff --git a/xmlsave.c b/xmlsave.c
index 2aa8531a..8df1608c 100644
--- a/xmlsave.c
+++ b/xmlsave.c
@@ -25,6 +25,7 @@
 #include "private/enc.h"
 #include "private/entities.h"
 #include "private/error.h"
+#include "private/html.h"
 #include "private/io.h"
 #include "private/save.h"
 
@@ -1022,32 +1023,24 @@ xmlAttrDumpOutput(xmlSaveCtxtPtr ctxt, xmlAttrPtr cur) {
  */
 static int
 htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
-    const xmlChar *encoding;
     int switched_encoding = 0;
     int format = 0;
     xmlDocPtr doc;
 
     xmlInitParser();
 
-    encoding = ctxt->encoding;
     doc = cur->doc;
-    if (doc != NULL) {
-        if (encoding == NULL)
-            encoding = doc->encoding;
-
-        /* We probably shouldn't do this unless we're dumping a document. */
-        if (encoding != NULL)
-            htmlSetMetaEncoding(doc, encoding);
-    }
 
     if (ctxt->encoding == NULL) {
-        if ((encoding == NULL) && (doc != NULL))
-            encoding = htmlGetMetaEncoding(doc);
+        const char *encoding = NULL;
+
+        if (doc != NULL)
+            encoding = (char *) doc->encoding;
 
         if (encoding == NULL)
-            encoding = BAD_CAST "HTML";
+            encoding = "HTML";
 
-	if (xmlSaveSwitchEncoding(ctxt, (const char*) encoding) < 0)
+	if (xmlSaveSwitchEncoding(ctxt, encoding) < 0)
 	    return(-1);
 	switched_encoding = 1;
     }
@@ -1055,7 +1048,7 @@ htmlNodeDumpOutputInternal(xmlSaveCtxtPtr ctxt, xmlNodePtr cur) {
     if (ctxt->options & XML_SAVE_FORMAT)
         format = 1;
 
-    htmlNodeDumpFormatOutput(ctxt->buf, doc, cur, NULL, format);
+    htmlNodeDumpInternal(ctxt->buf, doc, cur, (char *) ctxt->encoding, format);
 
     if (switched_encoding) {
 	xmlSaveClearEncoding(ctxt);
@@ -1361,16 +1354,9 @@ xmlSaveDocInternal(xmlSaveCtxtPtr ctxt, xmlDocPtr cur,
 #ifdef LIBXML_HTML_ENABLED
         int format = 0;
 
-        if (encoding != NULL)
-	    htmlSetMetaEncoding(cur, encoding);
-
 	if (ctxt->encoding == NULL) {
-            if (encoding == NULL) {
-                encoding = htmlGetMetaEncoding(cur);
-
-                if (encoding == NULL)
-                    encoding = BAD_CAST "HTML";
-            }
+            if (encoding == NULL)
+                encoding = BAD_CAST "HTML";
 
 	    if (xmlSaveSwitchEncoding(ctxt, (const char*) encoding) < 0) {
 		return(-1);
@@ -1380,7 +1366,8 @@ xmlSaveDocInternal(xmlSaveCtxtPtr ctxt, xmlDocPtr cur,
 
         if (ctxt->options & XML_SAVE_FORMAT)
             format = 1;
-        htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
+        htmlNodeDumpInternal(buf, cur, (htmlNodePtr) cur,
+                             (char *) ctxt->encoding, format);
 #else
         return(-1);
 #endif