mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	For https://bugzilla.gnome.org/show_bug.cgi?id=630682 The python tests were reporting errors, some of it was due to a small change in case encoding, but the main one was about htmlSetMetaEncoding(doc, NULL) being broken by not removing the associated meta tag anymore
		
			
				
	
	
		
			1236 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			1236 lines
		
	
	
		
			32 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * HTMLtree.c : implementation of access function for an HTML tree.
 | |
|  *
 | |
|  * See Copyright for the status of this software.
 | |
|  *
 | |
|  * daniel@veillard.com
 | |
|  */
 | |
| 
 | |
| 
 | |
| #define IN_LIBXML
 | |
| #include "libxml.h"
 | |
| #ifdef LIBXML_HTML_ENABLED
 | |
| 
 | |
| #include <string.h> /* for memset() only ! */
 | |
| 
 | |
| #ifdef HAVE_CTYPE_H
 | |
| #include <ctype.h>
 | |
| #endif
 | |
| #ifdef HAVE_STDLIB_H
 | |
| #include <stdlib.h>
 | |
| #endif
 | |
| 
 | |
| #include <libxml/xmlmemory.h>
 | |
| #include <libxml/HTMLparser.h>
 | |
| #include <libxml/HTMLtree.h>
 | |
| #include <libxml/entities.h>
 | |
| #include <libxml/valid.h>
 | |
| #include <libxml/xmlerror.h>
 | |
| #include <libxml/parserInternals.h>
 | |
| #include <libxml/globals.h>
 | |
| #include <libxml/uri.h>
 | |
| 
 | |
| /************************************************************************
 | |
|  *									*
 | |
|  *   		Getting/Setting encoding meta tags			*
 | |
|  *									*
 | |
|  ************************************************************************/
 | |
| 
 | |
| /**
 | |
|  * htmlGetMetaEncoding:
 | |
|  * @doc:  the document
 | |
|  * 
 | |
|  * Encoding definition lookup in the Meta tags
 | |
|  *
 | |
|  * Returns the current encoding as flagged in the HTML source
 | |
|  */
 | |
| const xmlChar *
 | |
| htmlGetMetaEncoding(htmlDocPtr doc) {
 | |
|     htmlNodePtr cur;
 | |
|     const xmlChar *content;
 | |
|     const xmlChar *encoding;
 | |
| 
 | |
|     if (doc == NULL)
 | |
| 	return(NULL);
 | |
|     cur = doc->children;
 | |
| 
 | |
|     /*
 | |
|      * Search the html
 | |
|      */
 | |
|     while (cur != NULL) {
 | |
| 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 | |
| 	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
 | |
| 		break;
 | |
| 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
 | |
| 		goto found_head;
 | |
| 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
 | |
| 		goto found_meta;
 | |
| 	}
 | |
| 	cur = cur->next;
 | |
|     }
 | |
|     if (cur == NULL)
 | |
| 	return(NULL);
 | |
|     cur = cur->children;
 | |
| 
 | |
|     /*
 | |
|      * Search the head
 | |
|      */
 | |
|     while (cur != NULL) {
 | |
| 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 | |
| 	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
 | |
| 		break;
 | |
| 	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
 | |
| 		goto found_meta;
 | |
| 	}
 | |
| 	cur = cur->next;
 | |
|     }
 | |
|     if (cur == NULL)
 | |
| 	return(NULL);
 | |
| found_head:
 | |
|     cur = cur->children;
 | |
| 
 | |
|     /*
 | |
|      * Search the meta elements
 | |
|      */
 | |
| found_meta:
 | |
|     while (cur != NULL) {
 | |
| 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 | |
| 	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
 | |
| 		xmlAttrPtr attr = cur->properties;
 | |
| 		int http;
 | |
| 		const xmlChar *value;
 | |
| 
 | |
| 		content = NULL;
 | |
| 		http = 0;
 | |
| 		while (attr != NULL) {
 | |
| 		    if ((attr->children != NULL) &&
 | |
| 		        (attr->children->type == XML_TEXT_NODE) &&
 | |
| 		        (attr->children->next == NULL)) {
 | |
| 			value = attr->children->content;
 | |
| 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 | |
| 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 | |
| 			    http = 1;
 | |
| 			else if ((value != NULL)
 | |
| 			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 | |
| 			    content = value;
 | |
| 			if ((http != 0) && (content != NULL))
 | |
| 			    goto found_content;
 | |
| 		    }
 | |
| 		    attr = attr->next;
 | |
| 		}
 | |
| 	    }
 | |
| 	}
 | |
| 	cur = cur->next;
 | |
|     }
 | |
|     return(NULL);
 | |
| 
 | |
| found_content:
 | |
|     encoding = xmlStrstr(content, BAD_CAST"charset=");
 | |
|     if (encoding == NULL) 
 | |
| 	encoding = xmlStrstr(content, BAD_CAST"Charset=");
 | |
|     if (encoding == NULL) 
 | |
| 	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
 | |
|     if (encoding != NULL) {
 | |
| 	encoding += 8;
 | |
|     } else {
 | |
| 	encoding = xmlStrstr(content, BAD_CAST"charset =");
 | |
| 	if (encoding == NULL) 
 | |
| 	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
 | |
| 	if (encoding == NULL) 
 | |
| 	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
 | |
| 	if (encoding != NULL)
 | |
| 	    encoding += 9;
 | |
|     }
 | |
|     if (encoding != NULL) {
 | |
| 	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
 | |
|     }
 | |
|     return(encoding);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlSetMetaEncoding:
 | |
|  * @doc:  the document
 | |
|  * @encoding:  the encoding string
 | |
|  *
 | |
|  * Sets the current encoding in the Meta tags
 | |
|  * NOTE: this will not change the document content encoding, just
 | |
|  * the META flag associated.
 | |
|  *
 | |
|  * Returns 0 in case of success and -1 in case of error
 | |
|  */
 | |
| int
 | |
| htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
 | |
|     htmlNodePtr cur, meta = NULL, head = NULL;
 | |
|     const xmlChar *content = NULL;
 | |
|     char newcontent[100];
 | |
| 
 | |
|     newcontent[0] = 0;
 | |
| 
 | |
|     if (doc == NULL)
 | |
| 	return(-1);
 | |
| 
 | |
|     /* html isn't a real encoding it's just libxml2 way to get entities */
 | |
|     if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
 | |
|         return(-1);
 | |
| 
 | |
|     if (encoding != NULL) {
 | |
| 	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
 | |
|                 (char *)encoding);
 | |
| 	newcontent[sizeof(newcontent) - 1] = 0;
 | |
|     }
 | |
| 
 | |
|     cur = doc->children;
 | |
| 
 | |
|     /*
 | |
|      * Search the html
 | |
|      */
 | |
|     while (cur != NULL) {
 | |
| 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 | |
| 	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
 | |
| 		break;
 | |
| 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 | |
| 		goto found_head;
 | |
| 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
 | |
| 		goto found_meta;
 | |
| 	}
 | |
| 	cur = cur->next;
 | |
|     }
 | |
|     if (cur == NULL)
 | |
| 	return(-1);
 | |
|     cur = cur->children;
 | |
| 
 | |
|     /*
 | |
|      * Search the head
 | |
|      */
 | |
|     while (cur != NULL) {
 | |
| 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 | |
| 	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
 | |
| 		break;
 | |
| 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 | |
|                 head = cur->parent;
 | |
| 		goto found_meta;
 | |
|             }
 | |
| 	}
 | |
| 	cur = cur->next;
 | |
|     }
 | |
|     if (cur == NULL)
 | |
| 	return(-1);
 | |
| found_head:
 | |
|     head = cur;
 | |
|     if (cur->children == NULL)
 | |
|         goto create;
 | |
|     cur = cur->children;
 | |
| 
 | |
| found_meta:
 | |
|     /*
 | |
|      * Search and update all the remaining the meta elements carrying
 | |
|      * encoding informations
 | |
|      */
 | |
|     while (cur != NULL) {
 | |
| 	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
 | |
| 	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
 | |
| 		xmlAttrPtr attr = cur->properties;
 | |
| 		int http;
 | |
| 		const xmlChar *value;
 | |
| 
 | |
| 		content = NULL;
 | |
| 		http = 0;
 | |
| 		while (attr != NULL) {
 | |
| 		    if ((attr->children != NULL) &&
 | |
| 		        (attr->children->type == XML_TEXT_NODE) &&
 | |
| 		        (attr->children->next == NULL)) {
 | |
| 			value = attr->children->content;
 | |
| 			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
 | |
| 			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
 | |
| 			    http = 1;
 | |
| 			else
 | |
|                         {
 | |
|                            if ((value != NULL) &&
 | |
|                                (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
 | |
| 			       content = value;
 | |
|                         }
 | |
| 		        if ((http != 0) && (content != NULL))
 | |
| 			    break;
 | |
| 		    }
 | |
| 		    attr = attr->next;
 | |
| 		}
 | |
| 		if ((http != 0) && (content != NULL)) {
 | |
| 		    meta = cur;
 | |
| 		    break;
 | |
| 		}
 | |
| 
 | |
| 	    }
 | |
| 	}
 | |
| 	cur = cur->next;
 | |
|     }
 | |
| create:
 | |
|     if (meta == NULL) {
 | |
|         if ((encoding != NULL) && (head != NULL)) {
 | |
|             /*
 | |
|              * Create a new Meta element with the right attributes
 | |
|              */
 | |
| 
 | |
|             meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
 | |
|             if (head->children == NULL)
 | |
|                 xmlAddChild(head, meta);
 | |
|             else
 | |
|                 xmlAddPrevSibling(head->children, meta);
 | |
|             xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
 | |
|             xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 | |
|         }
 | |
|     } else {
 | |
|         /* remove the meta tag if NULL is passed */
 | |
|         if (encoding == NULL) {
 | |
|             xmlUnlinkNode(meta);
 | |
|             xmlFreeNode(meta);
 | |
|         }
 | |
|         /* change the document only if there is a real encoding change */
 | |
|         else if (xmlStrcasestr(content, encoding) == NULL) {
 | |
|             xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
 | |
|         }
 | |
|     }
 | |
| 
 | |
| 
 | |
|     return(0);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * booleanHTMLAttrs:
 | |
|  *
 | |
|  * These are the HTML attributes which will be output
 | |
|  * in minimized form, i.e. <option selected="selected"> will be
 | |
|  * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 | |
|  *
 | |
|  */
 | |
| static const char* htmlBooleanAttrs[] = {
 | |
|   "checked", "compact", "declare", "defer", "disabled", "ismap",
 | |
|   "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
 | |
|   "selected", NULL
 | |
| };
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * htmlIsBooleanAttr:
 | |
|  * @name:  the name of the attribute to check
 | |
|  *
 | |
|  * Determine if a given attribute is a boolean attribute.
 | |
|  * 
 | |
|  * returns: false if the attribute is not boolean, true otherwise.
 | |
|  */
 | |
| int
 | |
| htmlIsBooleanAttr(const xmlChar *name)
 | |
| {
 | |
|     int i = 0;
 | |
| 
 | |
|     while (htmlBooleanAttrs[i] != NULL) {
 | |
|         if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
 | |
|             return 1;
 | |
|         i++;
 | |
|     }
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| #ifdef LIBXML_OUTPUT_ENABLED
 | |
| /*
 | |
|  * private routine exported from xmlIO.c
 | |
|  */
 | |
| xmlOutputBufferPtr
 | |
| xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
 | |
| /************************************************************************
 | |
|  *									*
 | |
|  * 			Output error handlers				*
 | |
|  *									*
 | |
|  ************************************************************************/
 | |
| /**
 | |
|  * htmlSaveErrMemory:
 | |
|  * @extra:  extra informations
 | |
|  *
 | |
|  * Handle an out of memory condition
 | |
|  */
 | |
| static void
 | |
| htmlSaveErrMemory(const char *extra)
 | |
| {
 | |
|     __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlSaveErr:
 | |
|  * @code:  the error number
 | |
|  * @node:  the location of the error.
 | |
|  * @extra:  extra informations
 | |
|  *
 | |
|  * Handle an out of memory condition
 | |
|  */
 | |
| static void
 | |
| htmlSaveErr(int code, xmlNodePtr node, const char *extra)
 | |
| {
 | |
|     const char *msg = NULL;
 | |
| 
 | |
|     switch(code) {
 | |
|         case XML_SAVE_NOT_UTF8:
 | |
| 	    msg = "string is not in UTF-8\n";
 | |
| 	    break;
 | |
| 	case XML_SAVE_CHAR_INVALID:
 | |
| 	    msg = "invalid character value\n";
 | |
| 	    break;
 | |
| 	case XML_SAVE_UNKNOWN_ENCODING:
 | |
| 	    msg = "unknown encoding %s\n";
 | |
| 	    break;
 | |
| 	case XML_SAVE_NO_DOCTYPE:
 | |
| 	    msg = "HTML has no DOCTYPE\n";
 | |
| 	    break;
 | |
| 	default:
 | |
| 	    msg = "unexpected error number\n";
 | |
|     }
 | |
|     __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
 | |
| }
 | |
| 
 | |
| /************************************************************************
 | |
|  *									*
 | |
|  *   		Dumping HTML tree content to a simple buffer		*
 | |
|  *									*
 | |
|  ************************************************************************/
 | |
| 
 | |
| static int
 | |
| htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 | |
| 	           int format);
 | |
| 
 | |
| /**
 | |
|  * htmlNodeDumpFormat:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the current node
 | |
|  * @format:  should formatting spaces been added
 | |
|  *
 | |
|  * Dump an HTML node, recursive behaviour,children are printed too.
 | |
|  *
 | |
|  * Returns the number of byte written or -1 in case of error
 | |
|  */
 | |
| static int
 | |
| htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
 | |
| 	           int format) {
 | |
|     unsigned int use;
 | |
|     int ret;
 | |
|     xmlOutputBufferPtr outbuf;
 | |
| 
 | |
|     if (cur == NULL) {
 | |
| 	return (-1);
 | |
|     }
 | |
|     if (buf == NULL) {
 | |
| 	return (-1);
 | |
|     }
 | |
|     outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
 | |
|     if (outbuf == NULL) {
 | |
|         htmlSaveErrMemory("allocating HTML output buffer");
 | |
| 	return (-1);
 | |
|     }
 | |
|     memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
 | |
|     outbuf->buffer = buf;
 | |
|     outbuf->encoder = NULL;
 | |
|     outbuf->writecallback = NULL;
 | |
|     outbuf->closecallback = NULL;
 | |
|     outbuf->context = NULL;
 | |
|     outbuf->written = 0;
 | |
| 
 | |
|     use = buf->use;
 | |
|     htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
 | |
|     xmlFree(outbuf);
 | |
|     ret = buf->use - use;
 | |
|     return (ret);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlNodeDump:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the current node
 | |
|  *
 | |
|  * Dump an HTML node, recursive behaviour,children are printed too,
 | |
|  * and formatting returns are added.
 | |
|  *
 | |
|  * Returns the number of byte written or -1 in case of error
 | |
|  */
 | |
| int
 | |
| htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
 | |
|     xmlInitParser();
 | |
| 
 | |
|     return(htmlNodeDumpFormat(buf, doc, cur, 1));
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlNodeDumpFileFormat:
 | |
|  * @out:  the FILE pointer
 | |
|  * @doc:  the document
 | |
|  * @cur:  the current node
 | |
|  * @encoding: the document encoding
 | |
|  * @format:  should formatting spaces been added
 | |
|  *
 | |
|  * Dump an HTML node, recursive behaviour,children are printed too.
 | |
|  *
 | |
|  * TODO: if encoding == NULL try to save in the doc encoding
 | |
|  *
 | |
|  * returns: the number of byte written or -1 in case of failure.
 | |
|  */
 | |
| int
 | |
| htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
 | |
| 	               xmlNodePtr cur, const char *encoding, int format) {
 | |
|     xmlOutputBufferPtr buf;
 | |
|     xmlCharEncodingHandlerPtr handler = NULL;
 | |
|     int ret;
 | |
| 
 | |
|     xmlInitParser();
 | |
| 
 | |
|     if (encoding != NULL) {
 | |
| 	xmlCharEncoding enc;
 | |
| 
 | |
| 	enc = xmlParseCharEncoding(encoding);
 | |
| 	if (enc != XML_CHAR_ENCODING_UTF8) {
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	    if (handler == NULL)
 | |
| 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Fallback to HTML or ASCII when the encoding is unspecified
 | |
|      */
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("HTML");
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("ascii");
 | |
| 
 | |
|     /* 
 | |
|      * save the content to a temp buffer.
 | |
|      */
 | |
|     buf = xmlOutputBufferCreateFile(out, handler);
 | |
|     if (buf == NULL) return(0);
 | |
| 
 | |
|     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 | |
| 
 | |
|     ret = xmlOutputBufferClose(buf);
 | |
|     return(ret);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlNodeDumpFile:
 | |
|  * @out:  the FILE pointer
 | |
|  * @doc:  the document
 | |
|  * @cur:  the current node
 | |
|  *
 | |
|  * Dump an HTML node, recursive behaviour,children are printed too,
 | |
|  * and formatting returns are added.
 | |
|  */
 | |
| void
 | |
| htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
 | |
|     htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlDocDumpMemoryFormat:
 | |
|  * @cur:  the document
 | |
|  * @mem:  OUT: the memory pointer
 | |
|  * @size:  OUT: the memory length
 | |
|  * @format:  should formatting spaces been added
 | |
|  *
 | |
|  * Dump an HTML document in memory and return the xmlChar * and it's size.
 | |
|  * It's up to the caller to free the memory.
 | |
|  */
 | |
| void
 | |
| htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
 | |
|     xmlOutputBufferPtr buf;
 | |
|     xmlCharEncodingHandlerPtr handler = NULL;
 | |
|     const char *encoding;
 | |
| 
 | |
|     xmlInitParser();
 | |
| 
 | |
|     if ((mem == NULL) || (size == NULL))
 | |
|         return;
 | |
|     if (cur == NULL) {
 | |
| 	*mem = NULL;
 | |
| 	*size = 0;
 | |
| 	return;
 | |
|     }
 | |
| 
 | |
|     encoding = (const char *) htmlGetMetaEncoding(cur);
 | |
| 
 | |
|     if (encoding != NULL) {
 | |
| 	xmlCharEncoding enc;
 | |
| 
 | |
| 	enc = xmlParseCharEncoding(encoding);
 | |
| 	if (enc != cur->charset) {
 | |
| 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 | |
| 		/*
 | |
| 		 * Not supported yet
 | |
| 		 */
 | |
| 		*mem = NULL;
 | |
| 		*size = 0;
 | |
| 		return;
 | |
| 	    }
 | |
| 
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	    if (handler == NULL)
 | |
|                 htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 | |
| 
 | |
| 	} else {
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Fallback to HTML or ASCII when the encoding is unspecified
 | |
|      */
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("HTML");
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("ascii");
 | |
| 
 | |
|     buf = xmlAllocOutputBufferInternal(handler);
 | |
|     if (buf == NULL) {
 | |
| 	*mem = NULL;
 | |
| 	*size = 0;
 | |
| 	return;
 | |
|     }
 | |
| 
 | |
|     htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
 | |
| 
 | |
|     xmlOutputBufferFlush(buf);
 | |
|     if (buf->conv != NULL) {
 | |
| 	*size = buf->conv->use;
 | |
| 	*mem = xmlStrndup(buf->conv->content, *size);
 | |
|     } else {
 | |
| 	*size = buf->buffer->use;
 | |
| 	*mem = xmlStrndup(buf->buffer->content, *size);
 | |
|     }
 | |
|     (void)xmlOutputBufferClose(buf);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlDocDumpMemory:
 | |
|  * @cur:  the document
 | |
|  * @mem:  OUT: the memory pointer
 | |
|  * @size:  OUT: the memory length
 | |
|  *
 | |
|  * Dump an HTML document in memory and return the xmlChar * and it's size.
 | |
|  * It's up to the caller to free the memory.
 | |
|  */
 | |
| void
 | |
| htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
 | |
| 	htmlDocDumpMemoryFormat(cur, mem, size, 1);
 | |
| }
 | |
| 
 | |
| 
 | |
| /************************************************************************
 | |
|  *									*
 | |
|  *   		Dumping HTML tree content to an I/O output buffer	*
 | |
|  *									*
 | |
|  ************************************************************************/
 | |
| 
 | |
| void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
 | |
| 
 | |
| /**
 | |
|  * htmlDtdDumpOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @encoding:  the encoding string
 | |
|  * 
 | |
|  * TODO: check whether encoding is needed
 | |
|  *
 | |
|  * Dump the HTML document DTD, if any.
 | |
|  */
 | |
| static void
 | |
| htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
| 	          const char *encoding ATTRIBUTE_UNUSED) {
 | |
|     xmlDtdPtr cur = doc->intSubset;
 | |
| 
 | |
|     if (cur == NULL) {
 | |
| 	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
 | |
| 	return;
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
 | |
|     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
|     if (cur->ExternalID != NULL) {
 | |
| 	xmlOutputBufferWriteString(buf, " PUBLIC ");
 | |
| 	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
 | |
| 	if (cur->SystemID != NULL) {
 | |
| 	    xmlOutputBufferWriteString(buf, " ");
 | |
| 	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 | |
| 	} 
 | |
|     }  else if (cur->SystemID != NULL) {
 | |
| 	xmlOutputBufferWriteString(buf, " SYSTEM ");
 | |
| 	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, ">\n");
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlAttrDumpOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the attribute pointer
 | |
|  * @encoding:  the encoding string
 | |
|  *
 | |
|  * Dump an HTML attribute
 | |
|  */
 | |
| static void
 | |
| htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
 | |
| 	           const char *encoding ATTRIBUTE_UNUSED) {
 | |
|     xmlChar *value;
 | |
| 
 | |
|     /*
 | |
|      * TODO: The html output method should not escape a & character
 | |
|      *       occurring in an attribute value immediately followed by
 | |
|      *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
 | |
|      */
 | |
| 
 | |
|     if (cur == NULL) {
 | |
| 	return;
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, " ");
 | |
|     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 | |
|         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 | |
| 	xmlOutputBufferWriteString(buf, ":");
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
|     if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
 | |
| 	value = xmlNodeListGetString(doc, cur->children, 0);
 | |
| 	if (value) {
 | |
| 	    xmlOutputBufferWriteString(buf, "=");
 | |
| 	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
 | |
| 		(cur->parent->ns == NULL) &&
 | |
| 		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
 | |
| 	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
 | |
| 		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
 | |
| 		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
 | |
| 		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
 | |
| 		xmlChar *escaped;
 | |
| 		xmlChar *tmp = value;
 | |
| 
 | |
| 		while (IS_BLANK_CH(*tmp)) tmp++;
 | |
| 
 | |
| 		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
 | |
| 		if (escaped != NULL) {
 | |
| 		    xmlBufferWriteQuotedString(buf->buffer, escaped);
 | |
| 		    xmlFree(escaped);
 | |
| 		} else {
 | |
| 		    xmlBufferWriteQuotedString(buf->buffer, value);
 | |
| 		}
 | |
| 	    } else {
 | |
| 		xmlBufferWriteQuotedString(buf->buffer, value);
 | |
| 	    }
 | |
| 	    xmlFree(value);
 | |
| 	} else  {
 | |
| 	    xmlOutputBufferWriteString(buf, "=\"\"");
 | |
| 	}
 | |
|     }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlAttrListDumpOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the first attribute pointer
 | |
|  * @encoding:  the encoding string
 | |
|  *
 | |
|  * Dump a list of HTML attributes
 | |
|  */
 | |
| static void
 | |
| htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
 | |
|     if (cur == NULL) {
 | |
| 	return;
 | |
|     }
 | |
|     while (cur != NULL) {
 | |
|         htmlAttrDumpOutput(buf, doc, cur, encoding);
 | |
| 	cur = cur->next;
 | |
|     }
 | |
| }
 | |
| 
 | |
| 
 | |
| 
 | |
| /**
 | |
|  * htmlNodeListDumpOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the first node
 | |
|  * @encoding:  the encoding string
 | |
|  * @format:  should formatting spaces been added
 | |
|  *
 | |
|  * Dump an HTML node list, recursive behaviour,children are printed too.
 | |
|  */
 | |
| static void
 | |
| htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
| 	               xmlNodePtr cur, const char *encoding, int format) {
 | |
|     if (cur == NULL) {
 | |
| 	return;
 | |
|     }
 | |
|     while (cur != NULL) {
 | |
|         htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
 | |
| 	cur = cur->next;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlNodeDumpFormatOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the current node
 | |
|  * @encoding:  the encoding string
 | |
|  * @format:  should formatting spaces been added
 | |
|  *
 | |
|  * Dump an HTML node, recursive behaviour,children are printed too.
 | |
|  */
 | |
| void
 | |
| htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
| 	                 xmlNodePtr cur, const char *encoding, int format) {
 | |
|     const htmlElemDesc * info;
 | |
| 
 | |
|     xmlInitParser();
 | |
| 
 | |
|     if ((cur == NULL) || (buf == NULL)) {
 | |
| 	return;
 | |
|     }
 | |
|     /*
 | |
|      * Special cases.
 | |
|      */
 | |
|     if (cur->type == XML_DTD_NODE)
 | |
| 	return;
 | |
|     if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
 | |
|         (cur->type == XML_DOCUMENT_NODE)){
 | |
| 	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
 | |
| 	return;
 | |
|     }
 | |
|     if (cur->type == XML_ATTRIBUTE_NODE) {
 | |
|         htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
 | |
| 	return;
 | |
|     }
 | |
|     if (cur->type == HTML_TEXT_NODE) {
 | |
| 	if (cur->content != NULL) {
 | |
| 	    if (((cur->name == (const xmlChar *)xmlStringText) ||
 | |
| 		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
 | |
| 		((cur->parent == NULL) ||
 | |
| 		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
 | |
| 		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
 | |
| 		xmlChar *buffer;
 | |
| 
 | |
| 		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
 | |
| 		if (buffer != NULL) {
 | |
| 		    xmlOutputBufferWriteString(buf, (const char *)buffer);
 | |
| 		    xmlFree(buffer);
 | |
| 		}
 | |
| 	    } else {
 | |
| 		xmlOutputBufferWriteString(buf, (const char *)cur->content);
 | |
| 	    }
 | |
| 	}
 | |
| 	return;
 | |
|     }
 | |
|     if (cur->type == HTML_COMMENT_NODE) {
 | |
| 	if (cur->content != NULL) {
 | |
| 	    xmlOutputBufferWriteString(buf, "<!--");
 | |
| 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
 | |
| 	    xmlOutputBufferWriteString(buf, "-->");
 | |
| 	}
 | |
| 	return;
 | |
|     }
 | |
|     if (cur->type == HTML_PI_NODE) {
 | |
| 	if (cur->name == NULL)
 | |
| 	    return;
 | |
| 	xmlOutputBufferWriteString(buf, "<?");
 | |
| 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
| 	if (cur->content != NULL) {
 | |
| 	    xmlOutputBufferWriteString(buf, " ");
 | |
| 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
 | |
| 	}
 | |
| 	xmlOutputBufferWriteString(buf, ">");
 | |
| 	return;
 | |
|     }
 | |
|     if (cur->type == HTML_ENTITY_REF_NODE) {
 | |
|         xmlOutputBufferWriteString(buf, "&");
 | |
| 	xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
|         xmlOutputBufferWriteString(buf, ";");
 | |
| 	return;
 | |
|     }
 | |
|     if (cur->type == HTML_PRESERVE_NODE) {
 | |
| 	if (cur->content != NULL) {
 | |
| 	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
 | |
| 	}
 | |
| 	return;
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Get specific HTML info for that node.
 | |
|      */
 | |
|     if (cur->ns == NULL)
 | |
| 	info = htmlTagLookup(cur->name);
 | |
|     else
 | |
| 	info = NULL;
 | |
| 
 | |
|     xmlOutputBufferWriteString(buf, "<");
 | |
|     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 | |
|         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 | |
| 	xmlOutputBufferWriteString(buf, ":");
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
|     if (cur->nsDef)
 | |
| 	xmlNsListDumpOutput(buf, cur->nsDef);
 | |
|     if (cur->properties != NULL)
 | |
|         htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
 | |
| 
 | |
|     if ((info != NULL) && (info->empty)) {
 | |
|         xmlOutputBufferWriteString(buf, ">");
 | |
| 	if ((format) && (!info->isinline) && (cur->next != NULL)) {
 | |
| 	    if ((cur->next->type != HTML_TEXT_NODE) &&
 | |
| 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
 | |
| 		(cur->parent != NULL) &&
 | |
| 		(cur->parent->name != NULL) &&
 | |
| 		(cur->parent->name[0] != 'p')) /* p, pre, param */
 | |
| 		xmlOutputBufferWriteString(buf, "\n");
 | |
| 	}
 | |
| 	return;
 | |
|     }
 | |
|     if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
 | |
| 	(cur->children == NULL)) {
 | |
|         if ((info != NULL) && (info->saveEndTag != 0) &&
 | |
| 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
 | |
| 	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
 | |
| 	    xmlOutputBufferWriteString(buf, ">");
 | |
| 	} else {
 | |
| 	    xmlOutputBufferWriteString(buf, "></");
 | |
|             if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 | |
|                 xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 | |
|                 xmlOutputBufferWriteString(buf, ":");
 | |
|             }
 | |
| 	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
| 	    xmlOutputBufferWriteString(buf, ">");
 | |
| 	}
 | |
| 	if ((format) && (cur->next != NULL) &&
 | |
|             (info != NULL) && (!info->isinline)) {
 | |
| 	    if ((cur->next->type != HTML_TEXT_NODE) &&
 | |
| 		(cur->next->type != HTML_ENTITY_REF_NODE) &&
 | |
| 		(cur->parent != NULL) &&
 | |
| 		(cur->parent->name != NULL) &&
 | |
| 		(cur->parent->name[0] != 'p')) /* p, pre, param */
 | |
| 		xmlOutputBufferWriteString(buf, "\n");
 | |
| 	}
 | |
| 	return;
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, ">");
 | |
|     if ((cur->type != XML_ELEMENT_NODE) &&
 | |
| 	(cur->content != NULL)) {
 | |
| 	    /*
 | |
| 	     * Uses the OutputBuffer property to automatically convert
 | |
| 	     * invalids to charrefs
 | |
| 	     */
 | |
| 
 | |
|             xmlOutputBufferWriteString(buf, (const char *) cur->content);
 | |
|     }
 | |
|     if (cur->children != NULL) {
 | |
|         if ((format) && (info != NULL) && (!info->isinline) &&
 | |
| 	    (cur->children->type != HTML_TEXT_NODE) &&
 | |
| 	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
 | |
| 	    (cur->children != cur->last) &&
 | |
| 	    (cur->name != NULL) &&
 | |
| 	    (cur->name[0] != 'p')) /* p, pre, param */
 | |
| 	    xmlOutputBufferWriteString(buf, "\n");
 | |
| 	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
 | |
|         if ((format) && (info != NULL) && (!info->isinline) &&
 | |
| 	    (cur->last->type != HTML_TEXT_NODE) &&
 | |
| 	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
 | |
| 	    (cur->children != cur->last) &&
 | |
| 	    (cur->name != NULL) &&
 | |
| 	    (cur->name[0] != 'p')) /* p, pre, param */
 | |
| 	    xmlOutputBufferWriteString(buf, "\n");
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, "</");
 | |
|     if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
 | |
|         xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
 | |
| 	xmlOutputBufferWriteString(buf, ":");
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, (const char *)cur->name);
 | |
|     xmlOutputBufferWriteString(buf, ">");
 | |
|     if ((format) && (info != NULL) && (!info->isinline) &&
 | |
| 	(cur->next != NULL)) {
 | |
|         if ((cur->next->type != HTML_TEXT_NODE) &&
 | |
| 	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
 | |
| 	    (cur->parent != NULL) &&
 | |
| 	    (cur->parent->name != NULL) &&
 | |
| 	    (cur->parent->name[0] != 'p')) /* p, pre, param */
 | |
| 	    xmlOutputBufferWriteString(buf, "\n");
 | |
|     }
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlNodeDumpOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @doc:  the document
 | |
|  * @cur:  the current node
 | |
|  * @encoding:  the encoding string
 | |
|  *
 | |
|  * Dump an HTML node, recursive behaviour,children are printed too,
 | |
|  * and formatting returns/spaces are added.
 | |
|  */
 | |
| void
 | |
| htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
 | |
| 	           xmlNodePtr cur, const char *encoding) {
 | |
|     htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlDocContentDumpFormatOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @cur:  the document
 | |
|  * @encoding:  the encoding string
 | |
|  * @format:  should formatting spaces been added
 | |
|  *
 | |
|  * Dump an HTML document.
 | |
|  */
 | |
| void
 | |
| htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 | |
| 	                       const char *encoding, int format) {
 | |
|     int type;
 | |
| 
 | |
|     xmlInitParser();
 | |
| 
 | |
|     if ((buf == NULL) || (cur == NULL))
 | |
|         return;
 | |
| 
 | |
|     /*
 | |
|      * force to output the stuff as HTML, especially for entities
 | |
|      */
 | |
|     type = cur->type;
 | |
|     cur->type = XML_HTML_DOCUMENT_NODE;
 | |
|     if (cur->intSubset != NULL) {
 | |
|         htmlDtdDumpOutput(buf, cur, NULL);
 | |
|     }
 | |
|     if (cur->children != NULL) {
 | |
|         htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
 | |
|     }
 | |
|     xmlOutputBufferWriteString(buf, "\n");
 | |
|     cur->type = (xmlElementType) type;
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlDocContentDumpOutput:
 | |
|  * @buf:  the HTML buffer output
 | |
|  * @cur:  the document
 | |
|  * @encoding:  the encoding string
 | |
|  *
 | |
|  * Dump an HTML document. Formating return/spaces are added.
 | |
|  */
 | |
| void
 | |
| htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
 | |
| 	                 const char *encoding) {
 | |
|     htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
 | |
| }
 | |
| 
 | |
| /************************************************************************
 | |
|  *									*
 | |
|  *		Saving functions front-ends				*
 | |
|  *									*
 | |
|  ************************************************************************/
 | |
| 
 | |
| /**
 | |
|  * htmlDocDump:
 | |
|  * @f:  the FILE*
 | |
|  * @cur:  the document
 | |
|  *
 | |
|  * Dump an HTML document to an open FILE.
 | |
|  *
 | |
|  * returns: the number of byte written or -1 in case of failure.
 | |
|  */
 | |
| int
 | |
| htmlDocDump(FILE *f, xmlDocPtr cur) {
 | |
|     xmlOutputBufferPtr buf;
 | |
|     xmlCharEncodingHandlerPtr handler = NULL;
 | |
|     const char *encoding;
 | |
|     int ret;
 | |
| 
 | |
|     xmlInitParser();
 | |
| 
 | |
|     if ((cur == NULL) || (f == NULL)) {
 | |
| 	return(-1);
 | |
|     }
 | |
| 
 | |
|     encoding = (const char *) htmlGetMetaEncoding(cur);
 | |
| 
 | |
|     if (encoding != NULL) {
 | |
| 	xmlCharEncoding enc;
 | |
| 
 | |
| 	enc = xmlParseCharEncoding(encoding);
 | |
| 	if (enc != cur->charset) {
 | |
| 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 | |
| 		/*
 | |
| 		 * Not supported yet
 | |
| 		 */
 | |
| 		return(-1);
 | |
| 	    }
 | |
| 
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	    if (handler == NULL)
 | |
| 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 | |
| 	} else {
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Fallback to HTML or ASCII when the encoding is unspecified
 | |
|      */
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("HTML");
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("ascii");
 | |
| 
 | |
|     buf = xmlOutputBufferCreateFile(f, handler);
 | |
|     if (buf == NULL) return(-1);
 | |
|     htmlDocContentDumpOutput(buf, cur, NULL);
 | |
| 
 | |
|     ret = xmlOutputBufferClose(buf);
 | |
|     return(ret);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlSaveFile:
 | |
|  * @filename:  the filename (or URL)
 | |
|  * @cur:  the document
 | |
|  *
 | |
|  * Dump an HTML document to a file. If @filename is "-" the stdout file is
 | |
|  * used.
 | |
|  * returns: the number of byte written or -1 in case of failure.
 | |
|  */
 | |
| int
 | |
| htmlSaveFile(const char *filename, xmlDocPtr cur) {
 | |
|     xmlOutputBufferPtr buf;
 | |
|     xmlCharEncodingHandlerPtr handler = NULL;
 | |
|     const char *encoding;
 | |
|     int ret;
 | |
| 
 | |
|     if ((cur == NULL) || (filename == NULL))
 | |
|         return(-1);
 | |
|        
 | |
|     xmlInitParser();
 | |
| 
 | |
|     encoding = (const char *) htmlGetMetaEncoding(cur);
 | |
| 
 | |
|     if (encoding != NULL) {
 | |
| 	xmlCharEncoding enc;
 | |
| 
 | |
| 	enc = xmlParseCharEncoding(encoding);
 | |
| 	if (enc != cur->charset) {
 | |
| 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 | |
| 		/*
 | |
| 		 * Not supported yet
 | |
| 		 */
 | |
| 		return(-1);
 | |
| 	    }
 | |
| 
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	    if (handler == NULL)
 | |
| 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Fallback to HTML or ASCII when the encoding is unspecified
 | |
|      */
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("HTML");
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("ascii");
 | |
| 
 | |
|     /* 
 | |
|      * save the content to a temp buffer.
 | |
|      */
 | |
|     buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
 | |
|     if (buf == NULL) return(0);
 | |
| 
 | |
|     htmlDocContentDumpOutput(buf, cur, NULL);
 | |
| 
 | |
|     ret = xmlOutputBufferClose(buf);
 | |
|     return(ret);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlSaveFileFormat:
 | |
|  * @filename:  the filename
 | |
|  * @cur:  the document
 | |
|  * @format:  should formatting spaces been added
 | |
|  * @encoding: the document encoding
 | |
|  *
 | |
|  * Dump an HTML document to a file using a given encoding.
 | |
|  * 
 | |
|  * returns: the number of byte written or -1 in case of failure.
 | |
|  */
 | |
| int
 | |
| htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
 | |
| 	           const char *encoding, int format) {
 | |
|     xmlOutputBufferPtr buf;
 | |
|     xmlCharEncodingHandlerPtr handler = NULL;
 | |
|     int ret;
 | |
| 
 | |
|     if ((cur == NULL) || (filename == NULL))
 | |
|         return(-1);
 | |
| 
 | |
|     xmlInitParser();
 | |
| 
 | |
|     if (encoding != NULL) {
 | |
| 	xmlCharEncoding enc;
 | |
| 
 | |
| 	enc = xmlParseCharEncoding(encoding);
 | |
| 	if (enc != cur->charset) {
 | |
| 	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
 | |
| 		/*
 | |
| 		 * Not supported yet
 | |
| 		 */
 | |
| 		return(-1);
 | |
| 	    }
 | |
| 
 | |
| 	    handler = xmlFindCharEncodingHandler(encoding);
 | |
| 	    if (handler == NULL)
 | |
| 		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
 | |
| 	}
 | |
|         htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
 | |
|     } else {
 | |
| 	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
 | |
|     }
 | |
| 
 | |
|     /*
 | |
|      * Fallback to HTML or ASCII when the encoding is unspecified
 | |
|      */
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("HTML");
 | |
|     if (handler == NULL)
 | |
| 	handler = xmlFindCharEncodingHandler("ascii");
 | |
| 
 | |
|     /* 
 | |
|      * save the content to a temp buffer.
 | |
|      */
 | |
|     buf = xmlOutputBufferCreateFilename(filename, handler, 0);
 | |
|     if (buf == NULL) return(0);
 | |
| 
 | |
|     htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
 | |
| 
 | |
|     ret = xmlOutputBufferClose(buf);
 | |
|     return(ret);
 | |
| }
 | |
| 
 | |
| /**
 | |
|  * htmlSaveFileEnc:
 | |
|  * @filename:  the filename
 | |
|  * @cur:  the document
 | |
|  * @encoding: the document encoding
 | |
|  *
 | |
|  * Dump an HTML document to a file using a given encoding
 | |
|  * and formatting returns/spaces are added.
 | |
|  * 
 | |
|  * returns: the number of byte written or -1 in case of failure.
 | |
|  */
 | |
| int
 | |
| htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
 | |
|     return(htmlSaveFileFormat(filename, cur, encoding, 1));
 | |
| }
 | |
| 
 | |
| #endif /* LIBXML_OUTPUT_ENABLED */
 | |
| 
 | |
| #define bottom_HTMLtree
 | |
| #include "elfgcchack.h"
 | |
| #endif /* LIBXML_HTML_ENABLED */
 |