mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-10-21 14:53:44 +03:00
1315 lines
38 KiB
C
1315 lines
38 KiB
C
/*
|
|
* HTMLtree.c : implementation of access function for an HTML tree.
|
|
*
|
|
* See Copyright for the status of this software.
|
|
*
|
|
* Author: Daniel Veillard
|
|
*/
|
|
|
|
|
|
#define IN_LIBXML
|
|
#include "libxml.h"
|
|
#ifdef LIBXML_HTML_ENABLED
|
|
|
|
#include <string.h> /* for memset() only ! */
|
|
#include <ctype.h>
|
|
#include <stdlib.h>
|
|
|
|
#include <libxml/xmlmemory.h>
|
|
#include <libxml/HTMLparser.h>
|
|
#include <libxml/HTMLtree.h>
|
|
#include <libxml/entities.h>
|
|
#include <libxml/xmlerror.h>
|
|
#include <libxml/parserInternals.h>
|
|
#include <libxml/uri.h>
|
|
|
|
#include "private/buf.h"
|
|
#include "private/html.h"
|
|
#include "private/error.h"
|
|
#include "private/html.h"
|
|
#include "private/io.h"
|
|
#include "private/save.h"
|
|
#include "private/tree.h"
|
|
|
|
/************************************************************************
|
|
* *
|
|
* Getting/Setting encoding meta tags *
|
|
* *
|
|
************************************************************************/
|
|
|
|
typedef struct {
|
|
xmlAttrPtr attr; /* charset or content */
|
|
const xmlChar *attrValue;
|
|
htmlMetaEncodingOffsets off;
|
|
} htmlMetaEncoding;
|
|
|
|
static htmlNodePtr
|
|
htmlFindFirstChild(htmlNodePtr parent, const char *name) {
|
|
htmlNodePtr child;
|
|
|
|
for (child = parent->children; child != NULL; child = child->next) {
|
|
if ((child->type == XML_ELEMENT_NODE) &&
|
|
(xmlStrcasecmp(child->name, BAD_CAST name) == 0))
|
|
return(child);
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
static htmlNodePtr
|
|
htmlFindHead(htmlDocPtr doc) {
|
|
htmlNodePtr html;
|
|
|
|
if (doc == NULL)
|
|
return(NULL);
|
|
|
|
html = htmlFindFirstChild((htmlNodePtr) doc, "html");
|
|
if (html == NULL)
|
|
return(NULL);
|
|
|
|
return(htmlFindFirstChild(html, "head"));
|
|
}
|
|
|
|
int
|
|
htmlParseContentType(const xmlChar *val, htmlMetaEncodingOffsets *off) {
|
|
const xmlChar *p = val;
|
|
|
|
while (1) {
|
|
size_t start, end;
|
|
|
|
while ((*p != 'c') && (*p != 'C')) {
|
|
if (*p == 0)
|
|
return(0);
|
|
p += 1;
|
|
}
|
|
p += 1;
|
|
|
|
if (xmlStrncasecmp(p, BAD_CAST "harset", 6) != 0)
|
|
continue;
|
|
|
|
p += 6;
|
|
while (IS_WS_HTML(*p)) p += 1;
|
|
|
|
if (*p != '=')
|
|
continue;
|
|
|
|
p += 1;
|
|
while (IS_WS_HTML(*p)) p += 1;
|
|
|
|
if (*p == 0)
|
|
return(0);
|
|
|
|
if ((*p == '"') || (*p == '\'')) {
|
|
int quote = *p;
|
|
|
|
p += 1;
|
|
while (IS_WS_HTML(*p)) p += 1;
|
|
|
|
start = p - val;
|
|
end = start;
|
|
|
|
while (*p != quote) {
|
|
if (*p == 0)
|
|
return(0);
|
|
if (!IS_WS_HTML(*p))
|
|
end = p + 1 - val;
|
|
p += 1;
|
|
}
|
|
} else {
|
|
start = p - val;
|
|
|
|
while ((*p != 0) && (*p != ';') && (!IS_WS_HTML(*p)))
|
|
p += 1;
|
|
|
|
end = p - val;
|
|
}
|
|
|
|
off->start = start;
|
|
off->end = end;
|
|
off->size = p - val + strlen((char *) p);
|
|
|
|
return(1);
|
|
}
|
|
|
|
return(0);
|
|
}
|
|
|
|
static xmlAttrPtr
|
|
htmlFindMetaEncodingAttr(htmlNodePtr elem, int *outIsContentType) {
|
|
xmlAttrPtr attr, contentAttr = NULL;
|
|
int isContentType = 0;
|
|
|
|
if (xmlStrcasecmp(elem->name, BAD_CAST "meta") != 0)
|
|
return(NULL);
|
|
|
|
for (attr = elem->properties; attr != NULL; attr = attr->next) {
|
|
if (attr->ns != NULL)
|
|
continue;
|
|
if (xmlStrcasecmp(attr->name, BAD_CAST "charset") == 0) {
|
|
*outIsContentType = 0;
|
|
return(attr);
|
|
}
|
|
if (xmlStrcasecmp(attr->name, BAD_CAST "content") == 0)
|
|
contentAttr = attr;
|
|
if ((xmlStrcasecmp(attr->name, BAD_CAST "http-equiv") == 0) &&
|
|
(attr->children != NULL) &&
|
|
(attr->children->type == XML_TEXT_NODE) &&
|
|
(attr->children->next == NULL) &&
|
|
(xmlStrcasecmp(attr->children->content,
|
|
BAD_CAST "Content-Type") == 0))
|
|
isContentType = 1;
|
|
}
|
|
|
|
if ((isContentType) && (contentAttr != NULL)) {
|
|
*outIsContentType = 1;
|
|
return(contentAttr);
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
static int
|
|
htmlParseMetaEncoding(htmlNodePtr elem, htmlMetaEncoding *menc) {
|
|
xmlAttrPtr attr;
|
|
const xmlChar *val = NULL;
|
|
int isContentType;
|
|
|
|
if ((elem->type != XML_ELEMENT_NODE) ||
|
|
(xmlStrcasecmp(elem->name, BAD_CAST "meta") != 0))
|
|
return(0);
|
|
|
|
attr = htmlFindMetaEncodingAttr(elem, &isContentType);
|
|
if (attr == NULL)
|
|
return(0);
|
|
|
|
if ((attr->children != NULL) &&
|
|
(attr->children->type == XML_TEXT_NODE) &&
|
|
(attr->children->next == NULL) &&
|
|
(attr->children->content != NULL))
|
|
val = attr->children->content;
|
|
else
|
|
val = BAD_CAST "";
|
|
|
|
|
|
if (!isContentType) {
|
|
size_t size = strlen((char *) val);
|
|
size_t start = 0;
|
|
size_t end = size;
|
|
|
|
while ((start < size) && (IS_WS_HTML(val[start])))
|
|
start += 1;
|
|
|
|
while ((end > 0) && (IS_WS_HTML(val[end-1])))
|
|
end -= 1;
|
|
|
|
menc->attr = attr;
|
|
menc->attrValue = val;
|
|
menc->off.start = start;
|
|
menc->off.end = end;
|
|
menc->off.size = size;
|
|
|
|
return(1);
|
|
} else {
|
|
if (htmlParseContentType(val, &menc->off)) {
|
|
menc->attr = attr;
|
|
menc->attrValue = val;
|
|
|
|
return(1);
|
|
}
|
|
}
|
|
|
|
return(0);
|
|
}
|
|
|
|
static xmlChar *
|
|
htmlUpdateMetaEncoding(htmlMetaEncoding *menc, const char *encoding) {
|
|
xmlChar *newVal, *p;
|
|
size_t size, oldEncSize, newEncSize;
|
|
|
|
/*
|
|
* The pseudo "HTML" encoding only produces ASCII.
|
|
*/
|
|
if (xmlStrcasecmp(BAD_CAST encoding, BAD_CAST "HTML") == 0)
|
|
encoding = "ASCII";
|
|
|
|
oldEncSize = menc->off.end - menc->off.start;
|
|
newEncSize = strlen((char *) encoding);
|
|
size = menc->off.size - oldEncSize + newEncSize;
|
|
newVal = xmlMalloc(size + 1);
|
|
if (newVal == NULL)
|
|
return(NULL);
|
|
|
|
p = newVal;
|
|
memcpy(p, menc->attrValue, menc->off.start);
|
|
p += menc->off.start;
|
|
memcpy(p, encoding, newEncSize);
|
|
p += newEncSize;
|
|
memcpy(p, menc->attrValue + menc->off.end, menc->off.size - menc->off.end);
|
|
newVal[size] = 0;
|
|
|
|
return(newVal);
|
|
}
|
|
|
|
/**
|
|
* Look up and encoding declaration in the meta tags.
|
|
*
|
|
* The returned string points into attribute content and can contain
|
|
* trailing garbage. It should be copied before modifying or freeing
|
|
* nodes.
|
|
*
|
|
* @param doc the document
|
|
* @returns the encoding ot NULL if not found.
|
|
*/
|
|
const xmlChar *
|
|
htmlGetMetaEncoding(xmlDoc *doc) {
|
|
htmlNodePtr head, node;
|
|
|
|
head = htmlFindHead(doc);
|
|
if (head == NULL)
|
|
return(NULL);
|
|
|
|
for (node = head->children; node != NULL; node = node->next) {
|
|
htmlMetaEncoding menc;
|
|
|
|
if (htmlParseMetaEncoding(node, &menc)) {
|
|
/*
|
|
* Returning a `const xmlChar *` only allows to return
|
|
* a suffix. In http-equiv meta tags, there could be
|
|
* more data after the charset, although it's probably
|
|
* rare in practice.
|
|
*/
|
|
return(menc.attrValue + menc.off.start);
|
|
}
|
|
}
|
|
|
|
return(NULL);
|
|
}
|
|
|
|
/**
|
|
* Creates or updates a meta tag with an encoding declaration.
|
|
*
|
|
* NOTE: This will not change the document content encoding.
|
|
*
|
|
* @param doc the document
|
|
* @param encoding the encoding string
|
|
* @returns 0 in case of success, 1 if no head element was found or
|
|
* arguments are invalid and -1 if memory allocation failed.
|
|
*/
|
|
int
|
|
htmlSetMetaEncoding(xmlDoc *doc, const xmlChar *encoding) {
|
|
htmlNodePtr head, meta;
|
|
int found = 0;
|
|
|
|
if (encoding == NULL)
|
|
return(1);
|
|
|
|
head = htmlFindHead(doc);
|
|
if (head == NULL)
|
|
return(1);
|
|
|
|
for (meta = head->children; meta != NULL; meta = meta->next) {
|
|
htmlMetaEncoding menc;
|
|
|
|
if (htmlParseMetaEncoding(meta, &menc)) {
|
|
xmlChar *newVal;
|
|
int ret;
|
|
|
|
found = 1;
|
|
|
|
newVal = htmlUpdateMetaEncoding(&menc, (char *) encoding);
|
|
if (newVal == NULL)
|
|
return(-1);
|
|
xmlNodeSetContent((xmlNodePtr) menc.attr, NULL);
|
|
ret = xmlNodeAddContent((xmlNodePtr) menc.attr, newVal);
|
|
xmlFree(newVal);
|
|
|
|
if (ret < 0)
|
|
return(-1);
|
|
}
|
|
}
|
|
|
|
if (found)
|
|
return(0);
|
|
|
|
meta = xmlNewDocNode(head->doc, NULL, BAD_CAST "meta", NULL);
|
|
if (meta == NULL)
|
|
return(-1);
|
|
|
|
if (xmlNewProp(meta, BAD_CAST "charset", encoding) == NULL) {
|
|
xmlFreeNode(meta);
|
|
return(-1);
|
|
}
|
|
|
|
if (head->children == NULL)
|
|
xmlAddChild(head, meta);
|
|
else
|
|
xmlAddPrevSibling(head->children, meta);
|
|
|
|
return(0);
|
|
}
|
|
|
|
/**
|
|
* Determine if a given attribute is a boolean attribute. This
|
|
* doesn't handle HTML5.
|
|
*
|
|
* @deprecated Internal function, don't use.
|
|
*
|
|
* @param name the name of the attribute to check
|
|
* @returns false if the attribute is not boolean, true otherwise.
|
|
*/
|
|
int
|
|
htmlIsBooleanAttr(const xmlChar *name)
|
|
{
|
|
const char *str = NULL;
|
|
|
|
if (name == NULL)
|
|
return(0);
|
|
|
|
/*
|
|
* These are the HTML attributes which will be output
|
|
* in minimized form, i.e. `<option selected="selected">` will be
|
|
* output as `<option selected>`, as per XSLT 1.0 16.2 "HTML Output
|
|
* Method":
|
|
*
|
|
* "checked", "compact", "declare", "defer", "disabled", "ismap",
|
|
* "multiple", "nohref", "noresize", "noshade", "nowrap", "readonly",
|
|
* "selected"
|
|
*
|
|
* Additional attributes from HTML5 (not implemented yet):
|
|
*
|
|
* "allowfullscreen", "alpha", "async", "autofocus", "autoplay",
|
|
* "controls", "default", "formnovalidate", "inert", "itemscope",
|
|
* "loop", "muted", "nomodule", "novalidate", "open", "playsinline",
|
|
* "required", "reversed", "shadowrootdelegatesfocus",
|
|
* "shadowrootclonable", "shadowrootserializable",
|
|
* "shadowrootcustomelementregistry", "truespeed"
|
|
*/
|
|
|
|
switch (name[0] | 0x20) {
|
|
case 'c':
|
|
name += 1;
|
|
switch (name[0] | 0x20) {
|
|
case 'h': str = "ecked"; break;
|
|
case 'o': str = "mpact"; break;
|
|
}
|
|
break;
|
|
case 'd':
|
|
name += 1;
|
|
switch (name[0] | 0x20) {
|
|
case 'e':
|
|
name += 1;
|
|
switch (name[0] | 0x20) {
|
|
case 'c': str = "lare"; break;
|
|
case 'f': str = "er"; break;
|
|
}
|
|
break;
|
|
case 'i': str = "sabled"; break;
|
|
}
|
|
break;
|
|
case 'i':
|
|
str = "smap";
|
|
break;
|
|
case 'm':
|
|
str = "ultiple";
|
|
break;
|
|
case 'n':
|
|
name += 1;
|
|
if ((name[0] | 0x20) != 'o')
|
|
break;
|
|
name += 1;
|
|
switch (name[0] | 0x20) {
|
|
case 'h': str = "ref"; break;
|
|
case 'r': str = "esize"; break;
|
|
case 's': str = "hade"; break;
|
|
case 'w': str = "rap"; break;
|
|
}
|
|
break;
|
|
case 'r':
|
|
str = "eadonly";
|
|
break;
|
|
case 's':
|
|
str = "elected";
|
|
break;
|
|
}
|
|
|
|
if (str == NULL)
|
|
return(0);
|
|
|
|
return(xmlStrcasecmp(name + 1, BAD_CAST str) == 0);
|
|
}
|
|
|
|
#ifdef LIBXML_OUTPUT_ENABLED
|
|
/************************************************************************
|
|
* *
|
|
* Dumping HTML tree content to a simple buffer *
|
|
* *
|
|
************************************************************************/
|
|
|
|
static xmlParserErrors
|
|
htmlFindOutputEncoder(const char *encoding, xmlCharEncodingHandler **out) {
|
|
/*
|
|
* Fallback to HTML if the encoding is unspecified
|
|
*/
|
|
if (encoding == NULL)
|
|
encoding = "HTML";
|
|
|
|
return(xmlOpenCharEncodingHandler(encoding, /* output */ 1, out));
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML document to an xmlBuf.
|
|
*
|
|
* @param buf the xmlBuf output
|
|
* @param doc the document (unused)
|
|
* @param cur the current node
|
|
* @param format should formatting newlines been added
|
|
* @returns the number of bytes written or -1 in case of error
|
|
*/
|
|
static size_t
|
|
htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc ATTRIBUTE_UNUSED,
|
|
xmlNodePtr cur, int format) {
|
|
size_t use;
|
|
size_t ret;
|
|
xmlOutputBufferPtr outbuf;
|
|
|
|
if (cur == NULL) {
|
|
return ((size_t) -1);
|
|
}
|
|
if (buf == NULL) {
|
|
return ((size_t) -1);
|
|
}
|
|
outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
|
|
if (outbuf == NULL)
|
|
return ((size_t) -1);
|
|
memset(outbuf, 0, sizeof(xmlOutputBuffer));
|
|
outbuf->buffer = buf;
|
|
outbuf->encoder = NULL;
|
|
outbuf->writecallback = NULL;
|
|
outbuf->closecallback = NULL;
|
|
outbuf->context = NULL;
|
|
outbuf->written = 0;
|
|
|
|
use = xmlBufUse(buf);
|
|
htmlNodeDumpInternal(outbuf, cur, NULL, format);
|
|
if (outbuf->error)
|
|
ret = (size_t) -1;
|
|
else
|
|
ret = xmlBufUse(buf) - use;
|
|
xmlFree(outbuf);
|
|
return (ret);
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML node to an xmlBuffer. Always uses UTF-8.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param doc the document
|
|
* @param cur the current node
|
|
* @returns the number of bytes written or -1 in case of error
|
|
*/
|
|
int
|
|
htmlNodeDump(xmlBuffer *buf, xmlDoc *doc, xmlNode *cur) {
|
|
xmlBufPtr buffer;
|
|
size_t ret1;
|
|
int ret2;
|
|
|
|
if ((buf == NULL) || (cur == NULL))
|
|
return(-1);
|
|
|
|
xmlInitParser();
|
|
buffer = xmlBufFromBuffer(buf);
|
|
if (buffer == NULL)
|
|
return(-1);
|
|
|
|
ret1 = htmlBufNodeDumpFormat(buffer, doc, cur, 1);
|
|
|
|
ret2 = xmlBufBackToBuffer(buffer, buf);
|
|
|
|
if ((ret1 == (size_t) -1) || (ret2 < 0))
|
|
return(-1);
|
|
return(ret1 > INT_MAX ? INT_MAX : ret1);
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML node to an xmlBuffer.
|
|
*
|
|
* If encoding is NULL, ASCII with HTML 4.0 named character entities
|
|
* will be used. This is inefficient compared to UTF-8 and might be
|
|
* changed in a future version.
|
|
*
|
|
* @param out the FILE pointer
|
|
* @param doc the document (unused)
|
|
* @param cur the current node
|
|
* @param encoding the document encoding (optional)
|
|
* @param format should formatting newlines been added
|
|
* @returns the number of bytes written or -1 in case of failure.
|
|
*/
|
|
int
|
|
htmlNodeDumpFileFormat(FILE *out, xmlDoc *doc ATTRIBUTE_UNUSED,
|
|
xmlNode *cur, const char *encoding, int format) {
|
|
xmlOutputBufferPtr buf;
|
|
xmlCharEncodingHandlerPtr handler;
|
|
int ret;
|
|
|
|
xmlInitParser();
|
|
|
|
/*
|
|
* save the content to a temp buffer.
|
|
*/
|
|
if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK)
|
|
return(-1);
|
|
buf = xmlOutputBufferCreateFile(out, handler);
|
|
if (buf == NULL) {
|
|
xmlCharEncCloseFunc(handler);
|
|
return(-1);
|
|
}
|
|
|
|
htmlNodeDumpInternal(buf, cur, NULL, format);
|
|
|
|
ret = xmlOutputBufferClose(buf);
|
|
return(ret);
|
|
}
|
|
|
|
/**
|
|
* Same as #htmlNodeDumpFileFormat with `format` set to 1 which is
|
|
* typically undesired. Use of this function is DISCOURAGED in favor
|
|
* of #htmlNodeDumpFileFormat.
|
|
*
|
|
* @param out the FILE pointer
|
|
* @param doc the document
|
|
* @param cur the current node
|
|
*/
|
|
void
|
|
htmlNodeDumpFile(FILE *out, xmlDoc *doc, xmlNode *cur) {
|
|
htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML node to a memory, also returning the size of
|
|
* the result. It's up to the caller to free the memory.
|
|
*
|
|
* Uses the encoding of the document. If the document has no
|
|
* encoding, ASCII with HTML 4.0 named character entities will
|
|
* be used. This is inefficient compared to UTF-8 and might be
|
|
* changed in a future version.
|
|
*
|
|
* @param cur the document
|
|
* @param mem OUT: the memory pointer
|
|
* @param size OUT: the memory length
|
|
* @param format should formatting newlines been added
|
|
*/
|
|
void
|
|
htmlDocDumpMemoryFormat(xmlDoc *cur, xmlChar**mem, int *size, int format) {
|
|
xmlOutputBufferPtr buf;
|
|
xmlCharEncodingHandlerPtr handler = NULL;
|
|
|
|
xmlInitParser();
|
|
|
|
if ((mem == NULL) || (size == NULL))
|
|
return;
|
|
*mem = NULL;
|
|
*size = 0;
|
|
if (cur == NULL)
|
|
return;
|
|
|
|
if (htmlFindOutputEncoder((char *) cur->encoding, &handler) != XML_ERR_OK)
|
|
return;
|
|
buf = xmlAllocOutputBuffer(handler);
|
|
if (buf == NULL) {
|
|
xmlCharEncCloseFunc(handler);
|
|
return;
|
|
}
|
|
|
|
htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
|
|
|
|
xmlOutputBufferFlush(buf);
|
|
|
|
if (!buf->error) {
|
|
if (buf->conv != NULL) {
|
|
*size = xmlBufUse(buf->conv);
|
|
*mem = xmlStrndup(xmlBufContent(buf->conv), *size);
|
|
} else {
|
|
*size = xmlBufUse(buf->buffer);
|
|
*mem = xmlStrndup(xmlBufContent(buf->buffer), *size);
|
|
}
|
|
}
|
|
|
|
xmlOutputBufferClose(buf);
|
|
}
|
|
|
|
/**
|
|
* Same as #htmlDocDumpMemoryFormat with `format` set to 1 which
|
|
* is typically undesired. Also see the warnings there. Use of
|
|
* this function is DISCOURAGED in favor of
|
|
* #htmlDocContentDumpFormatOutput.
|
|
*
|
|
* @param cur the document
|
|
* @param mem OUT: the memory pointer
|
|
* @param size OUT: the memory length
|
|
*/
|
|
void
|
|
htmlDocDumpMemory(xmlDoc *cur, xmlChar**mem, int *size) {
|
|
htmlDocDumpMemoryFormat(cur, mem, size, 1);
|
|
}
|
|
|
|
|
|
/************************************************************************
|
|
* *
|
|
* Dumping HTML tree content to an I/O output buffer *
|
|
* *
|
|
************************************************************************/
|
|
|
|
/**
|
|
* Serialize the HTML document's DTD, if any.
|
|
*
|
|
* Ignores `encoding` and uses the encoding of the output buffer.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param doc the document
|
|
* @param encoding the encoding string (unused)
|
|
*/
|
|
static void
|
|
htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
|
|
const char *encoding ATTRIBUTE_UNUSED) {
|
|
xmlDtdPtr cur = doc->intSubset;
|
|
|
|
if (cur == NULL)
|
|
return;
|
|
xmlOutputBufferWrite(buf, 10, "<!DOCTYPE ");
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
if (cur->ExternalID != NULL) {
|
|
xmlOutputBufferWrite(buf, 8, " PUBLIC ");
|
|
xmlOutputBufferWriteQuotedString(buf, cur->ExternalID);
|
|
if (cur->SystemID != NULL) {
|
|
xmlOutputBufferWrite(buf, 1, " ");
|
|
xmlOutputBufferWriteQuotedString(buf, cur->SystemID);
|
|
}
|
|
} else if (cur->SystemID != NULL &&
|
|
xmlStrcmp(cur->SystemID, BAD_CAST "about:legacy-compat")) {
|
|
xmlOutputBufferWrite(buf, 8, " SYSTEM ");
|
|
xmlOutputBufferWriteQuotedString(buf, cur->SystemID);
|
|
}
|
|
xmlOutputBufferWrite(buf, 2, ">\n");
|
|
}
|
|
|
|
static void
|
|
htmlSerializeUri(xmlOutputBufferPtr buf, const xmlChar *content) {
|
|
const xmlChar *tmp = content;
|
|
|
|
/*
|
|
* See appendix "B.2.1 Non-ASCII characters in URI attribute
|
|
* values" in the HTML 4.01 spec. This is also recommended
|
|
* by the HTML output method of the XSLT 1.0 spec.
|
|
*
|
|
* We also escape space and control chars.
|
|
*/
|
|
|
|
/* Skip over initial whitespace */
|
|
while (IS_WS_HTML(*tmp)) tmp++;
|
|
if (tmp > content) {
|
|
xmlOutputBufferWrite(buf, tmp - content, (char *) content);
|
|
content = tmp;
|
|
}
|
|
|
|
while (1) {
|
|
char escbuf[3];
|
|
const char *repl;
|
|
int replSize;
|
|
int c = *tmp;
|
|
|
|
while ((c > 0x20) && (c < 0x7F) && (c != '"') && (c != '&')) {
|
|
tmp += 1;
|
|
c = *tmp;
|
|
}
|
|
|
|
if (tmp > content)
|
|
xmlOutputBufferWrite(buf, tmp - content, (char *) content);
|
|
|
|
if ((c <= 0x20) || (c >= 0x7F)) {
|
|
static const char hex[16] = {
|
|
'0', '1', '2', '3', '4', '5', '6', '7',
|
|
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
|
|
};
|
|
|
|
if (c == 0)
|
|
break;
|
|
|
|
escbuf[0] = '%';
|
|
escbuf[1] = hex[(c >> 4) & 0x0F];
|
|
escbuf[2] = hex[c & 0x0F];
|
|
repl = escbuf;
|
|
replSize = 3;
|
|
} else if (c == '"') {
|
|
repl = """;
|
|
replSize = 6;
|
|
} else {
|
|
repl = "&";
|
|
replSize = 5;
|
|
}
|
|
|
|
xmlOutputBufferWrite(buf, replSize, repl);
|
|
tmp += 1;
|
|
content = tmp;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML attribute.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param cur the attribute pointer
|
|
*/
|
|
static void
|
|
htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlAttrPtr cur) {
|
|
xmlOutputBufferWrite(buf, 1, " ");
|
|
|
|
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
xmlOutputBufferWrite(buf, 1, ":");
|
|
}
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
|
|
/*
|
|
* The HTML5 spec requires to always serialize empty attribute
|
|
* values as `=""`. We should probably align with HTML5 at some
|
|
* point.
|
|
*/
|
|
if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
|
|
xmlNodePtr child;
|
|
int isUri;
|
|
|
|
xmlOutputBufferWrite(buf, 2, "=\"");
|
|
|
|
/*
|
|
* Special handling of URIs doesn't conform to HTML5 and
|
|
* should probably be removed at some point.
|
|
*/
|
|
isUri = (cur->ns == NULL) && (cur->parent != NULL) &&
|
|
(cur->parent->ns == NULL) &&
|
|
((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
|
|
(!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
|
|
(!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
|
|
((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
|
|
(!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))));
|
|
|
|
for (child = cur->children; child != NULL; child = child->next) {
|
|
if (child->type == XML_TEXT_NODE) {
|
|
const xmlChar *content = child->content;
|
|
|
|
if (content == NULL)
|
|
continue;
|
|
|
|
if (isUri) {
|
|
htmlSerializeUri(buf, content);
|
|
} else {
|
|
xmlSerializeText(buf, content, SIZE_MAX,
|
|
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
|
|
}
|
|
} else if (child->type == XML_ENTITY_REF_NODE) {
|
|
/* TODO: We should probably expand entity refs */
|
|
xmlOutputBufferWrite(buf, 1, "&");
|
|
xmlOutputBufferWriteString(buf, (char *) child->name);
|
|
xmlOutputBufferWrite(buf, 1, ";");
|
|
}
|
|
}
|
|
|
|
xmlOutputBufferWrite(buf, 1, "\"");
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML node to an output buffer.
|
|
*
|
|
* If `encoding` is specified, it is used to create or update meta
|
|
* tags containing the character encoding.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param cur the current node
|
|
* @param encoding the encoding string (optional)
|
|
* @param format should formatting newlines been added
|
|
*/
|
|
void
|
|
htmlNodeDumpInternal(xmlOutputBuffer *buf, xmlNode *cur,
|
|
const char *encoding, int format) {
|
|
xmlNodePtr root, parent, metaHead = NULL;
|
|
xmlAttrPtr attr;
|
|
const htmlElemDesc * info;
|
|
int isRaw = 0;
|
|
|
|
xmlInitParser();
|
|
|
|
if ((cur == NULL) || (buf == NULL)) {
|
|
return;
|
|
}
|
|
|
|
root = cur;
|
|
parent = cur->parent;
|
|
while (1) {
|
|
switch (cur->type) {
|
|
case XML_HTML_DOCUMENT_NODE:
|
|
case XML_DOCUMENT_NODE:
|
|
if (((xmlDocPtr) cur)->intSubset != NULL) {
|
|
htmlDtdDumpOutput(buf, (xmlDocPtr) cur, NULL);
|
|
}
|
|
if (cur->children != NULL) {
|
|
/* Always validate cur->parent when descending. */
|
|
if (cur->parent == parent) {
|
|
parent = cur;
|
|
cur = cur->children;
|
|
continue;
|
|
}
|
|
} else {
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
}
|
|
break;
|
|
|
|
case XML_ELEMENT_NODE: {
|
|
htmlMetaEncoding menc;
|
|
int isMeta = 0;
|
|
int addMeta = 0;
|
|
|
|
/*
|
|
* Some users like lxml are known to pass nodes with a corrupted
|
|
* tree structure. Fall back to a recursive call to handle this
|
|
* case.
|
|
*/
|
|
if ((cur->parent != parent) && (cur->children != NULL)) {
|
|
htmlNodeDumpInternal(buf, cur, encoding, format);
|
|
break;
|
|
}
|
|
|
|
/*
|
|
* Get specific HTML info for that node.
|
|
*/
|
|
if (cur->ns == NULL)
|
|
info = htmlTagLookup(cur->name);
|
|
else
|
|
info = NULL;
|
|
|
|
if (encoding != NULL) {
|
|
isMeta = htmlParseMetaEncoding(cur, &menc);
|
|
|
|
/*
|
|
* Don't add meta tag for "HTML" encoding.
|
|
*/
|
|
if ((xmlStrcasecmp(BAD_CAST encoding,
|
|
BAD_CAST "HTML") != 0) &&
|
|
(xmlStrcasecmp(cur->name, BAD_CAST "head") == 0) &&
|
|
(parent != NULL) &&
|
|
(xmlStrcasecmp(parent->name, BAD_CAST "html") == 0) &&
|
|
(parent->parent != NULL) &&
|
|
(parent->parent->parent == NULL) &&
|
|
(metaHead == NULL)) {
|
|
xmlNodePtr n;
|
|
|
|
metaHead = cur;
|
|
addMeta = 1;
|
|
|
|
for (n = cur->children; n != NULL; n = n->next) {
|
|
int unused;
|
|
|
|
if (htmlFindMetaEncodingAttr(n, &unused) != NULL) {
|
|
metaHead = NULL;
|
|
addMeta = 0;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
xmlOutputBufferWrite(buf, 1, "<");
|
|
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
xmlOutputBufferWrite(buf, 1, ":");
|
|
}
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
if (cur->nsDef)
|
|
xmlNsListDumpOutput(buf, cur->nsDef);
|
|
attr = cur->properties;
|
|
while (attr != NULL) {
|
|
if ((!isMeta) || (attr != menc.attr)) {
|
|
htmlAttrDumpOutput(buf, attr);
|
|
} else {
|
|
xmlOutputBufferWrite(buf, 1, " ");
|
|
xmlOutputBufferWriteString(buf, (char *) attr->name);
|
|
|
|
xmlOutputBufferWrite(buf, 2, "=\"");
|
|
xmlSerializeText(buf, menc.attrValue, menc.off.start,
|
|
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
|
|
xmlSerializeText(buf, BAD_CAST encoding, SIZE_MAX,
|
|
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
|
|
xmlSerializeText(buf, menc.attrValue + menc.off.end,
|
|
menc.off.size - menc.off.end,
|
|
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
|
|
xmlOutputBufferWrite(buf, 1, "\"");
|
|
}
|
|
attr = attr->next;
|
|
}
|
|
|
|
if ((info != NULL) && (info->empty)) {
|
|
xmlOutputBufferWrite(buf, 1, ">");
|
|
} else if (cur->children == NULL) {
|
|
if (addMeta) {
|
|
xmlOutputBufferWrite(buf, 16, "><meta charset=\"");
|
|
xmlSerializeText(buf, BAD_CAST encoding, SIZE_MAX,
|
|
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
|
|
xmlOutputBufferWrite(buf, 4, "\"></");
|
|
} else {
|
|
xmlOutputBufferWrite(buf, 3, "></");
|
|
}
|
|
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
xmlOutputBufferWriteString(buf,
|
|
(const char *)cur->ns->prefix);
|
|
xmlOutputBufferWrite(buf, 1, ":");
|
|
}
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
xmlOutputBufferWrite(buf, 1, ">");
|
|
} else {
|
|
xmlOutputBufferWrite(buf, 1, ">");
|
|
if ((format) &&
|
|
((addMeta) ||
|
|
((info != NULL) && (!info->isinline) &&
|
|
(cur->children->type != HTML_TEXT_NODE) &&
|
|
(cur->children->type != HTML_ENTITY_REF_NODE) &&
|
|
(cur->children != cur->last) &&
|
|
(cur->name != NULL) &&
|
|
(cur->name[0] != 'p')))) /* p, pre, param */
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
if (addMeta) {
|
|
xmlOutputBufferWrite(buf, 15, "<meta charset=\"");
|
|
xmlSerializeText(buf, BAD_CAST encoding, SIZE_MAX,
|
|
XML_ESCAPE_HTML | XML_ESCAPE_ATTR);
|
|
xmlOutputBufferWrite(buf, 2, "\">");
|
|
if ((format) &&
|
|
(cur->children->type != HTML_TEXT_NODE) &&
|
|
(cur->children->type != HTML_ENTITY_REF_NODE))
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
}
|
|
|
|
if ((info != NULL) && (info->dataMode >= DATA_RAWTEXT))
|
|
isRaw = 1;
|
|
|
|
parent = cur;
|
|
cur = cur->children;
|
|
continue;
|
|
}
|
|
|
|
if ((format) && (cur->next != NULL) &&
|
|
(info != NULL) && (!info->isinline)) {
|
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
|
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
|
(parent != NULL) &&
|
|
(parent->name != NULL) &&
|
|
(parent->name[0] != 'p')) /* p, pre, param */
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case XML_ATTRIBUTE_NODE:
|
|
htmlAttrDumpOutput(buf, (xmlAttrPtr) cur);
|
|
break;
|
|
|
|
case HTML_TEXT_NODE:
|
|
if (cur->content == NULL)
|
|
break;
|
|
if ((cur->name == (const xmlChar *)xmlStringTextNoenc) ||
|
|
(isRaw)) {
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
} else {
|
|
xmlSerializeText(buf, cur->content, SIZE_MAX, XML_ESCAPE_HTML);
|
|
}
|
|
break;
|
|
|
|
case HTML_COMMENT_NODE:
|
|
if (cur->content != NULL) {
|
|
xmlOutputBufferWrite(buf, 4, "<!--");
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
xmlOutputBufferWrite(buf, 3, "-->");
|
|
}
|
|
break;
|
|
|
|
case HTML_PI_NODE:
|
|
if (cur->name != NULL) {
|
|
xmlOutputBufferWrite(buf, 2, "<?");
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
if (cur->content != NULL) {
|
|
xmlOutputBufferWrite(buf, 1, " ");
|
|
xmlOutputBufferWriteString(buf,
|
|
(const char *)cur->content);
|
|
}
|
|
xmlOutputBufferWrite(buf, 1, ">");
|
|
}
|
|
break;
|
|
|
|
case HTML_ENTITY_REF_NODE:
|
|
xmlOutputBufferWrite(buf, 1, "&");
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
xmlOutputBufferWrite(buf, 1, ";");
|
|
break;
|
|
|
|
case HTML_PRESERVE_NODE:
|
|
if (cur->content != NULL) {
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->content);
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
while (1) {
|
|
if (cur == root)
|
|
return;
|
|
if (cur->next != NULL) {
|
|
cur = cur->next;
|
|
break;
|
|
}
|
|
|
|
isRaw = 0;
|
|
|
|
cur = parent;
|
|
/* cur->parent was validated when descending. */
|
|
parent = cur->parent;
|
|
|
|
if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
|
|
(cur->type == XML_DOCUMENT_NODE)) {
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
} else {
|
|
if ((format) && (cur->ns == NULL))
|
|
info = htmlTagLookup(cur->name);
|
|
else
|
|
info = NULL;
|
|
|
|
if ((format) && (info != NULL) && (!info->isinline) &&
|
|
(cur->last->type != HTML_TEXT_NODE) &&
|
|
(cur->last->type != HTML_ENTITY_REF_NODE) &&
|
|
((cur->children != cur->last) || (cur == metaHead)) &&
|
|
(cur->name != NULL) &&
|
|
(cur->name[0] != 'p')) /* p, pre, param */
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
|
|
xmlOutputBufferWrite(buf, 2, "</");
|
|
if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
|
|
xmlOutputBufferWrite(buf, 1, ":");
|
|
}
|
|
xmlOutputBufferWriteString(buf, (const char *)cur->name);
|
|
xmlOutputBufferWrite(buf, 1, ">");
|
|
|
|
if ((format) && (info != NULL) && (!info->isinline) &&
|
|
(cur->next != NULL)) {
|
|
if ((cur->next->type != HTML_TEXT_NODE) &&
|
|
(cur->next->type != HTML_ENTITY_REF_NODE) &&
|
|
(parent != NULL) &&
|
|
(parent->name != NULL) &&
|
|
(parent->name[0] != 'p')) /* p, pre, param */
|
|
xmlOutputBufferWrite(buf, 1, "\n");
|
|
}
|
|
|
|
if (cur == metaHead)
|
|
metaHead = NULL;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML node to an output buffer.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param doc the document (unused)
|
|
* @param cur the current node
|
|
* @param encoding the encoding string (unused)
|
|
* @param format should formatting newlines been added
|
|
*/
|
|
void
|
|
htmlNodeDumpFormatOutput(xmlOutputBuffer *buf,
|
|
xmlDoc *doc ATTRIBUTE_UNUSED, xmlNode *cur,
|
|
const char *encoding ATTRIBUTE_UNUSED, int format) {
|
|
htmlNodeDumpInternal(buf, cur, NULL, format);
|
|
}
|
|
|
|
/**
|
|
* Same as #htmlNodeDumpFormatOutput with `format` set to 1 which is
|
|
* typically undesired. Use of this function is DISCOURAGED in favor
|
|
* of #htmlNodeDumpFormatOutput.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param doc the document (unused)
|
|
* @param cur the current node
|
|
* @param encoding the encoding string (unused)
|
|
*/
|
|
void
|
|
htmlNodeDumpOutput(xmlOutputBuffer *buf, xmlDoc *doc ATTRIBUTE_UNUSED,
|
|
xmlNode *cur, const char *encoding ATTRIBUTE_UNUSED) {
|
|
htmlNodeDumpInternal(buf, cur, NULL, 1);
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML document to an output buffer.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param cur the document
|
|
* @param encoding the encoding string (unused)
|
|
* @param format should formatting newlines been added
|
|
*/
|
|
void
|
|
htmlDocContentDumpFormatOutput(xmlOutputBuffer *buf, xmlDoc *cur,
|
|
const char *encoding ATTRIBUTE_UNUSED,
|
|
int format) {
|
|
htmlNodeDumpInternal(buf, (xmlNodePtr) cur, NULL, format);
|
|
}
|
|
|
|
/**
|
|
* Same as #htmlDocContentDumpFormatOutput with `format` set to 1
|
|
* which is typically undesired. Use of this function is DISCOURAGED
|
|
* in favor of #htmlDocContentDumpFormatOutput.
|
|
*
|
|
* @param buf the HTML buffer output
|
|
* @param cur the document
|
|
* @param encoding the encoding string (unused)
|
|
*/
|
|
void
|
|
htmlDocContentDumpOutput(xmlOutputBuffer *buf, xmlDoc *cur,
|
|
const char *encoding ATTRIBUTE_UNUSED) {
|
|
htmlNodeDumpInternal(buf, (xmlNodePtr) cur, NULL, 1);
|
|
}
|
|
|
|
/************************************************************************
|
|
* *
|
|
* Saving functions front-ends *
|
|
* *
|
|
************************************************************************/
|
|
|
|
/**
|
|
* Serialize an HTML document to an open `FILE`.
|
|
*
|
|
* Uses the encoding of the document. If the document has no
|
|
* encoding, ASCII with HTML 4.0 named character entities will
|
|
* be used. This is inefficient compared to UTF-8 and might be
|
|
* changed in a future version.
|
|
*
|
|
* Enables "formatting" unconditionally which is typically
|
|
* undesired.
|
|
*
|
|
* Use of this function is DISCOURAGED in favor of
|
|
* #htmlNodeDumpFileFormat.
|
|
*
|
|
* @param f the FILE*
|
|
* @param cur the document
|
|
* @returns the number of bytes written or -1 in case of failure.
|
|
*/
|
|
int
|
|
htmlDocDump(FILE *f, xmlDoc *cur) {
|
|
xmlOutputBufferPtr buf;
|
|
xmlCharEncodingHandlerPtr handler = NULL;
|
|
int ret;
|
|
|
|
xmlInitParser();
|
|
|
|
if ((cur == NULL) || (f == NULL)) {
|
|
return(-1);
|
|
}
|
|
|
|
if (htmlFindOutputEncoder((char *) cur->encoding, &handler) != XML_ERR_OK)
|
|
return(-1);
|
|
buf = xmlOutputBufferCreateFile(f, handler);
|
|
if (buf == NULL) {
|
|
xmlCharEncCloseFunc(handler);
|
|
return(-1);
|
|
}
|
|
htmlDocContentDumpOutput(buf, cur, NULL);
|
|
|
|
ret = xmlOutputBufferClose(buf);
|
|
return(ret);
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML document to a file.
|
|
*
|
|
* Same as #htmlSaveFileFormat with `encoding` set to NULL and
|
|
* `format` set to 1 which is typically undesired.
|
|
*
|
|
* Use of this function is DISCOURAGED in favor of
|
|
* #htmlSaveFileFormat.
|
|
*
|
|
* @param filename the filename (or URL)
|
|
* @param cur the document
|
|
* @returns the number of bytes written or -1 in case of failure.
|
|
*/
|
|
int
|
|
htmlSaveFile(const char *filename, xmlDoc *cur) {
|
|
return(htmlSaveFileFormat(filename, cur, NULL, 1));
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML document to a file using a given encoding.
|
|
*
|
|
* If `filename` is `"-"`, stdout is used. This is potentially
|
|
* insecure and might be changed in a future version.
|
|
*
|
|
* If encoding is NULL, ASCII with HTML 4.0 named character entities
|
|
* will be used. This is inefficient compared to UTF-8 and might be
|
|
* changed in a future version.
|
|
*
|
|
* Sets or updates meta tags containing the character encoding.
|
|
*
|
|
* @param filename the filename
|
|
* @param cur the document
|
|
* @param format should formatting newlines been added
|
|
* @param encoding the document encoding (optional)
|
|
* @returns the number of bytes written or -1 in case of failure.
|
|
*/
|
|
int
|
|
htmlSaveFileFormat(const char *filename, xmlDoc *cur,
|
|
const char *encoding, int format) {
|
|
xmlOutputBufferPtr buf;
|
|
xmlCharEncodingHandlerPtr handler = NULL;
|
|
int ret;
|
|
|
|
if ((cur == NULL) || (filename == NULL))
|
|
return(-1);
|
|
|
|
xmlInitParser();
|
|
|
|
if (htmlFindOutputEncoder(encoding, &handler) != XML_ERR_OK)
|
|
return(-1);
|
|
|
|
/*
|
|
* save the content to a temp buffer.
|
|
*/
|
|
buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
|
|
if (buf == NULL) {
|
|
xmlCharEncCloseFunc(handler);
|
|
return(0);
|
|
}
|
|
|
|
htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
|
|
|
|
ret = xmlOutputBufferClose(buf);
|
|
return(ret);
|
|
}
|
|
|
|
/**
|
|
* Serialize an HTML document to a file.
|
|
*
|
|
* Same as #htmlSaveFileFormat with `format` set to 1 which is
|
|
* typically undesired. Also see the warnings there. Use of this
|
|
* function is DISCOURAGED in favor of #htmlSaveFileFormat.
|
|
*
|
|
* @param filename the filename
|
|
* @param cur the document
|
|
* @param encoding the document encoding
|
|
* @returns the number of bytes written or -1 in case of failure.
|
|
*/
|
|
int
|
|
htmlSaveFileEnc(const char *filename, xmlDoc *cur, const char *encoding) {
|
|
return(htmlSaveFileFormat(filename, cur, encoding, 1));
|
|
}
|
|
|
|
#endif /* LIBXML_OUTPUT_ENABLED */
|
|
|
|
#endif /* LIBXML_HTML_ENABLED */
|