diff --git a/ChangeLog b/ChangeLog index b0cb0312..5443e2c4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +Wed Nov 27 20:36:08 CET 2002 Daniel Veillard + + * HTMLtree.c include/libxml/HTMLtree.h: applied the same kind + of refactoring to the HTML saving code. + * doc/libxml2-*.xml doc/API*.html: slight API changes got reflected + in the doc. + Wed Nov 27 12:40:16 CET 2002 Daniel Veillard * tree.c include/libxml/tree.h: refactored the XML dump of a node diff --git a/HTMLtree.c b/HTMLtree.c index 7650184b..58c12ae7 100644 --- a/HTMLtree.c +++ b/HTMLtree.c @@ -11,6 +11,8 @@ #include "libxml.h" #ifdef LIBXML_HTML_ENABLED +#include /* for memset() only ! */ + #ifdef HAVE_CTYPE_H #include #endif @@ -319,146 +321,10 @@ htmlIsBooleanAttr(const xmlChar *name) * * ************************************************************************/ -static void -htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format); -static void +static int htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format); -/** - * htmlDtdDump: - * @buf: the HTML buffer output - * @doc: the document - * - * Dump the HTML document DTD, if any. - */ -static void -htmlDtdDump(xmlBufferPtr buf, xmlDocPtr doc) { - xmlDtdPtr cur = doc->intSubset; - - if (cur == NULL) { - xmlGenericError(xmlGenericErrorContext, - "htmlDtdDump : no internal subset\n"); - return; - } - xmlBufferWriteChar(buf, "name); - if (cur->ExternalID != NULL) { - xmlBufferWriteChar(buf, " PUBLIC "); - xmlBufferWriteQuotedString(buf, cur->ExternalID); - if (cur->SystemID != NULL) { - xmlBufferWriteChar(buf, " "); - xmlBufferWriteQuotedString(buf, cur->SystemID); - } - } else if (cur->SystemID != NULL) { - xmlBufferWriteChar(buf, " SYSTEM "); - xmlBufferWriteQuotedString(buf, cur->SystemID); - } - xmlBufferWriteChar(buf, ">\n"); -} - -/** - * htmlAttrDump: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the attribute pointer - * - * Dump an HTML attribute - */ -static void -htmlAttrDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { - xmlChar *value; - - /* - * TODO: The html output method should not escape a & character - * occurring in an attribute value immediately followed by - * a { character (see Section B.7.1 of the HTML 4.0 Recommendation). - */ - - if (cur == NULL) { - xmlGenericError(xmlGenericErrorContext, - "htmlAttrDump : property == NULL\n"); - return; - } - xmlBufferWriteChar(buf, " "); - xmlBufferWriteCHAR(buf, cur->name); - if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) { - value = xmlNodeListGetString(doc, cur->children, 0); - if (value) { - xmlBufferWriteChar(buf, "="); - if ((!xmlStrcasecmp(cur->name, BAD_CAST "href")) || - (!xmlStrcasecmp(cur->name, BAD_CAST "src"))) { - xmlChar *escaped; - xmlChar *tmp = value; - - while (IS_BLANK(*tmp)) tmp++; - - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&"); - if (escaped != NULL) { - xmlBufferWriteQuotedString(buf, escaped); - xmlFree(escaped); - } else { - xmlBufferWriteQuotedString(buf, value); - } - } else { - xmlBufferWriteQuotedString(buf, value); - } - xmlFree(value); - } else { - xmlBufferWriteChar(buf, "=\"\""); - } - } -} - -/** - * htmlAttrListDump: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the first attribute pointer - * - * Dump a list of HTML attributes - */ -static void -htmlAttrListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, int format) { - int i = 0; - - if (cur == NULL) { - xmlGenericError(xmlGenericErrorContext, - "htmlAttrListDump : property == NULL\n"); - return; - } - while (cur != NULL) { - i++; - if ((format) && (i >= 5)) { - i = 0; - xmlBufferWriteChar(buf, "\n"); - } - htmlAttrDump(buf, doc, cur); - cur = cur->next; - } -} - -/** - * htmlNodeListDump: - * @buf: the HTML buffer output - * @doc: the document - * @cur: the first node - * - * Dump an HTML node list, recursive behaviour,children are printed too. - */ -static void -htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) { - if (cur == NULL) { - xmlGenericError(xmlGenericErrorContext, - "htmlNodeListDump : node == NULL\n"); - return; - } - while (cur != NULL) { - htmlNodeDumpFormat(buf, doc, cur, format); - cur = cur->next; - } -} - /** * htmlNodeDumpFormat: * @buf: the HTML buffer output @@ -467,150 +333,41 @@ htmlNodeListDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) { * @format: should formatting spaces been added * * Dump an HTML node, recursive behaviour,children are printed too. + * + * Returns the number of byte written or -1 in case of error */ -static void +static int htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, int format) { - const htmlElemDesc * info; + unsigned int use; + int ret; + xmlOutputBufferPtr outbuf; if (cur == NULL) { - xmlGenericError(xmlGenericErrorContext, - "htmlNodeDumpFormat : node == NULL\n"); - return; + return (-1); } - /* - * Special cases. - */ - if (cur->type == XML_DTD_NODE) - return; - if (cur->type == XML_HTML_DOCUMENT_NODE) { - htmlDocContentDump(buf, (xmlDocPtr) cur, format); - return; + if (buf == NULL) { + return (-1); } - if (cur->type == HTML_TEXT_NODE) { - if (cur->content != NULL) { - if (((cur->name == (const xmlChar *)xmlStringText) || - (cur->name != (const xmlChar *)xmlStringTextNoenc)) && - ((cur->parent == NULL) || - ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) && - (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) { - xmlChar *buffer; + outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer)); + if (outbuf == NULL) { + xmlGenericError(xmlGenericErrorContext, + "htmlNodeDumpFormat: out of memory!\n"); + return (-1); + } + memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer)); + outbuf->buffer = buf; + outbuf->encoder = NULL; + outbuf->writecallback = NULL; + outbuf->closecallback = NULL; + outbuf->context = NULL; + outbuf->written = 0; - buffer = xmlEncodeEntitiesReentrant(doc, cur->content); - if (buffer != NULL) { - xmlBufferWriteCHAR(buf, buffer); - xmlFree(buffer); - } - } else { - xmlBufferWriteCHAR(buf, cur->content); - } - } - return; - } - if (cur->type == HTML_COMMENT_NODE) { - if (cur->content != NULL) { - xmlBufferWriteChar(buf, ""); - } - return; - } - if (cur->type == HTML_PI_NODE) { - if (cur->name == NULL) - return; - xmlBufferWriteChar(buf, "name); - if (cur->content != NULL) { - xmlBufferWriteChar(buf, " "); - xmlBufferWriteCHAR(buf, cur->content); - } - xmlBufferWriteChar(buf, ">"); - return; - } - if (cur->type == HTML_ENTITY_REF_NODE) { - xmlBufferWriteChar(buf, "&"); - xmlBufferWriteCHAR(buf, cur->name); - xmlBufferWriteChar(buf, ";"); - return; - } - if (cur->type == HTML_PRESERVE_NODE) { - if (cur->content != NULL) { - xmlBufferWriteCHAR(buf, cur->content); - } - return; - } - - /* - * Get specific HTML info for that node. - */ - info = htmlTagLookup(cur->name); - - xmlBufferWriteChar(buf, "<"); - xmlBufferWriteCHAR(buf, cur->name); - if (cur->properties != NULL) - htmlAttrListDump(buf, doc, cur->properties, format); - - if ((info != NULL) && (info->empty)) { - xmlBufferWriteChar(buf, ">"); - if ((format) && (info != NULL) && (!info->isinline) && - (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE)) - xmlBufferWriteChar(buf, "\n"); - } - return; - } - if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) && - (cur->children == NULL)) { - if ((info != NULL) && (info->saveEndTag != 0) && - (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) && - (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) { - xmlBufferWriteChar(buf, ">"); - } else { - xmlBufferWriteChar(buf, ">name); - xmlBufferWriteChar(buf, ">"); - } - if ((format) && (info != NULL) && (!info->isinline) && - (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE)) - xmlBufferWriteChar(buf, "\n"); - } - return; - } - xmlBufferWriteChar(buf, ">"); - if ((cur->type != XML_ELEMENT_NODE) && (cur->content != NULL)) { - xmlChar *buffer; - - buffer = xmlEncodeEntitiesReentrant(doc, cur->content); - if (buffer != NULL) { - xmlBufferWriteCHAR(buf, buffer); - xmlFree(buffer); - } - } - if (cur->children != NULL) { - if ((format) && (info != NULL) && (!info->isinline) && - (cur->children->type != HTML_TEXT_NODE) && - (cur->children->type != HTML_ENTITY_REF_NODE) && - (cur->children != cur->last)) - xmlBufferWriteChar(buf, "\n"); - htmlNodeListDump(buf, doc, cur->children, format); - if ((format) && (info != NULL) && (!info->isinline) && - (cur->last->type != HTML_TEXT_NODE) && - (cur->last->type != HTML_ENTITY_REF_NODE) && - (cur->children != cur->last)) - xmlBufferWriteChar(buf, "\n"); - } - xmlBufferWriteChar(buf, "name); - xmlBufferWriteChar(buf, ">"); - if ((format) && (info != NULL) && (!info->isinline) && - (cur->next != NULL)) { - if ((cur->next->type != HTML_TEXT_NODE) && - (cur->next->type != HTML_ENTITY_REF_NODE)) - xmlBufferWriteChar(buf, "\n"); - } + use = buf->use; + htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format); + xmlFree(outbuf); + ret = buf->use - use; + return (ret); } /** @@ -621,10 +378,12 @@ htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur, * * Dump an HTML node, recursive behaviour,children are printed too, * and formatting returns are added. + * + * Returns the number of byte written or -1 in case of error */ -void +int htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { - htmlNodeDumpFormat(buf, doc, cur, 1); + return(htmlNodeDumpFormat(buf, doc, cur, 1)); } /** @@ -693,36 +452,6 @@ htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { htmlNodeDumpFileFormat(out, doc, cur, NULL, 1); } -/** - * htmlDocContentDump: - * @buf: the HTML buffer output - * @cur: the document - * - * Dump an HTML document. - */ -static void -htmlDocContentDump(xmlBufferPtr buf, xmlDocPtr cur, int format) { - int type; - - /* - * force to output the stuff as HTML, especially for entities - */ - type = cur->type; - cur->type = XML_HTML_DOCUMENT_NODE; - if (cur->intSubset != NULL) - htmlDtdDump(buf, cur); - else { - /* Default to HTML-4.0 transitional @@@@ */ - xmlBufferWriteChar(buf, ""); - - } - if (cur->children != NULL) { - htmlNodeListDump(buf, cur, cur->children, format); - } - xmlBufferWriteChar(buf, "\n"); - cur->type = (xmlElementType) type; -} - /** * htmlDocDumpMemory: * @cur: the document diff --git a/doc/APIchunk19.html b/doc/APIchunk19.html index b44afa1a..4d5597e7 100644 --- a/doc/APIchunk19.html +++ b/doc/APIchunk19.html @@ -813,6 +813,37 @@ A:link, A:visited, A:active { text-decoration: underline } xmlUTF8Strloc xmlUTF8Strpos +
provided
+
+docbCreateFileParserCtxt +docbParseFile +docbSAXParseDoc +docbSAXParseFile +ftpDataCallback +ftpListCallback +htmlCreateFileParserCtxt +htmlNewDocNoDtD +htmlParseFile +htmlSAXParseDoc +htmlSAXParseFile +xmlAutomataNewCountedTrans +xmlC14NDocSaveTo +xmlC14NExecute +xmlCreateEntityParserCtxt +xmlCreateFileParserCtxt +xmlHashFree +xmlKeepBlanksDefault +xmlListWalker +xmlNanoHTTPFetch +xmlOutputBufferCreateFilename +xmlParseFile +xmlParserInputBufferCreateFilename +xmlRecoverFile +xmlSAXParseFile +xmlSAXParseFileWithData +xmlSAXUserParseFile +xmlShellValidate +
providing
INPUT_CHUNK diff --git a/doc/APIfiles.html b/doc/APIfiles.html index b6c4a550..209d6110 100644 --- a/doc/APIfiles.html +++ b/doc/APIfiles.html @@ -1117,6 +1117,8 @@ A:link, A:visited, A:active { text-decoration: underline } close getenv strerror +snprintf +vsnprintf perror

diff --git a/doc/libxml2-api.xml b/doc/libxml2-api.xml index 12c626ae..8b33f46e 100644 --- a/doc/libxml2-api.xml +++ b/doc/libxml2-api.xml @@ -330,6 +330,8 @@ + + @@ -3040,6 +3042,8 @@ + + called when the document start being processed. @@ -3097,6 +3101,8 @@ + + Display and format a warning messages, callback. @@ -5742,7 +5748,7 @@ Dump an XML node, recursive behaviour,children are printed too. Note that format = 1 provide node indenting only if xmlIndentTreeOutput = 1 or xmlKeepBlanksDefault(0) was called - + diff --git a/doc/libxml2-refs.xml b/doc/libxml2-refs.xml index 194272bd..dfc0fba9 100644 --- a/doc/libxml2-refs.xml +++ b/doc/libxml2-refs.xml @@ -8402,6 +8402,8 @@ + + @@ -18799,6 +18801,36 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/include/libxml/HTMLtree.h b/include/libxml/HTMLtree.h index 600b0efb..30ca7f22 100644 --- a/include/libxml/HTMLtree.h +++ b/include/libxml/HTMLtree.h @@ -70,7 +70,7 @@ int htmlDocDump (FILE *f, xmlDocPtr cur); int htmlSaveFile (const char *filename, xmlDocPtr cur); -void htmlNodeDump (xmlBufferPtr buf, +int htmlNodeDump (xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur); void htmlNodeDumpFile (FILE *out,