mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-26 00:37:43 +03:00 
			
		
		
		
	doc: Misc fixes to HTML tree docs
This commit is contained in:
		
							
								
								
									
										163
									
								
								HTMLtree.c
									
									
									
									
									
								
							
							
						
						
									
										163
									
								
								HTMLtree.c
									
									
									
									
									
								
							| @@ -37,9 +37,15 @@ | ||||
| /** | ||||
|  * @param doc  the document | ||||
|  * | ||||
|  * Encoding definition lookup in the Meta tags | ||||
|  * Look up an encoding declaration in the meta tags. | ||||
|  * | ||||
|  * @returns the current encoding as flagged in the HTML source | ||||
|  * Does not support `<meta charset="">` yet. Only supports deprecated | ||||
|  * `<meta http-equiv="Content-Type" content="">`. | ||||
|  * | ||||
|  * The returned string points into attribute content. It should be | ||||
|  * copied before modifying or freeing nodes. | ||||
|  * | ||||
|  * @returns the encoding or NULL if not found. | ||||
|  */ | ||||
| const xmlChar * | ||||
| htmlGetMetaEncoding(htmlDocPtr doc) { | ||||
| @@ -148,9 +154,12 @@ found_content: | ||||
|  * @param doc  the document | ||||
|  * @param encoding  the encoding string | ||||
|  * | ||||
|  * Sets the current encoding in the Meta tags | ||||
|  * NOTE: this will not change the document content encoding, just | ||||
|  * the META flag associated. | ||||
|  * Creates or updates a meta tag with an encoding declaration. | ||||
|  * | ||||
|  * Does not support `<meta charset="">` yet. Only supports deprecated | ||||
|  * `<meta http-equiv="Content-Type" content="">`. | ||||
|  * | ||||
|  * NOTE: This will not change the document content encoding. | ||||
|  * | ||||
|  * @returns 0 in case of success and -1 in case of error | ||||
|  */ | ||||
| @@ -307,7 +316,8 @@ static const char* const htmlBooleanAttrs[] = { | ||||
|  * | ||||
|  * @deprecated Internal function, don't use. | ||||
|  * | ||||
|  * Determine if a given attribute is a boolean attribute. | ||||
|  * Determine if a given attribute is a boolean attribute. This | ||||
|  * doesn't handle HTML5. | ||||
|  * | ||||
|  * @returns false if the attribute is not boolean, true otherwise. | ||||
|  */ | ||||
| @@ -346,11 +356,11 @@ htmlFindOutputEncoder(const char *encoding, xmlCharEncodingHandler **out) { | ||||
|  * @param buf  the xmlBufPtr output | ||||
|  * @param doc  the document | ||||
|  * @param cur  the current node | ||||
|  * @param format  should formatting spaces been added | ||||
|  * @param format  should formatting newlines be added | ||||
|  * | ||||
|  * Dump an HTML node, recursive behaviour,children are printed too. | ||||
|  * Serialize an HTML node to an xmlBuf. | ||||
|  * | ||||
|  * @returns the number of byte written or -1 in case of error | ||||
|  * @returns the number of bytes written or -1 in case of error | ||||
|  */ | ||||
| static size_t | ||||
| htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, | ||||
| @@ -391,10 +401,9 @@ htmlBufNodeDumpFormat(xmlBufPtr buf, xmlDocPtr doc, xmlNodePtr cur, | ||||
|  * @param doc  the document | ||||
|  * @param cur  the current node | ||||
|  * | ||||
|  * Dump an HTML node, recursive behaviour,children are printed too, | ||||
|  * and formatting returns are added. | ||||
|  * Serialize an HTML node to an xmlBuffer. Always uses UTF-8. | ||||
|  * | ||||
|  * @returns the number of byte written or -1 in case of error | ||||
|  * @returns the number of bytes written or -1 in case of error | ||||
|  */ | ||||
| int | ||||
| htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { | ||||
| @@ -423,14 +432,16 @@ htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) { | ||||
|  * @param out  the FILE pointer | ||||
|  * @param doc  the document | ||||
|  * @param cur  the current node | ||||
|  * @param encoding  the document encoding | ||||
|  * @param format  should formatting spaces been added | ||||
|  * @param encoding  the document encoding (optional) | ||||
|  * @param format  should formatting newlines be added | ||||
|  * | ||||
|  * Dump an HTML node, recursive behaviour,children are printed too. | ||||
|  * Serialize an HTML node to an open `FILE`. | ||||
|  * | ||||
|  * TODO: if encoding == NULL try to save in the doc encoding | ||||
|  * If encoding is NULL, ASCII with HTML 4.0 named character entities | ||||
|  * will be used. This is inefficient compared to UTF-8 and might be | ||||
|  * changed in a future version. | ||||
|  * | ||||
|  * @returns the number of byte written or -1 in case of failure. | ||||
|  * @returns the number of bytes written or -1 in case of failure. | ||||
|  */ | ||||
| int | ||||
| htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, | ||||
| @@ -461,8 +472,9 @@ htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc, | ||||
|  * @param doc  the document | ||||
|  * @param cur  the current node | ||||
|  * | ||||
|  * Dump an HTML node, recursive behaviour,children are printed too, | ||||
|  * and formatting returns are added. | ||||
|  * Same as htmlNodeDumpFileFormat() with `format` set to 1 which is | ||||
|  * typically undesired. Use of this function is DISCOURAGED in favor | ||||
|  * of htmlNodeDumpFileFormat(). | ||||
|  */ | ||||
| void | ||||
| htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { | ||||
| @@ -473,10 +485,19 @@ htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) { | ||||
|  * @param cur  the document | ||||
|  * @param mem  OUT: the memory pointer | ||||
|  * @param size  OUT: the memory length | ||||
|  * @param format  should formatting spaces been added | ||||
|  * @param format  should formatting newlines be added | ||||
|  * | ||||
|  * Dump an HTML document in memory and return the xmlChar * and it's size. | ||||
|  * It's up to the caller to free the memory. | ||||
|  * Serialize an HTML document to memory, also returning the size of | ||||
|  * the result. It's up to the caller to free the memory. | ||||
|  * | ||||
|  * WARNING: Uses the encoding from a deprecated meta tag, see | ||||
|  * htmlGetMetaEncoding(). This is typically undesired. If no such | ||||
|  * tag was found, ASCII with HTML 4.0 named character entities will | ||||
|  * be used. This is inefficient compared to UTF-8 and might be | ||||
|  * changed in a future version. | ||||
|  * | ||||
|  * Use of this function is therefore DISCOURAGED in favor of | ||||
|  * htmlDocContentDumpFormatOutput(). | ||||
|  */ | ||||
| void | ||||
| htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { | ||||
| @@ -522,12 +543,14 @@ htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) { | ||||
|  * @param mem  OUT: the memory pointer | ||||
|  * @param size  OUT: the memory length | ||||
|  * | ||||
|  * Dump an HTML document in memory and return the xmlChar * and it's size. | ||||
|  * It's up to the caller to free the memory. | ||||
|  * Same as htmlDocDumpMemoryFormat() with `format` set to 1 which | ||||
|  * is typically undesired. Also see the warnings there. Use of | ||||
|  * this function is DISCOURAGED in favor of | ||||
|  * htmlDocContentDumpFormatOutput(). | ||||
|  */ | ||||
| void | ||||
| htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { | ||||
| 	htmlDocDumpMemoryFormat(cur, mem, size, 1); | ||||
|     htmlDocDumpMemoryFormat(cur, mem, size, 1); | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -540,11 +563,11 @@ htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) { | ||||
| /** | ||||
|  * @param buf  the HTML buffer output | ||||
|  * @param doc  the document | ||||
|  * @param encoding  the encoding string | ||||
|  * @param encoding  the encoding string (unused) | ||||
|  * | ||||
|  * TODO: check whether encoding is needed | ||||
|  * Serialize the HTML document's DTD, if any. | ||||
|  * | ||||
|  * Dump the HTML document DTD, if any. | ||||
|  * Ignores `encoding` and uses the encoding of the output buffer. | ||||
|  */ | ||||
| static void | ||||
| htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, | ||||
| @@ -575,7 +598,7 @@ htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, | ||||
|  * @param doc  the document | ||||
|  * @param cur  the attribute pointer | ||||
|  * | ||||
|  * Dump an HTML attribute | ||||
|  * Serialize an HTML attribute. | ||||
|  */ | ||||
| static void | ||||
| htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { | ||||
| @@ -644,9 +667,11 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur) { | ||||
|  * @param doc  the document | ||||
|  * @param cur  the current node | ||||
|  * @param encoding  the encoding string (unused) | ||||
|  * @param format  should formatting spaces been added | ||||
|  * @param format  should formatting newlines be added | ||||
|  * | ||||
|  * Dump an HTML node, recursive behaviour,children are printed too. | ||||
|  * Serialize an HTML node to an output buffer. | ||||
|  * | ||||
|  * Ignores `encoding` and uses the encoding of the output buffer. | ||||
|  */ | ||||
| void | ||||
| htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, | ||||
| @@ -879,8 +904,11 @@ htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, | ||||
|  * @param cur  the current node | ||||
|  * @param encoding  the encoding string (unused) | ||||
|  * | ||||
|  * Dump an HTML node, recursive behaviour,children are printed too, | ||||
|  * and formatting returns/spaces are added. | ||||
|  * Same as htmlNodeDumpFormatOutput() with `format` set to 1 which is | ||||
|  * typically undesired. Use of this function is DISCOURAGED in favor | ||||
|  * of htmlNodeDumpFormatOutput(). | ||||
|  * | ||||
|  * Ignores `encoding` and uses the encoding of the output buffer. | ||||
|  */ | ||||
| void | ||||
| htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, | ||||
| @@ -892,9 +920,11 @@ htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, | ||||
|  * @param buf  the HTML buffer output | ||||
|  * @param cur  the document | ||||
|  * @param encoding  the encoding string (unused) | ||||
|  * @param format  should formatting spaces been added | ||||
|  * @param format  should formatting newlines be added | ||||
|  * | ||||
|  * Dump an HTML document. | ||||
|  * Serialize an HTML document to an output buffer. | ||||
|  * | ||||
|  * Ignores `encoding` and uses the encoding of the output buffer. | ||||
|  */ | ||||
| void | ||||
| htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, | ||||
| @@ -915,7 +945,11 @@ htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, | ||||
|  * @param cur  the document | ||||
|  * @param encoding  the encoding string (unused) | ||||
|  * | ||||
|  * Dump an HTML document. Formatting return/spaces are added. | ||||
|  * Same as htmlDocContentDumpFormatOutput() with `format` set to 1 | ||||
|  * which is typically undesired. Use of this function is DISCOURAGED | ||||
|  * in favor of htmlDocContentDumpFormatOutput(). | ||||
|  * | ||||
|  * Ignores `encoding` and uses the encoding of the output buffer. | ||||
|  */ | ||||
| void | ||||
| htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, | ||||
| @@ -933,9 +967,21 @@ htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur, | ||||
|  * @param f  the FILE* | ||||
|  * @param cur  the document | ||||
|  * | ||||
|  * Dump an HTML document to an open FILE. | ||||
|  * Serialize an HTML document to an open `FILE`. | ||||
|  * | ||||
|  * @returns the number of byte written or -1 in case of failure. | ||||
|  * WARNING: Uses the encoding from a deprecated meta tag, see | ||||
|  * htmlGetMetaEncoding(). This is typically undesired. If no such | ||||
|  * tag was found, ASCII with HTML 4.0 named character entities will | ||||
|  * be used. This is inefficient compared to UTF-8 and might be | ||||
|  * changed in a future version. | ||||
|  * | ||||
|  * Also enables "formatting" unconditionally which is typically | ||||
|  * undesired. | ||||
|  * | ||||
|  * Use of this function is DISCOURAGED in favor of | ||||
|  * htmlNodeDumpFileFormat(). | ||||
|  * | ||||
|  * @returns the number of bytes written or -1 in case of failure. | ||||
|  */ | ||||
| int | ||||
| htmlDocDump(FILE *f, xmlDocPtr cur) { | ||||
| @@ -966,9 +1012,23 @@ htmlDocDump(FILE *f, xmlDocPtr cur) { | ||||
|  * @param filename  the filename (or URL) | ||||
|  * @param cur  the document | ||||
|  * | ||||
|  * Dump an HTML document to a file. If `filename` is "-" the stdout file is | ||||
|  * used. | ||||
|  * @returns the number of byte written or -1 in case of failure. | ||||
|  * Serialize an HTML document to a file. If `filename` is `"-"`, | ||||
|  * stdout is used. This is potentially insecure and might be | ||||
|  * changed in a future version. | ||||
|  * | ||||
|  * WARNING: Uses the encoding from a deprecated meta tag, see | ||||
|  * htmlGetMetaEncoding(). This is typically undesired. If no such | ||||
|  * tag was found, ASCII with HTML 4.0 named character entities will | ||||
|  * be used. This is inefficient compared to UTF-8 and might be | ||||
|  * changed in a future version. | ||||
|  * | ||||
|  * Also enables "formatting" unconditionally which is typically | ||||
|  * undesired. | ||||
|  * | ||||
|  * Use of this function is DISCOURAGED in favor of | ||||
|  * htmlSaveFileFormat(). | ||||
|  * | ||||
|  * @returns the number of bytes written or -1 in case of failure. | ||||
|  */ | ||||
| int | ||||
| htmlSaveFile(const char *filename, xmlDocPtr cur) { | ||||
| @@ -998,12 +1058,18 @@ htmlSaveFile(const char *filename, xmlDocPtr cur) { | ||||
| /** | ||||
|  * @param filename  the filename | ||||
|  * @param cur  the document | ||||
|  * @param format  should formatting spaces been added | ||||
|  * @param encoding  the document encoding | ||||
|  * @param format  should formatting newlines be added | ||||
|  * @param encoding  the document encoding (optional) | ||||
|  * | ||||
|  * Dump an HTML document to a file using a given encoding. | ||||
|  * Serialize an HTML document to a file using a given encoding. | ||||
|  * If `filename` is `"-"`, stdout is used. This is potentially | ||||
|  * insecure and might be changed in a future version. | ||||
|  * | ||||
|  * @returns the number of byte written or -1 in case of failure. | ||||
|  * If encoding is NULL, ASCII with HTML 4.0 named character entities | ||||
|  * will be used. This is inefficient compared to UTF-8 and might be | ||||
|  * changed in a future version. | ||||
|  * | ||||
|  * @returns the number of bytes written or -1 in case of failure. | ||||
|  */ | ||||
| int | ||||
| htmlSaveFileFormat(const char *filename, xmlDocPtr cur, | ||||
| @@ -1042,10 +1108,11 @@ htmlSaveFileFormat(const char *filename, xmlDocPtr cur, | ||||
|  * @param cur  the document | ||||
|  * @param encoding  the document encoding | ||||
|  * | ||||
|  * Dump an HTML document to a file using a given encoding | ||||
|  * and formatting returns/spaces are added. | ||||
|  * Same as htmlSaveFileFormat() with `format` set to 1 which is | ||||
|  * typically undesired. Also see the warnings there. Use of this | ||||
|  * function is DISCOURAGED in favor of htmlSaveFileFormat(). | ||||
|  * | ||||
|  * @returns the number of byte written or -1 in case of failure. | ||||
|  * @returns the number of bytes written or -1 in case of failure. | ||||
|  */ | ||||
| int | ||||
| htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) { | ||||
|   | ||||
| @@ -1,10 +1,10 @@ | ||||
| /** | ||||
|  * @file | ||||
|  *  | ||||
|  * @brief specific APIs to process HTML tree, especially serialization | ||||
|  * @brief HTML documents | ||||
|  *  | ||||
|  * this module implements a few function needed to process | ||||
|  *              tree in an HTML specific way. | ||||
|  * This module implements functions to work with HTML documents, | ||||
|  * most of them related to serialization. | ||||
|  * | ||||
|  * @copyright See Copyright for the status of this software. | ||||
|  * | ||||
| @@ -25,31 +25,11 @@ | ||||
| extern "C" { | ||||
| #endif | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * Macro. A text node in a HTML document is really implemented | ||||
|  * the same way as a text node in an XML document. | ||||
|  */ | ||||
| /* Deprecated */ | ||||
| #define HTML_TEXT_NODE		XML_TEXT_NODE | ||||
| /** | ||||
|  * Macro. An entity reference in a HTML document is really implemented | ||||
|  * the same way as an entity reference in an XML document. | ||||
|  */ | ||||
| #define HTML_ENTITY_REF_NODE	XML_ENTITY_REF_NODE | ||||
| /** | ||||
|  * Macro. A comment in a HTML document is really implemented | ||||
|  * the same way as a comment in an XML document. | ||||
|  */ | ||||
| #define HTML_COMMENT_NODE	XML_COMMENT_NODE | ||||
| /** | ||||
|  * Macro. A preserved node in a HTML document is really implemented | ||||
|  * the same way as a CDATA section in an XML document. | ||||
|  */ | ||||
| #define HTML_PRESERVE_NODE	XML_CDATA_SECTION_NODE | ||||
| /** | ||||
|  * Macro. A processing instruction in a HTML document is really implemented | ||||
|  * the same way as a processing instruction in an XML document. | ||||
|  */ | ||||
| #define HTML_PI_NODE		XML_PI_NODE | ||||
|  | ||||
| XMLPUBFUN htmlDocPtr | ||||
| @@ -73,16 +53,25 @@ XMLPUBFUN void | ||||
| 					 xmlChar **mem, | ||||
| 					 int *size, | ||||
| 					 int format); | ||||
| XMLPUBFUN int | ||||
| 		htmlDocDump		(FILE *f, | ||||
| 					 xmlDocPtr cur); | ||||
| XMLPUBFUN int | ||||
| 		htmlSaveFile		(const char *filename, | ||||
| 					 xmlDocPtr cur); | ||||
| XMLPUBFUN int | ||||
| 		htmlSaveFileEnc		(const char *filename, | ||||
| 					 xmlDocPtr cur, | ||||
| 					 const char *encoding); | ||||
| XMLPUBFUN int | ||||
| 		htmlSaveFileFormat	(const char *filename, | ||||
| 					 xmlDocPtr cur, | ||||
| 					 const char *encoding, | ||||
| 					 int format); | ||||
| XMLPUBFUN int | ||||
| 		htmlNodeDump		(xmlBufferPtr buf, | ||||
| 					 xmlDocPtr doc, | ||||
| 					 xmlNodePtr cur); | ||||
| XMLPUBFUN int | ||||
| 		htmlDocDump		(FILE *f, | ||||
| 					 xmlDocPtr cur); | ||||
| XMLPUBFUN void | ||||
| 		htmlNodeDumpFile	(FILE *out, | ||||
| 					 xmlDocPtr doc, | ||||
| @@ -93,16 +82,12 @@ XMLPUBFUN int | ||||
| 					 xmlNodePtr cur, | ||||
| 					 const char *encoding, | ||||
| 					 int format); | ||||
| XMLPUBFUN int | ||||
| 		htmlSaveFileEnc		(const char *filename, | ||||
| 					 xmlDocPtr cur, | ||||
| 					 const char *encoding); | ||||
| XMLPUBFUN int | ||||
| 		htmlSaveFileFormat	(const char *filename, | ||||
| 					 xmlDocPtr cur, | ||||
| 					 const char *encoding, | ||||
| 					 int format); | ||||
|  | ||||
| XMLPUBFUN void | ||||
| 		htmlNodeDumpOutput	(xmlOutputBufferPtr buf, | ||||
| 					 xmlDocPtr doc, | ||||
| 					 xmlNodePtr cur, | ||||
| 					 const char *encoding); | ||||
| XMLPUBFUN void | ||||
| 		htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, | ||||
| 					 xmlDocPtr doc, | ||||
| @@ -118,11 +103,6 @@ XMLPUBFUN void | ||||
| 					 xmlDocPtr cur, | ||||
| 					 const char *encoding, | ||||
| 					 int format); | ||||
| XMLPUBFUN void | ||||
| 		htmlNodeDumpOutput	(xmlOutputBufferPtr buf, | ||||
| 					 xmlDocPtr doc, | ||||
| 					 xmlNodePtr cur, | ||||
| 					 const char *encoding); | ||||
|  | ||||
| #endif /* LIBXML_OUTPUT_ENABLED */ | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user