Module HTMLparser from libxml2

Module HTMLparser from libxml2

API Menu

API Indexes

Related links

Typedef xmlParserNodeInfo htmlParserNodeInfo

int	htmlIsScriptAttribute	(const xmlChar * name)

int	htmlHandleOmittedElem	(int val)

htmlDocPtr	htmlCtxtReadDoc	(htmlParserCtxtPtr ctxt, 
				 const xmlChar * cur, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Typedef xmlNodePtr htmlNodePtr

Typedef xmlParserCtxtPtr htmlParserCtxtPtr

htmlDocPtr	htmlParseFile	(const char * filename, 
				 const char * encoding)

htmlDocPtr	htmlReadIO	(xmlInputReadCallback ioread, 
				 xmlInputCloseCallback ioclose, 
				 void * ioctx, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

int	htmlAutoCloseTag	(htmlDocPtr doc, 
				 const xmlChar * name, 
				 htmlNodePtr elem)

int	htmlParseChunk	(htmlParserCtxtPtr ctxt, 
				 const char * chunk, 
				 int size, 
				 int terminate)

const htmlElemDesc *	htmlTagLookup	(const xmlChar * tag)

htmlParserCtxtPtr	htmlCreateMemoryParserCtxt	(const char * buffer, 
				 int size)

void	htmlCtxtReset	(htmlParserCtxtPtr ctxt)

Typedef xmlSAXHandler htmlSAXHandler

int	htmlElementAllowedHere	(const htmlElemDesc * parent, 
				 const xmlChar * elt)

htmlDocPtr	htmlCtxtReadIO	(htmlParserCtxtPtr ctxt, 
				 xmlInputReadCallback ioread, 
				 xmlInputCloseCallback ioclose, 
				 void * ioctx, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Typedef xmlSAXHandlerPtr htmlSAXHandlerPtr

Enum htmlStatus {
    HTML_NA = 0 : something we don't check at all
    HTML_INVALID = 1
    HTML_DEPRECATED = 2
    HTML_VALID = 4
    HTML_REQUIRED = 12 : VALID bit set so ( & HTML_VALID ) is TRUE
}

htmlParserCtxtPtr	htmlCreatePushParserCtxt	(htmlSAXHandlerPtr sax, 
				 void * user_data, 
				 const char * chunk, 
				 int size, 
				 const char * filename, 
				 xmlCharEncoding enc)

htmlDocPtr	htmlReadMemory	(const char * buffer, 
				 int size, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

int	htmlIsAutoClosed	(htmlDocPtr doc, 
				 htmlNodePtr elem)

Typedef xmlDocPtr htmlDocPtr

htmlDocPtr	htmlReadDoc	(const xmlChar * cur, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Enum htmlParserOption {
    HTML_PARSE_NOERROR = 32 : suppress error reports
    HTML_PARSE_NOWARNING = 64 : suppress warning reports
    HTML_PARSE_PEDANTIC = 128 : pedantic error reporting
    HTML_PARSE_NOBLANKS = 256 : remove blank nodes
    HTML_PARSE_NONET = 2048 : Forbid network access
}

Typedef htmlEntityDesc * htmlEntityDescPtr

int	htmlEncodeEntities	(unsigned char * out, 
				 int * outlen, 
				 const unsigned char * in, 
				 int * inlen, 
				 int quoteChar)

Typedef xmlParserCtxt htmlParserCtxt

htmlStatus	htmlNodeStatus	(const htmlNodePtr node, 
				 int legacy)

htmlStatus	htmlAttrAllowed	(const htmlElemDesc * elt, 
				 const xmlChar * attr, 
				 int legacy)

#define htmlDefaultSubelement

int	htmlParseCharRef	(htmlParserCtxtPtr ctxt)

htmlDocPtr	htmlSAXParseFile	(const char * filename, 
				 const char * encoding, 
				 htmlSAXHandlerPtr sax, 
				 void * userData)

const htmlEntityDesc *	htmlParseEntityRef	(htmlParserCtxtPtr ctxt, 
				 const xmlChar ** str)

Typedef xmlParserInput htmlParserInput

htmlStatus	htmlElementStatusHere	(const htmlElemDesc * parent, 
				 const htmlElemDesc * elt)

const htmlEntityDesc *	htmlEntityValueLookup	(unsigned int value)

void	htmlParseElement	(htmlParserCtxtPtr ctxt)

int	UTF8ToHtml	(unsigned char * out, 
				 int * outlen, 
				 const unsigned char * in, 
				 int * inlen)

#define htmlRequiredAttrs

const htmlEntityDesc *	htmlEntityLookup	(const xmlChar * name)

void	htmlFreeParserCtxt	(htmlParserCtxtPtr ctxt)

htmlDocPtr	htmlCtxtReadMemory	(htmlParserCtxtPtr ctxt, 
				 const char * buffer, 
				 int size, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

htmlDocPtr	htmlCtxtReadFd	(htmlParserCtxtPtr ctxt, 
				 int fd, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Structure htmlEntityDesc
struct _htmlEntityDesc {
    unsigned int	value	: the UNICODE value for the character
    const char *	name	: The entity name
    const char *	desc	: the description
}

#define htmlElementAllowedHereDesc

htmlDocPtr	htmlReadFile	(const char * filename, 
				 const char * encoding, 
				 int options)

htmlDocPtr	htmlCtxtReadFile	(htmlParserCtxtPtr ctxt, 
				 const char * filename, 
				 const char * encoding, 
				 int options)

Typedef htmlElemDesc * htmlElemDescPtr

Structure htmlElemDesc
struct _htmlElemDesc {
    const char *	name	: The tag name
    char	startTag	: Whether the start tag can be implied
    char	endTag	: Whether the end tag can be implied
    char	saveEndTag	: Whether the end tag should be saved
    char	empty	: Is this an empty element ?
    char	depr	: Is this a deprecated element ?
    char	dtd	: 1: only in Loose DTD, 2: only Frameset one
    char	isinline	: is this a block 0 or inline 1 element
    const char *	desc	: the description (NRK Jan. 2003: the fields below are new and encapsulate HTML structure)
    const char **	subelts	: allowed sub-elements of this element
    const char *	defaultsubelt	: subelement for suggested auto-repair if necessary
    const char **	attrs_opt	: Optional Attributes
    const char **	attrs_depr	: Additional deprecated attributes
    const char **	attrs_req	: Required attributes
}

htmlDocPtr	htmlSAXParseDoc	(xmlChar * cur, 
				 const char * encoding, 
				 htmlSAXHandlerPtr sax, 
				 void * userData)

int	htmlCtxtUseOptions	(htmlParserCtxtPtr ctxt, 
				 int options)

Typedef xmlParserInputPtr htmlParserInputPtr

htmlDocPtr	htmlReadFd	(int fd, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

htmlDocPtr	htmlParseDoc	(xmlChar * cur, 
				 const char * encoding)

int	htmlParseDocument	(htmlParserCtxtPtr ctxt)

Description

Function: htmlIsScriptAttribute

int	htmlIsScriptAttribute	(const xmlChar * name)

Check if an attribute is of content type Script

`name`:	an attribute name
`Returns`:	1 if the attribute is a script, 0 otherwise

Function: htmlHandleOmittedElem

int	htmlHandleOmittedElem	(int val)

Set and return the previous value for handling HTML omitted tags.

`val`:	int 0 or 1
`Returns`:	the previous value: 0 for no handling, 1 for auto insertion.

Function: htmlCtxtReadDoc

htmlDocPtr	htmlCtxtReadDoc	(htmlParserCtxtPtr ctxt, 
				 const xmlChar * cur, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

`ctxt`:	an HTML parser context
`cur`:	a pointer to a zero terminated string
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlParseFile

htmlDocPtr	htmlParseFile	(const char * filename, 
				 const char * encoding)

parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed document is provided by default if found at compile-time.

`filename`:	the filename
`encoding`:	a free form C string describing the HTML document encoding, or NULL
`Returns`:	the resulting document tree

Function: htmlReadIO

htmlDocPtr	htmlReadIO	(xmlInputReadCallback ioread, 
				 xmlInputCloseCallback ioclose, 
				 void * ioctx, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

parse an HTML document from I/O functions and source and build a tree.

`ioread`:	an I/O read function
`ioclose`:	an I/O close function
`ioctx`:	an I/O handler
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlAutoCloseTag

int	htmlAutoCloseTag	(htmlDocPtr doc, 
				 const xmlChar * name, 
				 htmlNodePtr elem)

The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks whether the element or one of its children would autoclose the given tag.

`doc`:	the HTML document
`name`:	The tag name
`elem`:	the HTML element
`Returns`:	1 if autoclose, 0 otherwise

Function: htmlParseChunk

int	htmlParseChunk	(htmlParserCtxtPtr ctxt, 
				 const char * chunk, 
				 int size, 
				 int terminate)

Parse a Chunk of memory

`ctxt`:	an HTML parser context
`chunk`:	a char array
`size`:	the size in bytes of the chunk
`terminate`:	last chunk indicator
`Returns`:	zero if no error, the xmlParserErrors otherwise.

Function: htmlTagLookup

const htmlElemDesc *	htmlTagLookup	(const xmlChar * tag)

Lookup the HTML tag in the ElementTable

`tag`:	The tag name in lowercase
`Returns`:	the related htmlElemDescPtr or NULL if not found.

Function: htmlCreateMemoryParserCtxt

htmlParserCtxtPtr	htmlCreateMemoryParserCtxt	(const char * buffer, 
				 int size)

Create a parser context for an HTML in-memory document.

`buffer`:	a pointer to a char array
`size`:	the size of the array
`Returns`:	the new parser context or NULL

Function: htmlCtxtReset

void	htmlCtxtReset	(htmlParserCtxtPtr ctxt)

Reset a parser context

`ctxt`:	an HTML parser context

Function: htmlElementAllowedHere

int	htmlElementAllowedHere	(const htmlElemDesc * parent, 
				 const xmlChar * elt)

Checks whether an HTML element may be a direct child of a parent element. Note - doesn't check for deprecated elements

`parent`:	HTML parent element
`elt`:	HTML element
`Returns`:	1 if allowed; 0 otherwise.

Function: htmlCtxtReadIO

htmlDocPtr	htmlCtxtReadIO	(htmlParserCtxtPtr ctxt, 
				 xmlInputReadCallback ioread, 
				 xmlInputCloseCallback ioclose, 
				 void * ioctx, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

parse an HTML document from I/O functions and source and build a tree. This reuses the existing @ctxt parser context

`ctxt`:	an HTML parser context
`ioread`:	an I/O read function
`ioclose`:	an I/O close function
`ioctx`:	an I/O handler
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlCreatePushParserCtxt

htmlParserCtxtPtr	htmlCreatePushParserCtxt	(htmlSAXHandlerPtr sax, 
				 void * user_data, 
				 const char * chunk, 
				 int size, 
				 const char * filename, 
				 xmlCharEncoding enc)

Create a parser context for using the HTML parser in push mode. The value of @filename is used for fetching external entities and for error/warning reports.

`sax`:	a SAX handler
`user_data`:	The user data returned on SAX callbacks
`chunk`:	a pointer to an array of chars
`size`:	number of chars in the array
`filename`:	an optional file name or URI
`enc`:	an optional encoding
`Returns`:	the new parser context or NULL

Function: htmlReadMemory

htmlDocPtr	htmlReadMemory	(const char * buffer, 
				 int size, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Parse an HTML in-memory document and build a tree.

`buffer`:	a pointer to a char array
`size`:	the size of the array
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlIsAutoClosed

int	htmlIsAutoClosed	(htmlDocPtr doc, 
				 htmlNodePtr elem)

The HTML DTD allows a tag to implicitly close other tags. The list is kept in the htmlStartClose array. This function checks whether a tag is autoclosed by one of its children.

`doc`:	the HTML document
`elem`:	the HTML element
`Returns`:	1 if autoclosed, 0 otherwise

Function: htmlReadDoc

htmlDocPtr	htmlReadDoc	(const xmlChar * cur, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Parse an HTML in-memory document and build a tree.

`cur`:	a pointer to a zero terminated string
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlEncodeEntities

int	htmlEncodeEntities	(unsigned char * out, 
				 int * outlen, 
				 const unsigned char * in, 
				 int * inlen, 
				 int quoteChar)

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

`out`:	a pointer to an array of bytes to store the result
`outlen`:	the length of @out
`in`:	a pointer to an array of UTF-8 chars
`inlen`:	the length of @in
`quoteChar`:	the quote character to escape (' or ") or zero.
`Returns`:	0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Function: htmlNodeStatus

htmlStatus	htmlNodeStatus	(const htmlNodePtr node, 
				 int legacy)

Checks whether the tree node is valid. Experimental (the author only uses the HTML enhancements in a SAX parser)

`node`:	an htmlNodePtr in a tree
`legacy`:	whether to allow deprecated elements (YES is faster here for Element nodes)
`Returns`:	for Element nodes, a return from htmlElementAllowedHere (if legacy allowed) or htmlElementStatusHere (otherwise); for Attribute nodes, a return from htmlAttrAllowed; for other nodes, HTML_NA (no checks performed).

Function: htmlAttrAllowed

htmlStatus	htmlAttrAllowed	(const htmlElemDesc * elt, 
				 const xmlChar * attr, 
				 int legacy)

Checks whether an attribute is valid for an element. Has full knowledge of Required and Deprecated attributes.

`elt`:	HTML element
`attr`:	HTML attribute
`legacy`:	whether to allow deprecated attributes
`Returns`:	one of HTML_REQUIRED, HTML_VALID, HTML_DEPRECATED, HTML_INVALID

Macro: htmlDefaultSubelement

#define htmlDefaultSubelement

Returns the default subelement for this element

Function: htmlParseCharRef

int	htmlParseCharRef	(htmlParserCtxtPtr ctxt)

parse Reference declarations [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'

`ctxt`:	an HTML parser context
`Returns`:	the value parsed (as an int)

Function: htmlSAXParseFile

htmlDocPtr	htmlSAXParseFile	(const char * filename, 
				 const char * encoding, 
				 htmlSAXHandlerPtr sax, 
				 void * userData)

Parse an HTML file and build a tree. Automatic support for ZLIB/Compress compressed documents is provided by default if found at compile-time. It uses the given SAX function block to handle the parsing callbacks. If sax is NULL, it falls back to the default DOM tree building routines.

`filename`:	the filename
`encoding`:	a free form C string describing the HTML document encoding, or NULL
`sax`:	the SAX handler block
`userData`:	if using SAX, this pointer will be provided on callbacks.
`Returns`:	the resulting document tree unless SAX is NULL or the document is not well formed.

Function: htmlParseEntityRef

const htmlEntityDesc *	htmlParseEntityRef	(htmlParserCtxtPtr ctxt, 
				 const xmlChar ** str)

Parse an HTML entity reference: [68] EntityRef ::= '&' Name ';'

`ctxt`:	an HTML parser context
`str`:	location to store the entity name
`Returns`:	the associated htmlEntityDescPtr if found, or NULL otherwise, if non-NULL *str will have to be freed by the caller.

Function: htmlElementStatusHere

htmlStatus	htmlElementStatusHere	(const htmlElemDesc * parent, 
				 const htmlElemDesc * elt)

Checks whether an HTML element may be a direct child of a parent element, and if so whether it is valid or deprecated.

`parent`:	HTML parent element
`elt`:	HTML element
`Returns`:	one of HTML_VALID, HTML_DEPRECATED, HTML_INVALID

Function: htmlEntityValueLookup

const htmlEntityDesc *	htmlEntityValueLookup	(unsigned int value)

Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.

`value`:	the entity's unicode value
`Returns`:	the associated htmlEntityDescPtr if found, NULL otherwise.

Function: htmlParseElement

void	htmlParseElement	(htmlParserCtxtPtr ctxt)

parse an HTML element, this is highly recursive [39] element ::= EmptyElemTag | STag content ETag [41] Attribute ::= Name Eq AttValue

`ctxt`:	an HTML parser context

Function: UTF8ToHtml

int	UTF8ToHtml	(unsigned char * out, 
				 int * outlen, 
				 const unsigned char * in, 
				 int * inlen)

Take a block of UTF-8 chars in and try to convert it to an ASCII plus HTML entities block of chars out.

`out`:	a pointer to an array of bytes to store the result
`outlen`:	the length of @out
`in`:	a pointer to an array of UTF-8 chars
`inlen`:	the length of @in
`Returns`:	0 if success, -2 if the transcoding fails, or -1 otherwise. The value of @inlen after return is the number of octets consumed if the return value is positive, else unpredictable. The value of @outlen after return is the number of octets produced.

Macro: htmlRequiredAttrs

#define htmlRequiredAttrs

Returns the attributes required for the specified element.

Function: htmlEntityLookup

const htmlEntityDesc *	htmlEntityLookup	(const xmlChar * name)

Look up the given entity in EntitiesTable. TODO: the linear scan is really ugly; a hash table is really needed.

`name`:	the entity name
`Returns`:	the associated htmlEntityDescPtr if found, NULL otherwise.

Function: htmlFreeParserCtxt

void	htmlFreeParserCtxt	(htmlParserCtxtPtr ctxt)

Free all the memory used by a parser context. However the parsed document in ctxt->myDoc is not freed.

`ctxt`:	an HTML parser context

Function: htmlCtxtReadMemory

htmlDocPtr	htmlCtxtReadMemory	(htmlParserCtxtPtr ctxt, 
				 const char * buffer, 
				 int size, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Parse an HTML in-memory document and build a tree. This reuses the existing @ctxt parser context.

`ctxt`:	an HTML parser context
`buffer`:	a pointer to a char array
`size`:	the size of the array
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlCtxtReadFd

htmlDocPtr	htmlCtxtReadFd	(htmlParserCtxtPtr ctxt, 
				 int fd, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Parse an HTML document from a file descriptor and build a tree. This reuses the existing @ctxt parser context.

`ctxt`:	an HTML parser context
`fd`:	an open file descriptor
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Macro: htmlElementAllowedHereDesc

#define htmlElementAllowedHereDesc

Checks whether an HTML element description may be a direct child of the specified element. Returns 1 if allowed; 0 otherwise.

Function: htmlReadFile

htmlDocPtr	htmlReadFile	(const char * filename, 
				 const char * encoding, 
				 int options)

Parse an HTML file from the filesystem or the network.

`filename`:	a file or URL
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlCtxtReadFile

htmlDocPtr	htmlCtxtReadFile	(htmlParserCtxtPtr ctxt, 
				 const char * filename, 
				 const char * encoding, 
				 int options)

Parse an HTML file from the filesystem or the network. This reuses the existing @ctxt parser context.

`ctxt`:	an HTML parser context
`filename`:	a file or URL
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlSAXParseDoc

htmlDocPtr	htmlSAXParseDoc	(xmlChar * cur, 
				 const char * encoding, 
				 htmlSAXHandlerPtr sax, 
				 void * userData)

Parse an HTML in-memory document. If sax is not NULL, use the SAX callbacks to handle parse events. If sax is NULL, fallback to the default DOM behavior and return a tree.

`cur`:	a pointer to an array of xmlChar
`encoding`:	a free form C string describing the HTML document encoding, or NULL
`sax`:	the SAX handler block
`userData`:	if using SAX, this pointer will be provided on callbacks.
`Returns`:	the resulting document tree unless SAX is NULL or the document is not well formed.

Function: htmlCtxtUseOptions

int	htmlCtxtUseOptions	(htmlParserCtxtPtr ctxt, 
				 int options)

Applies the options to the parser context

`ctxt`:	an HTML parser context
`options`:	a combination of htmlParserOption(s)
`Returns`:	0 in case of success, the set of unknown or unimplemented options in case of error.

Function: htmlReadFd

htmlDocPtr	htmlReadFd	(int fd, 
				 const char * URL, 
				 const char * encoding, 
				 int options)

Parse an HTML document from a file descriptor and build a tree.

`fd`:	an open file descriptor
`URL`:	the base URL to use for the document
`encoding`:	the document encoding, or NULL
`options`:	a combination of htmlParserOption(s)
`Returns`:	the resulting document tree

Function: htmlParseDoc

htmlDocPtr	htmlParseDoc	(xmlChar * cur, 
				 const char * encoding)

parse an HTML in-memory document and build a tree.

`cur`:	a pointer to an array of xmlChar
`encoding`:	a free form C string describing the HTML document encoding, or NULL
`Returns`:	the resulting document tree

Function: htmlParseDocument

int	htmlParseDocument	(htmlParserCtxtPtr ctxt)

parse an HTML document (and build a tree if using the standard SAX interface).

`ctxt`:	an HTML parser context
`Returns`:	0 on success, -1 in case of error. The parser context is augmented as a result of the parsing.

Daniel Veillard

Module HTMLparser from libxml2

Table of Contents

Description

Function: htmlIsScriptAttribute

Function: htmlHandleOmittedElem

Function: htmlCtxtReadDoc

Function: htmlParseFile

Function: htmlReadIO

Function: htmlAutoCloseTag

Function: htmlParseChunk

Function: htmlTagLookup

Function: htmlCreateMemoryParserCtxt

Function: htmlCtxtReset

Function: htmlElementAllowedHere

Function: htmlCtxtReadIO

Function: htmlCreatePushParserCtxt

Function: htmlReadMemory

Function: htmlIsAutoClosed

Function: htmlReadDoc

Function: htmlEncodeEntities

Function: htmlNodeStatus

Function: htmlAttrAllowed

Macro: htmlDefaultSubelement

Function: htmlParseCharRef

Function: htmlSAXParseFile

Function: htmlParseEntityRef

Function: htmlElementStatusHere

Function: htmlEntityValueLookup

Function: htmlParseElement

Function: UTF8ToHtml

Macro: htmlRequiredAttrs

Function: htmlEntityLookup

Function: htmlFreeParserCtxt

Function: htmlCtxtReadMemory

Function: htmlCtxtReadFd

Macro: htmlElementAllowedHereDesc

Function: htmlReadFile

Function: htmlCtxtReadFile

Function: htmlSAXParseDoc

Function: htmlCtxtUseOptions

Function: htmlReadFd

Function: htmlParseDoc

Function: htmlParseDocument