mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			420 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			420 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|       * Summary: interface for an HTML 4.0 non-verifying parser
 | |
|       * Description: this module implements an HTML 4.0 non-verifying parser
 | |
|       *              with API compatible with the XML parser ones. It should
 | |
|       *              be able to parse "real world" HTML, even if severely
 | |
|       *              broken from a specification point of view.
 | |
|       *
 | |
|       * Copy: See Copyright for the status of this software.
 | |
|       *
 | |
|       * Author: Patrick Monnerat <pm@datasphere.ch>, DATASPHERE S.A.
 | |
| 
 | |
|       /if not defined(HTML_PARSER_H__)
 | |
|       /define HTML_PARSER_H__
 | |
| 
 | |
|       /include "libxmlrpg/xmlversion"
 | |
| 
 | |
|       /if defined(LIBXML_HTML_ENABLED)
 | |
| 
 | |
|       /include "libxmlrpg/xmlTypesC"
 | |
|       /include "libxmlrpg/parser"
 | |
| 
 | |
|       * Most of the back-end structures from XML and HTML are shared.
 | |
| 
 | |
|       * HTML parser context: a pointer type declared like
 | |
|       * xmlParserCtxtPtr, and a based structure laid out like
 | |
|       * xmlParserCtxt (likeds).
 | |
|      d htmlParserCtxtPtr...
 | |
|      d                 s                   based(######typedef######)
 | |
|      d                                     like(xmlParserCtxtPtr)
 | |
| 
 | |
|      d htmlParserCtxt  ds                  based(htmlParserCtxtPtr)
 | |
|      d                                     likeds(xmlParserCtxt)
 | |
| 
 | |
|       * HTML parser node info: a pointer type declared like
 | |
|       * xmlParserNodeInfoPtr, and a based structure laid out like
 | |
|       * xmlParserNodeInfo (likeds).
 | |
|      d htmlParserNodeInfoPtr...
 | |
|      d                 s                   based(######typedef######)
 | |
|      d                                     like(xmlParserNodeInfoPtr)
 | |
| 
 | |
|      d htmlParserNodeInfo...
 | |
|      d                 ds                  based(htmlParserNodeInfoPtr)
 | |
|      d                                     likeds(xmlParserNodeInfo)
 | |
| 
 | |
|       * HTML SAX handler: a pointer type declared like
 | |
|       * xmlSAXHandlerPtr, and a based structure laid out like
 | |
|       * xmlSAXHandler (likeds).
 | |
|      d htmlSAXHandlerPtr...
 | |
|      d                 s                   based(######typedef######)
 | |
|      d                                     like(xmlSAXHandlerPtr)
 | |
| 
 | |
|      d htmlSAXHandler  ds                  based(htmlSAXHandlerPtr)
 | |
|      d                                     likeds(xmlSAXHandler)
 | |
| 
 | |
|       * HTML parser input: a pointer type declared like
 | |
|       * xmlParserInputPtr, and a based structure laid out like
 | |
|       * xmlParserInput (likeds).
 | |
|      d htmlParserInputPtr...
 | |
|      d                 s                   based(######typedef######)
 | |
|      d                                     like(xmlParserInputPtr)
 | |
| 
 | |
|      d htmlParserInput...
 | |
|      d                 ds                  based(htmlParserInputPtr)
 | |
|      d                                     likeds(xmlParserInput)
 | |
| 
 | |
|       * HTML document pointer type, declared like xmlDocPtr.
 | |
|      d htmlDocPtr      s                   based(######typedef######)
 | |
|      d                                     like(xmlDocPtr)
 | |
| 
 | |
|       * HTML node pointer type, declared like xmlNodePtr.
 | |
|      d htmlNodePtr     s                   based(######typedef######)
 | |
|      d                                     like(xmlNodePtr)
 | |
| 
 | |
|       * Internal description of an HTML element, representing HTML 4.01
 | |
|       * and XHTML 1.0 (which share the same structure).
 | |
| 
 | |
|       * htmlElemDescPtr is a plain pointer type; htmlElemDesc is the
 | |
|       * aligned, qualified structure based on it, so subfields are
 | |
|       * referenced as htmlElemDesc.name, htmlElemDesc.startTag, etc.
 | |
|      d htmlElemDescPtr...
 | |
|      d                 s               *   based(######typedef######)
 | |
| 
 | |
|      d htmlElemDesc    ds                  based(htmlElemDescPtr)
 | |
|      d                                     align qualified
 | |
|      d  name                           *                                        const char *
 | |
|      d  startTag                           like(xmlCchar)                       Start tag implied ?
 | |
|      d  endTag                             like(xmlCchar)                       End tag implied ?
 | |
|      d  saveEndTag                         like(xmlCchar)                       Save end tag ?
 | |
|      d  empty                              like(xmlCchar)                       Empty element ?
 | |
|      d  depr                               like(xmlCchar)                       Deprecated element ?
 | |
|      d  dtd                                like(xmlCchar)                       Loose DTD/Frameset
 | |
|      d  isinline                           like(xmlCchar)                       Block 0/inline elem?
 | |
|      d  desc                           *                                        const char *
 | |
|       *
 | |
|       * New fields encapsulating HTML structure
 | |
|       *
 | |
|       * Bugs:
 | |
|       *      This is a very limited representation.  It fails to tell us when
 | |
|       *      an element *requires* subelements (we only have whether they're
 | |
|       *      allowed or not), and it doesn't tell us where CDATA and PCDATA
 | |
|       *      are allowed.  Some element relationships are not fully represented:
 | |
|       *      these are flagged with the word MODIFIER
 | |
|       *
 | |
|      d  subelts                        *                                        const char * *
 | |
|      d  defaultsubelt                  *                                        const char *
 | |
|      d  attrs_opt                      *                                        const char * *
 | |
|      d  attrs_depr                     *                                        const char * *
 | |
|      d  attrs_req                      *                                        const char * *
 | |
| 
 | |
|       * Internal description of an HTML entity.
 | |
| 
 | |
|       * htmlEntityDescPtr is a plain pointer type; htmlEntityDesc is
 | |
|       * the aligned, qualified structure based on it: one xmlCuint
 | |
|       * value followed by two pointers (name and desc).
 | |
|      d htmlEntityDescPtr...
 | |
|      d                 s               *   based(######typedef######)
 | |
| 
 | |
|      d htmlEntityDesc...
 | |
|      d                 ds                  based(htmlEntityDescPtr)
 | |
|      d                                     align qualified
 | |
|      d  value                              like(xmlCuint)
 | |
|      d  name                           *                                        const char *
 | |
|      d  desc                           *                                        const char *
 | |
| 
 | |
|       * There are only a few public functions.
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlTagLookup'.
 | |
|       * tag is a by-value pointer with options(*string), so a caller
 | |
|       * may pass a character expression. Returns an htmlElemDescPtr.
 | |
|      d htmlTagLookup   pr                  extproc('htmlTagLookup')
 | |
|      d                                     like(htmlElemDescPtr)                const
 | |
|      d  tag                            *   value options(*string)               const xmlChar *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlEntityLookup'.
 | |
|       * name is a by-value pointer with options(*string).
 | |
|       * Returns an htmlEntityDescPtr.
 | |
|      d htmlEntityLookup...
 | |
|      d                 pr                  extproc('htmlEntityLookup')
 | |
|      d                                     like(htmlEntityDescPtr)              const
 | |
|      d  name                           *   value options(*string)               const xmlChar *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlEntityValueLookup'.
 | |
|       * value is an xmlCuint passed by value.
 | |
|       * Returns an htmlEntityDescPtr.
 | |
|      d htmlEntityValueLookup...
 | |
|      d                 pr                  extproc('htmlEntityValueLookup')
 | |
|      d                                     like(htmlEntityDescPtr)              const
 | |
|      d  value                              value like(xmlCuint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlIsAutoClosed'.
 | |
|       * Takes a document pointer and a node pointer, both by value.
 | |
|       * Returns an xmlCint.
 | |
|      d htmlIsAutoClosed...
 | |
|      d                 pr                  extproc('htmlIsAutoClosed')
 | |
|      d                                     like(xmlCint)
 | |
|      d  doc                                value like(htmlDocPtr)
 | |
|      d  elem                               value like(htmlNodePtr)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlAutoCloseTag'.
 | |
|       * doc and elem are pointers passed by value; name is a by-value
 | |
|       * pointer with options(*string). Returns an xmlCint.
 | |
|      d htmlAutoCloseTag...
 | |
|      d                 pr                  extproc('htmlAutoCloseTag')
 | |
|      d                                     like(xmlCint)
 | |
|      d  doc                                value like(htmlDocPtr)
 | |
|      d  name                           *   value options(*string)               const xmlChar *
 | |
|      d  elem                               value like(htmlNodePtr)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseEntityRef'.
 | |
|       * ctxt is passed by value. str has no VALUE keyword, so the
 | |
|       * pointer itself is passed by reference (pointer to pointer).
 | |
|       * Returns an htmlEntityDescPtr.
 | |
|      d htmlParseEntityRef...
 | |
|      d                 pr                  extproc('htmlParseEntityRef')
 | |
|      d                                     like(htmlEntityDescPtr)              const
 | |
|      d  ctxt                               value like(htmlParserCtxtPtr)
 | |
|      d  str                            *                                        const xmlChar *(*)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseCharRef'.
 | |
|       * Takes the parser context by value and returns an xmlCint.
 | |
|      d htmlParseCharRef...
 | |
|      d                 pr                  extproc('htmlParseCharRef')
 | |
|      d                                     like(xmlCint)
 | |
|      d  ctxt                               value like(htmlParserCtxtPtr)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseElement'.
 | |
|       * Takes the parser context by value; no return value is declared.
 | |
|      d htmlParseElement...
 | |
|      d                 pr                  extproc('htmlParseElement')
 | |
|      d  ctxt                               value like(htmlParserCtxtPtr)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlNewParserCtxt'.
 | |
|       * Takes no parameters and returns an htmlParserCtxtPtr.
 | |
|      d htmlNewParserCtxt...
 | |
|      d                 pr                  extproc('htmlNewParserCtxt')
 | |
|      d                                     like(htmlParserCtxtPtr)
 | |
| 
 | |
|       * Prototype for the external procedure
 | |
|       * 'htmlCreateMemoryParserCtxt'. buffer is a by-value pointer
 | |
|       * with options(*string); size is an xmlCint by value.
 | |
|       * Returns an htmlParserCtxtPtr.
 | |
|      d htmlCreateMemoryParserCtxt...
 | |
|      d                 pr                  extproc('htmlCreateMemoryParserCtxt')
 | |
|      d                                     like(htmlParserCtxtPtr)
 | |
|      d  buffer                         *   value options(*string)               const char *
 | |
|      d  size                               value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseDocument'.
 | |
|       * Takes the parser context by value and returns an xmlCint.
 | |
|      d htmlParseDocument...
 | |
|      d                 pr                  extproc('htmlParseDocument')
 | |
|      d                                     like(xmlCint)
 | |
|      d  ctxt                               value like(htmlParserCtxtPtr)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlSAXParseDoc'.
 | |
|       * cur and encoding are by-value pointers with options(*string);
 | |
|       * sax and userData are pointers passed by value.
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlSAXParseDoc...
 | |
|      d                 pr                  extproc('htmlSAXParseDoc')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  cur                            *   value options(*string)               xmlChar *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  sax                                value like(htmlSAXHandlerPtr)
 | |
|      d  userData                       *   value                                void *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseDoc'.
 | |
|       * cur and encoding are by-value pointers with options(*string).
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlParseDoc    pr                  extproc('htmlParseDoc')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  cur                            *   value options(*string)               xmlChar *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlSAXParseFile'.
 | |
|       * filename and encoding are by-value pointers with
 | |
|       * options(*string); sax and userData are pointers passed by
 | |
|       * value. Returns an htmlDocPtr.
 | |
|      d htmlSAXParseFile...
 | |
|      d                 pr                  extproc('htmlSAXParseFile')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  filename                       *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  sax                                value like(htmlSAXHandlerPtr)
 | |
|      d  userData                       *   value                                void *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseFile'.
 | |
|       * filename and encoding are by-value pointers with
 | |
|       * options(*string). Returns an htmlDocPtr.
 | |
|      d htmlParseFile   pr                  extproc('htmlParseFile')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  filename                       *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
| 
 | |
|       * Prototype for the external procedure 'UTF8ToHtml'.
 | |
|       * out is a by-reference character buffer declared with length
 | |
|       * 65535 and options(*varsize). outlen and inlen have no VALUE
 | |
|       * keyword, so both are passed by reference. in is a by-value
 | |
|       * pointer with options(*string). Returns an xmlCint.
 | |
|      d UTF8ToHtml      pr                  extproc('UTF8ToHtml')
 | |
|      d                                     like(xmlCint)
 | |
|      d  out                       65535    options(*varsize)                    unsigned char []
 | |
|      d  outlen                             like(xmlCint)
 | |
|      d  in                             *   value options(*string)               const unsigned char*
 | |
|      d  inlen                              like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlEncodeEntities'.
 | |
|       * out is a by-reference character buffer declared with length
 | |
|       * 65535 and options(*varsize). outlen and inlen have no VALUE
 | |
|       * keyword, so both are passed by reference. in is a by-value
 | |
|       * pointer with options(*string); quoteChar is an xmlCint by
 | |
|       * value. Returns an xmlCint.
 | |
|      d htmlEncodeEntities...
 | |
|      d                 pr                  extproc('htmlEncodeEntities')
 | |
|      d                                     like(xmlCint)
 | |
|      d  out                       65535    options(*varsize)                    unsigned char []
 | |
|      d  outlen                             like(xmlCint)
 | |
|      d  in                             *   value options(*string)               const unsigned char*
 | |
|      d  inlen                              like(xmlCint)
 | |
|      d  quoteChar                          value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlIsScriptAttribute'.
 | |
|       * name is a by-value pointer with options(*string).
 | |
|       * Returns an xmlCint.
 | |
|      d htmlIsScriptAttribute...
 | |
|      d                 pr                  extproc('htmlIsScriptAttribute')
 | |
|      d                                     like(xmlCint)
 | |
|      d  name                           *   value options(*string)               const xmlChar *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlHandleOmittedElem'.
 | |
|       * Takes an xmlCint by value and returns an xmlCint.
 | |
|      d htmlHandleOmittedElem...
 | |
|      d                 pr                  extproc('htmlHandleOmittedElem')
 | |
|      d                                     like(xmlCint)
 | |
|      d  val                                value like(xmlCint)
 | |
| 
 | |
|       /if defined(LIBXML_PUSH_ENABLED)
 | |
| 
 | |
|       * Interfaces for the Push mode.
 | |
| 
 | |
|       * Prototype for the external procedure
 | |
|       * 'htmlCreatePushParserCtxt'; only declared when
 | |
|       * LIBXML_PUSH_ENABLED is defined. All parameters are passed by
 | |
|       * value; chunk and filename accept options(*string).
 | |
|       * Returns an htmlParserCtxtPtr.
 | |
|      d htmlCreatePushParserCtxt...
 | |
|      d                 pr                  extproc('htmlCreatePushParserCtxt')
 | |
|      d                                     like(htmlParserCtxtPtr)
 | |
|      d  sax                                value like(htmlSAXHandlerPtr)
 | |
|      d  user_data                      *   value                                void *
 | |
|      d  chunk                          *   value options(*string)               const char *
 | |
|      d  size                               value like(xmlCint)
 | |
|      d  filename                       *   value options(*string)               const char *
 | |
|      d  enc                                value like(xmlCharEncoding)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlParseChunk'; only
 | |
|       * declared when LIBXML_PUSH_ENABLED is defined. All parameters
 | |
|       * are passed by value; chunk accepts options(*string).
 | |
|       * Returns an xmlCint.
 | |
|      d htmlParseChunk  pr                  extproc('htmlParseChunk')
 | |
|      d                                     like(xmlCint)
 | |
|      d  ctxt                               value like(htmlParserCtxtPtr)
 | |
|      d  chunk                          *   value options(*string)               const char *
 | |
|      d  size                               value like(xmlCint)
 | |
|      d  terminate                          value like(xmlCint)
 | |
|       /endif                                                                    LIBXML_PUSH_ENABLED
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlFreeParserCtxt'.
 | |
|       * Takes the parser context by value; no return value is declared.
 | |
|      d htmlFreeParserCtxt...
 | |
|      d                 pr                  extproc('htmlFreeParserCtxt')
 | |
|      d  ctxt                               value like(htmlParserCtxtPtr)
 | |
| 
 | |
|       * New set of simpler/more flexible APIs
 | |
| 
 | |
|       * htmlParserOption:
 | |
|       *
 | |
|       * This is the set of HTML parser options that can be passed down
 | |
|       * to the htmlReadDoc() and similar calls.
 | |
| 
 | |
|       * htmlParserOption is declared like xmlCenum. The HTML_PARSE_*
 | |
|       * names that follow are standalone named constants (type c),
 | |
|       * each a hexadecimal literal with a single distinct bit set.
 | |
|      d htmlParserOption...
 | |
|      d                 s                   based(######typedef######)
 | |
|      d                                     like(xmlCenum)
 | |
|      d  HTML_PARSE_RECOVER...                                                   Relaxed parsing
 | |
|      d                 c                   X'00000001'
 | |
|      d  HTML_PARSE_NODEFDTD...                                                  No default doctype
 | |
|      d                 c                   X'00000004'
 | |
|      d  HTML_PARSE_NOERROR...                                                   No error reports
 | |
|      d                 c                   X'00000020'
 | |
|      d  HTML_PARSE_NOWARNING...                                                 No warning reports
 | |
|      d                 c                   X'00000040'
 | |
|      d  HTML_PARSE_PEDANTIC...                                                  Pedantic err reports
 | |
|      d                 c                   X'00000080'
 | |
|      d  HTML_PARSE_NOBLANKS...                                                  Remove blank nodes
 | |
|      d                 c                   X'00000100'
 | |
|      d  HTML_PARSE_NONET...                                                     Forbid net access
 | |
|      d                 c                   X'00000800'
 | |
|      d  HTML_PARSE_NOIMPLIED...                                                 No implied html/body
 | |
|      d                 c                   X'00002000'
 | |
|      d  HTML_PARSE_COMPACT...                                                   compact small txtnod
 | |
|      d                 c                   X'00010000'
 | |
|      d  HTML_PARSE_IGNORE_ENC...                                                Ignore encoding hint
 | |
|      d                 c                   X'00200000'
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtReset'.
 | |
|       * Takes the parser context by value; no return value is declared.
 | |
|      d htmlCtxtReset   pr                  extproc('htmlCtxtReset')
 | |
|      d ctxt                                value like(htmlParserCtxtPtr)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtUseOptions'.
 | |
|       * Takes the parser context and an xmlCint options value, both
 | |
|       * by value. Returns an xmlCint.
 | |
|      d htmlCtxtUseOptions...
 | |
|      d                 pr                  extproc('htmlCtxtUseOptions')
 | |
|      d                                     like(xmlCint)
 | |
|      d ctxt                                value like(htmlParserCtxtPtr)
 | |
|      d options                             value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlReadDoc'.
 | |
|       * cur, URL and encoding are by-value pointers with
 | |
|       * options(*string); options is an xmlCint by value.
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlReadDoc     pr                  extproc('htmlReadDoc')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  cur                            *   value options(*string)               const xmlChar *
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlReadFile'.
 | |
|       * URL and encoding are by-value pointers with options(*string);
 | |
|       * options is an xmlCint by value. Returns an htmlDocPtr.
 | |
|      d htmlReadFile    pr                  extproc('htmlReadFile')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlReadMemory'.
 | |
|       * buffer, URL and encoding are by-value pointers with
 | |
|       * options(*string); size and options are xmlCint by value.
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlReadMemory  pr                  extproc('htmlReadMemory')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  buffer                         *   value options(*string)               const char *
 | |
|      d  size                               value like(xmlCint)
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlReadFd'.
 | |
|       * fd and options are xmlCint by value; URL and encoding are
 | |
|       * by-value pointers with options(*string).
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlReadFd      pr                  extproc('htmlReadFd')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  fd                                 value like(xmlCint)
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlReadIO'.
 | |
|       * ioread and ioclose are callback values (xmlInputReadCallback,
 | |
|       * xmlInputCloseCallback) and ioctx is a pointer, all by value.
 | |
|       * URL and encoding are by-value pointers with options(*string);
 | |
|       * options is an xmlCint by value. Returns an htmlDocPtr.
 | |
|      d htmlReadIO      pr                  extproc('htmlReadIO')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  ioread                             value like(xmlInputReadCallback)
 | |
|      d  ioclose                            value like(xmlInputCloseCallback)
 | |
|      d  ioctx                          *   value                                void *
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtReadDoc'.
 | |
|       * ctxt is declared like(xmlParserCtxtPtr), the type that
 | |
|       * htmlParserCtxtPtr is itself defined like. cur, URL and
 | |
|       * encoding are by-value pointers with options(*string);
 | |
|       * options is an xmlCint by value. Returns an htmlDocPtr.
 | |
|      d htmlCtxtReadDoc...
 | |
|      d                 pr                  extproc('htmlCtxtReadDoc')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  ctxt                               value like(xmlParserCtxtPtr)
 | |
|      d  cur                            *   value options(*string)               const xmlChar *
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtReadFile'.
 | |
|       * ctxt is declared like(xmlParserCtxtPtr) and passed by value.
 | |
|       * filename and encoding are by-value pointers with
 | |
|       * options(*string); options is an xmlCint by value.
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlCtxtReadFile...
 | |
|      d                 pr                  extproc('htmlCtxtReadFile')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  ctxt                               value like(xmlParserCtxtPtr)
 | |
|      d  filename                       *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtReadMemory'.
 | |
|       * ctxt is declared like(xmlParserCtxtPtr) and passed by value.
 | |
|       * buffer, URL and encoding are by-value pointers with
 | |
|       * options(*string); size and options are xmlCint by value.
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlCtxtReadMemory...
 | |
|      d                 pr                  extproc('htmlCtxtReadMemory')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  ctxt                               value like(xmlParserCtxtPtr)
 | |
|      d  buffer                         *   value options(*string)               const char *
 | |
|      d  size                               value like(xmlCint)
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtReadFd'.
 | |
|       * ctxt is declared like(xmlParserCtxtPtr) and passed by value.
 | |
|       * fd and options are xmlCint by value; URL and encoding are
 | |
|       * by-value pointers with options(*string).
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlCtxtReadFd  pr                  extproc('htmlCtxtReadFd')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  ctxt                               value like(xmlParserCtxtPtr)
 | |
|      d  fd                                 value like(xmlCint)
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlCtxtReadIO'.
 | |
|       * ctxt is declared like(xmlParserCtxtPtr) and passed by value.
 | |
|       * ioread and ioclose are callback values and ioctx is a
 | |
|       * pointer, all by value. URL and encoding are by-value pointers
 | |
|       * with options(*string); options is an xmlCint by value.
 | |
|       * Returns an htmlDocPtr.
 | |
|      d htmlCtxtReadIO  pr                  extproc('htmlCtxtReadIO')
 | |
|      d                                     like(htmlDocPtr)
 | |
|      d  ctxt                               value like(xmlParserCtxtPtr)
 | |
|      d  ioread                             value like(xmlInputReadCallback)
 | |
|      d  ioclose                            value like(xmlInputCloseCallback)
 | |
|      d  ioctx                          *   value                                void *
 | |
|      d  URL                            *   value options(*string)               const char *
 | |
|      d  encoding                       *   value options(*string)               const char *
 | |
|      d  options                            value like(xmlCint)
 | |
| 
 | |
|       * Further knowledge of HTML structure
 | |
| 
 | |
|       * htmlStatus is declared like xmlCenum. The HTML_* names that
 | |
|       * follow are standalone named constants (type c).
 | |
|       * HTML_REQUIRED (X'000C') includes the HTML_VALID bit (X'0004').
 | |
|      d htmlStatus      s                   based(######typedef######)
 | |
|      d                                     like(xmlCenum)
 | |
|      d  HTML_NA        c                   X'0000'                              No check at all
 | |
|      d  HTML_INVALID   c                   X'0001'
 | |
|      d  HTML_DEPRECATED...
 | |
|      d                 c                   X'0002'
 | |
|      d  HTML_VALID     c                   X'0004'
 | |
|      d  HTML_REQUIRED  c                   X'000C'                              HTML_VALID ored-in
 | |
| 
 | |
|       * Using htmlElemDesc rather than name here, to emphasise the fact
 | |
|       *  that otherwise there's a lookup overhead
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlAttrAllowed'.
 | |
|       * #param1 is an htmlElemDescPtr by value, #param2 a by-value
 | |
|       * pointer with options(*string), #param3 an xmlCint by value.
 | |
|       * Returns an htmlStatus.
 | |
|      d htmlAttrAllowed...
 | |
|      d                 pr                  extproc('htmlAttrAllowed')
 | |
|      d                                     like(htmlStatus)
 | |
|      d  #param1                            value like(htmlElemDescPtr)          const
 | |
|      d  #param2                        *   value options(*string)               const xmlChar *
 | |
|      d  #param3                            value like(xmlCint)
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlElementAllowedHere'.
 | |
|       * #param1 is an htmlElemDescPtr by value; #param2 is a by-value
 | |
|       * pointer with options(*string). Returns an xmlCint.
 | |
|      d htmlElementAllowedHere...
 | |
|      d                 pr                  extproc('htmlElementAllowedHere')
 | |
|      d                                     like(xmlCint)
 | |
|      d  #param1                            value like(htmlElemDescPtr)          const
 | |
|      d  #param2                        *   value options(*string)               const xmlChar *
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlElementStatusHere'.
 | |
|       * Both parameters are htmlElemDescPtr values passed by value.
 | |
|       * Returns an htmlStatus.
 | |
|      d htmlElementStatusHere...
 | |
|      d                 pr                  extproc('htmlElementStatusHere')
 | |
|      d                                     like(htmlStatus)
 | |
|      d  #param1                            value like(htmlElemDescPtr)          const
 | |
|      d  #param2                            value like(htmlElemDescPtr)          const
 | |
| 
 | |
|       * Prototype for the external procedure 'htmlNodeStatus'.
 | |
|       * #param1 is an htmlNodePtr and #param2 an xmlCint, both by
 | |
|       * value. Returns an htmlStatus.
 | |
|      d htmlNodeStatus  pr                  extproc('htmlNodeStatus')
 | |
|      d                                     like(htmlStatus)
 | |
|      d  #param1                            value like(htmlNodePtr)
 | |
|      d  #param2                            value like(xmlCint)
 | |
| 
 | |
|       * C macros implemented as procedures for ILE/RPG support.
 | |
| 
 | |
|       * Prototype bound to the external procedure
 | |
|       * '__htmlDefaultSubelement'. elt is a pointer passed by value;
 | |
|       * the result is a pointer.
 | |
|      d htmlDefaultSubelement...
 | |
|      d                 pr              *   extproc('__htmlDefaultSubelement')   const char *
 | |
|      d  elt                            *   value                                const htmlElemDesc *
 | |
| 
 | |
|       * Prototype bound to the external procedure
 | |
|       * '__htmlElementAllowedHereDesc'. parent and elt are pointers
 | |
|       * passed by value. Returns an xmlCint.
 | |
|      d htmlElementAllowedHereDesc...
 | |
|      d                 pr                  extproc(
 | |
|      d                                     '__htmlElementAllowedHereDesc')
 | |
|      d                                     like(xmlCint)
 | |
|      d  parent                         *   value                                const htmlElemDesc *
 | |
|      d  elt                            *   value                                const htmlElemDesc *
 | |
| 
 | |
|       * Prototype bound to the external procedure
 | |
|       * '__htmlRequiredAttrs'. elt is a pointer passed by value;
 | |
|       * the result is a pointer.
 | |
|      d htmlRequiredAttrs...
 | |
|      d                 pr              *   extproc('__htmlRequiredAttrs')        const char * *
 | |
|      d  elt                            *   value                                const htmlElemDesc *
 | |
| 
 | |
|       /endif                                                                    LIBXML_HTML_ENABLED
 | |
|       /endif                                                                    HTML_PARSER_H__
 |