mirror of
				https://gitlab.gnome.org/GNOME/libxml2.git
				synced 2025-10-24 13:33:01 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			378 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			378 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * parser.h : Interfaces, constants and types related to the XML parser.
 | |
|  *
 | |
|  * See Copyright for the status of this software.
 | |
|  *
 | |
|  * Daniel.Veillard@w3.org
 | |
|  */
 | |
| 
 | |
| #ifndef __XML_PARSER_H__
 | |
| #define __XML_PARSER_H__
 | |
| 
 | |
| #include "tree.h"
 | |
| #include "valid.h"
 | |
| #include "xmlIO.h"
 | |
| 
 | |
| #ifdef __cplusplus
 | |
| extern "C" {
 | |
| #endif
 | |
| 
 | |
| /*
 | |
|  * Constants.
 | |
|  */
 | |
| #define XML_DEFAULT_VERSION	"1.0"
 | |
| 
 | |
| /**
 | |
|  * An xmlParserInput is an input flow for the XML processor.
 | |
|  * Each entity parsed is associated with an xmlParserInput (except the
 | |
|  * few predefined ones). This is the case both for internal entities
 | |
|  * - in which case the flow is already completely in memory - or
 | |
|  * external entities - in which case we use the buf structure for
 | |
|  * progressive reading and I18N conversions to the internal UTF-8 format.
 | |
|  */
 | |
| 
 | |
| typedef void (* xmlParserInputDeallocate)(xmlChar *); /* type of the xmlParserInput.free callback */
 | |
| typedef struct xmlParserInput {
 | |
|     /* Input buffer */
 | |
|     xmlParserInputBufferPtr buf;      /* UTF-8 encoded buffer */
 | |
| 
 | |
|     const char *filename;             /* The file analyzed, if any */
 | |
|     const char *directory;            /* the directory/base of the file */
 | |
|     const xmlChar *base;                 /* Base of the array to parse */
 | |
|     const xmlChar *cur;                  /* Current char being parsed */
 | |
|     int line;                         /* Current line */
 | |
|     int col;                          /* Current column */
 | |
|     int consumed;                     /* How many xmlChars were already consumed */
 | |
|     xmlParserInputDeallocate free;    /* function to deallocate the base; note it takes a non-const xmlChar * while base is const */
 | |
| } xmlParserInput;
 | |
| typedef xmlParserInput *xmlParserInputPtr;
 | |
| 
 | |
| typedef xmlParserInputPtr (*xmlExternalEntityLoader)(const char *URL,
 | |
| 						     const char *ID,
 | |
| 						     xmlParserInputPtr context); /* callback type accepted by xmlSetExternalEntityLoader() and returned by xmlGetExternalEntityLoader() */
 | |
| 
 | |
| /**
 | |
|  * The parser can be asked to collect node information, i.e. at what
 | |
|  * place in the file each node was detected.
 | |
|  * NOTE: This is off by default and not very well tested.
 | |
|  */
 | |
| typedef struct _xmlParserNodeInfo {
 | |
|   const struct xmlNode* node;
 | |
|   /* Position and line number at which the text that created the node begins and ends */
 | |
|   unsigned long begin_pos;
 | |
|   unsigned long begin_line;
 | |
|   unsigned long end_pos;
 | |
|   unsigned long end_line;
 | |
| } _xmlParserNodeInfo;
 | |
| typedef _xmlParserNodeInfo xmlParserNodeInfo;
 | |
| 
 | |
| typedef struct xmlParserNodeInfoSeq { /* sequence of xmlParserNodeInfo records; embedded in the parser context as node_seq */
 | |
|   unsigned long maximum;       /* presumably the allocated capacity of buffer -- TODO confirm */
 | |
|   unsigned long length;        /* presumably the number of entries in use -- TODO confirm */
 | |
|   xmlParserNodeInfo* buffer;   /* storage for the node info records */
 | |
| } _xmlParserNodeInfoSeq;
 | |
| typedef _xmlParserNodeInfoSeq xmlParserNodeInfoSeq;
 | |
| typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr;
 | |
| 
 | |
| /**
 | |
|  * The parser is not a state-based parser, but we need to maintain
 | |
|  * minimal state information, especially for entity processing.
 | |
|  * The current value is stored in the instate field of the parser context.
 | |
|  */
 | |
| typedef enum {
 | |
|     XML_PARSER_EOF = 0,          /* explicitly 0; the following states count upward from it */
 | |
|     XML_PARSER_PROLOG,
 | |
|     XML_PARSER_CONTENT,
 | |
|     XML_PARSER_ENTITY_DECL,
 | |
|     XML_PARSER_ENTITY_VALUE,
 | |
|     XML_PARSER_ATTRIBUTE_VALUE,
 | |
|     XML_PARSER_DTD,
 | |
|     XML_PARSER_EPILOG,
 | |
|     XML_PARSER_COMMENT,
 | |
|     XML_PARSER_CDATA_SECTION 
 | |
| } xmlParserInputState;
 | |
| 
 | |
| /**
 | |
|  * The parser context.
 | |
|  * NOTE This doesn't completely define the parser state: the (current ?)
 | |
|  *      design of the parser uses recursive function calls since this allows
 | |
|  *      an easy mapping from the production rules of the specification
 | |
|  *      to the actual code. The drawback is that the actual function calls
 | |
|  *      also reflect the parser state. However most of the parsing routines
 | |
|  *      take the parser context pointer as their only argument, so migrating
 | |
|  *      to a state based parser for progressive parsing shouldn't be too hard.
 | |
|  */
 | |
| typedef struct _xmlParserCtxt {
 | |
|     struct xmlSAXHandler *sax;        /* The SAX handler */
 | |
|     void            *userData;        /* opaque user pointer; NOTE(review): presumably the ctx handed to the SAX callbacks -- confirm */
 | |
|     xmlDocPtr           myDoc;        /* the document being built */
 | |
|     int            wellFormed;        /* is the document well formed */
 | |
|     int       replaceEntities;        /* shall we replace entities ? */
 | |
|     const xmlChar       *version;        /* the XML version string */
 | |
|     const xmlChar      *encoding;        /* encoding, if any */
 | |
|     int            standalone;        /* standalone document */
 | |
|     int                  html;        /* are we parsing an HTML document */
 | |
| 
 | |
|     /* Input stream stack */
 | |
|     xmlParserInputPtr  input;         /* Current input stream */
 | |
|     int                inputNr;       /* Number of current input streams */
 | |
|     int                inputMax;      /* Max number of input streams */
 | |
|     xmlParserInputPtr *inputTab;      /* stack of inputs */
 | |
| 
 | |
|     /* Node analysis stack only used for DOM building */
 | |
|     xmlNodePtr         node;          /* Current parsed Node */
 | |
|     int                nodeNr;        /* Depth of the parsing stack */
 | |
|     int                nodeMax;       /* Max depth of the parsing stack */
 | |
|     xmlNodePtr        *nodeTab;       /* array of nodes */
 | |
| 
 | |
|     int record_info;                  /* Whether node info should be kept */
 | |
|     xmlParserNodeInfoSeq node_seq;    /* info about each node parsed */
 | |
| 
 | |
|     int errNo;                        /* error code */
 | |
| 
 | |
|     int     hasExternalSubset;        /* the document references an external subset */
 | |
|     int             hasPErefs;        /* the internal subset has PE refs */
 | |
|     int              external;        /* are we parsing an external entity */
 | |
| 
 | |
|     int                 valid;        /* is the document valid */
 | |
|     int              validate;        /* shall we try to validate ? */
 | |
|     xmlValidCtxt        vctxt;        /* The validity context */
 | |
| 
 | |
|     xmlParserInputState instate;      /* current type of input */
 | |
|     int                 token;        /* next char look-ahead */    
 | |
| 
 | |
|     char           *directory;        /* the data directory */
 | |
| } _xmlParserCtxt;
 | |
| typedef _xmlParserCtxt xmlParserCtxt;
 | |
| typedef xmlParserCtxt *xmlParserCtxtPtr;
 | |
| 
 | |
| /**
 | |
|  * a SAX Locator.
 | |
|  */
 | |
| typedef struct xmlSAXLocator { /* table of four callbacks, each taking only the opaque ctx pointer */
 | |
|     const xmlChar *(*getPublicId)(void *ctx);   /* returns a read-only xmlChar string */
 | |
|     const xmlChar *(*getSystemId)(void *ctx);   /* returns a read-only xmlChar string */
 | |
|     int (*getLineNumber)(void *ctx);
 | |
|     int (*getColumnNumber)(void *ctx);
 | |
| } _xmlSAXLocator;
 | |
| typedef _xmlSAXLocator xmlSAXLocator;
 | |
| typedef xmlSAXLocator *xmlSAXLocatorPtr;
 | |
| 
 | |
| /**
 | |
|  * A SAX handler is a bunch of callbacks called by the parser when processing
 | |
|  * of the input generates data or structure information.
 | |
|  */
 | |
| 
 | |
| #include "entities.h"
 | |
| 
 | |
| typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, /* every SAX callback type below takes the opaque ctx pointer first */
 | |
| 			    const xmlChar *publicId, const xmlChar *systemId);
 | |
| typedef void (*internalSubsetSAXFunc) (void *ctx, const xmlChar *name,
 | |
|                             const xmlChar *ExternalID, const xmlChar *SystemID);
 | |
| typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx,
 | |
|                             const xmlChar *name);
 | |
| typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx,
 | |
|                             const xmlChar *name);
 | |
| typedef void (*entityDeclSAXFunc) (void *ctx,
 | |
|                             const xmlChar *name, int type, const xmlChar *publicId,
 | |
| 			    const xmlChar *systemId, xmlChar *content); /* NOTE(review): content is the only non-const xmlChar * argument among these callbacks */
 | |
| typedef void (*notationDeclSAXFunc)(void *ctx, const xmlChar *name,
 | |
| 			    const xmlChar *publicId, const xmlChar *systemId);
 | |
| typedef void (*attributeDeclSAXFunc)(void *ctx, const xmlChar *elem,
 | |
|                             const xmlChar *name, int type, int def,
 | |
| 			    const xmlChar *defaultValue, xmlEnumerationPtr tree);
 | |
| typedef void (*elementDeclSAXFunc)(void *ctx, const xmlChar *name,
 | |
| 			    int type, xmlElementContentPtr content);
 | |
| typedef void (*unparsedEntityDeclSAXFunc)(void *ctx,
 | |
|                             const xmlChar *name, const xmlChar *publicId,
 | |
| 			    const xmlChar *systemId, const xmlChar *notationName);
 | |
| typedef void (*setDocumentLocatorSAXFunc) (void *ctx,
 | |
|                             xmlSAXLocatorPtr loc);
 | |
| typedef void (*startDocumentSAXFunc) (void *ctx);
 | |
| typedef void (*endDocumentSAXFunc) (void *ctx);
 | |
| typedef void (*startElementSAXFunc) (void *ctx, const xmlChar *name,
 | |
|                             const xmlChar **atts);
 | |
| typedef void (*endElementSAXFunc) (void *ctx, const xmlChar *name);
 | |
| typedef void (*attributeSAXFunc) (void *ctx, const xmlChar *name,
 | |
|                                   const xmlChar *value);
 | |
| typedef void (*referenceSAXFunc) (void *ctx, const xmlChar *name);
 | |
| typedef void (*charactersSAXFunc) (void *ctx, const xmlChar *ch,
 | |
| 		            int len);
 | |
| typedef void (*ignorableWhitespaceSAXFunc) (void *ctx,
 | |
| 			    const xmlChar *ch, int len);
 | |
| typedef void (*processingInstructionSAXFunc) (void *ctx,
 | |
|                             const xmlChar *target, const xmlChar *data);
 | |
| typedef void (*commentSAXFunc) (void *ctx, const xmlChar *value);
 | |
| typedef void (*cdataBlockSAXFunc) (void *ctx, const xmlChar *value, int len);
 | |
| typedef void (*warningSAXFunc) (void *ctx, const char *msg, ...); /* variadic: msg plus extra arguments (presumably printf-style -- confirm) */
 | |
| typedef void (*errorSAXFunc) (void *ctx, const char *msg, ...); /* variadic, same shape as warningSAXFunc */
 | |
| typedef void (*fatalErrorSAXFunc) (void *ctx, const char *msg, ...); /* variadic, same shape as warningSAXFunc */
 | |
| typedef int (*isStandaloneSAXFunc) (void *ctx);
 | |
| typedef int (*hasInternalSubsetSAXFunc) (void *ctx);
 | |
| typedef int (*hasExternalSubsetSAXFunc) (void *ctx);
 | |
| 
 | |
| typedef struct xmlSAXHandler { /* table of SAX callbacks; each field has the matching *SAXFunc pointer type */
 | |
|     internalSubsetSAXFunc internalSubset;
 | |
|     isStandaloneSAXFunc isStandalone;
 | |
|     hasInternalSubsetSAXFunc hasInternalSubset;
 | |
|     hasExternalSubsetSAXFunc hasExternalSubset;
 | |
|     resolveEntitySAXFunc resolveEntity;
 | |
|     getEntitySAXFunc getEntity;
 | |
|     entityDeclSAXFunc entityDecl;
 | |
|     notationDeclSAXFunc notationDecl;
 | |
|     attributeDeclSAXFunc attributeDecl;
 | |
|     elementDeclSAXFunc elementDecl;
 | |
|     unparsedEntityDeclSAXFunc unparsedEntityDecl;
 | |
|     setDocumentLocatorSAXFunc setDocumentLocator;
 | |
|     startDocumentSAXFunc startDocument;
 | |
|     endDocumentSAXFunc endDocument;
 | |
|     startElementSAXFunc startElement;
 | |
|     endElementSAXFunc endElement;
 | |
|     referenceSAXFunc reference;
 | |
|     charactersSAXFunc characters;
 | |
|     ignorableWhitespaceSAXFunc ignorableWhitespace;
 | |
|     processingInstructionSAXFunc processingInstruction;
 | |
|     commentSAXFunc comment;
 | |
|     warningSAXFunc warning;
 | |
|     errorSAXFunc error;
 | |
|     fatalErrorSAXFunc fatalError;
 | |
|     getParameterEntitySAXFunc getParameterEntity;
 | |
|     cdataBlockSAXFunc cdataBlock;
 | |
| } xmlSAXHandler;
 | |
| typedef xmlSAXHandler *xmlSAXHandlerPtr; /* pointer form taken by the xmlSAX* parsing entry points */
 | |
| 
 | |
| /**
 | |
|  * Global variables: just the default SAX interface tables and XML
 | |
|  * version information.
 | |
|  */
 | |
| extern const char *xmlParserVersion;
 | |
| 
 | |
| extern xmlSAXLocator xmlDefaultSAXLocator;
 | |
| extern xmlSAXHandler xmlDefaultSAXHandler;
 | |
| extern xmlSAXHandler htmlDefaultSAXHandler;
 | |
| 
 | |
| /**
 | |
|  * Entity substitution default behaviour.
 | |
|  * NOTE(review): presumably the value changed through
 | |
|  * xmlSubstituteEntitiesDefault() -- confirm against the implementation.
 | |
|  */
 | |
| 
 | |
| extern int xmlSubstituteEntitiesDefaultValue;
 | |
| 
 | |
| 
 | |
| #include "entities.h"
 | |
| #include "xml-error.h"
 | |
| 
 | |
| /**
 | |
|  * Input functions.
 | |
|  * Both take an xmlParserInputPtr and a length given as int.
 | |
|  * NOTE(review): the meaning of the int return value is not visible in
 | |
|  * this header -- confirm against the implementation.
 | |
|  */
 | |
| 
 | |
| int		xmlParserInputRead	(xmlParserInputPtr in,
 | |
| 					 int len);
 | |
| int		xmlParserInputGrow	(xmlParserInputPtr in,
 | |
| 					 int len);
 | |
| 
 | |
| /**
 | |
|  * xmlChar handling: string helpers working on xmlChar buffers.
 | |
|  * Lengths and offsets are passed as int.
 | |
|  * NOTE(review): xmlStrstr() declares val as a non-const xmlChar *, unlike
 | |
|  * the const arguments of xmlStrchr(), xmlStrcmp() and xmlStrncmp(). It
 | |
|  * probably should be const as well, but the definition would have to
 | |
|  * change together with this prototype -- confirm.
 | |
|  */
 | |
| xmlChar *	xmlStrdup		(const xmlChar *cur);
 | |
| xmlChar *	xmlStrndup		(const xmlChar *cur,
 | |
| 					 int len);
 | |
| xmlChar *	xmlStrsub		(const xmlChar *str,
 | |
| 					 int start,
 | |
| 					 int len);
 | |
| const xmlChar *	xmlStrchr		(const xmlChar *str,
 | |
| 					 xmlChar val);
 | |
| const xmlChar *	xmlStrstr		(const xmlChar *str,
 | |
| 					 xmlChar *val);
 | |
| int		xmlStrcmp		(const xmlChar *str1,
 | |
| 					 const xmlChar *str2);
 | |
| int		xmlStrncmp		(const xmlChar *str1,
 | |
| 					 const xmlChar *str2,
 | |
| 					 int len);
 | |
| int		xmlStrlen		(const xmlChar *str);
 | |
| xmlChar *	xmlStrcat		(xmlChar *cur,
 | |
| 					 const xmlChar *add);
 | |
| xmlChar *	xmlStrncat		(xmlChar *cur,
 | |
| 					 const xmlChar *add,
 | |
| 					 int len);
 | |
| 
 | |
| /**
 | |
|  * Basic parsing interfaces.
 | |
|  * The three xmlParse* entry points return an xmlDocPtr; the input is an
 | |
|  * xmlChar buffer (xmlParseDoc), a char buffer plus its size
 | |
|  * (xmlParseMemory) or a file name (xmlParseFile).
 | |
|  * NOTE(review): the buffer arguments of xmlParseDoc() and xmlParseMemory()
 | |
|  * are declared non-const.
 | |
|  */
 | |
| xmlDocPtr	xmlParseDoc		(xmlChar *cur);
 | |
| xmlDocPtr	xmlParseMemory		(char *buffer,
 | |
| 					 int size);
 | |
| xmlDocPtr	xmlParseFile		(const char *filename);
 | |
| int		xmlSubstituteEntitiesDefault(int val);
 | |
| 
 | |
| /**
 | |
|  * Recovery mode.
 | |
|  * These have the same parameter lists and return type as xmlParseDoc(),
 | |
|  * xmlParseMemory() and xmlParseFile() respectively.
 | |
|  */
 | |
| xmlDocPtr	xmlRecoverDoc		(xmlChar *cur);
 | |
| xmlDocPtr	xmlRecoverMemory	(char *buffer,
 | |
| 					 int size);
 | |
| xmlDocPtr	xmlRecoverFile		(const char *filename);
 | |
| 
 | |
| /**
 | |
|  * Less common routines and SAX interfaces.
 | |
|  * The xmlSAX* entry points take the SAX handler table to use as their
 | |
|  * first argument; the xmlSAXUser* variants additionally take an opaque
 | |
|  * user_data pointer and return an int instead of a document.
 | |
|  * NOTE(review): the int recovery argument presumably selects the
 | |
|  * behaviour offered by the xmlRecover* functions -- confirm.
 | |
|  */
 | |
| int		xmlParseDocument	(xmlParserCtxtPtr ctxt);
 | |
| xmlDocPtr	xmlSAXParseDoc		(xmlSAXHandlerPtr sax,
 | |
| 					 xmlChar *cur,
 | |
| 					 int recovery);
 | |
| int		xmlSAXUserParseFile	(xmlSAXHandlerPtr sax,
 | |
| 					 void *user_data,
 | |
| 					 const char *filename);
 | |
| int		xmlSAXUserParseMemory	(xmlSAXHandlerPtr sax,
 | |
| 					 void *user_data,
 | |
| 					 char *buffer,
 | |
| 					 int size);
 | |
| xmlDocPtr	xmlSAXParseMemory	(xmlSAXHandlerPtr sax,
 | |
| 					 char *buffer,
 | |
|                                    	 int size,
 | |
| 					 int recovery);
 | |
| xmlDocPtr	xmlSAXParseFile		(xmlSAXHandlerPtr sax,
 | |
| 					 const char *filename,
 | |
| 					 int recovery);
 | |
| xmlDtdPtr	xmlParseDTD		(const xmlChar *ExternalID,
 | |
| 					 const xmlChar *SystemID);
 | |
| xmlDtdPtr	xmlSAXParseDTD		(xmlSAXHandlerPtr sax,
 | |
| 					 const xmlChar *ExternalID,
 | |
| 					 const xmlChar *SystemID);
 | |
| void		xmlInitParserCtxt	(xmlParserCtxtPtr ctxt);
 | |
| void		xmlClearParserCtxt	(xmlParserCtxtPtr ctxt);
 | |
| void		xmlSetupParserForBuffer	(xmlParserCtxtPtr ctxt,
 | |
| 					 const xmlChar* buffer,
 | |
| 					 const char* filename);
 | |
| void		xmlDefaultSAXHandlerInit(void);
 | |
| void		htmlDefaultSAXHandlerInit(void);
 | |
| 
 | |
| /**
 | |
|  * Node information.
 | |
|  * Functions working on the xmlParserNodeInfo records and on the
 | |
|  * xmlParserNodeInfoSeq sequence that holds them. The lookup functions
 | |
|  * only take const pointers; xmlParserFindNodeInfoIndex() returns an
 | |
|  * unsigned long index.
 | |
|  */
 | |
| const xmlParserNodeInfo*
 | |
| 		xmlParserFindNodeInfo	(const xmlParserCtxt* ctxt,
 | |
|                                                const xmlNode* node);
 | |
| void		xmlInitNodeInfoSeq	(xmlParserNodeInfoSeqPtr seq);
 | |
| void		xmlClearNodeInfoSeq	(xmlParserNodeInfoSeqPtr seq);
 | |
| unsigned long xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeq* seq,
 | |
|                                          const xmlNode* node);
 | |
| void		xmlParserAddNodeInfo	(xmlParserCtxtPtr ctxt,
 | |
| 					 const xmlParserNodeInfo* info);
 | |
| 
 | |
| /*
 | |
|  * External entities handling actually implemented in xmlIO.
 | |
|  * The setter and getter exchange an xmlExternalEntityLoader callback;
 | |
|  * xmlLoadExternalEntity() has the same parameter list and return type
 | |
|  * as that callback type.
 | |
|  */
 | |
| 
 | |
| void		xmlSetExternalEntityLoader(xmlExternalEntityLoader f);
 | |
| xmlExternalEntityLoader
 | |
| 		xmlGetExternalEntityLoader(void);
 | |
| xmlParserInputPtr
 | |
| 		xmlLoadExternalEntity	(const char *URL,
 | |
| 					 const char *ID,
 | |
| 					 xmlParserInputPtr context);
 | |
| #ifdef __cplusplus
 | |
| }
 | |
| #endif
 | |
| 
 | |
| #endif /* __XML_PARSER_H__ */
 | |
| 
 |