|   |   |   |   | libxml2 Reference Manual | 
|---|
xmlregexp - regular expressions handling
basic API for libxml regular expressions handling used for XML Schemas and validation.
Author(s): Daniel Veillard
typedef struct _xmlExpCtxt xmlExpCtxt; typedef xmlExpCtxt * xmlExpCtxtPtr; typedef struct _xmlExpNode xmlExpNode; typedef xmlExpNode * xmlExpNodePtr; typedef enum xmlExpNodeType; typedef struct _xmlRegExecCtxt xmlRegExecCtxt; typedef xmlRegExecCtxt * xmlRegExecCtxtPtr; typedef struct _xmlRegexp xmlRegexp; typedef xmlRegexp * xmlRegexpPtr; int xmlExpCtxtNbCons (xmlExpCtxtPtr ctxt); int xmlExpCtxtNbNodes (xmlExpCtxtPtr ctxt); void xmlExpDump (xmlBufferPtr buf,
xmlExpNodePtr expr); xmlExpNodePtr xmlExpExpDerive (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr sub); void xmlExpFree (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp); void xmlExpFreeCtxt (xmlExpCtxtPtr ctxt); int xmlExpGetLanguage (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
const xmlChar ** langList,
int len); int xmlExpGetStart (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
const xmlChar ** tokList,
int len); int xmlExpIsNillable (xmlExpNodePtr exp); int xmlExpMaxToken (xmlExpNodePtr expr); xmlExpNodePtr xmlExpNewAtom (xmlExpCtxtPtr ctxt,
const xmlChar * name,
int len); xmlExpCtxtPtr xmlExpNewCtxt (int maxNodes,
xmlDictPtr dict); xmlExpNodePtr xmlExpNewOr (xmlExpCtxtPtr ctxt,
xmlExpNodePtr left,
xmlExpNodePtr right); xmlExpNodePtr xmlExpNewRange (xmlExpCtxtPtr ctxt,
xmlExpNodePtr subset,
int min,
int max); xmlExpNodePtr xmlExpNewSeq (xmlExpCtxtPtr ctxt,
xmlExpNodePtr left,
xmlExpNodePtr right); xmlExpNodePtr xmlExpParse (xmlExpCtxtPtr ctxt,
const char * expr); void xmlExpRef (xmlExpNodePtr exp); xmlExpNodePtr xmlExpStringDerive (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
const xmlChar * str,
int len); int xmlExpSubsume (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr sub); typedef void xmlRegExecCallbacks (xmlRegExecCtxtPtr exec,
const xmlChar * token,
void * transdata,
void * inputdata); int xmlRegExecErrInfo (xmlRegExecCtxtPtr exec,
const xmlChar ** string,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal); int xmlRegExecNextValues (xmlRegExecCtxtPtr exec,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal); int xmlRegExecPushString (xmlRegExecCtxtPtr exec,
const xmlChar * value,
void * data); int xmlRegExecPushString2 (xmlRegExecCtxtPtr exec,
const xmlChar * value,
const xmlChar * value2,
void * data); void xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec); void xmlRegFreeRegexp (xmlRegexpPtr regexp); xmlRegExecCtxtPtr xmlRegNewExecCtxt (xmlRegexpPtr comp,
xmlRegExecCallbacks callback,
void * data); xmlRegexpPtr xmlRegexpCompile (const xmlChar * regexp); int xmlRegexpExec (xmlRegexpPtr comp,
const xmlChar * content); int xmlRegexpIsDeterminist (xmlRegexpPtr comp); void xmlRegexpPrint (FILE * output,
xmlRegexpPtr regexp);
struct _xmlExpCtxt {
The content of this structure is not made public by the API.
} xmlExpCtxt;
xmlExpCtxt * xmlExpCtxtPtr;
struct _xmlExpNode {
The content of this structure is not made public by the API.
} xmlExpNode;
xmlExpNode * xmlExpNodePtr;
enum xmlExpNodeType { XML_EXP_EMPTY = 0, XML_EXP_FORBID = 1, XML_EXP_ATOM = 2, XML_EXP_SEQ = 3, XML_EXP_OR = 4, XML_EXP_COUNT = 5 };
struct _xmlRegExecCtxt {
The content of this structure is not made public by the API.
} xmlRegExecCtxt;
xmlRegExecCtxt * xmlRegExecCtxtPtr;
A libxml progressive regular expression evaluation context
struct _xmlRegexp {
The content of this structure is not made public by the API.
} xmlRegexp;
xmlRegexp * xmlRegexpPtr;
A libxml regular expression; they can actually be far more complex than POSIX regular expressions.
void xmlRegExecCallbacks (xmlRegExecCtxtPtr exec,
const xmlChar * token,
void * transdata,
void * inputdata)
Callback function invoked when doing a transition in the automaton
| exec: | the regular expression context | 
| token: | the current token string | 
| transdata: | transition data | 
| inputdata: | input data | 
xmlExpNodePtr emptyExp;
xmlExpNodePtr forbiddenExp;
int xmlExpCtxtNbCons (xmlExpCtxtPtr ctxt)
Debugging facility: provides the number of nodes allocated over the lifetime of the context
| ctxt: | an expression context | 
| Returns: | the number of nodes ever allocated or -1 in case of error | 
int xmlExpCtxtNbNodes (xmlExpCtxtPtr ctxt)
Debugging facility: provides the number of nodes allocated at that point
| ctxt: | an expression context | 
| Returns: | the number of nodes in use or -1 in case of error | 
void xmlExpDump (xmlBufferPtr buf,
xmlExpNodePtr expr)
Serialize the expression as compiled to the buffer
| buf: | a buffer to receive the output | 
| expr: | the compiled expression | 
xmlExpNodePtr xmlExpExpDerive (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr sub)
Evaluates the expression resulting from @exp consuming a subexpression @sub. Based on algebraic derivation and sometimes direct Brzozowski derivation, it usually takes less than linear time and can handle expressions generating infinite languages.
| ctxt: | the expressions context | 
| exp: | the englobing expression | 
| sub: | the subexpression | 
| Returns: | the resulting expression or NULL in case of internal error, the result must be freed | 
void xmlExpFree (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp)
Dereference the expression
| ctxt: | the expression context | 
| exp: | the expression | 
void xmlExpFreeCtxt (xmlExpCtxtPtr ctxt)
Free an expression context
| ctxt: | an expression context | 
int xmlExpGetLanguage (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
const xmlChar ** langList,
int len)
Find all the strings used in @exp and store them in @langList
| ctxt: | the expression context | 
| exp: | the expression | 
| langList: | where to store the tokens | 
| len: | the allocated length of @langList | 
| Returns: | the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings | 
int xmlExpGetStart (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
const xmlChar ** tokList,
int len)
Find all the strings that appear at the start of the languages accepted by @exp and store them in @tokList. E.g. for (a, b) | c it will return the list [a, c]
| ctxt: | the expression context | 
| exp: | the expression | 
| tokList: | where to store the tokens | 
| len: | the allocated length of @tokList | 
| Returns: | the number of unique strings found, -1 in case of errors and -2 if there are more than @len strings | 
int xmlExpIsNillable (xmlExpNodePtr exp)
Finds if the expression is nillable, i.e. if it accepts the empty sequence
| exp: | the expression | 
| Returns: | 1 if nillable, 0 if not and -1 in case of error | 
int xmlExpMaxToken (xmlExpNodePtr expr)
Indicate the maximum number of inputs an expression can accept
| expr: | a compiled expression | 
| Returns: | the maximum length or -1 in case of error | 
xmlExpNodePtr xmlExpNewAtom (xmlExpCtxtPtr ctxt,
const xmlChar * name,
int len)
Get the atom associated to this name from that context
| ctxt: | the expression context | 
| name: | the atom name | 
| len: | the atom name length in bytes (or -1) | 
| Returns: | the node or NULL in case of error | 
xmlExpCtxtPtr xmlExpNewCtxt (int maxNodes,
xmlDictPtr dict)
Creates a new context for manipulating expressions
| maxNodes: | the maximum number of nodes | 
| dict: | optional dictionary to use internally | 
| Returns: | the context or NULL in case of error | 
xmlExpNodePtr xmlExpNewOr (xmlExpCtxtPtr ctxt,
xmlExpNodePtr left,
xmlExpNodePtr right)
Get the atom associated to the choice @left | @right. Note that @left and @right are consumed in the operation; to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them. This is true even in case of failure (unless ctxt == NULL).
| ctxt: | the expression context | 
| left: | left expression | 
| right: | right expression | 
| Returns: | the node or NULL in case of error | 
xmlExpNodePtr xmlExpNewRange (xmlExpCtxtPtr ctxt,
xmlExpNodePtr subset,
int min,
int max)
Get the atom associated to the range (@subset){@min, @max}. Note that @subset is consumed in the operation; to keep a handle on it use xmlExpRef() and use xmlExpFree() to release it. This is true even in case of failure (unless ctxt == NULL).
| ctxt: | the expression context | 
| subset: | the expression to be repeated | 
| min: | the lower bound for the repetition | 
| max: | the upper bound for the repetition, -1 means infinite | 
| Returns: | the node or NULL in case of error | 
xmlExpNodePtr xmlExpNewSeq (xmlExpCtxtPtr ctxt,
xmlExpNodePtr left,
xmlExpNodePtr right)
Get the atom associated to the sequence @left , @right. Note that @left and @right are consumed in the operation; to keep a handle on them use xmlExpRef() and use xmlExpFree() to release them. This is true even in case of failure (unless ctxt == NULL).
| ctxt: | the expression context | 
| left: | left expression | 
| right: | right expression | 
| Returns: | the node or NULL in case of error | 
xmlExpNodePtr xmlExpParse (xmlExpCtxtPtr ctxt,
const char * expr)
Minimal parser for regexps; it understands the following constructs: - string terminals - choice operator | - sequence operator , - subexpressions (...) - usual cardinality operators + * and ? - finite sequences { min, max } - infinite sequences { min, * }. Only minimal checking is done; in particular, string values are not checked.
| ctxt: | the expressions context | 
| expr: | the 0 terminated string | 
| Returns: | a new expression or NULL in case of failure | 
void xmlExpRef (xmlExpNodePtr exp)
Increase the reference count of the expression
| exp: | the expression | 
xmlExpNodePtr xmlExpStringDerive (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
const xmlChar * str,
int len)
Do one step of Brzozowski derivation of the expression @exp with respect to the input string
| ctxt: | the expression context | 
| exp: | the expression | 
| str: | the string | 
| len: | the string len in bytes if available | 
| Returns: | the resulting expression or NULL in case of internal error | 
int xmlExpSubsume (xmlExpCtxtPtr ctxt,
xmlExpNodePtr exp,
xmlExpNodePtr sub)
Check whether @exp accepts all the languages accepted by @sub, the input being a subexpression.
| ctxt: | the expressions context | 
| exp: | the englobing expression | 
| sub: | the subexpression | 
| Returns: | 1 if true 0 if false and -1 in case of failure. | 
int xmlRegExecErrInfo (xmlRegExecCtxtPtr exec,
const xmlChar ** string,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)
Extract error information from the regexp execution. The parameter @string will be updated with the value pushed and not accepted. The parameter @values must point to an array of @nbval string pointers; on return @nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.
| exec: | a regexp execution context generating an error | 
| string: | return value for the error string | 
| nbval: | pointer to the number of accepted values IN/OUT | 
| nbneg: | return number of negative transitions | 
| values: | pointer to the array of acceptable values | 
| terminal: | return value if this was a terminal state | 
| Returns: | 0 in case of success or -1 in case of error. | 
int xmlRegExecNextValues (xmlRegExecCtxtPtr exec,
int * nbval,
int * nbneg,
xmlChar ** values,
int * terminal)
Extract information from the regexp execution. The parameter @values must point to an array of @nbval string pointers; on return @nbval will contain the number of possible strings in that state and the @values array will be updated with them. The string values will be freed with the @exec context and don't need to be deallocated.
| exec: | a regexp execution context | 
| nbval: | pointer to the number of accepted values IN/OUT | 
| nbneg: | return number of negative transitions | 
| values: | pointer to the array of acceptable values | 
| terminal: | return value if this was a terminal state | 
| Returns: | 0 in case of success or -1 in case of error. | 
int xmlRegExecPushString (xmlRegExecCtxtPtr exec,
const xmlChar * value,
void * data)
Push one input token in the execution context
| exec: | a regexp execution context or NULL to indicate the end | 
| value: | a string token input | 
| data: | data associated to the token to reuse in callbacks | 
| Returns: | 1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error. | 
int xmlRegExecPushString2 (xmlRegExecCtxtPtr exec,
const xmlChar * value,
const xmlChar * value2,
void * data)
Push one input token in the execution context
| exec: | a regexp execution context or NULL to indicate the end | 
| value: | the first string token input | 
| value2: | the second string token input | 
| data: | data associated to the token to reuse in callbacks | 
| Returns: | 1 if the regexp reached a final state, 0 if non-final, and a negative value in case of error. | 
void xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec)
Free the structures associated to a regular expression evaluation context.
| exec: | a regular expression evaluation context | 
xmlRegExecCtxtPtr xmlRegNewExecCtxt (xmlRegexpPtr comp,
xmlRegExecCallbacks callback,
void * data)
Build a context used for progressive evaluation of a regexp.
| comp: | a precompiled regular expression | 
| callback: | a callback function used for handling progress in the automaton matching phase | 
| data: | the context data associated to the callback in this context | 
| Returns: | the new context | 
xmlRegexpPtr xmlRegexpCompile (const xmlChar * regexp)
Parses a regular expression conforming to XML Schemas Part 2 Datatype Appendix F and builds an automaton suitable for testing strings against that regular expression
| regexp: | a regular expression string | 
| Returns: | the compiled expression or NULL in case of error | 
int xmlRegexpExec (xmlRegexpPtr comp,
const xmlChar * content)
Check if the regular expression generates the value
| comp: | the compiled regular expression | 
| content: | the value to check against the regular expression | 
| Returns: | 1 if it matches, 0 if not and a negative value in case of error | 
int xmlRegexpIsDeterminist (xmlRegexpPtr comp)
Check if the regular expression is determinist
| comp: | the compiled regular expression | 
| Returns: | 1 if yes, 0 if not and a negative value in case of error | 
void xmlRegexpPrint (FILE * output,
xmlRegexpPtr regexp)
Print the content of the compiled regular expression
| output: | the file for the output debug | 
| regexp: | the compiled regexp |