1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-10-26 00:37:43 +03:00

doc: Improve regexp documentation

This commit is contained in:
Nick Wellnhofer
2025-05-16 21:13:17 +02:00
parent cbad60ff81
commit 954aae907d
2 changed files with 36 additions and 14 deletions

View File

@@ -1,10 +1,10 @@
/** /**
* @file * @file
* *
* @brief regular expressions handling * @brief Regular expressions
* *
* basic API for libxml regular expressions handling used * A regular expression engine used for DTD and XML Schema
* for XML Schemas and validation. * validation.
* *
* @copyright See Copyright for the status of this software. * @copyright See Copyright for the status of this software.
* *
@@ -25,8 +25,7 @@ extern "C" {
#endif #endif
/** /**
* A libxml regular expression, they can actually be far more complex * A libxml regular expression
* thank the POSIX regex expressions.
*/ */
typedef struct _xmlRegexp xmlRegexp; typedef struct _xmlRegexp xmlRegexp;
typedef xmlRegexp *xmlRegexpPtr; typedef xmlRegexp *xmlRegexpPtr;

View File

@@ -3596,6 +3596,8 @@ error:
/** /**
* Build a context used for progressive evaluation of a regexp. * Build a context used for progressive evaluation of a regexp.
* *
* @deprecated Internal function, don't use.
*
* @param comp a precompiled regular expression * @param comp a precompiled regular expression
* @param callback a callback function used for handling progress in the * @param callback a callback function used for handling progress in the
* automata matching phase * automata matching phase
@@ -3657,6 +3659,8 @@ xmlRegNewExecCtxt(xmlRegexp *comp, xmlRegExecCallbacks callback, void *data) {
/** /**
* Free the structures associated to a regular expression evaluation context. * Free the structures associated to a regular expression evaluation context.
* *
* @deprecated Internal function, don't use.
*
* @param exec a regular expression evaluation context * @param exec a regular expression evaluation context
*/ */
void void
@@ -4173,6 +4177,8 @@ progress:
/** /**
* Push one input token in the execution context * Push one input token in the execution context
* *
* @deprecated Internal function, don't use.
*
* @param exec a regexp execution context or NULL to indicate the end * @param exec a regexp execution context or NULL to indicate the end
* @param value a string token input * @param value a string token input
* @param data data associated to the token to reuse in callbacks * @param data data associated to the token to reuse in callbacks
@@ -4188,6 +4194,8 @@ xmlRegExecPushString(xmlRegExecCtxt *exec, const xmlChar *value,
/** /**
* Push one input token in the execution context * Push one input token in the execution context
* *
* @deprecated Internal function, don't use.
*
* @param exec a regexp execution context or NULL to indicate the end * @param exec a regexp execution context or NULL to indicate the end
* @param value the first string token input * @param value the first string token input
* @param value2 the second string token input * @param value2 the second string token input
@@ -4240,7 +4248,7 @@ xmlRegExecPushString2(xmlRegExecCtxt *exec, const xmlChar *value,
} }
/** /**
* Extract information from the regexp execution, internal routine to * Extract information from the regexp execution. Internal routine to
* implement xmlRegExecNextValues() and xmlRegExecErrInfo() * implement xmlRegExecNextValues() and xmlRegExecErrInfo()
* *
* @param exec a regexp execution context * @param exec a regexp execution context
@@ -4396,13 +4404,15 @@ xmlRegExecGetValues(xmlRegExecCtxtPtr exec, int err,
} }
/** /**
* Extract information from the regexp execution, * Extract information from the regexp execution.
* the parameter `values` must point to an array of `nbval` string pointers; * The parameter `values` must point to an array of `nbval` string pointers;
* on return, `nbval` will contain the number of possible strings in that * on return, `nbval` will contain the number of possible strings in that
* state and the `values` array will be updated with them. The string values * state and the `values` array will be updated with them. The string values
* returned will be freed with the `exec` context and don't need to be * returned will be freed with the `exec` context and don't need to be
* deallocated. * deallocated.
* *
* @deprecated Internal function, don't use.
*
* @param exec a regexp execution context * @param exec a regexp execution context
* @param nbval pointer to the number of accepted values IN/OUT * @param nbval pointer to the number of accepted values IN/OUT
* @param nbneg return number of negative transitions * @param nbneg return number of negative transitions
@@ -4417,7 +4427,7 @@ xmlRegExecNextValues(xmlRegExecCtxt *exec, int *nbval, int *nbneg,
} }
/** /**
* Extract error information from the regexp execution, the parameter * Extract error information from the regexp execution. The parameter
* `string` will be updated with the value pushed and not accepted, * `string` will be updated with the value pushed and not accepted,
* the parameter `values` must point to an array of `nbval` string pointers; * the parameter `values` must point to an array of `nbval` string pointers;
* on return, `nbval` will contain the number of possible strings in that * on return, `nbval` will contain the number of possible strings in that
@@ -4425,6 +4435,8 @@ xmlRegExecNextValues(xmlRegExecCtxt *exec, int *nbval, int *nbneg,
* returned will be freed with the `exec` context and don't need to be * returned will be freed with the `exec` context and don't need to be
* deallocated. * deallocated.
* *
* @deprecated Internal function, don't use.
*
* @param exec a regexp execution context generating an error * @param exec a regexp execution context generating an error
* @param string return value for the error string * @param string return value for the error string
* @param nbval pointer to the number of accepted values IN/OUT * @param nbval pointer to the number of accepted values IN/OUT
@@ -5394,9 +5406,11 @@ xmlRegexpPrint(FILE *output ATTRIBUTE_UNUSED,
} }
/** /**
* Parses an XML Schemas regular expression.
*
* Parses a regular expression conforming to XML Schemas Part 2 Datatype * Parses a regular expression conforming to XML Schemas Part 2 Datatype
* Appendix F and builds an automaton suitable for testing strings against * Appendix F and builds an automaton suitable for testing strings against
* that regular expression * that regular expression.
* *
* @param regexp a regular expression string * @param regexp a regular expression string
* @returns the compiled expression or NULL in case of error * @returns the compiled expression or NULL in case of error
@@ -5445,7 +5459,7 @@ error:
} }
/** /**
* Check if the regular expression generates the value * Check if the regular expression matches a string.
* *
* @param comp the compiled regular expression * @param comp the compiled regular expression
* @param content the value to check against the regular expression * @param content the value to check against the regular expression
@@ -5459,10 +5473,19 @@ xmlRegexpExec(xmlRegexp *comp, const xmlChar *content) {
} }
/** /**
* Check if the regular expression is determinist * Check if the regular expression is deterministic.
*
* DTD and XML Schemas require a deterministic content model,
* so the automaton compiled from the regex must be a DFA.
*
* The runtime of this function is quadratic in the number of
* outgoing edges, causing serious worst-case performance issues.
*
* @deprecated Internal function, don't use.
* *
* @param comp the compiled regular expression * @param comp the compiled regular expression
* @returns 1 if it is deterministic, 0 if not and a negative value in case of error * @returns 1 if it is deterministic, 0 if not and a negative value in case
* of error
*/ */
int int
xmlRegexpIsDeterminist(xmlRegexp *comp) { xmlRegexpIsDeterminist(xmlRegexp *comp) {
@@ -5499,7 +5522,7 @@ xmlRegexpIsDeterminist(xmlRegexp *comp) {
} }
/** /**
* Free a regexp * Free a regexp.
* *
* @param regexp the regexp * @param regexp the regexp
*/ */