mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-28 00:21:53 +03:00
made configuring with regexps/automata/unicode the default but without
* Makefile.am configure.in include/libxml/xmlversion.h.in: made configuring with regexps/automata/unicode the default but without schemas ATM * testRegexp.c valid.c xmlregexp.c include/libxml/xmlregexp.h: fixed the regexp based DTD validation performance and memory problem by switching to a compact form for determinist regexps and detecting the determinism property in the process. Seems as fast as the old DTD validation specific engine :-) despite the regexp built and compaction process. Daniel
This commit is contained in:
12
ChangeLog
12
ChangeLog
@ -1,3 +1,15 @@
|
|||||||
|
Thu Sep 19 21:46:53 CEST 2002 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* Makefile.am configure.in include/libxml/xmlversion.h.in:
|
||||||
|
made configuring with regexps/automata/unicode the default
|
||||||
|
but without schemas ATM
|
||||||
|
* testRegexp.c valid.c xmlregexp.c include/libxml/xmlregexp.h:
|
||||||
|
fixed the regexp based DTD validation performance and memory
|
||||||
|
problem by switching to a compact form for determinist regexps
|
||||||
|
and detecting the determinism property in the process. Seems
|
||||||
|
as fast as the old DTD validation specific engine :-) despite
|
||||||
|
the regexp built and compaction process.
|
||||||
|
|
||||||
Wed Sep 18 18:27:26 CEST 2002 Daniel Veillard <daniel@veillard.com>
|
Wed Sep 18 18:27:26 CEST 2002 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* valid.c: determinism is debugged, new DTD checking code now works
|
* valid.c: determinism is debugged, new DTD checking code now works
|
||||||
|
@ -106,7 +106,7 @@ check-local: tests
|
|||||||
|
|
||||||
testall : tests SVGtests SAXtests
|
testall : tests SVGtests SAXtests
|
||||||
|
|
||||||
tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests C14Ntests Scripttests Catatests @TEST_SCHEMAS@ @TEST_THREADS@
|
tests: XMLtests XMLenttests HTMLtests Validtests URItests XPathtests XPtrtests XIncludetests C14Ntests Scripttests Catatests @TEST_REGEXPS@ @TEST_SCHEMAS@ @TEST_THREADS@
|
||||||
@(cd python ; $(MAKE) tests)
|
@(cd python ; $(MAKE) tests)
|
||||||
|
|
||||||
valgrind:
|
valgrind:
|
||||||
|
15
configure.in
15
configure.in
@ -525,7 +525,8 @@ AC_ARG_WITH(schemas, [ --with-schemas Add experimental Schemas sup
|
|||||||
if test "$with_schemas" = "yes" ; then
|
if test "$with_schemas" = "yes" ; then
|
||||||
echo Enabling Schemas support
|
echo Enabling Schemas support
|
||||||
WITH_SCHEMAS=1
|
WITH_SCHEMAS=1
|
||||||
TEST_SCHEMAS="Regexptests Automatatests Schemastests"
|
TEST_SCHEMAS="Schemastests"
|
||||||
|
with_regexps=yes
|
||||||
else
|
else
|
||||||
WITH_SCHEMAS=0
|
WITH_SCHEMAS=0
|
||||||
TEST_SCHEMAS=
|
TEST_SCHEMAS=
|
||||||
@ -533,6 +534,18 @@ fi
|
|||||||
AC_SUBST(WITH_SCHEMAS)
|
AC_SUBST(WITH_SCHEMAS)
|
||||||
AC_SUBST(TEST_SCHEMAS)
|
AC_SUBST(TEST_SCHEMAS)
|
||||||
|
|
||||||
|
AC_ARG_WITH(regexps, [ --with-regexps Add Regular Expressions support (on)])
|
||||||
|
if test "$with_regexps" = "no" ; then
|
||||||
|
echo Disabling Regexps support
|
||||||
|
WITH_REGEXPS=0
|
||||||
|
TEST_REGEXPS=
|
||||||
|
else
|
||||||
|
WITH_REGEXPS=1
|
||||||
|
TEST_REGEXPS="Regexptests Automatatests"
|
||||||
|
fi
|
||||||
|
AC_SUBST(WITH_REGEXPS)
|
||||||
|
AC_SUBST(TEST_REGEXPS)
|
||||||
|
|
||||||
AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)])
|
AC_ARG_WITH(debug, [ --with-debug Add the debugging module (on)])
|
||||||
if test "$with_debug" = "no" ; then
|
if test "$with_debug" = "no" ; then
|
||||||
echo Disabling DEBUG support
|
echo Disabling DEBUG support
|
||||||
|
@ -55,6 +55,7 @@ int xmlRegexpExec (xmlRegexpPtr comp,
|
|||||||
const xmlChar *value);
|
const xmlChar *value);
|
||||||
void xmlRegexpPrint (FILE *output,
|
void xmlRegexpPrint (FILE *output,
|
||||||
xmlRegexpPtr regexp);
|
xmlRegexpPtr regexp);
|
||||||
|
int xmlRegexpIsDeterminist(xmlRegexpPtr comp);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Callback function when doing a transition in the automata
|
* Callback function when doing a transition in the automata
|
||||||
|
@ -194,7 +194,7 @@ extern void xmlCheckVersion(int version);
|
|||||||
*
|
*
|
||||||
* Whether the Unicode related interfaces are compiled in
|
* Whether the Unicode related interfaces are compiled in
|
||||||
*/
|
*/
|
||||||
#if @WITH_SCHEMAS@
|
#if @WITH_REGEXPS@
|
||||||
#define LIBXML_UNICODE_ENABLED
|
#define LIBXML_UNICODE_ENABLED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -203,7 +203,7 @@ extern void xmlCheckVersion(int version);
|
|||||||
*
|
*
|
||||||
* Whether the regular expressions interfaces are compiled in
|
* Whether the regular expressions interfaces are compiled in
|
||||||
*/
|
*/
|
||||||
#if @WITH_SCHEMAS@
|
#if @WITH_REGEXPS@
|
||||||
#define LIBXML_REGEXP_ENABLED
|
#define LIBXML_REGEXP_ENABLED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@ -212,7 +212,7 @@ extern void xmlCheckVersion(int version);
|
|||||||
*
|
*
|
||||||
* Whether the automata interfaces are compiled in
|
* Whether the automata interfaces are compiled in
|
||||||
*/
|
*/
|
||||||
#if @WITH_SCHEMAS@
|
#if @WITH_REGEXPS@
|
||||||
#define LIBXML_AUTOMATA_ENABLED
|
#define LIBXML_AUTOMATA_ENABLED
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -140,11 +140,12 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
xmlMemoryDump();
|
||||||
if (comp != NULL)
|
if (comp != NULL)
|
||||||
xmlRegFreeRegexp(comp);
|
xmlRegFreeRegexp(comp);
|
||||||
}
|
}
|
||||||
xmlCleanupParser();
|
xmlCleanupParser();
|
||||||
xmlMemoryDump();
|
/* xmlMemoryDump(); */
|
||||||
return(0);
|
return(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
11
valid.c
11
valid.c
@ -553,7 +553,7 @@ xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, xmlElementPtr elem) {
|
|||||||
xmlValidBuildAContentModel(elem->content, ctxt, elem->name);
|
xmlValidBuildAContentModel(elem->content, ctxt, elem->name);
|
||||||
xmlAutomataSetFinalState(ctxt->am, ctxt->state);
|
xmlAutomataSetFinalState(ctxt->am, ctxt->state);
|
||||||
elem->contModel = xmlAutomataCompile(ctxt->am);
|
elem->contModel = xmlAutomataCompile(ctxt->am);
|
||||||
if (!xmlAutomataIsDeterminist(ctxt->am)) {
|
if (!xmlRegexpIsDeterminist(elem->contModel)) {
|
||||||
char expr[5000];
|
char expr[5000];
|
||||||
expr[0] = 0;
|
expr[0] = 0;
|
||||||
xmlSnprintfElementContent(expr, 5000, elem->content, 1);
|
xmlSnprintfElementContent(expr, 5000, elem->content, 1);
|
||||||
@ -3849,6 +3849,7 @@ xmlValidateSkipIgnorable(xmlNodePtr child) {
|
|||||||
return(child);
|
return(child);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifndef LIBXML_REGEXP_ENABLED
|
||||||
/**
|
/**
|
||||||
* xmlValidateElementType:
|
* xmlValidateElementType:
|
||||||
* @ctxt: the validation context
|
* @ctxt: the validation context
|
||||||
@ -4217,6 +4218,7 @@ analyze:
|
|||||||
}
|
}
|
||||||
return(determinist);
|
return(determinist);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlSnprintfElements:
|
* xmlSnprintfElements:
|
||||||
@ -4319,7 +4321,10 @@ static int
|
|||||||
xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child,
|
xmlValidateElementContent(xmlValidCtxtPtr ctxt, xmlNodePtr child,
|
||||||
xmlElementPtr elemDecl, int warn, xmlNodePtr parent) {
|
xmlElementPtr elemDecl, int warn, xmlNodePtr parent) {
|
||||||
int ret = 1;
|
int ret = 1;
|
||||||
xmlNodePtr repl = NULL, last = NULL, cur, tmp;
|
#ifndef LIBXML_REGEXP_ENABLED
|
||||||
|
xmlNodePtr last = NULL;
|
||||||
|
#endif
|
||||||
|
xmlNodePtr repl = NULL, cur, tmp;
|
||||||
xmlElementContentPtr cont;
|
xmlElementContentPtr cont;
|
||||||
const xmlChar *name;
|
const xmlChar *name;
|
||||||
|
|
||||||
@ -4561,7 +4566,9 @@ fail:
|
|||||||
if (ret == -3)
|
if (ret == -3)
|
||||||
ret = 1;
|
ret = 1;
|
||||||
|
|
||||||
|
#ifndef LIBXML_REGEXP_ENABLED
|
||||||
done:
|
done:
|
||||||
|
#endif
|
||||||
/*
|
/*
|
||||||
* Deallocate the copy if done, and free up the validation stack
|
* Deallocate the copy if done, and free up the validation stack
|
||||||
*/
|
*/
|
||||||
|
348
xmlregexp.c
348
xmlregexp.c
@ -30,6 +30,7 @@
|
|||||||
/* #define DEBUG_REGEXP_GRAPH */
|
/* #define DEBUG_REGEXP_GRAPH */
|
||||||
/* #define DEBUG_REGEXP_EXEC */
|
/* #define DEBUG_REGEXP_EXEC */
|
||||||
/* #define DEBUG_PUSH */
|
/* #define DEBUG_PUSH */
|
||||||
|
/* #define DEBUG_COMPACTION */
|
||||||
|
|
||||||
#define ERROR(str) ctxt->error = 1; \
|
#define ERROR(str) ctxt->error = 1; \
|
||||||
xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
|
xmlGenericError(xmlGenericErrorContext, "Regexp: %s: %s\n", str, ctxt->cur)
|
||||||
@ -193,6 +194,7 @@ struct _xmlRegTrans {
|
|||||||
struct _xmlAutomataState {
|
struct _xmlAutomataState {
|
||||||
xmlRegStateType type;
|
xmlRegStateType type;
|
||||||
xmlRegMarkedType mark;
|
xmlRegMarkedType mark;
|
||||||
|
xmlRegMarkedType reached;
|
||||||
int no;
|
int no;
|
||||||
|
|
||||||
int maxTrans;
|
int maxTrans;
|
||||||
@ -240,6 +242,13 @@ struct _xmlRegexp {
|
|||||||
int nbCounters;
|
int nbCounters;
|
||||||
xmlRegCounter *counters;
|
xmlRegCounter *counters;
|
||||||
int determinist;
|
int determinist;
|
||||||
|
/*
|
||||||
|
* That's the compact form for determinists automatas
|
||||||
|
*/
|
||||||
|
int nbstates;
|
||||||
|
int *compact;
|
||||||
|
int nbstrings;
|
||||||
|
xmlChar **stringMap;
|
||||||
};
|
};
|
||||||
|
|
||||||
typedef struct _xmlRegExecRollback xmlRegExecRollback;
|
typedef struct _xmlRegExecRollback xmlRegExecRollback;
|
||||||
@ -299,6 +308,8 @@ struct _xmlRegExecCtxt {
|
|||||||
#define REGEXP_ALL_LAX_COUNTER 0x123457
|
#define REGEXP_ALL_LAX_COUNTER 0x123457
|
||||||
|
|
||||||
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
|
static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
|
||||||
|
static void xmlRegFreeState(xmlRegStatePtr state);
|
||||||
|
static void xmlRegFreeAtom(xmlRegAtomPtr atom);
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* *
|
* *
|
||||||
@ -306,6 +317,7 @@ static void xmlFAParseRegExp(xmlRegParserCtxtPtr ctxt, int top);
|
|||||||
* *
|
* *
|
||||||
************************************************************************/
|
************************************************************************/
|
||||||
|
|
||||||
|
static int xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt);
|
||||||
/**
|
/**
|
||||||
* xmlRegEpxFromParse:
|
* xmlRegEpxFromParse:
|
||||||
* @ctxt: the parser context used to build it
|
* @ctxt: the parser context used to build it
|
||||||
@ -337,6 +349,166 @@ xmlRegEpxFromParse(xmlRegParserCtxtPtr ctxt) {
|
|||||||
ret->counters = ctxt->counters;
|
ret->counters = ctxt->counters;
|
||||||
ctxt->counters = NULL;
|
ctxt->counters = NULL;
|
||||||
ret->determinist = ctxt->determinist;
|
ret->determinist = ctxt->determinist;
|
||||||
|
|
||||||
|
if ((ret->determinist != 0) &&
|
||||||
|
(ret->nbCounters == 0) &&
|
||||||
|
(ret->atoms[0] != NULL) &&
|
||||||
|
(ret->atoms[0]->type == XML_REGEXP_STRING)) {
|
||||||
|
int i, j, nbstates = 0, nbatoms = 0;
|
||||||
|
int *stateRemap;
|
||||||
|
int *stringRemap;
|
||||||
|
int *transitions;
|
||||||
|
xmlChar **stringMap;
|
||||||
|
xmlChar *value;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Switch to a compact representation
|
||||||
|
* 1/ counting the effective number of states left
|
||||||
|
* 2/ conting the unique number of atoms, and check that
|
||||||
|
* they are all of the string type
|
||||||
|
* 3/ build a table state x atom for the transitions
|
||||||
|
*/
|
||||||
|
|
||||||
|
stateRemap = xmlMalloc(ret->nbStates * sizeof(int));
|
||||||
|
for (i = 0;i < ret->nbStates;i++) {
|
||||||
|
if (ret->states[i] != NULL) {
|
||||||
|
stateRemap[i] = nbstates;
|
||||||
|
nbstates++;
|
||||||
|
} else {
|
||||||
|
stateRemap[i] = -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef DEBUG_COMPACTION
|
||||||
|
printf("Final: %d states\n", nbstates);
|
||||||
|
#endif
|
||||||
|
stringMap = xmlMalloc(ret->nbAtoms * sizeof(char *));
|
||||||
|
stringRemap = xmlMalloc(ret->nbAtoms * sizeof(int));
|
||||||
|
for (i = 0;i < ret->nbAtoms;i++) {
|
||||||
|
if ((ret->atoms[i]->type == XML_REGEXP_STRING) &&
|
||||||
|
(ret->atoms[i]->quant == XML_REGEXP_QUANT_ONCE)) {
|
||||||
|
value = ret->atoms[i]->valuep;
|
||||||
|
for (j = 0;j < nbatoms;j++) {
|
||||||
|
if (xmlStrEqual(stringMap[j], value)) {
|
||||||
|
stringRemap[i] = j;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (j >= nbatoms) {
|
||||||
|
stringRemap[i] = nbatoms;
|
||||||
|
stringMap[nbatoms] = xmlStrdup(value);
|
||||||
|
nbatoms++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
xmlFree(stateRemap);
|
||||||
|
xmlFree(stringRemap);
|
||||||
|
for (i = 0;i < nbatoms;i++)
|
||||||
|
xmlFree(stringMap[i]);
|
||||||
|
xmlFree(stringMap);
|
||||||
|
goto fail_compact;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#ifdef DEBUG_COMPACTION
|
||||||
|
printf("Final: %d atoms\n", nbatoms);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate the transition table. The first entry for each
|
||||||
|
* state correspond to the state type.
|
||||||
|
*/
|
||||||
|
transitions = (int *) xmlMalloc(nbstates * (nbatoms + 1) * sizeof(int));
|
||||||
|
memset(transitions, 0, nbstates * (nbatoms + 1) * sizeof(int));
|
||||||
|
|
||||||
|
for (i = 0;i < ret->nbStates;i++) {
|
||||||
|
int stateno, atomno, targetno, prev;
|
||||||
|
xmlRegStatePtr state;
|
||||||
|
xmlRegTransPtr trans;
|
||||||
|
|
||||||
|
stateno = stateRemap[i];
|
||||||
|
if (stateno == -1)
|
||||||
|
continue;
|
||||||
|
state = ret->states[i];
|
||||||
|
|
||||||
|
transitions[stateno * (nbatoms + 1)] = state->type;
|
||||||
|
|
||||||
|
for (j = 0;j < state->nbTrans;j++) {
|
||||||
|
trans = &(state->trans[j]);
|
||||||
|
if ((trans->to == -1) || (trans->atom == NULL))
|
||||||
|
continue;
|
||||||
|
atomno = stringRemap[trans->atom->no];
|
||||||
|
targetno = stateRemap[trans->to];
|
||||||
|
/*
|
||||||
|
* if the same atome can generate transition to 2 different
|
||||||
|
* states then it means the automata is not determinist and
|
||||||
|
* the compact form can't be used !
|
||||||
|
*/
|
||||||
|
prev = transitions[stateno * (nbatoms + 1) + atomno + 1];
|
||||||
|
if (prev != 0) {
|
||||||
|
if (prev != targetno + 1) {
|
||||||
|
printf("not determinist\n");
|
||||||
|
ret->determinist = 0;
|
||||||
|
#ifdef DEBUG_COMPACTION
|
||||||
|
printf("Indet: state %d trans %d, atom %d to %d : %d to %d\n",
|
||||||
|
i, j, trans->atom->no, trans->to, atomno, targetno);
|
||||||
|
printf(" previous to is %d\n", prev);
|
||||||
|
#endif
|
||||||
|
ret->determinist = 0;
|
||||||
|
xmlFree(transitions);
|
||||||
|
xmlFree(stateRemap);
|
||||||
|
xmlFree(stringRemap);
|
||||||
|
for (i = 0;i < nbatoms;i++)
|
||||||
|
xmlFree(stringMap[i]);
|
||||||
|
xmlFree(stringMap);
|
||||||
|
goto fail_compact;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
#if 0
|
||||||
|
printf("State %d trans %d: atom %d to %d : %d to %d\n",
|
||||||
|
i, j, trans->atom->no, trans->to, atomno, targetno);
|
||||||
|
#endif
|
||||||
|
transitions[stateno * (nbatoms + 1) + atomno + 1] =
|
||||||
|
targetno + 1;; /* to avoid 0 */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret->determinist = 1;
|
||||||
|
#ifdef DEBUG_COMPACTION
|
||||||
|
/*
|
||||||
|
* Debug
|
||||||
|
*/
|
||||||
|
for (i = 0;i < nbstates;i++) {
|
||||||
|
for (j = 0;j < nbatoms + 1;j++) {
|
||||||
|
printf("%02d ", transitions[i * (nbatoms + 1) + j]);
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
printf("\n");
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* Cleanup of the old data
|
||||||
|
*/
|
||||||
|
if (ret->states != NULL) {
|
||||||
|
for (i = 0;i < ret->nbStates;i++)
|
||||||
|
xmlRegFreeState(ret->states[i]);
|
||||||
|
xmlFree(ret->states);
|
||||||
|
}
|
||||||
|
ret->states = NULL;
|
||||||
|
ret->nbStates = 0;
|
||||||
|
if (ret->atoms != NULL) {
|
||||||
|
for (i = 0;i < ret->nbAtoms;i++)
|
||||||
|
xmlRegFreeAtom(ret->atoms[i]);
|
||||||
|
xmlFree(ret->atoms);
|
||||||
|
}
|
||||||
|
ret->atoms = NULL;
|
||||||
|
ret->nbAtoms = 0;
|
||||||
|
|
||||||
|
ret->compact = transitions;
|
||||||
|
ret->stringMap = stringMap;
|
||||||
|
ret->nbstrings = nbatoms;
|
||||||
|
ret->nbstates = nbstates;
|
||||||
|
xmlFree(stateRemap);
|
||||||
|
xmlFree(stringRemap);
|
||||||
|
}
|
||||||
|
fail_compact:
|
||||||
return(ret);
|
return(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -741,6 +913,7 @@ xmlRegPrintState(FILE *output, xmlRegStatePtr state) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG_REGEXP_GRAPH
|
||||||
static void
|
static void
|
||||||
xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
|
xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
|
||||||
int i;
|
int i;
|
||||||
@ -780,6 +953,7 @@ xmlRegPrintCtxt(FILE *output, xmlRegParserCtxtPtr ctxt) {
|
|||||||
ctxt->counters[i].max);
|
ctxt->counters[i].max);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
/************************************************************************
|
/************************************************************************
|
||||||
* *
|
* *
|
||||||
@ -1228,7 +1402,6 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
|
|||||||
xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
|
xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
|
||||||
ctxt->states[newto], counter, -1);
|
ctxt->states[newto], counter, -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
to->mark = XML_REGEXP_MARK_NORMAL;
|
to->mark = XML_REGEXP_MARK_NORMAL;
|
||||||
@ -1296,6 +1469,63 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use this pass to detect unreachable states too
|
||||||
|
*/
|
||||||
|
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
|
||||||
|
state = ctxt->states[statenr];
|
||||||
|
if (state != NULL)
|
||||||
|
state->reached = 0;
|
||||||
|
}
|
||||||
|
state = ctxt->states[0];
|
||||||
|
if (state != NULL)
|
||||||
|
state->reached = 1;
|
||||||
|
while (state != NULL) {
|
||||||
|
xmlRegStatePtr target = NULL;
|
||||||
|
state->reached = 2;
|
||||||
|
/*
|
||||||
|
* Mark all state reachable from the current reachable state
|
||||||
|
*/
|
||||||
|
for (transnr = 0;transnr < state->nbTrans;transnr++) {
|
||||||
|
if ((state->trans[transnr].to >= 0) &&
|
||||||
|
((state->trans[transnr].atom != NULL) ||
|
||||||
|
(state->trans[transnr].count >= 0))) {
|
||||||
|
int newto = state->trans[transnr].to;
|
||||||
|
|
||||||
|
if (ctxt->states[newto] == NULL)
|
||||||
|
continue;
|
||||||
|
if (ctxt->states[newto]->reached == 0) {
|
||||||
|
ctxt->states[newto]->reached = 1;
|
||||||
|
target = ctxt->states[newto];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* find the next accessible state not explored
|
||||||
|
*/
|
||||||
|
if (target == NULL) {
|
||||||
|
for (statenr = 1;statenr < ctxt->nbStates;statenr++) {
|
||||||
|
state = ctxt->states[statenr];
|
||||||
|
if ((state != NULL) && (state->reached == 1)) {
|
||||||
|
target = state;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
state = target;
|
||||||
|
}
|
||||||
|
for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
|
||||||
|
state = ctxt->states[statenr];
|
||||||
|
if ((state != NULL) && (state->reached == 0)) {
|
||||||
|
#ifdef DEBUG_REGEXP_GRAPH
|
||||||
|
printf("Removed unreachable state %d\n", statenr);
|
||||||
|
#endif
|
||||||
|
xmlRegFreeState(state);
|
||||||
|
ctxt->states[statenr] = NULL;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2041,7 +2271,8 @@ xmlRegNewExecCtxt(xmlRegexpPtr comp, xmlRegExecCallbacks callback, void *data) {
|
|||||||
exec->rollbacks = NULL;
|
exec->rollbacks = NULL;
|
||||||
exec->status = 0;
|
exec->status = 0;
|
||||||
exec->comp = comp;
|
exec->comp = comp;
|
||||||
exec->state = comp->states[0];
|
if (comp->compact == NULL)
|
||||||
|
exec->state = comp->states[0];
|
||||||
exec->transno = 0;
|
exec->transno = 0;
|
||||||
exec->transcount = 0;
|
exec->transcount = 0;
|
||||||
exec->callback = callback;
|
exec->callback = callback;
|
||||||
@ -2130,6 +2361,73 @@ xmlFARegExecSaveInputString(xmlRegExecCtxtPtr exec, const xmlChar *value,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlRegCompactPushString:
|
||||||
|
* @exec: a regexp execution context
|
||||||
|
* @comp: the precompiled exec with a compact table
|
||||||
|
* @value: a string token input
|
||||||
|
* @data: data associated to the token to reuse in callbacks
|
||||||
|
*
|
||||||
|
* Push one input token in the execution context
|
||||||
|
*
|
||||||
|
* Returns: 1 if the regexp reached a final state, 0 if non-final, and
|
||||||
|
* a negative value in case of error.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
xmlRegCompactPushString(xmlRegExecCtxtPtr exec,
|
||||||
|
xmlRegexpPtr comp,
|
||||||
|
const xmlChar *value,
|
||||||
|
void *data) {
|
||||||
|
int state = exec->index;
|
||||||
|
int i, target;
|
||||||
|
|
||||||
|
if ((comp == NULL) || (comp->compact == NULL) || (comp->stringMap == NULL))
|
||||||
|
return(-1);
|
||||||
|
|
||||||
|
if (value == NULL) {
|
||||||
|
/*
|
||||||
|
* are we at a final state ?
|
||||||
|
*/
|
||||||
|
if (comp->compact[state * (comp->nbstrings + 1)] ==
|
||||||
|
XML_REGEXP_FINAL_STATE)
|
||||||
|
return(1);
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#ifdef DEBUG_PUSH
|
||||||
|
printf("value pushed: %s\n", value);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Examine all outside transition from current state
|
||||||
|
*/
|
||||||
|
for (i = 0;i < comp->nbstrings;i++) {
|
||||||
|
target = comp->compact[state * (comp->nbstrings + 1) + i + 1];
|
||||||
|
if ((target > 0) && (target <= comp->nbstates)) {
|
||||||
|
target--; /* to avoid 0 */
|
||||||
|
if (xmlStrEqual(comp->stringMap[i], value)) {
|
||||||
|
exec->index = target;
|
||||||
|
#ifdef DEBUG_PUSH
|
||||||
|
printf("entering state %d\n", target);
|
||||||
|
#endif
|
||||||
|
if (comp->compact[target * (comp->nbstrings + 1)] ==
|
||||||
|
XML_REGEXP_FINAL_STATE)
|
||||||
|
return(1);
|
||||||
|
return(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Failed to find an exit transition out from current state for the
|
||||||
|
* current token
|
||||||
|
*/
|
||||||
|
#ifdef DEBUG_PUSH
|
||||||
|
printf("failed to find a transition for %s on state %d\n", value, state);
|
||||||
|
#endif
|
||||||
|
exec->status = -1;
|
||||||
|
return(-1);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlRegExecPushString:
|
* xmlRegExecPushString:
|
||||||
* @exec: a regexp execution context
|
* @exec: a regexp execution context
|
||||||
@ -2151,9 +2449,14 @@ xmlRegExecPushString(xmlRegExecCtxtPtr exec, const xmlChar *value,
|
|||||||
|
|
||||||
if (exec == NULL)
|
if (exec == NULL)
|
||||||
return(-1);
|
return(-1);
|
||||||
|
if (exec->comp == NULL)
|
||||||
|
return(-1);
|
||||||
if (exec->status != 0)
|
if (exec->status != 0)
|
||||||
return(exec->status);
|
return(exec->status);
|
||||||
|
|
||||||
|
if (exec->comp->compact != NULL)
|
||||||
|
return(xmlRegCompactPushString(exec, exec->comp, value, data));
|
||||||
|
|
||||||
if (value == NULL) {
|
if (value == NULL) {
|
||||||
if (exec->state->type == XML_REGEXP_FINAL_STATE)
|
if (exec->state->type == XML_REGEXP_FINAL_STATE)
|
||||||
return(1);
|
return(1);
|
||||||
@ -3509,6 +3812,37 @@ xmlRegexpExec(xmlRegexpPtr comp, const xmlChar *content) {
|
|||||||
return(xmlFARegExec(comp, content));
|
return(xmlFARegExec(comp, content));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlRegexpIsDeterminist:
|
||||||
|
* @comp: the compiled regular expression
|
||||||
|
*
|
||||||
|
* Check if the regular expression is determinist
|
||||||
|
*
|
||||||
|
* Returns 1 if it yes, 0 if not and a negativa value in case of error
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
xmlRegexpIsDeterminist(xmlRegexpPtr comp) {
|
||||||
|
xmlAutomataPtr am;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
if (comp == NULL)
|
||||||
|
return(-1);
|
||||||
|
if (comp->determinist != -1)
|
||||||
|
return(comp->determinist);
|
||||||
|
|
||||||
|
am = xmlNewAutomata();
|
||||||
|
am->nbAtoms = comp->nbAtoms;
|
||||||
|
am->atoms = comp->atoms;
|
||||||
|
am->nbStates = comp->nbStates;
|
||||||
|
am->states = comp->states;
|
||||||
|
am->determinist = -1;
|
||||||
|
ret = xmlFAComputesDeterminism(am);
|
||||||
|
am->atoms = NULL;
|
||||||
|
am->states = NULL;
|
||||||
|
xmlFreeAutomata(am);
|
||||||
|
return(ret);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlRegFreeRegexp:
|
* xmlRegFreeRegexp:
|
||||||
* @regexp: the regexp
|
* @regexp: the regexp
|
||||||
@ -3535,6 +3869,14 @@ xmlRegFreeRegexp(xmlRegexpPtr regexp) {
|
|||||||
}
|
}
|
||||||
if (regexp->counters != NULL)
|
if (regexp->counters != NULL)
|
||||||
xmlFree(regexp->counters);
|
xmlFree(regexp->counters);
|
||||||
|
if (regexp->compact != NULL)
|
||||||
|
xmlFree(regexp->compact);
|
||||||
|
if (regexp->stringMap != NULL) {
|
||||||
|
for (i = 0; i < regexp->nbstrings;i++)
|
||||||
|
xmlFree(regexp->stringMap[i]);
|
||||||
|
xmlFree(regexp->stringMap);
|
||||||
|
}
|
||||||
|
|
||||||
xmlFree(regexp);
|
xmlFree(regexp);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3910,7 +4252,7 @@ xmlAutomataCompile(xmlAutomataPtr am) {
|
|||||||
xmlRegexpPtr ret;
|
xmlRegexpPtr ret;
|
||||||
|
|
||||||
xmlFAEliminateEpsilonTransitions(am);
|
xmlFAEliminateEpsilonTransitions(am);
|
||||||
xmlFAComputesDeterminism(am);
|
/* xmlFAComputesDeterminism(am); */
|
||||||
ret = xmlRegEpxFromParse(am);
|
ret = xmlRegEpxFromParse(am);
|
||||||
|
|
||||||
return(ret);
|
return(ret);
|
||||||
|
Reference in New Issue
Block a user