1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

allows an extra argument to subset the tests big speedup for validation,

* runtest.c: allows an extra argument to subset the tests
* xmlregexp.c: big speedup for validation, basically avoided
  transition creation explosion when removing epsilon transition
Daniel
This commit is contained in:
Daniel Veillard
2005-07-30 13:18:24 +00:00
parent 75e389d4e0
commit db68b74dc7
3 changed files with 219 additions and 58 deletions

View File

@ -1,3 +1,9 @@
Sat Jul 30 15:16:29 CEST 2005 Daniel Veillard <daniel@veillard.com>
* runtest.c: allows an extra argument to subset the tests
* xmlregexp.c: big speedup for validation, basically avoided
transition creation explosion when removing epsilon transition
Sat Jul 30 00:00:46 CEST 2005 Daniel Veillard <daniel@veillard.com> Sat Jul 30 00:00:46 CEST 2005 Daniel Veillard <daniel@veillard.com>
* Makefile.am globals.c parserInternals.c xmlreader.c xmlunicode.c * Makefile.am globals.c parserInternals.c xmlreader.c xmlunicode.c

View File

@ -4169,33 +4169,56 @@ launchTests(testDescPtr tst) {
return(err); return(err);
} }
static int verbose = 0;
static int
runtest(int i) {
int ret = 0, res;
int old_errors, old_tests, old_leaks;
old_errors = nb_errors;
old_tests = nb_tests;
old_leaks = nb_leaks;
if (testDescriptions[i].desc != NULL)
printf("## %s\n", testDescriptions[i].desc);
res = launchTests(&testDescriptions[i]);
if (res != 0)
ret++;
if (verbose) {
if ((nb_errors == old_errors) && (nb_leaks == old_leaks))
printf("Ran %d tests, no errors\n", nb_tests - old_tests);
else
printf("Ran %d tests, %d errors, %d leaks\n",
nb_tests - old_tests,
nb_errors - old_errors,
nb_leaks - old_leaks);
}
return(ret);
}
int int
main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) { main(int argc ATTRIBUTE_UNUSED, char **argv ATTRIBUTE_UNUSED) {
int i = 0, res, ret = 0; int i, a, ret = 0;
int verbose = 0; int subset = 0;
int old_errors, old_tests, old_leaks;
initializeLibxml2(); initializeLibxml2();
if ((argc >= 2) && (!strcmp(argv[1], "-v")))
verbose = 1; for (a = 1; a < argc;a++) {
for (i = 0; testDescriptions[i].func != NULL; i++) { if (!strcmp(argv[a], "-v"))
old_errors = nb_errors; verbose = 1;
old_tests = nb_tests; else {
old_leaks = nb_leaks; for (i = 0; testDescriptions[i].func != NULL; i++) {
if (testDescriptions[i].desc != NULL) if (strstr(testDescriptions[i].desc, argv[a])) {
printf("## %s\n", testDescriptions[i].desc); ret += runtest(i);
res = launchTests(&testDescriptions[i]); subset++;
if (res != 0) }
ret++; }
if (verbose) { }
if ((nb_errors == old_errors) && (nb_leaks == old_leaks)) }
printf("Ran %d tests, no errors\n", nb_tests - old_tests); if (subset == 0) {
else for (i = 0; testDescriptions[i].func != NULL; i++) {
printf("Ran %d tests, %d errors, %d leaks\n", ret += runtest(i);
nb_tests - old_tests,
nb_errors - old_errors,
nb_leaks - old_leaks);
} }
} }
if ((nb_errors == 0) && (nb_leaks == 0)) { if ((nb_errors == 0) && (nb_leaks == 0)) {

View File

@ -211,6 +211,10 @@ struct _xmlAutomataState {
int maxTrans; int maxTrans;
int nbTrans; int nbTrans;
xmlRegTrans *trans; xmlRegTrans *trans;
/* knowing states pointing to us can speed things up */
int maxTransTo;
int nbTransTo;
int *transTo;
}; };
typedef struct _xmlAutomata xmlRegParserCtxt; typedef struct _xmlAutomata xmlRegParserCtxt;
@ -787,6 +791,8 @@ xmlRegFreeState(xmlRegStatePtr state) {
if (state->trans != NULL) if (state->trans != NULL)
xmlFree(state->trans); xmlFree(state->trans);
if (state->transTo != NULL)
xmlFree(state->transTo);
xmlFree(state); xmlFree(state);
} }
@ -1195,10 +1201,38 @@ xmlRegAtomPush(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
return(0); return(0);
} }
/**
 * xmlRegStateAddTransTo:
 * @ctxt: regexp parser context, passed to the memory error reporter
 * @target: state whose list of incoming transitions is extended
 * @from: number of the state the incoming transition starts from
 *
 * Appends @from to the transTo array of @target. The array is created
 * with room for 8 entries on first use and doubled whenever it is full.
 * If the allocation fails the error is reported through
 * xmlRegexpErrMemory() and @from is not recorded.
 */
static void
xmlRegStateAddTransTo(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr target,
                      int from) {
    if (target->maxTransTo == 0) {
        /* first incoming transition: create the array */
        int initial = 8;

        target->transTo = (int *) xmlMalloc(initial * sizeof(int));
        if (target->transTo == NULL) {
            xmlRegexpErrMemory(ctxt, "adding transition");
            return;
        }
        target->maxTransTo = initial;
    } else if (target->nbTransTo >= target->maxTransTo) {
        /* array is full: grow it, keeping the old one valid on failure */
        int doubled = target->maxTransTo * 2;
        int *grown = (int *) xmlRealloc(target->transTo,
                                        doubled * sizeof(int));

        if (grown == NULL) {
            xmlRegexpErrMemory(ctxt, "adding transition");
            return;
        }
        target->transTo = grown;
        target->maxTransTo = doubled;
    }
    target->transTo[target->nbTransTo++] = from;
}
static void static void
xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state, xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
xmlRegAtomPtr atom, xmlRegStatePtr target, xmlRegAtomPtr atom, xmlRegStatePtr target,
int counter, int count) { int counter, int count, int nchk) {
int nrtrans; int nrtrans;
@ -1216,21 +1250,24 @@ xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
* so, silently ignore this request. * so, silently ignore this request.
*/ */
for (nrtrans=0; nrtrans<state->nbTrans; nrtrans++) { if (nchk == 0) {
if ((state->trans[nrtrans].atom == atom) && for (nrtrans = state->nbTrans - 1; nrtrans >= 0; nrtrans--) {
(state->trans[nrtrans].to == target->no) && xmlRegTransPtr trans = &(state->trans[nrtrans]);
(state->trans[nrtrans].counter == counter) && if ((trans->atom == atom) &&
(state->trans[nrtrans].count == count)) { (trans->to == target->no) &&
(trans->counter == counter) &&
(trans->count == count)) {
#ifdef DEBUG_REGEXP_GRAPH #ifdef DEBUG_REGEXP_GRAPH
printf("Ignoring duplicate transition from %d to %d\n", printf("Ignoring duplicate transition from %d to %d\n",
state->no, target->no); state->no, target->no);
#endif #endif
return; return;
} }
}
} }
if (state->maxTrans == 0) { if (state->maxTrans == 0) {
state->maxTrans = 4; state->maxTrans = 8;
state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans * state->trans = (xmlRegTrans *) xmlMalloc(state->maxTrans *
sizeof(xmlRegTrans)); sizeof(xmlRegTrans));
if (state->trans == NULL) { if (state->trans == NULL) {
@ -1269,6 +1306,7 @@ xmlRegStateAddTrans(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
state->trans[state->nbTrans].counter = counter; state->trans[state->nbTrans].counter = counter;
state->trans[state->nbTrans].count = count; state->trans[state->nbTrans].count = count;
state->nbTrans++; state->nbTrans++;
xmlRegStateAddTransTo(ctxt, target, state->no);
} }
static int static int
@ -1318,9 +1356,9 @@ xmlFAGenerateAllTransition(xmlRegParserCtxtPtr ctxt,
ctxt->state = to; ctxt->state = to;
} }
if (lax) if (lax)
xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER); xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_LAX_COUNTER, 0);
else else
xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER); xmlRegStateAddTrans(ctxt, from, NULL, to, -1, REGEXP_ALL_COUNTER, 0);
} }
/** /**
@ -1338,7 +1376,7 @@ xmlFAGenerateEpsilonTransition(xmlRegParserCtxtPtr ctxt,
xmlRegStatePush(ctxt, to); xmlRegStatePush(ctxt, to);
ctxt->state = to; ctxt->state = to;
} }
xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1); xmlRegStateAddTrans(ctxt, from, NULL, to, -1, -1, 0);
} }
/** /**
@ -1357,7 +1395,7 @@ xmlFAGenerateCountedEpsilonTransition(xmlRegParserCtxtPtr ctxt,
xmlRegStatePush(ctxt, to); xmlRegStatePush(ctxt, to);
ctxt->state = to; ctxt->state = to;
} }
xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1); xmlRegStateAddTrans(ctxt, from, NULL, to, counter, -1, 0);
} }
/** /**
@ -1376,7 +1414,7 @@ xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt,
xmlRegStatePush(ctxt, to); xmlRegStatePush(ctxt, to);
ctxt->state = to; ctxt->state = to;
} }
xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter); xmlRegStateAddTrans(ctxt, from, NULL, to, -1, counter, 0);
} }
/** /**
@ -1501,7 +1539,7 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
if (xmlRegAtomPush(ctxt, atom) < 0) { if (xmlRegAtomPush(ctxt, atom) < 0) {
return(-1); return(-1);
} }
xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1, 0);
ctxt->state = to; ctxt->state = to;
} }
switch (atom->quant) { switch (atom->quant) {
@ -1512,11 +1550,11 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
case XML_REGEXP_QUANT_MULT: case XML_REGEXP_QUANT_MULT:
atom->quant = XML_REGEXP_QUANT_ONCE; atom->quant = XML_REGEXP_QUANT_ONCE;
xmlFAGenerateEpsilonTransition(ctxt, from, to); xmlFAGenerateEpsilonTransition(ctxt, from, to);
xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0);
break; break;
case XML_REGEXP_QUANT_PLUS: case XML_REGEXP_QUANT_PLUS:
atom->quant = XML_REGEXP_QUANT_ONCE; atom->quant = XML_REGEXP_QUANT_ONCE;
xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1); xmlRegStateAddTrans(ctxt, to, atom, to, -1, -1, 0);
break; break;
default: default:
break; break;
@ -1560,6 +1598,8 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
from->type = XML_REGEXP_FINAL_STATE; from->type = XML_REGEXP_FINAL_STATE;
} }
for (transnr = 0;transnr < to->nbTrans;transnr++) { for (transnr = 0;transnr < to->nbTrans;transnr++) {
if (to->trans[transnr].to < 0)
continue;
if (to->trans[transnr].atom == NULL) { if (to->trans[transnr].atom == NULL) {
/* /*
* Don't remove counted transitions * Don't remove counted transitions
@ -1571,7 +1611,7 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
xmlRegStateAddTrans(ctxt, from, NULL, xmlRegStateAddTrans(ctxt, from, NULL,
ctxt->states[newto], ctxt->states[newto],
-1, to->trans[transnr].count); -1, to->trans[transnr].count, 0);
} else { } else {
#ifdef DEBUG_REGEXP_GRAPH #ifdef DEBUG_REGEXP_GRAPH
printf("Found epsilon trans %d from %d to %d\n", printf("Found epsilon trans %d from %d to %d\n",
@ -1594,16 +1634,99 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
if (to->trans[transnr].counter >= 0) { if (to->trans[transnr].counter >= 0) {
xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
ctxt->states[newto], ctxt->states[newto],
to->trans[transnr].counter, -1); to->trans[transnr].counter, -1, 1);
} else { } else {
xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom, xmlRegStateAddTrans(ctxt, from, to->trans[transnr].atom,
ctxt->states[newto], counter, -1); ctxt->states[newto], counter, -1, 1);
} }
} }
} }
to->mark = XML_REGEXP_MARK_NORMAL; to->mark = XML_REGEXP_MARK_NORMAL;
} }
/**
 * xmlFAEliminateSimpleEpsilonTransitions:
 * @ctxt: a regexp parser context
 *
 * Eliminating general epsilon transitions can get costly in the general
 * algorithm due to the large amount of generated new transitions and
 * associated comparisons. However for simple epsilon transitions used just
 * to separate building blocks when generating the automata this can be
 * reduced to state elimination:
 *    - if there exists a plain (uncounted) epsilon from X to Y
 *    - if there is no other transition from X
 * then every transition entering X is retargeted to Y, Y's transTo list
 * is extended accordingly, and X's only transition is dropped.
 *
 * Two kinds of states are deliberately left untouched here and are handled
 * by the general epsilon removal instead:
 *    - the start state: nothing in this function moves the start marker;
 *    - final states: X accepts on its own, so merging it into Y would
 *      require marking Y final, and Y would then also accept input that
 *      reaches it through paths which never went through X.
 */
static void
xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
    int statenr, i, j, newto;
    xmlRegStatePtr state, tmp;

    for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
        state = ctxt->states[statenr];
        if (state == NULL)
            continue;
        if (state->nbTrans != 1)
            continue;
        /* merging a final state into its successor would widen the language */
        if (state->type == XML_REGEXP_FINAL_STATE)
            continue;
        /* is the only transition out a basic transition */
        if ((state->trans[0].atom != NULL) ||
            (state->trans[0].to < 0) ||
            (state->trans[0].to == statenr) ||
            (state->trans[0].counter >= 0) ||
            (state->trans[0].count >= 0))
            continue;

        newto = state->trans[0].to;

        if (state->type == XML_REGEXP_START_STATE) {
#ifdef DEBUG_REGEXP_GRAPH
            printf("Found simple epsilon trans from start %d to %d\n",
                   statenr, newto);
#endif
            /* the start state is kept as is */
            continue;
        }

#ifdef DEBUG_REGEXP_GRAPH
        printf("Found simple epsilon trans from %d to %d\n",
               statenr, newto);
#endif
        /*
         * Only the states recorded as pointing to this one need to be
         * scanned, not the whole automaton.
         */
        for (i = 0;i < state->nbTransTo;i++) {
            tmp = ctxt->states[state->transTo[i]];
            if (tmp == NULL)
                continue;
            for (j = 0;j < tmp->nbTrans;j++) {
                if (tmp->trans[j].to == statenr) {
                    tmp->trans[j].to = newto;
#ifdef DEBUG_REGEXP_GRAPH
                    printf("Changed transition %d on %d to go to %d\n",
                           j, tmp->no, newto);
#endif
                    /* keep the reverse index of the new target up to date */
                    xmlRegStateAddTransTo(ctxt, ctxt->states[newto],
                                          tmp->no);
                }
            }
        }
        /* eliminate the transition completely */
        state->nbTrans = 0;
    }
}
/** /**
* xmlFAEliminateEpsilonTransitions: * xmlFAEliminateEpsilonTransitions:
* @ctxt: a regexp parser context * @ctxt: a regexp parser context
@ -1613,9 +1736,13 @@ static void
xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) { xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
int statenr, transnr; int statenr, transnr;
xmlRegStatePtr state; xmlRegStatePtr state;
int has_epsilon;
if (ctxt->states == NULL) return; if (ctxt->states == NULL) return;
xmlFAEliminateSimpleEpsilonTransitions(ctxt);
has_epsilon = 0;
/* /*
* build the completed transitions bypassing the epsilons * build the completed transitions bypassing the epsilons
@ -1647,6 +1774,7 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
transnr, statenr, newto); transnr, statenr, newto);
#endif #endif
state->mark = XML_REGEXP_MARK_START; state->mark = XML_REGEXP_MARK_START;
has_epsilon = 1;
xmlFAReduceEpsilonTransitions(ctxt, statenr, xmlFAReduceEpsilonTransitions(ctxt, statenr,
newto, state->trans[transnr].counter); newto, state->trans[transnr].counter);
state->mark = XML_REGEXP_MARK_NORMAL; state->mark = XML_REGEXP_MARK_NORMAL;
@ -1662,15 +1790,18 @@ xmlFAEliminateEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
/* /*
* Eliminate the epsilon transitions * Eliminate the epsilon transitions
*/ */
for (statenr = 0;statenr < ctxt->nbStates;statenr++) { if (has_epsilon) {
state = ctxt->states[statenr]; for (statenr = 0;statenr < ctxt->nbStates;statenr++) {
if (state == NULL) state = ctxt->states[statenr];
continue; if (state == NULL)
for (transnr = 0;transnr < state->nbTrans;transnr++) { continue;
if ((state->trans[transnr].atom == NULL) && for (transnr = 0;transnr < state->nbTrans;transnr++) {
(state->trans[transnr].count < 0) && xmlRegTransPtr trans = &(state->trans[transnr]);
(state->trans[transnr].to >= 0)) { if ((trans->atom == NULL) &&
state->trans[transnr].to = -1; (trans->count < 0) &&
(trans->to >= 0)) {
trans->to = -1;
}
} }
} }
} }
@ -4602,6 +4733,7 @@ xmlNewAutomata(void) {
/* initialize the parser */ /* initialize the parser */
ctxt->end = NULL; ctxt->end = NULL;
ctxt->start = ctxt->state = xmlRegNewState(ctxt); ctxt->start = ctxt->state = xmlRegNewState(ctxt);
ctxt->start->type = XML_REGEXP_START_STATE;
if (ctxt->start == NULL) { if (ctxt->start == NULL) {
xmlFreeAutomata(ctxt); xmlFreeAutomata(ctxt);
return(NULL); return(NULL);
@ -4807,7 +4939,7 @@ xmlAutomataNewNegTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
atom->valuep = str; atom->valuep = str;
} }
snprintf(err_msg, 199, "not %s", atom->valuep); snprintf((char *) err_msg, 199, "not %s", (const char *) atom->valuep);
err_msg[199] = 0; err_msg[199] = 0;
atom->valuep2 = xmlStrdup(err_msg); atom->valuep2 = xmlStrdup(err_msg);
@ -4895,7 +5027,7 @@ xmlAutomataNewCountTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
to = xmlRegNewState(am); to = xmlRegNewState(am);
xmlRegStatePush(am, to); xmlRegStatePush(am, to);
} }
xmlRegStateAddTrans(am, from, atom, to, counter, -1); xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
xmlRegAtomPush(am, atom); xmlRegAtomPush(am, atom);
am->state = to; am->state = to;
@ -4961,7 +5093,7 @@ xmlAutomataNewCountTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
to = xmlRegNewState(am); to = xmlRegNewState(am);
xmlRegStatePush(am, to); xmlRegStatePush(am, to);
} }
xmlRegStateAddTrans(am, from, atom, to, counter, -1); xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
xmlRegAtomPush(am, atom); xmlRegAtomPush(am, atom);
am->state = to; am->state = to;
@ -5050,7 +5182,7 @@ xmlAutomataNewOnceTrans2(xmlAutomataPtr am, xmlAutomataStatePtr from,
to = xmlRegNewState(am); to = xmlRegNewState(am);
xmlRegStatePush(am, to); xmlRegStatePush(am, to);
} }
xmlRegStateAddTrans(am, from, atom, to, counter, -1); xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
xmlRegAtomPush(am, atom); xmlRegAtomPush(am, atom);
am->state = to; am->state = to;
return(to); return(to);
@ -5112,7 +5244,7 @@ xmlAutomataNewOnceTrans(xmlAutomataPtr am, xmlAutomataStatePtr from,
to = xmlRegNewState(am); to = xmlRegNewState(am);
xmlRegStatePush(am, to); xmlRegStatePush(am, to);
} }
xmlRegStateAddTrans(am, from, atom, to, counter, -1); xmlRegStateAddTrans(am, from, atom, to, counter, -1, 0);
xmlRegAtomPush(am, atom); xmlRegAtomPush(am, atom);
am->state = to; am->state = to;
return(to); return(to);