1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-29 11:41:22 +03:00

try to fix for the nth time the automata generation in case of complex

* xmlregexp.c: try to fix for the nth time the automata generation
  in case of complex ranges. I suppose that time it is actually okay
Daniel

svn path=/trunk/; revision=3650
This commit is contained in:
Daniel Veillard
2007-08-22 16:29:21 +00:00
parent 3dcd319a46
commit 76d59b6d6f
2 changed files with 159 additions and 38 deletions

View File

@ -189,6 +189,7 @@ struct _xmlRegAtom {
int neg;
int codepoint;
xmlRegStatePtr start;
xmlRegStatePtr start0;
xmlRegStatePtr stop;
int maxRanges;
int nbRanges;
@ -732,12 +733,42 @@ xmlRegFreeRange(xmlRegRangePtr range) {
xmlFree(range);
}
/**
* xmlRegCopyRange:
* @range: the regexp range
*
* Copy a regexp range
*
* Returns the new copy or NULL in case of error.
*/
static xmlRegRangePtr
xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
xmlRegRangePtr ret;
if (range == NULL)
return(NULL);
ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
range->end);
if (ret == NULL)
return(NULL);
if (range->blockName != NULL) {
ret->blockName = xmlStrdup(range->blockName);
if (ret->blockName == NULL) {
xmlRegexpErrMemory(ctxt, "allocating range");
xmlRegFreeRange(ret);
return(NULL);
}
}
return(ret);
}
/**
* xmlRegNewAtom:
* @ctxt: the regexp parser context
* @type: the type of atom
*
* Allocate a new regexp range
* Allocate a new atom
*
* Returns the new atom or NULL in case of error
*/
@ -784,6 +815,52 @@ xmlRegFreeAtom(xmlRegAtomPtr atom) {
xmlFree(atom);
}
/**
* xmlRegCopyAtom:
* @ctxt: the regexp parser context
* @atom: the oiginal atom
*
* Allocate a new regexp range
*
* Returns the new atom or NULL in case of error
*/
static xmlRegAtomPtr
xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
xmlRegAtomPtr ret;
ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
if (ret == NULL) {
xmlRegexpErrMemory(ctxt, "copying atom");
return(NULL);
}
memset(ret, 0, sizeof(xmlRegAtom));
ret->type = atom->type;
ret->quant = atom->quant;
ret->min = atom->min;
ret->max = atom->max;
if (atom->nbRanges > 0) {
int i;
ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
atom->nbRanges);
if (ret->ranges == NULL) {
xmlRegexpErrMemory(ctxt, "copying atom");
goto error;
}
for (i = 0;i < atom->nbRanges;i++) {
ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
if (ret->ranges[i] == NULL)
goto error;
ret->nbRanges = i + 1;
}
}
return(ret);
error:
xmlRegFreeAtom(ret);
return(NULL);
}
static xmlRegStatePtr
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
xmlRegStatePtr ret;
@ -1504,52 +1581,84 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
break;
case XML_REGEXP_QUANT_RANGE: {
int counter;
xmlRegStatePtr newstate;
xmlRegStatePtr inter, newstate;
/*
* This one is nasty:
* 1/ if range has minOccurs == 0, create a new state
* and create epsilon transitions from atom->start
* to atom->stop, as well as atom->start to the new
* state
* 2/ register a new counter
* 3/ register an epsilon transition associated to
* this counter going from atom->stop to atom->start
* 4/ create a new state
* 5/ generate a counted transition from atom->stop to
* that state
* create the final state now if needed
*/
if (atom->min == 0) {
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
atom->stop);
newstate = xmlRegNewState(ctxt);
xmlRegStatePush(ctxt, newstate);
ctxt->state = newstate;
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
newstate);
}
counter = xmlRegGetCounter(ctxt);
ctxt->counters[counter].min = atom->min - 1;
ctxt->counters[counter].max = atom->max - 1;
atom->min = 0;
atom->max = 0;
atom->quant = XML_REGEXP_QUANT_ONCE;
if (to != NULL) {
newstate = to;
} else {
newstate = xmlRegNewState(ctxt);
xmlRegStatePush(ctxt, newstate);
}
ctxt->state = newstate;
xmlFAGenerateCountedTransition(ctxt, atom->stop,
newstate, counter);
/*
* first check count and if OK jump forward;
* if checking fail increment count and jump back
/*
* The principle here is to use counted transition
* to avoid explosion in the number of states in the
* graph. This is clearly more complex but should not
* be exploitable at runtime.
*/
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
atom->start, counter);
if ((atom->min == 0) && (atom->start0 == NULL)) {
xmlRegAtomPtr copy;
/*
* duplicate a transition based on atom to count next
* occurences after 1. We cannot loop to atom->start
* directly because we need an epsilon transition to
* newstate.
*/
/* ???? For some reason it seems we never reach that
case, I suppose this got optimized out before when
building the automata */
if (copy == NULL)
return(-1);
copy = xmlRegCopyAtom(ctxt, atom);
copy->quant = XML_REGEXP_QUANT_ONCE;
copy->min = 0;
copy->max = 0;
if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
< 0)
return(-1);
inter = ctxt->state;
counter = xmlRegGetCounter(ctxt);
ctxt->counters[counter].min = atom->min - 1;
ctxt->counters[counter].max = atom->max - 1;
/* count the number of times we see it again */
xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
atom->stop, counter);
/* allow a way out based on the count */
xmlFAGenerateCountedTransition(ctxt, inter,
newstate, counter);
/* and also allow a direct exit for 0 */
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
newstate);
} else {
/*
* either we need the atom at least once or there
* is an atom->start0 allowing to easilly plug the
* epsilon transition.
*/
counter = xmlRegGetCounter(ctxt);
ctxt->counters[counter].min = atom->min - 1;
ctxt->counters[counter].max = atom->max - 1;
/* count the number of times we see it again */
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
atom->start, counter);
/* allow a way out based on the count */
xmlFAGenerateCountedTransition(ctxt, atom->stop,
newstate, counter);
/* and if needed allow a direct exit for 0 */
if (atom->min == 0)
xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
newstate);
}
atom->min = 0;
atom->max = 0;
atom->quant = XML_REGEXP_QUANT_ONCE;
ctxt->state = newstate;
}
default:
break;
@ -5113,9 +5222,15 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
} else if (CUR == ')') {
return(0);
} else if (CUR == '(') {
xmlRegStatePtr start, oldend;
xmlRegStatePtr start, oldend, start0;
NEXT;
/*
* this extra Epsilon transition is needed if we count with 0 allowed
* unfortunately this can't be known at that point
*/
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
start0 = ctxt->state;
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
start = ctxt->state;
oldend = ctxt->end;
@ -5131,6 +5246,7 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
if (ctxt->atom == NULL)
return(-1);
ctxt->atom->start = start;
ctxt->atom->start0 = start0;
ctxt->atom->stop = ctxt->state;
ctxt->end = oldend;
return(1);