mirror of
https://gitlab.gnome.org/GNOME/libxml2.git
synced 2025-07-29 11:41:22 +03:00
try to fix for the nth time the automata generation in case of complex
* xmlregexp.c: try to fix for the nth time the automata generation in case of complex ranges. I suppose that time it is actually okay Daniel svn path=/trunk/; revision=3650
This commit is contained in:
@ -1,3 +1,8 @@
|
|||||||
|
Wed Aug 22 18:29:42 CEST 2007 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
|
* xmlregexp.c: try to fix for the nth time the automata generation
|
||||||
|
in case of complex ranges. I suppose that time it is actually okay
|
||||||
|
|
||||||
Tue Aug 14 15:51:05 CEST 2007 Daniel Veillard <daniel@veillard.com>
|
Tue Aug 14 15:51:05 CEST 2007 Daniel Veillard <daniel@veillard.com>
|
||||||
|
|
||||||
* parser.c: aligned xmlSAXUserParseMemory() to match
|
* parser.c: aligned xmlSAXUserParseMemory() to match
|
||||||
|
192
xmlregexp.c
192
xmlregexp.c
@ -189,6 +189,7 @@ struct _xmlRegAtom {
|
|||||||
int neg;
|
int neg;
|
||||||
int codepoint;
|
int codepoint;
|
||||||
xmlRegStatePtr start;
|
xmlRegStatePtr start;
|
||||||
|
xmlRegStatePtr start0;
|
||||||
xmlRegStatePtr stop;
|
xmlRegStatePtr stop;
|
||||||
int maxRanges;
|
int maxRanges;
|
||||||
int nbRanges;
|
int nbRanges;
|
||||||
@ -732,12 +733,42 @@ xmlRegFreeRange(xmlRegRangePtr range) {
|
|||||||
xmlFree(range);
|
xmlFree(range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlRegCopyRange:
|
||||||
|
* @range: the regexp range
|
||||||
|
*
|
||||||
|
* Copy a regexp range
|
||||||
|
*
|
||||||
|
* Returns the new copy or NULL in case of error.
|
||||||
|
*/
|
||||||
|
static xmlRegRangePtr
|
||||||
|
xmlRegCopyRange(xmlRegParserCtxtPtr ctxt, xmlRegRangePtr range) {
|
||||||
|
xmlRegRangePtr ret;
|
||||||
|
|
||||||
|
if (range == NULL)
|
||||||
|
return(NULL);
|
||||||
|
|
||||||
|
ret = xmlRegNewRange(ctxt, range->neg, range->type, range->start,
|
||||||
|
range->end);
|
||||||
|
if (ret == NULL)
|
||||||
|
return(NULL);
|
||||||
|
if (range->blockName != NULL) {
|
||||||
|
ret->blockName = xmlStrdup(range->blockName);
|
||||||
|
if (ret->blockName == NULL) {
|
||||||
|
xmlRegexpErrMemory(ctxt, "allocating range");
|
||||||
|
xmlRegFreeRange(ret);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(ret);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* xmlRegNewAtom:
|
* xmlRegNewAtom:
|
||||||
* @ctxt: the regexp parser context
|
* @ctxt: the regexp parser context
|
||||||
* @type: the type of atom
|
* @type: the type of atom
|
||||||
*
|
*
|
||||||
* Allocate a new regexp range
|
* Allocate a new atom
|
||||||
*
|
*
|
||||||
* Returns the new atom or NULL in case of error
|
* Returns the new atom or NULL in case of error
|
||||||
*/
|
*/
|
||||||
@ -784,6 +815,52 @@ xmlRegFreeAtom(xmlRegAtomPtr atom) {
|
|||||||
xmlFree(atom);
|
xmlFree(atom);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* xmlRegCopyAtom:
|
||||||
|
* @ctxt: the regexp parser context
|
||||||
|
* @atom: the oiginal atom
|
||||||
|
*
|
||||||
|
* Allocate a new regexp range
|
||||||
|
*
|
||||||
|
* Returns the new atom or NULL in case of error
|
||||||
|
*/
|
||||||
|
static xmlRegAtomPtr
|
||||||
|
xmlRegCopyAtom(xmlRegParserCtxtPtr ctxt, xmlRegAtomPtr atom) {
|
||||||
|
xmlRegAtomPtr ret;
|
||||||
|
|
||||||
|
ret = (xmlRegAtomPtr) xmlMalloc(sizeof(xmlRegAtom));
|
||||||
|
if (ret == NULL) {
|
||||||
|
xmlRegexpErrMemory(ctxt, "copying atom");
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
memset(ret, 0, sizeof(xmlRegAtom));
|
||||||
|
ret->type = atom->type;
|
||||||
|
ret->quant = atom->quant;
|
||||||
|
ret->min = atom->min;
|
||||||
|
ret->max = atom->max;
|
||||||
|
if (atom->nbRanges > 0) {
|
||||||
|
int i;
|
||||||
|
|
||||||
|
ret->ranges = (xmlRegRangePtr *) xmlMalloc(sizeof(xmlRegRangePtr) *
|
||||||
|
atom->nbRanges);
|
||||||
|
if (ret->ranges == NULL) {
|
||||||
|
xmlRegexpErrMemory(ctxt, "copying atom");
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
for (i = 0;i < atom->nbRanges;i++) {
|
||||||
|
ret->ranges[i] = xmlRegCopyRange(ctxt, atom->ranges[i]);
|
||||||
|
if (ret->ranges[i] == NULL)
|
||||||
|
goto error;
|
||||||
|
ret->nbRanges = i + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return(ret);
|
||||||
|
|
||||||
|
error:
|
||||||
|
xmlRegFreeAtom(ret);
|
||||||
|
return(NULL);
|
||||||
|
}
|
||||||
|
|
||||||
static xmlRegStatePtr
|
static xmlRegStatePtr
|
||||||
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
|
xmlRegNewState(xmlRegParserCtxtPtr ctxt) {
|
||||||
xmlRegStatePtr ret;
|
xmlRegStatePtr ret;
|
||||||
@ -1504,52 +1581,84 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from,
|
|||||||
break;
|
break;
|
||||||
case XML_REGEXP_QUANT_RANGE: {
|
case XML_REGEXP_QUANT_RANGE: {
|
||||||
int counter;
|
int counter;
|
||||||
xmlRegStatePtr newstate;
|
xmlRegStatePtr inter, newstate;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This one is nasty:
|
* create the final state now if needed
|
||||||
* 1/ if range has minOccurs == 0, create a new state
|
|
||||||
* and create epsilon transitions from atom->start
|
|
||||||
* to atom->stop, as well as atom->start to the new
|
|
||||||
* state
|
|
||||||
* 2/ register a new counter
|
|
||||||
* 3/ register an epsilon transition associated to
|
|
||||||
* this counter going from atom->stop to atom->start
|
|
||||||
* 4/ create a new state
|
|
||||||
* 5/ generate a counted transition from atom->stop to
|
|
||||||
* that state
|
|
||||||
*/
|
*/
|
||||||
if (atom->min == 0) {
|
|
||||||
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
|
|
||||||
atom->stop);
|
|
||||||
newstate = xmlRegNewState(ctxt);
|
|
||||||
xmlRegStatePush(ctxt, newstate);
|
|
||||||
ctxt->state = newstate;
|
|
||||||
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
|
|
||||||
newstate);
|
|
||||||
}
|
|
||||||
counter = xmlRegGetCounter(ctxt);
|
|
||||||
ctxt->counters[counter].min = atom->min - 1;
|
|
||||||
ctxt->counters[counter].max = atom->max - 1;
|
|
||||||
atom->min = 0;
|
|
||||||
atom->max = 0;
|
|
||||||
atom->quant = XML_REGEXP_QUANT_ONCE;
|
|
||||||
if (to != NULL) {
|
if (to != NULL) {
|
||||||
newstate = to;
|
newstate = to;
|
||||||
} else {
|
} else {
|
||||||
newstate = xmlRegNewState(ctxt);
|
newstate = xmlRegNewState(ctxt);
|
||||||
xmlRegStatePush(ctxt, newstate);
|
xmlRegStatePush(ctxt, newstate);
|
||||||
}
|
}
|
||||||
ctxt->state = newstate;
|
|
||||||
xmlFAGenerateCountedTransition(ctxt, atom->stop,
|
/*
|
||||||
newstate, counter);
|
* The principle here is to use counted transition
|
||||||
|
* to avoid explosion in the number of states in the
|
||||||
/*
|
* graph. This is clearly more complex but should not
|
||||||
* first check count and if OK jump forward;
|
* be exploitable at runtime.
|
||||||
* if checking fail increment count and jump back
|
|
||||||
*/
|
*/
|
||||||
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
|
if ((atom->min == 0) && (atom->start0 == NULL)) {
|
||||||
atom->start, counter);
|
xmlRegAtomPtr copy;
|
||||||
|
/*
|
||||||
|
* duplicate a transition based on atom to count next
|
||||||
|
* occurences after 1. We cannot loop to atom->start
|
||||||
|
* directly because we need an epsilon transition to
|
||||||
|
* newstate.
|
||||||
|
*/
|
||||||
|
/* ???? For some reason it seems we never reach that
|
||||||
|
case, I suppose this got optimized out before when
|
||||||
|
building the automata */
|
||||||
|
|
||||||
|
if (copy == NULL)
|
||||||
|
return(-1);
|
||||||
|
copy = xmlRegCopyAtom(ctxt, atom);
|
||||||
|
copy->quant = XML_REGEXP_QUANT_ONCE;
|
||||||
|
copy->min = 0;
|
||||||
|
copy->max = 0;
|
||||||
|
|
||||||
|
if (xmlFAGenerateTransitions(ctxt, atom->start, NULL, copy)
|
||||||
|
< 0)
|
||||||
|
return(-1);
|
||||||
|
inter = ctxt->state;
|
||||||
|
counter = xmlRegGetCounter(ctxt);
|
||||||
|
ctxt->counters[counter].min = atom->min - 1;
|
||||||
|
ctxt->counters[counter].max = atom->max - 1;
|
||||||
|
/* count the number of times we see it again */
|
||||||
|
xmlFAGenerateCountedEpsilonTransition(ctxt, inter,
|
||||||
|
atom->stop, counter);
|
||||||
|
/* allow a way out based on the count */
|
||||||
|
xmlFAGenerateCountedTransition(ctxt, inter,
|
||||||
|
newstate, counter);
|
||||||
|
/* and also allow a direct exit for 0 */
|
||||||
|
xmlFAGenerateEpsilonTransition(ctxt, atom->start,
|
||||||
|
newstate);
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* either we need the atom at least once or there
|
||||||
|
* is an atom->start0 allowing to easilly plug the
|
||||||
|
* epsilon transition.
|
||||||
|
*/
|
||||||
|
counter = xmlRegGetCounter(ctxt);
|
||||||
|
ctxt->counters[counter].min = atom->min - 1;
|
||||||
|
ctxt->counters[counter].max = atom->max - 1;
|
||||||
|
/* count the number of times we see it again */
|
||||||
|
xmlFAGenerateCountedEpsilonTransition(ctxt, atom->stop,
|
||||||
|
atom->start, counter);
|
||||||
|
/* allow a way out based on the count */
|
||||||
|
xmlFAGenerateCountedTransition(ctxt, atom->stop,
|
||||||
|
newstate, counter);
|
||||||
|
/* and if needed allow a direct exit for 0 */
|
||||||
|
if (atom->min == 0)
|
||||||
|
xmlFAGenerateEpsilonTransition(ctxt, atom->start0,
|
||||||
|
newstate);
|
||||||
|
|
||||||
|
}
|
||||||
|
atom->min = 0;
|
||||||
|
atom->max = 0;
|
||||||
|
atom->quant = XML_REGEXP_QUANT_ONCE;
|
||||||
|
ctxt->state = newstate;
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
break;
|
break;
|
||||||
@ -5113,9 +5222,15 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
|
|||||||
} else if (CUR == ')') {
|
} else if (CUR == ')') {
|
||||||
return(0);
|
return(0);
|
||||||
} else if (CUR == '(') {
|
} else if (CUR == '(') {
|
||||||
xmlRegStatePtr start, oldend;
|
xmlRegStatePtr start, oldend, start0;
|
||||||
|
|
||||||
NEXT;
|
NEXT;
|
||||||
|
/*
|
||||||
|
* this extra Epsilon transition is needed if we count with 0 allowed
|
||||||
|
* unfortunately this can't be known at that point
|
||||||
|
*/
|
||||||
|
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
|
||||||
|
start0 = ctxt->state;
|
||||||
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
|
xmlFAGenerateEpsilonTransition(ctxt, ctxt->state, NULL);
|
||||||
start = ctxt->state;
|
start = ctxt->state;
|
||||||
oldend = ctxt->end;
|
oldend = ctxt->end;
|
||||||
@ -5131,6 +5246,7 @@ xmlFAParseAtom(xmlRegParserCtxtPtr ctxt) {
|
|||||||
if (ctxt->atom == NULL)
|
if (ctxt->atom == NULL)
|
||||||
return(-1);
|
return(-1);
|
||||||
ctxt->atom->start = start;
|
ctxt->atom->start = start;
|
||||||
|
ctxt->atom->start0 = start0;
|
||||||
ctxt->atom->stop = ctxt->state;
|
ctxt->atom->stop = ctxt->state;
|
||||||
ctxt->end = oldend;
|
ctxt->end = oldend;
|
||||||
return(1);
|
return(1);
|
||||||
|
Reference in New Issue
Block a user