diff --git a/ChangeLog b/ChangeLog index d62fb131..c60d76ee 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Thu Mar 13 08:17:58 CET 2008 Daniel Veillard + + * xmlregexp.c: found a nasty bug in regexp automata build, + reported by Ashwin and Bjorn Reese + Wed Mar 12 18:56:22 CET 2008 Daniel Veillard * HTMLparser.c: patch from Arnold Hendriks improving parsing of diff --git a/xmlregexp.c b/xmlregexp.c index 52e484cb..389453b9 100644 --- a/xmlregexp.c +++ b/xmlregexp.c @@ -1532,6 +1532,8 @@ xmlFAGenerateCountedTransition(xmlRegParserCtxtPtr ctxt, static int xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, xmlRegStatePtr to, xmlRegAtomPtr atom) { + xmlRegStatePtr end; + if (atom == NULL) { ERROR("genrate transition: atom == NULL"); return(-1); @@ -1689,12 +1691,31 @@ xmlFAGenerateTransitions(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr from, else { return(-1); } + } + end = to; + if ((atom->quant == XML_REGEXP_QUANT_MULT) || + (atom->quant == XML_REGEXP_QUANT_PLUS)) { + /* + * Do not pollute the target state by adding transitions from + * it as it is likely to be the shared target of multiple branches. + * So isolate with an epsilon transition. + */ + xmlRegStatePtr tmp; + + tmp = xmlRegNewState(ctxt); + if (tmp != NULL) + xmlRegStatePush(ctxt, tmp); + else { + return(-1); + } + xmlFAGenerateEpsilonTransition(ctxt, tmp, to); + to = tmp; } if (xmlRegAtomPush(ctxt, atom) < 0) { return(-1); } xmlRegStateAddTrans(ctxt, from, atom, to, -1, -1); - ctxt->state = to; + ctxt->state = end; switch (atom->quant) { case XML_REGEXP_QUANT_OPT: atom->quant = XML_REGEXP_QUANT_ONCE;