diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index 47043badf77..34381ff8d9a 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -226,6 +226,8 @@ pg_regexec(regex_t *re, } else v->pmatch = pmatch; + if (v->nmatch > 0) + zapallsubs(v->pmatch, v->nmatch); v->details = details; v->start = (chr *) string; v->search_start = (chr *) string + search_start; @@ -455,7 +457,6 @@ find(struct vars *v, return REG_OKAY; /* find submatches */ - zapallsubs(v->pmatch, v->nmatch); return cdissect(v, v->g->tree, begin, end); } @@ -566,7 +567,6 @@ cfindloop(struct vars *v, break; /* no match with this begin point, try next */ MDEBUG(("tentative end %ld\n", LOFF(end))); /* Dissect the potential match to see if it really matches */ - zapallsubs(v->pmatch, v->nmatch); er = cdissect(v, v->g->tree, begin, end); if (er == REG_OKAY) { @@ -614,6 +614,8 @@ cfindloop(struct vars *v, /* * zapallsubs - initialize all subexpression matches to "no match" + * + * Note that p[0], the overall-match location, is not touched. */ static void zapallsubs(regmatch_t *p, @@ -685,8 +687,30 @@ subset(struct vars *v, * DFA and found that the proposed substring satisfies the DFA. (We make * the caller do that because in concatenation and iteration nodes, it's * much faster to check all the substrings against the child DFAs before we - * recurse.) Also, caller must have cleared subexpression match data via - * zaptreesubs (or zapallsubs at the top level). + * recurse.) + * + * A side-effect of a successful match is to save match locations for + * capturing subexpressions in v->pmatch[]. This is a little bit tricky, + * so we make the following rules: + * 1. Before initial entry to cdissect, all match data must have been + * cleared (this is seen to by zapallsubs). + * 2. Before any recursive entry to cdissect, the match data for that + * subexpression tree must be guaranteed clear (see zaptreesubs). + * 3. When returning REG_OKAY, each level of cdissect will have saved + * any relevant match locations. + * 4. When returning REG_NOMATCH, each level of cdissect will guarantee + * that its subexpression match locations are again clear. + * 5. No guarantees are made for error cases (i.e., other result codes). + * 6. When a level of cdissect abandons a successful sub-match, it will + * clear that subtree's match locations with zaptreesubs before trying + * any new DFA match or cdissect call for that subtree or any subtree + * to its right (that is, any subtree that could have a backref into the + * abandoned match). + * This may seem overly complicated, but it's difficult to simplify it + * because of the provision that match locations must be reset before + * any fresh DFA match (a rule that is needed to make dfa_backref safe). + * That means it won't work to just reset relevant match locations at the + * start of each cdissect level. */ static int /* regexec return code */ cdissect(struct vars *v, @@ -804,6 +828,8 @@ ccondissect(struct vars *v, MDEBUG(("successful\n")); return REG_OKAY; } + /* Reset left's matches (right should have done so itself) */ + zaptreesubs(v, t->left); } if (er != REG_NOMATCH) return er; @@ -826,8 +852,6 @@ ccondissect(struct vars *v, return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); - zaptreesubs(v, t->left); - zaptreesubs(v, t->right); } /* can't get here */ @@ -882,6 +906,8 @@ crevcondissect(struct vars *v, MDEBUG(("successful\n")); return REG_OKAY; } + /* Reset left's matches (right should have done so itself) */ + zaptreesubs(v, t->left); } if (er != REG_NOMATCH) return er; @@ -904,8 +930,6 @@ crevcondissect(struct vars *v, return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); - zaptreesubs(v, t->left); - zaptreesubs(v, t->right); } /* can't get here */ @@ -1165,6 +1189,7 @@ citerdissect(struct vars *v, for (i = nverified + 1; i <= k; i++) { + /* zap any match data from a non-last iteration */ zaptreesubs(v, t->left); er = cdissect(v, t->left, endpts[i - 1], endpts[i]); if (er == REG_OKAY) @@ -1373,6 +1398,7 @@ creviterdissect(struct vars *v, for (i = nverified + 1; i <= k; i++) { + /* zap any match data from a non-last iteration */ zaptreesubs(v, t->left); er = cdissect(v, t->left, endpts[i - 1], endpts[i]); if (er == REG_OKAY) diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index 0923ad9b5b0..fccd91566cb 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -567,6 +567,19 @@ select 'a' ~ '()+\1'; t (1 row) +-- Test ancient oversight in when to apply zaptreesubs +select 'abcdef' ~ '^(.)\1|\1.' as f; + f +--- + f +(1 row) + +select 'abadef' ~ '^((.)\2|..)\2' as f; + f +--- + f +(1 row) + -- Error conditions select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs ERROR: invalid regular expression: invalid backreference number diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index a1742240c4b..762b3ae69b2 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -135,6 +135,10 @@ select 'a' ~ '.. ()|\1'; select 'a' ~ '()*\1'; select 'a' ~ '()+\1'; +-- Test ancient oversight in when to apply zaptreesubs +select 'abcdef' ~ '^(.)\1|\1.' as f; +select 'abadef' ~ '^((.)\2|..)\2' as f; + -- Error conditions select 'xyz' ~ 'x(\w)(?=\1)'; -- no backrefs in LACONs select 'xyz' ~ 'x(\w)(?=(\1))';