diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c index cde82625c85..7ed675f88a4 100644 --- a/src/backend/regex/regc_nfa.c +++ b/src/backend/regex/regc_nfa.c @@ -2902,7 +2902,7 @@ compact(struct nfa *nfa, break; default: NERR(REG_ASSERT); - break; + return; } carcsort(first, ca - first); ca->co = COLORLESS; diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index f0896d2db14..cd0caaa2d03 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -942,7 +942,6 @@ parseqatom(struct vars *v, subno = v->nsubexp; if ((size_t) subno >= v->nsubs) moresubs(v, subno); - assert((size_t) subno < v->nsubs); } else atomtype = PLAIN; /* something that's not '(' */ @@ -960,6 +959,7 @@ parseqatom(struct vars *v, NOERR(); if (cap) { + assert(v->subs[subno] == NULL); v->subs[subno] = atom; t = subre(v, '(', atom->flags | CAP, lp, rp); NOERR(); diff --git a/src/backend/regex/regexec.c b/src/backend/regex/regexec.c index f7eaa76b02c..adcbcc0a8ea 100644 --- a/src/backend/regex/regexec.c +++ b/src/backend/regex/regexec.c @@ -324,6 +324,11 @@ cleanup: if (v->lblastcp != NULL) FREE(v->lblastcp); +#ifdef REG_DEBUG + if (v->eflags & (REG_FTRACE | REG_MTRACE)) + fflush(stdout); +#endif + return st; } @@ -668,7 +673,7 @@ subset(struct vars *v, if ((size_t) n >= v->nmatch) return; - MDEBUG(("setting %d\n", n)); + MDEBUG(("%d: setting %d = %ld-%ld\n", sub->id, n, LOFF(begin), LOFF(end))); v->pmatch[n].rm_so = OFF(begin); v->pmatch[n].rm_eo = OFF(end); } @@ -697,7 +702,7 @@ cdissect(struct vars *v, int er; assert(t != NULL); - MDEBUG(("cdissect %ld-%ld %c\n", LOFF(begin), LOFF(end), t->op)); + MDEBUG(("%d: cdissect %c %ld-%ld\n", t->id, t->op, LOFF(begin), LOFF(end))); /* handy place to check for operation cancel */ if (CANCEL_REQUESTED(v->re)) @@ -779,14 +784,14 @@ ccondissect(struct vars *v, NOERR(); d2 = getsubdfa(v, t->right); NOERR(); - MDEBUG(("cconcat %d\n", t->id)); + MDEBUG(("%d: ccondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); /* pick a tentative midpoint */ mid = longest(v, d, begin, end, (int *) NULL); NOERR(); if (mid == NULL) return REG_NOMATCH; - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); + MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid))); /* iterate until satisfaction or failure */ for (;;) @@ -801,7 +806,7 @@ ccondissect(struct vars *v, if (er == REG_OKAY) { /* satisfaction */ - MDEBUG(("successful\n")); + MDEBUG(("%d: successful\n", t->id)); return REG_OKAY; } } @@ -814,7 +819,7 @@ ccondissect(struct vars *v, if (mid == begin) { /* all possibilities exhausted */ - MDEBUG(("%d no midpoint\n", t->id)); + MDEBUG(("%d: no midpoint\n", t->id)); return REG_NOMATCH; } mid = longest(v, d, begin, mid - 1, (int *) NULL); @@ -822,7 +827,7 @@ ccondissect(struct vars *v, if (mid == NULL) { /* failed to find a new one */ - MDEBUG(("%d failed midpoint\n", t->id)); + MDEBUG(("%d: failed midpoint\n", t->id)); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); @@ -857,14 +862,14 @@ crevcondissect(struct vars *v, NOERR(); d2 = getsubdfa(v, t->right); NOERR(); - MDEBUG(("crevcon %d\n", t->id)); + MDEBUG(("%d: crevcondissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); /* pick a tentative midpoint */ mid = shortest(v, d, begin, begin, end, (chr **) NULL, (int *) NULL); NOERR(); if (mid == NULL) return REG_NOMATCH; - MDEBUG(("tentative midpoint %ld\n", LOFF(mid))); + MDEBUG(("%d: tentative midpoint %ld\n", t->id, LOFF(mid))); /* iterate until satisfaction or failure */ for (;;) @@ -879,7 +884,7 @@ crevcondissect(struct vars *v, if (er == REG_OKAY) { /* satisfaction */ - MDEBUG(("successful\n")); + MDEBUG(("%d: successful\n", t->id)); return REG_OKAY; } } @@ -892,7 +897,7 @@ crevcondissect(struct vars *v, if (mid == end) { /* all possibilities exhausted */ - MDEBUG(("%d no midpoint\n", t->id)); + MDEBUG(("%d: no midpoint\n", t->id)); return REG_NOMATCH; } mid = shortest(v, d, begin, mid + 1, end, (chr **) NULL, (int *) NULL); @@ -900,7 +905,7 @@ crevcondissect(struct vars *v, if (mid == NULL) { /* failed to find a new one */ - MDEBUG(("%d failed midpoint\n", t->id)); + MDEBUG(("%d: failed midpoint\n", t->id)); return REG_NOMATCH; } MDEBUG(("%d: new midpoint %ld\n", t->id, LOFF(mid))); @@ -935,7 +940,8 @@ cbrdissect(struct vars *v, assert(n >= 0); assert((size_t) n < v->nmatch); - MDEBUG(("cbackref n%d %d{%d-%d}\n", t->id, n, min, max)); + MDEBUG(("%d: cbrdissect %d{%d-%d} %ld-%ld\n", t->id, n, min, max, + LOFF(begin), LOFF(end))); /* get the backreferenced string */ if (v->pmatch[n].rm_so == -1) @@ -952,7 +958,7 @@ cbrdissect(struct vars *v, */ if (begin == end && min <= max) { - MDEBUG(("cbackref matched trivially\n")); + MDEBUG(("%d: backref matched trivially\n", t->id)); return REG_OKAY; } return REG_NOMATCH; @@ -962,7 +968,7 @@ cbrdissect(struct vars *v, /* matches only if zero repetitions are okay */ if (min == 0) { - MDEBUG(("cbackref matched trivially\n")); + MDEBUG(("%d: backref matched trivially\n", t->id)); return REG_OKAY; } return REG_NOMATCH; @@ -989,7 +995,7 @@ cbrdissect(struct vars *v, p += brlen; } - MDEBUG(("cbackref matched\n")); + MDEBUG(("%d: backref matched\n", t->id)); return REG_OKAY; } @@ -1011,13 +1017,13 @@ caltdissect(struct vars *v, assert(t->op == '|'); assert(t->left != NULL && t->left->cnfa.nstates > 0); - MDEBUG(("calt n%d\n", t->id)); + MDEBUG(("%d: caltdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); d = getsubdfa(v, t->left); NOERR(); if (longest(v, d, begin, end, (int *) NULL) == end) { - MDEBUG(("calt matched\n")); + MDEBUG(("%d: caltdissect matched\n", t->id)); er = cdissect(v, t->left, begin, end); if (er != REG_NOMATCH) return er; @@ -1054,6 +1060,8 @@ citerdissect(struct vars *v, assert(!(t->left->flags & SHORTER)); assert(begin <= end); + MDEBUG(("%d: citerdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); + /* * For the moment, assume the minimum number of matches is 1. If zero * matches are allowed, and the target string is empty, we are allowed to @@ -1092,7 +1100,6 @@ citerdissect(struct vars *v, FREE(endpts); return v->err; } - MDEBUG(("citer %d\n", t->id)); /* * Our strategy is to first find a set of sub-match endpoints that are @@ -1182,7 +1189,7 @@ citerdissect(struct vars *v, if (i > k) { /* satisfaction */ - MDEBUG(("%d successful\n", t->id)); + MDEBUG(("%d: successful\n", t->id)); FREE(endpts); return REG_OKAY; } @@ -1223,11 +1230,11 @@ backtrack: */ if (t->min == 0 && begin == end) { - MDEBUG(("%d allowing zero matches\n", t->id)); + MDEBUG(("%d: allowing zero matches\n", t->id)); return REG_OKAY; } - MDEBUG(("%d failed\n", t->id)); + MDEBUG(("%d: failed\n", t->id)); return REG_NOMATCH; } @@ -1255,6 +1262,8 @@ creviterdissect(struct vars *v, assert(t->left->flags & SHORTER); assert(begin <= end); + MDEBUG(("%d: creviterdissect %ld-%ld\n", t->id, LOFF(begin), LOFF(end))); + /* * If zero matches are allowed, and target string is empty, just declare * victory. OTOH, if target string isn't empty, zero matches can't work @@ -1264,7 +1273,10 @@ creviterdissect(struct vars *v, if (min_matches <= 0) { if (begin == end) + { + MDEBUG(("%d: allowing zero matches\n", t->id)); return REG_OKAY; + } min_matches = 1; } @@ -1293,7 +1305,6 @@ creviterdissect(struct vars *v, FREE(endpts); return v->err; } - MDEBUG(("creviter %d\n", t->id)); /* * Our strategy is to first find a set of sub-match endpoints that are @@ -1389,7 +1400,7 @@ creviterdissect(struct vars *v, if (i > k) { /* satisfaction */ - MDEBUG(("%d successful\n", t->id)); + MDEBUG(("%d: successful\n", t->id)); FREE(endpts); return REG_OKAY; } @@ -1415,7 +1426,7 @@ backtrack: } /* all possibilities exhausted */ - MDEBUG(("%d failed\n", t->id)); + MDEBUG(("%d: failed\n", t->id)); FREE(endpts); return REG_NOMATCH; } diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 0dc2265d8b2..f01ca071d9b 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -3315,6 +3315,21 @@ select * from test_regex('(?=b)b', 'a', 'HP'); {0,REG_ULOOKAROUND,REG_UNONPOSIX} (1 row) +-- expectMatch 23.9 HP ...(?!.) abcde cde +select * from test_regex('...(?!.)', 'abcde', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {cde} +(2 rows) + +-- expectNomatch 23.10 HP ...(?=.) abc +select * from test_regex('...(?=.)', 'abc', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} +(1 row) + -- Postgres addition: lookbehind constraints -- expectMatch 23.11 HPN (?<=a)b* ab b select * from test_regex('(?<=a)b*', 'ab', 'HPN'); @@ -3376,6 +3391,39 @@ select * from test_regex('(?<=b)b', 'b', 'HP'); {0,REG_ULOOKAROUND,REG_UNONPOSIX} (1 row) +-- expectMatch 23.19 HP (?<=.).. abcde bc +select * from test_regex('(?<=.)..', 'abcde', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {bc} +(2 rows) + +-- expectMatch 23.20 HP (?<=..)a* aaabb a +select * from test_regex('(?<=..)a*', 'aaabb', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {a} +(2 rows) + +-- expectMatch 23.21 HP (?<=..)b* aaabb {} +-- Note: empty match here is correct, it matches after the first 2 characters +select * from test_regex('(?<=..)b*', 'aaabb', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {""} +(2 rows) + +-- expectMatch 23.22 HP (?<=..)b+ aaabb bb +select * from test_regex('(?<=..)b+', 'aaabb', 'HP'); + test_regex +----------------------------------- + {0,REG_ULOOKAROUND,REG_UNONPOSIX} + {bb} +(2 rows) + -- doing 24 "non-greedy quantifiers" -- expectMatch 24.1 PT ab+? abb ab select * from test_regex('ab+?', 'abb', 'PT'); diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 1a2bfa62357..ae7d6b43e4a 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -1049,6 +1049,10 @@ select * from test_regex('a(?!b)b*', 'a', 'HP'); select * from test_regex('(?=b)b', 'b', 'HP'); -- expectNomatch 23.8 HP (?=b)b a select * from test_regex('(?=b)b', 'a', 'HP'); +-- expectMatch 23.9 HP ...(?!.) abcde cde +select * from test_regex('...(?!.)', 'abcde', 'HP'); +-- expectNomatch 23.10 HP ...(?=.) abc +select * from test_regex('...(?=.)', 'abc', 'HP'); -- Postgres addition: lookbehind constraints @@ -1068,6 +1072,15 @@ select * from test_regex('a(?