diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c index 60a220c57ab..bfe12eb08b3 100644 --- a/src/backend/regex/regcomp.c +++ b/src/backend/regex/regcomp.c @@ -1086,11 +1086,23 @@ parseqatom(struct vars *v, /* annoying special case: {0} or {0,0} cancels everything */ if (m == 0 && n == 0) { - if (atom != NULL) - freesubre(v, atom); - if (atomtype == '(') - v->subs[subno] = NULL; - delsub(v->nfa, lp, rp); + /* + * If we had capturing subexpression(s) within the atom, we don't want + * to destroy them, because it's legal (if useless) to back-ref them + * later. Hence, just unlink the atom from lp/rp and then ignore it. + */ + if (atom != NULL && (atom->flags & CAP)) + { + delsub(v->nfa, lp, atom->begin); + delsub(v->nfa, atom->end, rp); + } + else + { + /* Otherwise, we can clean up any subre infrastructure we made */ + if (atom != NULL) + freesubre(v, atom); + delsub(v->nfa, lp, rp); + } EMPTYARC(lp, rp); return top; } diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out index 83fb9a8b9ca..06f8881cfaf 100644 --- a/src/test/modules/test_regex/expected/test_regex.out +++ b/src/test/modules/test_regex/expected/test_regex.out @@ -3490,6 +3490,28 @@ select * from test_regex('((.))(\2){0}', 'xy', 'RPQ'); {x,x,x,NULL} (2 rows) +-- expectNomatch 21.39 PQR {(.){0}(\1)} xxx +select * from test_regex('(.){0}(\1)', 'xxx', 'PQR'); + test_regex +-------------------------------------------- + {2,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX} +(1 row) + +-- expectNomatch 21.40 PQR {((.)){0}(\2)} xxx +select * from test_regex('((.)){0}(\2)', 'xxx', 'PQR'); + test_regex +-------------------------------------------- + {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX} +(1 row) + +-- expectMatch 21.41 NPQR {((.)){0}(\2){0}} xyz {} {} {} {} +select * from test_regex('((.)){0}(\2){0}', 'xyz', 'NPQR'); + test_regex +------------------------------------------------------------ + {3,REG_UBACKREF,REG_UBOUNDS,REG_UNONPOSIX,REG_UEMPTYMATCH} + {"",NULL,NULL,NULL} +(2 rows) + -- doing 22 "multicharacter collating elements" -- # again ugh -- MCCEs are not implemented in Postgres, so we skip all these tests diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql index 991f804cb67..bdc090103b1 100644 --- a/src/test/modules/test_regex/sql/test_regex.sql +++ b/src/test/modules/test_regex/sql/test_regex.sql @@ -1015,6 +1015,12 @@ select * from test_regex('(a*)*', 'bc', 'N'); select * from test_regex(' TO (([a-z0-9._]+|"([^"]+|"")+")+)', 'asd TO foo', 'M'); -- expectMatch 21.36 RPQ ((.))(\2){0} xy x x x {} select * from test_regex('((.))(\2){0}', 'xy', 'RPQ'); +-- expectNomatch 21.39 PQR {(.){0}(\1)} xxx +select * from test_regex('(.){0}(\1)', 'xxx', 'PQR'); +-- expectNomatch 21.40 PQR {((.)){0}(\2)} xxx +select * from test_regex('((.)){0}(\2)', 'xxx', 'PQR'); +-- expectMatch 21.41 NPQR {((.)){0}(\2){0}} xyz {} {} {} {} +select * from test_regex('((.)){0}(\2){0}', 'xyz', 'NPQR'); -- doing 22 "multicharacter collating elements" -- # again ugh diff --git a/src/test/regress/expected/regex.out b/src/test/regress/expected/regex.out index cbe2cfc3ea1..ae0de7307db 100644 --- a/src/test/regress/expected/regex.out +++ b/src/test/regress/expected/regex.out @@ -567,6 +567,25 @@ select 'a' ~ '()+\1'; t (1 row) +-- Test incorrect removal of capture groups within {0} +select 'xxx' ~ '(.){0}(\1)' as f; + f +--- + f +(1 row) + +select 'xxx' ~ '((.)){0}(\2)' as f; + f +--- + f +(1 row) + +select 'xyz' ~ '((.)){0}(\2){0}' as t; + t +--- + t +(1 row) + -- Test ancient oversight in when to apply zaptreesubs select 'abcdef' ~ '^(.)\1|\1.' as f; f diff --git a/src/test/regress/sql/regex.sql b/src/test/regress/sql/regex.sql index c6974a43d11..56217104ce6 100644 --- a/src/test/regress/sql/regex.sql +++ b/src/test/regress/sql/regex.sql @@ -135,6 +135,11 @@ select 'a' ~ '.. ()|\1'; select 'a' ~ '()*\1'; select 'a' ~ '()+\1'; +-- Test incorrect removal of capture groups within {0} +select 'xxx' ~ '(.){0}(\1)' as f; +select 'xxx' ~ '((.)){0}(\2)' as f; +select 'xyz' ~ '((.)){0}(\2){0}' as t; + -- Test ancient oversight in when to apply zaptreesubs select 'abcdef' ~ '^(.)\1|\1.' as f; select 'abadef' ~ '^((.)\2|..)\2' as f;