diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 08f08322ca5..6c189bfed25 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -6166,6 +6166,9 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
The subexpression must entirely precede the back reference in the RE.
Subexpressions are numbered in the order of their leading parentheses.
Non-capturing parentheses do not define subexpressions.
+ The back reference considers only the string characters matched by the
+ referenced subexpression, not any constraints contained in it. For
+ example, (^\d)\1 will match 22.
diff --git a/src/backend/regex/regc_nfa.c b/src/backend/regex/regc_nfa.c
index a10a346e8f8..77b860cb0fd 100644
--- a/src/backend/regex/regc_nfa.c
+++ b/src/backend/regex/regc_nfa.c
@@ -1382,6 +1382,77 @@ duptraverse(struct nfa *nfa,
}
}
+/*
+ * removeconstraints - remove any constraints in an NFA
+ *
+ * Constraint arcs are replaced by empty arcs, essentially treating all
+ * constraints as automatically satisfied.
+ */
+static void
+removeconstraints(struct nfa *nfa,
+ struct state *start, /* process subNFA starting here */
+ struct state *stop) /* and stopping here */
+{
+ if (start == stop)
+ return;
+
+ stop->tmp = stop;
+ removetraverse(nfa, start);
+ /* done, except for clearing out the tmp pointers */
+
+ stop->tmp = NULL;
+ cleartraverse(nfa, start);
+}
+
+/*
+ * removetraverse - recursive heart of removeconstraints
+ */
+static void
+removetraverse(struct nfa *nfa,
+ struct state *s)
+{
+ struct arc *a;
+ struct arc *oa;
+
+ /* Since this is recursive, it could be driven to stack overflow */
+ if (STACK_TOO_DEEP(nfa->v->re))
+ {
+ NERR(REG_ETOOBIG);
+ return;
+ }
+
+ if (s->tmp != NULL)
+ return; /* already done */
+
+ s->tmp = s;
+ for (a = s->outs; a != NULL && !NISERR(); a = oa)
+ {
+ removetraverse(nfa, a->to);
+ if (NISERR())
+ break;
+ oa = a->outchain;
+ switch (a->type)
+ {
+ case PLAIN:
+ case EMPTY:
+ /* nothing to do */
+ break;
+ case AHEAD:
+ case BEHIND:
+ case '^':
+ case '$':
+ case LACON:
+ /* replace it */
+ newarc(nfa, EMPTY, 0, s, a->to);
+ freearc(nfa, a);
+ break;
+ default:
+ NERR(REG_ASSERT);
+ break;
+ }
+ }
+}
+
/*
* cleartraverse - recursive cleanup for algorithms that leave tmp ptrs set
*/
diff --git a/src/backend/regex/regcomp.c b/src/backend/regex/regcomp.c
index 1f7fa513b26..3c7627a955e 100644
--- a/src/backend/regex/regcomp.c
+++ b/src/backend/regex/regcomp.c
@@ -150,6 +150,8 @@ static void delsub(struct nfa *, struct state *, struct state *);
static void deltraverse(struct nfa *, struct state *, struct state *);
static void dupnfa(struct nfa *, struct state *, struct state *, struct state *, struct state *);
static void duptraverse(struct nfa *, struct state *, struct state *);
+static void removeconstraints(struct nfa *, struct state *, struct state *);
+static void removetraverse(struct nfa *, struct state *);
static void cleartraverse(struct nfa *, struct state *);
static struct state *single_color_transition(struct state *, struct state *);
static void specialcolors(struct nfa *);
@@ -1182,6 +1184,10 @@ parseqatom(struct vars *v,
dupnfa(v->nfa, v->subs[subno]->begin, v->subs[subno]->end,
atom->begin, atom->end);
NOERR();
+
+ /* The backref node's NFA should not enforce any constraints */
+ removeconstraints(v->nfa, atom->begin, atom->end);
+ NOERR();
}
/*
diff --git a/src/test/modules/test_regex/expected/test_regex.out b/src/test/modules/test_regex/expected/test_regex.out
index 5d993f40c25..01d50ec1e3f 100644
--- a/src/test/modules/test_regex/expected/test_regex.out
+++ b/src/test/modules/test_regex/expected/test_regex.out
@@ -2636,6 +2636,28 @@ select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
{2,REG_UBACKREF,REG_UNONPOSIX}
(1 row)
+-- back reference only matches the string, not any constraints
+select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
+ test_regex
+--------------------------------------------
+ {1,REG_UBACKREF,REG_UNONPOSIX,REG_ULOCALE}
+ {"abc abc abc",abc}
+(2 rows)
+
+select * from test_regex('(^\w+\M).*\1', 'abc abcd abd', 'LRP');
+ test_regex
+--------------------------------------------
+ {1,REG_UBACKREF,REG_UNONPOSIX,REG_ULOCALE}
+ {"abc abc",abc}
+(2 rows)
+
+select * from test_regex('(\w+(?= )).*\1', 'abc abcd abd', 'HLRP');
+ test_regex
+------------------------------------------------------------
+ {1,REG_UBACKREF,REG_ULOOKAROUND,REG_UNONPOSIX,REG_ULOCALE}
+ {"abc abc",abc}
+(2 rows)
+
-- doing 15 "octal escapes vs back references"
-- # initial zero is always octal
-- expectMatch 15.1 MP "a\\010b" "a\bb" "a\bb"
diff --git a/src/test/modules/test_regex/sql/test_regex.sql b/src/test/modules/test_regex/sql/test_regex.sql
index b99329391e8..7f5bc6e418f 100644
--- a/src/test/modules/test_regex/sql/test_regex.sql
+++ b/src/test/modules/test_regex/sql/test_regex.sql
@@ -770,6 +770,11 @@ select * from test_regex('^(.+)( \1)+$', 'abc abd abc', 'RP');
-- expectNomatch 14.29 RP {^(.+)( \1)+$} {abc abc abd}
select * from test_regex('^(.+)( \1)+$', 'abc abc abd', 'RP');
+-- back reference only matches the string, not any constraints
+select * from test_regex('(^\w+).*\1', 'abc abc abc', 'LRP');
+select * from test_regex('(^\w+\M).*\1', 'abc abcd abd', 'LRP');
+select * from test_regex('(\w+(?= )).*\1', 'abc abcd abd', 'HLRP');
+
-- doing 15 "octal escapes vs back references"
-- # initial zero is always octal