1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Add assorted new regexp_xxx SQL functions.

This patch adds new functions regexp_count(), regexp_instr(),
regexp_like(), and regexp_substr(), and extends regexp_replace()
with some new optional arguments.  All these functions follow
the definitions used in Oracle, although there are small differences
in the regexp language due to using our own regexp engine -- most
notably, that the default newline-matching behavior is different.
Similar functions appear in DB2 and elsewhere, too.  Aside from
easing portability, these functions are easier to use for certain
tasks than our existing regexp_match[es] functions.

Gilles Darold, heavily revised by me

Discussion: https://postgr.es/m/fc160ee0-c843-b024-29bb-97b5da61971f@darold.net
This commit is contained in:
Tom Lane
2021-08-03 13:08:49 -04:00
parent 9e51cc87fd
commit 6424337073
8 changed files with 1340 additions and 47 deletions

View File

@ -3108,6 +3108,78 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>regexp_count</primary>
</indexterm>
<function>regexp_count</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
[, <parameter>start</parameter> <type>integer</type>
[, <parameter>flags</parameter> <type>text</type> ] ] )
<returnvalue>integer</returnvalue>
</para>
<para>
Returns the number of times the POSIX regular
expression <parameter>pattern</parameter> matches in
the <parameter>string</parameter>; see
<xref linkend="functions-posix-regexp"/>.
</para>
<para>
<literal>regexp_count('123456789012', '\d\d\d', 2)</literal>
<returnvalue>3</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>regexp_instr</primary>
</indexterm>
<function>regexp_instr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
[, <parameter>start</parameter> <type>integer</type>
[, <parameter>N</parameter> <type>integer</type>
[, <parameter>endoption</parameter> <type>integer</type>
[, <parameter>flags</parameter> <type>text</type>
[, <parameter>subexpr</parameter> <type>integer</type> ] ] ] ] ] )
<returnvalue>integer</returnvalue>
</para>
<para>
Returns the position within <parameter>string</parameter> where
the <parameter>N</parameter>'th match of the POSIX regular
expression <parameter>pattern</parameter> occurs, or zero if there is
no such match; see <xref linkend="functions-posix-regexp"/>.
</para>
<para>
<literal>regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i')</literal>
<returnvalue>3</returnvalue>
</para>
<para>
<literal>regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i', 2)</literal>
<returnvalue>5</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>regexp_like</primary>
</indexterm>
<function>regexp_like</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
[, <parameter>flags</parameter> <type>text</type> ] )
<returnvalue>boolean</returnvalue>
</para>
<para>
Checks whether a match of the POSIX regular
expression <parameter>pattern</parameter> occurs
within <parameter>string</parameter>; see
<xref linkend="functions-posix-regexp"/>.
</para>
<para>
<literal>regexp_like('Hello World', 'world$', 'i')</literal>
<returnvalue>t</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@ -3117,8 +3189,9 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
<returnvalue>text[]</returnvalue>
</para>
<para>
Returns captured substrings resulting from the first match of a POSIX
regular expression to the <parameter>string</parameter>; see
Returns substrings within the first match of the POSIX regular
expression <parameter>pattern</parameter> to
the <parameter>string</parameter>; see
<xref linkend="functions-posix-regexp"/>.
</para>
<para>
@ -3136,9 +3209,10 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
<returnvalue>setof text[]</returnvalue>
</para>
<para>
Returns captured substrings resulting from the first match of a
POSIX regular expression to the <parameter>string</parameter>,
or multiple matches if the <literal>g</literal> flag is used;
Returns substrings within the first match of the POSIX regular
expression <parameter>pattern</parameter> to
the <parameter>string</parameter>, or substrings within all
such matches if the <literal>g</literal> flag is used;
see <xref linkend="functions-posix-regexp"/>.
</para>
<para>
@ -3156,14 +3230,16 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
<indexterm>
<primary>regexp_replace</primary>
</indexterm>
<function>regexp_replace</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type> ] )
<function>regexp_replace</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type>
[, <parameter>start</parameter> <type>integer</type> ]
[, <parameter>flags</parameter> <type>text</type> ] )
<returnvalue>text</returnvalue>
</para>
<para>
Replaces substrings resulting from the first match of a
POSIX regular expression, or multiple substring matches
if the <literal>g</literal> flag is used; see <xref
linkend="functions-posix-regexp"/>.
Replaces the substring that is the first match to the POSIX
regular expression <parameter>pattern</parameter>, or all such
matches if the <literal>g</literal> flag is used; see
<xref linkend="functions-posix-regexp"/>.
</para>
<para>
<literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal>
@ -3171,6 +3247,26 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<function>regexp_replace</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type>,
<parameter>start</parameter> <type>integer</type>,
<parameter>N</parameter> <type>integer</type>
[, <parameter>flags</parameter> <type>text</type> ] )
<returnvalue>text</returnvalue>
</para>
<para>
Replaces the substring that is the <parameter>N</parameter>'th
match to the POSIX regular expression <parameter>pattern</parameter>,
or all such matches if <parameter>N</parameter> is zero; see
<xref linkend="functions-posix-regexp"/>.
</para>
<para>
<literal>regexp_replace('Thomas', '.', 'X', 3, 2)</literal>
<returnvalue>ThoXas</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@ -3213,6 +3309,35 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
<primary>regexp_substr</primary>
</indexterm>
<function>regexp_substr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
[, <parameter>start</parameter> <type>integer</type>
[, <parameter>N</parameter> <type>integer</type>
[, <parameter>flags</parameter> <type>text</type>
[, <parameter>subexpr</parameter> <type>integer</type> ] ] ] ] )
<returnvalue>text</returnvalue>
</para>
<para>
Returns the substring within <parameter>string</parameter> that
matches the <parameter>N</parameter>'th occurrence of the POSIX
regular expression <parameter>pattern</parameter>,
or <literal>NULL</literal> if there is no such match; see
<xref linkend="functions-posix-regexp"/>.
</para>
<para>
<literal>regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i')</literal>
<returnvalue>CDEF</returnvalue>
</para>
<para>
<literal>regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i', 2)</literal>
<returnvalue>EF</returnvalue>
</para></entry>
</row>
<row>
<entry role="func_table_entry"><para role="func_signature">
<indexterm>
@ -5377,6 +5502,15 @@ substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</linea
<indexterm>
<primary>substring</primary>
</indexterm>
<indexterm>
<primary>regexp_count</primary>
</indexterm>
<indexterm>
<primary>regexp_instr</primary>
</indexterm>
<indexterm>
<primary>regexp_like</primary>
</indexterm>
<indexterm>
<primary>regexp_match</primary>
</indexterm>
@ -5392,6 +5526,9 @@ substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</linea
<indexterm>
<primary>regexp_split_to_array</primary>
</indexterm>
<indexterm>
<primary>regexp_substr</primary>
</indexterm>
<para>
<xref linkend="functions-posix-table"/> lists the available
@ -5539,12 +5676,115 @@ substring('foobar' similar '#"o_b#"%' escape '#') <lineannotation>NULL</linea
<programlisting>
substring('foobar' from 'o.b') <lineannotation>oob</lineannotation>
substring('foobar' from 'o(.)b') <lineannotation>o</lineannotation>
</programlisting>
</para>
<para>
The <function>regexp_count</function> function counts the number of
places where a POSIX regular expression pattern matches a string.
It has the syntax
<function>regexp_count</function>(<replaceable>string</replaceable>,
<replaceable>pattern</replaceable>
<optional>, <replaceable>start</replaceable>
<optional>, <replaceable>flags</replaceable>
</optional></optional>).
<replaceable>pattern</replaceable> is searched for
in <replaceable>string</replaceable>, normally from the beginning of
the string, but if the <replaceable>start</replaceable> parameter is
provided then beginning from that character index.
The <replaceable>flags</replaceable> parameter is an optional text
string containing zero or more single-letter flags that change the
function's behavior. For example, including <literal>i</literal> in
<replaceable>flags</replaceable> specifies case-insensitive matching.
Supported flags are described in
<xref linkend="posix-embedded-options-table"/>.
</para>
<para>
Some examples:
<programlisting>
regexp_count('ABCABCAXYaxy', 'A.') <lineannotation>3</lineannotation>
regexp_count('ABCABCAXYaxy', 'A.', 1, 'i') <lineannotation>4</lineannotation>
</programlisting>
</para>
<para>
The <function>regexp_instr</function> function returns the starting or
ending position of the <replaceable>N</replaceable>'th match of a
POSIX regular expression pattern to a string, or zero if there is no
such match. It has the syntax
<function>regexp_instr</function>(<replaceable>string</replaceable>,
<replaceable>pattern</replaceable>
<optional>, <replaceable>start</replaceable>
<optional>, <replaceable>N</replaceable>
<optional>, <replaceable>endoption</replaceable>
<optional>, <replaceable>flags</replaceable>
<optional>, <replaceable>subexpr</replaceable>
</optional></optional></optional></optional></optional>).
<replaceable>pattern</replaceable> is searched for
in <replaceable>string</replaceable>, normally from the beginning of
the string, but if the <replaceable>start</replaceable> parameter is
provided then beginning from that character index.
If <replaceable>N</replaceable> is specified
then the <replaceable>N</replaceable>'th match of the pattern
is located, otherwise the first match is located.
If the <replaceable>endoption</replaceable> parameter is omitted or
specified as zero, the function returns the position of the first
character of the match. Otherwise, <replaceable>endoption</replaceable>
must be one, and the function returns the position of the character
following the match.
The <replaceable>flags</replaceable> parameter is an optional text
string containing zero or more single-letter flags that change the
function's behavior. Supported flags are described
in <xref linkend="posix-embedded-options-table"/>.
For a pattern containing parenthesized
subexpressions, <replaceable>subexpr</replaceable> is an integer
indicating which subexpression is of interest: the result identifies
the position of the substring matching that subexpression.
Subexpressions are numbered in the order of their leading parentheses.
When <replaceable>subexpr</replaceable> is omitted or zero, the result
identifies the position of the whole match regardless of
parenthesized subexpressions.
</para>
<para>
Some examples:
<programlisting>
regexp_instr('number of your street, town zip, FR', '[^,]+', 1, 2)
<lineannotation>23</lineannotation>
regexp_instr('ABCDEFGHI', '(c..)(...)', 1, 1, 0, 'i', 2)
<lineannotation>6</lineannotation>
</programlisting>
</para>
<para>
The <function>regexp_like</function> function checks whether a match
of a POSIX regular expression pattern occurs within a string,
returning boolean true or false. It has the syntax
<function>regexp_like</function>(<replaceable>string</replaceable>,
<replaceable>pattern</replaceable>
<optional>, <replaceable>flags</replaceable> </optional>).
The <replaceable>flags</replaceable> parameter is an optional text
string containing zero or more single-letter flags that change the
function's behavior. Supported flags are described
in <xref linkend="posix-embedded-options-table"/>.
This function has the same results as the <literal>~</literal>
operator if no flags are specified. If only the <literal>i</literal>
flag is specified, it has the same results as
the <literal>~*</literal> operator.
</para>
<para>
Some examples:
<programlisting>
regexp_like('Hello World', 'world') <lineannotation>false</lineannotation>
regexp_like('Hello World', 'world', 'i') <lineannotation>true</lineannotation>
</programlisting>
</para>
<para>
The <function>regexp_match</function> function returns a text array of
captured substring(s) resulting from the first match of a POSIX
matching substring(s) within the first match of a POSIX
regular expression pattern to a string. It has the syntax
<function>regexp_match</function>(<replaceable>string</replaceable>,
<replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>).
@ -5579,8 +5819,17 @@ SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
{bar,beque}
(1 row)
</programlisting>
</para>
<tip>
<para>
In the common case where you just want the whole matching substring
or <literal>NULL</literal> for no match, write something like
or <literal>NULL</literal> for no match, the best solution is to
use <function>regexp_substr()</function>.
However, <function>regexp_substr()</function> only exists
in <productname>PostgreSQL</productname> version 15 and up. When
working in older versions, you can extract the first element
of <function>regexp_match()</function>'s result, for example:
<programlisting>
SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
regexp_match
@ -5589,10 +5838,11 @@ SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
(1 row)
</programlisting>
</para>
</tip>
<para>
The <function>regexp_matches</function> function returns a set of text arrays
of captured substring(s) resulting from matching a POSIX regular
of matching substring(s) within matches of a POSIX regular
expression pattern to a string. It has the same syntax as
<function>regexp_match</function>.
This function returns no rows if there is no match, one row if there is
@ -5650,7 +5900,13 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
It has the syntax
<function>regexp_replace</function>(<replaceable>source</replaceable>,
<replaceable>pattern</replaceable>, <replaceable>replacement</replaceable>
<optional>, <replaceable>start</replaceable>
<optional>, <replaceable>N</replaceable>
</optional></optional>
<optional>, <replaceable>flags</replaceable> </optional>).
(Notice that <replaceable>N</replaceable> cannot be specified
unless <replaceable>start</replaceable> is,
but <replaceable>flags</replaceable> can be given in any case.)
The <replaceable>source</replaceable> string is returned unchanged if
there is no match to the <replaceable>pattern</replaceable>. If there is a
match, the <replaceable>source</replaceable> string is returned with the
@ -5663,11 +5919,22 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
substring matching the entire pattern should be inserted. Write
<literal>\\</literal> if you need to put a literal backslash in the replacement
text.
<replaceable>pattern</replaceable> is searched for
in <replaceable>string</replaceable>, normally from the beginning of
the string, but if the <replaceable>start</replaceable> parameter is
provided then beginning from that character index.
By default, only the first match of the pattern is replaced.
If <replaceable>N</replaceable> is specified and is greater than zero,
then the <replaceable>N</replaceable>'th match of the pattern
is replaced.
If the <literal>g</literal> flag is given, or
if <replaceable>N</replaceable> is specified and is zero, then all
matches at or after the <replaceable>start</replaceable> position are
replaced. (The <literal>g</literal> flag is ignored
when <replaceable>N</replaceable> is specified.)
The <replaceable>flags</replaceable> parameter is an optional text
string containing zero or more single-letter flags that change the
function's behavior. Flag <literal>i</literal> specifies case-insensitive
matching, while flag <literal>g</literal> specifies replacement of each matching
substring rather than only the first one. Supported flags (though
function's behavior. Supported flags (though
not <literal>g</literal>) are
described in <xref linkend="posix-embedded-options-table"/>.
</para>
@ -5681,6 +5948,10 @@ regexp_replace('foobarbaz', 'b..', 'X', 'g')
<lineannotation>fooXX</lineannotation>
regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
<lineannotation>fooXarYXazY</lineannotation>
regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i')
<lineannotation>X PXstgrXSQL fXnctXXn</lineannotation>
regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i')
<lineannotation>A PostgrXSQL function</lineannotation>
</programlisting>
</para>
@ -5712,7 +5983,6 @@ regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
<para>
Some examples:
<programlisting>
SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo;
foo
-------
@ -5761,11 +6031,51 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
zero-length matches that occur at the start or end of the string
or immediately after a previous match. This is contrary to the strict
definition of regexp matching that is implemented by
<function>regexp_match</function> and
<function>regexp_matches</function>, but is usually the most convenient behavior
the other regexp functions, but is usually the most convenient behavior
in practice. Other software systems such as Perl use similar definitions.
</para>
<para>
The <function>regexp_substr</function> function returns the substring
that matches a POSIX regular expression pattern,
or <literal>NULL</literal> if there is no match. It has the syntax
<function>regexp_substr</function>(<replaceable>string</replaceable>,
<replaceable>pattern</replaceable>
<optional>, <replaceable>start</replaceable>
<optional>, <replaceable>N</replaceable>
<optional>, <replaceable>flags</replaceable>
<optional>, <replaceable>subexpr</replaceable>
</optional></optional></optional></optional>).
<replaceable>pattern</replaceable> is searched for
in <replaceable>string</replaceable>, normally from the beginning of
the string, but if the <replaceable>start</replaceable> parameter is
provided then beginning from that character index.
If <replaceable>N</replaceable> is specified
then the <replaceable>N</replaceable>'th match of the pattern
is returned, otherwise the first match is returned.
The <replaceable>flags</replaceable> parameter is an optional text
string containing zero or more single-letter flags that change the
function's behavior. Supported flags are described
in <xref linkend="posix-embedded-options-table"/>.
For a pattern containing parenthesized
subexpressions, <replaceable>subexpr</replaceable> is an integer
indicating which subexpression is of interest: the result is the
substring matching that subexpression.
Subexpressions are numbered in the order of their leading parentheses.
When <replaceable>subexpr</replaceable> is omitted or zero, the result
is the whole match regardless of parenthesized subexpressions.
</para>
<para>
Some examples:
<programlisting>
regexp_substr('number of your street, town zip, FR', '[^,]+', 1, 2)
<lineannotation> town zip</lineannotation>
regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2)
<lineannotation>FGH</lineannotation>
</programlisting>
</para>
<!-- derived from the re_syntax.n man page -->
<sect3 id="posix-syntax-details">

View File

@ -113,6 +113,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
/* Local functions */
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
pg_re_flags *flags,
int start_search,
Oid collation,
bool use_subpatterns,
bool ignore_degenerate,
@ -629,7 +630,7 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, 0, 1));
}
/*
@ -646,11 +647,97 @@ textregexreplace(PG_FUNCTION_ARGS)
regex_t *re;
pg_re_flags flags;
/*
* regexp_replace() with four arguments will be preferentially resolved as
* this form when the fourth argument is of type UNKNOWN. However, the
* user might have intended to call textregexreplace_extended_no_n. If we
* see flags that look like an integer, emit the same error that
* parse_re_flags would, but add a HINT about how to fix it.
*/
if (VARSIZE_ANY_EXHDR(opt) > 0)
{
char *opt_p = VARDATA_ANY(opt);
if (*opt_p >= '0' && *opt_p <= '9')
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid regular expression option: \"%.*s\"",
pg_mblen(opt_p), opt_p),
errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
}
parse_re_flags(&flags, opt);
re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, 0,
flags.glob ? 0 : 1));
}
/*
* textregexreplace_extended()
* Return a string matched by a regular expression, with replacement.
* Extends textregexreplace by allowing a start position and the
* choice of the occurrence to replace (0 means all occurrences).
*/
Datum
textregexreplace_extended(PG_FUNCTION_ARGS)
{
text *s = PG_GETARG_TEXT_PP(0);
text *p = PG_GETARG_TEXT_PP(1);
text *r = PG_GETARG_TEXT_PP(2);
int start = 1;
int n = 1;
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
pg_re_flags re_flags;
regex_t *re;
/* Collect optional parameters */
if (PG_NARGS() > 3)
{
start = PG_GETARG_INT32(3);
if (start <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"start", start)));
}
if (PG_NARGS() > 4)
{
n = PG_GETARG_INT32(4);
if (n < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"n", n)));
}
/* Determine options */
parse_re_flags(&re_flags, flags);
/* If N was not specified, deduce it from the 'g' flag */
if (PG_NARGS() <= 4)
n = re_flags.glob ? 0 : 1;
/* Compile the regular expression */
re = RE_compile_and_cache(p, re_flags.cflags, PG_GET_COLLATION());
/* Do the replacement(s) */
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, start - 1, n));
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
textregexreplace_extended_no_n(PG_FUNCTION_ARGS)
{
return textregexreplace_extended(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
textregexreplace_extended_no_flags(PG_FUNCTION_ARGS)
{
return textregexreplace_extended(fcinfo);
}
/*
@ -958,6 +1045,235 @@ similar_escape(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(result);
}
/*
* regexp_count()
* Return the number of matches of a pattern within a string.
*/
Datum
regexp_count(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_PP(0);
text *pattern = PG_GETARG_TEXT_PP(1);
int start = 1;
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3);
pg_re_flags re_flags;
regexp_matches_ctx *matchctx;
/* Collect optional parameters */
if (PG_NARGS() > 2)
{
start = PG_GETARG_INT32(2);
if (start <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"start", start)));
}
/* Determine options */
parse_re_flags(&re_flags, flags);
/* User mustn't specify 'g' */
if (re_flags.glob)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/* translator: %s is a SQL function name */
errmsg("%s does not support the \"global\" option",
"regexp_count()")));
/* But we find all the matches anyway */
re_flags.glob = true;
/* Do the matching */
matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
PG_GET_COLLATION(),
false, /* can ignore subexprs */
false, false);
PG_RETURN_INT32(matchctx->nmatches);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_count_no_start(PG_FUNCTION_ARGS)
{
return regexp_count(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_count_no_flags(PG_FUNCTION_ARGS)
{
return regexp_count(fcinfo);
}
/*
* regexp_instr()
* Return the match's position within the string
*/
Datum
regexp_instr(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_PP(0);
text *pattern = PG_GETARG_TEXT_PP(1);
int start = 1;
int n = 1;
int endoption = 0;
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
int subexpr = 0;
int pos;
pg_re_flags re_flags;
regexp_matches_ctx *matchctx;
/* Collect optional parameters */
if (PG_NARGS() > 2)
{
start = PG_GETARG_INT32(2);
if (start <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"start", start)));
}
if (PG_NARGS() > 3)
{
n = PG_GETARG_INT32(3);
if (n <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"n", n)));
}
if (PG_NARGS() > 4)
{
endoption = PG_GETARG_INT32(4);
if (endoption != 0 && endoption != 1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"endoption", endoption)));
}
if (PG_NARGS() > 6)
{
subexpr = PG_GETARG_INT32(6);
if (subexpr < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"subexpr", subexpr)));
}
/* Determine options */
parse_re_flags(&re_flags, flags);
/* User mustn't specify 'g' */
if (re_flags.glob)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/* translator: %s is a SQL function name */
errmsg("%s does not support the \"global\" option",
"regexp_instr()")));
/* But we find all the matches anyway */
re_flags.glob = true;
/* Do the matching */
matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
PG_GET_COLLATION(),
(subexpr > 0), /* need submatches? */
false, false);
/* When n exceeds matches return 0 (includes case of no matches) */
if (n > matchctx->nmatches)
PG_RETURN_INT32(0);
/* When subexpr exceeds number of subexpressions return 0 */
if (subexpr > matchctx->npatterns)
PG_RETURN_INT32(0);
/* Select the appropriate match position to return */
pos = (n - 1) * matchctx->npatterns;
if (subexpr > 0)
pos += subexpr - 1;
pos *= 2;
if (endoption == 1)
pos += 1;
if (matchctx->match_locs[pos] >= 0)
PG_RETURN_INT32(matchctx->match_locs[pos] + 1);
else
PG_RETURN_INT32(0); /* position not identifiable */
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_instr_no_start(PG_FUNCTION_ARGS)
{
return regexp_instr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_instr_no_n(PG_FUNCTION_ARGS)
{
return regexp_instr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_instr_no_endoption(PG_FUNCTION_ARGS)
{
return regexp_instr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_instr_no_flags(PG_FUNCTION_ARGS)
{
return regexp_instr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
{
return regexp_instr(fcinfo);
}
/*
* regexp_like()
* Test for a pattern match within a string.
*/
Datum
regexp_like(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_PP(0);
text *pattern = PG_GETARG_TEXT_PP(1);
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
pg_re_flags re_flags;
/* Determine options */
parse_re_flags(&re_flags, flags);
/* User mustn't specify 'g' */
if (re_flags.glob)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/* translator: %s is a SQL function name */
errmsg("%s does not support the \"global\" option",
"regexp_like()")));
/* Otherwise it's like textregexeq/texticregexeq */
PG_RETURN_BOOL(RE_compile_and_execute(pattern,
VARDATA_ANY(str),
VARSIZE_ANY_EXHDR(str),
re_flags.cflags,
PG_GET_COLLATION(),
0, NULL));
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_like_no_flags(PG_FUNCTION_ARGS)
{
return regexp_like(fcinfo);
}
/*
* regexp_match()
* Return the first substring(s) matching a pattern within a string.
@ -982,7 +1298,7 @@ regexp_match(PG_FUNCTION_ARGS)
"regexp_match()"),
errhint("Use the regexp_matches function instead.")));
matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0,
PG_GET_COLLATION(), true, false, false);
if (matchctx->nmatches == 0)
@ -1029,7 +1345,7 @@ regexp_matches(PG_FUNCTION_ARGS)
/* be sure to copy the input string into the multi-call ctx */
matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
&re_flags,
&re_flags, 0,
PG_GET_COLLATION(),
true, false, false);
@ -1064,24 +1380,28 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
}
/*
* setup_regexp_matches --- do the initial matching for regexp_match
* and regexp_split functions
* setup_regexp_matches --- do the initial matching for regexp_match,
* regexp_split, and related functions
*
* To avoid having to re-find the compiled pattern on each call, we do
* all the matching in one swoop. The returned regexp_matches_ctx contains
* the locations of all the substrings matching the pattern.
*
* The three bool parameters have only two patterns (one for matching, one for
* splitting) but it seems clearer to distinguish the functionality this way
* than to key it all off one "is_split" flag. We don't currently assume that
* fetching_unmatched is exclusive of fetching the matched text too; if it's
* set, the conversion buffer is large enough to fetch any single matched or
* unmatched string, but not any larger substring. (In practice, when splitting
* the matches are usually small anyway, and it didn't seem worth complicating
* the code further.)
* start_search: the character (not byte) offset in orig_str at which to
* begin the search. Returned positions are relative to orig_str anyway.
* use_subpatterns: collect data about matches to parenthesized subexpressions.
* ignore_degenerate: ignore zero-length matches.
* fetching_unmatched: caller wants to fetch unmatched substrings.
*
* We don't currently assume that fetching_unmatched is exclusive of fetching
* the matched text too; if it's set, the conversion buffer is large enough to
* fetch any single matched or unmatched string, but not any larger
* substring. (In practice, when splitting the matches are usually small
* anyway, and it didn't seem worth complicating the code further.)
*/
static regexp_matches_ctx *
setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
int start_search,
Oid collation,
bool use_subpatterns,
bool ignore_degenerate,
@ -1099,7 +1419,6 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
int array_idx;
int prev_match_end;
int prev_valid_match_end;
int start_search;
int maxlen = 0; /* largest fetch length in characters */
/* save original string --- we'll extract result substrings from it */
@ -1142,7 +1461,6 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
/* search for the pattern, perhaps repeatedly */
prev_match_end = 0;
prev_valid_match_end = 0;
start_search = 0;
while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
pmatch_len, pmatch))
{
@ -1367,7 +1685,7 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
/* be sure to copy the input string into the multi-call ctx */
splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
&re_flags,
&re_flags, 0,
PG_GET_COLLATION(),
false, true, true);
@ -1422,7 +1740,7 @@ regexp_split_to_array(PG_FUNCTION_ARGS)
splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
PG_GETARG_TEXT_PP(1),
&re_flags,
&re_flags, 0,
PG_GET_COLLATION(),
false, true, true);
@ -1489,6 +1807,125 @@ build_regexp_split_result(regexp_matches_ctx *splitctx)
}
}
/*
* regexp_substr()
* Return the substring that matches a regular expression pattern
*/
Datum
regexp_substr(PG_FUNCTION_ARGS)
{
text *str = PG_GETARG_TEXT_PP(0);
text *pattern = PG_GETARG_TEXT_PP(1);
int start = 1;
int n = 1;
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4);
int subexpr = 0;
int so,
eo,
pos;
pg_re_flags re_flags;
regexp_matches_ctx *matchctx;
/* Collect optional parameters */
if (PG_NARGS() > 2)
{
start = PG_GETARG_INT32(2);
if (start <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"start", start)));
}
if (PG_NARGS() > 3)
{
n = PG_GETARG_INT32(3);
if (n <= 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"n", n)));
}
if (PG_NARGS() > 5)
{
subexpr = PG_GETARG_INT32(5);
if (subexpr < 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid value for parameter \"%s\": %d",
"subexpr", subexpr)));
}
/* Determine options */
parse_re_flags(&re_flags, flags);
/* User mustn't specify 'g' */
if (re_flags.glob)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
/* translator: %s is a SQL function name */
errmsg("%s does not support the \"global\" option",
"regexp_substr()")));
/* But we find all the matches anyway */
re_flags.glob = true;
/* Do the matching */
matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
PG_GET_COLLATION(),
(subexpr > 0), /* need submatches? */
false, false);
/* When n exceeds matches return NULL (includes case of no matches) */
if (n > matchctx->nmatches)
PG_RETURN_NULL();
/* When subexpr exceeds number of subexpressions return NULL */
if (subexpr > matchctx->npatterns)
PG_RETURN_NULL();
/* Select the appropriate match position to return */
pos = (n - 1) * matchctx->npatterns;
if (subexpr > 0)
pos += subexpr - 1;
pos *= 2;
so = matchctx->match_locs[pos];
eo = matchctx->match_locs[pos + 1];
if (so < 0 || eo < 0)
PG_RETURN_NULL(); /* unidentifiable location */
PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
PointerGetDatum(matchctx->orig_str),
Int32GetDatum(so + 1),
Int32GetDatum(eo - so)));
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_substr_no_start(PG_FUNCTION_ARGS)
{
return regexp_substr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_substr_no_n(PG_FUNCTION_ARGS)
{
return regexp_substr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_substr_no_flags(PG_FUNCTION_ARGS)
{
return regexp_substr(fcinfo);
}
/* This is separate to keep the opr_sanity regression test from complaining */
Datum
regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
{
return regexp_substr(fcinfo);
}
/*
* regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
*

View File

@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
/*
* replace_text_regexp
*
* replace text that matches to regexp in src_text to replace_text.
* replace substring(s) in src_text that match regexp with replace_text.
*
* search_start: the character (not byte) offset in src_text at which to
* begin searching.
* n: if 0, replace all matches; if > 0, replace only the N'th match.
*
* Note: to avoid having to include regex.h in builtins.h, we declare
* the regexp argument as void *, but really it's regex_t *.
*/
text *
replace_text_regexp(text *src_text, void *regexp,
text *replace_text, bool glob)
text *replace_text,
int search_start, int n)
{
text *ret_text;
regex_t *re = (regex_t *) regexp;
int src_text_len = VARSIZE_ANY_EXHDR(src_text);
int nmatches = 0;
StringInfoData buf;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
pg_wchar *data;
size_t data_len;
int search_start;
int data_pos;
char *start_ptr;
bool have_escape;
@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
start_ptr = (char *) VARDATA_ANY(src_text);
data_pos = 0;
search_start = 0;
while (search_start <= data_len)
{
int regexec_result;
@ -4560,6 +4564,23 @@ replace_text_regexp(text *src_text, void *regexp,
errmsg("regular expression failed: %s", errMsg)));
}
/*
* Count matches, and decide whether to replace this match.
*/
nmatches++;
if (n > 0 && nmatches != n)
{
/*
* No, so advance search_start, but not start_ptr/data_pos. (Thus,
* we treat the matched text as if it weren't matched, and copy it
* to the output later.)
*/
search_start = pmatch[0].rm_eo;
if (pmatch[0].rm_so == pmatch[0].rm_eo)
search_start++;
continue;
}
/*
* Copy the text to the left of the match position. Note we are given
* character not byte indexes.
@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
data_pos = pmatch[0].rm_eo;
/*
* When global option is off, replace the first instance only.
* If we only want to replace one occurrence, we're done.
*/
if (!glob)
if (n > 0)
break;
/*

View File

@ -53,6 +53,6 @@
*/
/* yyyymmddN */
#define CATALOG_VERSION_NO 202107261
#define CATALOG_VERSION_NO 202108031
#endif

View File

@ -3565,6 +3565,18 @@
{ oid => '2285', descr => 'replace text using regexp',
proname => 'regexp_replace', prorettype => 'text',
proargtypes => 'text text text text', prosrc => 'textregexreplace' },
{ oid => '9611', descr => 'replace text using regexp',
proname => 'regexp_replace', prorettype => 'text',
proargtypes => 'text text text int4 int4 text',
prosrc => 'textregexreplace_extended' },
{ oid => '9612', descr => 'replace text using regexp',
proname => 'regexp_replace', prorettype => 'text',
proargtypes => 'text text text int4 int4',
prosrc => 'textregexreplace_extended_no_flags' },
{ oid => '9613', descr => 'replace text using regexp',
proname => 'regexp_replace', prorettype => 'text',
proargtypes => 'text text text int4',
prosrc => 'textregexreplace_extended_no_n' },
{ oid => '3396', descr => 'find first match for regexp',
proname => 'regexp_match', prorettype => '_text', proargtypes => 'text text',
prosrc => 'regexp_match_no_flags' },
@ -3579,6 +3591,58 @@
proname => 'regexp_matches', prorows => '10', proretset => 't',
prorettype => '_text', proargtypes => 'text text text',
prosrc => 'regexp_matches' },
{ oid => '9614', descr => 'count regexp matches',
proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text',
prosrc => 'regexp_count_no_start' },
{ oid => '9615', descr => 'count regexp matches',
proname => 'regexp_count', prorettype => 'int4',
proargtypes => 'text text int4', prosrc => 'regexp_count_no_flags' },
{ oid => '9616', descr => 'count regexp matches',
proname => 'regexp_count', prorettype => 'int4',
proargtypes => 'text text int4 text', prosrc => 'regexp_count' },
{ oid => '9617', descr => 'position of regexp match',
proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text',
prosrc => 'regexp_instr_no_start' },
{ oid => '9618', descr => 'position of regexp match',
proname => 'regexp_instr', prorettype => 'int4',
proargtypes => 'text text int4', prosrc => 'regexp_instr_no_n' },
{ oid => '9619', descr => 'position of regexp match',
proname => 'regexp_instr', prorettype => 'int4',
proargtypes => 'text text int4 int4', prosrc => 'regexp_instr_no_endoption' },
{ oid => '9620', descr => 'position of regexp match',
proname => 'regexp_instr', prorettype => 'int4',
proargtypes => 'text text int4 int4 int4',
prosrc => 'regexp_instr_no_flags' },
{ oid => '9621', descr => 'position of regexp match',
proname => 'regexp_instr', prorettype => 'int4',
proargtypes => 'text text int4 int4 int4 text',
prosrc => 'regexp_instr_no_subexpr' },
{ oid => '9622', descr => 'position of regexp match',
proname => 'regexp_instr', prorettype => 'int4',
proargtypes => 'text text int4 int4 int4 text int4',
prosrc => 'regexp_instr' },
{ oid => '9623', descr => 'test for regexp match',
proname => 'regexp_like', prorettype => 'bool', proargtypes => 'text text',
prosrc => 'regexp_like_no_flags' },
{ oid => '9624', descr => 'test for regexp match',
proname => 'regexp_like', prorettype => 'bool',
proargtypes => 'text text text', prosrc => 'regexp_like' },
{ oid => '9625', descr => 'extract substring that matches regexp',
proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text',
prosrc => 'regexp_substr_no_start' },
{ oid => '9626', descr => 'extract substring that matches regexp',
proname => 'regexp_substr', prorettype => 'text',
proargtypes => 'text text int4', prosrc => 'regexp_substr_no_n' },
{ oid => '9627', descr => 'extract substring that matches regexp',
proname => 'regexp_substr', prorettype => 'text',
proargtypes => 'text text int4 int4', prosrc => 'regexp_substr_no_flags' },
{ oid => '9628', descr => 'extract substring that matches regexp',
proname => 'regexp_substr', prorettype => 'text',
proargtypes => 'text text int4 int4 text',
prosrc => 'regexp_substr_no_subexpr' },
{ oid => '9629', descr => 'extract substring that matches regexp',
proname => 'regexp_substr', prorettype => 'text',
proargtypes => 'text text int4 int4 text int4', prosrc => 'regexp_substr' },
{ oid => '2088', descr => 'split string by field_sep and return field_num',
proname => 'split_part', prorettype => 'text',
proargtypes => 'text text int4', prosrc => 'split_part' },

View File

@ -34,6 +34,7 @@ extern bool SplitDirectoriesString(char *rawstring, char separator,
extern bool SplitGUCList(char *rawstring, char separator,
List **namelist);
extern text *replace_text_regexp(text *src_text, void *regexp,
text *replace_text, bool glob);
text *replace_text,
int search_start, int n);
#endif

View File

@ -515,6 +515,13 @@ SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
cde
(1 row)
-- Check case where we have a match, but not a subexpression match
SELECT SUBSTRING('foo' FROM 'foo(bar)?') IS NULL AS t;
t
---
t
(1 row)
-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
true
@ -592,6 +599,370 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
ERROR: invalid regular expression option: "z"
-- extended regexp_replace tests
SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1);
regexp_replace
-----------------------
X PostgreSQL function
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1, 2);
regexp_replace
-----------------------
A PXstgreSQL function
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i');
regexp_replace
-----------------------
X PXstgrXSQL fXnctXXn
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'i');
regexp_replace
-----------------------
X PostgreSQL function
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 2, 'i');
regexp_replace
-----------------------
A PXstgreSQL function
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i');
regexp_replace
-----------------------
A PostgrXSQL function
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 9, 'i');
regexp_replace
-----------------------
A PostgreSQL function
(1 row)
SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 7, 0, 'i');
regexp_replace
-----------------------
A PostgrXSQL fXnctXXn
(1 row)
-- 'g' flag should be ignored when N is specified
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'g');
regexp_replace
-----------------------
A PXstgreSQL function
(1 row)
-- errors
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', -1, 0, 'i');
ERROR: invalid value for parameter "start": -1
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, -1, 'i');
ERROR: invalid value for parameter "n": -1
-- erroneous invocation of non-extended form
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', '1');
ERROR: invalid regular expression option: "1"
HINT: If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.
-- regexp_count tests
SELECT regexp_count('123123123123123', '(12)3');
regexp_count
--------------
5
(1 row)
SELECT regexp_count('123123123123', '123', 1);
regexp_count
--------------
4
(1 row)
SELECT regexp_count('123123123123', '123', 3);
regexp_count
--------------
3
(1 row)
SELECT regexp_count('123123123123', '123', 33);
regexp_count
--------------
0
(1 row)
SELECT regexp_count('ABCABCABCABC', 'Abc', 1, '');
regexp_count
--------------
0
(1 row)
SELECT regexp_count('ABCABCABCABC', 'Abc', 1, 'i');
regexp_count
--------------
4
(1 row)
-- errors
SELECT regexp_count('123123123123', '123', 0);
ERROR: invalid value for parameter "start": 0
SELECT regexp_count('123123123123', '123', -3);
ERROR: invalid value for parameter "start": -3
-- regexp_like tests
SELECT regexp_like('Steven', '^Ste(v|ph)en$');
regexp_like
-------------
t
(1 row)
SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 'n');
regexp_like
-------------
f
(1 row)
SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 's');
regexp_like
-------------
t
(1 row)
SELECT regexp_like('abc', ' a . c ', 'x');
regexp_like
-------------
t
(1 row)
SELECT regexp_like('abc', 'a.c', 'g'); -- error
ERROR: regexp_like() does not support the "global" option
-- regexp_instr tests
SELECT regexp_instr('abcdefghi', 'd.f');
regexp_instr
--------------
4
(1 row)
SELECT regexp_instr('abcdefghi', 'd.q');
regexp_instr
--------------
0
(1 row)
SELECT regexp_instr('abcabcabc', 'a.c');
regexp_instr
--------------
1
(1 row)
SELECT regexp_instr('abcabcabc', 'a.c', 2);
regexp_instr
--------------
4
(1 row)
SELECT regexp_instr('abcabcabc', 'a.c', 1, 3);
regexp_instr
--------------
7
(1 row)
SELECT regexp_instr('abcabcabc', 'a.c', 1, 4);
regexp_instr
--------------
0
(1 row)
SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i');
regexp_instr
--------------
4
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
regexp_instr
--------------
1
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
regexp_instr
--------------
1
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
regexp_instr
--------------
4
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
regexp_instr
--------------
5
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
regexp_instr
--------------
7
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
regexp_instr
--------------
0
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
regexp_instr
--------------
9
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
regexp_instr
--------------
4
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
regexp_instr
--------------
9
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
regexp_instr
--------------
7
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
regexp_instr
--------------
9
(1 row)
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
regexp_instr
--------------
0
(1 row)
-- Check case where we have a match, but not a subexpression match
SELECT regexp_instr('foo', 'foo(bar)?', 1, 1, 0, '', 1);
regexp_instr
--------------
0
(1 row)
-- errors
SELECT regexp_instr('abcabcabc', 'a.c', 0, 1);
ERROR: invalid value for parameter "start": 0
SELECT regexp_instr('abcabcabc', 'a.c', 1, 0);
ERROR: invalid value for parameter "n": 0
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);
ERROR: invalid value for parameter "endoption": -1
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 2);
ERROR: invalid value for parameter "endoption": 2
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, 'g');
ERROR: regexp_instr() does not support the "global" option
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1);
ERROR: invalid value for parameter "subexpr": -1
-- regexp_substr tests
SELECT regexp_substr('abcdefghi', 'd.f');
regexp_substr
---------------
def
(1 row)
SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t;
t
---
t
(1 row)
SELECT regexp_substr('abcabcabc', 'a.c');
regexp_substr
---------------
abc
(1 row)
SELECT regexp_substr('abcabcabc', 'a.c', 2);
regexp_substr
---------------
abc
(1 row)
SELECT regexp_substr('abcabcabc', 'a.c', 1, 3);
regexp_substr
---------------
abc
(1 row)
SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t;
t
---
t
(1 row)
SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i');
regexp_substr
---------------
abc
(1 row)
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
regexp_substr
---------------
12345678
(1 row)
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1);
regexp_substr
---------------
123
(1 row)
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2);
regexp_substr
---------------
45678
(1 row)
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 3);
regexp_substr
---------------
56
(1 row)
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
regexp_substr
---------------
78
(1 row)
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 5) IS NULL AS t;
t
---
t
(1 row)
-- Check case where we have a match, but not a subexpression match
SELECT regexp_substr('foo', 'foo(bar)?', 1, 1, '', 1) IS NULL AS t;
t
---
t
(1 row)
-- errors
SELECT regexp_substr('abcabcabc', 'a.c', 0, 1);
ERROR: invalid value for parameter "start": 0
SELECT regexp_substr('abcabcabc', 'a.c', 1, 0);
ERROR: invalid value for parameter "n": 0
SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, 'g');
ERROR: regexp_substr() does not support the "global" option
SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, '', -1);
ERROR: invalid value for parameter "subexpr": -1
-- set so we can tell NULL from empty string
\pset null '\\N'
-- return all matches from regexp

View File

@ -171,6 +171,8 @@ SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
-- With a parenthesized subexpression, return only what matches the subexpr
SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
-- Check case where we have a match, but not a subexpression match
SELECT SUBSTRING('foo' FROM 'foo(bar)?') IS NULL AS t;
-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
@ -193,6 +195,93 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
-- invalid regexp option
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
-- extended regexp_replace tests
SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1);
SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1, 2);
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i');
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'i');
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 2, 'i');
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i');
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 9, 'i');
SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 7, 0, 'i');
-- 'g' flag should be ignored when N is specified
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'g');
-- errors
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', -1, 0, 'i');
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, -1, 'i');
-- erroneous invocation of non-extended form
SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', '1');
-- regexp_count tests
SELECT regexp_count('123123123123123', '(12)3');
SELECT regexp_count('123123123123', '123', 1);
SELECT regexp_count('123123123123', '123', 3);
SELECT regexp_count('123123123123', '123', 33);
SELECT regexp_count('ABCABCABCABC', 'Abc', 1, '');
SELECT regexp_count('ABCABCABCABC', 'Abc', 1, 'i');
-- errors
SELECT regexp_count('123123123123', '123', 0);
SELECT regexp_count('123123123123', '123', -3);
-- regexp_like tests
SELECT regexp_like('Steven', '^Ste(v|ph)en$');
SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 'n');
SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 's');
SELECT regexp_like('abc', ' a . c ', 'x');
SELECT regexp_like('abc', 'a.c', 'g'); -- error
-- regexp_instr tests
SELECT regexp_instr('abcdefghi', 'd.f');
SELECT regexp_instr('abcdefghi', 'd.q');
SELECT regexp_instr('abcabcabc', 'a.c');
SELECT regexp_instr('abcabcabc', 'a.c', 2);
SELECT regexp_instr('abcabcabc', 'a.c', 1, 3);
SELECT regexp_instr('abcabcabc', 'a.c', 1, 4);
SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i');
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
-- Check case where we have a match, but not a subexpression match
SELECT regexp_instr('foo', 'foo(bar)?', 1, 1, 0, '', 1);
-- errors
SELECT regexp_instr('abcabcabc', 'a.c', 0, 1);
SELECT regexp_instr('abcabcabc', 'a.c', 1, 0);
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 2);
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, 'g');
SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1);
-- regexp_substr tests
SELECT regexp_substr('abcdefghi', 'd.f');
SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t;
SELECT regexp_substr('abcabcabc', 'a.c');
SELECT regexp_substr('abcabcabc', 'a.c', 2);
SELECT regexp_substr('abcabcabc', 'a.c', 1, 3);
SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t;
SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i');
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1);
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2);
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 3);
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 5) IS NULL AS t;
-- Check case where we have a match, but not a subexpression match
SELECT regexp_substr('foo', 'foo(bar)?', 1, 1, '', 1) IS NULL AS t;
-- errors
SELECT regexp_substr('abcabcabc', 'a.c', 0, 1);
SELECT regexp_substr('abcabcabc', 'a.c', 1, 0);
SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, 'g');
SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, '', -1);
-- set so we can tell NULL from empty string
\pset null '\\N'