mirror of
https://github.com/postgres/postgres.git
synced 2025-04-29 13:56:47 +03:00
Implement regexp_match(), a simplified alternative to regexp_matches().
regexp_match() is like regexp_matches(), but it disallows the 'g' flag and in consequence does not need to return a set. Instead, it returns a simple text array value, or NULL if there's no match. Previously people usually got that behavior with a sub-select, but this way is considerably more efficient. Documentation adjusted so that regexp_match() is presented first and then regexp_matches() is introduced as a more complicated version. This is a bit historically revisionist but seems pedagogically better. Still TODO: extend contrib/citext to support this function. Emre Hasegeli, reviewed by David Johnston Discussion: <CAE2gYzy42sna2ME_e3y1KLQ-4UBrB-eVF0SWn8QG39sQSeVhEw@mail.gmail.com>
This commit is contained in:
parent
2d7e591007
commit
cf9b0fea5f
@ -2036,6 +2036,23 @@
|
||||
<entry><literal>'42.5'</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<indexterm>
|
||||
<primary>regexp_match</primary>
|
||||
</indexterm>
|
||||
<literal><function>regexp_match(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</function></literal>
|
||||
</entry>
|
||||
<entry><type>text[]</type></entry>
|
||||
<entry>
|
||||
Return captured substring(s) resulting from the first match of a POSIX
|
||||
regular expression to the <parameter>string</parameter>. See
|
||||
<xref linkend="functions-posix-regexp"> for more information.
|
||||
</entry>
|
||||
<entry><literal>regexp_match('foobarbequebaz', '(bar)(beque)')</literal></entry>
|
||||
<entry><literal>{bar,beque}</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry>
|
||||
<indexterm>
|
||||
@ -2045,12 +2062,12 @@
|
||||
</entry>
|
||||
<entry><type>setof text[]</type></entry>
|
||||
<entry>
|
||||
Return all captured substrings resulting from matching a POSIX regular
|
||||
expression against the <parameter>string</parameter>. See
|
||||
Return captured substring(s) resulting from matching a POSIX regular
|
||||
expression to the <parameter>string</parameter>. See
|
||||
<xref linkend="functions-posix-regexp"> for more information.
|
||||
</entry>
|
||||
<entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry>
|
||||
<entry><literal>{bar,beque}</literal></entry>
|
||||
<entry><literal>regexp_matches('foobarbequebaz', 'ba.', 'g')</literal></entry>
|
||||
<entry><literal>{bar}</literal><para><literal>{baz}</literal></para> (2 rows)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
@ -4112,6 +4129,9 @@ substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotat
|
||||
<indexterm>
|
||||
<primary>regexp_replace</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_match</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_matches</primary>
|
||||
</indexterm>
|
||||
@ -4272,39 +4292,78 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g')
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <function>regexp_matches</> function returns a text array of
|
||||
all of the captured substrings resulting from matching a POSIX
|
||||
regular expression pattern. It has the syntax
|
||||
<function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</>
|
||||
<optional>, <replaceable>flags</> </optional>).
|
||||
The function can return no rows, one row, or multiple rows (see
|
||||
the <literal>g</> flag below). If the <replaceable>pattern</>
|
||||
does not match, the function returns no rows. If the pattern
|
||||
contains no parenthesized subexpressions, then each row
|
||||
returned is a single-element text array containing the substring
|
||||
matching the whole pattern. If the pattern contains parenthesized
|
||||
subexpressions, the function returns a text array whose
|
||||
<replaceable>n</>'th element is the substring matching the
|
||||
<replaceable>n</>'th parenthesized subexpression of the pattern
|
||||
(not counting <quote>non-capturing</> parentheses; see below for
|
||||
details).
|
||||
The <replaceable>flags</> parameter is an optional text
|
||||
string containing zero or more single-letter flags that change the
|
||||
function's behavior. Flag <literal>g</> causes the function to find
|
||||
each match in the string, not only the first one, and return a row for
|
||||
each such match. Supported flags (though
|
||||
not <literal>g</>)
|
||||
are described in <xref linkend="posix-embedded-options-table">.
|
||||
The <function>regexp_match</> function returns a text array of
|
||||
captured substring(s) resulting from the first match of a POSIX
|
||||
regular expression pattern to a string. It has the syntax
|
||||
<function>regexp_match</function>(<replaceable>string</>,
|
||||
<replaceable>pattern</> <optional>, <replaceable>flags</> </optional>).
|
||||
If there is no match, the result is <literal>NULL</>.
|
||||
If a match is found, and the <replaceable>pattern</> contains no
|
||||
parenthesized subexpressions, then the result is a single-element text
|
||||
array containing the substring matching the whole pattern.
|
||||
If a match is found, and the <replaceable>pattern</> contains
|
||||
parenthesized subexpressions, then the result is a text array
|
||||
whose <replaceable>n</>'th element is the substring matching
|
||||
the <replaceable>n</>'th parenthesized subexpression of
|
||||
the <replaceable>pattern</> (not counting <quote>non-capturing</>
|
||||
parentheses; see below for details).
|
||||
The <replaceable>flags</> parameter is an optional text string
|
||||
containing zero or more single-letter flags that change the function's
|
||||
behavior. Supported flags are described
|
||||
in <xref linkend="posix-embedded-options-table">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
SELECT regexp_matches('foobarbequebaz', '(bar)(beque)');
|
||||
regexp_matches
|
||||
----------------
|
||||
SELECT regexp_match('foobarbequebaz', 'bar.*que');
|
||||
regexp_match
|
||||
--------------
|
||||
{barbeque}
|
||||
(1 row)
|
||||
|
||||
SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
|
||||
regexp_match
|
||||
--------------
|
||||
{bar,beque}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
In the common case where you just want the whole matching substring
|
||||
or <literal>NULL</> for no match, write something like
|
||||
<programlisting>
|
||||
SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
|
||||
regexp_match
|
||||
--------------
|
||||
barbeque
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <function>regexp_matches</> function returns a set of text arrays
|
||||
of captured substring(s) resulting from matching a POSIX regular
|
||||
expression pattern to a string. It has the same syntax as
|
||||
<function>regexp_match</function>.
|
||||
This function returns no rows if there is no match, one row if there is
|
||||
a match and the <literal>g</> flag is not given, or <replaceable>N</>
|
||||
rows if there are <replaceable>N</> matches and the <literal>g</> flag
|
||||
is given. Each returned row is a text array containing the whole
|
||||
matched substring or the substrings matching parenthesized
|
||||
subexpressions of the <replaceable>pattern</>, just as described above
|
||||
for <function>regexp_match</function>.
|
||||
<function>regexp_matches</> accepts all the flags shown
|
||||
in <xref linkend="posix-embedded-options-table">, plus
|
||||
the <literal>g</> flag which commands it to return all matches, not
|
||||
just the first one.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
SELECT regexp_matches('foo', 'not there');
|
||||
regexp_matches
|
||||
----------------
|
||||
(0 rows)
|
||||
|
||||
SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
|
||||
regexp_matches
|
||||
@ -4312,24 +4371,27 @@ SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
|
||||
{bar,beque}
|
||||
{bazil,barf}
|
||||
(2 rows)
|
||||
|
||||
SELECT regexp_matches('foobarbequebaz', 'barbeque');
|
||||
regexp_matches
|
||||
----------------
|
||||
{barbeque}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<tip>
|
||||
<para>
|
||||
It is possible to force <function>regexp_matches()</> to always
|
||||
return one row by using a sub-select; this is particularly useful
|
||||
in a <literal>SELECT</> target list when you want all rows
|
||||
returned, even non-matching ones:
|
||||
In most cases <function>regexp_matches()</> should be used with
|
||||
the <literal>g</> flag, since if you only want the first match, it's
|
||||
easier and more efficient to use <function>regexp_match()</>.
|
||||
However, <function>regexp_match()</> only exists
|
||||
in <productname>PostgreSQL</> version 10 and up. When working in older
|
||||
versions, a common trick is to place a <function>regexp_matches()</>
|
||||
call in a sub-select, for example:
|
||||
<programlisting>
|
||||
SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
|
||||
</programlisting>
|
||||
This produces a text array if there's a match, or <literal>NULL</> if
|
||||
not, the same as <function>regexp_match()</> would do. Without the
|
||||
sub-select, this query would produce no output at all for table rows
|
||||
without a match, which is typically not the desired behavior.
|
||||
</para>
|
||||
</tip>
|
||||
|
||||
<para>
|
||||
The <function>regexp_split_to_table</> function splits a string using a POSIX
|
||||
@ -4408,6 +4470,7 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
|
||||
zero-length matches that occur at the start or end of the string
|
||||
or immediately after a previous match. This is contrary to the strict
|
||||
definition of regexp matching that is implemented by
|
||||
<function>regexp_match</> and
|
||||
<function>regexp_matches</>, but is usually the most convenient behavior
|
||||
in practice. Other software systems such as Perl use similar definitions.
|
||||
</para>
|
||||
@ -5482,7 +5545,7 @@ SELECT SUBSTRING('XY1234Z', 'Y*?([0-9]{1,3})');
|
||||
into the digits and the parts before and after them. We might try to
|
||||
do that like this:
|
||||
<screen>
|
||||
SELECT regexp_matches('abc01234xyz', '(.*)(\d+)(.*)');
|
||||
SELECT regexp_match('abc01234xyz', '(.*)(\d+)(.*)');
|
||||
<lineannotation>Result: </lineannotation><computeroutput>{abc0123,4,xyz}</computeroutput>
|
||||
</screen>
|
||||
That didn't work: the first <literal>.*</> is greedy so
|
||||
@ -5490,14 +5553,14 @@ SELECT regexp_matches('abc01234xyz', '(.*)(\d+)(.*)');
|
||||
match at the last possible place, the last digit. We might try to fix
|
||||
that by making it non-greedy:
|
||||
<screen>
|
||||
SELECT regexp_matches('abc01234xyz', '(.*?)(\d+)(.*)');
|
||||
SELECT regexp_match('abc01234xyz', '(.*?)(\d+)(.*)');
|
||||
<lineannotation>Result: </lineannotation><computeroutput>{abc,0,""}</computeroutput>
|
||||
</screen>
|
||||
That didn't work either, because now the RE as a whole is non-greedy
|
||||
and so it ends the overall match as soon as possible. We can get what
|
||||
we want by forcing the RE as a whole to be greedy:
|
||||
<screen>
|
||||
SELECT regexp_matches('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
|
||||
SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}');
|
||||
<lineannotation>Result: </lineannotation><computeroutput>{abc,01234,xyz}</computeroutput>
|
||||
</screen>
|
||||
Controlling the RE's overall greediness separately from its components'
|
||||
|
@ -2068,7 +2068,7 @@ CREATE VIEW triggers AS
|
||||
-- XXX strange hacks follow
|
||||
CAST(
|
||||
CASE WHEN pg_has_role(c.relowner, 'USAGE')
|
||||
THEN (SELECT m[1] FROM regexp_matches(pg_get_triggerdef(t.oid), E'.{35,} WHEN \\((.+)\\) EXECUTE PROCEDURE') AS rm(m) LIMIT 1)
|
||||
THEN (regexp_match(pg_get_triggerdef(t.oid), E'.{35,} WHEN \\((.+)\\) EXECUTE PROCEDURE'))[1]
|
||||
ELSE null END
|
||||
AS character_data) AS action_condition,
|
||||
CAST(
|
||||
|
@ -47,7 +47,7 @@ typedef struct pg_re_flags
|
||||
bool glob; /* do it globally (for each occurrence) */
|
||||
} pg_re_flags;
|
||||
|
||||
/* cross-call state for regexp_matches(), also regexp_split() */
|
||||
/* cross-call state for regexp_match and regexp_split functions */
|
||||
typedef struct regexp_matches_ctx
|
||||
{
|
||||
text *orig_str; /* data string in original TEXT form */
|
||||
@ -57,7 +57,7 @@ typedef struct regexp_matches_ctx
|
||||
/* so the number of entries in match_locs is nmatches * npatterns * 2 */
|
||||
int *match_locs; /* 0-based character indexes */
|
||||
int next_match; /* 0-based index of next match to process */
|
||||
/* workspace for build_regexp_matches_result() */
|
||||
/* workspace for build_regexp_match_result() */
|
||||
Datum *elems; /* has npatterns elements */
|
||||
bool *nulls; /* has npatterns elements */
|
||||
} regexp_matches_ctx;
|
||||
@ -107,13 +107,12 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
|
||||
|
||||
/* Local functions */
|
||||
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
|
||||
text *flags,
|
||||
pg_re_flags *flags,
|
||||
Oid collation,
|
||||
bool force_glob,
|
||||
bool use_subpatterns,
|
||||
bool ignore_degenerate);
|
||||
static void cleanup_regexp_matches(regexp_matches_ctx *matchctx);
|
||||
static ArrayType *build_regexp_matches_result(regexp_matches_ctx *matchctx);
|
||||
static ArrayType *build_regexp_match_result(regexp_matches_ctx *matchctx);
|
||||
static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
|
||||
|
||||
|
||||
@ -350,7 +349,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len,
|
||||
|
||||
|
||||
/*
|
||||
* parse_re_flags - parse the options argument of regexp_matches and friends
|
||||
* parse_re_flags - parse the options argument of regexp_match and friends
|
||||
*
|
||||
* flags --- output argument, filled with desired options
|
||||
* opts --- TEXT object, or NULL for defaults
|
||||
@ -840,9 +839,53 @@ similar_escape(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_TEXT_P(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* regexp_match()
|
||||
* Return the first substring(s) matching a pattern within a string.
*
* The result is a single text array (not a set).  If the pattern does not
* match anywhere, SQL NULL is returned instead.  The 'g' (global) flag is
* rejected with an error, since returning every match is the job of
* regexp_matches().
|
||||
*/
|
||||
Datum
|
||||
regexp_match(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *orig_str = PG_GETARG_TEXT_PP(0);
|
||||
text *pattern = PG_GETARG_TEXT_PP(1);
|
||||
/* flags is the optional third argument (regexp option letters) */
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
|
||||
pg_re_flags re_flags;
|
||||
regexp_matches_ctx *matchctx;
|
||||
|
||||
/* Determine options */
|
||||
parse_re_flags(&re_flags, flags);
|
||||
/* User mustn't specify 'g' */
|
||||
if (re_flags.glob)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("regexp_match does not support the global option"),
|
||||
errhint("Use the regexp_matches function instead.")));
|
||||
|
||||
/* Trailing bools are use_subpatterns = true, ignore_degenerate = false */
matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
|
||||
PG_GET_COLLATION(), true, false);
|
||||
|
||||
/* No match at all: the SQL-level result is NULL, not an empty array */
if (matchctx->nmatches == 0)
|
||||
PG_RETURN_NULL();
|
||||
|
||||
/*
* Since 'g' was rejected above, setup_regexp_matches stops after the
* first match, so exactly one match must have been recorded here.
*/
Assert(matchctx->nmatches == 1);
|
||||
|
||||
/* Create workspace that build_regexp_match_result needs */
|
||||
matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
|
||||
matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
|
||||
|
||||
/* Convert the single recorded match into the text array result */
PG_RETURN_DATUM(PointerGetDatum(build_regexp_match_result(matchctx)));
|
||||
}
|
||||
|
||||
/*
* regexp_match_no_flags()
* Entry point for the two-argument SQL form regexp_match(text, text).
* It just passes its call info through to regexp_match(), which treats
* the flags argument as optional.
*
* This is separate to keep the opr_sanity regression test from complaining
*/
|
||||
Datum
|
||||
regexp_match_no_flags(PG_FUNCTION_ARGS)
|
||||
{
|
||||
return regexp_match(fcinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* regexp_matches()
|
||||
* Return a table of matches of a pattern within a string.
|
||||
* Return a table of all matches of a pattern within a string.
|
||||
*/
|
||||
Datum
|
||||
regexp_matches(PG_FUNCTION_ARGS)
|
||||
@ -854,18 +897,22 @@ regexp_matches(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *pattern = PG_GETARG_TEXT_PP(1);
|
||||
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
|
||||
pg_re_flags re_flags;
|
||||
MemoryContext oldcontext;
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
/* Determine options */
|
||||
parse_re_flags(&re_flags, flags);
|
||||
|
||||
/* be sure to copy the input string into the multi-call ctx */
|
||||
matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
|
||||
flags,
|
||||
&re_flags,
|
||||
PG_GET_COLLATION(),
|
||||
false, true, false);
|
||||
true, false);
|
||||
|
||||
/* Pre-create workspace that build_regexp_matches_result needs */
|
||||
/* Pre-create workspace that build_regexp_match_result needs */
|
||||
matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
|
||||
matchctx->nulls = (bool *) palloc(sizeof(bool) * matchctx->npatterns);
|
||||
|
||||
@ -880,7 +927,7 @@ regexp_matches(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *result_ary;
|
||||
|
||||
result_ary = build_regexp_matches_result(matchctx);
|
||||
result_ary = build_regexp_match_result(matchctx);
|
||||
matchctx->next_match++;
|
||||
SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary));
|
||||
}
|
||||
@ -899,28 +946,27 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
/*
|
||||
* setup_regexp_matches --- do the initial matching for regexp_matches()
|
||||
* or regexp_split()
|
||||
* setup_regexp_matches --- do the initial matching for regexp_match
|
||||
* and regexp_split functions
|
||||
*
|
||||
* To avoid having to re-find the compiled pattern on each call, we do
|
||||
* all the matching in one swoop. The returned regexp_matches_ctx contains
|
||||
* the locations of all the substrings matching the pattern.
|
||||
*
|
||||
* The three bool parameters have only two patterns (one for each caller)
|
||||
* but it seems clearer to distinguish the functionality this way than to
|
||||
* key it all off one "is_split" flag.
|
||||
* The two bool parameters have only two patterns (one for matching, one for
|
||||
* splitting) but it seems clearer to distinguish the functionality this way
|
||||
* than to key it all off one "is_split" flag.
|
||||
*/
|
||||
static regexp_matches_ctx *
|
||||
setup_regexp_matches(text *orig_str, text *pattern, text *flags,
|
||||
setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
|
||||
Oid collation,
|
||||
bool force_glob, bool use_subpatterns,
|
||||
bool use_subpatterns,
|
||||
bool ignore_degenerate)
|
||||
{
|
||||
regexp_matches_ctx *matchctx = palloc0(sizeof(regexp_matches_ctx));
|
||||
int orig_len;
|
||||
pg_wchar *wide_str;
|
||||
int wide_len;
|
||||
pg_re_flags re_flags;
|
||||
regex_t *cpattern;
|
||||
regmatch_t *pmatch;
|
||||
int pmatch_len;
|
||||
@ -937,21 +983,8 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
|
||||
wide_str = (pg_wchar *) palloc(sizeof(pg_wchar) * (orig_len + 1));
|
||||
wide_len = pg_mb2wchar_with_len(VARDATA_ANY(orig_str), wide_str, orig_len);
|
||||
|
||||
/* determine options */
|
||||
parse_re_flags(&re_flags, flags);
|
||||
if (force_glob)
|
||||
{
|
||||
/* user mustn't specify 'g' for regexp_split */
|
||||
if (re_flags.glob)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("regexp_split does not support the global option")));
|
||||
/* but we find all the matches anyway */
|
||||
re_flags.glob = true;
|
||||
}
|
||||
|
||||
/* set up the compiled pattern */
|
||||
cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);
|
||||
cpattern = RE_compile_and_cache(pattern, re_flags->cflags, collation);
|
||||
|
||||
/* do we want to remember subpatterns? */
|
||||
if (use_subpatterns && cpattern->re_nsub > 0)
|
||||
@ -970,7 +1003,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
|
||||
pmatch = palloc(sizeof(regmatch_t) * pmatch_len);
|
||||
|
||||
/* the real output space (grown dynamically if needed) */
|
||||
array_len = re_flags.glob ? 256 : 32;
|
||||
array_len = re_flags->glob ? 256 : 32;
|
||||
matchctx->match_locs = (int *) palloc(sizeof(int) * array_len);
|
||||
array_idx = 0;
|
||||
|
||||
@ -1018,7 +1051,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
|
||||
prev_match_end = pmatch[0].rm_eo;
|
||||
|
||||
/* if not glob, stop after one match */
|
||||
if (!re_flags.glob)
|
||||
if (!re_flags->glob)
|
||||
break;
|
||||
|
||||
/*
|
||||
@ -1057,10 +1090,10 @@ cleanup_regexp_matches(regexp_matches_ctx *matchctx)
|
||||
}
|
||||
|
||||
/*
|
||||
* build_regexp_matches_result - build output array for current match
|
||||
* build_regexp_match_result - build output array for current match
|
||||
*/
|
||||
static ArrayType *
|
||||
build_regexp_matches_result(regexp_matches_ctx *matchctx)
|
||||
build_regexp_match_result(regexp_matches_ctx *matchctx)
|
||||
{
|
||||
Datum *elems = matchctx->elems;
|
||||
bool *nulls = matchctx->nulls;
|
||||
@ -1114,16 +1147,27 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *pattern = PG_GETARG_TEXT_PP(1);
|
||||
text *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
|
||||
pg_re_flags re_flags;
|
||||
MemoryContext oldcontext;
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
|
||||
|
||||
/* Determine options */
|
||||
parse_re_flags(&re_flags, flags);
|
||||
/* User mustn't specify 'g' */
|
||||
if (re_flags.glob)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("regexp_split_to_table does not support the global option")));
|
||||
/* But we find all the matches anyway */
|
||||
re_flags.glob = true;
|
||||
|
||||
/* be sure to copy the input string into the multi-call ctx */
|
||||
splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
|
||||
flags,
|
||||
&re_flags,
|
||||
PG_GET_COLLATION(),
|
||||
true, false, true);
|
||||
false, true);
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
funcctx->user_fctx = (void *) splitctx;
|
||||
@ -1162,13 +1206,24 @@ Datum
|
||||
regexp_split_to_array(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayBuildState *astate = NULL;
|
||||
pg_re_flags re_flags;
|
||||
regexp_matches_ctx *splitctx;
|
||||
|
||||
/* Determine options */
|
||||
parse_re_flags(&re_flags, PG_GETARG_TEXT_PP_IF_EXISTS(2));
|
||||
/* User mustn't specify 'g' */
|
||||
if (re_flags.glob)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("regexp_split_to_array does not support the global option")));
|
||||
/* But we find all the matches anyway */
|
||||
re_flags.glob = true;
|
||||
|
||||
splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
|
||||
PG_GETARG_TEXT_PP(1),
|
||||
PG_GETARG_TEXT_PP_IF_EXISTS(2),
|
||||
&re_flags,
|
||||
PG_GET_COLLATION(),
|
||||
true, false, true);
|
||||
false, true);
|
||||
|
||||
while (splitctx->next_match <= splitctx->nmatches)
|
||||
{
|
||||
|
@ -53,6 +53,6 @@
|
||||
*/
|
||||
|
||||
/* yyyymmddN */
|
||||
#define CATALOG_VERSION_NO 201608161
|
||||
#define CATALOG_VERSION_NO 201608171
|
||||
|
||||
#endif
|
||||
|
@ -1912,10 +1912,14 @@ DATA(insert OID = 2284 ( regexp_replace PGNSP PGUID 12 1 0 0 0 f f f f t f i
|
||||
DESCR("replace text using regexp");
|
||||
DATA(insert OID = 2285 ( regexp_replace PGNSP PGUID 12 1 0 0 0 f f f f t f i s 4 0 25 "25 25 25 25" _null_ _null_ _null_ _null_ _null_ textregexreplace _null_ _null_ _null_ ));
|
||||
DESCR("replace text using regexp");
|
||||
DATA(insert OID = 3396 ( regexp_match PGNSP PGUID 12 1 0 0 0 f f f f t f i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_match_no_flags _null_ _null_ _null_ ));
|
||||
DESCR("find first match for regexp");
|
||||
DATA(insert OID = 3397 ( regexp_match PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_match _null_ _null_ _null_ ));
|
||||
DESCR("find first match for regexp");
|
||||
DATA(insert OID = 2763 ( regexp_matches PGNSP PGUID 12 1 1 0 0 f f f f t t i s 2 0 1009 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches_no_flags _null_ _null_ _null_ ));
|
||||
DESCR("find all match groups for regexp");
|
||||
DESCR("find match(es) for regexp");
|
||||
DATA(insert OID = 2764 ( regexp_matches PGNSP PGUID 12 1 10 0 0 f f f f t t i s 3 0 1009 "25 25 25" _null_ _null_ _null_ _null_ _null_ regexp_matches _null_ _null_ _null_ ));
|
||||
DESCR("find all match groups for regexp");
|
||||
DESCR("find match(es) for regexp");
|
||||
DATA(insert OID = 2088 ( split_part PGNSP PGUID 12 1 0 0 0 f f f f t f i s 3 0 25 "25 25 23" _null_ _null_ _null_ _null_ _null_ split_text _null_ _null_ _null_ ));
|
||||
DESCR("split string by field_sep and return field_num");
|
||||
DATA(insert OID = 2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 0 0 f f f f t t i s 2 0 25 "25 25" _null_ _null_ _null_ _null_ _null_ regexp_split_to_table_no_flags _null_ _null_ _null_ ));
|
||||
|
@ -628,6 +628,8 @@ extern Datum textregexsubstr(PG_FUNCTION_ARGS);
|
||||
extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS);
|
||||
extern Datum textregexreplace(PG_FUNCTION_ARGS);
|
||||
extern Datum similar_escape(PG_FUNCTION_ARGS);
|
||||
extern Datum regexp_match(PG_FUNCTION_ARGS);
|
||||
extern Datum regexp_match_no_flags(PG_FUNCTION_ARGS);
|
||||
extern Datum regexp_matches(PG_FUNCTION_ARGS);
|
||||
extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS);
|
||||
extern Datum regexp_split_to_table(PG_FUNCTION_ARGS);
|
||||
|
@ -90,6 +90,34 @@ select substring('a' from '((a)+)');
|
||||
a
|
||||
(1 row)
|
||||
|
||||
-- Test regexp_match()
|
||||
select regexp_match('abc', '');
|
||||
regexp_match
|
||||
--------------
|
||||
{""}
|
||||
(1 row)
|
||||
|
||||
select regexp_match('abc', 'bc');
|
||||
regexp_match
|
||||
--------------
|
||||
{bc}
|
||||
(1 row)
|
||||
|
||||
select regexp_match('abc', 'd') is null;
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
select regexp_match('abc', '(B)(c)', 'i');
|
||||
regexp_match
|
||||
--------------
|
||||
{b,c}
|
||||
(1 row)
|
||||
|
||||
select regexp_match('abc', 'Bd', 'ig'); -- error
|
||||
ERROR: regexp_match does not support the global option
|
||||
HINT: Use the regexp_matches function instead.
|
||||
-- Test lookahead constraints
|
||||
select regexp_matches('ab', 'a(?=b)b*');
|
||||
regexp_matches
|
||||
|
@ -681,9 +681,9 @@ SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e',
|
||||
ERROR: invalid regexp option: "z"
|
||||
-- global option meaningless for regexp_split
|
||||
SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g') AS foo;
|
||||
ERROR: regexp_split does not support the global option
|
||||
ERROR: regexp_split_to_table does not support the global option
|
||||
SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPs ovEr The lazy dOG', 'e', 'g');
|
||||
ERROR: regexp_split does not support the global option
|
||||
ERROR: regexp_split_to_array does not support the global option
|
||||
-- change NULL-display back
|
||||
\pset null ''
|
||||
-- E021-11 position expression
|
||||
|
@ -25,6 +25,13 @@ select substring('asd TO foo' from ' TO (([a-z0-9._]+|"([^"]+|"")+")+)');
|
||||
select substring('a' from '((a))+');
|
||||
select substring('a' from '((a)+)');
|
||||
|
||||
-- Test regexp_match()
|
||||
select regexp_match('abc', '');
|
||||
select regexp_match('abc', 'bc');
|
||||
select regexp_match('abc', 'd') is null;
|
||||
select regexp_match('abc', '(B)(c)', 'i');
|
||||
select regexp_match('abc', 'Bd', 'ig'); -- error
|
||||
|
||||
-- Test lookahead constraints
|
||||
select regexp_matches('ab', 'a(?=b)b*');
|
||||
select regexp_matches('a', 'a(?=b)b*');
|
||||
|
Loading…
x
Reference in New Issue
Block a user