mirror of
https://github.com/postgres/postgres.git
synced 2025-06-14 18:42:34 +03:00
Add three new regexp functions: regexp_matches, regexp_split_to_array,
and regexp_split_to_table. These functions provide access to the capture groups resulting from a POSIX regular expression match, and provide the ability to split a string on a POSIX regular expression, respectively. Patch from Jeremy Drake; code review by Neil Conway, additional comments and suggestions from Tom and Peter E. This patch bumps the catversion, adds some regression tests, and updates the docs.
This commit is contained in:
@ -1,4 +1,4 @@
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.369 2007/02/20 19:59:04 momjian Exp $ -->
|
||||
<!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.370 2007/03/20 05:44:59 neilc Exp $ -->
|
||||
|
||||
<chapter id="functions">
|
||||
<title>Functions and Operators</title>
|
||||
@ -1468,17 +1468,52 @@
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type>])</literal></entry>
|
||||
<entry><literal><function>regexp_matches</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
|
||||
<entry><type>setof text[]</type></entry>
|
||||
<entry>
|
||||
Return all capture groups resulting from matching a POSIX regular
|
||||
expression against the <parameter>string</parameter>. See
|
||||
<xref linkend="functions-posix-regexp"> for more information.
|
||||
</entry>
|
||||
<entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry>
|
||||
<entry><literal>{bar,beque}</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
|
||||
<entry><type>text</type></entry>
|
||||
<entry>
|
||||
Replace substring matching POSIX regular expression. See
|
||||
<xref linkend="functions-matching"> for more information on pattern
|
||||
matching.
|
||||
<xref linkend="functions-posix-regexp"> for more information.
|
||||
</entry>
|
||||
<entry><literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal></entry>
|
||||
<entry><literal>ThM</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal><function>regexp_split_to_array</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
|
||||
<entry><type>text[]</type></entry>
|
||||
<entry>
|
||||
Split <parameter>string</parameter> using a POSIX regular expression as
|
||||
the delimiter. See <xref linkend="functions-posix-regexp"> for more
|
||||
information.
|
||||
</entry>
|
||||
<entry><literal>regexp_split_to_array('hello world', E'\\s+')</literal></entry>
|
||||
<entry><literal>{hello,world}</literal></entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal><function>regexp_split_to_table</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry>
|
||||
<entry><type>setof text</type></entry>
|
||||
<entry>
|
||||
Split <parameter>string</parameter> using a POSIX regular expression as
|
||||
the delimiter. See <xref linkend="functions-posix-regexp"> for more
|
||||
information.
|
||||
</entry>
|
||||
<entry><literal>regexp_split_to_table('hello world', E'\\s+')</literal></entry>
|
||||
<entry><literal>hello</literal><para><literal>world</literal></para> (2 rows)</entry>
|
||||
</row>
|
||||
|
||||
<row>
|
||||
<entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>int</type>)</literal></entry>
|
||||
<entry><type>text</type></entry>
|
||||
@ -2883,9 +2918,6 @@ cast(-44 as bit(12)) <lineannotation>111111010100</lineannotation>
|
||||
<indexterm>
|
||||
<primary>substring</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_replace</primary>
|
||||
</indexterm>
|
||||
|
||||
<synopsis>
|
||||
<replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional>
|
||||
@ -3004,6 +3036,21 @@ substring('foobar' from '#"o_b#"%' for '#') <lineannotation>NULL</lineannotat
|
||||
<primary>regular expression</primary>
|
||||
<seealso>pattern matching</seealso>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>substring</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_replace</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_matches</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_split_to_table</primary>
|
||||
</indexterm>
|
||||
<indexterm>
|
||||
<primary>regexp_split_to_array</primary>
|
||||
</indexterm>
|
||||
|
||||
<para>
|
||||
<xref linkend="functions-posix-table"> lists the available
|
||||
@ -3134,7 +3181,10 @@ substring('foobar' from 'o(.)b') <lineannotation>o</lineannotation>
|
||||
string containing zero or more single-letter flags that change the
|
||||
function's behavior. Flag <literal>i</> specifies case-insensitive
|
||||
matching, while flag <literal>g</> specifies replacement of each matching
|
||||
substring rather than only the first one.
|
||||
substring rather than only the first one. Other supported flags are
|
||||
<literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
|
||||
<literal>x</>, whose meanings correspond to those shown in
|
||||
<xref linkend="posix-embedded-options-table">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
@ -3149,6 +3199,124 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g')
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <function>regexp_matches</> function returns all of the capture
|
||||
groups resulting from matching a POSIX regular expression pattern.
|
||||
It has the syntax
|
||||
<function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</>
|
||||
<optional>, <replaceable>flags</> </optional>).
|
||||
If there is no match to the <replaceable>pattern</>, the function returns no rows.
|
||||
If there is a match, the function returns the contents of all of the capture groups
|
||||
in a text array, or if there are no capture groups in the pattern, it returns the
|
||||
contents of the entire match as a single-element text array.
|
||||
The <replaceable>flags</> parameter is an optional text
|
||||
string containing zero or more single-letter flags that change the
|
||||
function's behavior. Flag <literal>i</> specifies case-insensitive
|
||||
matching, while flag <literal>g</> causes the return of each matching
|
||||
substring rather than only the first one. Other supported
|
||||
flags are <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
|
||||
<literal>x</>, whose meanings are described in
|
||||
<xref linkend="posix-embedded-options-table">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
SELECT regexp_matches('foobarbequebaz', '(bar)(beque)');
|
||||
regexp_matches
|
||||
----------------
|
||||
{bar,beque}
|
||||
(1 row)
|
||||
|
||||
SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g');
|
||||
regexp_matches
|
||||
----------------
|
||||
{bar,beque}
|
||||
{bazil,barf}
|
||||
(2 rows)
|
||||
|
||||
SELECT regexp_matches('foobarbequebaz', 'barbeque');
|
||||
regexp_matches
|
||||
----------------
|
||||
{barbeque}
|
||||
(1 row)
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <function>regexp_split_to_table</> function splits a string using a POSIX
|
||||
regular expression pattern as a delimiter. It has the syntax
|
||||
<function>regexp_split_to_table</function>(<replaceable>string</>, <replaceable>pattern</>
|
||||
<optional>, <replaceable>flags</> </optional>).
|
||||
If there is no match to the <replaceable>pattern</>, the function returns the
|
||||
<replaceable>string</>. If there is at least one match, for each match it returns
|
||||
the text from the end of the last match (or the beginning of the string)
|
||||
to the beginning of the match. When there are no more matches, it
|
||||
returns the text from the end of the last match to the end of the string.
|
||||
The <replaceable>flags</> parameter is an optional text string containing
|
||||
zero or more single-letter flags that change the function's behavior.
|
||||
<function>regexp_split_to_table</function> supports the flags <literal>i</>,
|
||||
<literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and
|
||||
<literal>x</>, whose meanings are described in
|
||||
<xref linkend="posix-embedded-options-table">.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
The <function>regexp_split_to_array</> function behaves the same as
|
||||
<function>regexp_split_to_table</>, except that <function>regexp_split_to_array</>
|
||||
returns its results as a <type>text[]</>. It has the syntax
|
||||
<function>regexp_split_to_array</function>(<replaceable>string</>, <replaceable>pattern</>
|
||||
<optional>, <replaceable>flags</> </optional>).
|
||||
The parameters are the same as for <function>regexp_split_to_table</>.
|
||||
</para>
|
||||
|
||||
<para>
|
||||
Some examples:
|
||||
<programlisting>
|
||||
|
||||
SELECT foo FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', E'\\s+') AS foo;
|
||||
foo
|
||||
--------
|
||||
the
|
||||
quick
|
||||
brown
|
||||
fox
|
||||
jumped
|
||||
over
|
||||
the
|
||||
lazy
|
||||
dog
|
||||
(9 rows)
|
||||
|
||||
SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', E'\\s+');
|
||||
regexp_split_to_array
|
||||
------------------------------------------------
|
||||
{the,quick,brown,fox,jumped,over,the,lazy,dog}
|
||||
(1 row)
|
||||
|
||||
SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo;
|
||||
foo
|
||||
-----
|
||||
t
|
||||
h
|
||||
e
|
||||
q
|
||||
u
|
||||
i
|
||||
c
|
||||
k
|
||||
b
|
||||
r
|
||||
o
|
||||
w
|
||||
n
|
||||
f
|
||||
o
|
||||
x
|
||||
(16 rows)
|
||||
</programlisting>
|
||||
</para>
|
||||
|
||||
<para>
|
||||
<productname>PostgreSQL</productname>'s regular expressions are implemented
|
||||
using a package written by Henry Spencer. Much of
|
||||
|
Reference in New Issue
Block a user