mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Add assorted new regexp_xxx SQL functions.
This patch adds new functions regexp_count(), regexp_instr(), regexp_like(), and regexp_substr(), and extends regexp_replace() with some new optional arguments. All these functions follow the definitions used in Oracle, although there are small differences in the regexp language due to using our own regexp engine -- most notably, that the default newline-matching behavior is different. Similar functions appear in DB2 and elsewhere, too. Aside from easing portability, these functions are easier to use for certain tasks than our existing regexp_match[es] functions. Gilles Darold, heavily revised by me Discussion: https://postgr.es/m/fc160ee0-c843-b024-29bb-97b5da61971f@darold.net
This commit is contained in:
		@@ -3108,6 +3108,78 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
 | 
				
			|||||||
       </para></entry>
 | 
					       </para></entry>
 | 
				
			||||||
      </row>
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      <row>
 | 
				
			||||||
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
 | 
					        <indexterm>
 | 
				
			||||||
 | 
					         <primary>regexp_count</primary>
 | 
				
			||||||
 | 
					        </indexterm>
 | 
				
			||||||
 | 
					        <function>regexp_count</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>start</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>flags</parameter> <type>text</type> ] ] )
 | 
				
			||||||
 | 
					        <returnvalue>integer</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        Returns the number of times the POSIX regular
 | 
				
			||||||
 | 
					        expression <parameter>pattern</parameter> matches in
 | 
				
			||||||
 | 
					        the <parameter>string</parameter>; see
 | 
				
			||||||
 | 
					        <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_count('123456789012', '\d\d\d', 2)</literal>
 | 
				
			||||||
 | 
					        <returnvalue>3</returnvalue>
 | 
				
			||||||
 | 
					       </para></entry>
 | 
				
			||||||
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      <row>
 | 
				
			||||||
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
 | 
					        <indexterm>
 | 
				
			||||||
 | 
					         <primary>regexp_instr</primary>
 | 
				
			||||||
 | 
					        </indexterm>
 | 
				
			||||||
 | 
					        <function>regexp_instr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>start</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>N</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>endoption</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>flags</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>subexpr</parameter> <type>integer</type> ] ] ] ] ] )
 | 
				
			||||||
 | 
					        <returnvalue>integer</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        Returns the position within <parameter>string</parameter> where
 | 
				
			||||||
 | 
					        the <parameter>N</parameter>'th match of the POSIX regular
 | 
				
			||||||
 | 
					        expression <parameter>pattern</parameter> occurs, or zero if there is
 | 
				
			||||||
 | 
					        no such match; see <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i')</literal>
 | 
				
			||||||
 | 
					        <returnvalue>3</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_instr('ABCDEF', 'c(.)(..)', 1, 1, 0, 'i', 2)</literal>
 | 
				
			||||||
 | 
					        <returnvalue>5</returnvalue>
 | 
				
			||||||
 | 
					       </para></entry>
 | 
				
			||||||
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      <row>
 | 
				
			||||||
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
 | 
					        <indexterm>
 | 
				
			||||||
 | 
					         <primary>regexp_like</primary>
 | 
				
			||||||
 | 
					        </indexterm>
 | 
				
			||||||
 | 
					        <function>regexp_like</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>flags</parameter> <type>text</type> ] )
 | 
				
			||||||
 | 
					        <returnvalue>boolean</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        Checks whether a match of the POSIX regular
 | 
				
			||||||
 | 
					        expression <parameter>pattern</parameter> occurs
 | 
				
			||||||
 | 
					        within <parameter>string</parameter>; see
 | 
				
			||||||
 | 
					        <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_like('Hello World', 'world$', 'i')</literal>
 | 
				
			||||||
 | 
					        <returnvalue>t</returnvalue>
 | 
				
			||||||
 | 
					       </para></entry>
 | 
				
			||||||
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      <row>
 | 
					      <row>
 | 
				
			||||||
       <entry role="func_table_entry"><para role="func_signature">
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
        <indexterm>
 | 
					        <indexterm>
 | 
				
			||||||
@@ -3117,8 +3189,9 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
 | 
				
			|||||||
        <returnvalue>text[]</returnvalue>
 | 
					        <returnvalue>text[]</returnvalue>
 | 
				
			||||||
       </para>
 | 
					       </para>
 | 
				
			||||||
       <para>
 | 
					       <para>
 | 
				
			||||||
        Returns captured substrings resulting from the first match of a POSIX
 | 
					        Returns substrings within the first match of the POSIX regular
 | 
				
			||||||
        regular expression to the <parameter>string</parameter>; see
 | 
					        expression <parameter>pattern</parameter> to
 | 
				
			||||||
 | 
					        the <parameter>string</parameter>; see
 | 
				
			||||||
        <xref linkend="functions-posix-regexp"/>.
 | 
					        <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
       </para>
 | 
					       </para>
 | 
				
			||||||
       <para>
 | 
					       <para>
 | 
				
			||||||
@@ -3136,9 +3209,10 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
 | 
				
			|||||||
        <returnvalue>setof text[]</returnvalue>
 | 
					        <returnvalue>setof text[]</returnvalue>
 | 
				
			||||||
       </para>
 | 
					       </para>
 | 
				
			||||||
       <para>
 | 
					       <para>
 | 
				
			||||||
        Returns captured substrings resulting from the first match of a
 | 
					        Returns substrings within the first match of the POSIX regular
 | 
				
			||||||
        POSIX regular expression to the <parameter>string</parameter>,
 | 
					        expression <parameter>pattern</parameter> to
 | 
				
			||||||
        or multiple matches if the <literal>g</literal> flag is used;
 | 
					        the <parameter>string</parameter>, or substrings within all
 | 
				
			||||||
 | 
					        such matches if the <literal>g</literal> flag is used;
 | 
				
			||||||
        see <xref linkend="functions-posix-regexp"/>.
 | 
					        see <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
       </para>
 | 
					       </para>
 | 
				
			||||||
       <para>
 | 
					       <para>
 | 
				
			||||||
@@ -3156,14 +3230,16 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
 | 
				
			|||||||
        <indexterm>
 | 
					        <indexterm>
 | 
				
			||||||
         <primary>regexp_replace</primary>
 | 
					         <primary>regexp_replace</primary>
 | 
				
			||||||
        </indexterm>
 | 
					        </indexterm>
 | 
				
			||||||
        <function>regexp_replace</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type> ] )
 | 
					        <function>regexp_replace</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>start</parameter> <type>integer</type> ]
 | 
				
			||||||
 | 
					         [, <parameter>flags</parameter> <type>text</type> ] )
 | 
				
			||||||
        <returnvalue>text</returnvalue>
 | 
					        <returnvalue>text</returnvalue>
 | 
				
			||||||
       </para>
 | 
					       </para>
 | 
				
			||||||
       <para>
 | 
					       <para>
 | 
				
			||||||
        Replaces substrings resulting from the first match of a
 | 
					        Replaces the substring that is the first match to the POSIX
 | 
				
			||||||
        POSIX regular expression, or multiple substring matches
 | 
					        regular expression <parameter>pattern</parameter>, or all such
 | 
				
			||||||
        if the <literal>g</literal> flag is used; see <xref
 | 
					        matches if the <literal>g</literal> flag is used; see
 | 
				
			||||||
        linkend="functions-posix-regexp"/>.
 | 
					        <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
       </para>
 | 
					       </para>
 | 
				
			||||||
       <para>
 | 
					       <para>
 | 
				
			||||||
        <literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal>
 | 
					        <literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal>
 | 
				
			||||||
@@ -3171,6 +3247,26 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
 | 
				
			|||||||
       </para></entry>
 | 
					       </para></entry>
 | 
				
			||||||
      </row>
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      <row>
 | 
				
			||||||
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
 | 
					        <function>regexp_replace</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type>,
 | 
				
			||||||
 | 
					         <parameter>start</parameter> <type>integer</type>,
 | 
				
			||||||
 | 
					         <parameter>N</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>flags</parameter> <type>text</type> ] )
 | 
				
			||||||
 | 
					        <returnvalue>text</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        Replaces the substring that is the <parameter>N</parameter>'th
 | 
				
			||||||
 | 
					        match to the POSIX regular expression <parameter>pattern</parameter>,
 | 
				
			||||||
 | 
					        or all such matches if <parameter>N</parameter> is zero; see
 | 
				
			||||||
 | 
					        <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_replace('Thomas', '.', 'X', 3, 2)</literal>
 | 
				
			||||||
 | 
					        <returnvalue>ThoXas</returnvalue>
 | 
				
			||||||
 | 
					       </para></entry>
 | 
				
			||||||
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      <row>
 | 
					      <row>
 | 
				
			||||||
       <entry role="func_table_entry"><para role="func_signature">
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
        <indexterm>
 | 
					        <indexterm>
 | 
				
			||||||
@@ -3213,6 +3309,35 @@ repeat('Pg', 4) <returnvalue>PgPgPgPg</returnvalue>
 | 
				
			|||||||
       </para></entry>
 | 
					       </para></entry>
 | 
				
			||||||
      </row>
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      <row>
 | 
				
			||||||
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
 | 
					        <indexterm>
 | 
				
			||||||
 | 
					         <primary>regexp_substr</primary>
 | 
				
			||||||
 | 
					        </indexterm>
 | 
				
			||||||
 | 
					        <function>regexp_substr</function> ( <parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>start</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>N</parameter> <type>integer</type>
 | 
				
			||||||
 | 
					         [, <parameter>flags</parameter> <type>text</type>
 | 
				
			||||||
 | 
					         [, <parameter>subexpr</parameter> <type>integer</type> ] ] ] ] )
 | 
				
			||||||
 | 
					        <returnvalue>text</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        Returns the substring within <parameter>string</parameter> that
 | 
				
			||||||
 | 
					        matches the <parameter>N</parameter>'th occurrence of the POSIX
 | 
				
			||||||
 | 
					        regular expression <parameter>pattern</parameter>,
 | 
				
			||||||
 | 
					        or <literal>NULL</literal> if there is no such match; see
 | 
				
			||||||
 | 
					        <xref linkend="functions-posix-regexp"/>.
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i')</literal>
 | 
				
			||||||
 | 
					        <returnvalue>CDEF</returnvalue>
 | 
				
			||||||
 | 
					       </para>
 | 
				
			||||||
 | 
					       <para>
 | 
				
			||||||
 | 
					        <literal>regexp_substr('ABCDEF', 'c(.)(..)', 1, 1, 'i', 2)</literal>
 | 
				
			||||||
 | 
					        <returnvalue>EF</returnvalue>
 | 
				
			||||||
 | 
					       </para></entry>
 | 
				
			||||||
 | 
					      </row>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      <row>
 | 
					      <row>
 | 
				
			||||||
       <entry role="func_table_entry"><para role="func_signature">
 | 
					       <entry role="func_table_entry"><para role="func_signature">
 | 
				
			||||||
        <indexterm>
 | 
					        <indexterm>
 | 
				
			||||||
@@ -5377,6 +5502,15 @@ substring('foobar' similar '#"o_b#"%' escape '#')    <lineannotation>NULL</linea
 | 
				
			|||||||
   <indexterm>
 | 
					   <indexterm>
 | 
				
			||||||
    <primary>substring</primary>
 | 
					    <primary>substring</primary>
 | 
				
			||||||
   </indexterm>
 | 
					   </indexterm>
 | 
				
			||||||
 | 
					   <indexterm>
 | 
				
			||||||
 | 
					    <primary>regexp_count</primary>
 | 
				
			||||||
 | 
					   </indexterm>
 | 
				
			||||||
 | 
					   <indexterm>
 | 
				
			||||||
 | 
					    <primary>regexp_instr</primary>
 | 
				
			||||||
 | 
					   </indexterm>
 | 
				
			||||||
 | 
					   <indexterm>
 | 
				
			||||||
 | 
					    <primary>regexp_like</primary>
 | 
				
			||||||
 | 
					   </indexterm>
 | 
				
			||||||
   <indexterm>
 | 
					   <indexterm>
 | 
				
			||||||
    <primary>regexp_match</primary>
 | 
					    <primary>regexp_match</primary>
 | 
				
			||||||
   </indexterm>
 | 
					   </indexterm>
 | 
				
			||||||
@@ -5392,6 +5526,9 @@ substring('foobar' similar '#"o_b#"%' escape '#')    <lineannotation>NULL</linea
 | 
				
			|||||||
   <indexterm>
 | 
					   <indexterm>
 | 
				
			||||||
    <primary>regexp_split_to_array</primary>
 | 
					    <primary>regexp_split_to_array</primary>
 | 
				
			||||||
   </indexterm>
 | 
					   </indexterm>
 | 
				
			||||||
 | 
					   <indexterm>
 | 
				
			||||||
 | 
					    <primary>regexp_substr</primary>
 | 
				
			||||||
 | 
					   </indexterm>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   <para>
 | 
					   <para>
 | 
				
			||||||
    <xref linkend="functions-posix-table"/> lists the available
 | 
					    <xref linkend="functions-posix-table"/> lists the available
 | 
				
			||||||
@@ -5542,9 +5679,112 @@ substring('foobar' from 'o(.)b')   <lineannotation>o</lineannotation>
 | 
				
			|||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
   </para>
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     The <function>regexp_count</function> function counts the number of
 | 
				
			||||||
 | 
					     places where a POSIX regular expression pattern matches a string.
 | 
				
			||||||
 | 
					     It has the syntax
 | 
				
			||||||
 | 
					     <function>regexp_count</function>(<replaceable>string</replaceable>,
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>start</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>flags</replaceable>
 | 
				
			||||||
 | 
					     </optional></optional>).
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable> is searched for
 | 
				
			||||||
 | 
					     in <replaceable>string</replaceable>, normally from the beginning of
 | 
				
			||||||
 | 
					     the string, but if the <replaceable>start</replaceable> parameter is
 | 
				
			||||||
 | 
					     provided then beginning from that character index.
 | 
				
			||||||
 | 
					     The <replaceable>flags</replaceable> parameter is an optional text
 | 
				
			||||||
 | 
					     string containing zero or more single-letter flags that change the
 | 
				
			||||||
 | 
					     function's behavior.  For example, including <literal>i</literal> in
 | 
				
			||||||
 | 
					     <replaceable>flags</replaceable> specifies case-insensitive matching.
 | 
				
			||||||
 | 
					     Supported flags are described in
 | 
				
			||||||
 | 
					     <xref linkend="posix-embedded-options-table"/>.
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     Some examples:
 | 
				
			||||||
 | 
					<programlisting>
 | 
				
			||||||
 | 
					regexp_count('ABCABCAXYaxy', 'A.')          <lineannotation>3</lineannotation>
 | 
				
			||||||
 | 
					regexp_count('ABCABCAXYaxy', 'A.', 1, 'i')  <lineannotation>4</lineannotation>
 | 
				
			||||||
 | 
					</programlisting>
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     The <function>regexp_instr</function> function returns the starting or
 | 
				
			||||||
 | 
					     ending position of the <replaceable>N</replaceable>'th match of a
 | 
				
			||||||
 | 
					     POSIX regular expression pattern to a string, or zero if there is no
 | 
				
			||||||
 | 
					     such match.  It has the syntax
 | 
				
			||||||
 | 
					     <function>regexp_instr</function>(<replaceable>string</replaceable>,
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>start</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>N</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>endoption</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>flags</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>subexpr</replaceable>
 | 
				
			||||||
 | 
					     </optional></optional></optional></optional></optional>).
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable> is searched for
 | 
				
			||||||
 | 
					     in <replaceable>string</replaceable>, normally from the beginning of
 | 
				
			||||||
 | 
					     the string, but if the <replaceable>start</replaceable> parameter is
 | 
				
			||||||
 | 
					     provided then beginning from that character index.
 | 
				
			||||||
 | 
					     If <replaceable>N</replaceable> is specified
 | 
				
			||||||
 | 
					     then the <replaceable>N</replaceable>'th match of the pattern
 | 
				
			||||||
 | 
					     is located, otherwise the first match is located.
 | 
				
			||||||
 | 
					     If the <replaceable>endoption</replaceable> parameter is omitted or
 | 
				
			||||||
 | 
					     specified as zero, the function returns the position of the first
 | 
				
			||||||
 | 
					     character of the match.  Otherwise, <replaceable>endoption</replaceable>
 | 
				
			||||||
 | 
					     must be one, and the function returns the position of the character
 | 
				
			||||||
 | 
					     following the match.
 | 
				
			||||||
 | 
					     The <replaceable>flags</replaceable> parameter is an optional text
 | 
				
			||||||
 | 
					     string containing zero or more single-letter flags that change the
 | 
				
			||||||
 | 
					     function's behavior.  Supported flags are described
 | 
				
			||||||
 | 
					     in <xref linkend="posix-embedded-options-table"/>.
 | 
				
			||||||
 | 
					     For a pattern containing parenthesized
 | 
				
			||||||
 | 
					     subexpressions, <replaceable>subexpr</replaceable> is an integer
 | 
				
			||||||
 | 
					     indicating which subexpression is of interest: the result identifies
 | 
				
			||||||
 | 
					     the position of the substring matching that subexpression.
 | 
				
			||||||
 | 
					     Subexpressions are numbered in the order of their leading parentheses.
 | 
				
			||||||
 | 
					     When <replaceable>subexpr</replaceable> is omitted or zero, the result
 | 
				
			||||||
 | 
					     identifies the position of the whole match regardless of
 | 
				
			||||||
 | 
					     parenthesized subexpressions.
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     Some examples:
 | 
				
			||||||
 | 
					<programlisting>
 | 
				
			||||||
 | 
					regexp_instr('number of your street, town zip, FR', '[^,]+', 1, 2)
 | 
				
			||||||
 | 
					                                   <lineannotation>23</lineannotation>
 | 
				
			||||||
 | 
					regexp_instr('ABCDEFGHI', '(c..)(...)', 1, 1, 0, 'i', 2)
 | 
				
			||||||
 | 
					                                   <lineannotation>6</lineannotation>
 | 
				
			||||||
 | 
					</programlisting>
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     The <function>regexp_like</function> function checks whether a match
 | 
				
			||||||
 | 
					     of a POSIX regular expression pattern occurs within a string,
 | 
				
			||||||
 | 
					     returning boolean true or false.  It has the syntax
 | 
				
			||||||
 | 
					     <function>regexp_like</function>(<replaceable>string</replaceable>,
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>flags</replaceable> </optional>).
 | 
				
			||||||
 | 
					     The <replaceable>flags</replaceable> parameter is an optional text
 | 
				
			||||||
 | 
					     string containing zero or more single-letter flags that change the
 | 
				
			||||||
 | 
					     function's behavior.  Supported flags are described
 | 
				
			||||||
 | 
					     in <xref linkend="posix-embedded-options-table"/>.
 | 
				
			||||||
 | 
					     This function has the same results as the <literal>~</literal>
 | 
				
			||||||
 | 
					     operator if no flags are specified.  If only the <literal>i</literal>
 | 
				
			||||||
 | 
					     flag is specified, it has the same results as
 | 
				
			||||||
 | 
					     the <literal>~*</literal> operator.
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     Some examples:
 | 
				
			||||||
 | 
					<programlisting>
 | 
				
			||||||
 | 
					regexp_like('Hello World', 'world')       <lineannotation>false</lineannotation>
 | 
				
			||||||
 | 
					regexp_like('Hello World', 'world', 'i')  <lineannotation>true</lineannotation>
 | 
				
			||||||
 | 
					</programlisting>
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <para>
 | 
					    <para>
 | 
				
			||||||
     The <function>regexp_match</function> function returns a text array of
 | 
					     The <function>regexp_match</function> function returns a text array of
 | 
				
			||||||
     captured substring(s) resulting from the first match of a POSIX
 | 
					     matching substring(s) within the first match of a POSIX
 | 
				
			||||||
     regular expression pattern to a string.  It has the syntax
 | 
					     regular expression pattern to a string.  It has the syntax
 | 
				
			||||||
     <function>regexp_match</function>(<replaceable>string</replaceable>,
 | 
					     <function>regexp_match</function>(<replaceable>string</replaceable>,
 | 
				
			||||||
     <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>).
 | 
					     <replaceable>pattern</replaceable> <optional>, <replaceable>flags</replaceable> </optional>).
 | 
				
			||||||
@@ -5579,8 +5819,17 @@ SELECT regexp_match('foobarbequebaz', '(bar)(beque)');
 | 
				
			|||||||
 {bar,beque}
 | 
					 {bar,beque}
 | 
				
			||||||
(1 row)
 | 
					(1 row)
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
    In the common case where you just want the whole matching substring
 | 
					   </para>
 | 
				
			||||||
    or <literal>NULL</literal> for no match, write something like
 | 
					
 | 
				
			||||||
 | 
					    <tip>
 | 
				
			||||||
 | 
					     <para>
 | 
				
			||||||
 | 
					      In the common case where you just want the whole matching substring
 | 
				
			||||||
 | 
					      or <literal>NULL</literal> for no match, the best solution is to
 | 
				
			||||||
 | 
					      use <function>regexp_substr()</function>.
 | 
				
			||||||
 | 
					      However, <function>regexp_substr()</function> only exists
 | 
				
			||||||
 | 
					      in <productname>PostgreSQL</productname> version 15 and up.  When
 | 
				
			||||||
 | 
					      working in older versions, you can extract the first element
 | 
				
			||||||
 | 
					      of <function>regexp_match()</function>'s result, for example:
 | 
				
			||||||
<programlisting>
 | 
					<programlisting>
 | 
				
			||||||
SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
 | 
					SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
 | 
				
			||||||
 regexp_match
 | 
					 regexp_match
 | 
				
			||||||
@@ -5588,11 +5837,12 @@ SELECT (regexp_match('foobarbequebaz', 'bar.*que'))[1];
 | 
				
			|||||||
 barbeque
 | 
					 barbeque
 | 
				
			||||||
(1 row)
 | 
					(1 row)
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
   </para>
 | 
					     </para>
 | 
				
			||||||
 | 
					    </tip>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    <para>
 | 
					    <para>
 | 
				
			||||||
     The <function>regexp_matches</function> function returns a set of text arrays
 | 
					     The <function>regexp_matches</function> function returns a set of text arrays
 | 
				
			||||||
     of captured substring(s) resulting from matching a POSIX regular
 | 
					     of matching substring(s) within matches of a POSIX regular
 | 
				
			||||||
     expression pattern to a string.  It has the same syntax as
 | 
					     expression pattern to a string.  It has the same syntax as
 | 
				
			||||||
     <function>regexp_match</function>.
 | 
					     <function>regexp_match</function>.
 | 
				
			||||||
     This function returns no rows if there is no match, one row if there is
 | 
					     This function returns no rows if there is no match, one row if there is
 | 
				
			||||||
@@ -5650,7 +5900,13 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
 | 
				
			|||||||
     It has the syntax
 | 
					     It has the syntax
 | 
				
			||||||
     <function>regexp_replace</function>(<replaceable>source</replaceable>,
 | 
					     <function>regexp_replace</function>(<replaceable>source</replaceable>,
 | 
				
			||||||
     <replaceable>pattern</replaceable>, <replaceable>replacement</replaceable>
 | 
					     <replaceable>pattern</replaceable>, <replaceable>replacement</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>start</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>N</replaceable>
 | 
				
			||||||
 | 
					     </optional></optional>
 | 
				
			||||||
     <optional>, <replaceable>flags</replaceable> </optional>).
 | 
					     <optional>, <replaceable>flags</replaceable> </optional>).
 | 
				
			||||||
 | 
					     (Notice that <replaceable>N</replaceable> cannot be specified
 | 
				
			||||||
 | 
					     unless <replaceable>start</replaceable> is,
 | 
				
			||||||
 | 
					     but <replaceable>flags</replaceable> can be given in any case.)
 | 
				
			||||||
     The <replaceable>source</replaceable> string is returned unchanged if
 | 
					     The <replaceable>source</replaceable> string is returned unchanged if
 | 
				
			||||||
     there is no match to the <replaceable>pattern</replaceable>.  If there is a
 | 
					     there is no match to the <replaceable>pattern</replaceable>.  If there is a
 | 
				
			||||||
     match, the <replaceable>source</replaceable> string is returned with the
 | 
					     match, the <replaceable>source</replaceable> string is returned with the
 | 
				
			||||||
@@ -5663,11 +5919,22 @@ SELECT col1, (SELECT regexp_matches(col2, '(bar)(beque)')) FROM tab;
 | 
				
			|||||||
     substring matching the entire pattern should be inserted.  Write
 | 
					     substring matching the entire pattern should be inserted.  Write
 | 
				
			||||||
     <literal>\\</literal> if you need to put a literal backslash in the replacement
 | 
					     <literal>\\</literal> if you need to put a literal backslash in the replacement
 | 
				
			||||||
     text.
 | 
					     text.
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable> is searched for
 | 
				
			||||||
 | 
					     in <replaceable>string</replaceable>, normally from the beginning of
 | 
				
			||||||
 | 
					     the string, but if the <replaceable>start</replaceable> parameter is
 | 
				
			||||||
 | 
					     provided then beginning from that character index.
 | 
				
			||||||
 | 
					     By default, only the first match of the pattern is replaced.
 | 
				
			||||||
 | 
					     If <replaceable>N</replaceable> is specified and is greater than zero,
 | 
				
			||||||
 | 
					     then the <replaceable>N</replaceable>'th match of the pattern
 | 
				
			||||||
 | 
					     is replaced.
 | 
				
			||||||
 | 
					     If the <literal>g</literal> flag is given, or
 | 
				
			||||||
 | 
					     if <replaceable>N</replaceable> is specified and is zero, then all
 | 
				
			||||||
 | 
					     matches at or after the <replaceable>start</replaceable> position are
 | 
				
			||||||
 | 
					     replaced.  (The <literal>g</literal> flag is ignored
 | 
				
			||||||
 | 
					     when <replaceable>N</replaceable> is specified.)
 | 
				
			||||||
     The <replaceable>flags</replaceable> parameter is an optional text
 | 
					     The <replaceable>flags</replaceable> parameter is an optional text
 | 
				
			||||||
     string containing zero or more single-letter flags that change the
 | 
					     string containing zero or more single-letter flags that change the
 | 
				
			||||||
     function's behavior.  Flag <literal>i</literal> specifies case-insensitive
 | 
					     function's behavior.  Supported flags (though
 | 
				
			||||||
     matching, while flag <literal>g</literal> specifies replacement of each matching
 | 
					 | 
				
			||||||
     substring rather than only the first one.  Supported flags (though
 | 
					 | 
				
			||||||
     not <literal>g</literal>) are
 | 
					     not <literal>g</literal>) are
 | 
				
			||||||
     described in <xref linkend="posix-embedded-options-table"/>.
 | 
					     described in <xref linkend="posix-embedded-options-table"/>.
 | 
				
			||||||
    </para>
 | 
					    </para>
 | 
				
			||||||
@@ -5681,6 +5948,10 @@ regexp_replace('foobarbaz', 'b..', 'X', 'g')
 | 
				
			|||||||
                                   <lineannotation>fooXX</lineannotation>
 | 
					                                   <lineannotation>fooXX</lineannotation>
 | 
				
			||||||
regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
 | 
					regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
 | 
				
			||||||
                                   <lineannotation>fooXarYXazY</lineannotation>
 | 
					                                   <lineannotation>fooXarYXazY</lineannotation>
 | 
				
			||||||
 | 
					regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i')
 | 
				
			||||||
 | 
					                                   <lineannotation>X PXstgrXSQL fXnctXXn</lineannotation>
 | 
				
			||||||
 | 
					regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i')
 | 
				
			||||||
 | 
					                                   <lineannotation>A PostgrXSQL function</lineannotation>
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
   </para>
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -5712,7 +5983,6 @@ regexp_replace('foobarbaz', 'b(..)', 'X\1Y', 'g')
 | 
				
			|||||||
   <para>
 | 
					   <para>
 | 
				
			||||||
    Some examples:
 | 
					    Some examples:
 | 
				
			||||||
<programlisting>
 | 
					<programlisting>
 | 
				
			||||||
 | 
					 | 
				
			||||||
SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo;
 | 
					SELECT foo FROM regexp_split_to_table('the quick brown fox jumps over the lazy dog', '\s+') AS foo;
 | 
				
			||||||
  foo   
 | 
					  foo   
 | 
				
			||||||
-------
 | 
					-------
 | 
				
			||||||
@@ -5761,11 +6031,51 @@ SELECT foo FROM regexp_split_to_table('the quick brown fox', '\s*') AS foo;
 | 
				
			|||||||
    zero-length matches that occur at the start or end of the string
 | 
					    zero-length matches that occur at the start or end of the string
 | 
				
			||||||
    or immediately after a previous match.  This is contrary to the strict
 | 
					    or immediately after a previous match.  This is contrary to the strict
 | 
				
			||||||
    definition of regexp matching that is implemented by
 | 
					    definition of regexp matching that is implemented by
 | 
				
			||||||
    <function>regexp_match</function> and
 | 
					    the other regexp functions, but is usually the most convenient behavior
 | 
				
			||||||
    <function>regexp_matches</function>, but is usually the most convenient behavior
 | 
					 | 
				
			||||||
    in practice.  Other software systems such as Perl use similar definitions.
 | 
					    in practice.  Other software systems such as Perl use similar definitions.
 | 
				
			||||||
   </para>
 | 
					   </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     The <function>regexp_substr</function> function returns the substring
 | 
				
			||||||
 | 
					     that matches a POSIX regular expression pattern,
 | 
				
			||||||
 | 
					     or <literal>NULL</literal> if there is no match.  It has the syntax
 | 
				
			||||||
 | 
					     <function>regexp_substr</function>(<replaceable>string</replaceable>,
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>start</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>N</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>flags</replaceable>
 | 
				
			||||||
 | 
					     <optional>, <replaceable>subexpr</replaceable>
 | 
				
			||||||
 | 
					     </optional></optional></optional></optional>).
 | 
				
			||||||
 | 
					     <replaceable>pattern</replaceable> is searched for
 | 
				
			||||||
 | 
					     in <replaceable>string</replaceable>, normally from the beginning of
 | 
				
			||||||
 | 
					     the string, but if the <replaceable>start</replaceable> parameter is
 | 
				
			||||||
 | 
					     provided then beginning from that character index.
 | 
				
			||||||
 | 
					     If <replaceable>N</replaceable> is specified
 | 
				
			||||||
 | 
					     then the <replaceable>N</replaceable>'th match of the pattern
 | 
				
			||||||
 | 
					     is returned, otherwise the first match is returned.
 | 
				
			||||||
 | 
					     The <replaceable>flags</replaceable> parameter is an optional text
 | 
				
			||||||
 | 
					     string containing zero or more single-letter flags that change the
 | 
				
			||||||
 | 
					     function's behavior.  Supported flags are described
 | 
				
			||||||
 | 
					     in <xref linkend="posix-embedded-options-table"/>.
 | 
				
			||||||
 | 
					     For a pattern containing parenthesized
 | 
				
			||||||
 | 
					     subexpressions, <replaceable>subexpr</replaceable> is an integer
 | 
				
			||||||
 | 
					     indicating which subexpression is of interest: the result is the
 | 
				
			||||||
 | 
					     substring matching that subexpression.
 | 
				
			||||||
 | 
					     Subexpressions are numbered in the order of their leading parentheses.
 | 
				
			||||||
 | 
					     When <replaceable>subexpr</replaceable> is omitted or zero, the result
 | 
				
			||||||
 | 
					     is the whole match regardless of parenthesized subexpressions.
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    <para>
 | 
				
			||||||
 | 
					     Some examples:
 | 
				
			||||||
 | 
					<programlisting>
 | 
				
			||||||
 | 
					regexp_substr('number of your street, town zip, FR', '[^,]+', 1, 2)
 | 
				
			||||||
 | 
					                                   <lineannotation> town zip</lineannotation>
 | 
				
			||||||
 | 
					regexp_substr('ABCDEFGHI', '(c..)(...)', 1, 1, 'i', 2)
 | 
				
			||||||
 | 
					                                   <lineannotation>FGH</lineannotation>
 | 
				
			||||||
 | 
					</programlisting>
 | 
				
			||||||
 | 
					    </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
<!-- derived from the re_syntax.n man page -->
 | 
					<!-- derived from the re_syntax.n man page -->
 | 
				
			||||||
 | 
					
 | 
				
			||||||
   <sect3 id="posix-syntax-details">
 | 
					   <sect3 id="posix-syntax-details">
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -113,6 +113,7 @@ static cached_re_str re_array[MAX_CACHED_RES];	/* cached re's */
 | 
				
			|||||||
/* Local functions */
 | 
					/* Local functions */
 | 
				
			||||||
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
 | 
					static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
 | 
				
			||||||
												pg_re_flags *flags,
 | 
																	pg_re_flags *flags,
 | 
				
			||||||
 | 
																	int start_search,
 | 
				
			||||||
												Oid collation,
 | 
																	Oid collation,
 | 
				
			||||||
												bool use_subpatterns,
 | 
																	bool use_subpatterns,
 | 
				
			||||||
												bool ignore_degenerate,
 | 
																	bool ignore_degenerate,
 | 
				
			||||||
@@ -629,7 +630,7 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
 | 
						re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
 | 
						PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, 0, 1));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
@@ -646,11 +647,97 @@ textregexreplace(PG_FUNCTION_ARGS)
 | 
				
			|||||||
	regex_t    *re;
 | 
						regex_t    *re;
 | 
				
			||||||
	pg_re_flags flags;
 | 
						pg_re_flags flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * regexp_replace() with four arguments will be preferentially resolved as
 | 
				
			||||||
 | 
						 * this form when the fourth argument is of type UNKNOWN.  However, the
 | 
				
			||||||
 | 
						 * user might have intended to call textregexreplace_extended_no_n.  If we
 | 
				
			||||||
 | 
						 * see flags that look like an integer, emit the same error that
 | 
				
			||||||
 | 
						 * parse_re_flags would, but add a HINT about how to fix it.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						if (VARSIZE_ANY_EXHDR(opt) > 0)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							char	   *opt_p = VARDATA_ANY(opt);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							if (*opt_p >= '0' && *opt_p <= '9')
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid regular expression option: \"%.*s\"",
 | 
				
			||||||
 | 
												pg_mblen(opt_p), opt_p),
 | 
				
			||||||
 | 
										 errhint("If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.")));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	parse_re_flags(&flags, opt);
 | 
						parse_re_flags(&flags, opt);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
 | 
						re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
 | 
						PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, 0,
 | 
				
			||||||
 | 
															 flags.glob ? 0 : 1));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * textregexreplace_extended()
 | 
				
			||||||
 | 
					 *		Return a string matched by a regular expression, with replacement.
 | 
				
			||||||
 | 
					 *		Extends textregexreplace by allowing a start position and the
 | 
				
			||||||
 | 
					 *		choice of the occurrence to replace (0 means all occurrences).
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					textregexreplace_extended(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						text	   *s = PG_GETARG_TEXT_PP(0);
 | 
				
			||||||
 | 
						text	   *p = PG_GETARG_TEXT_PP(1);
 | 
				
			||||||
 | 
						text	   *r = PG_GETARG_TEXT_PP(2);
 | 
				
			||||||
 | 
						int			start = 1;
 | 
				
			||||||
 | 
						int			n = 1;
 | 
				
			||||||
 | 
						text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
 | 
				
			||||||
 | 
						pg_re_flags re_flags;
 | 
				
			||||||
 | 
						regex_t    *re;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Collect optional parameters */
 | 
				
			||||||
 | 
						if (PG_NARGS() > 3)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							start = PG_GETARG_INT32(3);
 | 
				
			||||||
 | 
							if (start <= 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"start", start)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (PG_NARGS() > 4)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							n = PG_GETARG_INT32(4);
 | 
				
			||||||
 | 
							if (n < 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"n", n)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Determine options */
 | 
				
			||||||
 | 
						parse_re_flags(&re_flags, flags);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* If N was not specified, deduce it from the 'g' flag */
 | 
				
			||||||
 | 
						if (PG_NARGS() <= 4)
 | 
				
			||||||
 | 
							n = re_flags.glob ? 0 : 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Compile the regular expression */
 | 
				
			||||||
 | 
						re = RE_compile_and_cache(p, re_flags.cflags, PG_GET_COLLATION());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Do the replacement(s) */
 | 
				
			||||||
 | 
						PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, start - 1, n));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					textregexreplace_extended_no_n(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return textregexreplace_extended(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					textregexreplace_extended_no_flags(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return textregexreplace_extended(fcinfo);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
@@ -958,6 +1045,235 @@ similar_escape(PG_FUNCTION_ARGS)
 | 
				
			|||||||
	PG_RETURN_TEXT_P(result);
 | 
						PG_RETURN_TEXT_P(result);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * regexp_count()
 | 
				
			||||||
 | 
					 *		Return the number of matches of a pattern within a string.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_count(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						text	   *str = PG_GETARG_TEXT_PP(0);
 | 
				
			||||||
 | 
						text	   *pattern = PG_GETARG_TEXT_PP(1);
 | 
				
			||||||
 | 
						int			start = 1;
 | 
				
			||||||
 | 
						text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(3);
 | 
				
			||||||
 | 
						pg_re_flags re_flags;
 | 
				
			||||||
 | 
						regexp_matches_ctx *matchctx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Collect optional parameters */
 | 
				
			||||||
 | 
						if (PG_NARGS() > 2)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							start = PG_GETARG_INT32(2);
 | 
				
			||||||
 | 
							if (start <= 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"start", start)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Determine options */
 | 
				
			||||||
 | 
						parse_re_flags(&re_flags, flags);
 | 
				
			||||||
 | 
						/* User mustn't specify 'g' */
 | 
				
			||||||
 | 
						if (re_flags.glob)
 | 
				
			||||||
 | 
							ereport(ERROR,
 | 
				
			||||||
 | 
									(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
							/* translator: %s is a SQL function name */
 | 
				
			||||||
 | 
									 errmsg("%s does not support the \"global\" option",
 | 
				
			||||||
 | 
											"regexp_count()")));
 | 
				
			||||||
 | 
						/* But we find all the matches anyway */
 | 
				
			||||||
 | 
						re_flags.glob = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Do the matching */
 | 
				
			||||||
 | 
						matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
 | 
				
			||||||
 | 
														PG_GET_COLLATION(),
 | 
				
			||||||
 | 
														false,	/* can ignore subexprs */
 | 
				
			||||||
 | 
														false, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PG_RETURN_INT32(matchctx->nmatches);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_count_no_start(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_count(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_count_no_flags(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_count(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * regexp_instr()
 | 
				
			||||||
 | 
					 *		Return the match's position within the string
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_instr(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						text	   *str = PG_GETARG_TEXT_PP(0);
 | 
				
			||||||
 | 
						text	   *pattern = PG_GETARG_TEXT_PP(1);
 | 
				
			||||||
 | 
						int			start = 1;
 | 
				
			||||||
 | 
						int			n = 1;
 | 
				
			||||||
 | 
						int			endoption = 0;
 | 
				
			||||||
 | 
						text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(5);
 | 
				
			||||||
 | 
						int			subexpr = 0;
 | 
				
			||||||
 | 
						int			pos;
 | 
				
			||||||
 | 
						pg_re_flags re_flags;
 | 
				
			||||||
 | 
						regexp_matches_ctx *matchctx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Collect optional parameters */
 | 
				
			||||||
 | 
						if (PG_NARGS() > 2)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							start = PG_GETARG_INT32(2);
 | 
				
			||||||
 | 
							if (start <= 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"start", start)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (PG_NARGS() > 3)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							n = PG_GETARG_INT32(3);
 | 
				
			||||||
 | 
							if (n <= 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"n", n)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (PG_NARGS() > 4)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							endoption = PG_GETARG_INT32(4);
 | 
				
			||||||
 | 
							if (endoption != 0 && endoption != 1)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"endoption", endoption)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (PG_NARGS() > 6)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							subexpr = PG_GETARG_INT32(6);
 | 
				
			||||||
 | 
							if (subexpr < 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"subexpr", subexpr)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Determine options */
 | 
				
			||||||
 | 
						parse_re_flags(&re_flags, flags);
 | 
				
			||||||
 | 
						/* User mustn't specify 'g' */
 | 
				
			||||||
 | 
						if (re_flags.glob)
 | 
				
			||||||
 | 
							ereport(ERROR,
 | 
				
			||||||
 | 
									(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
							/* translator: %s is a SQL function name */
 | 
				
			||||||
 | 
									 errmsg("%s does not support the \"global\" option",
 | 
				
			||||||
 | 
											"regexp_instr()")));
 | 
				
			||||||
 | 
						/* But we find all the matches anyway */
 | 
				
			||||||
 | 
						re_flags.glob = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Do the matching */
 | 
				
			||||||
 | 
						matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
 | 
				
			||||||
 | 
														PG_GET_COLLATION(),
 | 
				
			||||||
 | 
														(subexpr > 0),	/* need submatches? */
 | 
				
			||||||
 | 
														false, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* When n exceeds matches return 0 (includes case of no matches) */
 | 
				
			||||||
 | 
						if (n > matchctx->nmatches)
 | 
				
			||||||
 | 
							PG_RETURN_INT32(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* When subexpr exceeds number of subexpressions return 0 */
 | 
				
			||||||
 | 
						if (subexpr > matchctx->npatterns)
 | 
				
			||||||
 | 
							PG_RETURN_INT32(0);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Select the appropriate match position to return */
 | 
				
			||||||
 | 
						pos = (n - 1) * matchctx->npatterns;
 | 
				
			||||||
 | 
						if (subexpr > 0)
 | 
				
			||||||
 | 
							pos += subexpr - 1;
 | 
				
			||||||
 | 
						pos *= 2;
 | 
				
			||||||
 | 
						if (endoption == 1)
 | 
				
			||||||
 | 
							pos += 1;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (matchctx->match_locs[pos] >= 0)
 | 
				
			||||||
 | 
							PG_RETURN_INT32(matchctx->match_locs[pos] + 1);
 | 
				
			||||||
 | 
						else
 | 
				
			||||||
 | 
							PG_RETURN_INT32(0);		/* position not identifiable */
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_instr_no_start(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_instr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_instr_no_n(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_instr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_instr_no_endoption(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_instr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_instr_no_flags(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_instr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_instr_no_subexpr(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_instr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * regexp_like()
 | 
				
			||||||
 | 
					 *		Test for a pattern match within a string.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_like(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						text	   *str = PG_GETARG_TEXT_PP(0);
 | 
				
			||||||
 | 
						text	   *pattern = PG_GETARG_TEXT_PP(1);
 | 
				
			||||||
 | 
						text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(2);
 | 
				
			||||||
 | 
						pg_re_flags re_flags;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Determine options */
 | 
				
			||||||
 | 
						parse_re_flags(&re_flags, flags);
 | 
				
			||||||
 | 
						/* User mustn't specify 'g' */
 | 
				
			||||||
 | 
						if (re_flags.glob)
 | 
				
			||||||
 | 
							ereport(ERROR,
 | 
				
			||||||
 | 
									(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
							/* translator: %s is a SQL function name */
 | 
				
			||||||
 | 
									 errmsg("%s does not support the \"global\" option",
 | 
				
			||||||
 | 
											"regexp_like()")));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Otherwise it's like textregexeq/texticregexeq */
 | 
				
			||||||
 | 
						PG_RETURN_BOOL(RE_compile_and_execute(pattern,
 | 
				
			||||||
 | 
															  VARDATA_ANY(str),
 | 
				
			||||||
 | 
															  VARSIZE_ANY_EXHDR(str),
 | 
				
			||||||
 | 
															  re_flags.cflags,
 | 
				
			||||||
 | 
															  PG_GET_COLLATION(),
 | 
				
			||||||
 | 
															  0, NULL));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_like_no_flags(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_like(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * regexp_match()
 | 
					 * regexp_match()
 | 
				
			||||||
 *		Return the first substring(s) matching a pattern within a string.
 | 
					 *		Return the first substring(s) matching a pattern within a string.
 | 
				
			||||||
@@ -982,7 +1298,7 @@ regexp_match(PG_FUNCTION_ARGS)
 | 
				
			|||||||
						"regexp_match()"),
 | 
											"regexp_match()"),
 | 
				
			||||||
				 errhint("Use the regexp_matches function instead.")));
 | 
									 errhint("Use the regexp_matches function instead.")));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	matchctx = setup_regexp_matches(orig_str, pattern, &re_flags,
 | 
						matchctx = setup_regexp_matches(orig_str, pattern, &re_flags, 0,
 | 
				
			||||||
									PG_GET_COLLATION(), true, false, false);
 | 
														PG_GET_COLLATION(), true, false, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	if (matchctx->nmatches == 0)
 | 
						if (matchctx->nmatches == 0)
 | 
				
			||||||
@@ -1029,7 +1345,7 @@ regexp_matches(PG_FUNCTION_ARGS)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
		/* be sure to copy the input string into the multi-call ctx */
 | 
							/* be sure to copy the input string into the multi-call ctx */
 | 
				
			||||||
		matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
 | 
							matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
 | 
				
			||||||
										&re_flags,
 | 
															&re_flags, 0,
 | 
				
			||||||
										PG_GET_COLLATION(),
 | 
															PG_GET_COLLATION(),
 | 
				
			||||||
										true, false, false);
 | 
															true, false, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1064,24 +1380,28 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
 | 
				
			|||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * setup_regexp_matches --- do the initial matching for regexp_match
 | 
					 * setup_regexp_matches --- do the initial matching for regexp_match,
 | 
				
			||||||
 *		and regexp_split functions
 | 
					 *		regexp_split, and related functions
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * To avoid having to re-find the compiled pattern on each call, we do
 | 
					 * To avoid having to re-find the compiled pattern on each call, we do
 | 
				
			||||||
 * all the matching in one swoop.  The returned regexp_matches_ctx contains
 | 
					 * all the matching in one swoop.  The returned regexp_matches_ctx contains
 | 
				
			||||||
 * the locations of all the substrings matching the pattern.
 | 
					 * the locations of all the substrings matching the pattern.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * The three bool parameters have only two patterns (one for matching, one for
 | 
					 * start_search: the character (not byte) offset in orig_str at which to
 | 
				
			||||||
 * splitting) but it seems clearer to distinguish the functionality this way
 | 
					 * begin the search.  Returned positions are relative to orig_str anyway.
 | 
				
			||||||
 * than to key it all off one "is_split" flag. We don't currently assume that
 | 
					 * use_subpatterns: collect data about matches to parenthesized subexpressions.
 | 
				
			||||||
 * fetching_unmatched is exclusive of fetching the matched text too; if it's
 | 
					 * ignore_degenerate: ignore zero-length matches.
 | 
				
			||||||
 * set, the conversion buffer is large enough to fetch any single matched or
 | 
					 * fetching_unmatched: caller wants to fetch unmatched substrings.
 | 
				
			||||||
 * unmatched string, but not any larger substring. (In practice, when splitting
 | 
					 *
 | 
				
			||||||
 * the matches are usually small anyway, and it didn't seem worth complicating
 | 
					 * We don't currently assume that fetching_unmatched is exclusive of fetching
 | 
				
			||||||
 * the code further.)
 | 
					 * the matched text too; if it's set, the conversion buffer is large enough to
 | 
				
			||||||
 | 
					 * fetch any single matched or unmatched string, but not any larger
 | 
				
			||||||
 | 
					 * substring.  (In practice, when splitting the matches are usually small
 | 
				
			||||||
 | 
					 * anyway, and it didn't seem worth complicating the code further.)
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
static regexp_matches_ctx *
 | 
					static regexp_matches_ctx *
 | 
				
			||||||
setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
 | 
					setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
 | 
				
			||||||
 | 
										 int start_search,
 | 
				
			||||||
					 Oid collation,
 | 
										 Oid collation,
 | 
				
			||||||
					 bool use_subpatterns,
 | 
										 bool use_subpatterns,
 | 
				
			||||||
					 bool ignore_degenerate,
 | 
										 bool ignore_degenerate,
 | 
				
			||||||
@@ -1099,7 +1419,6 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
 | 
				
			|||||||
	int			array_idx;
 | 
						int			array_idx;
 | 
				
			||||||
	int			prev_match_end;
 | 
						int			prev_match_end;
 | 
				
			||||||
	int			prev_valid_match_end;
 | 
						int			prev_valid_match_end;
 | 
				
			||||||
	int			start_search;
 | 
					 | 
				
			||||||
	int			maxlen = 0;		/* largest fetch length in characters */
 | 
						int			maxlen = 0;		/* largest fetch length in characters */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* save original string --- we'll extract result substrings from it */
 | 
						/* save original string --- we'll extract result substrings from it */
 | 
				
			||||||
@@ -1142,7 +1461,6 @@ setup_regexp_matches(text *orig_str, text *pattern, pg_re_flags *re_flags,
 | 
				
			|||||||
	/* search for the pattern, perhaps repeatedly */
 | 
						/* search for the pattern, perhaps repeatedly */
 | 
				
			||||||
	prev_match_end = 0;
 | 
						prev_match_end = 0;
 | 
				
			||||||
	prev_valid_match_end = 0;
 | 
						prev_valid_match_end = 0;
 | 
				
			||||||
	start_search = 0;
 | 
					 | 
				
			||||||
	while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
 | 
						while (RE_wchar_execute(cpattern, wide_str, wide_len, start_search,
 | 
				
			||||||
							pmatch_len, pmatch))
 | 
												pmatch_len, pmatch))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
@@ -1367,7 +1685,7 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
		/* be sure to copy the input string into the multi-call ctx */
 | 
							/* be sure to copy the input string into the multi-call ctx */
 | 
				
			||||||
		splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
 | 
							splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
 | 
				
			||||||
										&re_flags,
 | 
															&re_flags, 0,
 | 
				
			||||||
										PG_GET_COLLATION(),
 | 
															PG_GET_COLLATION(),
 | 
				
			||||||
										false, true, true);
 | 
															false, true, true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1422,7 +1740,7 @@ regexp_split_to_array(PG_FUNCTION_ARGS)
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
 | 
						splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
 | 
				
			||||||
									PG_GETARG_TEXT_PP(1),
 | 
														PG_GETARG_TEXT_PP(1),
 | 
				
			||||||
									&re_flags,
 | 
														&re_flags, 0,
 | 
				
			||||||
									PG_GET_COLLATION(),
 | 
														PG_GET_COLLATION(),
 | 
				
			||||||
									false, true, true);
 | 
														false, true, true);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -1489,6 +1807,125 @@ build_regexp_split_result(regexp_matches_ctx *splitctx)
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * regexp_substr()
 | 
				
			||||||
 | 
					 *		Return the substring that matches a regular expression pattern
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_substr(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						text	   *str = PG_GETARG_TEXT_PP(0);
 | 
				
			||||||
 | 
						text	   *pattern = PG_GETARG_TEXT_PP(1);
 | 
				
			||||||
 | 
						int			start = 1;
 | 
				
			||||||
 | 
						int			n = 1;
 | 
				
			||||||
 | 
						text	   *flags = PG_GETARG_TEXT_PP_IF_EXISTS(4);
 | 
				
			||||||
 | 
						int			subexpr = 0;
 | 
				
			||||||
 | 
						int			so,
 | 
				
			||||||
 | 
									eo,
 | 
				
			||||||
 | 
									pos;
 | 
				
			||||||
 | 
						pg_re_flags re_flags;
 | 
				
			||||||
 | 
						regexp_matches_ctx *matchctx;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Collect optional parameters */
 | 
				
			||||||
 | 
						if (PG_NARGS() > 2)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							start = PG_GETARG_INT32(2);
 | 
				
			||||||
 | 
							if (start <= 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"start", start)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (PG_NARGS() > 3)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							n = PG_GETARG_INT32(3);
 | 
				
			||||||
 | 
							if (n <= 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"n", n)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
						if (PG_NARGS() > 5)
 | 
				
			||||||
 | 
						{
 | 
				
			||||||
 | 
							subexpr = PG_GETARG_INT32(5);
 | 
				
			||||||
 | 
							if (subexpr < 0)
 | 
				
			||||||
 | 
								ereport(ERROR,
 | 
				
			||||||
 | 
										(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
										 errmsg("invalid value for parameter \"%s\": %d",
 | 
				
			||||||
 | 
												"subexpr", subexpr)));
 | 
				
			||||||
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Determine options */
 | 
				
			||||||
 | 
						parse_re_flags(&re_flags, flags);
 | 
				
			||||||
 | 
						/* User mustn't specify 'g' */
 | 
				
			||||||
 | 
						if (re_flags.glob)
 | 
				
			||||||
 | 
							ereport(ERROR,
 | 
				
			||||||
 | 
									(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 | 
				
			||||||
 | 
							/* translator: %s is a SQL function name */
 | 
				
			||||||
 | 
									 errmsg("%s does not support the \"global\" option",
 | 
				
			||||||
 | 
											"regexp_substr()")));
 | 
				
			||||||
 | 
						/* But we find all the matches anyway */
 | 
				
			||||||
 | 
						re_flags.glob = true;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Do the matching */
 | 
				
			||||||
 | 
						matchctx = setup_regexp_matches(str, pattern, &re_flags, start - 1,
 | 
				
			||||||
 | 
														PG_GET_COLLATION(),
 | 
				
			||||||
 | 
														(subexpr > 0),	/* need submatches? */
 | 
				
			||||||
 | 
														false, false);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* When n exceeds matches return NULL (includes case of no matches) */
 | 
				
			||||||
 | 
						if (n > matchctx->nmatches)
 | 
				
			||||||
 | 
							PG_RETURN_NULL();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* When subexpr exceeds number of subexpressions return NULL */
 | 
				
			||||||
 | 
						if (subexpr > matchctx->npatterns)
 | 
				
			||||||
 | 
							PG_RETURN_NULL();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Select the appropriate match position to return */
 | 
				
			||||||
 | 
						pos = (n - 1) * matchctx->npatterns;
 | 
				
			||||||
 | 
						if (subexpr > 0)
 | 
				
			||||||
 | 
							pos += subexpr - 1;
 | 
				
			||||||
 | 
						pos *= 2;
 | 
				
			||||||
 | 
						so = matchctx->match_locs[pos];
 | 
				
			||||||
 | 
						eo = matchctx->match_locs[pos + 1];
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						if (so < 0 || eo < 0)
 | 
				
			||||||
 | 
							PG_RETURN_NULL();		/* unidentifiable location */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						PG_RETURN_DATUM(DirectFunctionCall3(text_substr,
 | 
				
			||||||
 | 
															PointerGetDatum(matchctx->orig_str),
 | 
				
			||||||
 | 
															Int32GetDatum(so + 1),
 | 
				
			||||||
 | 
															Int32GetDatum(eo - so)));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_substr_no_start(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_substr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_substr_no_n(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_substr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_substr_no_flags(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_substr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* This is separate to keep the opr_sanity regression test from complaining */
 | 
				
			||||||
 | 
					Datum
 | 
				
			||||||
 | 
					regexp_substr_no_subexpr(PG_FUNCTION_ARGS)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						return regexp_substr(fcinfo);
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
 | 
					 * regexp_fixed_prefix - extract fixed prefix, if any, for a regexp
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -4496,23 +4496,28 @@ appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
 | 
				
			|||||||
/*
 | 
					/*
 | 
				
			||||||
 * replace_text_regexp
 | 
					 * replace_text_regexp
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * replace text that matches to regexp in src_text to replace_text.
 | 
					 * replace substring(s) in src_text that match regexp with replace_text.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * search_start: the character (not byte) offset in src_text at which to
 | 
				
			||||||
 | 
					 * begin searching.
 | 
				
			||||||
 | 
					 * n: if 0, replace all matches; if > 0, replace only the N'th match.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * Note: to avoid having to include regex.h in builtins.h, we declare
 | 
					 * Note: to avoid having to include regex.h in builtins.h, we declare
 | 
				
			||||||
 * the regexp argument as void *, but really it's regex_t *.
 | 
					 * the regexp argument as void *, but really it's regex_t *.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
text *
 | 
					text *
 | 
				
			||||||
replace_text_regexp(text *src_text, void *regexp,
 | 
					replace_text_regexp(text *src_text, void *regexp,
 | 
				
			||||||
					text *replace_text, bool glob)
 | 
										text *replace_text,
 | 
				
			||||||
 | 
										int search_start, int n)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
	text	   *ret_text;
 | 
						text	   *ret_text;
 | 
				
			||||||
	regex_t    *re = (regex_t *) regexp;
 | 
						regex_t    *re = (regex_t *) regexp;
 | 
				
			||||||
	int			src_text_len = VARSIZE_ANY_EXHDR(src_text);
 | 
						int			src_text_len = VARSIZE_ANY_EXHDR(src_text);
 | 
				
			||||||
 | 
						int			nmatches = 0;
 | 
				
			||||||
	StringInfoData buf;
 | 
						StringInfoData buf;
 | 
				
			||||||
	regmatch_t	pmatch[REGEXP_REPLACE_BACKREF_CNT];
 | 
						regmatch_t	pmatch[REGEXP_REPLACE_BACKREF_CNT];
 | 
				
			||||||
	pg_wchar   *data;
 | 
						pg_wchar   *data;
 | 
				
			||||||
	size_t		data_len;
 | 
						size_t		data_len;
 | 
				
			||||||
	int			search_start;
 | 
					 | 
				
			||||||
	int			data_pos;
 | 
						int			data_pos;
 | 
				
			||||||
	char	   *start_ptr;
 | 
						char	   *start_ptr;
 | 
				
			||||||
	bool		have_escape;
 | 
						bool		have_escape;
 | 
				
			||||||
@@ -4530,7 +4535,6 @@ replace_text_regexp(text *src_text, void *regexp,
 | 
				
			|||||||
	start_ptr = (char *) VARDATA_ANY(src_text);
 | 
						start_ptr = (char *) VARDATA_ANY(src_text);
 | 
				
			||||||
	data_pos = 0;
 | 
						data_pos = 0;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	search_start = 0;
 | 
					 | 
				
			||||||
	while (search_start <= data_len)
 | 
						while (search_start <= data_len)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		int			regexec_result;
 | 
							int			regexec_result;
 | 
				
			||||||
@@ -4560,6 +4564,23 @@ replace_text_regexp(text *src_text, void *regexp,
 | 
				
			|||||||
					 errmsg("regular expression failed: %s", errMsg)));
 | 
										 errmsg("regular expression failed: %s", errMsg)));
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/*
 | 
				
			||||||
 | 
							 * Count matches, and decide whether to replace this match.
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							nmatches++;
 | 
				
			||||||
 | 
							if (n > 0 && nmatches != n)
 | 
				
			||||||
 | 
							{
 | 
				
			||||||
 | 
								/*
 | 
				
			||||||
 | 
								 * No, so advance search_start, but not start_ptr/data_pos. (Thus,
 | 
				
			||||||
 | 
								 * we treat the matched text as if it weren't matched, and copy it
 | 
				
			||||||
 | 
								 * to the output later.)
 | 
				
			||||||
 | 
								 */
 | 
				
			||||||
 | 
								search_start = pmatch[0].rm_eo;
 | 
				
			||||||
 | 
								if (pmatch[0].rm_so == pmatch[0].rm_eo)
 | 
				
			||||||
 | 
									search_start++;
 | 
				
			||||||
 | 
								continue;
 | 
				
			||||||
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * Copy the text to the left of the match position.  Note we are given
 | 
							 * Copy the text to the left of the match position.  Note we are given
 | 
				
			||||||
		 * character not byte indexes.
 | 
							 * character not byte indexes.
 | 
				
			||||||
@@ -4596,9 +4617,9 @@ replace_text_regexp(text *src_text, void *regexp,
 | 
				
			|||||||
		data_pos = pmatch[0].rm_eo;
 | 
							data_pos = pmatch[0].rm_eo;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * When global option is off, replace the first instance only.
 | 
							 * If we only want to replace one occurrence, we're done.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		if (!glob)
 | 
							if (n > 0)
 | 
				
			||||||
			break;
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -53,6 +53,6 @@
 | 
				
			|||||||
 */
 | 
					 */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*							yyyymmddN */
 | 
					/*							yyyymmddN */
 | 
				
			||||||
#define CATALOG_VERSION_NO	202107261
 | 
					#define CATALOG_VERSION_NO	202108031
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -3565,6 +3565,18 @@
 | 
				
			|||||||
{ oid => '2285', descr => 'replace text using regexp',
 | 
					{ oid => '2285', descr => 'replace text using regexp',
 | 
				
			||||||
  proname => 'regexp_replace', prorettype => 'text',
 | 
					  proname => 'regexp_replace', prorettype => 'text',
 | 
				
			||||||
  proargtypes => 'text text text text', prosrc => 'textregexreplace' },
 | 
					  proargtypes => 'text text text text', prosrc => 'textregexreplace' },
 | 
				
			||||||
 | 
					{ oid => '9611', descr => 'replace text using regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_replace', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text text int4 int4 text',
 | 
				
			||||||
 | 
					  prosrc => 'textregexreplace_extended' },
 | 
				
			||||||
 | 
					{ oid => '9612', descr => 'replace text using regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_replace', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text text int4 int4',
 | 
				
			||||||
 | 
					  prosrc => 'textregexreplace_extended_no_flags' },
 | 
				
			||||||
 | 
					{ oid => '9613', descr => 'replace text using regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_replace', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text text int4',
 | 
				
			||||||
 | 
					  prosrc => 'textregexreplace_extended_no_n' },
 | 
				
			||||||
{ oid => '3396', descr => 'find first match for regexp',
 | 
					{ oid => '3396', descr => 'find first match for regexp',
 | 
				
			||||||
  proname => 'regexp_match', prorettype => '_text', proargtypes => 'text text',
 | 
					  proname => 'regexp_match', prorettype => '_text', proargtypes => 'text text',
 | 
				
			||||||
  prosrc => 'regexp_match_no_flags' },
 | 
					  prosrc => 'regexp_match_no_flags' },
 | 
				
			||||||
@@ -3579,6 +3591,58 @@
 | 
				
			|||||||
  proname => 'regexp_matches', prorows => '10', proretset => 't',
 | 
					  proname => 'regexp_matches', prorows => '10', proretset => 't',
 | 
				
			||||||
  prorettype => '_text', proargtypes => 'text text text',
 | 
					  prorettype => '_text', proargtypes => 'text text text',
 | 
				
			||||||
  prosrc => 'regexp_matches' },
 | 
					  prosrc => 'regexp_matches' },
 | 
				
			||||||
 | 
					{ oid => '9614', descr => 'count regexp matches',
 | 
				
			||||||
 | 
					  proname => 'regexp_count', prorettype => 'int4', proargtypes => 'text text',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_count_no_start' },
 | 
				
			||||||
 | 
					{ oid => '9615', descr => 'count regexp matches',
 | 
				
			||||||
 | 
					  proname => 'regexp_count', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4', prosrc => 'regexp_count_no_flags' },
 | 
				
			||||||
 | 
					{ oid => '9616', descr => 'count regexp matches',
 | 
				
			||||||
 | 
					  proname => 'regexp_count', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 text', prosrc => 'regexp_count' },
 | 
				
			||||||
 | 
					{ oid => '9617', descr => 'position of regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_instr', prorettype => 'int4', proargtypes => 'text text',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_instr_no_start' },
 | 
				
			||||||
 | 
					{ oid => '9618', descr => 'position of regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_instr', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4', prosrc => 'regexp_instr_no_n' },
 | 
				
			||||||
 | 
					{ oid => '9619', descr => 'position of regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_instr', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4', prosrc => 'regexp_instr_no_endoption' },
 | 
				
			||||||
 | 
					{ oid => '9620', descr => 'position of regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_instr', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4 int4',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_instr_no_flags' },
 | 
				
			||||||
 | 
					{ oid => '9621', descr => 'position of regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_instr', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4 int4 text',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_instr_no_subexpr' },
 | 
				
			||||||
 | 
					{ oid => '9622', descr => 'position of regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_instr', prorettype => 'int4',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4 int4 text int4',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_instr' },
 | 
				
			||||||
 | 
					{ oid => '9623', descr => 'test for regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_like', prorettype => 'bool', proargtypes => 'text text',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_like_no_flags' },
 | 
				
			||||||
 | 
					{ oid => '9624', descr => 'test for regexp match',
 | 
				
			||||||
 | 
					  proname => 'regexp_like', prorettype => 'bool',
 | 
				
			||||||
 | 
					  proargtypes => 'text text text', prosrc => 'regexp_like' },
 | 
				
			||||||
 | 
					{ oid => '9625', descr => 'extract substring that matches regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_substr', prorettype => 'text', proargtypes => 'text text',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_substr_no_start' },
 | 
				
			||||||
 | 
					{ oid => '9626', descr => 'extract substring that matches regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_substr', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4', prosrc => 'regexp_substr_no_n' },
 | 
				
			||||||
 | 
					{ oid => '9627', descr => 'extract substring that matches regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_substr', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4', prosrc => 'regexp_substr_no_flags' },
 | 
				
			||||||
 | 
					{ oid => '9628', descr => 'extract substring that matches regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_substr', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4 text',
 | 
				
			||||||
 | 
					  prosrc => 'regexp_substr_no_subexpr' },
 | 
				
			||||||
 | 
					{ oid => '9629', descr => 'extract substring that matches regexp',
 | 
				
			||||||
 | 
					  proname => 'regexp_substr', prorettype => 'text',
 | 
				
			||||||
 | 
					  proargtypes => 'text text int4 int4 text int4', prosrc => 'regexp_substr' },
 | 
				
			||||||
{ oid => '2088', descr => 'split string by field_sep and return field_num',
 | 
					{ oid => '2088', descr => 'split string by field_sep and return field_num',
 | 
				
			||||||
  proname => 'split_part', prorettype => 'text',
 | 
					  proname => 'split_part', prorettype => 'text',
 | 
				
			||||||
  proargtypes => 'text text int4', prosrc => 'split_part' },
 | 
					  proargtypes => 'text text int4', prosrc => 'split_part' },
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -34,6 +34,7 @@ extern bool SplitDirectoriesString(char *rawstring, char separator,
 | 
				
			|||||||
extern bool SplitGUCList(char *rawstring, char separator,
 | 
					extern bool SplitGUCList(char *rawstring, char separator,
 | 
				
			||||||
						 List **namelist);
 | 
											 List **namelist);
 | 
				
			||||||
extern text *replace_text_regexp(text *src_text, void *regexp,
 | 
					extern text *replace_text_regexp(text *src_text, void *regexp,
 | 
				
			||||||
								 text *replace_text, bool glob);
 | 
													 text *replace_text,
 | 
				
			||||||
 | 
													 int search_start, int n);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#endif
 | 
					#endif
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -515,6 +515,13 @@ SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
 | 
				
			|||||||
 cde
 | 
					 cde
 | 
				
			||||||
(1 row)
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Check case where we have a match, but not a subexpression match
 | 
				
			||||||
 | 
					SELECT SUBSTRING('foo' FROM 'foo(bar)?') IS NULL AS t;
 | 
				
			||||||
 | 
					 t 
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
 | 
					-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
 | 
				
			||||||
SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
 | 
					SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
 | 
				
			||||||
 true 
 | 
					 true 
 | 
				
			||||||
@@ -592,6 +599,370 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
 | 
				
			|||||||
-- invalid regexp option
 | 
					-- invalid regexp option
 | 
				
			||||||
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
 | 
					SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
 | 
				
			||||||
ERROR:  invalid regular expression option: "z"
 | 
					ERROR:  invalid regular expression option: "z"
 | 
				
			||||||
 | 
					-- extended regexp_replace tests
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1);
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 X PostgreSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1, 2);
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 A PXstgreSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 X PXstgrXSQL fXnctXXn
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'i');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 X PostgreSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 2, 'i');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 A PXstgreSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 A PostgrXSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 9, 'i');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 A PostgreSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 7, 0, 'i');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 A PostgrXSQL fXnctXXn
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- 'g' flag should be ignored when N is specified
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'g');
 | 
				
			||||||
 | 
					    regexp_replace     
 | 
				
			||||||
 | 
					-----------------------
 | 
				
			||||||
 | 
					 A PXstgreSQL function
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', -1, 0, 'i');
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "start": -1
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, -1, 'i');
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "n": -1
 | 
				
			||||||
 | 
					-- erroneous invocation of non-extended form
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', '1');
 | 
				
			||||||
 | 
					ERROR:  invalid regular expression option: "1"
 | 
				
			||||||
 | 
					HINT:  If you meant to use regexp_replace() with a start parameter, cast the fourth argument to integer explicitly.
 | 
				
			||||||
 | 
					--  regexp_count tests
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123123', '(12)3');
 | 
				
			||||||
 | 
					 regexp_count 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            5
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 1);
 | 
				
			||||||
 | 
					 regexp_count 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 3);
 | 
				
			||||||
 | 
					 regexp_count 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            3
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 33);
 | 
				
			||||||
 | 
					 regexp_count 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_count('ABCABCABCABC', 'Abc', 1, '');
 | 
				
			||||||
 | 
					 regexp_count 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_count('ABCABCABCABC', 'Abc', 1, 'i');
 | 
				
			||||||
 | 
					 regexp_count 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 0);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "start": 0
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', -3);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "start": -3
 | 
				
			||||||
 | 
					-- regexp_like tests
 | 
				
			||||||
 | 
					SELECT regexp_like('Steven', '^Ste(v|ph)en$');
 | 
				
			||||||
 | 
					 regexp_like 
 | 
				
			||||||
 | 
					-------------
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 'n');
 | 
				
			||||||
 | 
					 regexp_like 
 | 
				
			||||||
 | 
					-------------
 | 
				
			||||||
 | 
					 f
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 's');
 | 
				
			||||||
 | 
					 regexp_like 
 | 
				
			||||||
 | 
					-------------
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_like('abc', ' a . c ', 'x');
 | 
				
			||||||
 | 
					 regexp_like 
 | 
				
			||||||
 | 
					-------------
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_like('abc', 'a.c', 'g');  -- error
 | 
				
			||||||
 | 
					ERROR:  regexp_like() does not support the "global" option
 | 
				
			||||||
 | 
					-- regexp_instr tests
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcdefghi', 'd.f');
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcdefghi', 'd.q');
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c');
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            1
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 2);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 3);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            7
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 4);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i');
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            1
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            1
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            5
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            7
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            9
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            4
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            9
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            7
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            9
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Check case where we have a match, but not a subexpression match
 | 
				
			||||||
 | 
					SELECT regexp_instr('foo', 'foo(bar)?', 1, 1, 0, '', 1);
 | 
				
			||||||
 | 
					 regexp_instr 
 | 
				
			||||||
 | 
					--------------
 | 
				
			||||||
 | 
					            0
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 0, 1);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "start": 0
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 0);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "n": 0
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "endoption": -1
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 2);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "endoption": 2
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, 'g');
 | 
				
			||||||
 | 
					ERROR:  regexp_instr() does not support the "global" option
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "subexpr": -1
 | 
				
			||||||
 | 
					-- regexp_substr tests
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcdefghi', 'd.f');
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 def
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t;
 | 
				
			||||||
 | 
					 t 
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c');
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 abc
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 2);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 abc
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 3);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 abc
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t;
 | 
				
			||||||
 | 
					 t 
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i');
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 abc
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 12345678
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 123
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 45678
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 3);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 56
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
 | 
				
			||||||
 | 
					 regexp_substr 
 | 
				
			||||||
 | 
					---------------
 | 
				
			||||||
 | 
					 78
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 5) IS NULL AS t;
 | 
				
			||||||
 | 
					 t 
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- Check case where we have a match, but not a subexpression match
 | 
				
			||||||
 | 
					SELECT regexp_substr('foo', 'foo(bar)?', 1, 1, '', 1) IS NULL AS t;
 | 
				
			||||||
 | 
					 t 
 | 
				
			||||||
 | 
					---
 | 
				
			||||||
 | 
					 t
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 0, 1);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "start": 0
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 0);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "n": 0
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, 'g');
 | 
				
			||||||
 | 
					ERROR:  regexp_substr() does not support the "global" option
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, '', -1);
 | 
				
			||||||
 | 
					ERROR:  invalid value for parameter "subexpr": -1
 | 
				
			||||||
-- set so we can tell NULL from empty string
 | 
					-- set so we can tell NULL from empty string
 | 
				
			||||||
\pset null '\\N'
 | 
					\pset null '\\N'
 | 
				
			||||||
-- return all matches from regexp
 | 
					-- return all matches from regexp
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -171,6 +171,8 @@ SELECT SUBSTRING('abcdefg' FROM 'c.e') AS "cde";
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
-- With a parenthesized subexpression, return only what matches the subexpr
 | 
					-- With a parenthesized subexpression, return only what matches the subexpr
 | 
				
			||||||
SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
 | 
					SELECT SUBSTRING('abcdefg' FROM 'b(.*)f') AS "cde";
 | 
				
			||||||
 | 
					-- Check case where we have a match, but not a subexpression match
 | 
				
			||||||
 | 
					SELECT SUBSTRING('foo' FROM 'foo(bar)?') IS NULL AS t;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
 | 
					-- Check behavior of SIMILAR TO, which uses largely the same regexp variant
 | 
				
			||||||
SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
 | 
					SELECT 'abcdefg' SIMILAR TO '_bcd%' AS true;
 | 
				
			||||||
@@ -193,6 +195,93 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi');
 | 
				
			|||||||
-- invalid regexp option
 | 
					-- invalid regexp option
 | 
				
			||||||
SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
 | 
					SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- extended regexp_replace tests
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1);
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 1, 2);
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 0, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 2, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 3, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 9, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'A|e|i|o|u', 'X', 7, 0, 'i');
 | 
				
			||||||
 | 
					-- 'g' flag should be ignored when N is specified
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, 1, 'g');
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', -1, 0, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', 1, -1, 'i');
 | 
				
			||||||
 | 
					-- erroneous invocation of non-extended form
 | 
				
			||||||
 | 
					SELECT regexp_replace('A PostgreSQL function', 'a|e|i|o|u', 'X', '1');
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					--  regexp_count tests
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123123', '(12)3');
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 1);
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 3);
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 33);
 | 
				
			||||||
 | 
					SELECT regexp_count('ABCABCABCABC', 'Abc', 1, '');
 | 
				
			||||||
 | 
					SELECT regexp_count('ABCABCABCABC', 'Abc', 1, 'i');
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', 0);
 | 
				
			||||||
 | 
					SELECT regexp_count('123123123123', '123', -3);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- regexp_like tests
 | 
				
			||||||
 | 
					SELECT regexp_like('Steven', '^Ste(v|ph)en$');
 | 
				
			||||||
 | 
					SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 'n');
 | 
				
			||||||
 | 
					SELECT regexp_like('a'||CHR(10)||'d', 'a.d', 's');
 | 
				
			||||||
 | 
					SELECT regexp_like('abc', ' a . c ', 'x');
 | 
				
			||||||
 | 
					SELECT regexp_like('abc', 'a.c', 'g');  -- error
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- regexp_instr tests
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcdefghi', 'd.f');
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcdefghi', 'd.q');
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c');
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 2);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 3);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 4);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'A.C', 1, 2, 0, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 0);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 1);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 2);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 3);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 4);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 0, 'i', 5);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 0);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 1);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 2);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 3);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 4);
 | 
				
			||||||
 | 
					SELECT regexp_instr('1234567890', '(123)(4(56)(78))', 1, 1, 1, 'i', 5);
 | 
				
			||||||
 | 
					-- Check case where we have a match, but not a subexpression match
 | 
				
			||||||
 | 
					SELECT regexp_instr('foo', 'foo(bar)?', 1, 1, 0, '', 1);
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 0, 1);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 0);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, -1);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 2);
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, 'g');
 | 
				
			||||||
 | 
					SELECT regexp_instr('abcabcabc', 'a.c', 1, 1, 0, '', -1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					-- regexp_substr tests
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcdefghi', 'd.f');
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcdefghi', 'd.q') IS NULL AS t;
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c');
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 2);
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 3);
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 4) IS NULL AS t;
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'A.C', 1, 2, 'i');
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 0);
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 1);
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 2);
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 3);
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 4);
 | 
				
			||||||
 | 
					SELECT regexp_substr('1234567890', '(123)(4(56)(78))', 1, 1, 'i', 5) IS NULL AS t;
 | 
				
			||||||
 | 
					-- Check case where we have a match, but not a subexpression match
 | 
				
			||||||
 | 
					SELECT regexp_substr('foo', 'foo(bar)?', 1, 1, '', 1) IS NULL AS t;
 | 
				
			||||||
 | 
					-- errors
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 0, 1);
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 0);
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, 'g');
 | 
				
			||||||
 | 
					SELECT regexp_substr('abcabcabc', 'a.c', 1, 1, '', -1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
-- set so we can tell NULL from empty string
 | 
					-- set so we can tell NULL from empty string
 | 
				
			||||||
\pset null '\\N'
 | 
					\pset null '\\N'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user