mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Add three new regexp functions: regexp_matches, regexp_split_to_array,
and regexp_split_to_table. These functions provide access to the capture groups resulting from a POSIX regular expression match, and provide the ability to split a string on a POSIX regular expression, respectively. Patch from Jeremy Drake; code review by Neil Conway, additional comments and suggestions from Tom and Peter E. This patch bumps the catversion, adds some regression tests, and updates the docs.
This commit is contained in:
		| @@ -1,4 +1,4 @@ | ||||
| <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.369 2007/02/20 19:59:04 momjian Exp $ --> | ||||
| <!-- $PostgreSQL: pgsql/doc/src/sgml/func.sgml,v 1.370 2007/03/20 05:44:59 neilc Exp $ --> | ||||
|  | ||||
|  <chapter id="functions"> | ||||
|   <title>Functions and Operators</title> | ||||
| @@ -1468,17 +1468,52 @@ | ||||
|       </row> | ||||
|  | ||||
|       <row> | ||||
|        <entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [,<parameter>flags</parameter> <type>text</type>])</literal></entry> | ||||
|        <entry><literal><function>regexp_matches</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry> | ||||
|        <entry><type>setof text[]</type></entry> | ||||
|        <entry> | ||||
|         Return all capture groups resulting from matching POSIX regular | ||||
|         expression against the <parameter>string</parameter>. See | ||||
|         <xref linkend="functions-posix-regexp"> for more information. | ||||
|        </entry> | ||||
|        <entry><literal>regexp_matches('foobarbequebaz', '(bar)(beque)')</literal></entry> | ||||
|        <entry><literal>{bar,beque}</literal></entry> | ||||
|       </row> | ||||
|  | ||||
|       <row> | ||||
|        <entry><literal><function>regexp_replace</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type>, <parameter>replacement</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry> | ||||
|        <entry><type>text</type></entry> | ||||
|        <entry> | ||||
|         Replace substring matching POSIX regular expression. See | ||||
|         <xref linkend="functions-matching"> for more information on pattern | ||||
|         matching. | ||||
|         <xref linkend="functions-posix-regexp"> for more information. | ||||
|        </entry> | ||||
|        <entry><literal>regexp_replace('Thomas', '.[mN]a.', 'M')</literal></entry> | ||||
|        <entry><literal>ThM</literal></entry> | ||||
|       </row> | ||||
|  | ||||
|       <row> | ||||
|        <entry><literal><function>regexp_split_to_array</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type> ])</literal></entry> | ||||
|        <entry><type>text[]</type></entry> | ||||
|        <entry> | ||||
|         Split <parameter>string</parameter> using POSIX regular expression as | ||||
|         the delimiter.  See <xref linkend="functions-posix-regexp"> for more | ||||
|         information. | ||||
|        </entry> | ||||
|        <entry><literal>regexp_split_to_array('hello world', E'\\s+')</literal></entry> | ||||
|        <entry><literal>{hello,world}</literal></entry> | ||||
|       </row> | ||||
|  | ||||
|       <row> | ||||
|        <entry><literal><function>regexp_split_to_table</function>(<parameter>string</parameter> <type>text</type>, <parameter>pattern</parameter> <type>text</type> [, <parameter>flags</parameter> <type>text</type>])</literal></entry> | ||||
|        <entry><type>setof text</type></entry> | ||||
|        <entry> | ||||
|         Split <parameter>string</parameter> using POSIX regular expression as | ||||
|         the delimiter.  See <xref linkend="functions-posix-regexp"> for more | ||||
|         information. | ||||
|        </entry> | ||||
|        <entry><literal>regexp_split_to_table('hello world', E'\\s+')</literal></entry> | ||||
|        <entry><literal>hello</literal><para><literal>world</literal></para> (2 rows)</entry> | ||||
|       </row> | ||||
|  | ||||
|       <row> | ||||
|        <entry><literal><function>repeat</function>(<parameter>string</parameter> <type>text</type>, <parameter>number</parameter> <type>int</type>)</literal></entry> | ||||
|        <entry><type>text</type></entry> | ||||
| @@ -2883,9 +2918,6 @@ cast(-44 as bit(12))           <lineannotation>111111010100</lineannotation> | ||||
|    <indexterm> | ||||
|     <primary>substring</primary> | ||||
|    </indexterm> | ||||
|    <indexterm> | ||||
|     <primary>regexp_replace</primary> | ||||
|    </indexterm> | ||||
|  | ||||
| <synopsis> | ||||
| <replaceable>string</replaceable> SIMILAR TO <replaceable>pattern</replaceable> <optional>ESCAPE <replaceable>escape-character</replaceable></optional> | ||||
| @@ -3004,6 +3036,21 @@ substring('foobar' from '#"o_b#"%' for '#')    <lineannotation>NULL</lineannotat | ||||
|     <primary>regular expression</primary> | ||||
|     <seealso>pattern matching</seealso> | ||||
|    </indexterm> | ||||
|    <indexterm> | ||||
|     <primary>substring</primary> | ||||
|    </indexterm> | ||||
|    <indexterm> | ||||
|     <primary>regexp_replace</primary> | ||||
|    </indexterm> | ||||
|    <indexterm> | ||||
|     <primary>regexp_matches</primary> | ||||
|    </indexterm> | ||||
|    <indexterm> | ||||
|     <primary>regexp_split_to_table</primary> | ||||
|    </indexterm> | ||||
|    <indexterm> | ||||
|     <primary>regexp_split_to_array</primary> | ||||
|    </indexterm> | ||||
|  | ||||
|    <para> | ||||
|     <xref linkend="functions-posix-table"> lists the available | ||||
| @@ -3134,7 +3181,10 @@ substring('foobar' from 'o(.)b')   <lineannotation>o</lineannotation> | ||||
|      string containing zero or more single-letter flags that change the | ||||
|      function's behavior.  Flag <literal>i</> specifies case-insensitive | ||||
|      matching, while flag <literal>g</> specifies replacement of each matching | ||||
|      substring rather than only the first one. | ||||
|      substring rather than only the first one.  Other supported flags are | ||||
|      <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and | ||||
|      <literal>x</>, whose meanings correspond to those shown in | ||||
|      <xref linkend="posix-embedded-options-table">. | ||||
|     </para> | ||||
|  | ||||
|    <para> | ||||
| @@ -3149,6 +3199,124 @@ regexp_replace('foobarbaz', 'b(..)', E'X\\1Y', 'g') | ||||
| </programlisting> | ||||
|    </para> | ||||
|  | ||||
|     <para> | ||||
|      The <function>regexp_matches</> function returns all of the capture | ||||
|      groups resulting from matching a POSIX regular expression pattern. | ||||
|      It has the syntax | ||||
|      <function>regexp_matches</function>(<replaceable>string</>, <replaceable>pattern</> | ||||
|      <optional>, <replaceable>flags</> </optional>). | ||||
|      If there is no match to the <replaceable>pattern</>, the function returns no rows. | ||||
|      If there is a match, the function returns the contents of all of the capture groups | ||||
|      in a text array, or if there were no capture groups in the pattern, it returns the | ||||
|      contents of the entire match as a single-element text array. | ||||
|      The <replaceable>flags</> parameter is an optional text | ||||
|      string containing zero or more single-letter flags that change the | ||||
|      function's behavior.  Flag <literal>i</> specifies case-insensitive | ||||
|      matching, while flag <literal>g</> causes the return of each matching | ||||
|      substring rather than only the first one.  Other supported | ||||
|      flags are <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and | ||||
|      <literal>x</>, whose meanings are described in | ||||
|      <xref linkend="posix-embedded-options-table">. | ||||
|     </para> | ||||
|  | ||||
|    <para> | ||||
|     Some examples: | ||||
| <programlisting> | ||||
| SELECT regexp_matches('foobarbequebaz', '(bar)(beque)'); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {bar,beque} | ||||
| (1 row) | ||||
|  | ||||
| SELECT regexp_matches('foobarbequebazilbarfbonk', '(b[^b]+)(b[^b]+)', 'g'); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {bar,beque} | ||||
|  {bazil,barf} | ||||
| (2 rows) | ||||
|  | ||||
| SELECT regexp_matches('foobarbequebaz', 'barbeque'); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {barbeque} | ||||
| (1 row) | ||||
| </programlisting> | ||||
|    </para> | ||||
|  | ||||
|     <para> | ||||
|      The <function>regexp_split_to_table</> function splits a string using a POSIX | ||||
|      regular expression pattern as a delimiter.  It has the syntax | ||||
|      <function>regexp_split_to_table</function>(<replaceable>string</>, <replaceable>pattern</> | ||||
|      <optional>, <replaceable>flags</> </optional>). | ||||
|      If there is no match to the <replaceable>pattern</>, the function returns the | ||||
|      <replaceable>string</>.  If there is at least one match, for each match it returns | ||||
|      the text from the end of the last match (or the beginning of the string) | ||||
|      to the beginning of the match.  When there are no more matches, it | ||||
|      returns the text from the end of the last match to the end of the string. | ||||
|      The <replaceable>flags</> parameter is an optional text string containing | ||||
|      zero or more single-letter flags that change the function's behavior. | ||||
|      <function>regexp_split_to_table</function> supports the flags <literal>i</>, | ||||
|      <literal>m</>, <literal>n</>, <literal>p</>, <literal>w</> and | ||||
|      <literal>x</>, whose meanings are described in | ||||
|      <xref linkend="posix-embedded-options-table">. | ||||
|     </para> | ||||
|  | ||||
|     <para> | ||||
|      The <function>regexp_split_to_array</> function behaves the same as | ||||
|      <function>regexp_split_to_table</>, except that <function>regexp_split_to_array</> | ||||
|      returns its results as a <type>text[]</>.  It has the syntax | ||||
|      <function>regexp_split_to_array</function>(<replaceable>string</>, <replaceable>pattern</> | ||||
|      <optional>, <replaceable>flags</> </optional>). | ||||
|      The parameters are the same as for <function>regexp_split_to_table</>. | ||||
|     </para> | ||||
|  | ||||
|    <para> | ||||
|     Some examples: | ||||
| <programlisting> | ||||
|  | ||||
| SELECT foo FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', E'\\\s+') AS foo; | ||||
|   foo    | ||||
| -------- | ||||
|  the     | ||||
|  quick   | ||||
|  brown   | ||||
|  fox     | ||||
|  jumped  | ||||
|  over    | ||||
|  the     | ||||
|  lazy    | ||||
|  dog     | ||||
| (9 rows) | ||||
|  | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', E'\\s+'); | ||||
|               regexp_split_to_array              | ||||
| ------------------------------------------------ | ||||
|  {the,quick,brown,fox,jumped,over,the,lazy,dog} | ||||
| (1 row) | ||||
|  | ||||
| SELECT foo FROM regexp_split_to_table('the quick brown fox', E'\\s*') AS foo; | ||||
|  foo  | ||||
| ----- | ||||
|  t          | ||||
|  h          | ||||
|  e          | ||||
|  q          | ||||
|  u          | ||||
|  i          | ||||
|  c          | ||||
|  k          | ||||
|  b          | ||||
|  r          | ||||
|  o          | ||||
|  w          | ||||
|  n          | ||||
|  f          | ||||
|  o          | ||||
|  x          | ||||
| (16 rows) | ||||
| </programlisting> | ||||
|    </para> | ||||
|  | ||||
|    <para> | ||||
|     <productname>PostgreSQL</productname>'s regular expressions are implemented | ||||
|     using a package written by Henry Spencer.  Much of | ||||
|   | ||||
| @@ -8,7 +8,7 @@ | ||||
|  * | ||||
|  * | ||||
|  * IDENTIFICATION | ||||
|  *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.69 2007/02/27 23:48:08 tgl Exp $ | ||||
|  *	  $PostgreSQL: pgsql/src/backend/utils/adt/regexp.c,v 1.70 2007/03/20 05:44:59 neilc Exp $ | ||||
|  * | ||||
|  *		Alistair Crooks added the code for the regex caching | ||||
|  *		agc - cached the regular expressions used - there's a good chance | ||||
| @@ -29,9 +29,12 @@ | ||||
|  */ | ||||
| #include "postgres.h" | ||||
|  | ||||
| #include "access/heapam.h" | ||||
| #include "funcapi.h" | ||||
| #include "regex/regex.h" | ||||
| #include "utils/builtins.h" | ||||
| #include "utils/guc.h" | ||||
| #include "utils/lsyscache.h" | ||||
|  | ||||
|  | ||||
| /* GUC-settable flavor parameter */ | ||||
| @@ -75,9 +78,56 @@ typedef struct cached_re_str | ||||
| 	regex_t		cre_re;			/* the compiled regular expression */ | ||||
| } cached_re_str; | ||||
|  | ||||
| typedef struct re_comp_flags | ||||
| { | ||||
| 	int			  cflags; | ||||
| 	bool		  glob; | ||||
| } re_comp_flags; | ||||
|  | ||||
| typedef struct regexp_matches_ctx | ||||
| { | ||||
| 	text		 *orig_str; | ||||
| 	size_t		  orig_len; | ||||
| 	pg_wchar	 *wide_str; | ||||
| 	size_t		  wide_len; | ||||
| 	regex_t		 *cpattern; | ||||
| 	regmatch_t	 *pmatch; | ||||
| 	size_t		  offset; | ||||
|  | ||||
| 	re_comp_flags flags; | ||||
|  | ||||
| 	/* text type info */ | ||||
| 	Oid			  param_type; | ||||
| 	int16		  typlen; | ||||
| 	bool		  typbyval; | ||||
| 	char		  typalign; | ||||
| } regexp_matches_ctx; | ||||
|  | ||||
| typedef struct regexp_split_ctx | ||||
| { | ||||
| 	text		 *orig_str; | ||||
| 	size_t		  orig_len; | ||||
| 	pg_wchar	 *wide_str; | ||||
| 	size_t		  wide_len; | ||||
| 	regex_t		 *cpattern; | ||||
| 	regmatch_t	  match; | ||||
| 	size_t		  offset; | ||||
| 	re_comp_flags flags; | ||||
| } regexp_split_ctx; | ||||
|  | ||||
|  | ||||
| static int	num_res = 0;		/* # of cached re's */ | ||||
| static cached_re_str re_array[MAX_CACHED_RES];	/* cached re's */ | ||||
|  | ||||
| static regexp_matches_ctx *setup_regexp_matches(FunctionCallInfo fcinfo, | ||||
| 												text *orig_str, text *pattern, | ||||
| 												text *flags); | ||||
| static ArrayType *perform_regexp_matches(regexp_matches_ctx *matchctx); | ||||
|  | ||||
| static regexp_split_ctx *setup_regexp_split(text *str, text *pattern, | ||||
| 											text *flags); | ||||
| static Datum get_next_split(regexp_split_ctx *splitctx); | ||||
|  | ||||
|  | ||||
| /* | ||||
|  * RE_compile_and_cache - compile a RE, caching if possible | ||||
| @@ -88,7 +138,7 @@ static cached_re_str re_array[MAX_CACHED_RES];	/* cached re's */ | ||||
|  *	cflags --- compile options for the pattern | ||||
|  * | ||||
|  * Pattern is given in the database encoding.  We internally convert to | ||||
|  * array of pg_wchar which is what Spencer's regex package wants. | ||||
|  * an array of pg_wchar, which is what Spencer's regex package wants. | ||||
|  */ | ||||
| static regex_t * | ||||
| RE_compile_and_cache(text *text_re, int cflags) | ||||
| @@ -190,6 +240,80 @@ RE_compile_and_cache(text *text_re, int cflags) | ||||
| 	return &re_array[0].cre_re; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * RE_wchar_execute - execute a RE | ||||
|  * | ||||
|  * Returns TRUE on match, FALSE on no match | ||||
|  * | ||||
|  *	re --- the compiled pattern as returned by RE_compile_and_cache | ||||
|  *	data --- the data to match against (need not be null-terminated) | ||||
|  *	data_len --- the length of the data string | ||||
|  *	start_search -- the offset in the data to start searching | ||||
|  *	nmatch, pmatch	--- optional return area for match details | ||||
|  * | ||||
|  * Data is given as array of pg_wchar which is what Spencer's regex package | ||||
|  * wants. | ||||
|  */ | ||||
| static bool | ||||
| RE_wchar_execute(regex_t *re, pg_wchar *data, int data_len, | ||||
| 				 size_t start_search, int nmatch, regmatch_t *pmatch) | ||||
| { | ||||
| 	int			regexec_result; | ||||
| 	char		errMsg[100]; | ||||
|  | ||||
| 	/* Perform RE match and return result */ | ||||
| 	regexec_result = pg_regexec(re, | ||||
| 								data, | ||||
| 								data_len, | ||||
| 								start_search, | ||||
| 								NULL,	/* no details */ | ||||
| 								nmatch, | ||||
| 								pmatch, | ||||
| 								0); | ||||
|  | ||||
| 	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) | ||||
| 	{ | ||||
| 		/* re failed??? */ | ||||
| 		pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), | ||||
| 				 errmsg("regular expression failed: %s", errMsg))); | ||||
| 	} | ||||
|  | ||||
| 	return (regexec_result == REG_OKAY); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * RE_execute - execute a RE | ||||
|  * | ||||
|  * Returns TRUE on match, FALSE on no match | ||||
|  * | ||||
|  *	re --- the compiled pattern as returned by RE_compile_and_cache | ||||
|  *	dat --- the data to match against (need not be null-terminated) | ||||
|  *	dat_len --- the length of the data string | ||||
|  *	nmatch, pmatch	--- optional return area for match details | ||||
|  * | ||||
|  * Data is given in the database encoding.  We internally | ||||
|  * convert to array of pg_wchar which is what Spencer's regex package wants. | ||||
|  */ | ||||
| static bool | ||||
| RE_execute(regex_t *re, char *dat, int dat_len, | ||||
| 		   int nmatch, regmatch_t *pmatch) | ||||
| { | ||||
| 	pg_wchar   *data; | ||||
| 	size_t		data_len; | ||||
| 	bool		match; | ||||
|  | ||||
| 	/* Convert data string to wide characters */ | ||||
| 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); | ||||
| 	data_len = pg_mb2wchar_with_len(dat, data, dat_len); | ||||
|  | ||||
| 	/* Perform RE match and return result */ | ||||
| 	match = RE_wchar_execute(re, data, data_len, 0, nmatch, pmatch); | ||||
| 	pfree(data); | ||||
| 	return match; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * RE_compile_and_execute - compile and execute a RE | ||||
|  * | ||||
| @@ -208,41 +332,59 @@ static bool | ||||
| RE_compile_and_execute(text *text_re, char *dat, int dat_len, | ||||
| 					   int cflags, int nmatch, regmatch_t *pmatch) | ||||
| { | ||||
| 	pg_wchar   *data; | ||||
| 	size_t		data_len; | ||||
| 	int			regexec_result; | ||||
| 	regex_t    *re; | ||||
| 	char		errMsg[100]; | ||||
|  | ||||
| 	/* Convert data string to wide characters */ | ||||
| 	data = (pg_wchar *) palloc((dat_len + 1) * sizeof(pg_wchar)); | ||||
| 	data_len = pg_mb2wchar_with_len(dat, data, dat_len); | ||||
|  | ||||
| 	/* Compile RE */ | ||||
| 	re = RE_compile_and_cache(text_re, cflags); | ||||
|  | ||||
| 	/* Perform RE match and return result */ | ||||
| 	regexec_result = pg_regexec(re, | ||||
| 								data, | ||||
| 								data_len, | ||||
| 								0, | ||||
| 								NULL,	/* no details */ | ||||
| 								nmatch, | ||||
| 								pmatch, | ||||
| 								0); | ||||
| 	return RE_execute(re, dat, dat_len, nmatch, pmatch); | ||||
| } | ||||
|  | ||||
| 	pfree(data); | ||||
| static void | ||||
| parse_re_comp_flags(re_comp_flags *flags, text *opts) | ||||
| { | ||||
| 	MemSet(flags, 0, sizeof(re_comp_flags)); | ||||
| 	flags->cflags = regex_flavor; | ||||
|  | ||||
| 	if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH) | ||||
| 	if (opts) | ||||
| 	{ | ||||
| 		/* re failed??? */ | ||||
| 		pg_regerror(regexec_result, re, errMsg, sizeof(errMsg)); | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION), | ||||
| 				 errmsg("regular expression failed: %s", errMsg))); | ||||
| 	} | ||||
| 		char  *opt_p = VARDATA(opts); | ||||
| 		size_t opt_len = VARSIZE(opts) - VARHDRSZ; | ||||
| 		int i; | ||||
|  | ||||
| 	return (regexec_result == REG_OKAY); | ||||
| 		for (i = 0; i < opt_len; i++) | ||||
| 		{ | ||||
| 			switch (opt_p[i]) | ||||
| 			{ | ||||
| 				case 'g': | ||||
| 					flags->glob = true; | ||||
| 					break; | ||||
| 				case 'i': | ||||
| 					flags->cflags |= REG_ICASE; | ||||
| 					break; | ||||
| 				case 'm': | ||||
| 				case 'n': | ||||
| 					flags->cflags |= REG_NEWLINE; | ||||
| 					break; | ||||
| 				case 'p': | ||||
| 					flags->cflags |= REG_NLSTOP; | ||||
| 					flags->cflags &= ~REG_NLANCH; | ||||
| 					break; | ||||
| 				case 'w': | ||||
| 					flags->cflags &= ~REG_NLSTOP; | ||||
| 					flags->cflags |= REG_NLANCH; | ||||
| 					break; | ||||
| 				case 'x': | ||||
| 					flags->cflags |= REG_EXPANDED; | ||||
| 					break; | ||||
| 				default: | ||||
| 					ereport(ERROR, | ||||
| 							(errcode(ERRCODE_INVALID_PARAMETER_VALUE), | ||||
| 							 errmsg("invalid regexp option: %c", opt_p[i]))); | ||||
| 					break; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -250,8 +392,7 @@ RE_compile_and_execute(text *text_re, char *dat, int dat_len, | ||||
|  * assign_regex_flavor - GUC hook to validate and set REGEX_FLAVOR | ||||
|  */ | ||||
| const char * | ||||
| assign_regex_flavor(const char *value, | ||||
| 					bool doit, GucSource source) | ||||
| assign_regex_flavor(const char *value, bool doit, GucSource source) | ||||
| { | ||||
| 	if (pg_strcasecmp(value, "advanced") == 0) | ||||
| 	{ | ||||
| @@ -469,39 +610,14 @@ textregexreplace(PG_FUNCTION_ARGS) | ||||
| 	text	   *p = PG_GETARG_TEXT_P(1); | ||||
| 	text	   *r = PG_GETARG_TEXT_P(2); | ||||
| 	text	   *opt = PG_GETARG_TEXT_P(3); | ||||
| 	char	   *opt_p = VARDATA(opt); | ||||
| 	int			opt_len = (VARSIZE(opt) - VARHDRSZ); | ||||
| 	int			i; | ||||
| 	bool		glob = false; | ||||
| 	bool		ignorecase = false; | ||||
| 	regex_t    *re; | ||||
| 	re_comp_flags flags; | ||||
|  | ||||
| 	/* parse options */ | ||||
| 	for (i = 0; i < opt_len; i++) | ||||
| 	{ | ||||
| 		switch (opt_p[i]) | ||||
| 		{ | ||||
| 			case 'i': | ||||
| 				ignorecase = true; | ||||
| 				break; | ||||
| 			case 'g': | ||||
| 				glob = true; | ||||
| 				break; | ||||
| 			default: | ||||
| 				ereport(ERROR, | ||||
| 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE), | ||||
| 						 errmsg("invalid option of regexp_replace: %c", | ||||
| 								opt_p[i]))); | ||||
| 				break; | ||||
| 		} | ||||
| 	} | ||||
| 	parse_re_comp_flags(&flags, opt); | ||||
|  | ||||
| 	if (ignorecase) | ||||
| 		re = RE_compile_and_cache(p, regex_flavor | REG_ICASE); | ||||
| 	else | ||||
| 		re = RE_compile_and_cache(p, regex_flavor); | ||||
| 	re = RE_compile_and_cache(p, flags.cflags); | ||||
|  | ||||
| 	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, glob)); | ||||
| 	PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob)); | ||||
| } | ||||
|  | ||||
| /* similar_escape() | ||||
| @@ -625,6 +741,361 @@ similar_escape(PG_FUNCTION_ARGS) | ||||
| 	PG_RETURN_TEXT_P(result); | ||||
| } | ||||
|  | ||||
| #define PG_GETARG_TEXT_P_IF_EXISTS(_n) \ | ||||
| 	(PG_NARGS() > _n ? PG_GETARG_TEXT_P(_n) : NULL) | ||||
|  | ||||
| Datum | ||||
| regexp_matches(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	FuncCallContext		*funcctx; | ||||
| 	MemoryContext		 oldcontext; | ||||
| 	regexp_matches_ctx	*matchctx; | ||||
|  | ||||
| 	if (SRF_IS_FIRSTCALL()) | ||||
| 	{ | ||||
| 		text *pattern = PG_GETARG_TEXT_P(1); | ||||
| 		text *flags   = PG_GETARG_TEXT_P_IF_EXISTS(2); | ||||
|  | ||||
| 		funcctx = SRF_FIRSTCALL_INIT(); | ||||
| 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); | ||||
|  | ||||
| 		/* be sure to copy the input string into the multi-call ctx */ | ||||
| 		matchctx = setup_regexp_matches(fcinfo, PG_GETARG_TEXT_P_COPY(0), | ||||
| 										pattern, flags); | ||||
|  | ||||
| 		MemoryContextSwitchTo(oldcontext); | ||||
| 		funcctx->user_fctx = (void *) matchctx; | ||||
|  | ||||
| 		/* | ||||
| 		 * Avoid run-away function by making sure we never iterate | ||||
| 		 * more than the length of the text + 1 (the number of matches | ||||
| 		 * an empty pattern will make is length + 1) | ||||
| 		 */ | ||||
| 		if (matchctx->flags.glob) | ||||
| 			funcctx->max_calls = matchctx->wide_len + 1; | ||||
| 		else | ||||
| 			funcctx->max_calls = 0; | ||||
| 	} | ||||
|  | ||||
| 	funcctx = SRF_PERCALL_SETUP(); | ||||
| 	matchctx = (regexp_matches_ctx *) funcctx->user_fctx; | ||||
|  | ||||
| 	if (funcctx->call_cntr > funcctx->max_calls) | ||||
| 	{ | ||||
| 		/* | ||||
| 		 * If max_calls == 0, then we are doing a non-global match, we | ||||
| 		 * should stop now, no problem.  Otherwise, if we exceed | ||||
| 		 * max_calls something really wonky is going on, since it is | ||||
| 		 * returning more matches than there are characters in the | ||||
| 		 * string, which should not happen | ||||
| 		 */ | ||||
| 		if (funcctx->max_calls != 0) | ||||
| 			elog(ERROR, "set returning match function terminated after iterating %d times", | ||||
| 				 funcctx->call_cntr); | ||||
|  | ||||
| 		SRF_RETURN_DONE(funcctx); | ||||
| 	} | ||||
|  | ||||
| 	if (matchctx->offset < matchctx->wide_len) | ||||
| 	{ | ||||
| 		ArrayType *result_ary; | ||||
|  | ||||
| 		if (matchctx->pmatch[0].rm_so == matchctx->pmatch[0].rm_eo) | ||||
| 			matchctx->offset++; | ||||
|  | ||||
| 		result_ary = perform_regexp_matches(matchctx); | ||||
| 		if (result_ary != NULL) | ||||
| 		{ | ||||
| 			matchctx->offset = matchctx->pmatch[0].rm_eo; | ||||
| 			SRF_RETURN_NEXT(funcctx, PointerGetDatum(result_ary)); | ||||
| 		} | ||||
| 		/* else fall through and return done */ | ||||
| 	} | ||||
|  | ||||
| 	SRF_RETURN_DONE(funcctx); | ||||
| } | ||||
|  | ||||
| Datum | ||||
| regexp_matches_no_flags(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	return regexp_matches(fcinfo); | ||||
| } | ||||
|  | ||||
| static regexp_matches_ctx * | ||||
| setup_regexp_matches(FunctionCallInfo fcinfo, text *orig_str, text *pattern, text *flags) | ||||
| { | ||||
| 	regexp_matches_ctx	*matchctx = palloc(sizeof(regexp_matches_ctx)); | ||||
|  | ||||
| 	matchctx->orig_str = orig_str; | ||||
| 	matchctx->orig_len = VARSIZE(matchctx->orig_str) - VARHDRSZ; | ||||
|  | ||||
| 	parse_re_comp_flags(&matchctx->flags, flags); | ||||
|  | ||||
| 	matchctx->cpattern = RE_compile_and_cache(pattern, matchctx->flags.cflags); | ||||
| 	matchctx->pmatch = palloc(sizeof(regmatch_t) * (matchctx->cpattern->re_nsub + 1)); | ||||
| 	matchctx->offset = 0; | ||||
|  | ||||
| 	/* get text type oid, too lazy to do it some other way */ | ||||
| 	matchctx->param_type = get_fn_expr_argtype(fcinfo->flinfo, 0); | ||||
| 	get_typlenbyvalalign(matchctx->param_type, &matchctx->typlen, | ||||
| 						 &matchctx->typbyval, &matchctx->typalign); | ||||
|  | ||||
| 	matchctx->wide_str = palloc(sizeof(pg_wchar) * (matchctx->orig_len + 1)); | ||||
| 	matchctx->wide_len = pg_mb2wchar_with_len(VARDATA(matchctx->orig_str), | ||||
| 											  matchctx->wide_str, matchctx->orig_len); | ||||
|  | ||||
| 	matchctx->pmatch[0].rm_so = -1; | ||||
| 	/* both < 0 but not equal */ | ||||
| 	matchctx->pmatch[0].rm_eo = -2; | ||||
|  | ||||
| 	return matchctx; | ||||
| } | ||||
|  | ||||
| static ArrayType * | ||||
| perform_regexp_matches(regexp_matches_ctx *matchctx) | ||||
| { | ||||
| 	Datum 		*elems; | ||||
| 	bool 		*nulls; | ||||
| 	Datum 		 fullmatch;		/* used to avoid a palloc if no matches */ | ||||
| 	int 		 ndims = 1; | ||||
| 	int 		 dims[1]; | ||||
| 	int          lbs[1] = {1}; | ||||
|  | ||||
| 	if (RE_wchar_execute(matchctx->cpattern, | ||||
| 						 matchctx->wide_str, | ||||
| 						 matchctx->wide_len, | ||||
| 						 matchctx->offset, | ||||
| 						 matchctx->cpattern->re_nsub + 1, | ||||
| 						 matchctx->pmatch) == false) | ||||
| 		return NULL; | ||||
|  | ||||
| 	if (matchctx->cpattern->re_nsub > 0) | ||||
| 	{ | ||||
| 		int i; | ||||
|  | ||||
| 		elems = palloc(sizeof(Datum) * matchctx->cpattern->re_nsub); | ||||
| 		nulls = palloc(sizeof(bool) * matchctx->cpattern->re_nsub); | ||||
| 		dims[0] = matchctx->cpattern->re_nsub; | ||||
|  | ||||
| 		for (i = 0; i < matchctx->cpattern->re_nsub; i++) | ||||
| 		{ | ||||
| 			int so = matchctx->pmatch[i + 1].rm_so; | ||||
| 			int	eo = matchctx->pmatch[i + 1].rm_eo; | ||||
|  | ||||
| 			if (so < 0 || eo < 0) | ||||
| 			{ | ||||
| 				elems[i] = 0; | ||||
| 				nulls[i] = true; | ||||
| 			} | ||||
| 			else | ||||
| 			{ | ||||
| 				elems[i] = DirectFunctionCall3(text_substr, | ||||
| 											   PointerGetDatum(matchctx->orig_str), | ||||
| 											   Int32GetDatum(so + 1), | ||||
| 											   Int32GetDatum(eo - so)); | ||||
| 				nulls[i] = false; | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 	else | ||||
| 	{ | ||||
| 		int so = matchctx->pmatch[0].rm_so; | ||||
| 		int	eo = matchctx->pmatch[0].rm_eo; | ||||
|  | ||||
| 		if (so < 0 || eo < 0) | ||||
| 			elog(ERROR, "regexp code said it had a match, but did not return it"); | ||||
|  | ||||
| 		fullmatch = DirectFunctionCall3(text_substr, | ||||
| 										PointerGetDatum(matchctx->orig_str), | ||||
| 										Int32GetDatum(so + 1), | ||||
| 										Int32GetDatum(eo - so)); | ||||
|  | ||||
| 		elems = &fullmatch; | ||||
| 		nulls = NULL; | ||||
| 		dims[0] = 1; | ||||
| 	} | ||||
|  | ||||
| 	return construct_md_array(elems, nulls, ndims, dims, lbs, | ||||
| 							  matchctx->param_type, matchctx->typlen, | ||||
| 							  matchctx->typbyval, matchctx->typalign); | ||||
| } | ||||
|  | ||||
| Datum | ||||
| regexp_split_to_table(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	FuncCallContext  *funcctx; | ||||
| 	regexp_split_ctx *splitctx; | ||||
|  | ||||
| 	if (SRF_IS_FIRSTCALL()) | ||||
| 	{ | ||||
| 		text 			*pattern = PG_GETARG_TEXT_P(1); | ||||
| 		text 			*flags   = PG_GETARG_TEXT_P_IF_EXISTS(2); | ||||
| 		MemoryContext    oldcontext; | ||||
|  | ||||
| 		funcctx = SRF_FIRSTCALL_INIT(); | ||||
| 		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); | ||||
|  | ||||
| 		splitctx = setup_regexp_split(PG_GETARG_TEXT_P_COPY(0), pattern, flags); | ||||
|  | ||||
| 		MemoryContextSwitchTo(oldcontext); | ||||
| 		funcctx->user_fctx = (void *) splitctx; | ||||
|  | ||||
| 		/* | ||||
| 		 * Avoid run-away function by making sure we never iterate | ||||
| 		 * more than the length of the text | ||||
| 		 */ | ||||
| 		funcctx->max_calls = splitctx->wide_len; | ||||
| 	} | ||||
|  | ||||
| 	funcctx = SRF_PERCALL_SETUP(); | ||||
| 	splitctx = (regexp_split_ctx *) funcctx->user_fctx; | ||||
|  | ||||
| 	if (funcctx->call_cntr > funcctx->max_calls) | ||||
| 	{ | ||||
| 		/* | ||||
| 		 * If we exceed wide_len something really wonky is going on, | ||||
| 		 * since it is returning more matches than there are | ||||
| 		 * characters in the string, which should not happen | ||||
| 		 */ | ||||
| 		elog(ERROR, "set returning split function terminated after iterating %d times", | ||||
| 			 funcctx->call_cntr); | ||||
| 	} | ||||
|  | ||||
| 	if (splitctx->offset < splitctx->wide_len) | ||||
| 		SRF_RETURN_NEXT(funcctx, get_next_split(splitctx)); | ||||
| 	else | ||||
| 		SRF_RETURN_DONE(funcctx); | ||||
| } | ||||
|  | ||||
| Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	return regexp_split_to_table(fcinfo); | ||||
| } | ||||
|  | ||||
| Datum regexp_split_to_array(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	ArrayBuildState 	*astate = NULL; | ||||
| 	regexp_split_ctx 	*splitctx; | ||||
| 	Oid 				 param_type; | ||||
| 	int 				 nitems; | ||||
|  | ||||
| 	splitctx = setup_regexp_split(PG_GETARG_TEXT_P(0), | ||||
| 								  PG_GETARG_TEXT_P(1), | ||||
| 								  PG_GETARG_TEXT_P_IF_EXISTS(2)); | ||||
|  | ||||
| 	/* get text type oid, too lazy to do it some other way */ | ||||
| 	param_type = get_fn_expr_argtype(fcinfo->flinfo, 0); | ||||
|  | ||||
| 	for (nitems = 0; splitctx->offset < splitctx->wide_len; nitems++) | ||||
| 	{ | ||||
| 		if (nitems > splitctx->wide_len) | ||||
| 			elog(ERROR, "split function terminated after iterating %d times", | ||||
| 				 nitems); | ||||
|  | ||||
| 		astate = accumArrayResult(astate, | ||||
| 								  get_next_split(splitctx), | ||||
| 								  false, | ||||
| 								  param_type, | ||||
| 								  CurrentMemoryContext); | ||||
| 	} | ||||
|  | ||||
| 	PG_RETURN_ARRAYTYPE_P(makeArrayResult(astate, CurrentMemoryContext)); | ||||
| } | ||||
|  | ||||
| Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	return regexp_split_to_array(fcinfo); | ||||
| } | ||||
|  | ||||
| static regexp_split_ctx * | ||||
| setup_regexp_split(text *str, text *pattern, text *flags) | ||||
| { | ||||
| 	regexp_split_ctx *splitctx = palloc(sizeof(regexp_split_ctx)); | ||||
|  | ||||
| 	splitctx->orig_str = str; | ||||
| 	splitctx->orig_len = VARSIZE(splitctx->orig_str) - VARHDRSZ; | ||||
|  | ||||
| 	parse_re_comp_flags(&splitctx->flags, flags); | ||||
| 	if (splitctx->flags.glob) | ||||
| 		ereport(ERROR, | ||||
| 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE), | ||||
| 				 errmsg("regexp_split does not support the global option"))); | ||||
|  | ||||
| 	splitctx->cpattern = RE_compile_and_cache(pattern, splitctx->flags.cflags); | ||||
|  | ||||
| 	splitctx->wide_str = palloc(sizeof(pg_wchar) * (splitctx->orig_len + 1)); | ||||
| 	splitctx->wide_len = pg_mb2wchar_with_len(VARDATA(splitctx->orig_str), | ||||
| 											  splitctx->wide_str, | ||||
| 											  splitctx->orig_len); | ||||
|  | ||||
| 	splitctx->offset = 0; | ||||
|  | ||||
| 	splitctx->match.rm_so = -1; | ||||
| 	/* both < 0 but not equal */ | ||||
| 	splitctx->match.rm_eo = -2; | ||||
|  | ||||
| 	return splitctx; | ||||
| } | ||||
|  | ||||
| static Datum | ||||
| get_next_split(regexp_split_ctx *splitctx) | ||||
| { | ||||
| 	regmatch_t *pmatch = &(splitctx->match); | ||||
|  | ||||
| 	for (;;) | ||||
| 	{ | ||||
| 		Datum result; | ||||
| 		int	  startpos = splitctx->offset + 1; | ||||
|  | ||||
| 		/* | ||||
| 		 * If the last match was zero-length, we need to push the | ||||
| 		 * offset forward to avoid matching the same place forever | ||||
| 		 */ | ||||
| 		if (pmatch->rm_so == pmatch->rm_eo) | ||||
| 			splitctx->offset++; | ||||
|  | ||||
| 		if (RE_wchar_execute(splitctx->cpattern, | ||||
| 							 splitctx->wide_str, | ||||
| 							 splitctx->wide_len, | ||||
| 							 splitctx->offset, | ||||
| 							 1, | ||||
| 							 pmatch)) | ||||
| 		{ | ||||
| 			int length = splitctx->match.rm_so - startpos + 1; | ||||
|  | ||||
| 			/* | ||||
| 			 * If we are trying to match at the beginning of the string and | ||||
| 			 * we got a zero-length match, or if we just matched where we | ||||
| 			 * left off last time, go around the loop again and increment | ||||
| 			 * the offset.  If we have incremented the offset already and | ||||
| 			 * it matched at the new offset, that's ok | ||||
| 			 */ | ||||
| 			if (length == 0) | ||||
| 				continue; | ||||
|  | ||||
| 			result = DirectFunctionCall3(text_substr, | ||||
| 										 PointerGetDatum(splitctx->orig_str), | ||||
| 										 Int32GetDatum(startpos), | ||||
| 										 Int32GetDatum(length)); | ||||
|  | ||||
| 			/* set the offset to the end of this match for next time */ | ||||
| 			splitctx->offset = pmatch->rm_eo; | ||||
|  | ||||
| 			return result; | ||||
| 		} | ||||
|  | ||||
| 		/* no more matches, return rest of string */ | ||||
| 		result = DirectFunctionCall2(text_substr_no_len, | ||||
| 									 PointerGetDatum(splitctx->orig_str), | ||||
| 									 Int32GetDatum(startpos)); | ||||
|  | ||||
| 		/* so we know we're done next time through */ | ||||
| 		splitctx->offset = splitctx->wide_len; | ||||
|  | ||||
| 		return result; | ||||
| 	} | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * report whether regex_flavor is currently BASIC | ||||
|  */ | ||||
|   | ||||
| @@ -37,7 +37,7 @@ | ||||
|  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.392 2007/03/20 03:53:26 wieck Exp $ | ||||
|  * $PostgreSQL: pgsql/src/include/catalog/catversion.h,v 1.393 2007/03/20 05:45:00 neilc Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -53,6 +53,6 @@ | ||||
|  */ | ||||
|  | ||||
| /*							yyyymmddN */ | ||||
| #define CATALOG_VERSION_NO	200703190 | ||||
| #define CATALOG_VERSION_NO	200703201 | ||||
|  | ||||
| #endif | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.448 2007/03/16 17:57:36 mha Exp $ | ||||
|  * $PostgreSQL: pgsql/src/include/catalog/pg_proc.h,v 1.449 2007/03/20 05:45:00 neilc Exp $ | ||||
|  * | ||||
|  * NOTES | ||||
|  *	  The script catalog/genbki.sh reads this file and generates .bki | ||||
| @@ -2261,8 +2261,20 @@ DATA(insert OID =  2284 ( regexp_replace	   PGNSP PGUID 12 1 0 f f t f i 3 25 "2 | ||||
| DESCR("replace text using regexp"); | ||||
| DATA(insert OID =  2285 ( regexp_replace	   PGNSP PGUID 12 1 0 f f t f i 4 25 "25 25 25 25" _null_ _null_ _null_ textregexreplace - _null_ )); | ||||
| DESCR("replace text using regexp"); | ||||
| DATA(insert OID =  2763 ( regexp_matches   PGNSP PGUID 12 1 1 f f t t i 2 1009 "25 25" _null_ _null_ _null_	regexp_matches_no_flags - _null_ )); | ||||
| DESCR("return all match groups for regexp"); | ||||
| DATA(insert OID =  2764 ( regexp_matches   PGNSP PGUID 12 1 10 f f t t i 3 1009 "25 25 25" _null_ _null_ _null_	regexp_matches - _null_ )); | ||||
| DESCR("return all match groups for regexp"); | ||||
| DATA(insert OID =  2088 ( split_part   PGNSP PGUID 12 1 0 f f t f i 3 25 "25 25 23" _null_ _null_ _null_	split_text - _null_ )); | ||||
| DESCR("split string by field_sep and return field_num"); | ||||
| DATA(insert OID =  2765 ( regexp_split_to_table PGNSP PGUID 12 1 1000 f f t t i 2 25 "25 25" _null_ _null_ _null_	regexp_split_to_table_no_flags - _null_ )); | ||||
| DESCR("split string by pattern"); | ||||
| DATA(insert OID =  2766 ( regexp_split_to_table PGNSP PGUID 12 1 1000 f f t t i 3 25 "25 25 25" _null_ _null_ _null_	regexp_split_to_table - _null_ )); | ||||
| DESCR("split string by pattern"); | ||||
| DATA(insert OID =  2767 ( regexp_split_to_array PGNSP PGUID 12 1 0 f f t f i 2 1009 "25 25" _null_ _null_ _null_	regexp_split_to_array_no_flags - _null_ )); | ||||
| DESCR("split string by pattern"); | ||||
| DATA(insert OID =  2768 ( regexp_split_to_array PGNSP PGUID 12 1 0 f f t f i 3 1009 "25 25 25" _null_ _null_ _null_	regexp_split_to_array - _null_ )); | ||||
| DESCR("split string by pattern"); | ||||
| DATA(insert OID =  2089 ( to_hex	   PGNSP PGUID 12 1 0 f f t f i 1 25 "23" _null_ _null_ _null_	to_hex32 - _null_ )); | ||||
| DESCR("convert int4 number to hex"); | ||||
| DATA(insert OID =  2090 ( to_hex	   PGNSP PGUID 12 1 0 f f t f i 1 25 "20" _null_ _null_ _null_	to_hex64 - _null_ )); | ||||
|   | ||||
| @@ -7,7 +7,7 @@ | ||||
|  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group | ||||
|  * Portions Copyright (c) 1994, Regents of the University of California | ||||
|  * | ||||
|  * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.289 2007/02/23 21:59:45 tgl Exp $ | ||||
|  * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.290 2007/03/20 05:45:00 neilc Exp $ | ||||
|  * | ||||
|  *------------------------------------------------------------------------- | ||||
|  */ | ||||
| @@ -478,6 +478,12 @@ extern Datum textregexsubstr(PG_FUNCTION_ARGS); | ||||
| extern Datum textregexreplace_noopt(PG_FUNCTION_ARGS); | ||||
| extern Datum textregexreplace(PG_FUNCTION_ARGS); | ||||
| extern Datum similar_escape(PG_FUNCTION_ARGS); | ||||
| extern Datum regexp_matches(PG_FUNCTION_ARGS); | ||||
| extern Datum regexp_matches_no_flags(PG_FUNCTION_ARGS); | ||||
| extern Datum regexp_split_to_table(PG_FUNCTION_ARGS); | ||||
| extern Datum regexp_split_to_table_no_flags(PG_FUNCTION_ARGS); | ||||
| extern Datum regexp_split_to_array(PG_FUNCTION_ARGS); | ||||
| extern Datum regexp_split_to_array_no_flags(PG_FUNCTION_ARGS); | ||||
| extern bool regex_flavor_is_basic(void); | ||||
|  | ||||
| /* regproc.c */ | ||||
|   | ||||
| @@ -217,9 +217,232 @@ SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi'); | ||||
|  Z Z | ||||
| (1 row) | ||||
|  | ||||
| -- invalid option of REGEXP_REPLACE | ||||
| -- invalid regexp option | ||||
| SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z'); | ||||
| ERROR:  invalid option of regexp_replace: z | ||||
| ERROR:  invalid regexp option: z | ||||
| -- set so we can tell NULL from empty string | ||||
| \pset null '\\N' | ||||
| -- return all matches from regexp | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {bar,beque} | ||||
| (1 row) | ||||
|  | ||||
| -- test case insensitive | ||||
| SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i'); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {bAR,bEqUE} | ||||
| (1 row) | ||||
|  | ||||
| -- global option - more than one match | ||||
| SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g'); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {bar,beque} | ||||
|  {bazil,barf} | ||||
| (2 rows) | ||||
|  | ||||
| -- empty capture group (matched empty string) | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {bar,"",beque} | ||||
| (1 row) | ||||
|  | ||||
| -- no match | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
| (0 rows) | ||||
|  | ||||
| -- optional capture group did not match, null entry in array | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$); | ||||
|   regexp_matches   | ||||
| ------------------ | ||||
|  {bar,NULL,beque} | ||||
| (1 row) | ||||
|  | ||||
| -- no capture groups | ||||
| SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$); | ||||
|  regexp_matches  | ||||
| ---------------- | ||||
|  {barbeque} | ||||
| (1 row) | ||||
|  | ||||
| -- give me errors | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper'); | ||||
| ERROR:  invalid regexp option: z | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$); | ||||
| ERROR:  invalid regular expression: parentheses () not balanced | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$); | ||||
| ERROR:  invalid regular expression: invalid repetition count(s) | ||||
| -- split string on regexp | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo; | ||||
|   foo   | length  | ||||
| --------+-------- | ||||
|  the    |      3 | ||||
|  quick  |      5 | ||||
|  brown  |      5 | ||||
|  fox    |      3 | ||||
|  jumped |      6 | ||||
|  over   |      4 | ||||
|  the    |      3 | ||||
|  lazy   |      4 | ||||
|  dog    |      3 | ||||
| (9 rows) | ||||
|  | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$); | ||||
|              regexp_split_to_array               | ||||
| ------------------------------------------------ | ||||
|  {the,quick,brown,fox,jumped,over,the,lazy,dog} | ||||
| (1 row) | ||||
|  | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo; | ||||
|  foo | length  | ||||
| -----+-------- | ||||
|  t   |      1 | ||||
|  h   |      1 | ||||
|  e   |      1 | ||||
|  q   |      1 | ||||
|  u   |      1 | ||||
|  i   |      1 | ||||
|  c   |      1 | ||||
|  k   |      1 | ||||
|  b   |      1 | ||||
|  r   |      1 | ||||
|  o   |      1 | ||||
|  w   |      1 | ||||
|  n   |      1 | ||||
|  f   |      1 | ||||
|  o   |      1 | ||||
|  x   |      1 | ||||
|  j   |      1 | ||||
|  u   |      1 | ||||
|  m   |      1 | ||||
|  p   |      1 | ||||
|  e   |      1 | ||||
|  d   |      1 | ||||
|  o   |      1 | ||||
|  v   |      1 | ||||
|  e   |      1 | ||||
|  r   |      1 | ||||
|  t   |      1 | ||||
|  h   |      1 | ||||
|  e   |      1 | ||||
|  l   |      1 | ||||
|  a   |      1 | ||||
|  z   |      1 | ||||
|  y   |      1 | ||||
|  d   |      1 | ||||
|  o   |      1 | ||||
|  g   |      1 | ||||
| (36 rows) | ||||
|  | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$); | ||||
|                            regexp_split_to_array                            | ||||
| --------------------------------------------------------------------------- | ||||
|  {t,h,e,q,u,i,c,k,b,r,o,w,n,f,o,x,j,u,m,p,e,d,o,v,e,r,t,h,e,l,a,z,y,d,o,g} | ||||
| (1 row) | ||||
|  | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo; | ||||
|  foo | length  | ||||
| -----+-------- | ||||
|  t   |      1 | ||||
|  h   |      1 | ||||
|  e   |      1 | ||||
|      |      1 | ||||
|  q   |      1 | ||||
|  u   |      1 | ||||
|  i   |      1 | ||||
|  c   |      1 | ||||
|  k   |      1 | ||||
|      |      1 | ||||
|  b   |      1 | ||||
|  r   |      1 | ||||
|  o   |      1 | ||||
|  w   |      1 | ||||
|  n   |      1 | ||||
|      |      1 | ||||
|  f   |      1 | ||||
|  o   |      1 | ||||
|  x   |      1 | ||||
|      |      1 | ||||
|  j   |      1 | ||||
|  u   |      1 | ||||
|  m   |      1 | ||||
|  p   |      1 | ||||
|  e   |      1 | ||||
|  d   |      1 | ||||
|      |      1 | ||||
|  o   |      1 | ||||
|  v   |      1 | ||||
|  e   |      1 | ||||
|  r   |      1 | ||||
|      |      1 | ||||
|  t   |      1 | ||||
|  h   |      1 | ||||
|  e   |      1 | ||||
|      |      1 | ||||
|  l   |      1 | ||||
|  a   |      1 | ||||
|  z   |      1 | ||||
|  y   |      1 | ||||
|      |      1 | ||||
|  d   |      1 | ||||
|  o   |      1 | ||||
|  g   |      1 | ||||
| (44 rows) | ||||
|  | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', ''); | ||||
|                                            regexp_split_to_array                                            | ||||
| ----------------------------------------------------------------------------------------------------------- | ||||
|  {t,h,e," ",q,u,i,c,k," ",b,r,o,w,n," ",f,o,x," ",j,u,m,p,e,d," ",o,v,e,r," ",t,h,e," ",l,a,z,y," ",d,o,g} | ||||
| (1 row) | ||||
|  | ||||
| -- case insensitive | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo; | ||||
|           foo          | length  | ||||
| -----------------------+-------- | ||||
|  th                    |      2 | ||||
|   QUick bROWn FOx jUMP |     21 | ||||
|  d ov                  |      4 | ||||
|  r TH                  |      4 | ||||
|   lazy dOG             |      9 | ||||
| (5 rows) | ||||
|  | ||||
| SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i'); | ||||
|                  regexp_split_to_array                   | ||||
| -------------------------------------------------------- | ||||
|  {th," QUick bROWn FOx jUMP","d ov","r TH"," lazy dOG"} | ||||
| (1 row) | ||||
|  | ||||
| -- no match of pattern | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo; | ||||
|                      foo                      | length  | ||||
| ----------------------------------------------+-------- | ||||
|  the quick brown fox jumped over the lazy dog |     44 | ||||
| (1 row) | ||||
|  | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch'); | ||||
|               regexp_split_to_array                | ||||
| -------------------------------------------------- | ||||
|  {"the quick brown fox jumped over the lazy dog"} | ||||
| (1 row) | ||||
|  | ||||
| -- errors | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo; | ||||
| ERROR:  invalid regexp option: z | ||||
| SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy'); | ||||
| ERROR:  invalid regexp option: z | ||||
| -- global option meaningless for regexp_split | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo; | ||||
| ERROR:  regexp_split does not support the global option | ||||
| SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g'); | ||||
| ERROR:  regexp_split does not support the global option | ||||
| -- change NULL-display back | ||||
| \pset null '' | ||||
| -- E021-11 position expression | ||||
| SELECT POSITION('4' IN '1234567890') = '4' AS "4"; | ||||
|  4  | ||||
|   | ||||
| @@ -85,9 +85,60 @@ SELECT regexp_replace('1112223333', E'(\\d{3})(\\d{3})(\\d{4})', E'(\\1) \\2-\\3 | ||||
| SELECT regexp_replace('AAA   BBB   CCC   ', E'\\s+', ' ', 'g'); | ||||
| SELECT regexp_replace('AAA', '^|$', 'Z', 'g'); | ||||
| SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'gi'); | ||||
| -- invalid option of REGEXP_REPLACE | ||||
| -- invalid regexp option | ||||
| SELECT regexp_replace('AAA aaa', 'A+', 'Z', 'z'); | ||||
|  | ||||
| -- set so we can tell NULL from empty string | ||||
| \pset null '\\N' | ||||
|  | ||||
| -- return all matches from regexp | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$); | ||||
|  | ||||
| -- test case insensitive | ||||
| SELECT regexp_matches('foObARbEqUEbAz', $re$(bar)(beque)$re$, 'i'); | ||||
|  | ||||
| -- global option - more than one match | ||||
| SELECT regexp_matches('foobarbequebazilbarfbonk', $re$(b[^b]+)(b[^b]+)$re$, 'g'); | ||||
|  | ||||
| -- empty capture group (matched empty string) | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(.*)(beque)$re$); | ||||
| -- no match | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)(beque)$re$); | ||||
| -- optional capture group did not match, null entry in array | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(.+)?(beque)$re$); | ||||
|  | ||||
| -- no capture groups | ||||
| SELECT regexp_matches('foobarbequebaz', $re$barbeque$re$); | ||||
|  | ||||
| -- give me errors | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque)$re$, 'zipper'); | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(barbeque$re$); | ||||
| SELECT regexp_matches('foobarbequebaz', $re$(bar)(beque){2,1}$re$); | ||||
|  | ||||
| -- split string on regexp | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s+$re$) AS foo; | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s+$re$); | ||||
|  | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', $re$\s*$re$) AS foo; | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', $re$\s*$re$); | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', '') AS foo; | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', ''); | ||||
| -- case insensitive | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i') AS foo; | ||||
| SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'i'); | ||||
| -- no match of pattern | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('the quick brown fox jumped over the lazy dog', 'nomatch') AS foo; | ||||
| SELECT regexp_split_to_array('the quick brown fox jumped over the lazy dog', 'nomatch'); | ||||
| -- errors | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy') AS foo; | ||||
| SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'zippy'); | ||||
| -- global option meaningless for regexp_split | ||||
| SELECT foo, length(foo) FROM regexp_split_to_table('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g') AS foo; | ||||
| SELECT regexp_split_to_array('thE QUick bROWn FOx jUMPed ovEr THE lazy dOG', 'e', 'g'); | ||||
|  | ||||
| -- change NULL-display back | ||||
| \pset null '' | ||||
|  | ||||
| -- E021-11 position expression | ||||
| SELECT POSITION('4' IN '1234567890') = '4' AS "4"; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user