1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-24 09:27:52 +03:00

I made the patch that implements regexp_replace again.

The specification of this function is as follows.

regexp_replace(source text, pattern text, replacement text, [flags
text])
returns text

Replace string that matches to regular expression in source text to
replacement text.

 - pattern is regular expression pattern.
 - replacement is replace string that can use '\1'-'\9', and '\&'.
    '\1'-'\9': back reference to the n'th subexpression.
    '\&'     : entire matched string.
 - flags can use the following values:
    g: global (replace all)
    i: ignore case
    When the flags is not specified, case sensitive, replace the first
    instance only.

Atsushi Ogawa
This commit is contained in:
Bruce Momjian
2005-07-10 04:54:33 +00:00
parent 73e2431817
commit 75a64eeb4b
10 changed files with 418 additions and 55 deletions
doc/src/sgml
src
backend
include
test
regress

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.126 2005/07/07 04:36:08 momjian Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.127 2005/07/10 04:54:30 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -28,6 +28,7 @@
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/pg_locale.h"
#include "regex/regex.h"
typedef struct varlena unknown;
@@ -1993,6 +1994,225 @@ replace_text(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(ret_text);
}
/*
* check_replace_text_has_escape_char
* check whether replace_text has escape char.
*/
static bool
check_replace_text_has_escape_char(const text *replace_text)
{
const char *p = VARDATA(replace_text);
const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
if (pg_database_encoding_max_length() == 1)
{
for (; p < p_end; p++)
if (*p == '\\') return true;
}
else
{
for (; p < p_end; p += pg_mblen(p))
if (*p == '\\') return true;
}
return false;
}
/*
* appendStringInfoRegexpSubstr
* append string by using back references of regexp.
*/
static void
appendStringInfoRegexpSubstr(StringInfo str, text *replace_text,
regmatch_t *pmatch, text *src_text)
{
const char *p = VARDATA(replace_text);
const char *p_end = p + (VARSIZE(replace_text) - VARHDRSZ);
int eml = pg_database_encoding_max_length();
int substr_start = 1;
int ch_cnt;
int so;
int eo;
while (1)
{
/* Find escape char. */
ch_cnt = 0;
if (eml == 1)
{
for (; p < p_end && *p != '\\'; p++)
ch_cnt++;
}
else
{
for (; p < p_end && *p != '\\'; p += pg_mblen(p))
ch_cnt++;
}
/*
* Copy the text when there is a text in the left of escape char
* or escape char is not found.
*/
if (ch_cnt)
{
text *append_text = text_substring(PointerGetDatum(replace_text),
substr_start, ch_cnt, false);
appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
pfree(append_text);
}
substr_start += ch_cnt + 1;
if (p >= p_end) /* When escape char is not found. */
break;
/* See the next character of escape char. */
p++;
so = eo = -1;
if (*p >= '1' && *p <= '9')
{
/* Use the back reference of regexp. */
int idx = *p - '0';
so = pmatch[idx].rm_so;
eo = pmatch[idx].rm_eo;
p++;
substr_start++;
}
else if (*p == '&')
{
/* Use the entire matched string. */
so = pmatch[0].rm_so;
eo = pmatch[0].rm_eo;
p++;
substr_start++;
}
if (so != -1 && eo != -1)
{
/* Copy the text that is back reference of regexp. */
text *append_text = text_substring(PointerGetDatum(src_text),
so + 1, (eo - so), false);
appendStringInfoString(str, PG_TEXT_GET_STR(append_text));
pfree(append_text);
}
}
}
#define REGEXP_REPLACE_BACKREF_CNT 10
/*
* replace_text_regexp
* replace text that matches to regexp in src_text to replace_text.
*/
Datum
replace_text_regexp(PG_FUNCTION_ARGS)
{
text *ret_text;
text *src_text = PG_GETARG_TEXT_P(0);
int src_text_len = VARSIZE(src_text) - VARHDRSZ;
regex_t *re = (regex_t *)PG_GETARG_POINTER(1);
text *replace_text = PG_GETARG_TEXT_P(2);
bool global = PG_GETARG_BOOL(3);
StringInfo str = makeStringInfo();
int regexec_result;
regmatch_t pmatch[REGEXP_REPLACE_BACKREF_CNT];
pg_wchar *data;
size_t data_len;
int search_start;
int data_pos;
bool have_escape;
/* Convert data string to wide characters. */
data = (pg_wchar *) palloc((src_text_len + 1) * sizeof(pg_wchar));
data_len = pg_mb2wchar_with_len(VARDATA(src_text), data, src_text_len);
/* Check whether replace_text has escape char. */
have_escape = check_replace_text_has_escape_char(replace_text);
for (search_start = data_pos = 0; search_start <= data_len;)
{
regexec_result = pg_regexec(re,
data,
data_len,
search_start,
NULL, /* no details */
REGEXP_REPLACE_BACKREF_CNT,
pmatch,
0);
if (regexec_result != REG_OKAY && regexec_result != REG_NOMATCH)
{
char errMsg[100];
/* re failed??? */
pg_regerror(regexec_result, re, errMsg, sizeof(errMsg));
ereport(ERROR,
(errcode(ERRCODE_INVALID_REGULAR_EXPRESSION),
errmsg("regular expression failed: %s", errMsg)));
}
if (regexec_result == REG_NOMATCH)
break;
/*
* Copy the text when there is a text in the left of matched position.
*/
if (pmatch[0].rm_so - data_pos > 0)
{
text *left_text = text_substring(PointerGetDatum(src_text),
data_pos + 1,
pmatch[0].rm_so - data_pos, false);
appendStringInfoString(str, PG_TEXT_GET_STR(left_text));
pfree(left_text);
}
/*
* Copy the replace_text. Process back references when the
* replace_text has escape characters.
*/
if (have_escape)
appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
else
appendStringInfoString(str, PG_TEXT_GET_STR(replace_text));
search_start = data_pos = pmatch[0].rm_eo;
/*
* When global option is off, replace the first instance only.
*/
if (!global)
break;
/*
* Search from next character when the matching text is zero width.
*/
if (pmatch[0].rm_so == pmatch[0].rm_eo)
search_start++;
}
/*
* Copy the text when there is a text at the right of last matched
* or regexp is not matched.
*/
if (data_pos < data_len)
{
text *right_text = text_substring(PointerGetDatum(src_text),
data_pos + 1, -1, true);
appendStringInfoString(str, PG_TEXT_GET_STR(right_text));
pfree(right_text);
}
ret_text = PG_STR_GET_TEXT(str->data);
pfree(str->data);
pfree(str);
pfree(data);
PG_RETURN_TEXT_P(ret_text);
}
/*
* split_text
* parse input string