mirror of
https://github.com/postgres/postgres.git
synced 2025-06-14 18:42:34 +03:00
Teach regular expression operators to honor collations.
This involves getting the character classification and case-folding functions in the regex library to use the collations infrastructure. Most of this work had been done already in connection with the upper/lower and LIKE logic, so it was a simple matter of transposition. While at it, split out these functions into a separate source file regc_pg_locale.c, so that they can be correctly labeled with the Postgres project's license rather than the Scriptics license. These functions are 100% Postgres-written code whereas what remains in regc_locale.c is still mostly not ours, so lumping them both under the same copyright notice was getting more and more misleading.
This commit is contained in:
@ -96,6 +96,7 @@ typedef struct cached_re_str
|
||||
char *cre_pat; /* original RE (not null terminated!) */
|
||||
int cre_pat_len; /* length of original RE, in bytes */
|
||||
int cre_flags; /* compile flags: extended,icase etc */
|
||||
Oid cre_collation; /* collation to use */
|
||||
regex_t cre_re; /* the compiled regular expression */
|
||||
} cached_re_str;
|
||||
|
||||
@ -106,6 +107,7 @@ static cached_re_str re_array[MAX_CACHED_RES]; /* cached re's */
|
||||
/* Local functions */
|
||||
static regexp_matches_ctx *setup_regexp_matches(text *orig_str, text *pattern,
|
||||
text *flags,
|
||||
Oid collation,
|
||||
bool force_glob,
|
||||
bool use_subpatterns,
|
||||
bool ignore_degenerate);
|
||||
@ -121,12 +123,13 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
|
||||
*
|
||||
* text_re --- the pattern, expressed as a TEXT object
|
||||
* cflags --- compile options for the pattern
|
||||
* collation --- collation to use for LC_CTYPE-dependent behavior
|
||||
*
|
||||
* Pattern is given in the database encoding. We internally convert to
|
||||
* an array of pg_wchar, which is what Spencer's regex package wants.
|
||||
*/
|
||||
static regex_t *
|
||||
RE_compile_and_cache(text *text_re, int cflags)
|
||||
RE_compile_and_cache(text *text_re, int cflags, Oid collation)
|
||||
{
|
||||
int text_re_len = VARSIZE_ANY_EXHDR(text_re);
|
||||
char *text_re_val = VARDATA_ANY(text_re);
|
||||
@ -146,6 +149,7 @@ RE_compile_and_cache(text *text_re, int cflags)
|
||||
{
|
||||
if (re_array[i].cre_pat_len == text_re_len &&
|
||||
re_array[i].cre_flags == cflags &&
|
||||
re_array[i].cre_collation == collation &&
|
||||
memcmp(re_array[i].cre_pat, text_re_val, text_re_len) == 0)
|
||||
{
|
||||
/*
|
||||
@ -176,7 +180,8 @@ RE_compile_and_cache(text *text_re, int cflags)
|
||||
regcomp_result = pg_regcomp(&re_temp.cre_re,
|
||||
pattern,
|
||||
pattern_len,
|
||||
cflags);
|
||||
cflags,
|
||||
collation);
|
||||
|
||||
pfree(pattern);
|
||||
|
||||
@ -207,6 +212,7 @@ RE_compile_and_cache(text *text_re, int cflags)
|
||||
memcpy(re_temp.cre_pat, text_re_val, text_re_len);
|
||||
re_temp.cre_pat_len = text_re_len;
|
||||
re_temp.cre_flags = cflags;
|
||||
re_temp.cre_collation = collation;
|
||||
|
||||
/*
|
||||
* Okay, we have a valid new item in re_temp; insert it into the storage
|
||||
@ -313,6 +319,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
|
||||
* dat --- the data to match against (need not be null-terminated)
|
||||
* dat_len --- the length of the data string
|
||||
* cflags --- compile options for the pattern
|
||||
* collation --- collation to use for LC_CTYPE-dependent behavior
|
||||
* nmatch, pmatch --- optional return area for match details
|
||||
*
|
||||
* Both pattern and data are given in the database encoding. We internally
|
||||
@ -320,12 +327,13 @@ RE_execute(regex_t *re, char *dat, int dat_len,
|
||||
*/
|
||||
static bool
|
||||
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
|
||||
int cflags, int nmatch, regmatch_t *pmatch)
|
||||
int cflags, Oid collation,
|
||||
int nmatch, regmatch_t *pmatch)
|
||||
{
|
||||
regex_t *re;
|
||||
|
||||
/* Compile RE */
|
||||
re = RE_compile_and_cache(text_re, cflags);
|
||||
re = RE_compile_and_cache(text_re, cflags, collation);
|
||||
|
||||
return RE_execute(re, dat, dat_len, nmatch, pmatch);
|
||||
}
|
||||
@ -424,6 +432,7 @@ nameregexeq(PG_FUNCTION_ARGS)
|
||||
NameStr(*n),
|
||||
strlen(NameStr(*n)),
|
||||
REG_ADVANCED,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -437,6 +446,7 @@ nameregexne(PG_FUNCTION_ARGS)
|
||||
NameStr(*n),
|
||||
strlen(NameStr(*n)),
|
||||
REG_ADVANCED,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -450,6 +460,7 @@ textregexeq(PG_FUNCTION_ARGS)
|
||||
VARDATA_ANY(s),
|
||||
VARSIZE_ANY_EXHDR(s),
|
||||
REG_ADVANCED,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -463,6 +474,7 @@ textregexne(PG_FUNCTION_ARGS)
|
||||
VARDATA_ANY(s),
|
||||
VARSIZE_ANY_EXHDR(s),
|
||||
REG_ADVANCED,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -483,6 +495,7 @@ nameicregexeq(PG_FUNCTION_ARGS)
|
||||
NameStr(*n),
|
||||
strlen(NameStr(*n)),
|
||||
REG_ADVANCED | REG_ICASE,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -496,6 +509,7 @@ nameicregexne(PG_FUNCTION_ARGS)
|
||||
NameStr(*n),
|
||||
strlen(NameStr(*n)),
|
||||
REG_ADVANCED | REG_ICASE,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -509,6 +523,7 @@ texticregexeq(PG_FUNCTION_ARGS)
|
||||
VARDATA_ANY(s),
|
||||
VARSIZE_ANY_EXHDR(s),
|
||||
REG_ADVANCED | REG_ICASE,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -522,6 +537,7 @@ texticregexne(PG_FUNCTION_ARGS)
|
||||
VARDATA_ANY(s),
|
||||
VARSIZE_ANY_EXHDR(s),
|
||||
REG_ADVANCED | REG_ICASE,
|
||||
PG_GET_COLLATION(),
|
||||
0, NULL));
|
||||
}
|
||||
|
||||
@ -541,7 +557,7 @@ textregexsubstr(PG_FUNCTION_ARGS)
|
||||
eo;
|
||||
|
||||
/* Compile RE */
|
||||
re = RE_compile_and_cache(p, REG_ADVANCED);
|
||||
re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
|
||||
|
||||
/*
|
||||
* We pass two regmatch_t structs to get info about the overall match and
|
||||
@ -597,7 +613,7 @@ textregexreplace_noopt(PG_FUNCTION_ARGS)
|
||||
text *r = PG_GETARG_TEXT_PP(2);
|
||||
regex_t *re;
|
||||
|
||||
re = RE_compile_and_cache(p, REG_ADVANCED);
|
||||
re = RE_compile_and_cache(p, REG_ADVANCED, PG_GET_COLLATION());
|
||||
|
||||
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, false));
|
||||
}
|
||||
@ -618,7 +634,7 @@ textregexreplace(PG_FUNCTION_ARGS)
|
||||
|
||||
parse_re_flags(&flags, opt);
|
||||
|
||||
re = RE_compile_and_cache(p, flags.cflags);
|
||||
re = RE_compile_and_cache(p, flags.cflags, PG_GET_COLLATION());
|
||||
|
||||
PG_RETURN_TEXT_P(replace_text_regexp(s, (void *) re, r, flags.glob));
|
||||
}
|
||||
@ -781,7 +797,9 @@ regexp_matches(PG_FUNCTION_ARGS)
|
||||
|
||||
/* be sure to copy the input string into the multi-call ctx */
|
||||
matchctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
|
||||
flags, false, true, false);
|
||||
flags,
|
||||
PG_GET_COLLATION(),
|
||||
false, true, false);
|
||||
|
||||
/* Pre-create workspace that build_regexp_matches_result needs */
|
||||
matchctx->elems = (Datum *) palloc(sizeof(Datum) * matchctx->npatterns);
|
||||
@ -830,6 +848,7 @@ regexp_matches_no_flags(PG_FUNCTION_ARGS)
|
||||
*/
|
||||
static regexp_matches_ctx *
|
||||
setup_regexp_matches(text *orig_str, text *pattern, text *flags,
|
||||
Oid collation,
|
||||
bool force_glob, bool use_subpatterns,
|
||||
bool ignore_degenerate)
|
||||
{
|
||||
@ -868,7 +887,7 @@ setup_regexp_matches(text *orig_str, text *pattern, text *flags,
|
||||
}
|
||||
|
||||
/* set up the compiled pattern */
|
||||
cpattern = RE_compile_and_cache(pattern, re_flags.cflags);
|
||||
cpattern = RE_compile_and_cache(pattern, re_flags.cflags, collation);
|
||||
|
||||
/* do we want to remember subpatterns? */
|
||||
if (use_subpatterns && cpattern->re_nsub > 0)
|
||||
@ -1039,7 +1058,9 @@ regexp_split_to_table(PG_FUNCTION_ARGS)
|
||||
|
||||
/* be sure to copy the input string into the multi-call ctx */
|
||||
splitctx = setup_regexp_matches(PG_GETARG_TEXT_P_COPY(0), pattern,
|
||||
flags, true, false, true);
|
||||
flags,
|
||||
PG_GET_COLLATION(),
|
||||
true, false, true);
|
||||
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
funcctx->user_fctx = (void *) splitctx;
|
||||
@ -1083,6 +1104,7 @@ regexp_split_to_array(PG_FUNCTION_ARGS)
|
||||
splitctx = setup_regexp_matches(PG_GETARG_TEXT_PP(0),
|
||||
PG_GETARG_TEXT_PP(1),
|
||||
PG_GETARG_TEXT_PP_IF_EXISTS(2),
|
||||
PG_GET_COLLATION(),
|
||||
true, false, true);
|
||||
|
||||
while (splitctx->next_match <= splitctx->nmatches)
|
||||
|
Reference in New Issue
Block a user