mirror of
https://github.com/postgres/postgres.git
synced 2025-06-30 21:42:05 +03:00
Add SQL function CASEFOLD().
Useful for caseless matching. Similar to LOWER(), but avoids edge-case problems with using LOWER() for caseless matching. For collations that support it, CASEFOLD() handles characters with more than two case variations or multi-character case variations. Some characters may fold to uppercase. The results of case folding are also more stable across Unicode versions than LOWER() or UPPER(). Discussion: https://postgr.es/m/a1886ddfcd8f60cb3e905c93009b646b4cfb74c5.camel%40j-davis.com Reviewed-by: Ian Lawrence Barwick
This commit is contained in:
@ -1819,6 +1819,75 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* collation-aware, wide-character-aware case folding
|
||||
*
|
||||
* We pass the number of bytes so we can pass varlena and char*
|
||||
* to this function. The result is a palloc'd, null-terminated string.
|
||||
*/
|
||||
char *
|
||||
str_casefold(const char *buff, size_t nbytes, Oid collid)
|
||||
{
|
||||
char *result;
|
||||
pg_locale_t mylocale;
|
||||
|
||||
if (!buff)
|
||||
return NULL;
|
||||
|
||||
if (!OidIsValid(collid))
|
||||
{
|
||||
/*
|
||||
* This typically means that the parser could not resolve a conflict
|
||||
* of implicit collations, so report it that way.
|
||||
*/
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INDETERMINATE_COLLATION),
|
||||
errmsg("could not determine which collation to use for %s function",
|
||||
"lower()"),
|
||||
errhint("Use the COLLATE clause to set the collation explicitly.")));
|
||||
}
|
||||
|
||||
if (GetDatabaseEncoding() != PG_UTF8)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("Unicode case folding can only be performed if server encoding is UTF8")));
|
||||
|
||||
mylocale = pg_newlocale_from_collation(collid);
|
||||
|
||||
/* C/POSIX collations use this path regardless of database encoding */
|
||||
if (mylocale->ctype_is_c)
|
||||
{
|
||||
result = asc_tolower(buff, nbytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
const char *src = buff;
|
||||
size_t srclen = nbytes;
|
||||
size_t dstsize;
|
||||
char *dst;
|
||||
size_t needed;
|
||||
|
||||
/* first try buffer of equal size plus terminating NUL */
|
||||
dstsize = srclen + 1;
|
||||
dst = palloc(dstsize);
|
||||
|
||||
needed = pg_strfold(dst, dstsize, src, srclen, mylocale);
|
||||
if (needed + 1 > dstsize)
|
||||
{
|
||||
/* grow buffer if needed and retry */
|
||||
dstsize = needed + 1;
|
||||
dst = repalloc(dst, dstsize);
|
||||
needed = pg_strfold(dst, dstsize, src, srclen, mylocale);
|
||||
Assert(needed + 1 <= dstsize);
|
||||
}
|
||||
|
||||
Assert(dst[needed] == '\0');
|
||||
result = dst;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* ASCII-only lower function
|
||||
*
|
||||
|
Reference in New Issue
Block a user