1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-19 13:42:17 +03:00

Control ctype behavior internally with a method table.

Previously, pattern matching and case mapping behavior branched based
on the provider. Refactor to use a method table, which is less
error-prone.

This is also a step toward multiple provider versions, which we may
want to support in the future.

Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/2830211e1b6e6a2e26d845780b03e125281ea17b.camel%40j-davis.com
This commit is contained in:
Jeff Davis
2025-07-01 07:42:39 -07:00
parent d81dcc8d62
commit 5a38104b36
9 changed files with 686 additions and 508 deletions

View File

@@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
/*
 * Provider-specific routines declared here and defined outside the portion
 * of the file visible in this view.
 *
 * NOTE(review): judging by the names, the first builds a pg_locale_t for the
 * libc provider and the second reports libc's collation version -- confirm
 * against the definitions.
 */
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
extern char *get_collation_actual_version_libc(const char *collcollate);
/*
 * Case-mapping routines, grouped by suffix: _builtin, _icu and _libc.
 *
 * All of them share one signature: a destination buffer and its size, a
 * source string and its length, and the locale; each returns a size_t.
 *
 * Note that the _libc group declares no strfold variant.
 */
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
/* GUC settings */
char *locale_messages;
char *locale_monetary;
@@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context)
Assert((result->collate_is_c && result->collate == NULL) ||
(!result->collate_is_c && result->collate != NULL));
Assert((result->ctype_is_c && result->ctype == NULL) ||
(!result->ctype_is_c && result->ctype != NULL));
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
&isnull);
if (!isnull)
@@ -1256,77 +1234,31 @@ size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
			pg_locale_t locale)
{
	/*
	 * pg_strlower()
	 *
	 * Dispatch to the strlower method of the locale's ctype method table,
	 * forwarding every argument unchanged and returning the method's result.
	 *
	 * dst, dstsize: destination buffer and its size.
	 * src, srclen: source string and its length.
	 * locale: supplies the method table through locale->ctype.
	 *
	 * NOTE(review): the return value is presumably the size needed for the
	 * converted string -- confirm against the provider implementations.
	 *
	 * No per-provider branching is done here; choosing the implementation is
	 * entirely the method table's job.  locale->ctype is dereferenced
	 * unconditionally, and the assertions in create_pg_locale() show it is
	 * NULL when ctype_is_c is set, so callers must handle that case before
	 * calling this function.
	 */
	return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
/*
 * pg_strtitle()
 *
 * Dispatch to the strtitle method of the locale's ctype method table,
 * forwarding every argument unchanged and returning the method's result.
 *
 * dst, dstsize: destination buffer and its size.
 * src, srclen: source string and its length.
 * locale: supplies the method table through locale->ctype.
 *
 * NOTE(review): the return value is presumably the size needed for the
 * converted string -- confirm against the provider implementations.
 *
 * No per-provider branching is done here.  locale->ctype is dereferenced
 * unconditionally, and the assertions in create_pg_locale() show it is NULL
 * when ctype_is_c is set, so callers must handle that case first.
 */
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
			pg_locale_t locale)
{
	return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
}
/*
 * pg_strupper()
 *
 * Dispatch to the strupper method of the locale's ctype method table,
 * forwarding every argument unchanged and returning the method's result.
 *
 * dst, dstsize: destination buffer and its size.
 * src, srclen: source string and its length.
 * locale: supplies the method table through locale->ctype.
 *
 * NOTE(review): the return value is presumably the size needed for the
 * converted string -- confirm against the provider implementations.
 *
 * No per-provider branching is done here.  locale->ctype is dereferenced
 * unconditionally, and the assertions in create_pg_locale() show it is NULL
 * when ctype_is_c is set, so callers must handle that case first.
 */
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
			pg_locale_t locale)
{
	return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
}
/*
 * pg_strfold()
 *
 * Dispatch through the locale's ctype method table.  The strfold method is
 * optional: when the table provides one it is used, otherwise the strlower
 * method of the same table is called instead.  Arguments are forwarded
 * unchanged and the chosen method's result is returned.
 *
 * dst, dstsize: destination buffer and its size.
 * src, srclen: source string and its length.
 * locale: supplies the method table through locale->ctype.
 *
 * NOTE(review): the return value is presumably the size needed for the
 * converted string -- confirm against the provider implementations.
 *
 * locale->ctype is dereferenced unconditionally, and the assertions in
 * create_pg_locale() show it is NULL when ctype_is_c is set, so callers must
 * handle that case first.
 */
size_t
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
		   pg_locale_t locale)
{
	if (locale->ctype->strfold)
		return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
	else
		return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
}
/*
@@ -1463,6 +1395,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
}
/*
 * char_is_cased()
 *
 * Approximate check of whether a character is case-varying, answered by the
 * char_is_cased method of the locale's ctype method table.  Only a single
 * byte is passed in, so for a multibyte encoding any non-ASCII char is
 * simply assumed to be case-varying.
 */
bool
char_is_cased(char ch, pg_locale_t locale)
{
	bool		cased;

	cased = (*locale->ctype->char_is_cased) (ch, locale);

	return cased;
}
/*
 * char_tolower_enabled()
 *
 * Report whether the locale's ctype method table supplies a char_tolower
 * method, i.e. whether the provider supports char_tolower().
 */
bool
char_tolower_enabled(pg_locale_t locale)
{
	if (locale->ctype->char_tolower == NULL)
		return false;

	return true;
}
/*
 * char_tolower()
 *
 * Lowercase one char of a single-byte encoding using the char_tolower method
 * of the locale's ctype method table.  The method pointer is called without
 * a NULL check here; char_tolower_enabled() tells whether it is set.
 */
char
char_tolower(unsigned char ch, pg_locale_t locale)
{
	char		lowered;

	lowered = (*locale->ctype->char_tolower) (ch, locale);

	return lowered;
}
/*
* Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale.