1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-06 07:49:08 +03:00

Control collation behavior with a method table.

Previously, behavior branched based on the provider. A method table is
less error-prone and more flexible.

The ctype behavior will be addressed in an upcoming commit.

Reviewed-by: Andreas Karlsson
Discussion: https://postgr.es/m/2830211e1b6e6a2e26d845780b03e125281ea17b.camel%40j-davis.com
This commit is contained in:
Jeff Davis
2025-01-08 14:26:33 -08:00
parent 4f5cef2607
commit a2f17f004d
4 changed files with 180 additions and 176 deletions

View File

@@ -58,13 +58,14 @@ extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
#ifdef USE_ICU
/*
* Prototypes for the ICU-backed routines.
*
* For strncoll_icu, strnxfrm_icu and strnxfrm_prefix_icu two opening lines
* are shown, one 'extern' and one 'static', followed by a single shared
* parameter list: these routines change from external to file-local linkage.
* They are referenced by the collate_methods_icu tables in this file.
*/
extern UCollator *pg_ucol_open(const char *loc_str);
/* String comparison; a length argument of -1 means NUL-terminated. */
extern int strncoll_icu(const char *arg1, ssize_t len1,
static int strncoll_icu(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
/* Transform into dest/destsize; 'srclen' of -1 means NUL-terminated. */
extern size_t strnxfrm_icu(char *dest, size_t destsize,
static size_t strnxfrm_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
/* Prefix form of the transform; 'srclen' of -1 means NUL-terminated. */
extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
extern char *get_collation_actual_version_icu(const char *collcollate);
@@ -83,12 +84,20 @@ static UConverter *icu_converter = NULL;
/* Forward declarations of file-local helpers. */
static UCollator *make_icu_collator(const char *iculocstr,
const char *icurules);
/*
* Former names of the non-UTF8 variants; they are renamed to strncoll_icu
* and strnxfrm_prefix_icu, whose declarations follow.
*/
static int strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
static size_t strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
/*
* Non-UTF8 variants. Both assert that the database encoding is not UTF8
* (strncoll_icu only when HAVE_UCOL_STRCOLLUTF8 is defined).
*/
static int strncoll_icu(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
static size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
/*
* UTF8 variants. Both assert that the database encoding is UTF8.
* strncoll_icu_utf8 is only compiled when HAVE_UCOL_STRCOLLUTF8 is defined.
*/
#ifdef HAVE_UCOL_STRCOLLUTF8
static int strncoll_icu_utf8(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
#endif
static size_t strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
static void init_icu_converter(void);
static size_t uchar_length(UConverter *converter,
const char *str, int32_t len);
@@ -108,6 +117,25 @@ static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
/*
 * Collation method table for ICU locales in databases whose encoding is
 * not UTF8; create_pg_locale_icu() installs it in that case.
 */
static const struct collate_methods collate_methods_icu = {
	.strxfrm_is_safe = true,
	.strnxfrm_prefix = strnxfrm_prefix_icu,
	.strnxfrm = strnxfrm_icu,
	.strncoll = strncoll_icu,
};
/*
 * Collation method table for ICU locales in UTF8 databases;
 * create_pg_locale_icu() installs it when the database encoding is PG_UTF8.
 *
 * strncoll_icu_utf8 only exists when HAVE_UCOL_STRCOLLUTF8 is defined, so
 * builds without it use strncoll_icu for comparisons instead.
 */
static const struct collate_methods collate_methods_icu_utf8 = {
	.strxfrm_is_safe = true,
	.strnxfrm_prefix = strnxfrm_prefix_icu_utf8,
	.strnxfrm = strnxfrm_icu,
#ifndef HAVE_UCOL_STRCOLLUTF8
	.strncoll = strncoll_icu,
#else
	.strncoll = strncoll_icu_utf8,
#endif
};
#endif
pg_locale_t
@@ -174,6 +202,10 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result->deterministic = deterministic;
result->collate_is_c = false;
result->ctype_is_c = false;
if (GetDatabaseEncoding() == PG_UTF8)
result->collate = &collate_methods_icu_utf8;
else
result->collate = &collate_methods_icu;
return result;
#else
@@ -408,42 +440,36 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
}
/*
* strncoll_icu
* strncoll_icu_utf8
*
* Compare arg1 and arg2 by passing them, unconverted, to ucol_strcollUTF8()
* with the locale's ICU collator. An argument length of -1 means the string
* is NUL-terminated.
*
* Two variants of the body are shown together. strncoll_icu_utf8 asserts
* that the database encoding is UTF8 and always calls ucol_strcollUTF8().
* The earlier strncoll_icu tested the encoding at run time and handed
* non-UTF8 input to strncoll_icu_no_utf8().
*
* Returns the value produced by ucol_strcollUTF8(). Raises ERROR
* ("collation failed: ...") if ICU reports a failure status.
*
* Only compiled when HAVE_UCOL_STRCOLLUTF8 is defined.
*/
#ifdef HAVE_UCOL_STRCOLLUTF8
int
strncoll_icu(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
pg_locale_t locale)
strncoll_icu_utf8(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
pg_locale_t locale)
{
int result;
UErrorCode status;
Assert(locale->provider == COLLPROVIDER_ICU);
#ifdef HAVE_UCOL_STRCOLLUTF8
if (GetDatabaseEncoding() == PG_UTF8)
{
UErrorCode status;
/* the method tables select this routine only for UTF8 databases */
Assert(GetDatabaseEncoding() == PG_UTF8);
status = U_ZERO_ERROR;
result = ucol_strcollUTF8(locale->info.icu.ucol,
arg1, len1,
arg2, len2,
&status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("collation failed: %s", u_errorName(status))));
}
else
#endif
{
result = strncoll_icu_no_utf8(arg1, len1, arg2, len2, locale);
}
/* start from the no-error value; ICU reports failure through status */
status = U_ZERO_ERROR;
result = ucol_strcollUTF8(locale->info.icu.ucol,
arg1, len1,
arg2, len2,
&status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("collation failed: %s", u_errorName(status))));
return result;
}
#endif
/* 'srclen' of -1 means the strings are NUL-terminated */
size_t
@@ -494,37 +520,32 @@ strnxfrm_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
/*
* strnxfrm_prefix_icu_utf8
*
* Produce sort-key bytes for 'src' by iterating over it as UTF-8
* (uiter_setUTF8) and calling ucol_nextSortKeyPart() with the locale's ICU
* collator. 'dest' and 'destsize' are handed to ICU as the output buffer
* and its size.
*
* 'srclen' of -1 means the strings are NUL-terminated.
*
* Two variants of the body are shown together. strnxfrm_prefix_icu_utf8
* asserts that the database encoding is UTF8. The earlier
* strnxfrm_prefix_icu tested the encoding at run time and handed non-UTF8
* input to strnxfrm_prefix_icu_no_utf8().
*
* Returns the value produced by ucol_nextSortKeyPart(). Raises ERROR
* ("sort key generation failed: ...") if ICU reports a failure status.
*/
size_t
strnxfrm_prefix_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale)
strnxfrm_prefix_icu_utf8(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale)
{
size_t result;
UCharIterator iter;
uint32_t state[2];
UErrorCode status;
Assert(locale->provider == COLLPROVIDER_ICU);
if (GetDatabaseEncoding() == PG_UTF8)
{
UCharIterator iter;
uint32_t state[2];
UErrorCode status;
/* the UTF8 method table is the one that references this routine */
Assert(GetDatabaseEncoding() == PG_UTF8);
uiter_setUTF8(&iter, src, srclen);
state[0] = state[1] = 0; /* won't need that again */
status = U_ZERO_ERROR;
result = ucol_nextSortKeyPart(locale->info.icu.ucol,
&iter,
state,
(uint8_t *) dest,
destsize,
&status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("sort key generation failed: %s",
u_errorName(status))));
}
else
result = strnxfrm_prefix_icu_no_utf8(dest, destsize, src, srclen,
locale);
/* iterate over the source bytes as UTF-8, with no conversion step */
uiter_setUTF8(&iter, src, srclen);
state[0] = state[1] = 0; /* won't need that again */
status = U_ZERO_ERROR;
result = ucol_nextSortKeyPart(locale->info.icu.ucol,
&iter,
state,
(uint8_t *) dest,
destsize,
&status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("sort key generation failed: %s",
u_errorName(status))));
return result;
}
@@ -653,7 +674,7 @@ u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
}
/*
* strncoll_icu_no_utf8
* strncoll_icu
*
* Convert the arguments from the database encoding to UChar strings, then
* call ucol_strcoll(). An argument length of -1 means that the string is
@@ -663,8 +684,8 @@ u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
* caller should call that instead.
*/
static int
strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2, pg_locale_t locale)
strncoll_icu(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2, pg_locale_t locale)
{
char sbuf[TEXTBUFLEN];
char *buf = sbuf;
@@ -677,6 +698,8 @@ strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
int result;
Assert(locale->provider == COLLPROVIDER_ICU);
/* if encoding is UTF8, use more efficient strncoll_icu_utf8 */
#ifdef HAVE_UCOL_STRCOLLUTF8
Assert(GetDatabaseEncoding() != PG_UTF8);
#endif
@@ -710,9 +733,9 @@ strncoll_icu_no_utf8(const char *arg1, ssize_t len1,
/* 'srclen' of -1 means the strings are NUL-terminated */
static size_t
strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale)
strnxfrm_prefix_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale)
{
char sbuf[TEXTBUFLEN];
char *buf = sbuf;
@@ -725,6 +748,8 @@ strnxfrm_prefix_icu_no_utf8(char *dest, size_t destsize,
Size result_bsize;
Assert(locale->provider == COLLPROVIDER_ICU);
/* if encoding is UTF8, use more efficient strnxfrm_prefix_icu_utf8 */
Assert(GetDatabaseEncoding() != PG_UTF8);
init_icu_converter();