1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-18 05:01:01 +03:00

Avoid global LC_CTYPE dependency in pg_locale_icu.c.

ICU still depends on libc for compatibility with certain historical
behavior for single-byte encodings. Make the dependency explicit by
holding a locale_t object when required.

We should consider a better solution in the future, such as decoding
the text to UTF-32 and using u_tolower(). That would be a behavior
change and require additional infrastructure though; so for now, just
avoid the global LC_CTYPE dependency.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
This commit is contained in:
Jeff Davis
2025-12-16 15:32:57 -08:00
parent 87b2968df0
commit 0a90df58cf
2 changed files with 44 additions and 4 deletions

View File

@@ -244,6 +244,29 @@ static const struct ctype_methods ctype_methods_icu = {
.wc_toupper = toupper_icu, .wc_toupper = toupper_icu,
.wc_tolower = tolower_icu, .wc_tolower = tolower_icu,
}; };
/*
* ICU still depends on libc for compatibility with certain historical
* behavior for single-byte encodings. See downcase_ident_icu().
*
* XXX: consider fixing by decoding the single byte into a code point, and
* using u_tolower().
*/
static locale_t
make_libc_ctype_locale(const char *ctype)
{
locale_t loc;
#ifndef WIN32
loc = newlocale(LC_CTYPE_MASK, ctype, NULL);
#else
loc = _create_locale(LC_ALL, ctype);
#endif
if (!loc)
report_newlocale_failure(ctype);
return loc;
}
#endif #endif
pg_locale_t pg_locale_t
@@ -254,6 +277,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
const char *iculocstr; const char *iculocstr;
const char *icurules = NULL; const char *icurules = NULL;
UCollator *collator; UCollator *collator;
locale_t loc = (locale_t) 0;
pg_locale_t result; pg_locale_t result;
if (collid == DEFAULT_COLLATION_OID) if (collid == DEFAULT_COLLATION_OID)
@@ -276,6 +300,18 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
if (!isnull) if (!isnull)
icurules = TextDatumGetCString(datum); icurules = TextDatumGetCString(datum);
/* libc only needed for default locale and single-byte encoding */
if (pg_database_encoding_max_length() == 1)
{
const char *ctype;
datum = SysCacheGetAttrNotNull(DATABASEOID, tp,
Anum_pg_database_datctype);
ctype = TextDatumGetCString(datum);
loc = make_libc_ctype_locale(ctype);
}
ReleaseSysCache(tp); ReleaseSysCache(tp);
} }
else else
@@ -306,6 +342,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct)); result = MemoryContextAllocZero(context, sizeof(struct pg_locale_struct));
result->icu.locale = MemoryContextStrdup(context, iculocstr); result->icu.locale = MemoryContextStrdup(context, iculocstr);
result->icu.ucol = collator; result->icu.ucol = collator;
result->icu.lt = loc;
result->deterministic = deterministic; result->deterministic = deterministic;
result->collate_is_c = false; result->collate_is_c = false;
result->ctype_is_c = false; result->ctype_is_c = false;
@@ -578,17 +615,19 @@ downcase_ident_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale) ssize_t srclen, pg_locale_t locale)
{ {
int i; int i;
bool enc_is_single_byte; bool libc_lower;
locale_t lt = locale->icu.lt;
libc_lower = lt && (pg_database_encoding_max_length() == 1);
enc_is_single_byte = pg_database_encoding_max_length() == 1;
for (i = 0; i < srclen && i < dstsize; i++) for (i = 0; i < srclen && i < dstsize; i++)
{ {
unsigned char ch = (unsigned char) src[i]; unsigned char ch = (unsigned char) src[i];
if (ch >= 'A' && ch <= 'Z') if (ch >= 'A' && ch <= 'Z')
ch = pg_ascii_tolower(ch); ch = pg_ascii_tolower(ch);
else if (enc_is_single_byte && IS_HIGHBIT_SET(ch) && isupper(ch)) else if (libc_lower && IS_HIGHBIT_SET(ch) && isupper_l(ch, lt))
ch = tolower(ch); ch = tolower_l(ch, lt);
dst[i] = (char) ch; dst[i] = (char) ch;
} }

View File

@@ -167,6 +167,7 @@ struct pg_locale_struct
{ {
const char *locale; const char *locale;
UCollator *ucol; UCollator *ucol;
locale_t lt;
} icu; } icu;
#endif #endif
}; };