1
0
mirror of https://github.com/postgres/postgres.git synced 2025-04-20 00:42:27 +03:00

Make collation not depend on setlocale().

Now that the result of pg_newlocale_from_collation() is always
non-NULL, we can move the collate_is_c and ctype_is_c flags into
pg_locale_t. That simplifies the logic in lc_collate_is_c() and
lc_ctype_is_c(), removing the dependence on setlocale().

This commit also eliminates the multi-stage initialization of the
collation cache.

As long as we have catalog access, it's now safe to call
pg_newlocale_from_collation() without checking lc_collate_is_c()
first.

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
This commit is contained in:
Jeff Davis 2024-07-30 00:58:06 -07:00
parent 9b282a9359
commit 72fe6d24a3
4 changed files with 81 additions and 154 deletions

View File

@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
typedef struct
{
Oid collid; /* hash key: pg_collation OID */
bool collate_is_c; /* is collation's LC_COLLATE C? */
bool ctype_is_c; /* is collation's LC_CTYPE C? */
bool flags_valid; /* true if above flags are valid */
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
/* needed for simplehash */
@ -1225,29 +1222,13 @@ IsoLocaleName(const char *winlocname)
/*
* Cache mechanism for collation information.
*
* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
* (or POSIX), so we can optimize a few code paths in various places.
* For the built-in C and POSIX collations, we can know that without even
* doing a cache lookup, but we want to support aliases for C/POSIX too.
* For the "default" collation, there are separate static cache variables,
* since consulting the pg_collation catalog doesn't tell us what we need.
*
* Also, if a pg_locale_t has been requested for a collation, we cache that
* for the life of a backend.
*
* Note that some code relies on the flags not reporting false negatives
* (that is, saying it's not C when it is). For example, char2wchar()
* could fail if the locale is C, so str_tolower() shouldn't call it
* in that case.
*
* Note that we currently lack any way to flush the cache. Since we don't
* support ALTER COLLATION, this is OK. The worst case is that someone
* drops a collation, and a useless cache entry hangs around in existing
* backends.
*/
static collation_cache_entry *
lookup_collation_cache(Oid collation, bool set_flags)
lookup_collation_cache(Oid collation)
{
collation_cache_entry *cache_entry;
bool found;
@ -1271,59 +1252,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
* Make sure cache entry is marked invalid, in case we fail before
* setting things.
*/
cache_entry->flags_valid = false;
cache_entry->locale = 0;
}
if (set_flags && !cache_entry->flags_valid)
{
/* Attempt to set the flags */
HeapTuple tp;
Form_pg_collation collform;
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
if (!HeapTupleIsValid(tp))
elog(ERROR, "cache lookup failed for collation %u", collation);
collform = (Form_pg_collation) GETSTRUCT(tp);
if (collform->collprovider == COLLPROVIDER_BUILTIN)
{
Datum datum;
const char *colllocale;
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
colllocale = TextDatumGetCString(datum);
cache_entry->collate_is_c = true;
cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
}
else if (collform->collprovider == COLLPROVIDER_LIBC)
{
Datum datum;
const char *collcollate;
const char *collctype;
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
collcollate = TextDatumGetCString(datum);
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
collctype = TextDatumGetCString(datum);
cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
(strcmp(collcollate, "POSIX") == 0));
cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
(strcmp(collctype, "POSIX") == 0));
}
else
{
cache_entry->collate_is_c = false;
cache_entry->ctype_is_c = false;
}
cache_entry->flags_valid = true;
ReleaseSysCache(tp);
}
return cache_entry;
}
@ -1341,47 +1272,6 @@ lc_collate_is_c(Oid collation)
if (!OidIsValid(collation))
return false;
/*
* If we're asked about the default collation, we have to inquire of the C
* library. Cache the result so we only have to compute it once.
*/
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
const char *localeptr;
if (result >= 0)
return (bool) result;
if (default_locale.provider == COLLPROVIDER_BUILTIN)
{
result = true;
return (bool) result;
}
else if (default_locale.provider == COLLPROVIDER_ICU)
{
result = false;
return (bool) result;
}
else if (default_locale.provider == COLLPROVIDER_LIBC)
{
localeptr = setlocale(LC_CTYPE, NULL);
if (!localeptr)
elog(ERROR, "invalid LC_CTYPE setting");
}
else
elog(ERROR, "unexpected collation provider '%c'",
default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
else if (strcmp(localeptr, "POSIX") == 0)
result = true;
else
result = false;
return (bool) result;
}
/*
* If we're asked about the built-in C/POSIX collations, we know that.
*/
@ -1392,7 +1282,7 @@ lc_collate_is_c(Oid collation)
/*
* Otherwise, we have to consult pg_collation, but we cache that.
*/
return (lookup_collation_cache(collation, true))->collate_is_c;
return pg_newlocale_from_collation(collation)->collate_is_c;
}
/*
@ -1408,46 +1298,6 @@ lc_ctype_is_c(Oid collation)
if (!OidIsValid(collation))
return false;
/*
* If we're asked about the default collation, we have to inquire of the C
* library. Cache the result so we only have to compute it once.
*/
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
const char *localeptr;
if (result >= 0)
return (bool) result;
if (default_locale.provider == COLLPROVIDER_BUILTIN)
{
localeptr = default_locale.info.builtin.locale;
}
else if (default_locale.provider == COLLPROVIDER_ICU)
{
result = false;
return (bool) result;
}
else if (default_locale.provider == COLLPROVIDER_LIBC)
{
localeptr = setlocale(LC_CTYPE, NULL);
if (!localeptr)
elog(ERROR, "invalid LC_CTYPE setting");
}
else
elog(ERROR, "unexpected collation provider '%c'",
default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
else if (strcmp(localeptr, "POSIX") == 0)
result = true;
else
result = false;
return (bool) result;
}
/*
* If we're asked about the built-in C/POSIX collations, we know that.
*/
@ -1458,7 +1308,7 @@ lc_ctype_is_c(Oid collation)
/*
* Otherwise, we have to consult pg_collation, but we cache that.
*/
return (lookup_collation_cache(collation, true))->ctype_is_c;
return pg_newlocale_from_collation(collation)->ctype_is_c;
}
/* simple subroutine for reporting errors from newlocale() */
@ -1647,6 +1497,9 @@ init_database_collation(void)
builtin_validate_locale(dbform->encoding, datlocale);
default_locale.collate_is_c = true;
default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
default_locale.info.builtin.locale = MemoryContextStrdup(
TopMemoryContext, datlocale);
}
@ -1658,6 +1511,9 @@ init_database_collation(void)
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
datlocale = TextDatumGetCString(datum);
default_locale.collate_is_c = false;
default_locale.ctype_is_c = false;
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
@ -1678,6 +1534,11 @@ init_database_collation(void)
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
datctype = TextDatumGetCString(datum);
default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
(strcmp(datcollate, "POSIX") == 0);
default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
(strcmp(datctype, "POSIX") == 0);
make_libc_collator(datcollate, datctype, &default_locale);
}
@ -1712,7 +1573,7 @@ pg_newlocale_from_collation(Oid collid)
if (collid == DEFAULT_COLLATION_OID)
return &default_locale;
cache_entry = lookup_collation_cache(collid, false);
cache_entry = lookup_collation_cache(collid);
if (cache_entry->locale == 0)
{
@ -1741,6 +1602,9 @@ pg_newlocale_from_collation(Oid collid)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
locstr = TextDatumGetCString(datum);
result.collate_is_c = true;
result.ctype_is_c = (strcmp(locstr, "C") == 0);
builtin_validate_locale(GetDatabaseEncoding(), locstr);
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
@ -1756,6 +1620,11 @@ pg_newlocale_from_collation(Oid collid)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
collctype = TextDatumGetCString(datum);
result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
(strcmp(collcollate, "POSIX") == 0);
result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
(strcmp(collctype, "POSIX") == 0);
make_libc_collator(collcollate, collctype, &result);
}
else if (collform->collprovider == COLLPROVIDER_ICU)
@ -1766,6 +1635,9 @@ pg_newlocale_from_collation(Oid collid)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
iculocstr = TextDatumGetCString(datum);
result.collate_is_c = false;
result.ctype_is_c = false;
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);

View File

@ -69,11 +69,25 @@ extern void cache_locale_time(void);
/*
* We use a discriminated union to hold either a locale_t or an ICU collator.
* pg_locale_t is occasionally checked for truth, so make it a pointer.
*
* Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
* (or POSIX), so we can optimize a few code paths in various places. For the
* built-in C and POSIX collations, we can know that without even doing a
* cache lookup, but we want to support aliases for C/POSIX too. For the
* "default" collation, the flags are set on default_locale from pg_database
* in init_database_collation(), since consulting the pg_collation catalog
* doesn't tell us what we need.
*
* Note that some code relies on the flags not reporting false negatives
* (that is, saying it's not C when it is). For example, char2wchar()
* could fail if the locale is C, so str_tolower() shouldn't call it
* in that case.
*/
struct pg_locale_struct
{
char provider;
bool deterministic;
bool collate_is_c;
bool ctype_is_c;
union
{
struct

View File

@ -9,6 +9,32 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
\endif
SET client_encoding TO UTF8;
--
-- Test builtin "C"
--
CREATE COLLATION regress_builtin_c (
provider = builtin, locale = 'C');
-- non-ASCII characters are unchanged
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
?column?
----------
t
(1 row)
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
?column?
----------
t
(1 row)
-- non-ASCII characters are not alphabetic
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
?column?
----------
t
(1 row)
DROP COLLATION regress_builtin_c;
--
-- Test PG_C_UTF8
--
CREATE COLLATION regress_pg_c_utf8 (

View File

@ -11,6 +11,21 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
SET client_encoding TO UTF8;
--
-- Test builtin "C"
--
CREATE COLLATION regress_builtin_c (
provider = builtin, locale = 'C');
-- non-ASCII characters are unchanged
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
-- non-ASCII characters are not alphabetic
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
DROP COLLATION regress_builtin_c;
--
-- Test PG_C_UTF8
--