mirror of
https://github.com/postgres/postgres.git
synced 2025-04-20 00:42:27 +03:00
Make collation not depend on setlocale().
Now that the result of pg_newlocale_from_collation() is always non-NULL, we can move the collate_is_c and ctype_is_c flags into pg_locale_t. That simplifies the logic in lc_collate_is_c() and lc_ctype_is_c(), removing the dependence on setlocale(). This commit also eliminates the multi-stage initialization of the collation cache. As long as we have catalog access, it is now safe to call pg_newlocale_from_collation() without checking lc_collate_is_c() first. Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org Reviewed-by: Peter Eisentraut, Andreas Karlsson
This commit is contained in:
parent
9b282a9359
commit
72fe6d24a3
@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
|
||||
typedef struct
|
||||
{
|
||||
Oid collid; /* hash key: pg_collation OID */
|
||||
bool collate_is_c; /* is collation's LC_COLLATE C? */
|
||||
bool ctype_is_c; /* is collation's LC_CTYPE C? */
|
||||
bool flags_valid; /* true if above flags are valid */
|
||||
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
|
||||
|
||||
/* needed for simplehash */
|
||||
@ -1225,29 +1222,13 @@ IsoLocaleName(const char *winlocname)
|
||||
/*
|
||||
* Cache mechanism for collation information.
|
||||
*
|
||||
* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
|
||||
* (or POSIX), so we can optimize a few code paths in various places.
|
||||
* For the built-in C and POSIX collations, we can know that without even
|
||||
* doing a cache lookup, but we want to support aliases for C/POSIX too.
|
||||
* For the "default" collation, there are separate static cache variables,
|
||||
* since consulting the pg_collation catalog doesn't tell us what we need.
|
||||
*
|
||||
* Also, if a pg_locale_t has been requested for a collation, we cache that
|
||||
* for the life of a backend.
|
||||
*
|
||||
* Note that some code relies on the flags not reporting false negatives
|
||||
* (that is, saying it's not C when it is). For example, char2wchar()
|
||||
* could fail if the locale is C, so str_tolower() shouldn't call it
|
||||
* in that case.
|
||||
*
|
||||
* Note that we currently lack any way to flush the cache. Since we don't
|
||||
* support ALTER COLLATION, this is OK. The worst case is that someone
|
||||
* drops a collation, and a useless cache entry hangs around in existing
|
||||
* backends.
|
||||
*/
|
||||
|
||||
static collation_cache_entry *
|
||||
lookup_collation_cache(Oid collation, bool set_flags)
|
||||
lookup_collation_cache(Oid collation)
|
||||
{
|
||||
collation_cache_entry *cache_entry;
|
||||
bool found;
|
||||
@ -1271,59 +1252,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
|
||||
* Make sure cache entry is marked invalid, in case we fail before
|
||||
* setting things.
|
||||
*/
|
||||
cache_entry->flags_valid = false;
|
||||
cache_entry->locale = 0;
|
||||
}
|
||||
|
||||
if (set_flags && !cache_entry->flags_valid)
|
||||
{
|
||||
/* Attempt to set the flags */
|
||||
HeapTuple tp;
|
||||
Form_pg_collation collform;
|
||||
|
||||
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
|
||||
if (!HeapTupleIsValid(tp))
|
||||
elog(ERROR, "cache lookup failed for collation %u", collation);
|
||||
collform = (Form_pg_collation) GETSTRUCT(tp);
|
||||
|
||||
if (collform->collprovider == COLLPROVIDER_BUILTIN)
|
||||
{
|
||||
Datum datum;
|
||||
const char *colllocale;
|
||||
|
||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
||||
colllocale = TextDatumGetCString(datum);
|
||||
|
||||
cache_entry->collate_is_c = true;
|
||||
cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
|
||||
}
|
||||
else if (collform->collprovider == COLLPROVIDER_LIBC)
|
||||
{
|
||||
Datum datum;
|
||||
const char *collcollate;
|
||||
const char *collctype;
|
||||
|
||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
|
||||
collcollate = TextDatumGetCString(datum);
|
||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
|
||||
collctype = TextDatumGetCString(datum);
|
||||
|
||||
cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
|
||||
(strcmp(collcollate, "POSIX") == 0));
|
||||
cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
|
||||
(strcmp(collctype, "POSIX") == 0));
|
||||
}
|
||||
else
|
||||
{
|
||||
cache_entry->collate_is_c = false;
|
||||
cache_entry->ctype_is_c = false;
|
||||
}
|
||||
|
||||
cache_entry->flags_valid = true;
|
||||
|
||||
ReleaseSysCache(tp);
|
||||
}
|
||||
|
||||
return cache_entry;
|
||||
}
|
||||
|
||||
@ -1341,47 +1272,6 @@ lc_collate_is_c(Oid collation)
|
||||
if (!OidIsValid(collation))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we're asked about the default collation, we have to inquire of the C
|
||||
* library. Cache the result so we only have to compute it once.
|
||||
*/
|
||||
if (collation == DEFAULT_COLLATION_OID)
|
||||
{
|
||||
static int result = -1;
|
||||
const char *localeptr;
|
||||
|
||||
if (result >= 0)
|
||||
return (bool) result;
|
||||
|
||||
if (default_locale.provider == COLLPROVIDER_BUILTIN)
|
||||
{
|
||||
result = true;
|
||||
return (bool) result;
|
||||
}
|
||||
else if (default_locale.provider == COLLPROVIDER_ICU)
|
||||
{
|
||||
result = false;
|
||||
return (bool) result;
|
||||
}
|
||||
else if (default_locale.provider == COLLPROVIDER_LIBC)
|
||||
{
|
||||
localeptr = setlocale(LC_CTYPE, NULL);
|
||||
if (!localeptr)
|
||||
elog(ERROR, "invalid LC_CTYPE setting");
|
||||
}
|
||||
else
|
||||
elog(ERROR, "unexpected collation provider '%c'",
|
||||
default_locale.provider);
|
||||
|
||||
if (strcmp(localeptr, "C") == 0)
|
||||
result = true;
|
||||
else if (strcmp(localeptr, "POSIX") == 0)
|
||||
result = true;
|
||||
else
|
||||
result = false;
|
||||
return (bool) result;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're asked about the built-in C/POSIX collations, we know that.
|
||||
*/
|
||||
@ -1392,7 +1282,7 @@ lc_collate_is_c(Oid collation)
|
||||
/*
|
||||
* Otherwise, we have to consult pg_collation, but we cache that.
|
||||
*/
|
||||
return (lookup_collation_cache(collation, true))->collate_is_c;
|
||||
return pg_newlocale_from_collation(collation)->collate_is_c;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -1408,46 +1298,6 @@ lc_ctype_is_c(Oid collation)
|
||||
if (!OidIsValid(collation))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* If we're asked about the default collation, we have to inquire of the C
|
||||
* library. Cache the result so we only have to compute it once.
|
||||
*/
|
||||
if (collation == DEFAULT_COLLATION_OID)
|
||||
{
|
||||
static int result = -1;
|
||||
const char *localeptr;
|
||||
|
||||
if (result >= 0)
|
||||
return (bool) result;
|
||||
|
||||
if (default_locale.provider == COLLPROVIDER_BUILTIN)
|
||||
{
|
||||
localeptr = default_locale.info.builtin.locale;
|
||||
}
|
||||
else if (default_locale.provider == COLLPROVIDER_ICU)
|
||||
{
|
||||
result = false;
|
||||
return (bool) result;
|
||||
}
|
||||
else if (default_locale.provider == COLLPROVIDER_LIBC)
|
||||
{
|
||||
localeptr = setlocale(LC_CTYPE, NULL);
|
||||
if (!localeptr)
|
||||
elog(ERROR, "invalid LC_CTYPE setting");
|
||||
}
|
||||
else
|
||||
elog(ERROR, "unexpected collation provider '%c'",
|
||||
default_locale.provider);
|
||||
|
||||
if (strcmp(localeptr, "C") == 0)
|
||||
result = true;
|
||||
else if (strcmp(localeptr, "POSIX") == 0)
|
||||
result = true;
|
||||
else
|
||||
result = false;
|
||||
return (bool) result;
|
||||
}
|
||||
|
||||
/*
|
||||
* If we're asked about the built-in C/POSIX collations, we know that.
|
||||
*/
|
||||
@ -1458,7 +1308,7 @@ lc_ctype_is_c(Oid collation)
|
||||
/*
|
||||
* Otherwise, we have to consult pg_collation, but we cache that.
|
||||
*/
|
||||
return (lookup_collation_cache(collation, true))->ctype_is_c;
|
||||
return pg_newlocale_from_collation(collation)->ctype_is_c;
|
||||
}
|
||||
|
||||
/* simple subroutine for reporting errors from newlocale() */
|
||||
@ -1647,6 +1497,9 @@ init_database_collation(void)
|
||||
|
||||
builtin_validate_locale(dbform->encoding, datlocale);
|
||||
|
||||
default_locale.collate_is_c = true;
|
||||
default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
|
||||
|
||||
default_locale.info.builtin.locale = MemoryContextStrdup(
|
||||
TopMemoryContext, datlocale);
|
||||
}
|
||||
@ -1658,6 +1511,9 @@ init_database_collation(void)
|
||||
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
|
||||
datlocale = TextDatumGetCString(datum);
|
||||
|
||||
default_locale.collate_is_c = false;
|
||||
default_locale.ctype_is_c = false;
|
||||
|
||||
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
|
||||
if (!isnull)
|
||||
icurules = TextDatumGetCString(datum);
|
||||
@ -1678,6 +1534,11 @@ init_database_collation(void)
|
||||
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
|
||||
datctype = TextDatumGetCString(datum);
|
||||
|
||||
default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
|
||||
(strcmp(datcollate, "POSIX") == 0);
|
||||
default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
|
||||
(strcmp(datctype, "POSIX") == 0);
|
||||
|
||||
make_libc_collator(datcollate, datctype, &default_locale);
|
||||
}
|
||||
|
||||
@ -1712,7 +1573,7 @@ pg_newlocale_from_collation(Oid collid)
|
||||
if (collid == DEFAULT_COLLATION_OID)
|
||||
return &default_locale;
|
||||
|
||||
cache_entry = lookup_collation_cache(collid, false);
|
||||
cache_entry = lookup_collation_cache(collid);
|
||||
|
||||
if (cache_entry->locale == 0)
|
||||
{
|
||||
@ -1741,6 +1602,9 @@ pg_newlocale_from_collation(Oid collid)
|
||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
||||
locstr = TextDatumGetCString(datum);
|
||||
|
||||
result.collate_is_c = true;
|
||||
result.ctype_is_c = (strcmp(locstr, "C") == 0);
|
||||
|
||||
builtin_validate_locale(GetDatabaseEncoding(), locstr);
|
||||
|
||||
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
|
||||
@ -1756,6 +1620,11 @@ pg_newlocale_from_collation(Oid collid)
|
||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
|
||||
collctype = TextDatumGetCString(datum);
|
||||
|
||||
result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
|
||||
(strcmp(collcollate, "POSIX") == 0);
|
||||
result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
|
||||
(strcmp(collctype, "POSIX") == 0);
|
||||
|
||||
make_libc_collator(collcollate, collctype, &result);
|
||||
}
|
||||
else if (collform->collprovider == COLLPROVIDER_ICU)
|
||||
@ -1766,6 +1635,9 @@ pg_newlocale_from_collation(Oid collid)
|
||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
||||
iculocstr = TextDatumGetCString(datum);
|
||||
|
||||
result.collate_is_c = false;
|
||||
result.ctype_is_c = false;
|
||||
|
||||
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
|
||||
if (!isnull)
|
||||
icurules = TextDatumGetCString(datum);
|
||||
|
@ -69,11 +69,25 @@ extern void cache_locale_time(void);
|
||||
/*
|
||||
* We use a discriminated union to hold either a locale_t or an ICU collator.
|
||||
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
||||
*
|
||||
* Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
|
||||
* (or POSIX), so we can optimize a few code paths in various places. For the
|
||||
* built-in C and POSIX collations, we can know that without even doing a
|
||||
* cache lookup, but we want to support aliases for C/POSIX too. For the
|
||||
* "default" collation, the flags are set from pg_database when the database
|
||||
* collation is initialized, since pg_collation doesn't tell us what we need.
|
||||
*
|
||||
* Note that some code relies on the flags not reporting false negatives
|
||||
* (that is, saying it's not C when it is). For example, char2wchar()
|
||||
* could fail if the locale is C, so str_tolower() shouldn't call it
|
||||
* in that case.
|
||||
*/
|
||||
struct pg_locale_struct
|
||||
{
|
||||
char provider;
|
||||
bool deterministic;
|
||||
bool collate_is_c;
|
||||
bool ctype_is_c;
|
||||
union
|
||||
{
|
||||
struct
|
||||
|
@ -9,6 +9,32 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
|
||||
\endif
|
||||
SET client_encoding TO UTF8;
|
||||
--
|
||||
-- Test builtin "C"
|
||||
--
|
||||
CREATE COLLATION regress_builtin_c (
|
||||
provider = builtin, locale = 'C');
|
||||
-- non-ASCII characters are unchanged
|
||||
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
-- non-ASCII characters are not alphabetic
|
||||
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
DROP COLLATION regress_builtin_c;
|
||||
--
|
||||
-- Test PG_C_UTF8
|
||||
--
|
||||
CREATE COLLATION regress_pg_c_utf8 (
|
||||
|
@ -11,6 +11,21 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
|
||||
|
||||
SET client_encoding TO UTF8;
|
||||
|
||||
--
|
||||
-- Test builtin "C"
|
||||
--
|
||||
CREATE COLLATION regress_builtin_c (
|
||||
provider = builtin, locale = 'C');
|
||||
|
||||
-- non-ASCII characters are unchanged
|
||||
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
|
||||
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
|
||||
|
||||
-- non-ASCII characters are not alphabetic
|
||||
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
|
||||
|
||||
DROP COLLATION regress_builtin_c;
|
||||
|
||||
--
|
||||
-- Test PG_C_UTF8
|
||||
--
|
||||
|
Loading…
x
Reference in New Issue
Block a user