mirror of
https://github.com/postgres/postgres.git
synced 2025-10-24 01:29:19 +03:00
Make collation not depend on setlocale().
Now that the result of pg_newlocale_from_collation() is always non-NULL, we can move the collate_is_c and ctype_is_c flags into pg_locale_t. That simplifies the logic in lc_collate_is_c() and lc_ctype_is_c(), removing the dependence on setlocale(). This commit also eliminates the multi-stage initialization of the collation cache. As long as we have catalog access, it's now safe to call pg_newlocale_from_collation() without checking lc_collate_is_c() first. Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org Reviewed-by: Peter Eisentraut, Andreas Karlsson
This commit is contained in:
@@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
|
|||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
Oid collid; /* hash key: pg_collation OID */
|
Oid collid; /* hash key: pg_collation OID */
|
||||||
bool collate_is_c; /* is collation's LC_COLLATE C? */
|
|
||||||
bool ctype_is_c; /* is collation's LC_CTYPE C? */
|
|
||||||
bool flags_valid; /* true if above flags are valid */
|
|
||||||
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
|
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
|
||||||
|
|
||||||
/* needed for simplehash */
|
/* needed for simplehash */
|
||||||
@@ -1225,29 +1222,13 @@ IsoLocaleName(const char *winlocname)
|
|||||||
/*
|
/*
|
||||||
* Cache mechanism for collation information.
|
* Cache mechanism for collation information.
|
||||||
*
|
*
|
||||||
* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
|
|
||||||
* (or POSIX), so we can optimize a few code paths in various places.
|
|
||||||
* For the built-in C and POSIX collations, we can know that without even
|
|
||||||
* doing a cache lookup, but we want to support aliases for C/POSIX too.
|
|
||||||
* For the "default" collation, there are separate static cache variables,
|
|
||||||
* since consulting the pg_collation catalog doesn't tell us what we need.
|
|
||||||
*
|
|
||||||
* Also, if a pg_locale_t has been requested for a collation, we cache that
|
|
||||||
* for the life of a backend.
|
|
||||||
*
|
|
||||||
* Note that some code relies on the flags not reporting false negatives
|
|
||||||
* (that is, saying it's not C when it is). For example, char2wchar()
|
|
||||||
* could fail if the locale is C, so str_tolower() shouldn't call it
|
|
||||||
* in that case.
|
|
||||||
*
|
|
||||||
* Note that we currently lack any way to flush the cache. Since we don't
|
* Note that we currently lack any way to flush the cache. Since we don't
|
||||||
* support ALTER COLLATION, this is OK. The worst case is that someone
|
* support ALTER COLLATION, this is OK. The worst case is that someone
|
||||||
* drops a collation, and a useless cache entry hangs around in existing
|
* drops a collation, and a useless cache entry hangs around in existing
|
||||||
* backends.
|
* backends.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
static collation_cache_entry *
|
static collation_cache_entry *
|
||||||
lookup_collation_cache(Oid collation, bool set_flags)
|
lookup_collation_cache(Oid collation)
|
||||||
{
|
{
|
||||||
collation_cache_entry *cache_entry;
|
collation_cache_entry *cache_entry;
|
||||||
bool found;
|
bool found;
|
||||||
@@ -1271,59 +1252,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
|
|||||||
* Make sure cache entry is marked invalid, in case we fail before
|
* Make sure cache entry is marked invalid, in case we fail before
|
||||||
* setting things.
|
* setting things.
|
||||||
*/
|
*/
|
||||||
cache_entry->flags_valid = false;
|
|
||||||
cache_entry->locale = 0;
|
cache_entry->locale = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (set_flags && !cache_entry->flags_valid)
|
|
||||||
{
|
|
||||||
/* Attempt to set the flags */
|
|
||||||
HeapTuple tp;
|
|
||||||
Form_pg_collation collform;
|
|
||||||
|
|
||||||
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
|
|
||||||
if (!HeapTupleIsValid(tp))
|
|
||||||
elog(ERROR, "cache lookup failed for collation %u", collation);
|
|
||||||
collform = (Form_pg_collation) GETSTRUCT(tp);
|
|
||||||
|
|
||||||
if (collform->collprovider == COLLPROVIDER_BUILTIN)
|
|
||||||
{
|
|
||||||
Datum datum;
|
|
||||||
const char *colllocale;
|
|
||||||
|
|
||||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
|
||||||
colllocale = TextDatumGetCString(datum);
|
|
||||||
|
|
||||||
cache_entry->collate_is_c = true;
|
|
||||||
cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
|
|
||||||
}
|
|
||||||
else if (collform->collprovider == COLLPROVIDER_LIBC)
|
|
||||||
{
|
|
||||||
Datum datum;
|
|
||||||
const char *collcollate;
|
|
||||||
const char *collctype;
|
|
||||||
|
|
||||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
|
|
||||||
collcollate = TextDatumGetCString(datum);
|
|
||||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
|
|
||||||
collctype = TextDatumGetCString(datum);
|
|
||||||
|
|
||||||
cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
|
|
||||||
(strcmp(collcollate, "POSIX") == 0));
|
|
||||||
cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
|
|
||||||
(strcmp(collctype, "POSIX") == 0));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cache_entry->collate_is_c = false;
|
|
||||||
cache_entry->ctype_is_c = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
cache_entry->flags_valid = true;
|
|
||||||
|
|
||||||
ReleaseSysCache(tp);
|
|
||||||
}
|
|
||||||
|
|
||||||
return cache_entry;
|
return cache_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1341,47 +1272,6 @@ lc_collate_is_c(Oid collation)
|
|||||||
if (!OidIsValid(collation))
|
if (!OidIsValid(collation))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/*
|
|
||||||
* If we're asked about the default collation, we have to inquire of the C
|
|
||||||
* library. Cache the result so we only have to compute it once.
|
|
||||||
*/
|
|
||||||
if (collation == DEFAULT_COLLATION_OID)
|
|
||||||
{
|
|
||||||
static int result = -1;
|
|
||||||
const char *localeptr;
|
|
||||||
|
|
||||||
if (result >= 0)
|
|
||||||
return (bool) result;
|
|
||||||
|
|
||||||
if (default_locale.provider == COLLPROVIDER_BUILTIN)
|
|
||||||
{
|
|
||||||
result = true;
|
|
||||||
return (bool) result;
|
|
||||||
}
|
|
||||||
else if (default_locale.provider == COLLPROVIDER_ICU)
|
|
||||||
{
|
|
||||||
result = false;
|
|
||||||
return (bool) result;
|
|
||||||
}
|
|
||||||
else if (default_locale.provider == COLLPROVIDER_LIBC)
|
|
||||||
{
|
|
||||||
localeptr = setlocale(LC_CTYPE, NULL);
|
|
||||||
if (!localeptr)
|
|
||||||
elog(ERROR, "invalid LC_CTYPE setting");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
elog(ERROR, "unexpected collation provider '%c'",
|
|
||||||
default_locale.provider);
|
|
||||||
|
|
||||||
if (strcmp(localeptr, "C") == 0)
|
|
||||||
result = true;
|
|
||||||
else if (strcmp(localeptr, "POSIX") == 0)
|
|
||||||
result = true;
|
|
||||||
else
|
|
||||||
result = false;
|
|
||||||
return (bool) result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're asked about the built-in C/POSIX collations, we know that.
|
* If we're asked about the built-in C/POSIX collations, we know that.
|
||||||
*/
|
*/
|
||||||
@@ -1392,7 +1282,7 @@ lc_collate_is_c(Oid collation)
|
|||||||
/*
|
/*
|
||||||
* Otherwise, we have to consult pg_collation, but we cache that.
|
* Otherwise, we have to consult pg_collation, but we cache that.
|
||||||
*/
|
*/
|
||||||
return (lookup_collation_cache(collation, true))->collate_is_c;
|
return pg_newlocale_from_collation(collation)->collate_is_c;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@@ -1408,46 +1298,6 @@ lc_ctype_is_c(Oid collation)
|
|||||||
if (!OidIsValid(collation))
|
if (!OidIsValid(collation))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/*
|
|
||||||
* If we're asked about the default collation, we have to inquire of the C
|
|
||||||
* library. Cache the result so we only have to compute it once.
|
|
||||||
*/
|
|
||||||
if (collation == DEFAULT_COLLATION_OID)
|
|
||||||
{
|
|
||||||
static int result = -1;
|
|
||||||
const char *localeptr;
|
|
||||||
|
|
||||||
if (result >= 0)
|
|
||||||
return (bool) result;
|
|
||||||
|
|
||||||
if (default_locale.provider == COLLPROVIDER_BUILTIN)
|
|
||||||
{
|
|
||||||
localeptr = default_locale.info.builtin.locale;
|
|
||||||
}
|
|
||||||
else if (default_locale.provider == COLLPROVIDER_ICU)
|
|
||||||
{
|
|
||||||
result = false;
|
|
||||||
return (bool) result;
|
|
||||||
}
|
|
||||||
else if (default_locale.provider == COLLPROVIDER_LIBC)
|
|
||||||
{
|
|
||||||
localeptr = setlocale(LC_CTYPE, NULL);
|
|
||||||
if (!localeptr)
|
|
||||||
elog(ERROR, "invalid LC_CTYPE setting");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
elog(ERROR, "unexpected collation provider '%c'",
|
|
||||||
default_locale.provider);
|
|
||||||
|
|
||||||
if (strcmp(localeptr, "C") == 0)
|
|
||||||
result = true;
|
|
||||||
else if (strcmp(localeptr, "POSIX") == 0)
|
|
||||||
result = true;
|
|
||||||
else
|
|
||||||
result = false;
|
|
||||||
return (bool) result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we're asked about the built-in C/POSIX collations, we know that.
|
* If we're asked about the built-in C/POSIX collations, we know that.
|
||||||
*/
|
*/
|
||||||
@@ -1458,7 +1308,7 @@ lc_ctype_is_c(Oid collation)
|
|||||||
/*
|
/*
|
||||||
* Otherwise, we have to consult pg_collation, but we cache that.
|
* Otherwise, we have to consult pg_collation, but we cache that.
|
||||||
*/
|
*/
|
||||||
return (lookup_collation_cache(collation, true))->ctype_is_c;
|
return pg_newlocale_from_collation(collation)->ctype_is_c;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* simple subroutine for reporting errors from newlocale() */
|
/* simple subroutine for reporting errors from newlocale() */
|
||||||
@@ -1647,6 +1497,9 @@ init_database_collation(void)
|
|||||||
|
|
||||||
builtin_validate_locale(dbform->encoding, datlocale);
|
builtin_validate_locale(dbform->encoding, datlocale);
|
||||||
|
|
||||||
|
default_locale.collate_is_c = true;
|
||||||
|
default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
|
||||||
|
|
||||||
default_locale.info.builtin.locale = MemoryContextStrdup(
|
default_locale.info.builtin.locale = MemoryContextStrdup(
|
||||||
TopMemoryContext, datlocale);
|
TopMemoryContext, datlocale);
|
||||||
}
|
}
|
||||||
@@ -1658,6 +1511,9 @@ init_database_collation(void)
|
|||||||
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
|
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
|
||||||
datlocale = TextDatumGetCString(datum);
|
datlocale = TextDatumGetCString(datum);
|
||||||
|
|
||||||
|
default_locale.collate_is_c = false;
|
||||||
|
default_locale.ctype_is_c = false;
|
||||||
|
|
||||||
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
|
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
|
||||||
if (!isnull)
|
if (!isnull)
|
||||||
icurules = TextDatumGetCString(datum);
|
icurules = TextDatumGetCString(datum);
|
||||||
@@ -1678,6 +1534,11 @@ init_database_collation(void)
|
|||||||
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
|
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
|
||||||
datctype = TextDatumGetCString(datum);
|
datctype = TextDatumGetCString(datum);
|
||||||
|
|
||||||
|
default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
|
||||||
|
(strcmp(datcollate, "POSIX") == 0);
|
||||||
|
default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
|
||||||
|
(strcmp(datctype, "POSIX") == 0);
|
||||||
|
|
||||||
make_libc_collator(datcollate, datctype, &default_locale);
|
make_libc_collator(datcollate, datctype, &default_locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1712,7 +1573,7 @@ pg_newlocale_from_collation(Oid collid)
|
|||||||
if (collid == DEFAULT_COLLATION_OID)
|
if (collid == DEFAULT_COLLATION_OID)
|
||||||
return &default_locale;
|
return &default_locale;
|
||||||
|
|
||||||
cache_entry = lookup_collation_cache(collid, false);
|
cache_entry = lookup_collation_cache(collid);
|
||||||
|
|
||||||
if (cache_entry->locale == 0)
|
if (cache_entry->locale == 0)
|
||||||
{
|
{
|
||||||
@@ -1741,6 +1602,9 @@ pg_newlocale_from_collation(Oid collid)
|
|||||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
||||||
locstr = TextDatumGetCString(datum);
|
locstr = TextDatumGetCString(datum);
|
||||||
|
|
||||||
|
result.collate_is_c = true;
|
||||||
|
result.ctype_is_c = (strcmp(locstr, "C") == 0);
|
||||||
|
|
||||||
builtin_validate_locale(GetDatabaseEncoding(), locstr);
|
builtin_validate_locale(GetDatabaseEncoding(), locstr);
|
||||||
|
|
||||||
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
|
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
|
||||||
@@ -1756,6 +1620,11 @@ pg_newlocale_from_collation(Oid collid)
|
|||||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
|
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
|
||||||
collctype = TextDatumGetCString(datum);
|
collctype = TextDatumGetCString(datum);
|
||||||
|
|
||||||
|
result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
|
||||||
|
(strcmp(collcollate, "POSIX") == 0);
|
||||||
|
result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
|
||||||
|
(strcmp(collctype, "POSIX") == 0);
|
||||||
|
|
||||||
make_libc_collator(collcollate, collctype, &result);
|
make_libc_collator(collcollate, collctype, &result);
|
||||||
}
|
}
|
||||||
else if (collform->collprovider == COLLPROVIDER_ICU)
|
else if (collform->collprovider == COLLPROVIDER_ICU)
|
||||||
@@ -1766,6 +1635,9 @@ pg_newlocale_from_collation(Oid collid)
|
|||||||
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
|
||||||
iculocstr = TextDatumGetCString(datum);
|
iculocstr = TextDatumGetCString(datum);
|
||||||
|
|
||||||
|
result.collate_is_c = false;
|
||||||
|
result.ctype_is_c = false;
|
||||||
|
|
||||||
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
|
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
|
||||||
if (!isnull)
|
if (!isnull)
|
||||||
icurules = TextDatumGetCString(datum);
|
icurules = TextDatumGetCString(datum);
|
||||||
|
@@ -69,11 +69,25 @@ extern void cache_locale_time(void);
|
|||||||
/*
|
/*
|
||||||
* We use a discriminated union to hold either a locale_t or an ICU collator.
|
* We use a discriminated union to hold either a locale_t or an ICU collator.
|
||||||
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
||||||
|
*
|
||||||
|
* Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
|
||||||
|
* (or POSIX), so we can optimize a few code paths in various places. For the
|
||||||
|
* built-in C and POSIX collations, we can know that without even doing a
|
||||||
|
* cache lookup, but we want to support aliases for C/POSIX too. For the
|
||||||
|
* "default" collation, there are separate static cache variables, since
|
||||||
|
* consulting the pg_collation catalog doesn't tell us what we need.
|
||||||
|
*
|
||||||
|
* Note that some code relies on the flags not reporting false negatives
|
||||||
|
* (that is, saying it's not C when it is). For example, char2wchar()
|
||||||
|
* could fail if the locale is C, so str_tolower() shouldn't call it
|
||||||
|
* in that case.
|
||||||
*/
|
*/
|
||||||
struct pg_locale_struct
|
struct pg_locale_struct
|
||||||
{
|
{
|
||||||
char provider;
|
char provider;
|
||||||
bool deterministic;
|
bool deterministic;
|
||||||
|
bool collate_is_c;
|
||||||
|
bool ctype_is_c;
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
struct
|
struct
|
||||||
|
@@ -9,6 +9,32 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
|
|||||||
\endif
|
\endif
|
||||||
SET client_encoding TO UTF8;
|
SET client_encoding TO UTF8;
|
||||||
--
|
--
|
||||||
|
-- Test builtin "C"
|
||||||
|
--
|
||||||
|
CREATE COLLATION regress_builtin_c (
|
||||||
|
provider = builtin, locale = 'C');
|
||||||
|
-- non-ASCII characters are unchanged
|
||||||
|
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- non-ASCII characters are not alphabetic
|
||||||
|
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
t
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP COLLATION regress_builtin_c;
|
||||||
|
--
|
||||||
-- Test PG_C_UTF8
|
-- Test PG_C_UTF8
|
||||||
--
|
--
|
||||||
CREATE COLLATION regress_pg_c_utf8 (
|
CREATE COLLATION regress_pg_c_utf8 (
|
||||||
|
@@ -11,6 +11,21 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
|
|||||||
|
|
||||||
SET client_encoding TO UTF8;
|
SET client_encoding TO UTF8;
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Test builtin "C"
|
||||||
|
--
|
||||||
|
CREATE COLLATION regress_builtin_c (
|
||||||
|
provider = builtin, locale = 'C');
|
||||||
|
|
||||||
|
-- non-ASCII characters are unchanged
|
||||||
|
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
|
||||||
|
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
|
||||||
|
|
||||||
|
-- non-ASCII characters are not alphabetic
|
||||||
|
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
|
||||||
|
|
||||||
|
DROP COLLATION regress_builtin_c;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- Test PG_C_UTF8
|
-- Test PG_C_UTF8
|
||||||
--
|
--
|
||||||
|
Reference in New Issue
Block a user