1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-06 07:49:08 +03:00

Introduce "builtin" collation provider.

New provider for collations, like "libc" or "icu", but without any
external dependency.

Initially, the only locale supported by the builtin provider is "C",
which is identical to the libc provider's "C" locale. The libc
provider's "C" locale has always been treated as a special case that
uses an internal implementation, without using libc at all -- so the
new builtin provider uses the same implementation.

The builtin provider's locale is independent of the server environment
variables LC_COLLATE and LC_CTYPE. Using the builtin provider, the
database collation locale can be "C" while LC_COLLATE and LC_CTYPE are
set to "en_US", which is impossible with the libc provider.

By offering a new builtin provider, it clarifies that the semantics of
a collation using this provider will never depend on libc, and makes
it easier to document the behavior.

Discussion: https://postgr.es/m/ab925f69-5f9d-f85e-b87c-bd2a44798659@joeconway.com
Discussion: https://postgr.es/m/dd9261f4-7a98-4565-93ec-336c1c110d90@manitou-mail.org
Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel%40j-davis.com
Reviewed-by: Daniel Vérité, Peter Eisentraut, Jeremy Schneider
This commit is contained in:
Jeff Davis
2024-03-13 23:33:44 -07:00
parent 6ab2e8385d
commit 2d819a08a1
25 changed files with 671 additions and 158 deletions

View File

@@ -1268,7 +1268,18 @@ lookup_collation_cache(Oid collation, bool set_flags)
elog(ERROR, "cache lookup failed for collation %u", collation);
collform = (Form_pg_collation) GETSTRUCT(tp);
if (collform->collprovider == COLLPROVIDER_LIBC)
if (collform->collprovider == COLLPROVIDER_BUILTIN)
{
Datum datum;
const char *colllocale;
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
colllocale = TextDatumGetCString(datum);
cache_entry->collate_is_c = true;
cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
}
else if (collform->collprovider == COLLPROVIDER_LIBC)
{
Datum datum;
const char *collcollate;
@@ -1319,16 +1330,30 @@ lc_collate_is_c(Oid collation)
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
char *localeptr;
if (default_locale.provider == COLLPROVIDER_ICU)
return false;
const char *localeptr;
if (result >= 0)
return (bool) result;
localeptr = setlocale(LC_COLLATE, NULL);
if (!localeptr)
elog(ERROR, "invalid LC_COLLATE setting");
if (default_locale.provider == COLLPROVIDER_BUILTIN)
{
result = true;
return (bool) result;
}
else if (default_locale.provider == COLLPROVIDER_ICU)
{
result = false;
return (bool) result;
}
else if (default_locale.provider == COLLPROVIDER_LIBC)
{
localeptr = setlocale(LC_CTYPE, NULL);
if (!localeptr)
elog(ERROR, "invalid LC_CTYPE setting");
}
else
elog(ERROR, "unexpected collation provider '%c'",
default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
@@ -1372,16 +1397,29 @@ lc_ctype_is_c(Oid collation)
if (collation == DEFAULT_COLLATION_OID)
{
static int result = -1;
char *localeptr;
if (default_locale.provider == COLLPROVIDER_ICU)
return false;
const char *localeptr;
if (result >= 0)
return (bool) result;
localeptr = setlocale(LC_CTYPE, NULL);
if (!localeptr)
elog(ERROR, "invalid LC_CTYPE setting");
if (default_locale.provider == COLLPROVIDER_BUILTIN)
{
localeptr = default_locale.info.builtin.locale;
}
else if (default_locale.provider == COLLPROVIDER_ICU)
{
result = false;
return (bool) result;
}
else if (default_locale.provider == COLLPROVIDER_LIBC)
{
localeptr = setlocale(LC_CTYPE, NULL);
if (!localeptr)
elog(ERROR, "invalid LC_CTYPE setting");
}
else
elog(ERROR, "unexpected collation provider '%c'",
default_locale.provider);
if (strcmp(localeptr, "C") == 0)
result = true;
@@ -1519,10 +1557,10 @@ pg_newlocale_from_collation(Oid collid)
if (collid == DEFAULT_COLLATION_OID)
{
if (default_locale.provider == COLLPROVIDER_ICU)
return &default_locale;
else
if (default_locale.provider == COLLPROVIDER_LIBC)
return (pg_locale_t) 0;
else
return &default_locale;
}
cache_entry = lookup_collation_cache(collid, false);
@@ -1547,7 +1585,19 @@ pg_newlocale_from_collation(Oid collid)
result.provider = collform->collprovider;
result.deterministic = collform->collisdeterministic;
if (collform->collprovider == COLLPROVIDER_LIBC)
if (collform->collprovider == COLLPROVIDER_BUILTIN)
{
const char *locstr;
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
locstr = TextDatumGetCString(datum);
builtin_validate_locale(GetDatabaseEncoding(), locstr);
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
locstr);
}
else if (collform->collprovider == COLLPROVIDER_LIBC)
{
const char *collcollate;
const char *collctype pg_attribute_unused();
@@ -1626,7 +1676,11 @@ pg_newlocale_from_collation(Oid collid)
collversionstr = TextDatumGetCString(datum);
datum = SysCacheGetAttrNotNull(COLLOID, tp, collform->collprovider == COLLPROVIDER_ICU ? Anum_pg_collation_colllocale : Anum_pg_collation_collcollate);
Assert(collform->collprovider != COLLPROVIDER_BUILTIN);
if (collform->collprovider == COLLPROVIDER_LIBC)
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
else
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
actual_versionstr = get_collation_actual_version(collform->collprovider,
TextDatumGetCString(datum));
@@ -1677,6 +1731,10 @@ get_collation_actual_version(char collprovider, const char *collcollate)
{
char *collversion = NULL;
/* the builtin collation provider is not versioned */
if (collprovider == COLLPROVIDER_BUILTIN)
return NULL;
#ifdef USE_ICU
if (collprovider == COLLPROVIDER_ICU)
{
@@ -2443,6 +2501,31 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
return result;
}
const char *
builtin_validate_locale(int encoding, const char *locale)
{
const char *canonical_name = NULL;
int required_encoding = -1;
if (strcmp(locale, "C") == 0)
canonical_name = "C";
if (!canonical_name)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("invalid locale name \"%s\" for builtin provider",
locale)));
if (required_encoding >= 0 && encoding != required_encoding)
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("encoding \"%s\" does not match locale \"%s\"",
pg_encoding_to_char(encoding), locale)));
return canonical_name;
}
#ifdef USE_ICU
/*