1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-22 17:42:17 +03:00

Allow tailoring of ICU locales with custom rules

This exposes the ICU facility to add custom collation rules to a
standard collation.

New options are added to CREATE COLLATION, CREATE DATABASE, createdb,
and initdb to set the rules.

Reviewed-by: Laurenz Albe <laurenz.albe@cybertec.at>
Reviewed-by: Daniel Verite <daniel@manitou-mail.org>
Discussion: https://www.postgresql.org/message-id/flat/821c71a4-6ef0-d366-9acf-bb8e367f739f@enterprisedb.com
This commit is contained in:
Peter Eisentraut
2023-03-08 16:35:42 +01:00
parent b1534ed99d
commit 30a53b7929
22 changed files with 380 additions and 59 deletions

View File

@@ -50,6 +50,7 @@ CollationCreate(const char *collname, Oid collnamespace,
int32 collencoding,
const char *collcollate, const char *collctype,
const char *colliculocale,
const char *collicurules,
const char *collversion,
bool if_not_exists,
bool quiet)
@@ -194,6 +195,10 @@ CollationCreate(const char *collname, Oid collnamespace,
values[Anum_pg_collation_colliculocale - 1] = CStringGetTextDatum(colliculocale);
else
nulls[Anum_pg_collation_colliculocale - 1] = true;
if (collicurules)
values[Anum_pg_collation_collicurules - 1] = CStringGetTextDatum(collicurules);
else
nulls[Anum_pg_collation_collicurules - 1] = true;
if (collversion)
values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(collversion);
else

View File

@@ -64,10 +64,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
DefElem *lcctypeEl = NULL;
DefElem *providerEl = NULL;
DefElem *deterministicEl = NULL;
DefElem *rulesEl = NULL;
DefElem *versionEl = NULL;
char *collcollate;
char *collctype;
char *colliculocale;
char *collicurules;
bool collisdeterministic;
int collencoding;
char collprovider;
@@ -99,6 +101,8 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
defelp = &providerEl;
else if (strcmp(defel->defname, "deterministic") == 0)
defelp = &deterministicEl;
else if (strcmp(defel->defname, "rules") == 0)
defelp = &rulesEl;
else if (strcmp(defel->defname, "version") == 0)
defelp = &versionEl;
else
@@ -161,6 +165,12 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
else
colliculocale = NULL;
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
collicurules = TextDatumGetCString(datum);
else
collicurules = NULL;
ReleaseSysCache(tp);
/*
@@ -182,6 +192,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collcollate = NULL;
collctype = NULL;
colliculocale = NULL;
collicurules = NULL;
if (providerEl)
collproviderstr = defGetString(providerEl);
@@ -191,6 +202,9 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
else
collisdeterministic = true;
if (rulesEl)
collicurules = defGetString(rulesEl);
if (versionEl)
collversion = defGetString(versionEl);
@@ -297,6 +311,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collcollate,
collctype,
colliculocale,
collicurules,
collversion,
if_not_exists,
false); /* not quiet */
@@ -680,7 +695,7 @@ create_collation_from_locale(const char *locale, int nspid,
*/
collid = CollationCreate(locale, nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
locale, locale, NULL,
locale, locale, NULL, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
if (OidIsValid(collid))
@@ -755,7 +770,7 @@ win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
collid = CollationCreate(alias, param->nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
localebuf, localebuf, NULL,
localebuf, localebuf, NULL, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
true, true);
if (OidIsValid(collid))
@@ -889,7 +904,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
collid = CollationCreate(alias, nspid, GetUserId(),
COLLPROVIDER_LIBC, true, enc,
locale, locale, NULL,
locale, locale, NULL, NULL,
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
true, true);
if (OidIsValid(collid))
@@ -951,7 +966,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
collid = CollationCreate(psprintf("%s-x-icu", langtag),
nspid, GetUserId(),
COLLPROVIDER_ICU, true, -1,
NULL, NULL, iculocstr,
NULL, NULL, iculocstr, NULL,
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
true, true);
if (OidIsValid(collid))

View File

@@ -119,6 +119,7 @@ static bool get_db_info(const char *name, LOCKMODE lockmode,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
char **dbIcurules,
char *dbLocProvider,
char **dbCollversion);
static void remove_dbtablespaces(Oid db_id);
@@ -675,6 +676,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
char *src_collate = NULL;
char *src_ctype = NULL;
char *src_iculocale = NULL;
char *src_icurules = NULL;
char src_locprovider = '\0';
char *src_collversion = NULL;
bool src_istemplate;
@@ -698,6 +700,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
DefElem *dcollate = NULL;
DefElem *dctype = NULL;
DefElem *diculocale = NULL;
DefElem *dicurules = NULL;
DefElem *dlocprovider = NULL;
DefElem *distemplate = NULL;
DefElem *dallowconnections = NULL;
@@ -710,6 +713,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
char *dbcollate = NULL;
char *dbctype = NULL;
char *dbiculocale = NULL;
char *dbicurules = NULL;
char dblocprovider = '\0';
char *canonname;
int encoding = -1;
@@ -775,6 +779,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
errorConflictingDefElem(defel, pstate);
diculocale = defel;
}
else if (strcmp(defel->defname, "icu_rules") == 0)
{
if (dicurules)
errorConflictingDefElem(defel, pstate);
dicurules = defel;
}
else if (strcmp(defel->defname, "locale_provider") == 0)
{
if (dlocprovider)
@@ -893,6 +903,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
dbctype = defGetString(dctype);
if (diculocale && diculocale->arg)
dbiculocale = defGetString(diculocale);
if (dicurules && dicurules->arg)
dbicurules = defGetString(dicurules);
if (dlocprovider && dlocprovider->arg)
{
char *locproviderstr = defGetString(dlocprovider);
@@ -958,7 +970,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
&src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_allowconn,
&src_frozenxid, &src_minmxid, &src_deftablespace,
&src_collate, &src_ctype, &src_iculocale, &src_locprovider,
&src_collate, &src_ctype, &src_iculocale, &src_icurules, &src_locprovider,
&src_collversion))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
@@ -1006,6 +1018,8 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
dblocprovider = src_locprovider;
if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU)
dbiculocale = src_iculocale;
if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU)
dbicurules = src_icurules;
/* Some encodings are client only */
if (!PG_VALID_BE_ENCODING(encoding))
@@ -1097,6 +1111,9 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
if (dblocprovider == COLLPROVIDER_ICU)
{
char *val1;
char *val2;
Assert(dbiculocale);
Assert(src_iculocale);
if (strcmp(dbiculocale, src_iculocale) != 0)
@@ -1105,6 +1122,19 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
errmsg("new ICU locale (%s) is incompatible with the ICU locale of the template database (%s)",
dbiculocale, src_iculocale),
errhint("Use the same ICU locale as in the template database, or use template0 as template.")));
val1 = dbicurules;
if (!val1)
val1 = "";
val2 = src_icurules;
if (!val2)
val2 = "";
if (strcmp(val1, val2) != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("new ICU collation rules (%s) are incompatible with the ICU collation rules of the template database (%s)",
val1, val2),
errhint("Use the same ICU collation rules as in the template database, or use template0 as template.")));
}
}
@@ -1313,6 +1343,10 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt)
new_record[Anum_pg_database_daticulocale - 1] = CStringGetTextDatum(dbiculocale);
else
new_record_nulls[Anum_pg_database_daticulocale - 1] = true;
if (dbicurules)
new_record[Anum_pg_database_daticurules - 1] = CStringGetTextDatum(dbicurules);
else
new_record_nulls[Anum_pg_database_daticurules - 1] = true;
if (dbcollversion)
new_record[Anum_pg_database_datcollversion - 1] = CStringGetTextDatum(dbcollversion);
else
@@ -1526,7 +1560,7 @@ dropdb(const char *dbname, bool missing_ok, bool force)
pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
&db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
&db_istemplate, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
{
if (!missing_ok)
{
@@ -1726,7 +1760,7 @@ RenameDatabase(const char *oldname, const char *newname)
rel = table_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(oldname, AccessExclusiveLock, &db_id, NULL, NULL,
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", oldname)));
@@ -1836,7 +1870,7 @@ movedb(const char *dbname, const char *tblspcname)
pgdbrel = table_open(DatabaseRelationId, RowExclusiveLock);
if (!get_db_info(dbname, AccessExclusiveLock, &db_id, NULL, NULL,
NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL))
NULL, NULL, NULL, NULL, &src_tblspcoid, NULL, NULL, NULL, NULL, NULL, NULL))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("database \"%s\" does not exist", dbname)));
@@ -2599,6 +2633,7 @@ get_db_info(const char *name, LOCKMODE lockmode,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
TransactionId *dbFrozenXidP, MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype, char **dbIculocale,
char **dbIcurules,
char *dbLocProvider,
char **dbCollversion)
{
@@ -2715,6 +2750,14 @@ get_db_info(const char *name, LOCKMODE lockmode,
else
*dbIculocale = TextDatumGetCString(datum);
}
if (dbIcurules)
{
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_daticurules, &isnull);
if (isnull)
*dbIcurules = NULL;
else
*dbIcurules = TextDatumGetCString(datum);
}
if (dbCollversion)
{
datum = SysCacheGetAttr(DATABASEOID, tuple, Anum_pg_database_datcollversion, &isnull);

View File

@@ -69,6 +69,7 @@
#ifdef USE_ICU
#include <unicode/ucnv.h>
#include <unicode/ustring.h>
#endif
#ifdef __GLIBC__
@@ -1421,6 +1422,7 @@ struct pg_locale_struct default_locale;
void
make_icu_collator(const char *iculocstr,
const char *icurules,
struct pg_locale_struct *resultp)
{
#ifdef USE_ICU
@@ -1437,6 +1439,35 @@ make_icu_collator(const char *iculocstr,
if (U_ICU_VERSION_MAJOR_NUM < 54)
icu_set_collation_attributes(collator, iculocstr);
/*
* If rules are specified, we extract the rules of the standard collation,
* add our own rules, and make a new collator with the combined rules.
*/
if (icurules)
{
const UChar *default_rules;
UChar *agg_rules;
UChar *my_rules;
int32_t length;
default_rules = ucol_getRules(collator, &length);
icu_to_uchar(&my_rules, icurules, strlen(icurules));
agg_rules = palloc_array(UChar, u_strlen(default_rules) + u_strlen(my_rules) + 1);
u_strcpy(agg_rules, default_rules);
u_strcat(agg_rules, my_rules);
ucol_close(collator);
status = U_ZERO_ERROR;
collator = ucol_openRules(agg_rules, u_strlen(agg_rules),
UCOL_DEFAULT, UCOL_DEFAULT_STRENGTH, NULL, &status);
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("could not open collator for locale \"%s\" with rules \"%s\": %s",
iculocstr, icurules, u_errorName(status))));
}
/* We will leak this string if the caller errors later :-( */
resultp->info.icu.locale = MemoryContextStrdup(TopMemoryContext, iculocstr);
resultp->info.icu.ucol = collator;
@@ -1608,11 +1639,19 @@ pg_newlocale_from_collation(Oid collid)
else if (collform->collprovider == COLLPROVIDER_ICU)
{
const char *iculocstr;
const char *icurules;
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_colliculocale, &isnull);
Assert(!isnull);
iculocstr = TextDatumGetCString(datum);
make_icu_collator(iculocstr, &result);
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
else
icurules = NULL;
make_icu_collator(iculocstr, icurules, &result);
}
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,

View File

@@ -421,10 +421,19 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect
if (dbform->datlocprovider == COLLPROVIDER_ICU)
{
char *icurules;
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull);
Assert(!isnull);
iculocale = TextDatumGetCString(datum);
make_icu_collator(iculocale, &default_locale);
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
if (!isnull)
icurules = TextDatumGetCString(datum);
else
icurules = NULL;
make_icu_collator(iculocale, icurules, &default_locale);
}
else
iculocale = NULL;