mirror of
https://github.com/postgres/postgres.git
synced 2025-07-15 19:21:59 +03:00
ICU support
Add a column collprovider to pg_collation that determines which library provides the collation data. The existing choices are default and libc, and this adds an icu choice, which uses the ICU4C library. The pg_locale_t type is changed to a union that contains the provider-specific locale handles. Users of locale information are changed to look into that struct for the appropriate handle to use. Also add a collversion column that records the version of the collation when it is created, and check at run time whether it is still the same. This detects potentially incompatible library upgrades that can corrupt indexes and other structures. This is currently only supported by ICU-provided collations. initdb initializes the default collation set as before from the `locale -a` output but also adds all available ICU locales with a "-x-icu" appended. Currently, ICU-provided collations can only be explicitly named collations. The global database locales are still always libc-provided. ICU support is enabled by configure --with-icu. Reviewed-by: Thomas Munro <thomas.munro@enterprisedb.com> Reviewed-by: Andreas Karlsson <andreas@proxel.se>
This commit is contained in:
@ -14,15 +14,18 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/heapam.h"
|
||||
#include "access/htup_details.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/dependency.h"
|
||||
#include "catalog/indexing.h"
|
||||
#include "catalog/namespace.h"
|
||||
#include "catalog/objectaccess.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "catalog/pg_collation_fn.h"
|
||||
#include "commands/alter.h"
|
||||
#include "commands/collationcmds.h"
|
||||
#include "commands/comment.h"
|
||||
#include "commands/dbcommands.h"
|
||||
#include "commands/defrem.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
@ -33,6 +36,7 @@
|
||||
#include "utils/rel.h"
|
||||
#include "utils/syscache.h"
|
||||
|
||||
|
||||
/*
|
||||
* CREATE COLLATION
|
||||
*/
|
||||
@ -47,8 +51,14 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
|
||||
DefElem *localeEl = NULL;
|
||||
DefElem *lccollateEl = NULL;
|
||||
DefElem *lcctypeEl = NULL;
|
||||
DefElem *providerEl = NULL;
|
||||
DefElem *versionEl = NULL;
|
||||
char *collcollate = NULL;
|
||||
char *collctype = NULL;
|
||||
char *collproviderstr = NULL;
|
||||
int collencoding;
|
||||
char collprovider = 0;
|
||||
char *collversion = NULL;
|
||||
Oid newoid;
|
||||
ObjectAddress address;
|
||||
|
||||
@ -72,6 +82,10 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
|
||||
defelp = &lccollateEl;
|
||||
else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0)
|
||||
defelp = &lcctypeEl;
|
||||
else if (pg_strcasecmp(defel->defname, "provider") == 0)
|
||||
defelp = &providerEl;
|
||||
else if (pg_strcasecmp(defel->defname, "version") == 0)
|
||||
defelp = &versionEl;
|
||||
else
|
||||
{
|
||||
ereport(ERROR,
|
||||
@ -103,6 +117,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
|
||||
|
||||
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
|
||||
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
|
||||
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
|
||||
|
||||
ReleaseSysCache(tp);
|
||||
}
|
||||
@ -119,6 +134,27 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
|
||||
if (lcctypeEl)
|
||||
collctype = defGetString(lcctypeEl);
|
||||
|
||||
if (providerEl)
|
||||
collproviderstr = defGetString(providerEl);
|
||||
|
||||
if (versionEl)
|
||||
collversion = defGetString(versionEl);
|
||||
|
||||
if (collproviderstr)
|
||||
{
|
||||
if (pg_strcasecmp(collproviderstr, "icu") == 0)
|
||||
collprovider = COLLPROVIDER_ICU;
|
||||
else if (pg_strcasecmp(collproviderstr, "libc") == 0)
|
||||
collprovider = COLLPROVIDER_LIBC;
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
errmsg("unrecognized collation provider: %s",
|
||||
collproviderstr)));
|
||||
}
|
||||
else if (!fromEl)
|
||||
collprovider = COLLPROVIDER_LIBC;
|
||||
|
||||
if (!collcollate)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
@ -129,14 +165,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
|
||||
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
|
||||
errmsg("parameter \"lc_ctype\" must be specified")));
|
||||
|
||||
check_encoding_locale_matches(GetDatabaseEncoding(), collcollate, collctype);
|
||||
if (collprovider == COLLPROVIDER_ICU)
|
||||
collencoding = -1;
|
||||
else
|
||||
{
|
||||
collencoding = GetDatabaseEncoding();
|
||||
check_encoding_locale_matches(collencoding, collcollate, collctype);
|
||||
}
|
||||
|
||||
if (!collversion)
|
||||
collversion = get_collation_actual_version(collprovider, collcollate);
|
||||
|
||||
newoid = CollationCreate(collName,
|
||||
collNamespace,
|
||||
GetUserId(),
|
||||
GetDatabaseEncoding(),
|
||||
collprovider,
|
||||
collencoding,
|
||||
collcollate,
|
||||
collctype,
|
||||
collversion,
|
||||
if_not_exists);
|
||||
|
||||
if (!OidIsValid(newoid))
|
||||
@ -182,16 +229,118 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
|
||||
collname, get_namespace_name(nspOid))));
|
||||
}
|
||||
|
||||
/*
|
||||
* ALTER COLLATION
|
||||
*/
|
||||
ObjectAddress
|
||||
AlterCollation(AlterCollationStmt *stmt)
|
||||
{
|
||||
Relation rel;
|
||||
Oid collOid;
|
||||
HeapTuple tup;
|
||||
Form_pg_collation collForm;
|
||||
Datum collversion;
|
||||
bool isnull;
|
||||
char *oldversion;
|
||||
char *newversion;
|
||||
ObjectAddress address;
|
||||
|
||||
rel = heap_open(CollationRelationId, RowExclusiveLock);
|
||||
collOid = get_collation_oid(stmt->collname, false);
|
||||
|
||||
if (!pg_collation_ownercheck(collOid, GetUserId()))
|
||||
aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION,
|
||||
NameListToString(stmt->collname));
|
||||
|
||||
tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
|
||||
if (!HeapTupleIsValid(tup))
|
||||
elog(ERROR, "cache lookup failed for collation %u", collOid);
|
||||
|
||||
collForm = (Form_pg_collation) GETSTRUCT(tup);
|
||||
collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
|
||||
&isnull);
|
||||
oldversion = isnull ? NULL : TextDatumGetCString(collversion);
|
||||
|
||||
newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
|
||||
|
||||
/* cannot change from NULL to non-NULL or vice versa */
|
||||
if ((!oldversion && newversion) || (oldversion && !newversion))
|
||||
elog(ERROR, "invalid collation version change");
|
||||
else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
|
||||
{
|
||||
bool nulls[Natts_pg_collation];
|
||||
bool replaces[Natts_pg_collation];
|
||||
Datum values[Natts_pg_collation];
|
||||
|
||||
ereport(NOTICE,
|
||||
(errmsg("changing version from %s to %s",
|
||||
oldversion, newversion)));
|
||||
|
||||
memset(values, 0, sizeof(values));
|
||||
memset(nulls, false, sizeof(nulls));
|
||||
memset(replaces, false, sizeof(replaces));
|
||||
|
||||
values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
|
||||
replaces[Anum_pg_collation_collversion - 1] = true;
|
||||
|
||||
tup = heap_modify_tuple(tup, RelationGetDescr(rel),
|
||||
values, nulls, replaces);
|
||||
}
|
||||
else
|
||||
ereport(NOTICE,
|
||||
(errmsg("version has not changed")));
|
||||
|
||||
CatalogTupleUpdate(rel, &tup->t_self, tup);
|
||||
|
||||
InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
|
||||
|
||||
ObjectAddressSet(address, CollationRelationId, collOid);
|
||||
|
||||
heap_freetuple(tup);
|
||||
heap_close(rel, NoLock);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
pg_collation_actual_version(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid collid = PG_GETARG_OID(0);
|
||||
HeapTuple tp;
|
||||
char *collcollate;
|
||||
char collprovider;
|
||||
char *version;
|
||||
|
||||
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
|
||||
if (!HeapTupleIsValid(tp))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_UNDEFINED_OBJECT),
|
||||
errmsg("collation with OID %u does not exist", collid)));
|
||||
|
||||
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
|
||||
collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
|
||||
|
||||
ReleaseSysCache(tp);
|
||||
|
||||
version = get_collation_actual_version(collprovider, collcollate);
|
||||
|
||||
if (version)
|
||||
PG_RETURN_TEXT_P(cstring_to_text(version));
|
||||
else
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* "Normalize" a locale name, stripping off encoding tags such as
|
||||
* "Normalize" a libc locale name, stripping off encoding tags such as
|
||||
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
|
||||
* -> "br_FR@euro"). Return true if a new, different name was
|
||||
* generated.
|
||||
*/
|
||||
pg_attribute_unused()
|
||||
static bool
|
||||
normalize_locale_name(char *new, const char *old)
|
||||
normalize_libc_locale_name(char *new, const char *old)
|
||||
{
|
||||
char *n = new;
|
||||
const char *o = old;
|
||||
@ -219,6 +368,46 @@ normalize_locale_name(char *new, const char *old)
|
||||
}
|
||||
|
||||
|
||||
#ifdef USE_ICU
|
||||
static char *
|
||||
get_icu_language_tag(const char *localename)
|
||||
{
|
||||
char buf[ULOC_FULLNAME_CAPACITY];
|
||||
UErrorCode status;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status);
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("could not convert locale name \"%s\" to language tag: %s",
|
||||
localename, u_errorName(status))));
|
||||
|
||||
return pstrdup(buf);
|
||||
}
|
||||
|
||||
|
||||
static char *
|
||||
get_icu_locale_comment(const char *localename)
|
||||
{
|
||||
UErrorCode status;
|
||||
UChar displayname[128];
|
||||
int32 len_uchar;
|
||||
char *result;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("could get display name for locale \"%s\": %s",
|
||||
localename, u_errorName(status))));
|
||||
|
||||
icu_from_uchar(&result, displayname, len_uchar);
|
||||
|
||||
return result;
|
||||
}
|
||||
#endif /* USE_ICU */
|
||||
|
||||
|
||||
Datum
|
||||
pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
{
|
||||
@ -302,8 +491,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
|
||||
count++;
|
||||
|
||||
CollationCreate(localebuf, nspid, GetUserId(), enc,
|
||||
localebuf, localebuf, if_not_exists);
|
||||
CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
|
||||
localebuf, localebuf,
|
||||
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
|
||||
if_not_exists);
|
||||
|
||||
CommandCounterIncrement();
|
||||
|
||||
@ -316,7 +507,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
* "locale -a" output. So save up the aliases and try to add them
|
||||
* after we've read all the output.
|
||||
*/
|
||||
if (normalize_locale_name(alias, localebuf))
|
||||
if (normalize_libc_locale_name(alias, localebuf))
|
||||
{
|
||||
aliaslist = lappend(aliaslist, pstrdup(alias));
|
||||
localelist = lappend(localelist, pstrdup(localebuf));
|
||||
@ -333,8 +524,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
char *locale = (char *) lfirst(lcl);
|
||||
int enc = lfirst_int(lce);
|
||||
|
||||
CollationCreate(alias, nspid, GetUserId(), enc,
|
||||
locale, locale, true);
|
||||
CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
|
||||
locale, locale,
|
||||
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
|
||||
true);
|
||||
CommandCounterIncrement();
|
||||
}
|
||||
|
||||
@ -343,5 +536,82 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
||||
(errmsg("no usable system locales were found")));
|
||||
#endif /* not HAVE_LOCALE_T && not WIN32 */
|
||||
|
||||
#ifdef USE_ICU
|
||||
if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
|
||||
{
|
||||
ereport(NOTICE,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("encoding \"%s\" not supported by ICU",
|
||||
pg_encoding_to_char(GetDatabaseEncoding()))));
|
||||
}
|
||||
else
|
||||
{
|
||||
int i;
|
||||
|
||||
/*
|
||||
* Start the loop at -1 to sneak in the root locale without too much
|
||||
* code duplication.
|
||||
*/
|
||||
for (i = -1; i < ucol_countAvailable(); i++)
|
||||
{
|
||||
const char *name;
|
||||
char *langtag;
|
||||
const char *collcollate;
|
||||
UEnumeration *en;
|
||||
UErrorCode status;
|
||||
const char *val;
|
||||
Oid collid;
|
||||
|
||||
if (i == -1)
|
||||
name = ""; /* ICU root locale */
|
||||
else
|
||||
name = ucol_getAvailable(i);
|
||||
|
||||
langtag = get_icu_language_tag(name);
|
||||
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
|
||||
collid = CollationCreate(psprintf("%s-x-icu", langtag),
|
||||
nspid, GetUserId(), COLLPROVIDER_ICU, -1,
|
||||
collcollate, collcollate,
|
||||
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
|
||||
if_not_exists);
|
||||
|
||||
CreateComments(collid, CollationRelationId, 0,
|
||||
get_icu_locale_comment(name));
|
||||
|
||||
/*
|
||||
* Add keyword variants
|
||||
*/
|
||||
status = U_ZERO_ERROR;
|
||||
en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("could not get keyword values for locale \"%s\": %s",
|
||||
name, u_errorName(status))));
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
uenum_reset(en, &status);
|
||||
while ((val = uenum_next(en, NULL, &status)))
|
||||
{
|
||||
char *localeid = psprintf("%s@collation=%s", name, val);
|
||||
|
||||
langtag = get_icu_language_tag(localeid);
|
||||
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
|
||||
collid = CollationCreate(psprintf("%s-x-icu", langtag),
|
||||
nspid, GetUserId(), COLLPROVIDER_ICU, -1,
|
||||
collcollate, collcollate,
|
||||
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
|
||||
if_not_exists);
|
||||
CreateComments(collid, CollationRelationId, 0,
|
||||
get_icu_locale_comment(localeid));
|
||||
}
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("could not get keyword values for locale \"%s\": %s",
|
||||
name, u_errorName(status))));
|
||||
uenum_close(en);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
PG_RETURN_VOID();
|
||||
}
|
||||
|
Reference in New Issue
Block a user