mirror of
https://github.com/postgres/postgres.git
synced 2025-05-29 16:21:20 +03:00
Don't install ICU collation keyword variants
Users can still create such collations themselves. Instead of preinstalling them, document the Unicode TR 35 collation options for ICU, so that users can define whichever variants they need. Reviewed-by: Peter Geoghegan <pg@bowt.ie>
This commit is contained in:
parent
a79fb8e0c4
commit
958ffb8c28
@ -664,13 +664,6 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
|
||||||
<term><literal>de-u-co-phonebk-x-icu</literal></term>
|
|
||||||
<listitem>
|
|
||||||
<para>German collation, phone book variant</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><literal>de-AT-x-icu</literal></term>
|
<term><literal>de-AT-x-icu</literal></term>
|
||||||
<listitem>
|
<listitem>
|
||||||
@ -683,13 +676,6 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
|
|||||||
</listitem>
|
</listitem>
|
||||||
</varlistentry>
|
</varlistentry>
|
||||||
|
|
||||||
<varlistentry>
|
|
||||||
<term><literal>de-AT-u-co-phonebk-x-icu</literal></term>
|
|
||||||
<listitem>
|
|
||||||
<para>German collation for Austria, phone book variant</para>
|
|
||||||
</listitem>
|
|
||||||
</varlistentry>
|
|
||||||
|
|
||||||
<varlistentry>
|
<varlistentry>
|
||||||
<term><literal>und-x-icu</literal> (for <quote>undefined</quote>)</term>
|
<term><literal>und-x-icu</literal> (for <quote>undefined</quote>)</term>
|
||||||
<listitem>
|
<listitem>
|
||||||
@ -709,6 +695,90 @@ SELECT a COLLATE "C" < b COLLATE "POSIX" FROM test1;
|
|||||||
will draw an error along the lines of <quote>collation "de-x-icu" for
|
will draw an error along the lines of <quote>collation "de-x-icu" for
|
||||||
encoding "WIN874" does not exist</quote>.
|
encoding "WIN874" does not exist</quote>.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
ICU allows collations to be customized beyond the basic language+country
|
||||||
|
set that is preloaded by <command>initdb</command>. Users are encouraged
|
||||||
|
to define their own collation objects that make use of these facilities to
|
||||||
|
suit the sorting behavior to their requirements. Here are some examples:
|
||||||
|
|
||||||
|
<variablelist>
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>CREATE COLLATION "de-u-co-phonebk-x-icu" (provider = icu, locale = 'de-u-co-phonebk')</literal></term>
|
||||||
|
<listitem>
|
||||||
|
<para>German collation with phone book collation type</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>CREATE COLLATION "und-u-co-emoji-x-icu" (provider = icu, locale = 'und-u-co-emoji')</literal></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Root collation with Emoji collation type, per Unicode Technical Standard #51
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>CREATE COLLATION digitslast (provider = icu, locale = 'en-u-kr-latn-digit')</literal></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Sort digits after Latin letters. (The default is digits before letters.)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>CREATE COLLATION upperfirst (provider = icu, locale = 'en-u-kf-upper')</literal></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Sort upper-case letters before lower-case letters. (The default is
|
||||||
|
lower-case letters first.)
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>CREATE COLLATION special (provider = icu, locale = 'en-u-kf-upper-kr-latn-digit')</literal></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Combines both of the above options.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
|
||||||
|
<varlistentry>
|
||||||
|
<term><literal>CREATE COLLATION numeric (provider = icu, locale = 'en-u-kn-true')</literal></term>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
Numeric ordering, sorts sequences of digits by their numeric value,
|
||||||
|
for example: <literal>A-21</literal> &lt; <literal>A-123</literal>
|
||||||
|
(also known as natural sort).
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
|
</varlistentry>
|
||||||
|
</variablelist>
|
||||||
|
|
||||||
|
See <ulink url="http://unicode.org/reports/tr35/tr35-collation.html">Unicode
|
||||||
|
Technical Standard #35</ulink>
|
||||||
|
and <ulink url="https://tools.ietf.org/html/bcp47">BCP 47</ulink> for
|
||||||
|
details. The list of possible collation types (<literal>co</literal>
|
||||||
|
subtag) can be found in
|
||||||
|
the <ulink url="http://www.unicode.org/repos/cldr/trunk/common/bcp47/collation.xml">CLDR
|
||||||
|
repository</ulink>.
|
||||||
|
The <ulink url="https://ssl.icu-project.org/icu-bin/locexp">ICU Locale
|
||||||
|
Explorer</ulink> can be used to check the details of a particular locale
|
||||||
|
definition.
|
||||||
|
</para>
|
||||||
|
|
||||||
|
<para>
|
||||||
|
Note that while this system allows creating collations that <quote>ignore
|
||||||
|
case</quote> or <quote>ignore accents</quote> or similar (using
|
||||||
|
the <literal>ks</literal> key), PostgreSQL does not at the moment allow
|
||||||
|
such collations to act in a truly case- or accent-insensitive manner. Any
|
||||||
|
strings that compare equal according to the collation but are not
|
||||||
|
byte-wise equal will be sorted according to their byte values.
|
||||||
|
</para>
|
||||||
</sect4>
|
</sect4>
|
||||||
</sect3>
|
</sect3>
|
||||||
|
|
||||||
|
@ -687,30 +687,11 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
|||||||
*/
|
*/
|
||||||
for (i = -1; i < uloc_countAvailable(); i++)
|
for (i = -1; i < uloc_countAvailable(); i++)
|
||||||
{
|
{
|
||||||
/*
|
|
||||||
* In ICU 4.2, ucol_getKeywordValuesForLocale() sometimes returns
|
|
||||||
* values that will not be accepted by uloc_toLanguageTag(). Skip
|
|
||||||
* loading keyword variants in that version. (Both
|
|
||||||
* ucol_getKeywordValuesForLocale() and uloc_toLanguageTag() are
|
|
||||||
* new in ICU 4.2, so older versions are not supported at all.)
|
|
||||||
*
|
|
||||||
* XXX We have no information about ICU 4.3 through 4.7, but we
|
|
||||||
* know the code below works with 4.8.
|
|
||||||
*/
|
|
||||||
#if U_ICU_VERSION_MAJOR_NUM > 4 || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM > 2)
|
|
||||||
#define LOAD_ICU_KEYWORD_VARIANTS
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const char *name;
|
const char *name;
|
||||||
char *langtag;
|
char *langtag;
|
||||||
char *icucomment;
|
char *icucomment;
|
||||||
const char *collcollate;
|
const char *collcollate;
|
||||||
Oid collid;
|
Oid collid;
|
||||||
#ifdef LOAD_ICU_KEYWORD_VARIANTS
|
|
||||||
UEnumeration *en;
|
|
||||||
UErrorCode status;
|
|
||||||
const char *val;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
if (i == -1)
|
if (i == -1)
|
||||||
name = ""; /* ICU root locale */
|
name = ""; /* ICU root locale */
|
||||||
@ -744,58 +725,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
|
|||||||
CreateComments(collid, CollationRelationId, 0,
|
CreateComments(collid, CollationRelationId, 0,
|
||||||
icucomment);
|
icucomment);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Add keyword variants, if enabled.
|
|
||||||
*/
|
|
||||||
#ifdef LOAD_ICU_KEYWORD_VARIANTS
|
|
||||||
status = U_ZERO_ERROR;
|
|
||||||
en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
|
|
||||||
if (U_FAILURE(status))
|
|
||||||
ereport(ERROR,
|
|
||||||
(errmsg("could not get keyword values for locale \"%s\": %s",
|
|
||||||
name, u_errorName(status))));
|
|
||||||
|
|
||||||
status = U_ZERO_ERROR;
|
|
||||||
uenum_reset(en, &status);
|
|
||||||
while ((val = uenum_next(en, NULL, &status)))
|
|
||||||
{
|
|
||||||
char *localeid = psprintf("%s@collation=%s", name, val);
|
|
||||||
|
|
||||||
langtag = get_icu_language_tag(localeid);
|
|
||||||
collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Be paranoid about not allowing any non-ASCII strings into
|
|
||||||
* pg_collation
|
|
||||||
*/
|
|
||||||
if (!is_all_ascii(langtag) || !is_all_ascii(collcollate))
|
|
||||||
continue;
|
|
||||||
|
|
||||||
collid = CollationCreate(psprintf("%s-x-icu", langtag),
|
|
||||||
nspid, GetUserId(),
|
|
||||||
COLLPROVIDER_ICU, -1,
|
|
||||||
collcollate, collcollate,
|
|
||||||
get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
|
|
||||||
true, true);
|
|
||||||
if (OidIsValid(collid))
|
|
||||||
{
|
|
||||||
ncreated++;
|
|
||||||
|
|
||||||
CommandCounterIncrement();
|
|
||||||
|
|
||||||
icucomment = get_icu_locale_comment(localeid);
|
|
||||||
if (icucomment)
|
|
||||||
CreateComments(collid, CollationRelationId, 0,
|
|
||||||
icucomment);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (U_FAILURE(status))
|
|
||||||
ereport(ERROR,
|
|
||||||
(errmsg("could not get keyword values for locale \"%s\": %s",
|
|
||||||
name, u_errorName(status))));
|
|
||||||
uenum_close(en);
|
|
||||||
#endif /* LOAD_ICU_KEYWORD_VARIANTS */
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif /* USE_ICU */
|
#endif /* USE_ICU */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user