mirror of
https://github.com/postgres/postgres.git
synced 2025-08-28 18:48:04 +03:00
The PG_UNICODE_FAST locale uses code point sort order (fast, memcmp-based) combined with Unicode character semantics. The character semantics are based on Unicode full case mapping. Full case mapping can map a single codepoint to multiple codepoints, such as "ß" uppercasing to "SS". Additionally, it handles context-sensitive mappings like the "final sigma", and it uses titlecase mappings such as "Dž" when titlecasing (rather than plain uppercase mappings). Importantly, the uppercasing of "ß" as "SS" is specifically mentioned by the SQL standard. In Postgres, UCS_BASIC uses plain ASCII semantics for case mapping and pattern matching, so if we changed it to use the PG_UNICODE_FAST locale, it would offer better compliance with the standard. For now, though, do not change the behavior of UCS_BASIC. Discussion: https://postgr.es/m/ddfd67928818f138f51635712529bc5e1d25e4e7.camel@j-davis.com Discussion: https://postgr.es/m/27bb0e52-801d-4f73-a0a4-02cfdd4a9ada@eisentraut.org Reviewed-by: Peter Eisentraut, Daniel Verite
302 lines
9.3 KiB
Plaintext
302 lines
9.3 KiB
Plaintext
<!--
|
|
doc/src/sgml/ref/create_collation.sgml
|
|
PostgreSQL documentation
|
|
-->
|
|
|
|
<refentry id="sql-createcollation">
|
|
<indexterm zone="sql-createcollation">
|
|
<primary>CREATE COLLATION</primary>
|
|
</indexterm>
|
|
|
|
<refmeta>
|
|
<refentrytitle>CREATE COLLATION</refentrytitle>
|
|
<manvolnum>7</manvolnum>
|
|
<refmiscinfo>SQL - Language Statements</refmiscinfo>
|
|
</refmeta>
|
|
|
|
<refnamediv>
|
|
<refname>CREATE COLLATION</refname>
|
|
<refpurpose>define a new collation</refpurpose>
|
|
</refnamediv>
|
|
|
|
<refsynopsisdiv>
|
|
<synopsis>
|
|
CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> (
|
|
[ LOCALE = <replaceable>locale</replaceable>, ]
|
|
[ LC_COLLATE = <replaceable>lc_collate</replaceable>, ]
|
|
[ LC_CTYPE = <replaceable>lc_ctype</replaceable>, ]
|
|
[ PROVIDER = <replaceable>provider</replaceable>, ]
|
|
[ DETERMINISTIC = <replaceable>boolean</replaceable>, ]
|
|
[ RULES = <replaceable>rules</replaceable>, ]
|
|
[ VERSION = <replaceable>version</replaceable> ]
|
|
)
|
|
CREATE COLLATION [ IF NOT EXISTS ] <replaceable>name</replaceable> FROM <replaceable>existing_collation</replaceable>
|
|
</synopsis>
|
|
</refsynopsisdiv>
|
|
|
|
<refsect1 id="sql-createcollation-description">
|
|
<title>Description</title>
|
|
|
|
<para>
|
|
<command>CREATE COLLATION</command> defines a new collation using
|
|
the specified operating system locale settings,
|
|
or by copying an existing collation.
|
|
</para>
|
|
|
|
<para>
|
|
To be able to create a collation, you must
|
|
have <literal>CREATE</literal> privilege on the destination schema.
|
|
</para>
|
|
</refsect1>
|
|
|
|
|
|
<refsect1>
|
|
<title>Parameters</title>
|
|
|
|
<variablelist>
|
|
<varlistentry>
|
|
<term><literal>IF NOT EXISTS</literal></term>
|
|
<listitem>
|
|
<para>
|
|
Do not throw an error if a collation with the same name already exists.
|
|
A notice is issued in this case. Note that there is no guarantee that
|
|
the existing collation is anything like the one that would have been created.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>name</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
The name of the collation. The collation name can be
|
|
schema-qualified. If it is not, the collation is defined in the
|
|
current schema. The collation name must be unique within that
|
|
schema. (The system catalogs can contain collations with the
|
|
same name for other encodings, but these are ignored if the
|
|
database encoding does not match.)
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>locale</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
The locale name for this collation. See <xref
|
|
linkend="collation-managing-create-libc"/> and <xref
|
|
linkend="collation-managing-create-icu"/> for details.
|
|
</para>
|
|
<para>
|
|
If <replaceable>provider</replaceable> is <literal>libc</literal>, this
|
|
is a shortcut for setting <symbol>LC_COLLATE</symbol> and
|
|
<symbol>LC_CTYPE</symbol> at once. If you specify
|
|
<replaceable>locale</replaceable>, you cannot specify either of those
|
|
parameters.
|
|
</para>
|
|
<para>
|
|
If <replaceable>provider</replaceable> is <literal>builtin</literal>,
|
|
then <replaceable>locale</replaceable> must be specified and set to
|
|
one of <literal>C</literal>, <literal>C.UTF-8</literal>, or
|
|
<literal>PG_UNICODE_FAST</literal>.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>lc_collate</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
If <replaceable>provider</replaceable> is <literal>libc</literal>, use
|
|
the specified operating system locale for the
|
|
<symbol>LC_COLLATE</symbol> locale category.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>lc_ctype</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
If <replaceable>provider</replaceable> is <literal>libc</literal>, use
|
|
the specified operating system locale for the <symbol>LC_CTYPE</symbol>
|
|
locale category.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>provider</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
Specifies the provider to use for locale services associated with this
|
|
collation. Possible values are <literal>builtin</literal>,
|
|
<literal>icu</literal><indexterm><primary>ICU</primary></indexterm> (if
|
|
the server was built with ICU support) or <literal>libc</literal>.
|
|
<literal>libc</literal> is the default. See <xref
|
|
linkend="locale-providers"/> for details.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><literal>DETERMINISTIC</literal></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
Specifies whether the collation should use deterministic comparisons.
|
|
The default is true. A deterministic comparison considers strings that
|
|
are not byte-wise equal to be unequal even if they are considered
|
|
logically equal by the comparison. PostgreSQL breaks ties using a
|
|
byte-wise comparison. A nondeterministic comparison can make the
|
|
collation, say, case- or accent-insensitive. For that, you need to
|
|
choose an appropriate <literal>LOCALE</literal> setting
|
|
<emphasis>and</emphasis> set the collation to not deterministic here.
|
|
</para>
|
|
|
|
<para>
|
|
Nondeterministic collations are only supported with the ICU provider.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>rules</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
Specifies additional collation rules to customize the behavior of the
|
|
collation. This is supported for ICU only. See <xref
|
|
linkend="icu-tailoring-rules"/> for details.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>version</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
Specifies the version string to store with the collation. Normally,
|
|
this should be omitted, which will cause the version to be computed
|
|
from the actual version of the collation as provided by the operating
|
|
system. This option is intended to be used
|
|
by <command>pg_upgrade</command> for copying the version from an
|
|
existing installation.
|
|
</para>
|
|
|
|
<para>
|
|
See also <xref linkend="sql-altercollation"/> for how to handle
|
|
collation version mismatches.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
|
|
<varlistentry>
|
|
<term><replaceable>existing_collation</replaceable></term>
|
|
|
|
<listitem>
|
|
<para>
|
|
The name of an existing collation to copy. The new collation
|
|
will have the same properties as the existing one, but it
|
|
will be an independent object.
|
|
</para>
|
|
</listitem>
|
|
</varlistentry>
|
|
</variablelist>
|
|
</refsect1>
|
|
|
|
|
|
<refsect1 id="sql-createcollation-notes">
|
|
<title>Notes</title>
|
|
|
|
<para>
|
|
<command>CREATE COLLATION</command> takes a <literal>SHARE ROW
|
|
EXCLUSIVE</literal> lock, which is self-conflicting, on the
|
|
<structname>pg_collation</structname> system catalog, so only one
|
|
<command>CREATE COLLATION</command> command can run at a time.
|
|
</para>
|
|
|
|
<para>
|
|
Use <command>DROP COLLATION</command> to remove user-defined collations.
|
|
</para>
|
|
|
|
<para>
|
|
See <xref linkend="collation-create"/> for more information on how to create collations.
|
|
</para>
|
|
|
|
<para>
|
|
When using the <literal>libc</literal> collation provider, the locale must
|
|
be applicable to the current database encoding.
|
|
See <xref linkend="sql-createdatabase"/> for the precise rules.
|
|
</para>
|
|
</refsect1>
|
|
|
|
<refsect1 id="sql-createcollation-examples">
|
|
<title>Examples</title>
|
|
|
|
<para>
|
|
To create a collation from the operating system locale
|
|
<literal>fr_FR.utf8</literal>
|
|
(assuming the current database encoding is <literal>UTF8</literal>):
|
|
<programlisting>
|
|
CREATE COLLATION french (locale = 'fr_FR.utf8');
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
To create a collation using the ICU provider with German phone book sort order:
|
|
<programlisting>
|
|
CREATE COLLATION german_phonebook (provider = icu, locale = 'de-u-co-phonebk');
|
|
</programlisting>
|
|
</para>
|
|
|
|
<para>
|
|
To create a collation using the ICU provider, based on the root ICU locale,
|
|
with custom rules:
|
|
<programlisting>
|
|
<![CDATA[CREATE COLLATION custom (provider = icu, locale = 'und', rules = '&V << w <<< W');]]>
|
|
</programlisting>
|
|
See <xref linkend="icu-tailoring-rules"/> for further details and examples
|
|
on the rules syntax.
|
|
</para>
|
|
|
|
<para>
|
|
To create a collation from an existing collation:
|
|
<programlisting>
|
|
CREATE COLLATION german FROM "de_DE";
|
|
</programlisting>
|
|
This makes it convenient to use operating-system-independent
|
|
collation names in applications.
|
|
</para>
|
|
</refsect1>
|
|
|
|
|
|
<refsect1 id="sql-createcollation-compat">
|
|
<title>Compatibility</title>
|
|
|
|
<para>
|
|
There is a <command>CREATE COLLATION</command> statement in the SQL
|
|
standard, but it is limited to copying an existing collation. The
|
|
syntax to create a new collation is
|
|
a <productname>PostgreSQL</productname> extension.
|
|
</para>
|
|
</refsect1>
|
|
|
|
|
|
<refsect1 id="sql-createcollation-seealso">
|
|
<title>See Also</title>
|
|
|
|
<simplelist type="inline">
|
|
<member><xref linkend="sql-altercollation"/></member>
|
|
<member><xref linkend="sql-dropcollation"/></member>
|
|
</simplelist>
|
|
</refsect1>
|
|
|
|
</refentry>
|