From a14e75eb0b6a73821e0d66c0d407372ec8376105 Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 16 Jun 2023 10:27:32 -0700 Subject: [PATCH] CREATE DATABASE: make LOCALE apply to all collation providers. For CREATE DATABASE, make LOCALE parameter apply regardless of the provider used. Also affects initdb and createdb --locale arguments. Previously, LOCALE (and --locale) only affected the database default collation when using the libc provider. Discussion: https://postgr.es/m/1a63084d-221e-4075-619e-6b3e590f673e@enterprisedb.com Reviewed-by: Peter Eisentraut --- doc/src/sgml/ref/create_collation.sgml | 23 +++++--- doc/src/sgml/ref/create_database.sgml | 57 +++++++++++++++---- doc/src/sgml/ref/createdb.sgml | 5 +- doc/src/sgml/ref/initdb.sgml | 7 ++- src/backend/commands/collationcmds.c | 2 +- src/backend/commands/dbcommands.c | 17 ++++-- src/bin/initdb/initdb.c | 10 +++- src/bin/initdb/t/001_initdb.pl | 11 ++++ src/bin/scripts/createdb.c | 13 ++--- src/bin/scripts/t/020_createdb.pl | 15 ++++- src/test/icu/t/010_database.pl | 33 ++++++++--- .../regress/expected/collate.icu.utf8.out | 22 +++---- 12 files changed, 155 insertions(+), 60 deletions(-) diff --git a/doc/src/sgml/ref/create_collation.sgml b/doc/src/sgml/ref/create_collation.sgml index f6353da5c15..b86a9bbb9ce 100644 --- a/doc/src/sgml/ref/create_collation.sgml +++ b/doc/src/sgml/ref/create_collation.sgml @@ -85,9 +85,16 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM - This is a shortcut for setting LC_COLLATE - and LC_CTYPE at once. If you specify this, - you cannot specify either of those parameters. + The locale name for this collation. See and for details. + + + If provider is libc, this + is a shortcut for setting LC_COLLATE and + LC_CTYPE at once. If you specify + locale, you cannot specify either of those + parameters. @@ -97,8 +104,9 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM - Use the specified operating system locale for - the LC_COLLATE locale category. 
+ If provider is libc, use + the specified operating system locale for the + LC_COLLATE locale category. @@ -108,8 +116,9 @@ CREATE COLLATION [ IF NOT EXISTS ] name FROM - Use the specified operating system locale for - the LC_CTYPE locale category. + If provider is libc, use + the specified operating system locale for the LC_CTYPE + locale category. diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 13793bb6b79..b2c8aef1ad2 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -145,8 +145,22 @@ CREATE DATABASE name locale - This is a shortcut for setting LC_COLLATE - and LC_CTYPE at once. + Sets the default collation order and character classification in the + new database. Collation affects the sort order applied to strings, + e.g., in queries with ORDER BY, as well as the order used in indexes + on text columns. Character classification affects the categorization + of characters, e.g., lower, upper, and digit. Also sets the + associated aspects of the operating system environment, + LC_COLLATE and LC_CTYPE. The + default is the same setting as the template database. See the libc and ICU collation documentation for details. + + + Can be overridden by setting lc_collate, lc_ctype, or icu_locale individually. @@ -164,11 +178,17 @@ CREATE DATABASE name lc_collate - Collation order (LC_COLLATE) to use in the new database. - This affects the sort order applied to strings, e.g., in queries with - ORDER BY, as well as the order used in indexes on text columns. - The default is to use the collation order of the template database. - See below for additional restrictions. + Sets LC_COLLATE in the database server's operating + system environment. The default is the setting of locale if specified, otherwise the same + setting as the template database. See below for additional + restrictions. + + + If locale_provider is + libc, also sets the default collation order to use + in the new database, overriding the setting locale. 
@@ -176,10 +196,17 @@ CREATE DATABASE name lc_ctype - Character classification (LC_CTYPE) to use in the new - database. This affects the categorization of characters, e.g., lower, - upper and digit. The default is to use the character classification of - the template database. See below for additional restrictions. + Sets LC_CTYPE in the database server's operating + system environment. The default is the setting of locale if specified, otherwise the same + setting as the template database. See below for additional + restrictions. + + + If locale_provider is + libc, also sets the default character + classification to use in the new database, overriding the setting + locale. @@ -188,7 +215,13 @@ CREATE DATABASE name icu_locale - Specifies the ICU locale ID if the ICU locale provider is used. + Specifies the ICU locale (see the ICU collation documentation) for the database default + collation order and character classification, overriding the setting + locale. The locale provider must be ICU. The default + is the setting of locale if + specified; otherwise the same setting as the template database. diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml index e23419ba6cb..e4647d5ce71 100644 --- a/doc/src/sgml/ref/createdb.sgml +++ b/doc/src/sgml/ref/createdb.sgml @@ -124,7 +124,10 @@ PostgreSQL documentation Specifies the locale to be used in this database. This is equivalent - to specifying both --lc-collate and --lc-ctype. + to specifying --lc-collate, + --lc-ctype, and --icu-locale to the + same value. Some locales are only valid for ICU and must be set with + --icu-locale. diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml index 87945b4b62f..f850dc404d1 100644 --- a/doc/src/sgml/ref/initdb.sgml +++ b/doc/src/sgml/ref/initdb.sgml @@ -116,9 +116,10 @@ PostgreSQL documentation To choose a different locale for the cluster, use the option --locale. There are also individual options - --lc-* (see below) to set values for the individual locale - categories. 
Note that inconsistent settings for different locale - categories can give nonsensical results, so this should be used with care. + --lc-* and --icu-locale (see below) to + set values for the individual locale categories. Note that inconsistent + settings for different locale categories can give nonsensical results, so + this should be used with care. diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c index 2969a2bb212..efb8b4d289f 100644 --- a/src/backend/commands/collationcmds.c +++ b/src/backend/commands/collationcmds.c @@ -276,7 +276,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e if (langtag && strcmp(colliculocale, langtag) != 0) { ereport(NOTICE, - (errmsg("using standard form \"%s\" for locale \"%s\"", + (errmsg("using standard form \"%s\" for ICU locale \"%s\"", langtag, colliculocale))); colliculocale = langtag; diff --git a/src/backend/commands/dbcommands.c b/src/backend/commands/dbcommands.c index 99d4080ea95..09f1ab41ad3 100644 --- a/src/backend/commands/dbcommands.c +++ b/src/backend/commands/dbcommands.c @@ -1017,7 +1017,12 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (dblocprovider == '\0') dblocprovider = src_locprovider; if (dbiculocale == NULL && dblocprovider == COLLPROVIDER_ICU) - dbiculocale = src_iculocale; + { + if (dlocale && dlocale->arg) + dbiculocale = defGetString(dlocale); + else + dbiculocale = src_iculocale; + } if (dbicurules == NULL && dblocprovider == COLLPROVIDER_ICU) dbicurules = src_icurules; @@ -1031,12 +1036,14 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (!check_locale(LC_COLLATE, dbcollate, &canonname)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid locale name: \"%s\"", dbcollate))); + errmsg("invalid LC_COLLATE locale name: \"%s\"", dbcollate), + errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); dbcollate = canonname; if (!check_locale(LC_CTYPE, dbctype, &canonname)) ereport(ERROR, 
(errcode(ERRCODE_WRONG_OBJECT_TYPE), - errmsg("invalid locale name: \"%s\"", dbctype))); + errmsg("invalid LC_CTYPE locale name: \"%s\"", dbctype), + errhint("If the locale name is specific to ICU, use ICU_LOCALE."))); dbctype = canonname; check_encoding_locale_matches(encoding, dbcollate, dbctype); @@ -1056,7 +1063,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (!dbiculocale) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("ICU locale must be specified"))); + errmsg("LOCALE or ICU_LOCALE must be specified"))); /* * During binary upgrade, or when the locale came from the template @@ -1071,7 +1078,7 @@ createdb(ParseState *pstate, const CreatedbStmt *stmt) if (langtag && strcmp(dbiculocale, langtag) != 0) { ereport(NOTICE, - (errmsg("using standard form \"%s\" for locale \"%s\"", + (errmsg("using standard form \"%s\" for ICU locale \"%s\"", langtag, dbiculocale))); dbiculocale = langtag; diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c index 09a5c98cc04..71a3d26c378 100644 --- a/src/bin/initdb/initdb.c +++ b/src/bin/initdb/initdb.c @@ -2163,7 +2163,11 @@ check_locale_name(int category, const char *locale, char **canonname) if (res == NULL) { if (*locale) - pg_fatal("invalid locale name \"%s\"", locale); + { + pg_log_error("invalid locale name \"%s\"", locale); + pg_log_error_hint("If the locale name is specific to ICU, use --icu-locale."); + exit(1); + } else { /* @@ -2376,7 +2380,7 @@ setlocales(void) { char *canonname; - /* set empty lc_* values to locale config if set */ + /* set empty lc_* and iculocale values to locale config if set */ if (locale) { @@ -2392,6 +2396,8 @@ setlocales(void) lc_monetary = locale; if (!lc_messages) lc_messages = locale; + if (!icu_locale && locale_provider == COLLPROVIDER_ICU) + icu_locale = locale; } /* diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl index fa00bb3dabe..cf55a84cd18 100644 --- a/src/bin/initdb/t/001_initdb.pl +++ 
b/src/bin/initdb/t/001_initdb.pl @@ -111,6 +111,17 @@ if ($ENV{with_icu} eq 'yes') ], 'option --icu-locale'); + command_like( + [ + 'initdb', '--no-sync', '-A', 'trust', + '--locale-provider=icu', '--locale=und', + '--lc-collate=C', '--lc-ctype=C', '--lc-messages=C', + '--lc-numeric=C', '--lc-monetary=C', '--lc-time=C', + "$tempdir/data4" + ], + qr/^\s+ICU locale:\s+und\n/ms, + 'options --locale-provider=icu --locale=und --lc-*=C'); + command_fails_like( [ 'initdb', '--no-sync', diff --git a/src/bin/scripts/createdb.c b/src/bin/scripts/createdb.c index b4205c4fa51..9ca86a3e53d 100644 --- a/src/bin/scripts/createdb.c +++ b/src/bin/scripts/createdb.c @@ -164,14 +164,6 @@ main(int argc, char *argv[]) exit(1); } - if (locale) - { - if (!lc_ctype) - lc_ctype = locale; - if (!lc_collate) - lc_collate = locale; - } - if (encoding) { if (pg_char_to_encoding(encoding) < 0) @@ -219,6 +211,11 @@ main(int argc, char *argv[]) appendPQExpBuffer(&sql, " STRATEGY %s", fmtId(strategy)); if (template) appendPQExpBuffer(&sql, " TEMPLATE %s", fmtId(template)); + if (locale) + { + appendPQExpBufferStr(&sql, " LOCALE "); + appendStringLiteralConn(&sql, locale, conn); + } if (lc_collate) { appendPQExpBufferStr(&sql, " LC_COLLATE "); diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl index d0830a4a1d3..694ec568047 100644 --- a/src/bin/scripts/t/020_createdb.pl +++ b/src/bin/scripts/t/020_createdb.pl @@ -86,6 +86,15 @@ if ($ENV{with_icu} eq 'yes') ], 'create database with icu locale from template database with icu provider' ); + + $node2->command_ok( + [ + 'createdb', '-T', 'template0', '--locale-provider', 'icu', + '--locale', 'en', '--lc-collate', 'C', '--lc-ctype', 'C', + 'foobar57' + ], + 'create database with locale as ICU locale' + ); } else { @@ -110,7 +119,7 @@ ALTER TABLE tab_foobar owner to role_foobar; CREATE POLICY pol_foobar ON tab_foobar FOR ALL TO role_foobar;'); $node->issues_sql_like( [ 'createdb', '-l', 'C', '-T', 'foobar2', 'foobar3' ], - 
qr/statement: CREATE DATABASE foobar3 TEMPLATE foobar2/, + qr/statement: CREATE DATABASE foobar3 TEMPLATE foobar2 LOCALE 'C'/, 'create database with template'); ($ret, $stdout, $stderr) = $node->psql( 'foobar3', @@ -137,7 +146,7 @@ $node->command_checks_all( 1, [qr/^$/], [ - qr/^createdb: error: database creation failed: ERROR: invalid locale name|^createdb: error: database creation failed: ERROR: new collation \(foo'; SELECT '1\) is incompatible with the collation of the template database/s + qr/^createdb: error: database creation failed: ERROR: invalid LC_COLLATE locale name|^createdb: error: database creation failed: ERROR: new collation \(foo'; SELECT '1\) is incompatible with the collation of the template database/s ], 'createdb with incorrect --lc-collate'); $node->command_checks_all( @@ -145,7 +154,7 @@ $node->command_checks_all( 1, [qr/^$/], [ - qr/^createdb: error: database creation failed: ERROR: invalid locale name|^createdb: error: database creation failed: ERROR: new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the LC_CTYPE of the template database/s + qr/^createdb: error: database creation failed: ERROR: invalid LC_CTYPE locale name|^createdb: error: database creation failed: ERROR: new LC_CTYPE \(foo'; SELECT '1\) is incompatible with the LC_CTYPE of the template database/s ], 'createdb with incorrect --lc-ctype'); diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl index d3901f5d3f6..cbe5467f3c6 100644 --- a/src/test/icu/t/010_database.pl +++ b/src/test/icu/t/010_database.pl @@ -51,17 +51,36 @@ b), 'sort by explicit collation upper first'); -# Test error cases in CREATE DATABASE involving locale-related options +# Test that LOCALE='C' works for ICU +is( $node1->psql( + 'postgres', + q{CREATE DATABASE dbicu1 LOCALE_PROVIDER icu LOCALE 'C' TEMPLATE template0 ENCODING UTF8} + ), + 0, + "C locale works for ICU"); -my ($ret, $stdout, $stderr) = $node1->psql('postgres', - q{CREATE DATABASE dbicu LOCALE_PROVIDER icu LOCALE 'C' 
TEMPLATE template0 ENCODING UTF8} -); +# Test that LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE +# are specified +is( $node1->psql( + 'postgres', + q{CREATE DATABASE dbicu2 LOCALE_PROVIDER icu LOCALE '@colStrength=primary' + LC_COLLATE='C' LC_CTYPE='C' TEMPLATE template0 ENCODING UTF8} + ), + 0, + "LOCALE works for ICU locales if LC_COLLATE and LC_CTYPE are specified"); + +# Test that ICU-specific LOCALE without LC_COLLATE and LC_CTYPE must +# be specified with ICU_LOCALE +my ($ret, $stdout, $stderr) = $node1->psql( + 'postgres', + q{CREATE DATABASE dbicu3 LOCALE_PROVIDER icu LOCALE '@colStrength=primary' + TEMPLATE template0 ENCODING UTF8}); isnt($ret, 0, - "ICU locale must be specified for ICU provider: exit code not 0"); + "ICU-specific locale must be specified with ICU_LOCALE: exit code not 0"); like( $stderr, - qr/ERROR: ICU locale must be specified/, - "ICU locale must be specified for ICU provider: error message"); + qr/ERROR: invalid LC_COLLATE locale name/, + "ICU-specific locale must be specified with ICU_LOCALE: error message"); done_testing(); diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 00dee24549a..dc96e590f74 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -1194,9 +1194,9 @@ SELECT 'coté' < 'côte' COLLATE "und-x-icu", 'coté' > 'côte' COLLATE testcoll (1 row) CREATE COLLATION testcoll_lower_first (provider = icu, locale = '@colCaseFirst=lower'); -NOTICE: using standard form "und-u-kf-lower" for locale "@colCaseFirst=lower" +NOTICE: using standard form "und-u-kf-lower" for ICU locale "@colCaseFirst=lower" CREATE COLLATION testcoll_upper_first (provider = icu, locale = '@colCaseFirst=upper'); -NOTICE: using standard form "und-u-kf-upper" for locale "@colCaseFirst=upper" +NOTICE: using standard form "und-u-kf-upper" for ICU locale "@colCaseFirst=upper" SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' 
> 'AAA' COLLATE testcoll_upper_first; ?column? | ?column? ----------+---------- @@ -1204,7 +1204,7 @@ SELECT 'aaa' < 'AAA' COLLATE testcoll_lower_first, 'aaa' > 'AAA' COLLATE testcol (1 row) CREATE COLLATION testcoll_shifted (provider = icu, locale = '@colAlternate=shifted'); -NOTICE: using standard form "und-u-ka-shifted" for locale "@colAlternate=shifted" +NOTICE: using standard form "und-u-ka-shifted" for ICU locale "@colAlternate=shifted" SELECT 'de-luge' < 'deanza' COLLATE "und-x-icu", 'de-luge' > 'deanza' COLLATE testcoll_shifted; ?column? | ?column? ----------+---------- @@ -1221,12 +1221,12 @@ SELECT 'A-21' > 'A-123' COLLATE "und-x-icu", 'A-21' < 'A-123' COLLATE testcoll_n (1 row) CREATE COLLATION testcoll_error1 (provider = icu, locale = '@colNumeric=lower'); -NOTICE: using standard form "und-u-kn-lower" for locale "@colNumeric=lower" +NOTICE: using standard form "und-u-kn-lower" for ICU locale "@colNumeric=lower" ERROR: could not open collator for locale "und-u-kn-lower": U_ILLEGAL_ARGUMENT_ERROR -- test that attributes not handled by icu_set_collation_attributes() -- (handled by ucol_open() directly) also work CREATE COLLATION testcoll_de_phonebook (provider = icu, locale = 'de@collation=phonebook'); -NOTICE: using standard form "de-u-co-phonebk" for locale "de@collation=phonebook" +NOTICE: using standard form "de-u-co-phonebk" for ICU locale "de@collation=phonebook" SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE testcoll_de_phonebook; ?column? | ?column? 
----------+---------- @@ -1235,7 +1235,7 @@ SELECT 'Goldmann' < 'Götz' COLLATE "de-x-icu", 'Goldmann' > 'Götz' COLLATE tes -- rules CREATE COLLATION testcoll_rules1 (provider = icu, locale = '', rules = '&a < g'); -NOTICE: using standard form "und" for locale "" +NOTICE: using standard form "und" for ICU locale "" CREATE TABLE test7 (a text); -- example from https://unicode-org.github.io/icu/userguide/collation/customization/#syntax INSERT INTO test7 VALUES ('Abernathy'), ('apple'), ('bird'), ('Boston'), ('Graham'), ('green'); @@ -1263,13 +1263,13 @@ SELECT * FROM test7 ORDER BY a COLLATE testcoll_rules1; DROP TABLE test7; CREATE COLLATION testcoll_rulesx (provider = icu, locale = '', rules = '!!wrong!!'); -NOTICE: using standard form "und" for locale "" +NOTICE: using standard form "und" for ICU locale "" ERROR: could not open collator for locale "und" with rules "!!wrong!!": U_INVALID_FORMAT_ERROR -- nondeterministic collations CREATE COLLATION ctest_det (provider = icu, locale = '', deterministic = true); -NOTICE: using standard form "und" for locale "" +NOTICE: using standard form "und" for ICU locale "" CREATE COLLATION ctest_nondet (provider = icu, locale = '', deterministic = false); -NOTICE: using standard form "und" for locale "" +NOTICE: using standard form "und" for ICU locale "" CREATE TABLE test6 (a int, b text); -- same string in different normal forms INSERT INTO test6 VALUES (1, U&'\00E4bc'); @@ -1319,9 +1319,9 @@ SELECT * FROM test6a WHERE b = ARRAY['äbc'] COLLATE ctest_nondet; (2 rows) CREATE COLLATION case_sensitive (provider = icu, locale = ''); -NOTICE: using standard form "und" for locale "" +NOTICE: using standard form "und" for ICU locale "" CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=secondary', deterministic = false); -NOTICE: using standard form "und-u-ks-level2" for locale "@colStrength=secondary" +NOTICE: using standard form "und-u-ks-level2" for ICU locale "@colStrength=secondary" SELECT 'abc' <= 'ABC' 
COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive; ?column? | ?column? ----------+----------