diff --git a/contrib/citext/expected/citext_utf8.out b/contrib/citext/expected/citext_utf8.out
index 666b07ccec4..77b4586d8fa 100644
--- a/contrib/citext/expected/citext_utf8.out
+++ b/contrib/citext/expected/citext_utf8.out
@@ -1,9 +1,16 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
+ *
+ * Also disable this file for ICU, because the test for the the
+ * Turkish dotted I is not correct for many ICU locales. citext always
+ * uses the default collation, so it's not easy to restrict the test
+ * to the "tr-TR-x-icu" collation where it will succeed.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
- current_setting('lc_ctype') = 'C'
+ current_setting('lc_ctype') = 'C' OR
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
diff --git a/contrib/citext/expected/citext_utf8_1.out b/contrib/citext/expected/citext_utf8_1.out
index 433e9853497..d1e1fe1a9d8 100644
--- a/contrib/citext/expected/citext_utf8_1.out
+++ b/contrib/citext/expected/citext_utf8_1.out
@@ -1,9 +1,16 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
+ *
+ * Also disable this file for ICU, because the test for the the
+ * Turkish dotted I is not correct for many ICU locales. citext always
+ * uses the default collation, so it's not easy to restrict the test
+ * to the "tr-TR-x-icu" collation where it will succeed.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
- current_setting('lc_ctype') = 'C'
+ current_setting('lc_ctype') = 'C' OR
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
diff --git a/contrib/citext/sql/citext_utf8.sql b/contrib/citext/sql/citext_utf8.sql
index d068000b423..8530c68dd7e 100644
--- a/contrib/citext/sql/citext_utf8.sql
+++ b/contrib/citext/sql/citext_utf8.sql
@@ -1,10 +1,17 @@
/*
* This test must be run in a database with UTF-8 encoding
* and a Unicode-aware locale.
+ *
+ * Also disable this file for ICU, because the test for the the
+ * Turkish dotted I is not correct for many ICU locales. citext always
+ * uses the default collation, so it's not easy to restrict the test
+ * to the "tr-TR-x-icu" collation where it will succeed.
*/
SELECT getdatabaseencoding() <> 'UTF8' OR
- current_setting('lc_ctype') = 'C'
+ current_setting('lc_ctype') = 'C' OR
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
AS skip_test \gset
\if :skip_test
\quit
diff --git a/contrib/unaccent/expected/unaccent.out b/contrib/unaccent/expected/unaccent.out
index ee0ac71a1cc..cef98ee60cc 100644
--- a/contrib/unaccent/expected/unaccent.out
+++ b/contrib/unaccent/expected/unaccent.out
@@ -1,3 +1,12 @@
+-- unaccent is broken if the default collation is provided by ICU and
+-- LC_CTYPE=C
+SELECT current_setting('lc_ctype') = 'C' AND
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
CREATE EXTENSION unaccent;
-- must have a UTF8 database
SELECT getdatabaseencoding();
diff --git a/contrib/unaccent/expected/unaccent_1.out b/contrib/unaccent/expected/unaccent_1.out
new file mode 100644
index 00000000000..0a4a3838abd
--- /dev/null
+++ b/contrib/unaccent/expected/unaccent_1.out
@@ -0,0 +1,8 @@
+-- unaccent is broken if the default collation is provided by ICU and
+-- LC_CTYPE=C
+SELECT current_setting('lc_ctype') = 'C' AND
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
+ AS skip_test \gset
+\if :skip_test
+\quit
diff --git a/contrib/unaccent/sql/unaccent.sql b/contrib/unaccent/sql/unaccent.sql
index 3fc0c706be3..027dfb964a7 100644
--- a/contrib/unaccent/sql/unaccent.sql
+++ b/contrib/unaccent/sql/unaccent.sql
@@ -1,3 +1,14 @@
+
+-- unaccent is broken if the default collation is provided by ICU and
+-- LC_CTYPE=C
+SELECT current_setting('lc_ctype') = 'C' AND
+ (SELECT datlocprovider='i' FROM pg_database
+ WHERE datname=current_database())
+ AS skip_test \gset
+\if :skip_test
+\quit
+\endif
+
CREATE EXTENSION unaccent;
-- must have a UTF8 database
diff --git a/doc/src/sgml/ref/initdb.sgml b/doc/src/sgml/ref/initdb.sgml
index c96164195d4..05a9c2cf588 100644
--- a/doc/src/sgml/ref/initdb.sgml
+++ b/doc/src/sgml/ref/initdb.sgml
@@ -89,10 +89,28 @@ PostgreSQL documentation
and character set encoding. These can also be set separately for each
database when it is created. initdb determines those
settings for the template databases, which will serve as the default for
- all other databases. By default, initdb uses the
- locale provider libc, takes the locale settings from
- the environment, and determines the encoding from the locale settings.
- This is almost always sufficient, unless there are special requirements.
+ all other databases.
+
+
+
+ By default, initdb uses the ICU library to provide
+ locale services if the server was built with ICU support; otherwise it uses
+ the libc locale provider (see ). To choose the specific ICU locale ID to
+ apply, use the option . Note that for
+ implementation reasons and to support legacy code,
+ initdb will still select and initialize libc locale
+ settings when the ICU locale provider is used.
+
+
+
+ Alternatively, initdb can use the locale provider
+ libc. To select this option, specify
+ --locale-provider=libc, or build the server without ICU
+ support. The libc locale provider takes the locale
+ settings from the environment, and determines the encoding from the locale
+ settings. This is almost always sufficient, unless there are special
+ requirements.
@@ -103,17 +121,6 @@ PostgreSQL documentation
categories can give nonsensical results, so this should be used with care.
-
- Alternatively, the ICU library can be used to provide locale services.
- (Again, this only sets the default for subsequently created databases.) To
- select this option, specify --locale-provider=icu.
- To choose the specific ICU locale ID to apply, use the option
- . Note that
- for implementation reasons and to support legacy code,
- initdb will still select and initialize libc locale
- settings when the ICU locale provider is used.
-
-
When initdb runs, it will print out the locale settings
it has chosen. If you have complex requirements or specified multiple
@@ -234,7 +241,13 @@ PostgreSQL documentation
- Specifies the ICU locale ID, if the ICU locale provider is used.
+ Specifies the ICU locale when the ICU provider is used. Locale support
+ is described in .
+
+
+ If this option is not specified, the locale is inherited from the
+ environment in which initdb runs. The environment's
+ locale is matched to a similar ICU locale name, if possible.
@@ -307,10 +320,12 @@ PostgreSQL documentation
- This option sets the locale provider for databases created in the
- new cluster. It can be overridden in the CREATE
+ This option sets the locale provider for databases created in the new
+ cluster. It can be overridden in the CREATE
DATABASE command when new databases are subsequently
- created. The default is libc.
+ created. The default is icu if the server was
+ built with ICU support; otherwise the default is
+ libc (see ).
diff --git a/src/bin/initdb/Makefile b/src/bin/initdb/Makefile
index eab89c55013..d69bd89572a 100644
--- a/src/bin/initdb/Makefile
+++ b/src/bin/initdb/Makefile
@@ -16,7 +16,7 @@ subdir = src/bin/initdb
top_builddir = ../../..
include $(top_builddir)/src/Makefile.global
-override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
+override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(ICU_CFLAGS) $(CPPFLAGS)
# Note: it's important that we link to encnames.o from libpgcommon, not
# from libpq, else we have risks of version skew if we run with a libpq
@@ -24,7 +24,7 @@ override CPPFLAGS := -I$(libpq_srcdir) -I$(top_srcdir)/src/timezone $(CPPFLAGS)
# should ensure that that happens.
#
# We need libpq only because fe_utils does.
-LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport)
+LDFLAGS_INTERNAL += -L$(top_builddir)/src/fe_utils -lpgfeutils $(libpq_pgport) $(ICU_LIBS)
# use system timezone data?
ifneq (,$(with_system_tzdata))
diff --git a/src/bin/initdb/initdb.c b/src/bin/initdb/initdb.c
index 5e3c6a27c48..bf88cd24396 100644
--- a/src/bin/initdb/initdb.c
+++ b/src/bin/initdb/initdb.c
@@ -53,6 +53,9 @@
#include
#include
#include
+#ifdef USE_ICU
+#include
+#endif
#include
#include
#include
@@ -133,7 +136,11 @@ static char *lc_monetary = NULL;
static char *lc_numeric = NULL;
static char *lc_time = NULL;
static char *lc_messages = NULL;
+#ifdef USE_ICU
+static char locale_provider = COLLPROVIDER_ICU;
+#else
static char locale_provider = COLLPROVIDER_LIBC;
+#endif
static char *icu_locale = NULL;
static char *icu_rules = NULL;
static const char *default_text_search_config = NULL;
@@ -2028,6 +2035,50 @@ check_icu_locale_encoding(int user_enc)
return true;
}
+/*
+ * Check that ICU accepts the locale name; or if not specified, retrieve the
+ * default ICU locale.
+ */
+static void
+check_icu_locale(void)
+{
+#ifdef USE_ICU
+ UCollator *collator;
+ UErrorCode status;
+
+ status = U_ZERO_ERROR;
+ collator = ucol_open(icu_locale, &status);
+ if (U_FAILURE(status))
+ {
+ if (icu_locale)
+ pg_fatal("could not open collator for locale \"%s\": %s",
+ icu_locale, u_errorName(status));
+ else
+ pg_fatal("could not open collator for default locale: %s",
+ u_errorName(status));
+ }
+
+ /* if not specified, get locale from default collator */
+ if (icu_locale == NULL)
+ {
+ const char *default_locale;
+
+ status = U_ZERO_ERROR;
+ default_locale = ucol_getLocaleByType(collator, ULOC_VALID_LOCALE,
+ &status);
+ if (U_FAILURE(status))
+ {
+ ucol_close(collator);
+ pg_fatal("could not determine default ICU locale");
+ }
+
+ icu_locale = pg_strdup(default_locale);
+ }
+
+ ucol_close(collator);
+#endif
+}
+
/*
* set up the locale variables
*
@@ -2081,8 +2132,7 @@ setlocales(void)
if (locale_provider == COLLPROVIDER_ICU)
{
- if (!icu_locale)
- pg_fatal("ICU locale must be specified");
+ check_icu_locale();
/*
* In supported builds, the ICU locale ID will be checked by the
diff --git a/src/bin/initdb/t/001_initdb.pl b/src/bin/initdb/t/001_initdb.pl
index 772769acabf..e5d214e09c4 100644
--- a/src/bin/initdb/t/001_initdb.pl
+++ b/src/bin/initdb/t/001_initdb.pl
@@ -97,11 +97,6 @@ SKIP:
if ($ENV{with_icu} eq 'yes')
{
- command_fails_like(
- [ 'initdb', '--no-sync', '--locale-provider=icu', "$tempdir/data2" ],
- qr/initdb: error: ICU locale must be specified/,
- 'locale provider ICU requires --icu-locale');
-
command_ok(
[
'initdb', '--no-sync',
@@ -116,7 +111,7 @@ if ($ENV{with_icu} eq 'yes')
'--locale-provider=icu', '--icu-locale=@colNumeric=lower',
"$tempdir/dataX"
],
- qr/FATAL: could not open collator for locale/,
+ qr/error: could not open collator for locale/,
'fails for invalid ICU locale');
command_fails_like(
diff --git a/src/bin/pg_dump/t/002_pg_dump.pl b/src/bin/pg_dump/t/002_pg_dump.pl
index 187e4b8d07d..9c354213ceb 100644
--- a/src/bin/pg_dump/t/002_pg_dump.pl
+++ b/src/bin/pg_dump/t/002_pg_dump.pl
@@ -1758,7 +1758,7 @@ my %tests = (
create_sql =>
"CREATE DATABASE dump_test2 LOCALE = 'C' TEMPLATE = template0;",
regexp => qr/^
- \QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C';\E
+ \QCREATE DATABASE dump_test2 \E.*\QLOCALE = 'C'\E
/xm,
like => { pg_dumpall_dbprivs => 1, },
},
diff --git a/src/bin/scripts/t/020_createdb.pl b/src/bin/scripts/t/020_createdb.pl
index 3ad4fbb00c8..8ec58cdd648 100644
--- a/src/bin/scripts/t/020_createdb.pl
+++ b/src/bin/scripts/t/020_createdb.pl
@@ -13,7 +13,7 @@ program_version_ok('createdb');
program_options_handling_ok('createdb');
my $node = PostgreSQL::Test::Cluster->new('main');
-$node->init;
+$node->init(extra => ['--locale-provider=libc']);
$node->start;
$node->issues_sql_like(
diff --git a/src/interfaces/ecpg/test/Makefile b/src/interfaces/ecpg/test/Makefile
index d7a7d1d1ca5..cf841a3a5b2 100644
--- a/src/interfaces/ecpg/test/Makefile
+++ b/src/interfaces/ecpg/test/Makefile
@@ -14,9 +14,6 @@ override CPPFLAGS := \
'-DSHELLPROG="$(SHELL)"' \
$(CPPFLAGS)
-# default encoding for regression tests
-ENCODING = SQL_ASCII
-
ifneq ($(build_os),mingw32)
abs_builddir := $(shell pwd)
else
diff --git a/src/interfaces/ecpg/test/connect/test5.pgc b/src/interfaces/ecpg/test/connect/test5.pgc
index de291600899..d5125536777 100644
--- a/src/interfaces/ecpg/test/connect/test5.pgc
+++ b/src/interfaces/ecpg/test/connect/test5.pgc
@@ -55,7 +55,7 @@ exec sql end declare section;
exec sql connect to 'unix:postgresql://localhost/ecpg2_regression' as main user :user USING "connectpw";
exec sql disconnect main;
- exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=latin1 as main user regress_ecpg_user1/connectpw;
+ exec sql connect to unix:postgresql://localhost/ecpg2_regression?connect_timeout=180&client_encoding=sql_ascii as main user regress_ecpg_user1/connectpw;
exec sql disconnect main;
exec sql connect to "unix:postgresql://200.46.204.71/ecpg2_regression" as main user regress_ecpg_user1/connectpw;
diff --git a/src/interfaces/ecpg/test/expected/connect-test5.c b/src/interfaces/ecpg/test/expected/connect-test5.c
index c1124c627ff..ec1514ed9ab 100644
--- a/src/interfaces/ecpg/test/expected/connect-test5.c
+++ b/src/interfaces/ecpg/test/expected/connect-test5.c
@@ -117,7 +117,7 @@ main(void)
#line 56 "test5.pgc"
- { ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=latin1" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
+ { ECPGconnect(__LINE__, 0, "unix:postgresql://localhost/ecpg2_regression?connect_timeout=180 & client_encoding=sql_ascii" , "regress_ecpg_user1" , "connectpw" , "main", 0); }
#line 58 "test5.pgc"
{ ECPGdisconnect(__LINE__, "main");}
diff --git a/src/interfaces/ecpg/test/expected/connect-test5.stderr b/src/interfaces/ecpg/test/expected/connect-test5.stderr
index 01a6a0a13b2..51cc18916a1 100644
--- a/src/interfaces/ecpg/test/expected/connect-test5.stderr
+++ b/src/interfaces/ecpg/test/expected/connect-test5.stderr
@@ -50,7 +50,7 @@
[NO_PID]: sqlca: code: 0, state: 00000
[NO_PID]: ecpg_finish: connection main closed
[NO_PID]: sqlca: code: 0, state: 00000
-[NO_PID]: ECPGconnect: opening database ecpg2_regression on port with options connect_timeout=180 & client_encoding=latin1 for user regress_ecpg_user1
+[NO_PID]: ECPGconnect: opening database ecpg2_regression on port with options connect_timeout=180 & client_encoding=sql_ascii for user regress_ecpg_user1
[NO_PID]: sqlca: code: 0, state: 00000
[NO_PID]: ecpg_finish: connection main closed
[NO_PID]: sqlca: code: 0, state: 00000
diff --git a/src/interfaces/ecpg/test/meson.build b/src/interfaces/ecpg/test/meson.build
index d0be73ccf98..04c6819a799 100644
--- a/src/interfaces/ecpg/test/meson.build
+++ b/src/interfaces/ecpg/test/meson.build
@@ -69,7 +69,6 @@ ecpg_test_files = files(
ecpg_regress_args = [
'--dbname=ecpg1_regression,ecpg2_regression',
'--create-role=regress_ecpg_user1,regress_ecpg_user2',
- '--encoding=SQL_ASCII',
]
tests += {
diff --git a/src/test/icu/t/010_database.pl b/src/test/icu/t/010_database.pl
index 80ab1c7789c..45d77c319a3 100644
--- a/src/test/icu/t/010_database.pl
+++ b/src/test/icu/t/010_database.pl
@@ -12,7 +12,7 @@ if ($ENV{with_icu} ne 'yes')
}
my $node1 = PostgreSQL::Test::Cluster->new('node1');
-$node1->init;
+$node1->init(extra => ['--locale-provider=libc']);
$node1->start;
$node1->safe_psql('postgres',