From 5e6e42e44fe10cab616b4fbe9725df03c987c90a Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Wed, 16 Jul 2025 14:13:18 -0700 Subject: [PATCH] Force LC_COLLATE to C in postmaster. Avoid dependence on setlocale(). strcoll(), etc., are not called directly; all collation-sensitive calls should go through pg_locale.c and use the appropriate provider. By setting LC_COLLATE to C, we avoid accidentally depending on libc behavior when using a different provider. No behavior change in the backend, but it's possible that some extensions will be affected. Such extensions should be updated to use the pg_locale_t APIs. Discussion: https://postgr.es/m/9875f7f9-50f1-4b5d-86fc-ee8b03e8c162@eisentraut.org Reviewed-by: Peter Eisentraut --- doc/src/sgml/catalogs.sgml | 2 +- doc/src/sgml/charset.sgml | 2 +- doc/src/sgml/ref/create_database.sgml | 45 ++++++++++++++------------- doc/src/sgml/ref/createdb.sgml | 3 +- src/backend/main/main.c | 16 ++++++---- src/backend/utils/init/postinit.c | 11 +++---- 6 files changed, 42 insertions(+), 37 deletions(-) diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index aa5b8772436..0d23bc1b122 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -3158,7 +3158,7 @@ SCRAM-SHA-256$<iteration count>:&l datcollate text - LC_COLLATE for this database + LC_COLLATE for this database (ignored unless datlocprovider is c) diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index 5a0e97f6f31..59b27c3c370 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -100,7 +100,7 @@ initdb --locale=sv_SE LC_COLLATE - String sort order + String sort order (ignored unless the provider is libc) LC_CTYPE diff --git a/doc/src/sgml/ref/create_database.sgml b/doc/src/sgml/ref/create_database.sgml index 4da8aeebb50..3544b15efda 100644 --- a/doc/src/sgml/ref/create_database.sgml +++ b/doc/src/sgml/ref/create_database.sgml @@ -150,12 +150,12 @@ CREATE DATABASE name Sets the default collation order and 
character classification in the new database. Collation affects the sort order applied to strings, - e.g., in queries with ORDER BY, as well as the order used in indexes - on text columns. Character classification affects the categorization - of characters, e.g., lower, upper, and digit. Also sets the - associated aspects of the operating system environment, - LC_COLLATE and LC_CTYPE. The - default is the same setting as the template database. See ORDER BY, as well as the + order used in indexes on text columns. Character classification + affects the categorization of characters, e.g., lower, upper, and + digit. Also sets the LC_CTYPE aspect of the + operating system environment. The default is the same setting as the + template database. See and for details. @@ -189,17 +189,16 @@ CREATE DATABASE name lc_collate - Sets LC_COLLATE in the database server's operating - system environment. The default is the setting of if specified, otherwise the same - setting as the template database. See below for additional - restrictions. + If is + libc, sets the default collation order to use in + the new database, overriding the setting . Otherwise, this setting is + ignored. - If is - libc, also sets the default collation order to use - in the new database, overriding the setting . + The default is the setting of + if specified, otherwise the same setting as the template database. + See below for additional restrictions. @@ -208,16 +207,18 @@ CREATE DATABASE name Sets LC_CTYPE in the database server's operating - system environment. The default is the setting of if specified, otherwise the same - setting as the template database. See below for additional - restrictions. + system environment. If is - libc, also sets the default character - classification to use in the new database, overriding the setting - . + libc, sets the default character classification to + use in the new database, overriding the setting . 
+ + + The default is the setting of + if specified, otherwise the same setting as the template database. + See below for additional restrictions. diff --git a/doc/src/sgml/ref/createdb.sgml b/doc/src/sgml/ref/createdb.sgml index 5c4e0465ed9..2ccbe13f390 100644 --- a/doc/src/sgml/ref/createdb.sgml +++ b/doc/src/sgml/ref/createdb.sgml @@ -136,7 +136,8 @@ PostgreSQL documentation - Specifies the LC_COLLATE setting to be used in this database. + Specifies the LC_COLLATE setting to be used in this database (ignored + unless the locale provider is libc). diff --git a/src/backend/main/main.c b/src/backend/main/main.c index 7d63cf94a6b..bdcb5e4f261 100644 --- a/src/backend/main/main.c +++ b/src/backend/main/main.c @@ -125,13 +125,17 @@ main(int argc, char *argv[]) set_pglocale_pgservice(argv[0], PG_TEXTDOMAIN("postgres")); /* - * In the postmaster, absorb the environment values for LC_COLLATE and - * LC_CTYPE. Individual backends will change these later to settings - * taken from pg_database, but the postmaster cannot do that. If we leave - * these set to "C" then message localization might not work well in the - * postmaster. + * Collation is handled by pg_locale.c, and the behavior is dependent on + * the provider. strcoll(), etc., should not be called directly. + */ + init_locale("LC_COLLATE", LC_COLLATE, "C"); + + /* + * In the postmaster, absorb the environment value for LC_CTYPE. + * Individual backends will change it later to pg_database.datctype, but + * the postmaster cannot do that. If we leave it set to "C" then message + * localization might not work well in the postmaster. 
*/ - init_locale("LC_COLLATE", LC_COLLATE, ""); init_locale("LC_CTYPE", LC_CTYPE, ""); /* diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index c86ceefda94..641e535a73c 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -417,12 +417,11 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype); ctype = TextDatumGetCString(datum); - if (pg_perm_setlocale(LC_COLLATE, collate) == NULL) - ereport(FATAL, - (errmsg("database locale is incompatible with operating system"), - errdetail("The database was initialized with LC_COLLATE \"%s\", " - " which is not recognized by setlocale().", collate), - errhint("Recreate the database with another locale or install the missing locale."))); + /* + * Historically, we set LC_COLLATE from datcollate, as well. That's no + * longer necessary because all collation behavior is handled through + * pg_locale_t. + */ if (pg_perm_setlocale(LC_CTYPE, ctype) == NULL) ereport(FATAL,