From 8b87e92919cd0e8e8ffbae543d996063149c3ccc Mon Sep 17 00:00:00 2001 From: Jeff Davis Date: Fri, 17 Mar 2023 11:47:35 -0700 Subject: [PATCH] Fix t_isspace(), etc., when datlocprovider=i and datctype=C. Check whether the datctype is C to determine whether t_isspace() and related functions use isspace() or iswspace(). Previously, t_isspace() checked whether the database default collation was C, which is incorrect when the default collation uses the ICU provider. Discussion: https://postgr.es/m/79e4354d9eccfdb00483146a6b9f6295202e7890.camel@j-davis.com Reviewed-by: Peter Eisentraut Backpatch-through: 15 --- src/backend/tsearch/ts_locale.c | 15 +++++---------- src/backend/tsearch/wparser_def.c | 3 +-- src/backend/utils/adt/pg_locale.c | 3 +++ src/backend/utils/init/postinit.c | 4 ++++ src/include/utils/pg_locale.h | 2 ++ 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/backend/tsearch/ts_locale.c b/src/backend/tsearch/ts_locale.c index e0aa570bf5e..3a475a0f5fc 100644 --- a/src/backend/tsearch/ts_locale.c +++ b/src/backend/tsearch/ts_locale.c @@ -38,10 +38,9 @@ t_isdigit(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isdigit(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -54,10 +53,9 @@ t_isspace(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isspace(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -70,10 +68,9 @@ t_isalpha(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */
- if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isalpha(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -86,10 +83,9 @@ t_isprint(const char *ptr) { int clen = pg_mblen(ptr); wchar_t character[WC_BUF_LEN]; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ - if (clen == 1 || lc_ctype_is_c(collation)) + if (clen == 1 || database_ctype_is_c) return isprint(TOUCHAR(ptr)); char2wchar(character, WC_BUF_LEN, ptr, clen, mylocale); @@ -257,7 +253,6 @@ char * lowerstr_with_len(const char *str, int len) { char *out; - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ if (len == 0) @@ -269,7 +264,7 @@ lowerstr_with_len(const char *str, int len) * Also, for a C locale there is no need to process as multibyte. From * backend/utils/adt/oracle_compat.c Teodor */ - if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c(collation)) + if (pg_database_encoding_max_length() > 1 && !database_ctype_is_c) { wchar_t *wstr, *wptr; diff --git a/src/backend/tsearch/wparser_def.c b/src/backend/tsearch/wparser_def.c index 2323a3b9086..de6ff79489a 100644 --- a/src/backend/tsearch/wparser_def.c +++ b/src/backend/tsearch/wparser_def.c @@ -297,11 +297,10 @@ TParserInit(char *str, int len) */ if (prs->charmaxlen > 1) { - Oid collation = DEFAULT_COLLATION_OID; /* TODO */ pg_locale_t mylocale = 0; /* TODO */ prs->usewide = true; - if (lc_ctype_is_c(collation)) + if (database_ctype_is_c) { /* * char2wchar doesn't work for C-locale and sizeof(pg_wchar) could diff --git a/src/backend/utils/adt/pg_locale.c b/src/backend/utils/adt/pg_locale.c index a0490a75224..3c8fbe63a61 100644 --- a/src/backend/utils/adt/pg_locale.c +++ b/src/backend/utils/adt/pg_locale.c @@ -99,6 +99,9 @@ char *localized_full_days[7 + 1]; char *localized_abbrev_months[12 + 1]; char *localized_full_months[12 + 1]; +/* is the database's LC_CTYPE the C locale?
*/ +bool database_ctype_is_c = false; + /* indicates whether locale information cache is valid */ static bool CurrentLocaleConvValid = false; static bool CurrentLCTimeValid = false; diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index 28d5e19848a..ae03706d840 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -418,6 +418,10 @@ CheckMyDatabase(const char *name, bool am_superuser, bool override_allow_connect " which is not recognized by setlocale().", ctype), errhint("Recreate the database with another locale or install the missing locale."))); + if (strcmp(ctype, "C") == 0 || + strcmp(ctype, "POSIX") == 0) + database_ctype_is_c = true; + if (dbform->datlocprovider == COLLPROVIDER_ICU) { datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticulocale, &isnull); diff --git a/src/include/utils/pg_locale.h b/src/include/utils/pg_locale.h index e7385faef86..de7c5e30d2d 100644 --- a/src/include/utils/pg_locale.h +++ b/src/include/utils/pg_locale.h @@ -49,6 +49,8 @@ extern PGDLLIMPORT char *localized_full_days[]; extern PGDLLIMPORT char *localized_abbrev_months[]; extern PGDLLIMPORT char *localized_full_months[]; +/* is the database's LC_CTYPE the C locale? */ +extern PGDLLIMPORT bool database_ctype_is_c; extern bool check_locale_messages(char **newval, void **extra, GucSource source); extern void assign_locale_messages(const char *newval, void *extra);