From 4da12e9e2e3c011a3fc8354ca451d6a82c017fa3 Mon Sep 17 00:00:00 2001
From: Jeff Davis
Date: Tue, 28 Oct 2025 10:49:20 -0700
Subject: [PATCH] Move comment about casts from pg_wchar.

Suggested-by: Thomas Munro
Discussion: https://postgr.es/m/CA+hUKGLXQUYK7Cq5KbLGgTWo7pORs7yhBWO1AEnZt7xTYbLRhg@mail.gmail.com
---
 src/backend/utils/adt/pg_locale_icu.c  | 5 +++++
 src/backend/utils/adt/pg_locale_libc.c | 3 +--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/adt/pg_locale_icu.c b/src/backend/utils/adt/pg_locale_icu.c
index 05bad202669..f5a0cc8fe41 100644
--- a/src/backend/utils/adt/pg_locale_icu.c
+++ b/src/backend/utils/adt/pg_locale_icu.c
@@ -128,6 +128,11 @@ char_is_cased_icu(char ch, pg_locale_t locale)
 		(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
 }
 
+/*
+ * XXX: many of the functions below rely on casts directly from pg_wchar to
+ * UChar32, which is correct for the UTF-8 encoding, but not in general.
+ */
+
 static pg_wchar
 toupper_icu(pg_wchar wc, pg_locale_t locale)
 {
diff --git a/src/backend/utils/adt/pg_locale_libc.c b/src/backend/utils/adt/pg_locale_libc.c
index 7ae778dc296..9c7fcd1fc7a 100644
--- a/src/backend/utils/adt/pg_locale_libc.c
+++ b/src/backend/utils/adt/pg_locale_libc.c
@@ -45,8 +45,7 @@
  *
  * 2. When working in UTF8 encoding, we use the <wctype.h> functions.
  * This assumes that every platform uses Unicode codepoints directly
- * as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
- * even for non-UTF8 encodings, which may be a problem.) On some platforms
+ * as the wchar_t representation of Unicode. On some platforms
  * wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
  *
  * 3. In all other encodings, we use the <ctype.h> functions for pg_wchar