1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-07 19:06:32 +03:00

All supported systems have locale_t.

locale_t is defined by POSIX.1-2008 and SUSv4, and available on all
targeted systems.  For Windows, win32_port.h redirects to a partial
implementation called _locale_t.  We can now remove a lot of
compile-time tests for HAVE_LOCALE_T, and associated comments and dead
code branches that were needed for older computers.

Previously, configure + MinGW builds didn't detect locale_t.  Now that we
assume that all systems have it, further inconsistencies among the 3
Windows build systems were revealed.  With this commit, we no longer define
HAVE_WCSTOMBS_L and HAVE_MBSTOWCS_L on any Windows build system; instead,
we have logic to deal with that so that replacements are available where
appropriate.

Reviewed-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Tristan Partin <tristan@neon.tech>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/CA%2BhUKGLg7_T2GKwZFAkEf0V7vbnur-NfCjZPKZb%3DZfAXSV1ORw%40mail.gmail.com
This commit is contained in:
Thomas Munro
2023-07-09 11:55:03 +12:00
parent e9f15bc9db
commit 8d9a9f034e
12 changed files with 46 additions and 169 deletions

View File

@@ -534,7 +534,7 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
/* will we use "locale -a" in pg_import_system_collations? */
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
#if !defined(WIN32)
#define READ_LOCALE_A_OUTPUT
#endif

View File

@@ -44,8 +44,7 @@
* the platform's wchar_t representation matches what we do in pg_wchar
* conversions.
*
* 3. Other collations are only supported on platforms that HAVE_LOCALE_T.
* Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
* 3. Here, we use the locale_t-extended forms of the <wctype.h> and <ctype.h>
* functions, under exactly the same cases as #2.
*
* There is one notable difference between cases 2 and 3: in the "default"
@@ -252,11 +251,6 @@ pg_set_regex_collation(Oid collation)
}
else
{
/*
* NB: pg_newlocale_from_collation will fail if not HAVE_LOCALE_T; the
* case of pg_regex_locale != 0 but not HAVE_LOCALE_T does not have to
* be considered below.
*/
pg_regex_locale = pg_newlocale_from_collation(collation);
if (!pg_locale_deterministic(pg_regex_locale))
@@ -304,16 +298,12 @@ pg_wc_isdigit(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isdigit((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -340,16 +330,12 @@ pg_wc_isalpha(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isalpha((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -376,16 +362,12 @@ pg_wc_isalnum(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isalnum((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -421,16 +403,12 @@ pg_wc_isupper(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isupper((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isupper_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -457,16 +435,12 @@ pg_wc_islower(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
islower((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
islower_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -493,16 +467,12 @@ pg_wc_isgraph(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isgraph((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -529,16 +499,12 @@ pg_wc_isprint(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isprint((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isprint_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -565,16 +531,12 @@ pg_wc_ispunct(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
ispunct((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -601,16 +563,12 @@ pg_wc_isspace(pg_wchar c)
return (c <= (pg_wchar) UCHAR_MAX &&
isspace((unsigned char) c));
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
return (c <= (pg_wchar) UCHAR_MAX &&
isspace_l((unsigned char) c, pg_regex_locale->info.lt));
#endif
break;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -645,16 +603,12 @@ pg_wc_toupper(pg_wchar c)
return toupper((unsigned char) c);
return c;
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return towupper_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
if (c <= (pg_wchar) UCHAR_MAX)
return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
#endif
return c;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU
@@ -689,16 +643,12 @@ pg_wc_tolower(pg_wchar c)
return tolower((unsigned char) c);
return c;
case PG_REGEX_LOCALE_WIDE_L:
#ifdef HAVE_LOCALE_T
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return towlower_l((wint_t) c, pg_regex_locale->info.lt);
#endif
/* FALL THRU */
case PG_REGEX_LOCALE_1BYTE_L:
#ifdef HAVE_LOCALE_T
if (c <= (pg_wchar) UCHAR_MAX)
return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
#endif
return c;
case PG_REGEX_LOCALE_ICU:
#ifdef USE_ICU

View File

@@ -1613,12 +1613,6 @@ u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
* in multibyte character sets. Note that in either case we are effectively
* assuming that the database character encoding matches the encoding implied
* by LC_CTYPE.
*
* If the system provides locale_t and associated functions (which are
* standardized by Open Group's XBD), we can support collations that are
* neither default nor C. The code is written to handle both combinations
* of have-wide-characters and have-locale_t, though it's rather unlikely
* a platform would have the latter without the former.
*/
/*
@@ -1696,11 +1690,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
#endif
workspace[curr_char] = towlower(workspace[curr_char]);
}
@@ -1729,11 +1721,9 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
*/
for (p = result; *p; p++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
else
#endif
*p = pg_tolower((unsigned char) *p);
}
}
@@ -1818,11 +1808,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
else
#endif
workspace[curr_char] = towupper(workspace[curr_char]);
}
@@ -1851,11 +1839,9 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
*/
for (p = result; *p; p++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
else
#endif
*p = pg_toupper((unsigned char) *p);
}
}
@@ -1941,7 +1927,6 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
{
if (wasalnum)
@@ -1951,7 +1936,6 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
}
else
#endif
{
if (wasalnum)
workspace[curr_char] = towlower(workspace[curr_char]);
@@ -1986,7 +1970,6 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
*/
for (p = result; *p; p++)
{
#ifdef HAVE_LOCALE_T
if (mylocale)
{
if (wasalnum)
@@ -1996,7 +1979,6 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
}
else
#endif
{
if (wasalnum)
*p = pg_tolower((unsigned char) *p);

View File

@@ -95,10 +95,8 @@ SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
{
if (locale_is_c)
return pg_ascii_tolower(c);
#ifdef HAVE_LOCALE_T
else if (locale)
return tolower_l(c, locale->info.lt);
#endif
else
return pg_tolower(c);
}

View File

@@ -1509,10 +1509,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
else if (locale && locale->provider == COLLPROVIDER_ICU)
return IS_HIGHBIT_SET(c) ||
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
#ifdef HAVE_LOCALE_T
else if (locale && locale->provider == COLLPROVIDER_LIBC)
return isalpha_l((unsigned char) c, locale->info.lt);
#endif
else
return isalpha((unsigned char) c);
}

View File

@@ -154,6 +154,38 @@ static void icu_set_collation_attributes(UCollator *collator, const char *loc,
UErrorCode *status);
#endif
#ifndef WIN32
/*
* POSIX doesn't define _l-variants of these functions, but several systems
* have them. We provide our own replacements here. For Windows, we have
* macros in win32_port.h.
*/
#ifndef HAVE_MBSTOWCS_L
static size_t
mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
{
	locale_t	prev_locale;
	size_t		nconverted;

	/*
	 * Replacement for systems lacking a native mbstowcs_l(): temporarily
	 * install "loc" as the current locale, run the plain mbstowcs()
	 * conversion, then reinstate whatever uselocale() handed back.
	 */
	prev_locale = uselocale(loc);
	nconverted = mbstowcs(dest, src, n);
	uselocale(prev_locale);

	/* Pass mbstowcs()'s return value through unchanged. */
	return nconverted;
}
#endif
#ifndef HAVE_WCSTOMBS_L
static size_t
wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
{
	locale_t	prev_locale;
	size_t		nconverted;

	/*
	 * Replacement for systems lacking a native wcstombs_l(): temporarily
	 * install "loc" as the current locale, run the plain wcstombs()
	 * conversion, then reinstate whatever uselocale() handed back.
	 */
	prev_locale = uselocale(loc);
	nconverted = wcstombs(dest, src, n);
	uselocale(prev_locale);

	/* Pass wcstombs()'s return value through unchanged. */
	return nconverted;
}
#endif
#endif
/*
* pg_perm_setlocale
*
@@ -1420,7 +1452,6 @@ make_icu_collator(const char *iculocstr,
/* simple subroutine for reporting errors from newlocale() */
#ifdef HAVE_LOCALE_T
static void
report_newlocale_failure(const char *localename)
{
@@ -1449,7 +1480,6 @@ report_newlocale_failure(const char *localename)
errdetail("The operating system could not find any locale data for the locale name \"%s\".",
localename) : 0)));
}
#endif /* HAVE_LOCALE_T */
bool
pg_locale_deterministic(pg_locale_t locale)
@@ -1466,10 +1496,6 @@ pg_locale_deterministic(pg_locale_t locale)
* lifetime of the backend. Thus, do not free the result with freelocale().
*
* As a special optimization, the default/database collation returns 0.
* Callers should then revert to the non-locale_t-enabled code path.
* Also, callers should avoid calling this before going down a C/POSIX
* fastpath, because such a fastpath should work even on platforms without
* locale_t support in the C library.
*
* For simplicity, we always generate COLLATE + CTYPE even though we
* might only need one of them. Since this is called only once per session,
@@ -1515,7 +1541,6 @@ pg_newlocale_from_collation(Oid collid)
if (collform->collprovider == COLLPROVIDER_LIBC)
{
#ifdef HAVE_LOCALE_T
const char *collcollate;
const char *collctype pg_attribute_unused();
locale_t loc;
@@ -1566,12 +1591,6 @@ pg_newlocale_from_collation(Oid collid)
}
result.info.lt = loc;
#else /* not HAVE_LOCALE_T */
/* platform that doesn't support locale_t */
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("collation provider LIBC is not supported on this platform")));
#endif /* not HAVE_LOCALE_T */
}
else if (collform->collprovider == COLLPROVIDER_ICU)
{
@@ -1788,11 +1807,9 @@ pg_strncoll_libc_win32_utf8(const char *arg1, size_t len1, const char *arg2,
((LPWSTR) a2p)[r] = 0;
errno = 0;
#ifdef HAVE_LOCALE_T
if (locale)
result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
else
#endif
result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
ereport(ERROR,
@@ -1831,14 +1848,7 @@ pg_strcoll_libc(const char *arg1, const char *arg2, pg_locale_t locale)
else
#endif /* WIN32 */
if (locale)
{
#ifdef HAVE_LOCALE_T
result = strcoll_l(arg1, arg2, locale->info.lt);
#else
/* shouldn't happen */
elog(ERROR, "unsupported collprovider: %c", locale->provider);
#endif
}
else
result = strcoll(arg1, arg2);
@@ -2065,11 +2075,9 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
#ifdef TRUST_STRXFRM
#ifdef HAVE_LOCALE_T
if (locale)
return strxfrm_l(dest, src, destsize, locale->info.lt);
else
#endif
return strxfrm(dest, src, destsize);
#else
/* shouldn't happen */
@@ -2955,23 +2963,8 @@ wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
}
else
{
#ifdef HAVE_LOCALE_T
#ifdef HAVE_WCSTOMBS_L
/* Use wcstombs_l for nondefault locales */
result = wcstombs_l(to, from, tolen, locale->info.lt);
#else /* !HAVE_WCSTOMBS_L */
/* We have to temporarily set the locale as current ... ugh */
locale_t save_locale = uselocale(locale->info.lt);
result = wcstombs(to, from, tolen);
uselocale(save_locale);
#endif /* HAVE_WCSTOMBS_L */
#else /* !HAVE_LOCALE_T */
/* Can't have locale != 0 without HAVE_LOCALE_T */
elog(ERROR, "wcstombs_l is not available");
result = 0; /* keep compiler quiet */
#endif /* HAVE_LOCALE_T */
}
return result;
@@ -3032,23 +3025,8 @@ char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
}
else
{
#ifdef HAVE_LOCALE_T
#ifdef HAVE_MBSTOWCS_L
/* Use mbstowcs_l for nondefault locales */
result = mbstowcs_l(to, str, tolen, locale->info.lt);
#else /* !HAVE_MBSTOWCS_L */
/* We have to temporarily set the locale as current ... ugh */
locale_t save_locale = uselocale(locale->info.lt);
result = mbstowcs(to, str, tolen);
uselocale(save_locale);
#endif /* HAVE_MBSTOWCS_L */
#else /* !HAVE_LOCALE_T */
/* Can't have locale != 0 without HAVE_LOCALE_T */
elog(ERROR, "mbstowcs_l is not available");
result = 0; /* keep compiler quiet */
#endif /* HAVE_LOCALE_T */
}
pfree(str);