mirror of
https://github.com/postgres/postgres.git
synced 2025-05-08 07:21:33 +03:00
Move libc-specific code from pg_locale.c into pg_locale_libc.c.
Move implementation of pg_locale_t code for libc collations into pg_locale_libc.c. Other locale-related code, such as pg_perm_setlocale(), remains in pg_locale.c for now. Discussion: https://postgr.es/m/flat/2830211e1b6e6a2e26d845780b03e125281ea17b.camel@j-davis.com
This commit is contained in:
parent
9812138593
commit
66ac94cdc7
@ -80,6 +80,7 @@ OBJS = \
|
|||||||
partitionfuncs.o \
|
partitionfuncs.o \
|
||||||
pg_locale.o \
|
pg_locale.o \
|
||||||
pg_locale_icu.o \
|
pg_locale_icu.o \
|
||||||
|
pg_locale_libc.o \
|
||||||
pg_lsn.o \
|
pg_lsn.o \
|
||||||
pg_upgrade_support.o \
|
pg_upgrade_support.o \
|
||||||
pgstatfuncs.o \
|
pgstatfuncs.o \
|
||||||
|
@ -67,6 +67,7 @@ backend_sources += files(
|
|||||||
'partitionfuncs.c',
|
'partitionfuncs.c',
|
||||||
'pg_locale.c',
|
'pg_locale.c',
|
||||||
'pg_locale_icu.c',
|
'pg_locale_icu.c',
|
||||||
|
'pg_locale_libc.c',
|
||||||
'pg_lsn.c',
|
'pg_lsn.c',
|
||||||
'pg_upgrade_support.c',
|
'pg_upgrade_support.c',
|
||||||
'pgstatfuncs.c',
|
'pgstatfuncs.c',
|
||||||
|
@ -105,6 +105,16 @@ extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
|
|||||||
pg_locale_t locale);
|
pg_locale_t locale);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* pg_locale_libc.c */
|
||||||
|
extern locale_t make_libc_collator(const char *collate,
|
||||||
|
const char *ctype);
|
||||||
|
extern int strncoll_libc(const char *arg1, ssize_t len1,
|
||||||
|
const char *arg2, ssize_t len2,
|
||||||
|
pg_locale_t locale);
|
||||||
|
extern size_t strnxfrm_libc(char *dest, size_t destsize,
|
||||||
|
const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale);
|
||||||
|
|
||||||
/* GUC settings */
|
/* GUC settings */
|
||||||
char *locale_messages;
|
char *locale_messages;
|
||||||
char *locale_monetary;
|
char *locale_monetary;
|
||||||
@ -173,43 +183,6 @@ static pg_locale_t last_collation_cache_locale = NULL;
|
|||||||
static char *IsoLocaleName(const char *);
|
static char *IsoLocaleName(const char *);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* POSIX doesn't define _l-variants of these functions, but several systems
|
|
||||||
* have them. We provide our own replacements here.
|
|
||||||
*/
|
|
||||||
#ifndef HAVE_MBSTOWCS_L
|
|
||||||
static size_t
|
|
||||||
mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
|
|
||||||
{
|
|
||||||
#ifdef WIN32
|
|
||||||
return _mbstowcs_l(dest, src, n, loc);
|
|
||||||
#else
|
|
||||||
size_t result;
|
|
||||||
locale_t save_locale = uselocale(loc);
|
|
||||||
|
|
||||||
result = mbstowcs(dest, src, n);
|
|
||||||
uselocale(save_locale);
|
|
||||||
return result;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#ifndef HAVE_WCSTOMBS_L
|
|
||||||
static size_t
|
|
||||||
wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
|
|
||||||
{
|
|
||||||
#ifdef WIN32
|
|
||||||
return _wcstombs_l(dest, src, n, loc);
|
|
||||||
#else
|
|
||||||
size_t result;
|
|
||||||
locale_t save_locale = uselocale(loc);
|
|
||||||
|
|
||||||
result = wcstombs(dest, src, n);
|
|
||||||
uselocale(save_locale);
|
|
||||||
return result;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pg_perm_setlocale
|
* pg_perm_setlocale
|
||||||
*
|
*
|
||||||
@ -1280,108 +1253,6 @@ lookup_collation_cache(Oid collation)
|
|||||||
return cache_entry;
|
return cache_entry;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* simple subroutine for reporting errors from newlocale() */
|
|
||||||
static void
|
|
||||||
report_newlocale_failure(const char *localename)
|
|
||||||
{
|
|
||||||
int save_errno;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Windows doesn't provide any useful error indication from
|
|
||||||
* _create_locale(), and BSD-derived platforms don't seem to feel they
|
|
||||||
* need to set errno either (even though POSIX is pretty clear that
|
|
||||||
* newlocale should do so). So, if errno hasn't been set, assume ENOENT
|
|
||||||
* is what to report.
|
|
||||||
*/
|
|
||||||
if (errno == 0)
|
|
||||||
errno = ENOENT;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* ENOENT means "no such locale", not "no such file", so clarify that
|
|
||||||
* errno with an errdetail message.
|
|
||||||
*/
|
|
||||||
save_errno = errno; /* auxiliary funcs might change errno */
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
|
||||||
errmsg("could not create locale \"%s\": %m",
|
|
||||||
localename),
|
|
||||||
(save_errno == ENOENT ?
|
|
||||||
errdetail("The operating system could not find any locale data for the locale name \"%s\".",
|
|
||||||
localename) : 0)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Create a locale_t with the given collation and ctype.
|
|
||||||
*
|
|
||||||
* The "C" and "POSIX" locales are not actually handled by libc, so return
|
|
||||||
* NULL.
|
|
||||||
*
|
|
||||||
* Ensure that no path leaks a locale_t.
|
|
||||||
*/
|
|
||||||
static locale_t
|
|
||||||
make_libc_collator(const char *collate, const char *ctype)
|
|
||||||
{
|
|
||||||
locale_t loc = 0;
|
|
||||||
|
|
||||||
if (strcmp(collate, ctype) == 0)
|
|
||||||
{
|
|
||||||
if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
|
|
||||||
{
|
|
||||||
/* Normal case where they're the same */
|
|
||||||
errno = 0;
|
|
||||||
#ifndef WIN32
|
|
||||||
loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
|
|
||||||
NULL);
|
|
||||||
#else
|
|
||||||
loc = _create_locale(LC_ALL, collate);
|
|
||||||
#endif
|
|
||||||
if (!loc)
|
|
||||||
report_newlocale_failure(collate);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
#ifndef WIN32
|
|
||||||
/* We need two newlocale() steps */
|
|
||||||
locale_t loc1 = 0;
|
|
||||||
|
|
||||||
if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
|
|
||||||
if (!loc1)
|
|
||||||
report_newlocale_failure(collate);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
|
|
||||||
{
|
|
||||||
errno = 0;
|
|
||||||
loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
|
|
||||||
if (!loc)
|
|
||||||
{
|
|
||||||
if (loc1)
|
|
||||||
freelocale(loc1);
|
|
||||||
report_newlocale_failure(ctype);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
loc = loc1;
|
|
||||||
#else
|
|
||||||
|
|
||||||
/*
|
|
||||||
* XXX The _create_locale() API doesn't appear to support this. Could
|
|
||||||
* perhaps be worked around by changing pg_locale_t to contain two
|
|
||||||
* separate fields.
|
|
||||||
*/
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
|
||||||
errmsg("collations with different collate and ctype values are not supported on this platform")));
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
return loc;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize default_locale with database locale settings.
|
* Initialize default_locale with database locale settings.
|
||||||
*/
|
*/
|
||||||
@ -1746,150 +1617,6 @@ get_collation_actual_version(char collprovider, const char *collcollate)
|
|||||||
return collversion;
|
return collversion;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* strncoll_libc_win32_utf8
|
|
||||||
*
|
|
||||||
* Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
|
|
||||||
* invoke wcscoll_l().
|
|
||||||
*
|
|
||||||
* An input string length of -1 means that it's NUL-terminated.
|
|
||||||
*/
|
|
||||||
#ifdef WIN32
|
|
||||||
static int
|
|
||||||
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
|
|
||||||
ssize_t len2, pg_locale_t locale)
|
|
||||||
{
|
|
||||||
char sbuf[TEXTBUFLEN];
|
|
||||||
char *buf = sbuf;
|
|
||||||
char *a1p,
|
|
||||||
*a2p;
|
|
||||||
int a1len;
|
|
||||||
int a2len;
|
|
||||||
int r;
|
|
||||||
int result;
|
|
||||||
|
|
||||||
Assert(locale->provider == COLLPROVIDER_LIBC);
|
|
||||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
|
||||||
|
|
||||||
if (len1 == -1)
|
|
||||||
len1 = strlen(arg1);
|
|
||||||
if (len2 == -1)
|
|
||||||
len2 = strlen(arg2);
|
|
||||||
|
|
||||||
a1len = len1 * 2 + 2;
|
|
||||||
a2len = len2 * 2 + 2;
|
|
||||||
|
|
||||||
if (a1len + a2len > TEXTBUFLEN)
|
|
||||||
buf = palloc(a1len + a2len);
|
|
||||||
|
|
||||||
a1p = buf;
|
|
||||||
a2p = buf + a1len;
|
|
||||||
|
|
||||||
/* API does not work for zero-length input */
|
|
||||||
if (len1 == 0)
|
|
||||||
r = 0;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
|
|
||||||
(LPWSTR) a1p, a1len / 2);
|
|
||||||
if (!r)
|
|
||||||
ereport(ERROR,
|
|
||||||
(errmsg("could not convert string to UTF-16: error code %lu",
|
|
||||||
GetLastError())));
|
|
||||||
}
|
|
||||||
((LPWSTR) a1p)[r] = 0;
|
|
||||||
|
|
||||||
if (len2 == 0)
|
|
||||||
r = 0;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
|
|
||||||
(LPWSTR) a2p, a2len / 2);
|
|
||||||
if (!r)
|
|
||||||
ereport(ERROR,
|
|
||||||
(errmsg("could not convert string to UTF-16: error code %lu",
|
|
||||||
GetLastError())));
|
|
||||||
}
|
|
||||||
((LPWSTR) a2p)[r] = 0;
|
|
||||||
|
|
||||||
errno = 0;
|
|
||||||
result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
|
|
||||||
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
|
|
||||||
ereport(ERROR,
|
|
||||||
(errmsg("could not compare Unicode strings: %m")));
|
|
||||||
|
|
||||||
if (buf != sbuf)
|
|
||||||
pfree(buf);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
#endif /* WIN32 */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* strncoll_libc
|
|
||||||
*
|
|
||||||
* NUL-terminate arguments, if necessary, and pass to strcoll_l().
|
|
||||||
*
|
|
||||||
* An input string length of -1 means that it's already NUL-terminated.
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
|
||||||
pg_locale_t locale)
|
|
||||||
{
|
|
||||||
char sbuf[TEXTBUFLEN];
|
|
||||||
char *buf = sbuf;
|
|
||||||
size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
|
|
||||||
size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
|
|
||||||
const char *arg1n;
|
|
||||||
const char *arg2n;
|
|
||||||
int result;
|
|
||||||
|
|
||||||
Assert(locale->provider == COLLPROVIDER_LIBC);
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
/* check for this case before doing the work for nul-termination */
|
|
||||||
if (GetDatabaseEncoding() == PG_UTF8)
|
|
||||||
return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
|
|
||||||
#endif /* WIN32 */
|
|
||||||
|
|
||||||
if (bufsize1 + bufsize2 > TEXTBUFLEN)
|
|
||||||
buf = palloc(bufsize1 + bufsize2);
|
|
||||||
|
|
||||||
/* nul-terminate arguments if necessary */
|
|
||||||
if (len1 == -1)
|
|
||||||
{
|
|
||||||
arg1n = arg1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
char *buf1 = buf;
|
|
||||||
|
|
||||||
memcpy(buf1, arg1, len1);
|
|
||||||
buf1[len1] = '\0';
|
|
||||||
arg1n = buf1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (len2 == -1)
|
|
||||||
{
|
|
||||||
arg2n = arg2;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
char *buf2 = buf + bufsize1;
|
|
||||||
|
|
||||||
memcpy(buf2, arg2, len2);
|
|
||||||
buf2[len2] = '\0';
|
|
||||||
arg2n = buf2;
|
|
||||||
}
|
|
||||||
|
|
||||||
result = strcoll_l(arg1n, arg2n, locale->info.lt);
|
|
||||||
|
|
||||||
if (buf != sbuf)
|
|
||||||
pfree(buf);
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* pg_strcoll
|
* pg_strcoll
|
||||||
*
|
*
|
||||||
@ -1946,45 +1673,6 @@ pg_strncoll(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* strnxfrm_libc
|
|
||||||
*
|
|
||||||
* NUL-terminate src, if necessary, and pass to strxfrm_l().
|
|
||||||
*
|
|
||||||
* A source length of -1 means that it's already NUL-terminated.
|
|
||||||
*/
|
|
||||||
static size_t
|
|
||||||
strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|
||||||
pg_locale_t locale)
|
|
||||||
{
|
|
||||||
char sbuf[TEXTBUFLEN];
|
|
||||||
char *buf = sbuf;
|
|
||||||
size_t bufsize = srclen + 1;
|
|
||||||
size_t result;
|
|
||||||
|
|
||||||
Assert(locale->provider == COLLPROVIDER_LIBC);
|
|
||||||
|
|
||||||
if (srclen == -1)
|
|
||||||
return strxfrm_l(dest, src, destsize, locale->info.lt);
|
|
||||||
|
|
||||||
if (bufsize > TEXTBUFLEN)
|
|
||||||
buf = palloc(bufsize);
|
|
||||||
|
|
||||||
/* nul-terminate argument */
|
|
||||||
memcpy(buf, src, srclen);
|
|
||||||
buf[srclen] = '\0';
|
|
||||||
|
|
||||||
result = strxfrm_l(dest, buf, destsize, locale->info.lt);
|
|
||||||
|
|
||||||
if (buf != sbuf)
|
|
||||||
pfree(buf);
|
|
||||||
|
|
||||||
/* if dest is defined, it should be nul-terminated */
|
|
||||||
Assert(result >= destsize || dest[result] == '\0');
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return true if the collation provider supports pg_strxfrm() and
|
* Return true if the collation provider supports pg_strxfrm() and
|
||||||
* pg_strnxfrm(); otherwise false.
|
* pg_strnxfrm(); otherwise false.
|
||||||
@ -2333,145 +2021,3 @@ icu_validate_locale(const char *loc_str)
|
|||||||
errmsg("ICU is not supported in this build")));
|
errmsg("ICU is not supported in this build")));
|
||||||
#endif /* not USE_ICU */
|
#endif /* not USE_ICU */
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
|
|
||||||
* Therefore we keep them here rather than with the mbutils code.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* wchar2char --- convert wide characters to multibyte format
|
|
||||||
*
|
|
||||||
* This has the same API as the standard wcstombs_l() function; in particular,
|
|
||||||
* tolen is the maximum number of bytes to store at *to, and *from must be
|
|
||||||
* zero-terminated. The output will be zero-terminated iff there is room.
|
|
||||||
*/
|
|
||||||
size_t
|
|
||||||
wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
|
|
||||||
{
|
|
||||||
size_t result;
|
|
||||||
|
|
||||||
Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
|
|
||||||
|
|
||||||
if (tolen == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
|
|
||||||
/*
|
|
||||||
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
|
|
||||||
* for some reason mbstowcs and wcstombs won't do this for us, so we use
|
|
||||||
* MultiByteToWideChar().
|
|
||||||
*/
|
|
||||||
if (GetDatabaseEncoding() == PG_UTF8)
|
|
||||||
{
|
|
||||||
result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
|
|
||||||
NULL, NULL);
|
|
||||||
/* A zero return is failure */
|
|
||||||
if (result <= 0)
|
|
||||||
result = -1;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Assert(result <= tolen);
|
|
||||||
/* Microsoft counts the zero terminator in the result */
|
|
||||||
result--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif /* WIN32 */
|
|
||||||
if (locale == (pg_locale_t) 0)
|
|
||||||
{
|
|
||||||
/* Use wcstombs directly for the default locale */
|
|
||||||
result = wcstombs(to, from, tolen);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Use wcstombs_l for nondefault locales */
|
|
||||||
result = wcstombs_l(to, from, tolen, locale->info.lt);
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* char2wchar --- convert multibyte characters to wide characters
|
|
||||||
*
|
|
||||||
* This has almost the API of mbstowcs_l(), except that *from need not be
|
|
||||||
* null-terminated; instead, the number of input bytes is specified as
|
|
||||||
* fromlen. Also, we ereport() rather than returning -1 for invalid
|
|
||||||
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
|
|
||||||
* The output will be zero-terminated iff there is room.
|
|
||||||
*/
|
|
||||||
size_t
|
|
||||||
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
|
|
||||||
pg_locale_t locale)
|
|
||||||
{
|
|
||||||
size_t result;
|
|
||||||
|
|
||||||
Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
|
|
||||||
|
|
||||||
if (tolen == 0)
|
|
||||||
return 0;
|
|
||||||
|
|
||||||
#ifdef WIN32
|
|
||||||
/* See WIN32 "Unicode" comment above */
|
|
||||||
if (GetDatabaseEncoding() == PG_UTF8)
|
|
||||||
{
|
|
||||||
/* Win32 API does not work for zero-length input */
|
|
||||||
if (fromlen == 0)
|
|
||||||
result = 0;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
|
|
||||||
/* A zero return is failure */
|
|
||||||
if (result == 0)
|
|
||||||
result = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result != -1)
|
|
||||||
{
|
|
||||||
Assert(result < tolen);
|
|
||||||
/* Append trailing null wchar (MultiByteToWideChar() does not) */
|
|
||||||
to[result] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif /* WIN32 */
|
|
||||||
{
|
|
||||||
/* mbstowcs requires ending '\0' */
|
|
||||||
char *str = pnstrdup(from, fromlen);
|
|
||||||
|
|
||||||
if (locale == (pg_locale_t) 0)
|
|
||||||
{
|
|
||||||
/* Use mbstowcs directly for the default locale */
|
|
||||||
result = mbstowcs(to, str, tolen);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Use mbstowcs_l for nondefault locales */
|
|
||||||
result = mbstowcs_l(to, str, tolen, locale->info.lt);
|
|
||||||
}
|
|
||||||
|
|
||||||
pfree(str);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result == -1)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Invalid multibyte character encountered. We try to give a useful
|
|
||||||
* error message by letting pg_verifymbstr check the string. But it's
|
|
||||||
* possible that the string is OK to us, and not OK to mbstowcs ---
|
|
||||||
* this suggests that the LC_CTYPE locale is different from the
|
|
||||||
* database encoding. Give a generic error message if pg_verifymbstr
|
|
||||||
* can't find anything wrong.
|
|
||||||
*/
|
|
||||||
pg_verifymbstr(from, fromlen, false); /* might not return */
|
|
||||||
/* but if it does ... */
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
|
||||||
errmsg("invalid multibyte character for locale"),
|
|
||||||
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
502
src/backend/utils/adt/pg_locale_libc.c
Normal file
502
src/backend/utils/adt/pg_locale_libc.c
Normal file
@ -0,0 +1,502 @@
|
|||||||
|
/*-----------------------------------------------------------------------
|
||||||
|
*
|
||||||
|
* PostgreSQL locale utilities for libc
|
||||||
|
*
|
||||||
|
* Portions Copyright (c) 2002-2024, PostgreSQL Global Development Group
|
||||||
|
*
|
||||||
|
* src/backend/utils/adt/pg_locale_libc.c
|
||||||
|
*
|
||||||
|
*-----------------------------------------------------------------------
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include "catalog/pg_collation.h"
|
||||||
|
#include "mb/pg_wchar.h"
|
||||||
|
#include "utils/formatting.h"
|
||||||
|
#include "utils/pg_locale.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Size of stack buffer to use for string transformations, used to avoid heap
|
||||||
|
* allocations in typical cases. This should be large enough that most strings
|
||||||
|
* will fit, but small enough that we feel comfortable putting it on the
|
||||||
|
* stack.
|
||||||
|
*/
|
||||||
|
#define TEXTBUFLEN 1024
|
||||||
|
|
||||||
|
extern locale_t make_libc_collator(const char *collate,
|
||||||
|
const char *ctype);
|
||||||
|
extern int strncoll_libc(const char *arg1, ssize_t len1,
|
||||||
|
const char *arg2, ssize_t len2,
|
||||||
|
pg_locale_t locale);
|
||||||
|
extern size_t strnxfrm_libc(char *dest, size_t destsize,
|
||||||
|
const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale);
|
||||||
|
|
||||||
|
static void report_newlocale_failure(const char *localename);
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
|
||||||
|
const char *arg2, ssize_t len2,
|
||||||
|
pg_locale_t locale);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a locale_t with the given collation and ctype.
|
||||||
|
*
|
||||||
|
* The "C" and "POSIX" locales are not actually handled by libc, so return
|
||||||
|
* NULL.
|
||||||
|
*
|
||||||
|
* Ensure that no path leaks a locale_t.
|
||||||
|
*/
|
||||||
|
locale_t
|
||||||
|
make_libc_collator(const char *collate, const char *ctype)
|
||||||
|
{
|
||||||
|
locale_t loc = 0;
|
||||||
|
|
||||||
|
if (strcmp(collate, ctype) == 0)
|
||||||
|
{
|
||||||
|
if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
|
||||||
|
{
|
||||||
|
/* Normal case where they're the same */
|
||||||
|
errno = 0;
|
||||||
|
#ifndef WIN32
|
||||||
|
loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collate,
|
||||||
|
NULL);
|
||||||
|
#else
|
||||||
|
loc = _create_locale(LC_ALL, collate);
|
||||||
|
#endif
|
||||||
|
if (!loc)
|
||||||
|
report_newlocale_failure(collate);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#ifndef WIN32
|
||||||
|
/* We need two newlocale() steps */
|
||||||
|
locale_t loc1 = 0;
|
||||||
|
|
||||||
|
if (strcmp(collate, "C") != 0 && strcmp(collate, "POSIX") != 0)
|
||||||
|
{
|
||||||
|
errno = 0;
|
||||||
|
loc1 = newlocale(LC_COLLATE_MASK, collate, NULL);
|
||||||
|
if (!loc1)
|
||||||
|
report_newlocale_failure(collate);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (strcmp(ctype, "C") != 0 && strcmp(ctype, "POSIX") != 0)
|
||||||
|
{
|
||||||
|
errno = 0;
|
||||||
|
loc = newlocale(LC_CTYPE_MASK, ctype, loc1);
|
||||||
|
if (!loc)
|
||||||
|
{
|
||||||
|
if (loc1)
|
||||||
|
freelocale(loc1);
|
||||||
|
report_newlocale_failure(ctype);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
loc = loc1;
|
||||||
|
#else
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XXX The _create_locale() API doesn't appear to support this. Could
|
||||||
|
* perhaps be worked around by changing pg_locale_t to contain two
|
||||||
|
* separate fields.
|
||||||
|
*/
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("collations with different collate and ctype values are not supported on this platform")));
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
return loc;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* strncoll_libc
|
||||||
|
*
|
||||||
|
* NUL-terminate arguments, if necessary, and pass to strcoll_l().
|
||||||
|
*
|
||||||
|
* An input string length of -1 means that it's already NUL-terminated.
|
||||||
|
*/
|
||||||
|
int
|
||||||
|
strncoll_libc(const char *arg1, ssize_t len1, const char *arg2, ssize_t len2,
|
||||||
|
pg_locale_t locale)
|
||||||
|
{
|
||||||
|
char sbuf[TEXTBUFLEN];
|
||||||
|
char *buf = sbuf;
|
||||||
|
size_t bufsize1 = (len1 == -1) ? 0 : len1 + 1;
|
||||||
|
size_t bufsize2 = (len2 == -1) ? 0 : len2 + 1;
|
||||||
|
const char *arg1n;
|
||||||
|
const char *arg2n;
|
||||||
|
int result;
|
||||||
|
|
||||||
|
Assert(locale->provider == COLLPROVIDER_LIBC);
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
/* check for this case before doing the work for nul-termination */
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
return strncoll_libc_win32_utf8(arg1, len1, arg2, len2, locale);
|
||||||
|
#endif /* WIN32 */
|
||||||
|
|
||||||
|
if (bufsize1 + bufsize2 > TEXTBUFLEN)
|
||||||
|
buf = palloc(bufsize1 + bufsize2);
|
||||||
|
|
||||||
|
/* nul-terminate arguments if necessary */
|
||||||
|
if (len1 == -1)
|
||||||
|
{
|
||||||
|
arg1n = arg1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char *buf1 = buf;
|
||||||
|
|
||||||
|
memcpy(buf1, arg1, len1);
|
||||||
|
buf1[len1] = '\0';
|
||||||
|
arg1n = buf1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (len2 == -1)
|
||||||
|
{
|
||||||
|
arg2n = arg2;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
char *buf2 = buf + bufsize1;
|
||||||
|
|
||||||
|
memcpy(buf2, arg2, len2);
|
||||||
|
buf2[len2] = '\0';
|
||||||
|
arg2n = buf2;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = strcoll_l(arg1n, arg2n, locale->info.lt);
|
||||||
|
|
||||||
|
if (buf != sbuf)
|
||||||
|
pfree(buf);
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* strnxfrm_libc
|
||||||
|
*
|
||||||
|
* NUL-terminate src, if necessary, and pass to strxfrm_l().
|
||||||
|
*
|
||||||
|
* A source length of -1 means that it's already NUL-terminated.
|
||||||
|
*/
|
||||||
|
size_t
|
||||||
|
strnxfrm_libc(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale)
|
||||||
|
{
|
||||||
|
char sbuf[TEXTBUFLEN];
|
||||||
|
char *buf = sbuf;
|
||||||
|
size_t bufsize = srclen + 1;
|
||||||
|
size_t result;
|
||||||
|
|
||||||
|
Assert(locale->provider == COLLPROVIDER_LIBC);
|
||||||
|
|
||||||
|
if (srclen == -1)
|
||||||
|
return strxfrm_l(dest, src, destsize, locale->info.lt);
|
||||||
|
|
||||||
|
if (bufsize > TEXTBUFLEN)
|
||||||
|
buf = palloc(bufsize);
|
||||||
|
|
||||||
|
/* nul-terminate argument */
|
||||||
|
memcpy(buf, src, srclen);
|
||||||
|
buf[srclen] = '\0';
|
||||||
|
|
||||||
|
result = strxfrm_l(dest, buf, destsize, locale->info.lt);
|
||||||
|
|
||||||
|
if (buf != sbuf)
|
||||||
|
pfree(buf);
|
||||||
|
|
||||||
|
/* if dest is defined, it should be nul-terminated */
|
||||||
|
Assert(result >= destsize || dest[result] == '\0');
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * strncoll_libc_win32_utf8
 *
 * Win32 does not have UTF-8. Convert UTF8 arguments to wide characters and
 * invoke wcscoll_l().
 *
 * An input string length of -1 means that it's NUL-terminated.
 *
 * Both converted strings live in one buffer: the on-stack sbuf when their
 * combined size fits in TEXTBUFLEN bytes, otherwise a palloc'd buffer that
 * is pfree'd on the normal return path.  A failed conversion or comparison
 * is reported with ereport(ERROR).
 *
 * Returns the result of wcscoll_l().
 */
#ifdef WIN32
static int
strncoll_libc_win32_utf8(const char *arg1, ssize_t len1, const char *arg2,
						 ssize_t len2, pg_locale_t locale)
{
	char		sbuf[TEXTBUFLEN];
	char	   *buf = sbuf;
	char	   *a1p,
			   *a2p;
	int			a1len;
	int			a2len;
	int			r;
	int			result;

	Assert(locale->provider == COLLPROVIDER_LIBC);
	Assert(GetDatabaseEncoding() == PG_UTF8);

	/* -1 means NUL-terminated, so measure the strings here */
	if (len1 == -1)
		len1 = strlen(arg1);
	if (len2 == -1)
		len2 = strlen(arg2);

	/*
	 * Byte sizes of the two output areas: two bytes per input byte, plus two
	 * more for the terminator stored after each conversion.
	 */
	a1len = len1 * 2 + 2;
	a2len = len2 * 2 + 2;

	if (a1len + a2len > TEXTBUFLEN)
		buf = palloc(a1len + a2len);

	/* first output area at the start, second right after it */
	a1p = buf;
	a2p = buf + a1len;

	/* API does not work for zero-length input */
	if (len1 == 0)
		r = 0;
	else
	{
		/* the capacity argument is in wide characters, hence a1len / 2 */
		r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
								(LPWSTR) a1p, a1len / 2);
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	/* r is the number of wide characters stored; terminate after them */
	((LPWSTR) a1p)[r] = 0;

	if (len2 == 0)
		r = 0;
	else
	{
		r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
								(LPWSTR) a2p, a2len / 2);
		if (!r)
			ereport(ERROR,
					(errmsg("could not convert string to UTF-16: error code %lu",
							GetLastError())));
	}
	((LPWSTR) a2p)[r] = 0;

	/* clear errno so that %m below reflects only what wcscoll_l() set */
	errno = 0;
	result = wcscoll_l((LPWSTR) a1p, (LPWSTR) a2p, locale->info.lt);
	if (result == 2147483647)	/* _NLSCMPERROR; missing from mingw headers */
		ereport(ERROR,
				(errmsg("could not compare Unicode strings: %m")));

	if (buf != sbuf)
		pfree(buf);

	return result;
}
#endif							/* WIN32 */
|
||||||
|
|
||||||
|
/* simple subroutine for reporting errors from newlocale() */
|
||||||
|
static void
|
||||||
|
report_newlocale_failure(const char *localename)
|
||||||
|
{
|
||||||
|
int save_errno;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Windows doesn't provide any useful error indication from
|
||||||
|
* _create_locale(), and BSD-derived platforms don't seem to feel they
|
||||||
|
* need to set errno either (even though POSIX is pretty clear that
|
||||||
|
* newlocale should do so). So, if errno hasn't been set, assume ENOENT
|
||||||
|
* is what to report.
|
||||||
|
*/
|
||||||
|
if (errno == 0)
|
||||||
|
errno = ENOENT;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ENOENT means "no such locale", not "no such file", so clarify that
|
||||||
|
* errno with an errdetail message.
|
||||||
|
*/
|
||||||
|
save_errno = errno; /* auxiliary funcs might change errno */
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("could not create locale \"%s\": %m",
|
||||||
|
localename),
|
||||||
|
(save_errno == ENOENT ?
|
||||||
|
errdetail("The operating system could not find any locale data for the locale name \"%s\".",
|
||||||
|
localename) : 0)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* POSIX doesn't define _l-variants of these functions, but several systems
|
||||||
|
* have them. We provide our own replacements here.
|
||||||
|
*/
|
||||||
|
#ifndef HAVE_MBSTOWCS_L
|
||||||
|
static size_t
|
||||||
|
mbstowcs_l(wchar_t *dest, const char *src, size_t n, locale_t loc)
|
||||||
|
{
|
||||||
|
#ifdef WIN32
|
||||||
|
return _mbstowcs_l(dest, src, n, loc);
|
||||||
|
#else
|
||||||
|
size_t result;
|
||||||
|
locale_t save_locale = uselocale(loc);
|
||||||
|
|
||||||
|
result = mbstowcs(dest, src, n);
|
||||||
|
uselocale(save_locale);
|
||||||
|
return result;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
#ifndef HAVE_WCSTOMBS_L
|
||||||
|
static size_t
|
||||||
|
wcstombs_l(char *dest, const wchar_t *src, size_t n, locale_t loc)
|
||||||
|
{
|
||||||
|
#ifdef WIN32
|
||||||
|
return _wcstombs_l(dest, src, n, loc);
|
||||||
|
#else
|
||||||
|
size_t result;
|
||||||
|
locale_t save_locale = uselocale(loc);
|
||||||
|
|
||||||
|
result = wcstombs(dest, src, n);
|
||||||
|
uselocale(save_locale);
|
||||||
|
return result;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
|
||||||
|
* Therefore we keep them here rather than with the mbutils code.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* wchar2char --- convert wide characters to multibyte format
|
||||||
|
*
|
||||||
|
* This has the same API as the standard wcstombs_l() function; in particular,
|
||||||
|
* tolen is the maximum number of bytes to store at *to, and *from must be
|
||||||
|
* zero-terminated. The output will be zero-terminated iff there is room.
|
||||||
|
*/
|
||||||
|
size_t
|
||||||
|
wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
size_t result;
|
||||||
|
|
||||||
|
if (tolen == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
|
||||||
|
* for some reason mbstowcs and wcstombs won't do this for us, so we use
|
||||||
|
* MultiByteToWideChar().
|
||||||
|
*/
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
{
|
||||||
|
result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
|
||||||
|
NULL, NULL);
|
||||||
|
/* A zero return is failure */
|
||||||
|
if (result <= 0)
|
||||||
|
result = -1;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
Assert(result <= tolen);
|
||||||
|
/* Microsoft counts the zero terminator in the result */
|
||||||
|
result--;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* WIN32 */
|
||||||
|
if (locale == (pg_locale_t) 0)
|
||||||
|
{
|
||||||
|
/* Use wcstombs directly for the default locale */
|
||||||
|
result = wcstombs(to, from, tolen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Use wcstombs_l for nondefault locales */
|
||||||
|
result = wcstombs_l(to, from, tolen, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* char2wchar --- convert multibyte characters to wide characters
|
||||||
|
*
|
||||||
|
* This has almost the API of mbstowcs_l(), except that *from need not be
|
||||||
|
* null-terminated; instead, the number of input bytes is specified as
|
||||||
|
* fromlen. Also, we ereport() rather than returning -1 for invalid
|
||||||
|
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
|
||||||
|
* The output will be zero-terminated iff there is room.
|
||||||
|
*/
|
||||||
|
size_t
|
||||||
|
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
|
||||||
|
pg_locale_t locale)
|
||||||
|
{
|
||||||
|
size_t result;
|
||||||
|
|
||||||
|
if (tolen == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
#ifdef WIN32
|
||||||
|
/* See WIN32 "Unicode" comment above */
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
{
|
||||||
|
/* Win32 API does not work for zero-length input */
|
||||||
|
if (fromlen == 0)
|
||||||
|
result = 0;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
|
||||||
|
/* A zero return is failure */
|
||||||
|
if (result == 0)
|
||||||
|
result = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result != -1)
|
||||||
|
{
|
||||||
|
Assert(result < tolen);
|
||||||
|
/* Append trailing null wchar (MultiByteToWideChar() does not) */
|
||||||
|
to[result] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
#endif /* WIN32 */
|
||||||
|
{
|
||||||
|
/* mbstowcs requires ending '\0' */
|
||||||
|
char *str = pnstrdup(from, fromlen);
|
||||||
|
|
||||||
|
if (locale == (pg_locale_t) 0)
|
||||||
|
{
|
||||||
|
/* Use mbstowcs directly for the default locale */
|
||||||
|
result = mbstowcs(to, str, tolen);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/* Use mbstowcs_l for nondefault locales */
|
||||||
|
result = mbstowcs_l(to, str, tolen, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
pfree(str);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (result == -1)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Invalid multibyte character encountered. We try to give a useful
|
||||||
|
* error message by letting pg_verifymbstr check the string. But it's
|
||||||
|
* possible that the string is OK to us, and not OK to mbstowcs ---
|
||||||
|
* this suggests that the LC_CTYPE locale is different from the
|
||||||
|
* database encoding. Give a generic error message if pg_verifymbstr
|
||||||
|
* can't find anything wrong.
|
||||||
|
*/
|
||||||
|
pg_verifymbstr(from, fromlen, false); /* might not return */
|
||||||
|
/* but if it does ... */
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
|
||||||
|
errmsg("invalid multibyte character for locale"),
|
||||||
|
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
|
||||||
|
}
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user