mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Refactor string case conversion into provider-specific files.
Create API entry points pg_strlower(), etc., that work with any provider and give the caller control over the destination buffer. Then, move provider-specific logic into pg_locale_builtin.c, pg_locale_icu.c, and pg_locale_libc.c as appropriate. Discussion: https://postgr.es/m/7aa46d77b377428058403723440862d12a8a129a.camel@j-davis.com
This commit is contained in:
@ -1571,52 +1571,6 @@ str_numth(char *dest, char *num, int type)
|
||||
* upper/lower/initcap functions
|
||||
*****************************************************************************/
|
||||
|
||||
#ifdef USE_ICU
|
||||
|
||||
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
static int32_t
|
||||
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
|
||||
UChar **buff_dest, UChar *buff_source, int32_t len_source)
|
||||
{
|
||||
UErrorCode status;
|
||||
int32_t len_dest;
|
||||
|
||||
len_dest = len_source; /* try first with same length */
|
||||
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
|
||||
status = U_ZERO_ERROR;
|
||||
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
|
||||
mylocale->info.icu.locale, &status);
|
||||
if (status == U_BUFFER_OVERFLOW_ERROR)
|
||||
{
|
||||
/* try again with adjusted length */
|
||||
pfree(*buff_dest);
|
||||
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
|
||||
status = U_ZERO_ERROR;
|
||||
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
|
||||
mylocale->info.icu.locale, &status);
|
||||
}
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("case conversion failed: %s", u_errorName(status))));
|
||||
return len_dest;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode)
|
||||
{
|
||||
return u_strToTitle(dest, destCapacity, src, srcLength,
|
||||
NULL, locale, pErrorCode);
|
||||
}
|
||||
|
||||
#endif /* USE_ICU */
|
||||
|
||||
/*
|
||||
* If the system provides the needed functions for wide-character manipulation
|
||||
* (which are all standardized by C99), then we implement upper/lower/initcap
|
||||
@ -1663,25 +1617,6 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
|
||||
result = asc_tolower(buff, nbytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef USE_ICU
|
||||
if (mylocale->provider == COLLPROVIDER_ICU)
|
||||
{
|
||||
int32_t len_uchar;
|
||||
int32_t len_conv;
|
||||
UChar *buff_uchar;
|
||||
UChar *buff_conv;
|
||||
|
||||
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
|
||||
len_conv = icu_convert_case(u_strToLower, mylocale,
|
||||
&buff_conv, buff_uchar, len_uchar);
|
||||
icu_from_uchar(&result, buff_conv, len_conv);
|
||||
pfree(buff_uchar);
|
||||
pfree(buff_conv);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (mylocale->provider == COLLPROVIDER_BUILTIN)
|
||||
{
|
||||
const char *src = buff;
|
||||
size_t srclen = nbytes;
|
||||
@ -1689,82 +1624,23 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
|
||||
char *dst;
|
||||
size_t needed;
|
||||
|
||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||
|
||||
/* first try buffer of equal size plus terminating NUL */
|
||||
dstsize = srclen + 1;
|
||||
dst = palloc(dstsize);
|
||||
|
||||
needed = unicode_strlower(dst, dstsize, src, srclen);
|
||||
needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
|
||||
if (needed + 1 > dstsize)
|
||||
{
|
||||
/* grow buffer if needed and retry */
|
||||
dstsize = needed + 1;
|
||||
dst = repalloc(dst, dstsize);
|
||||
needed = unicode_strlower(dst, dstsize, src, srclen);
|
||||
Assert(needed + 1 == dstsize);
|
||||
needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
|
||||
Assert(needed + 1 <= dstsize);
|
||||
}
|
||||
|
||||
Assert(dst[needed] == '\0');
|
||||
result = dst;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(mylocale->provider == COLLPROVIDER_LIBC);
|
||||
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
{
|
||||
wchar_t *workspace;
|
||||
size_t curr_char;
|
||||
size_t result_size;
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
|
||||
|
||||
/*
|
||||
* Make result large enough; case change might change number
|
||||
* of bytes
|
||||
*/
|
||||
result_size = curr_char * pg_database_encoding_max_length() + 1;
|
||||
result = palloc(result_size);
|
||||
|
||||
wchar2char(result, workspace, result_size, mylocale);
|
||||
pfree(workspace);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pnstrdup(buff, nbytes);
|
||||
|
||||
/*
|
||||
* Note: we assume that tolower_l() will not be so broken as
|
||||
* to need an isupper_l() guard test. When using the default
|
||||
* collation, we apply the traditional Postgres behavior that
|
||||
* forces ASCII-style treatment of I/i, but in non-default
|
||||
* collations you get exactly what the collation says.
|
||||
*/
|
||||
for (p = result; *p; p++)
|
||||
{
|
||||
if (mylocale->is_default)
|
||||
*p = pg_tolower((unsigned char) *p);
|
||||
else
|
||||
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
@ -1805,25 +1681,6 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
|
||||
result = asc_toupper(buff, nbytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef USE_ICU
|
||||
if (mylocale->provider == COLLPROVIDER_ICU)
|
||||
{
|
||||
int32_t len_uchar,
|
||||
len_conv;
|
||||
UChar *buff_uchar;
|
||||
UChar *buff_conv;
|
||||
|
||||
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
|
||||
len_conv = icu_convert_case(u_strToUpper, mylocale,
|
||||
&buff_conv, buff_uchar, len_uchar);
|
||||
icu_from_uchar(&result, buff_conv, len_conv);
|
||||
pfree(buff_uchar);
|
||||
pfree(buff_conv);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (mylocale->provider == COLLPROVIDER_BUILTIN)
|
||||
{
|
||||
const char *src = buff;
|
||||
size_t srclen = nbytes;
|
||||
@ -1831,127 +1688,27 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
|
||||
char *dst;
|
||||
size_t needed;
|
||||
|
||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||
|
||||
/* first try buffer of equal size plus terminating NUL */
|
||||
dstsize = srclen + 1;
|
||||
dst = palloc(dstsize);
|
||||
|
||||
needed = unicode_strupper(dst, dstsize, src, srclen);
|
||||
needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
|
||||
if (needed + 1 > dstsize)
|
||||
{
|
||||
/* grow buffer if needed and retry */
|
||||
dstsize = needed + 1;
|
||||
dst = repalloc(dst, dstsize);
|
||||
needed = unicode_strupper(dst, dstsize, src, srclen);
|
||||
Assert(needed + 1 == dstsize);
|
||||
needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
|
||||
Assert(needed + 1 <= dstsize);
|
||||
}
|
||||
|
||||
Assert(dst[needed] == '\0');
|
||||
result = dst;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(mylocale->provider == COLLPROVIDER_LIBC);
|
||||
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
{
|
||||
wchar_t *workspace;
|
||||
size_t curr_char;
|
||||
size_t result_size;
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
|
||||
|
||||
/*
|
||||
* Make result large enough; case change might change number
|
||||
* of bytes
|
||||
*/
|
||||
result_size = curr_char * pg_database_encoding_max_length() + 1;
|
||||
result = palloc(result_size);
|
||||
|
||||
wchar2char(result, workspace, result_size, mylocale);
|
||||
pfree(workspace);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pnstrdup(buff, nbytes);
|
||||
|
||||
/*
|
||||
* Note: we assume that toupper_l() will not be so broken as
|
||||
* to need an islower_l() guard test. When using the default
|
||||
* collation, we apply the traditional Postgres behavior that
|
||||
* forces ASCII-style treatment of I/i, but in non-default
|
||||
* collations you get exactly what the collation says.
|
||||
*/
|
||||
for (p = result; *p; p++)
|
||||
{
|
||||
if (mylocale->is_default)
|
||||
*p = pg_toupper((unsigned char) *p);
|
||||
else
|
||||
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
struct WordBoundaryState
|
||||
{
|
||||
const char *str;
|
||||
size_t len;
|
||||
size_t offset;
|
||||
bool init;
|
||||
bool prev_alnum;
|
||||
};
|
||||
|
||||
/*
|
||||
* Simple word boundary iterator that draws boundaries each time the result of
|
||||
* pg_u_isalnum() changes.
|
||||
*/
|
||||
static size_t
|
||||
initcap_wbnext(void *state)
|
||||
{
|
||||
struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
|
||||
|
||||
while (wbstate->offset < wbstate->len &&
|
||||
wbstate->str[wbstate->offset] != '\0')
|
||||
{
|
||||
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
|
||||
wbstate->offset);
|
||||
bool curr_alnum = pg_u_isalnum(u, true);
|
||||
|
||||
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
|
||||
{
|
||||
size_t prev_offset = wbstate->offset;
|
||||
|
||||
wbstate->init = true;
|
||||
wbstate->offset += unicode_utf8len(u);
|
||||
wbstate->prev_alnum = curr_alnum;
|
||||
return prev_offset;
|
||||
}
|
||||
|
||||
wbstate->offset += unicode_utf8len(u);
|
||||
}
|
||||
|
||||
return wbstate->len;
|
||||
}
|
||||
|
||||
/*
|
||||
* collation-aware, wide-character-aware initcap function
|
||||
*
|
||||
@ -1962,7 +1719,6 @@ char *
|
||||
str_initcap(const char *buff, size_t nbytes, Oid collid)
|
||||
{
|
||||
char *result;
|
||||
int wasalnum = false;
|
||||
pg_locale_t mylocale;
|
||||
|
||||
if (!buff)
|
||||
@ -1989,137 +1745,30 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
|
||||
result = asc_initcap(buff, nbytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifdef USE_ICU
|
||||
if (mylocale->provider == COLLPROVIDER_ICU)
|
||||
{
|
||||
int32_t len_uchar,
|
||||
len_conv;
|
||||
UChar *buff_uchar;
|
||||
UChar *buff_conv;
|
||||
|
||||
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
|
||||
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
|
||||
&buff_conv, buff_uchar, len_uchar);
|
||||
icu_from_uchar(&result, buff_conv, len_conv);
|
||||
pfree(buff_uchar);
|
||||
pfree(buff_conv);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (mylocale->provider == COLLPROVIDER_BUILTIN)
|
||||
{
|
||||
const char *src = buff;
|
||||
size_t srclen = nbytes;
|
||||
size_t dstsize;
|
||||
char *dst;
|
||||
size_t needed;
|
||||
struct WordBoundaryState wbstate = {
|
||||
.str = src,
|
||||
.len = srclen,
|
||||
.offset = 0,
|
||||
.init = false,
|
||||
.prev_alnum = false,
|
||||
};
|
||||
|
||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||
|
||||
/* first try buffer of equal size plus terminating NUL */
|
||||
dstsize = srclen + 1;
|
||||
dst = palloc(dstsize);
|
||||
|
||||
needed = unicode_strtitle(dst, dstsize, src, srclen,
|
||||
initcap_wbnext, &wbstate);
|
||||
needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
|
||||
if (needed + 1 > dstsize)
|
||||
{
|
||||
/* reset iterator */
|
||||
wbstate.offset = 0;
|
||||
wbstate.init = false;
|
||||
|
||||
/* grow buffer if needed and retry */
|
||||
dstsize = needed + 1;
|
||||
dst = repalloc(dst, dstsize);
|
||||
needed = unicode_strtitle(dst, dstsize, src, srclen,
|
||||
initcap_wbnext, &wbstate);
|
||||
Assert(needed + 1 == dstsize);
|
||||
needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
|
||||
Assert(needed + 1 <= dstsize);
|
||||
}
|
||||
|
||||
Assert(dst[needed] == '\0');
|
||||
result = dst;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(mylocale->provider == COLLPROVIDER_LIBC);
|
||||
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
{
|
||||
wchar_t *workspace;
|
||||
size_t curr_char;
|
||||
size_t result_size;
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
{
|
||||
if (wasalnum)
|
||||
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
|
||||
else
|
||||
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
|
||||
wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make result large enough; case change might change number
|
||||
* of bytes
|
||||
*/
|
||||
result_size = curr_char * pg_database_encoding_max_length() + 1;
|
||||
result = palloc(result_size);
|
||||
|
||||
wchar2char(result, workspace, result_size, mylocale);
|
||||
pfree(workspace);
|
||||
}
|
||||
else
|
||||
{
|
||||
char *p;
|
||||
|
||||
result = pnstrdup(buff, nbytes);
|
||||
|
||||
/*
|
||||
* Note: we assume that toupper_l()/tolower_l() will not be so
|
||||
* broken as to need guard tests. When using the default
|
||||
* collation, we apply the traditional Postgres behavior that
|
||||
* forces ASCII-style treatment of I/i, but in non-default
|
||||
* collations you get exactly what the collation says.
|
||||
*/
|
||||
for (p = result; *p; p++)
|
||||
{
|
||||
if (mylocale->is_default)
|
||||
{
|
||||
if (wasalnum)
|
||||
*p = pg_tolower((unsigned char) *p);
|
||||
else
|
||||
*p = pg_toupper((unsigned char) *p);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (wasalnum)
|
||||
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
|
||||
else
|
||||
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
|
||||
}
|
||||
wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
@ -116,6 +116,27 @@ extern size_t strnxfrm_libc(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
|
||||
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
|
||||
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
|
||||
/* GUC settings */
|
||||
char *locale_messages;
|
||||
char *locale_monetary;
|
||||
@ -1468,6 +1489,63 @@ get_collation_actual_version(char collprovider, const char *collcollate)
|
||||
return collversion;
|
||||
}
|
||||
|
||||
size_t
|
||||
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
||||
return strlower_builtin(dst, dstsize, src, srclen, locale);
|
||||
#ifdef USE_ICU
|
||||
else if (locale->provider == COLLPROVIDER_ICU)
|
||||
return strlower_icu(dst, dstsize, src, srclen, locale);
|
||||
#endif
|
||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
||||
return strlower_libc(dst, dstsize, src, srclen, locale);
|
||||
else
|
||||
/* shouldn't happen */
|
||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
||||
|
||||
return 0; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
size_t
|
||||
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
||||
return strtitle_builtin(dst, dstsize, src, srclen, locale);
|
||||
#ifdef USE_ICU
|
||||
else if (locale->provider == COLLPROVIDER_ICU)
|
||||
return strtitle_icu(dst, dstsize, src, srclen, locale);
|
||||
#endif
|
||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
||||
return strtitle_libc(dst, dstsize, src, srclen, locale);
|
||||
else
|
||||
/* shouldn't happen */
|
||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
||||
|
||||
return 0; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
size_t
|
||||
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
||||
return strupper_builtin(dst, dstsize, src, srclen, locale);
|
||||
#ifdef USE_ICU
|
||||
else if (locale->provider == COLLPROVIDER_ICU)
|
||||
return strupper_icu(dst, dstsize, src, srclen, locale);
|
||||
#endif
|
||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
||||
return strupper_libc(dst, dstsize, src, srclen, locale);
|
||||
else
|
||||
/* shouldn't happen */
|
||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
||||
|
||||
return 0; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
/*
|
||||
* pg_strcoll
|
||||
*
|
||||
|
@ -13,6 +13,8 @@
|
||||
|
||||
#include "catalog/pg_database.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "common/unicode_case.h"
|
||||
#include "common/unicode_category.h"
|
||||
#include "mb/pg_wchar.h"
|
||||
#include "miscadmin.h"
|
||||
#include "utils/builtins.h"
|
||||
@ -22,6 +24,84 @@
|
||||
|
||||
extern pg_locale_t create_pg_locale_builtin(Oid collid,
|
||||
MemoryContext context);
|
||||
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
|
||||
|
||||
struct WordBoundaryState
|
||||
{
|
||||
const char *str;
|
||||
size_t len;
|
||||
size_t offset;
|
||||
bool init;
|
||||
bool prev_alnum;
|
||||
};
|
||||
|
||||
/*
|
||||
* Simple word boundary iterator that draws boundaries each time the result of
|
||||
* pg_u_isalnum() changes.
|
||||
*/
|
||||
static size_t
|
||||
initcap_wbnext(void *state)
|
||||
{
|
||||
struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
|
||||
|
||||
while (wbstate->offset < wbstate->len &&
|
||||
wbstate->str[wbstate->offset] != '\0')
|
||||
{
|
||||
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
|
||||
wbstate->offset);
|
||||
bool curr_alnum = pg_u_isalnum(u, true);
|
||||
|
||||
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
|
||||
{
|
||||
size_t prev_offset = wbstate->offset;
|
||||
|
||||
wbstate->init = true;
|
||||
wbstate->offset += unicode_utf8len(u);
|
||||
wbstate->prev_alnum = curr_alnum;
|
||||
return prev_offset;
|
||||
}
|
||||
|
||||
wbstate->offset += unicode_utf8len(u);
|
||||
}
|
||||
|
||||
return wbstate->len;
|
||||
}
|
||||
|
||||
size_t
|
||||
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return unicode_strlower(dest, destsize, src, srclen);
|
||||
}
|
||||
|
||||
size_t
|
||||
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
struct WordBoundaryState wbstate = {
|
||||
.str = src,
|
||||
.len = srclen,
|
||||
.offset = 0,
|
||||
.init = false,
|
||||
.prev_alnum = false,
|
||||
};
|
||||
|
||||
return unicode_strtitle(dest, destsize, src, srclen,
|
||||
initcap_wbnext, &wbstate);
|
||||
}
|
||||
|
||||
size_t
|
||||
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
return unicode_strupper(dest, destsize, src, srclen);
|
||||
}
|
||||
|
||||
pg_locale_t
|
||||
create_pg_locale_builtin(Oid collid, MemoryContext context)
|
||||
|
@ -48,6 +48,12 @@
|
||||
#define TEXTBUFLEN 1024
|
||||
|
||||
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
|
||||
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
|
||||
#ifdef USE_ICU
|
||||
|
||||
@ -62,6 +68,11 @@ extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
|
||||
/*
|
||||
* Converter object for converting between ICU's UChar strings and C strings
|
||||
* in database encoding. Since the database encoding doesn't change, we only
|
||||
@ -83,8 +94,19 @@ static size_t uchar_length(UConverter *converter,
|
||||
static int32_t uchar_convert(UConverter *converter,
|
||||
UChar *dest, int32_t destlen,
|
||||
const char *src, int32_t srclen);
|
||||
static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
|
||||
size_t nbytes);
|
||||
static size_t icu_from_uchar(char *dest, size_t destsize,
|
||||
const UChar *buff_uchar, int32_t len_uchar);
|
||||
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
|
||||
UErrorCode *status);
|
||||
static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
|
||||
UChar **buff_dest, UChar *buff_source,
|
||||
int32_t len_source);
|
||||
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode);
|
||||
#endif
|
||||
|
||||
pg_locale_t
|
||||
@ -324,6 +346,66 @@ make_icu_collator(const char *iculocstr, const char *icurules)
|
||||
}
|
||||
}
|
||||
|
||||
size_t
|
||||
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
int32_t len_uchar;
|
||||
int32_t len_conv;
|
||||
UChar *buff_uchar;
|
||||
UChar *buff_conv;
|
||||
size_t result_len;
|
||||
|
||||
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
|
||||
len_conv = icu_convert_case(u_strToLower, locale,
|
||||
&buff_conv, buff_uchar, len_uchar);
|
||||
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
|
||||
pfree(buff_uchar);
|
||||
pfree(buff_conv);
|
||||
|
||||
return result_len;
|
||||
}
|
||||
|
||||
size_t
|
||||
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
int32_t len_uchar;
|
||||
int32_t len_conv;
|
||||
UChar *buff_uchar;
|
||||
UChar *buff_conv;
|
||||
size_t result_len;
|
||||
|
||||
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
|
||||
len_conv = icu_convert_case(u_strToTitle_default_BI, locale,
|
||||
&buff_conv, buff_uchar, len_uchar);
|
||||
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
|
||||
pfree(buff_uchar);
|
||||
pfree(buff_conv);
|
||||
|
||||
return result_len;
|
||||
}
|
||||
|
||||
size_t
|
||||
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
int32_t len_uchar;
|
||||
int32_t len_conv;
|
||||
UChar *buff_uchar;
|
||||
UChar *buff_conv;
|
||||
size_t result_len;
|
||||
|
||||
len_uchar = icu_to_uchar(&buff_uchar, src, srclen);
|
||||
len_conv = icu_convert_case(u_strToUpper, locale,
|
||||
&buff_conv, buff_uchar, len_uchar);
|
||||
result_len = icu_from_uchar(dest, destsize, buff_conv, len_conv);
|
||||
pfree(buff_uchar);
|
||||
pfree(buff_conv);
|
||||
|
||||
return result_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* strncoll_icu
|
||||
*
|
||||
@ -458,7 +540,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
|
||||
* The result string is nul-terminated, though most callers rely on the
|
||||
* result length instead.
|
||||
*/
|
||||
int32_t
|
||||
static int32_t
|
||||
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
|
||||
{
|
||||
int32_t len_uchar;
|
||||
@ -485,8 +567,8 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
|
||||
*
|
||||
* The result string is nul-terminated.
|
||||
*/
|
||||
int32_t
|
||||
icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||
static size_t
|
||||
icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
|
||||
{
|
||||
UErrorCode status;
|
||||
int32_t len_result;
|
||||
@ -501,10 +583,11 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||
(errmsg("%s failed: %s", "ucnv_fromUChars",
|
||||
u_errorName(status))));
|
||||
|
||||
*result = palloc(len_result + 1);
|
||||
if (len_result + 1 > destsize)
|
||||
return len_result;
|
||||
|
||||
status = U_ZERO_ERROR;
|
||||
len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
|
||||
len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
|
||||
buff_uchar, len_uchar, &status);
|
||||
if (U_FAILURE(status) ||
|
||||
status == U_STRING_NOT_TERMINATED_WARNING)
|
||||
@ -515,6 +598,43 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
|
||||
return len_result;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
|
||||
UChar **buff_dest, UChar *buff_source, int32_t len_source)
|
||||
{
|
||||
UErrorCode status;
|
||||
int32_t len_dest;
|
||||
|
||||
len_dest = len_source; /* try first with same length */
|
||||
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
|
||||
status = U_ZERO_ERROR;
|
||||
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
|
||||
mylocale->info.icu.locale, &status);
|
||||
if (status == U_BUFFER_OVERFLOW_ERROR)
|
||||
{
|
||||
/* try again with adjusted length */
|
||||
pfree(*buff_dest);
|
||||
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
|
||||
status = U_ZERO_ERROR;
|
||||
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
|
||||
mylocale->info.icu.locale, &status);
|
||||
}
|
||||
if (U_FAILURE(status))
|
||||
ereport(ERROR,
|
||||
(errmsg("case conversion failed: %s", u_errorName(status))));
|
||||
return len_dest;
|
||||
}
|
||||
|
||||
static int32_t
|
||||
u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
|
||||
const UChar *src, int32_t srcLength,
|
||||
const char *locale,
|
||||
UErrorCode *pErrorCode)
|
||||
{
|
||||
return u_strToTitle(dest, destCapacity, src, srcLength,
|
||||
NULL, locale, pErrorCode);
|
||||
}
|
||||
|
||||
/*
|
||||
* strncoll_icu_no_utf8
|
||||
*
|
||||
|
@ -11,6 +11,9 @@
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <limits.h>
|
||||
#include <wctype.h>
|
||||
|
||||
#include "access/htup_details.h"
|
||||
#include "catalog/pg_database.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
@ -32,6 +35,13 @@
|
||||
|
||||
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
|
||||
|
||||
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale);
|
||||
|
||||
extern int strncoll_libc(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2,
|
||||
pg_locale_t locale);
|
||||
@ -48,6 +58,323 @@ static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
|
||||
pg_locale_t locale);
|
||||
#endif
|
||||
|
||||
static size_t strlower_libc_sb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strlower_libc_mb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strtitle_libc_sb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strtitle_libc_mb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strupper_libc_sb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
static size_t strupper_libc_mb(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
|
||||
size_t
|
||||
strlower_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale)
|
||||
{
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
return strlower_libc_mb(dst, dstsize, src, srclen, locale);
|
||||
else
|
||||
return strlower_libc_sb(dst, dstsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
size_t
|
||||
strtitle_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale)
|
||||
{
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
|
||||
else
|
||||
return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
size_t
|
||||
strupper_libc(char *dst, size_t dstsize, const char *src,
|
||||
ssize_t srclen, pg_locale_t locale)
|
||||
{
|
||||
if (pg_database_encoding_max_length() > 1)
|
||||
return strupper_libc_mb(dst, dstsize, src, srclen, locale);
|
||||
else
|
||||
return strupper_libc_sb(dst, dstsize, src, srclen, locale);
|
||||
}
|
||||
|
||||
static size_t
|
||||
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
if (srclen + 1 <= destsize)
|
||||
{
|
||||
locale_t loc = locale->info.lt;
|
||||
char *p;
|
||||
|
||||
if (srclen + 1 > destsize)
|
||||
return srclen;
|
||||
|
||||
memcpy(dest, src, srclen);
|
||||
dest[srclen] = '\0';
|
||||
|
||||
/*
|
||||
* Note: we assume that tolower_l() will not be so broken as to need
|
||||
* an isupper_l() guard test. When using the default collation, we
|
||||
* apply the traditional Postgres behavior that forces ASCII-style
|
||||
* treatment of I/i, but in non-default collations you get exactly
|
||||
* what the collation says.
|
||||
*/
|
||||
for (p = dest; *p; p++)
|
||||
{
|
||||
if (locale->is_default)
|
||||
*p = pg_tolower((unsigned char) *p);
|
||||
else
|
||||
*p = tolower_l((unsigned char) *p, loc);
|
||||
}
|
||||
}
|
||||
|
||||
return srclen;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->info.lt;
|
||||
size_t result_size;
|
||||
wchar_t *workspace;
|
||||
char *result;
|
||||
size_t curr_char;
|
||||
size_t max_size;
|
||||
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, srclen + 1, src, srclen, locale);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
workspace[curr_char] = towlower_l(workspace[curr_char], loc);
|
||||
|
||||
/*
|
||||
* Make result large enough; case change might change number of bytes
|
||||
*/
|
||||
max_size = curr_char * pg_database_encoding_max_length();
|
||||
result = palloc(max_size + 1);
|
||||
|
||||
result_size = wchar2char(result, workspace, max_size + 1, locale);
|
||||
|
||||
if (result_size + 1 > destsize)
|
||||
return result_size;
|
||||
|
||||
memcpy(dest, result, result_size);
|
||||
dest[result_size] = '\0';
|
||||
|
||||
pfree(workspace);
|
||||
pfree(result);
|
||||
|
||||
return result_size;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
if (srclen + 1 <= destsize)
|
||||
{
|
||||
locale_t loc = locale->info.lt;
|
||||
int wasalnum = false;
|
||||
char *p;
|
||||
|
||||
memcpy(dest, src, srclen);
|
||||
dest[srclen] = '\0';
|
||||
|
||||
/*
|
||||
* Note: we assume that toupper_l()/tolower_l() will not be so broken
|
||||
* as to need guard tests. When using the default collation, we apply
|
||||
* the traditional Postgres behavior that forces ASCII-style treatment
|
||||
* of I/i, but in non-default collations you get exactly what the
|
||||
* collation says.
|
||||
*/
|
||||
for (p = dest; *p; p++)
|
||||
{
|
||||
if (locale->is_default)
|
||||
{
|
||||
if (wasalnum)
|
||||
*p = pg_tolower((unsigned char) *p);
|
||||
else
|
||||
*p = pg_toupper((unsigned char) *p);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (wasalnum)
|
||||
*p = tolower_l((unsigned char) *p, loc);
|
||||
else
|
||||
*p = toupper_l((unsigned char) *p, loc);
|
||||
}
|
||||
wasalnum = isalnum_l((unsigned char) *p, loc);
|
||||
}
|
||||
}
|
||||
|
||||
return srclen;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->info.lt;
|
||||
int wasalnum = false;
|
||||
size_t result_size;
|
||||
wchar_t *workspace;
|
||||
char *result;
|
||||
size_t curr_char;
|
||||
size_t max_size;
|
||||
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, srclen + 1, src, srclen, locale);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
{
|
||||
if (wasalnum)
|
||||
workspace[curr_char] = towlower_l(workspace[curr_char], loc);
|
||||
else
|
||||
workspace[curr_char] = towupper_l(workspace[curr_char], loc);
|
||||
wasalnum = iswalnum_l(workspace[curr_char], loc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Make result large enough; case change might change number of bytes
|
||||
*/
|
||||
max_size = curr_char * pg_database_encoding_max_length();
|
||||
result = palloc(max_size + 1);
|
||||
|
||||
result_size = wchar2char(result, workspace, max_size + 1, locale);
|
||||
|
||||
if (result_size + 1 > destsize)
|
||||
return result_size;
|
||||
|
||||
memcpy(dest, result, result_size);
|
||||
dest[result_size] = '\0';
|
||||
|
||||
pfree(workspace);
|
||||
pfree(result);
|
||||
|
||||
return result_size;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
if (srclen + 1 <= destsize)
|
||||
{
|
||||
locale_t loc = locale->info.lt;
|
||||
char *p;
|
||||
|
||||
memcpy(dest, src, srclen);
|
||||
dest[srclen] = '\0';
|
||||
|
||||
/*
|
||||
* Note: we assume that toupper_l() will not be so broken as to need
|
||||
* an islower_l() guard test. When using the default collation, we
|
||||
* apply the traditional Postgres behavior that forces ASCII-style
|
||||
* treatment of I/i, but in non-default collations you get exactly
|
||||
* what the collation says.
|
||||
*/
|
||||
for (p = dest; *p; p++)
|
||||
{
|
||||
if (locale->is_default)
|
||||
*p = pg_toupper((unsigned char) *p);
|
||||
else
|
||||
*p = toupper_l((unsigned char) *p, loc);
|
||||
}
|
||||
}
|
||||
|
||||
return srclen;
|
||||
}
|
||||
|
||||
static size_t
|
||||
strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
pg_locale_t locale)
|
||||
{
|
||||
locale_t loc = locale->info.lt;
|
||||
size_t result_size;
|
||||
wchar_t *workspace;
|
||||
char *result;
|
||||
size_t curr_char;
|
||||
size_t max_size;
|
||||
|
||||
if (srclen < 0)
|
||||
srclen = strlen(src);
|
||||
|
||||
/* Overflow paranoia */
|
||||
if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_OUT_OF_MEMORY),
|
||||
errmsg("out of memory")));
|
||||
|
||||
/* Output workspace cannot have more codes than input bytes */
|
||||
workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));
|
||||
|
||||
char2wchar(workspace, srclen + 1, src, srclen, locale);
|
||||
|
||||
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
|
||||
workspace[curr_char] = towupper_l(workspace[curr_char], loc);
|
||||
|
||||
/*
|
||||
* Make result large enough; case change might change number of bytes
|
||||
*/
|
||||
max_size = curr_char * pg_database_encoding_max_length();
|
||||
result = palloc(max_size + 1);
|
||||
|
||||
result_size = wchar2char(result, workspace, max_size + 1, locale);
|
||||
|
||||
if (result_size + 1 > destsize)
|
||||
return result_size;
|
||||
|
||||
memcpy(dest, result, result_size);
|
||||
dest[result_size] = '\0';
|
||||
|
||||
pfree(workspace);
|
||||
pfree(result);
|
||||
|
||||
return result_size;
|
||||
}
|
||||
|
||||
pg_locale_t
|
||||
create_pg_locale_libc(Oid collid, MemoryContext context)
|
||||
{
|
||||
|
@ -93,6 +93,15 @@ extern void init_database_collation(void);
|
||||
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
|
||||
|
||||
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
|
||||
extern size_t pg_strlower(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strtitle(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern size_t pg_strupper(char *dest, size_t destsize,
|
||||
const char *src, ssize_t srclen,
|
||||
pg_locale_t locale);
|
||||
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
|
||||
extern int pg_strncoll(const char *arg1, ssize_t len1,
|
||||
const char *arg2, ssize_t len2, pg_locale_t locale);
|
||||
@ -112,11 +121,6 @@ extern const char *builtin_validate_locale(int encoding, const char *locale);
|
||||
extern void icu_validate_locale(const char *loc_str);
|
||||
extern char *icu_language_tag(const char *loc_str, int elevel);
|
||||
|
||||
#ifdef USE_ICU
|
||||
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
|
||||
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
|
||||
#endif
|
||||
|
||||
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
|
||||
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
|
||||
pg_locale_t locale);
|
||||
|
Reference in New Issue
Block a user