1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Refactor string case conversion into provider-specific files.

Create API entry points pg_strlower(), etc., that work with any
provider and give the caller control over the destination
buffer. Then, move provider-specific logic into pg_locale_builtin.c,
pg_locale_icu.c, and pg_locale_libc.c as appropriate.

Discussion: https://postgr.es/m/7aa46d77b377428058403723440862d12a8a129a.camel@j-davis.com
This commit is contained in:
Jeff Davis
2024-12-16 09:35:18 -08:00
parent de1e298857
commit 86a5d6006a
6 changed files with 676 additions and 418 deletions

View File

@ -1571,52 +1571,6 @@ str_numth(char *dest, char *num, int type)
* upper/lower/initcap functions
*****************************************************************************/
#ifdef USE_ICU
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
/*
 * Apply an ICU case-mapping function to a UChar string.
 *
 * func is called with mylocale->info.icu.locale as its locale argument.
 * The output buffer is palloc'd here and returned through *buff_dest; the
 * callers visible in this file pfree() it once they are done.
 *
 * The first attempt uses an output buffer of len_source UChars.  If func
 * reports U_BUFFER_OVERFLOW_ERROR, the buffer is freed, reallocated with the
 * length func returned, and the call is repeated once.
 *
 * Returns the length reported by the last call to func.  Raises ERROR if the
 * final status satisfies U_FAILURE().
 */
static int32_t
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
UChar **buff_dest, UChar *buff_source, int32_t len_source)
{
UErrorCode status;
int32_t len_dest;
len_dest = len_source; /* try first with same length */
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
/* status must be reset before every ICU call that reports through it */
status = U_ZERO_ERROR;
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
mylocale->info.icu.locale, &status);
if (status == U_BUFFER_OVERFLOW_ERROR)
{
/* try again with adjusted length */
pfree(*buff_dest);
*buff_dest = palloc(len_dest * sizeof(**buff_dest));
status = U_ZERO_ERROR;
len_dest = func(*buff_dest, len_dest, buff_source, len_source,
mylocale->info.icu.locale, &status);
}
if (U_FAILURE(status))
ereport(ERROR,
(errmsg("case conversion failed: %s", u_errorName(status))));
return len_dest;
}
/*
 * Adapter giving u_strToTitle() the same signature as ICU_Convert_Func, so
 * that it can be passed to icu_convert_case().
 *
 * u_strToTitle() takes one extra argument (the fifth); NULL is passed for it
 * here.  Going by the function name that argument is the break iterator and
 * NULL selects ICU's default one -- confirm against the ICU documentation.
 */
static int32_t
u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode)
{
return u_strToTitle(dest, destCapacity, src, srcLength,
NULL, locale, pErrorCode);
}
#endif /* USE_ICU */
/*
* If the system provides the needed functions for wide-character manipulation
* (which are all standardized by C99), then we implement upper/lower/initcap
@ -1663,25 +1617,6 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
result = asc_tolower(buff, nbytes);
}
else
{
#ifdef USE_ICU
if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar;
int32_t len_conv;
UChar *buff_uchar;
UChar *buff_conv;
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
len_conv = icu_convert_case(u_strToLower, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
pfree(buff_uchar);
pfree(buff_conv);
}
else
#endif
if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@ -1689,82 +1624,23 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
char *dst;
size_t needed;
Assert(GetDatabaseEncoding() == PG_UTF8);
/* first try buffer of equal size plus terminating NUL */
dstsize = srclen + 1;
dst = palloc(dstsize);
needed = unicode_strlower(dst, dstsize, src, srclen);
needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
if (needed + 1 > dstsize)
{
/* grow buffer if needed and retry */
dstsize = needed + 1;
dst = repalloc(dst, dstsize);
needed = unicode_strlower(dst, dstsize, src, srclen);
Assert(needed + 1 == dstsize);
needed = pg_strlower(dst, dstsize, src, srclen, mylocale);
Assert(needed + 1 <= dstsize);
}
Assert(dst[needed] == '\0');
result = dst;
}
else
{
Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
size_t curr_char;
size_t result_size;
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
/*
* Make result large enough; case change might change number
* of bytes
*/
result_size = curr_char * pg_database_encoding_max_length() + 1;
result = palloc(result_size);
wchar2char(result, workspace, result_size, mylocale);
pfree(workspace);
}
else
{
char *p;
result = pnstrdup(buff, nbytes);
/*
* Note: we assume that tolower_l() will not be so broken as
* to need an isupper_l() guard test. When using the default
* collation, we apply the traditional Postgres behavior that
* forces ASCII-style treatment of I/i, but in non-default
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
{
if (mylocale->is_default)
*p = pg_tolower((unsigned char) *p);
else
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
}
}
}
}
return result;
}
@ -1805,25 +1681,6 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
result = asc_toupper(buff, nbytes);
}
else
{
#ifdef USE_ICU
if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
UChar *buff_uchar;
UChar *buff_conv;
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
len_conv = icu_convert_case(u_strToUpper, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
pfree(buff_uchar);
pfree(buff_conv);
}
else
#endif
if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
@ -1831,127 +1688,27 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
char *dst;
size_t needed;
Assert(GetDatabaseEncoding() == PG_UTF8);
/* first try buffer of equal size plus terminating NUL */
dstsize = srclen + 1;
dst = palloc(dstsize);
needed = unicode_strupper(dst, dstsize, src, srclen);
needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
if (needed + 1 > dstsize)
{
/* grow buffer if needed and retry */
dstsize = needed + 1;
dst = repalloc(dst, dstsize);
needed = unicode_strupper(dst, dstsize, src, srclen);
Assert(needed + 1 == dstsize);
needed = pg_strupper(dst, dstsize, src, srclen, mylocale);
Assert(needed + 1 <= dstsize);
}
Assert(dst[needed] == '\0');
result = dst;
}
else
{
Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
size_t curr_char;
size_t result_size;
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
/*
* Make result large enough; case change might change number
* of bytes
*/
result_size = curr_char * pg_database_encoding_max_length() + 1;
result = palloc(result_size);
wchar2char(result, workspace, result_size, mylocale);
pfree(workspace);
}
else
{
char *p;
result = pnstrdup(buff, nbytes);
/*
* Note: we assume that toupper_l() will not be so broken as
* to need an islower_l() guard test. When using the default
* collation, we apply the traditional Postgres behavior that
* forces ASCII-style treatment of I/i, but in non-default
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
{
if (mylocale->is_default)
*p = pg_toupper((unsigned char) *p);
else
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
}
}
}
}
return result;
}
/*
 * Iteration state handed to initcap_wbnext() through its void * argument.
 */
struct WordBoundaryState
{
const char *str; /* string being scanned */
size_t len; /* upper bound on offset; also the "no more boundaries" result */
size_t offset; /* byte offset of the next character to examine */
bool init; /* false until the first boundary has been reported */
bool prev_alnum; /* pg_u_isalnum() result at the last reported boundary */
};
/*
 * Simple word boundary iterator that draws boundaries each time the result of
 * pg_u_isalnum() changes.
 *
 * state points to a struct WordBoundaryState.  Each call resumes scanning at
 * state->offset and returns the byte offset of the next character whose
 * alnum-ness differs from the previously reported one (the very first
 * character examined is always reported).  Scanning stops at state->len or
 * at a NUL byte, in which case state->len is returned.
 */
static size_t
initcap_wbnext(void *state)
{
struct WordBoundaryState *wbstate = (struct WordBoundaryState *) state;
while (wbstate->offset < wbstate->len &&
wbstate->str[wbstate->offset] != '\0')
{
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
wbstate->offset);
bool curr_alnum = pg_u_isalnum(u, true);
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
{
/* boundary found: remember where it was, then step past it */
size_t prev_offset = wbstate->offset;
wbstate->init = true;
wbstate->offset += unicode_utf8len(u);
wbstate->prev_alnum = curr_alnum;
return prev_offset;
}
/* same class as before: skip this character and keep looking */
wbstate->offset += unicode_utf8len(u);
}
return wbstate->len;
}
/*
* collation-aware, wide-character-aware initcap function
*
@ -1962,7 +1719,6 @@ char *
str_initcap(const char *buff, size_t nbytes, Oid collid)
{
char *result;
int wasalnum = false;
pg_locale_t mylocale;
if (!buff)
@ -1989,137 +1745,30 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
result = asc_initcap(buff, nbytes);
}
else
{
#ifdef USE_ICU
if (mylocale->provider == COLLPROVIDER_ICU)
{
int32_t len_uchar,
len_conv;
UChar *buff_uchar;
UChar *buff_conv;
len_uchar = icu_to_uchar(&buff_uchar, buff, nbytes);
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
&buff_conv, buff_uchar, len_uchar);
icu_from_uchar(&result, buff_conv, len_conv);
pfree(buff_uchar);
pfree(buff_conv);
}
else
#endif
if (mylocale->provider == COLLPROVIDER_BUILTIN)
{
const char *src = buff;
size_t srclen = nbytes;
size_t dstsize;
char *dst;
size_t needed;
struct WordBoundaryState wbstate = {
.str = src,
.len = srclen,
.offset = 0,
.init = false,
.prev_alnum = false,
};
Assert(GetDatabaseEncoding() == PG_UTF8);
/* first try buffer of equal size plus terminating NUL */
dstsize = srclen + 1;
dst = palloc(dstsize);
needed = unicode_strtitle(dst, dstsize, src, srclen,
initcap_wbnext, &wbstate);
needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
if (needed + 1 > dstsize)
{
/* reset iterator */
wbstate.offset = 0;
wbstate.init = false;
/* grow buffer if needed and retry */
dstsize = needed + 1;
dst = repalloc(dst, dstsize);
needed = unicode_strtitle(dst, dstsize, src, srclen,
initcap_wbnext, &wbstate);
Assert(needed + 1 == dstsize);
needed = pg_strtitle(dst, dstsize, src, srclen, mylocale);
Assert(needed + 1 <= dstsize);
}
Assert(dst[needed] == '\0');
result = dst;
}
else
{
Assert(mylocale->provider == COLLPROVIDER_LIBC);
if (pg_database_encoding_max_length() > 1)
{
wchar_t *workspace;
size_t curr_char;
size_t result_size;
/* Overflow paranoia */
if ((nbytes + 1) > (INT_MAX / sizeof(wchar_t)))
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory")));
/* Output workspace cannot have more codes than input bytes */
workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);
for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
{
if (wasalnum)
workspace[curr_char] = towlower_l(workspace[curr_char], mylocale->info.lt);
else
workspace[curr_char] = towupper_l(workspace[curr_char], mylocale->info.lt);
wasalnum = iswalnum_l(workspace[curr_char], mylocale->info.lt);
}
/*
* Make result large enough; case change might change number
* of bytes
*/
result_size = curr_char * pg_database_encoding_max_length() + 1;
result = palloc(result_size);
wchar2char(result, workspace, result_size, mylocale);
pfree(workspace);
}
else
{
char *p;
result = pnstrdup(buff, nbytes);
/*
* Note: we assume that toupper_l()/tolower_l() will not be so
* broken as to need guard tests. When using the default
* collation, we apply the traditional Postgres behavior that
* forces ASCII-style treatment of I/i, but in non-default
* collations you get exactly what the collation says.
*/
for (p = result; *p; p++)
{
if (mylocale->is_default)
{
if (wasalnum)
*p = pg_tolower((unsigned char) *p);
else
*p = pg_toupper((unsigned char) *p);
}
else
{
if (wasalnum)
*p = tolower_l((unsigned char) *p, mylocale->info.lt);
else
*p = toupper_l((unsigned char) *p, mylocale->info.lt);
}
wasalnum = isalnum_l((unsigned char) *p, mylocale->info.lt);
}
}
}
}
return result;
}

View File

@ -116,6 +116,27 @@ extern size_t strnxfrm_libc(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
/* GUC settings */
char *locale_messages;
char *locale_monetary;
@ -1468,6 +1489,63 @@ get_collation_actual_version(char collprovider, const char *collcollate)
return collversion;
}
/*
 * pg_strlower
 *
 * Provider-independent entry point for lowercasing.  The request is handed
 * to the routine matching locale->provider and that routine's result is
 * returned unchanged.  The visible callers treat the result as the number of
 * bytes required, not counting the terminating NUL, and retry with a larger
 * buffer when it does not fit in dstsize.
 *
 * An unrecognized provider is reported via PGLOCALE_SUPPORT_ERROR.
 */
size_t
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
            pg_locale_t locale)
{
    if (locale->provider == COLLPROVIDER_BUILTIN)
        return strlower_builtin(dst, dstsize, src, srclen, locale);

#ifdef USE_ICU
    if (locale->provider == COLLPROVIDER_ICU)
        return strlower_icu(dst, dstsize, src, srclen, locale);
#endif

    if (locale->provider == COLLPROVIDER_LIBC)
        return strlower_libc(dst, dstsize, src, srclen, locale);

    /* shouldn't happen */
    PGLOCALE_SUPPORT_ERROR(locale->provider);

    return 0;                   /* keep compiler quiet */
}
/*
 * pg_strtitle
 *
 * Provider-independent entry point for titlecasing.  The request is handed
 * to the routine matching locale->provider and that routine's result is
 * returned unchanged.  The visible callers treat the result as the number of
 * bytes required, not counting the terminating NUL, and retry with a larger
 * buffer when it does not fit in dstsize.
 *
 * An unrecognized provider is reported via PGLOCALE_SUPPORT_ERROR.
 */
size_t
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
            pg_locale_t locale)
{
    if (locale->provider == COLLPROVIDER_BUILTIN)
        return strtitle_builtin(dst, dstsize, src, srclen, locale);

#ifdef USE_ICU
    if (locale->provider == COLLPROVIDER_ICU)
        return strtitle_icu(dst, dstsize, src, srclen, locale);
#endif

    if (locale->provider == COLLPROVIDER_LIBC)
        return strtitle_libc(dst, dstsize, src, srclen, locale);

    /* shouldn't happen */
    PGLOCALE_SUPPORT_ERROR(locale->provider);

    return 0;                   /* keep compiler quiet */
}
/*
 * pg_strupper
 *
 * Provider-independent entry point for uppercasing.  The request is handed
 * to the routine matching locale->provider and that routine's result is
 * returned unchanged.  The visible callers treat the result as the number of
 * bytes required, not counting the terminating NUL, and retry with a larger
 * buffer when it does not fit in dstsize.
 *
 * An unrecognized provider is reported via PGLOCALE_SUPPORT_ERROR.
 */
size_t
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
            pg_locale_t locale)
{
    if (locale->provider == COLLPROVIDER_BUILTIN)
        return strupper_builtin(dst, dstsize, src, srclen, locale);

#ifdef USE_ICU
    if (locale->provider == COLLPROVIDER_ICU)
        return strupper_icu(dst, dstsize, src, srclen, locale);
#endif

    if (locale->provider == COLLPROVIDER_LIBC)
        return strupper_libc(dst, dstsize, src, srclen, locale);

    /* shouldn't happen */
    PGLOCALE_SUPPORT_ERROR(locale->provider);

    return 0;                   /* keep compiler quiet */
}
/*
* pg_strcoll
*

View File

@ -13,6 +13,8 @@
#include "catalog/pg_database.h"
#include "catalog/pg_collation.h"
#include "common/unicode_case.h"
#include "common/unicode_category.h"
#include "mb/pg_wchar.h"
#include "miscadmin.h"
#include "utils/builtins.h"
@ -22,6 +24,84 @@
extern pg_locale_t create_pg_locale_builtin(Oid collid,
MemoryContext context);
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
/*
 * Iteration state handed to initcap_wbnext() through its void * argument.
 */
struct WordBoundaryState
{
const char *str; /* string being scanned */
size_t len; /* upper bound on offset; also the "no more boundaries" result */
size_t offset; /* byte offset of the next character to examine */
bool init; /* false until the first boundary has been reported */
bool prev_alnum; /* pg_u_isalnum() result at the last reported boundary */
};
/*
 * Word boundary callback: a boundary is reported wherever the
 * pg_u_isalnum() classification of consecutive characters flips, and
 * unconditionally at the first character examined.
 *
 * state is a struct WordBoundaryState.  Returns the byte offset of the next
 * boundary, or state->len once the end of the string (or a NUL byte) has
 * been reached.
 */
static size_t
initcap_wbnext(void *state)
{
    struct WordBoundaryState *wb = (struct WordBoundaryState *) state;

    for (;;)
    {
        size_t      start = wb->offset;
        pg_wchar    cp;
        bool        is_alnum;

        if (start >= wb->len || wb->str[start] == '\0')
            break;

        cp = utf8_to_unicode((unsigned char *) wb->str + start);
        is_alnum = pg_u_isalnum(cp, true);

        /* always step past the character just examined */
        wb->offset = start + unicode_utf8len(cp);

        /* no change in classification: not a boundary, keep scanning */
        if (wb->init && is_alnum == wb->prev_alnum)
            continue;

        wb->init = true;
        wb->prev_alnum = is_alnum;
        return start;
    }

    return wb->len;
}
/*
 * Lowercasing for the builtin provider: delegates to unicode_strlower() and
 * returns its result.  The locale argument is not consulted.
 */
size_t
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    size_t      needed;

    needed = unicode_strlower(dest, destsize, src, srclen);

    return needed;
}
/*
 * Titlecasing for the builtin provider: delegates to unicode_strtitle(),
 * using initcap_wbnext() with a freshly initialized iterator state to find
 * word boundaries.  The locale argument is not consulted.
 */
size_t
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    struct WordBoundaryState wbstate;

    /* start scanning from the beginning, with no boundary reported yet */
    wbstate.str = src;
    wbstate.len = srclen;
    wbstate.offset = 0;
    wbstate.init = false;
    wbstate.prev_alnum = false;

    return unicode_strtitle(dest, destsize, src, srclen,
                            initcap_wbnext, &wbstate);
}
/*
 * Uppercasing for the builtin provider: delegates to unicode_strupper() and
 * returns its result.  The locale argument is not consulted.
 */
size_t
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    size_t      needed;

    needed = unicode_strupper(dest, destsize, src, srclen);

    return needed;
}
pg_locale_t
create_pg_locale_builtin(Oid collid, MemoryContext context)

View File

@ -48,6 +48,12 @@
#define TEXTBUFLEN 1024
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
#ifdef USE_ICU
@ -62,6 +68,11 @@ extern size_t strnxfrm_prefix_icu(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
typedef int32_t (*ICU_Convert_Func) (UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
/*
* Converter object for converting between ICU's UChar strings and C strings
* in database encoding. Since the database encoding doesn't change, we only
@ -83,8 +94,19 @@ static size_t uchar_length(UConverter *converter,
static int32_t uchar_convert(UConverter *converter,
UChar *dest, int32_t destlen,
const char *src, int32_t srclen);
static int32_t icu_to_uchar(UChar **buff_uchar, const char *buff,
size_t nbytes);
static size_t icu_from_uchar(char *dest, size_t destsize,
const UChar *buff_uchar, int32_t len_uchar);
static void icu_set_collation_attributes(UCollator *collator, const char *loc,
UErrorCode *status);
static int32_t icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
UChar **buff_dest, UChar *buff_source,
int32_t len_source);
static int32_t u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
const UChar *src, int32_t srcLength,
const char *locale,
UErrorCode *pErrorCode);
#endif
pg_locale_t
@ -324,6 +346,66 @@ make_icu_collator(const char *iculocstr, const char *icurules)
}
}
/*
 * Lowercasing for the ICU provider.
 *
 * src is converted to a UChar string with icu_to_uchar(), mapped with
 * u_strToLower() via icu_convert_case(), and converted back into dest with
 * icu_from_uchar().  Both temporary UChar buffers are freed before
 * returning.  The return value is whatever icu_from_uchar() reports.
 */
size_t
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
             pg_locale_t locale)
{
    UChar      *src_uchar;
    UChar      *mapped;
    int32_t     src_ulen;
    int32_t     mapped_len;
    size_t      needed;

    src_ulen = icu_to_uchar(&src_uchar, src, srclen);
    mapped_len = icu_convert_case(u_strToLower, locale,
                                  &mapped, src_uchar, src_ulen);
    needed = icu_from_uchar(dest, destsize, mapped, mapped_len);

    pfree(mapped);
    pfree(src_uchar);

    return needed;
}
/*
 * Titlecasing for the ICU provider.
 *
 * src is converted to a UChar string with icu_to_uchar(), mapped with
 * u_strToTitle_default_BI() via icu_convert_case(), and converted back into
 * dest with icu_from_uchar().  Both temporary UChar buffers are freed before
 * returning.  The return value is whatever icu_from_uchar() reports.
 */
size_t
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
             pg_locale_t locale)
{
    UChar      *src_uchar;
    UChar      *mapped;
    int32_t     src_ulen;
    int32_t     mapped_len;
    size_t      needed;

    src_ulen = icu_to_uchar(&src_uchar, src, srclen);
    mapped_len = icu_convert_case(u_strToTitle_default_BI, locale,
                                  &mapped, src_uchar, src_ulen);
    needed = icu_from_uchar(dest, destsize, mapped, mapped_len);

    pfree(mapped);
    pfree(src_uchar);

    return needed;
}
/*
 * Uppercasing for the ICU provider.
 *
 * src is converted to a UChar string with icu_to_uchar(), mapped with
 * u_strToUpper() via icu_convert_case(), and converted back into dest with
 * icu_from_uchar().  Both temporary UChar buffers are freed before
 * returning.  The return value is whatever icu_from_uchar() reports.
 */
size_t
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
             pg_locale_t locale)
{
    UChar      *src_uchar;
    UChar      *mapped;
    int32_t     src_ulen;
    int32_t     mapped_len;
    size_t      needed;

    src_ulen = icu_to_uchar(&src_uchar, src, srclen);
    mapped_len = icu_convert_case(u_strToUpper, locale,
                                  &mapped, src_uchar, src_ulen);
    needed = icu_from_uchar(dest, destsize, mapped, mapped_len);

    pfree(mapped);
    pfree(src_uchar);

    return needed;
}
/*
* strncoll_icu
*
@ -458,7 +540,7 @@ strnxfrm_prefix_icu(char *dest, size_t destsize,
* The result string is nul-terminated, though most callers rely on the
* result length instead.
*/
int32_t
static int32_t
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
{
int32_t len_uchar;
@ -485,8 +567,8 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
*
* The result string is nul-terminated.
*/
int32_t
icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
static size_t
icu_from_uchar(char *dest, size_t destsize, const UChar *buff_uchar, int32_t len_uchar)
{
UErrorCode status;
int32_t len_result;
@ -501,10 +583,11 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
(errmsg("%s failed: %s", "ucnv_fromUChars",
u_errorName(status))));
*result = palloc(len_result + 1);
if (len_result + 1 > destsize)
return len_result;
status = U_ZERO_ERROR;
len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
len_result = ucnv_fromUChars(icu_converter, dest, len_result + 1,
buff_uchar, len_uchar, &status);
if (U_FAILURE(status) ||
status == U_STRING_NOT_TERMINATED_WARNING)
@ -515,6 +598,43 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
return len_result;
}
/*
 * Run an ICU case-mapping function over a UChar string.
 *
 * The output buffer is palloc'd here and handed back through *buff_dest; the
 * callers in this file pfree() it.  The initial capacity equals the input
 * length.  If func signals U_BUFFER_OVERFLOW_ERROR, the buffer is replaced
 * by one of the length func asked for and the call is repeated once.
 *
 * Returns the length reported by the last call to func; raises ERROR when
 * the final status satisfies U_FAILURE().
 */
static int32_t
icu_convert_case(ICU_Convert_Func func, pg_locale_t mylocale,
                 UChar **buff_dest, UChar *buff_source, int32_t len_source)
{
    int32_t     capacity = len_source;
    UErrorCode  status = U_ZERO_ERROR;
    int32_t     needed;

    *buff_dest = palloc(capacity * sizeof(**buff_dest));
    needed = func(*buff_dest, capacity, buff_source, len_source,
                  mylocale->info.icu.locale, &status);

    if (status == U_BUFFER_OVERFLOW_ERROR)
    {
        /* second attempt, now with the capacity ICU reported */
        pfree(*buff_dest);
        capacity = needed;
        *buff_dest = palloc(capacity * sizeof(**buff_dest));

        status = U_ZERO_ERROR;
        needed = func(*buff_dest, capacity, buff_source, len_source,
                      mylocale->info.icu.locale, &status);
    }

    if (U_FAILURE(status))
        ereport(ERROR,
                (errmsg("case conversion failed: %s", u_errorName(status))));

    return needed;
}
/*
 * Wrapper that gives u_strToTitle() the ICU_Convert_Func signature so it can
 * be passed to icu_convert_case().  NULL is supplied for u_strToTitle()'s
 * extra (fifth) argument.
 */
static int32_t
u_strToTitle_default_BI(UChar *dest, int32_t destCapacity,
                        const UChar *src, int32_t srcLength,
                        const char *locale,
                        UErrorCode *pErrorCode)
{
    int32_t     titled_len;

    titled_len = u_strToTitle(dest, destCapacity, src, srcLength,
                              NULL, locale, pErrorCode);

    return titled_len;
}
/*
* strncoll_icu_no_utf8
*

View File

@ -11,6 +11,9 @@
#include "postgres.h"
#include <limits.h>
#include <wctype.h>
#include "access/htup_details.h"
#include "catalog/pg_database.h"
#include "catalog/pg_collation.h"
@ -32,6 +35,13 @@
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
ssize_t srclen, pg_locale_t locale);
extern int strncoll_libc(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2,
pg_locale_t locale);
@ -48,6 +58,323 @@ static int strncoll_libc_win32_utf8(const char *arg1, ssize_t len1,
pg_locale_t locale);
#endif
static size_t strlower_libc_sb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
static size_t strlower_libc_mb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
static size_t strtitle_libc_sb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
static size_t strtitle_libc_mb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
static size_t strupper_libc_sb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
static size_t strupper_libc_mb(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
/*
 * Lowercasing for the libc provider.  Chooses the wide-character
 * implementation when the database encoding can use more than one byte per
 * character, and the single-byte implementation otherwise.
 */
size_t
strlower_libc(char *dst, size_t dstsize, const char *src,
              ssize_t srclen, pg_locale_t locale)
{
    bool        multibyte = (pg_database_encoding_max_length() > 1);

    return multibyte
        ? strlower_libc_mb(dst, dstsize, src, srclen, locale)
        : strlower_libc_sb(dst, dstsize, src, srclen, locale);
}
/*
 * Titlecasing for the libc provider.  Chooses the wide-character
 * implementation when the database encoding can use more than one byte per
 * character, and the single-byte implementation otherwise.
 */
size_t
strtitle_libc(char *dst, size_t dstsize, const char *src,
              ssize_t srclen, pg_locale_t locale)
{
    bool        multibyte = (pg_database_encoding_max_length() > 1);

    return multibyte
        ? strtitle_libc_mb(dst, dstsize, src, srclen, locale)
        : strtitle_libc_sb(dst, dstsize, src, srclen, locale);
}
/*
 * Uppercasing for the libc provider.  Chooses the wide-character
 * implementation when the database encoding can use more than one byte per
 * character, and the single-byte implementation otherwise.
 */
size_t
strupper_libc(char *dst, size_t dstsize, const char *src,
              ssize_t srclen, pg_locale_t locale)
{
    bool        multibyte = (pg_database_encoding_max_length() > 1);

    return multibyte
        ? strupper_libc_mb(dst, dstsize, src, srclen, locale)
        : strupper_libc_sb(dst, dstsize, src, srclen, locale);
}
/*
 * Lowercase a string in a single-byte encoding using libc.
 *
 * A negative srclen means src is NUL-terminated.  The result always has the
 * same length as the input, so the return value is srclen.  dest is written
 * (and NUL-terminated) only when srclen + 1 bytes fit in destsize; otherwise
 * dest is left untouched and the caller is expected to retry with a larger
 * buffer.
 */
static size_t
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    if (srclen < 0)
        srclen = strlen(src);

    if (srclen + 1 <= destsize)
    {
        locale_t    loc = locale->info.lt;
        char       *p;

        memcpy(dest, src, srclen);
        dest[srclen] = '\0';

        /*
         * Note: we assume that tolower_l() will not be so broken as to need
         * an isupper_l() guard test.  When using the default collation, we
         * apply the traditional Postgres behavior that forces ASCII-style
         * treatment of I/i, but in non-default collations you get exactly
         * what the collation says.
         */
        for (p = dest; *p; p++)
        {
            if (locale->is_default)
                *p = pg_tolower((unsigned char) *p);
            else
                *p = tolower_l((unsigned char) *p, loc);
        }
    }

    return srclen;
}
/*
 * Lowercase a string in a multibyte encoding using libc wide characters.
 *
 * A negative srclen means src is NUL-terminated.  The input is converted to
 * wchar_t with char2wchar(), each code is mapped with towlower_l(), and the
 * result is converted back with wchar2char() into a temporary buffer.
 *
 * Returns the byte length of the converted string, not counting the
 * terminating NUL.  dest is written (and NUL-terminated) only when that
 * length plus one fits in destsize; otherwise dest is left untouched and the
 * caller is expected to retry with a larger buffer.  The temporary buffers
 * are released on both paths.
 */
static size_t
strlower_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    locale_t    loc = locale->info.lt;
    size_t      result_size;
    wchar_t    *workspace;
    char       *result;
    size_t      curr_char;
    size_t      max_size;

    if (srclen < 0)
        srclen = strlen(src);

    /* Overflow paranoia */
    if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    /* Output workspace cannot have more codes than input bytes */
    workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));

    char2wchar(workspace, srclen + 1, src, srclen, locale);

    for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
        workspace[curr_char] = towlower_l(workspace[curr_char], loc);

    /*
     * Make result large enough; case change might change number of bytes
     */
    max_size = curr_char * pg_database_encoding_max_length();
    result = palloc(max_size + 1);

    result_size = wchar2char(result, workspace, max_size + 1, locale);

    /* copy out only if it fits; the caller sees the needed size either way */
    if (result_size + 1 <= destsize)
    {
        memcpy(dest, result, result_size);
        dest[result_size] = '\0';
    }

    /* free the temporaries on the "too small" path too, so nothing leaks */
    pfree(workspace);
    pfree(result);

    return result_size;
}
/*
 * Titlecase a string in a single-byte encoding using libc.
 *
 * A negative srclen means src is NUL-terminated.  The result has the same
 * length as the input, so srclen is returned.  dest is written (and
 * NUL-terminated) only when srclen + 1 bytes fit in destsize.
 *
 * A character is uppercased when the preceding output character was not
 * alphanumeric (per isalnum_l), and lowercased otherwise.
 */
static size_t
strtitle_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    locale_t    loc;
    int         wasalnum = false;
    char       *p;

    if (srclen < 0)
        srclen = strlen(src);

    /* destination too small: report the needed length without writing */
    if (srclen + 1 > destsize)
        return srclen;

    loc = locale->info.lt;

    memcpy(dest, src, srclen);
    dest[srclen] = '\0';

    /*
     * We trust toupper_l()/tolower_l() not to need guard tests.  Under the
     * default collation the traditional Postgres behavior applies, forcing
     * ASCII-style treatment of I/i; any other collation gets exactly what
     * the collation says.
     */
    for (p = dest; *p != '\0'; p++)
    {
        unsigned char c = (unsigned char) *p;

        if (locale->is_default)
            *p = wasalnum ? pg_tolower(c) : pg_toupper(c);
        else
            *p = wasalnum ? tolower_l(c, loc) : toupper_l(c, loc);

        wasalnum = isalnum_l((unsigned char) *p, loc);
    }

    return srclen;
}
/*
 * Titlecase a string in a multibyte encoding using libc wide characters.
 *
 * A negative srclen means src is NUL-terminated.  The input is converted to
 * wchar_t with char2wchar(); each code is uppercased when the preceding
 * output code was not alphanumeric (per iswalnum_l) and lowercased
 * otherwise; the result is converted back with wchar2char() into a temporary
 * buffer.
 *
 * Returns the byte length of the converted string, not counting the
 * terminating NUL.  dest is written (and NUL-terminated) only when that
 * length plus one fits in destsize; otherwise dest is left untouched and the
 * caller is expected to retry with a larger buffer.  The temporary buffers
 * are released on both paths.
 */
static size_t
strtitle_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    locale_t    loc = locale->info.lt;
    int         wasalnum = false;
    size_t      result_size;
    wchar_t    *workspace;
    char       *result;
    size_t      curr_char;
    size_t      max_size;

    if (srclen < 0)
        srclen = strlen(src);

    /* Overflow paranoia */
    if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    /* Output workspace cannot have more codes than input bytes */
    workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));

    char2wchar(workspace, srclen + 1, src, srclen, locale);

    for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
    {
        if (wasalnum)
            workspace[curr_char] = towlower_l(workspace[curr_char], loc);
        else
            workspace[curr_char] = towupper_l(workspace[curr_char], loc);
        wasalnum = iswalnum_l(workspace[curr_char], loc);
    }

    /*
     * Make result large enough; case change might change number of bytes
     */
    max_size = curr_char * pg_database_encoding_max_length();
    result = palloc(max_size + 1);

    result_size = wchar2char(result, workspace, max_size + 1, locale);

    /* copy out only if it fits; the caller sees the needed size either way */
    if (result_size + 1 <= destsize)
    {
        memcpy(dest, result, result_size);
        dest[result_size] = '\0';
    }

    /* free the temporaries on the "too small" path too, so nothing leaks */
    pfree(workspace);
    pfree(result);

    return result_size;
}
/*
 * Uppercase a string in a single-byte encoding using libc.
 *
 * A negative srclen means src is NUL-terminated.  The result has the same
 * length as the input, so srclen is returned.  dest is written (and
 * NUL-terminated) only when srclen + 1 bytes fit in destsize.
 */
static size_t
strupper_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    locale_t    loc;
    char       *p;

    if (srclen < 0)
        srclen = strlen(src);

    /* destination too small: report the needed length without writing */
    if (srclen + 1 > destsize)
        return srclen;

    loc = locale->info.lt;

    memcpy(dest, src, srclen);
    dest[srclen] = '\0';

    /*
     * We trust toupper_l() not to need an islower_l() guard test.  Under the
     * default collation the traditional Postgres behavior applies, forcing
     * ASCII-style treatment of I/i; any other collation gets exactly what
     * the collation says.
     */
    p = dest;
    while (*p != '\0')
    {
        unsigned char c = (unsigned char) *p;

        if (locale->is_default)
            *p = pg_toupper(c);
        else
            *p = toupper_l(c, loc);
        p++;
    }

    return srclen;
}
/*
 * Uppercase a string in a multibyte encoding using libc wide characters.
 *
 * A negative srclen means src is NUL-terminated.  The input is converted to
 * wchar_t with char2wchar(), each code is mapped with towupper_l(), and the
 * result is converted back with wchar2char() into a temporary buffer.
 *
 * Returns the byte length of the converted string, not counting the
 * terminating NUL.  dest is written (and NUL-terminated) only when that
 * length plus one fits in destsize; otherwise dest is left untouched and the
 * caller is expected to retry with a larger buffer.  The temporary buffers
 * are released on both paths.
 */
static size_t
strupper_libc_mb(char *dest, size_t destsize, const char *src, ssize_t srclen,
                 pg_locale_t locale)
{
    locale_t    loc = locale->info.lt;
    size_t      result_size;
    wchar_t    *workspace;
    char       *result;
    size_t      curr_char;
    size_t      max_size;

    if (srclen < 0)
        srclen = strlen(src);

    /* Overflow paranoia */
    if ((srclen + 1) > (INT_MAX / sizeof(wchar_t)))
        ereport(ERROR,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));

    /* Output workspace cannot have more codes than input bytes */
    workspace = (wchar_t *) palloc((srclen + 1) * sizeof(wchar_t));

    char2wchar(workspace, srclen + 1, src, srclen, locale);

    for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
        workspace[curr_char] = towupper_l(workspace[curr_char], loc);

    /*
     * Make result large enough; case change might change number of bytes
     */
    max_size = curr_char * pg_database_encoding_max_length();
    result = palloc(max_size + 1);

    result_size = wchar2char(result, workspace, max_size + 1, locale);

    /* copy out only if it fits; the caller sees the needed size either way */
    if (result_size + 1 <= destsize)
    {
        memcpy(dest, result, result_size);
        dest[result_size] = '\0';
    }

    /* free the temporaries on the "too small" path too, so nothing leaks */
    pfree(workspace);
    pfree(result);

    return result_size;
}
pg_locale_t
create_pg_locale_libc(Oid collid, MemoryContext context)
{

View File

@ -93,6 +93,15 @@ extern void init_database_collation(void);
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern size_t pg_strlower(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
extern size_t pg_strtitle(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
extern size_t pg_strupper(char *dest, size_t destsize,
const char *src, ssize_t srclen,
pg_locale_t locale);
extern int pg_strcoll(const char *arg1, const char *arg2, pg_locale_t locale);
extern int pg_strncoll(const char *arg1, ssize_t len1,
const char *arg2, ssize_t len2, pg_locale_t locale);
@ -112,11 +121,6 @@ extern const char *builtin_validate_locale(int encoding, const char *locale);
extern void icu_validate_locale(const char *loc_str);
extern char *icu_language_tag(const char *loc_str, int elevel);
#ifdef USE_ICU
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
#endif
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen,
pg_locale_t locale);