mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Control ctype behavior internally with a method table.
Previously, pattern matching and case mapping behavior branched based on the provider. Refactor to use a method table, which is less error-prone. This is also a step toward multiple provider versions, which we may want to support in the future. Reviewed-by: Andreas Karlsson <andreas@proxel.se> Reviewed-by: Peter Eisentraut <peter@eisentraut.org> Discussion: https://postgr.es/m/2830211e1b6e6a2e26d845780b03e125281ea17b.camel%40j-davis.com
This commit is contained in:
@ -20,58 +20,13 @@
|
|||||||
#include "common/unicode_category.h"
|
#include "common/unicode_category.h"
|
||||||
#include "utils/pg_locale.h"
|
#include "utils/pg_locale.h"
|
||||||
|
|
||||||
/*
|
|
||||||
* For the libc provider, to provide as much functionality as possible on a
|
|
||||||
* variety of platforms without going so far as to implement everything from
|
|
||||||
* scratch, we use several implementation strategies depending on the
|
|
||||||
* situation:
|
|
||||||
*
|
|
||||||
* 1. In C/POSIX collations, we use hard-wired code. We can't depend on
|
|
||||||
* the <ctype.h> functions since those will obey LC_CTYPE. Note that these
|
|
||||||
* collations don't give a fig about multibyte characters.
|
|
||||||
*
|
|
||||||
* 2. When working in UTF8 encoding, we use the <wctype.h> functions.
|
|
||||||
* This assumes that every platform uses Unicode codepoints directly
|
|
||||||
* as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
|
|
||||||
* even for non-UTF8 encodings, which may be a problem.) On some platforms
|
|
||||||
* wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
|
|
||||||
*
|
|
||||||
* 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
|
|
||||||
* values up to 255, and punt for values above that. This is 100% correct
|
|
||||||
* only in single-byte encodings such as LATINn. However, non-Unicode
|
|
||||||
* multibyte encodings are mostly Far Eastern character sets for which the
|
|
||||||
* properties being tested here aren't very relevant for higher code values
|
|
||||||
* anyway. The difficulty with using the <wctype.h> functions with
|
|
||||||
* non-Unicode multibyte encodings is that we can have no certainty that
|
|
||||||
* the platform's wchar_t representation matches what we do in pg_wchar
|
|
||||||
* conversions.
|
|
||||||
*
|
|
||||||
* As a special case, in the "default" collation, (2) and (3) force ASCII
|
|
||||||
* letters to follow ASCII upcase/downcase rules, while in a non-default
|
|
||||||
* collation we just let the library functions do what they will. The case
|
|
||||||
* where this matters is treatment of I/i in Turkish, and the behavior is
|
|
||||||
* meant to match the upper()/lower() SQL functions.
|
|
||||||
*
|
|
||||||
* We store the active collation setting in static variables. In principle
|
|
||||||
* it could be passed down to here via the regex library's "struct vars" data
|
|
||||||
* structure; but that would require somewhat invasive changes in the regex
|
|
||||||
* library, and right now there's no real benefit to be gained from that.
|
|
||||||
*
|
|
||||||
* NB: the coding here assumes pg_wchar is an unsigned type.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
PG_REGEX_STRATEGY_C, /* C locale (encoding independent) */
|
|
||||||
PG_REGEX_STRATEGY_BUILTIN, /* built-in Unicode semantics */
|
|
||||||
PG_REGEX_STRATEGY_LIBC_WIDE, /* Use locale_t <wctype.h> functions */
|
|
||||||
PG_REGEX_STRATEGY_LIBC_1BYTE, /* Use locale_t <ctype.h> functions */
|
|
||||||
PG_REGEX_STRATEGY_ICU, /* Use ICU uchar.h functions */
|
|
||||||
} PG_Locale_Strategy;
|
|
||||||
|
|
||||||
static PG_Locale_Strategy pg_regex_strategy;
|
|
||||||
static pg_locale_t pg_regex_locale;
|
static pg_locale_t pg_regex_locale;
|
||||||
|
|
||||||
|
static struct pg_locale_struct dummy_c_locale = {
|
||||||
|
.collate_is_c = true,
|
||||||
|
.ctype_is_c = true,
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Hard-wired character properties for C locale
|
* Hard-wired character properties for C locale
|
||||||
*/
|
*/
|
||||||
@ -228,7 +183,6 @@ void
|
|||||||
pg_set_regex_collation(Oid collation)
|
pg_set_regex_collation(Oid collation)
|
||||||
{
|
{
|
||||||
pg_locale_t locale = 0;
|
pg_locale_t locale = 0;
|
||||||
PG_Locale_Strategy strategy;
|
|
||||||
|
|
||||||
if (!OidIsValid(collation))
|
if (!OidIsValid(collation))
|
||||||
{
|
{
|
||||||
@ -249,8 +203,7 @@ pg_set_regex_collation(Oid collation)
|
|||||||
* catalog access is available, so we can't call
|
* catalog access is available, so we can't call
|
||||||
* pg_newlocale_from_collation().
|
* pg_newlocale_from_collation().
|
||||||
*/
|
*/
|
||||||
strategy = PG_REGEX_STRATEGY_C;
|
locale = &dummy_c_locale;
|
||||||
locale = 0;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -267,113 +220,41 @@ pg_set_regex_collation(Oid collation)
|
|||||||
* C/POSIX collations use this path regardless of database
|
* C/POSIX collations use this path regardless of database
|
||||||
* encoding
|
* encoding
|
||||||
*/
|
*/
|
||||||
strategy = PG_REGEX_STRATEGY_C;
|
locale = &dummy_c_locale;
|
||||||
locale = 0;
|
|
||||||
}
|
|
||||||
else if (locale->provider == COLLPROVIDER_BUILTIN)
|
|
||||||
{
|
|
||||||
Assert(GetDatabaseEncoding() == PG_UTF8);
|
|
||||||
strategy = PG_REGEX_STRATEGY_BUILTIN;
|
|
||||||
}
|
|
||||||
#ifdef USE_ICU
|
|
||||||
else if (locale->provider == COLLPROVIDER_ICU)
|
|
||||||
{
|
|
||||||
strategy = PG_REGEX_STRATEGY_ICU;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
else
|
|
||||||
{
|
|
||||||
Assert(locale->provider == COLLPROVIDER_LIBC);
|
|
||||||
if (GetDatabaseEncoding() == PG_UTF8)
|
|
||||||
strategy = PG_REGEX_STRATEGY_LIBC_WIDE;
|
|
||||||
else
|
|
||||||
strategy = PG_REGEX_STRATEGY_LIBC_1BYTE;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pg_regex_strategy = strategy;
|
|
||||||
pg_regex_locale = locale;
|
pg_regex_locale = locale;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_isdigit(pg_wchar c)
|
pg_wc_isdigit(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISDIGIT));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISDIGIT));
|
return pg_regex_locale->ctype->wc_isdigit(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isdigit_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isdigit(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_isalpha(pg_wchar c)
|
pg_wc_isalpha(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISALPHA));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISALPHA));
|
return pg_regex_locale->ctype->wc_isalpha(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isalpha(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswalpha_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isalpha_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isalpha(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_isalnum(pg_wchar c)
|
pg_wc_isalnum(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISALNUM));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISALNUM));
|
return pg_regex_locale->ctype->wc_isalnum(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isalnum_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isalnum(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
@ -388,231 +269,87 @@ pg_wc_isword(pg_wchar c)
|
|||||||
static int
|
static int
|
||||||
pg_wc_isupper(pg_wchar c)
|
pg_wc_isupper(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISUPPER));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISUPPER));
|
return pg_regex_locale->ctype->wc_isupper(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isupper(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswupper_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isupper_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isupper(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_islower(pg_wchar c)
|
pg_wc_islower(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISLOWER));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISLOWER));
|
return pg_regex_locale->ctype->wc_islower(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_islower(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswlower_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
islower_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_islower(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_isgraph(pg_wchar c)
|
pg_wc_isgraph(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISGRAPH));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISGRAPH));
|
return pg_regex_locale->ctype->wc_isgraph(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isgraph(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswgraph_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isgraph_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isgraph(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_isprint(pg_wchar c)
|
pg_wc_isprint(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISPRINT));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISPRINT));
|
return pg_regex_locale->ctype->wc_isprint(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isprint(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswprint_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isprint_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isprint(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_ispunct(pg_wchar c)
|
pg_wc_ispunct(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISPUNCT));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISPUNCT));
|
return pg_regex_locale->ctype->wc_ispunct(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
ispunct_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_ispunct(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
pg_wc_isspace(pg_wchar c)
|
pg_wc_isspace(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
return (c <= (pg_wchar) 127 &&
|
||||||
case PG_REGEX_STRATEGY_C:
|
(pg_char_properties[c] & PG_ISSPACE));
|
||||||
return (c <= (pg_wchar) 127 &&
|
else
|
||||||
(pg_char_properties[c] & PG_ISSPACE));
|
return pg_regex_locale->ctype->wc_isspace(c, pg_regex_locale);
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return pg_u_isspace(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return iswspace_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
return (c <= (pg_wchar) UCHAR_MAX &&
|
|
||||||
isspace_l((unsigned char) c, pg_regex_locale->info.lt));
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_isspace(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static pg_wchar
|
static pg_wchar
|
||||||
pg_wc_toupper(pg_wchar c)
|
pg_wc_toupper(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
{
|
||||||
case PG_REGEX_STRATEGY_C:
|
if (c <= (pg_wchar) 127)
|
||||||
if (c <= (pg_wchar) 127)
|
return pg_ascii_toupper((unsigned char) c);
|
||||||
return pg_ascii_toupper((unsigned char) c);
|
return c;
|
||||||
return c;
|
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return unicode_uppercase_simple(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
/* force C behavior for ASCII characters, per comments above */
|
|
||||||
if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
|
|
||||||
return pg_ascii_toupper((unsigned char) c);
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return towupper_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
/* force C behavior for ASCII characters, per comments above */
|
|
||||||
if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
|
|
||||||
return pg_ascii_toupper((unsigned char) c);
|
|
||||||
if (c <= (pg_wchar) UCHAR_MAX)
|
|
||||||
return toupper_l((unsigned char) c, pg_regex_locale->info.lt);
|
|
||||||
return c;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_toupper(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
else
|
||||||
|
return pg_regex_locale->ctype->wc_toupper(c, pg_regex_locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
static pg_wchar
|
static pg_wchar
|
||||||
pg_wc_tolower(pg_wchar c)
|
pg_wc_tolower(pg_wchar c)
|
||||||
{
|
{
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
{
|
||||||
case PG_REGEX_STRATEGY_C:
|
if (c <= (pg_wchar) 127)
|
||||||
if (c <= (pg_wchar) 127)
|
return pg_ascii_tolower((unsigned char) c);
|
||||||
return pg_ascii_tolower((unsigned char) c);
|
return c;
|
||||||
return c;
|
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
|
||||||
return unicode_lowercase_simple(c);
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
|
||||||
/* force C behavior for ASCII characters, per comments above */
|
|
||||||
if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
|
|
||||||
return pg_ascii_tolower((unsigned char) c);
|
|
||||||
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
|
|
||||||
return towlower_l((wint_t) c, pg_regex_locale->info.lt);
|
|
||||||
/* FALL THRU */
|
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
/* force C behavior for ASCII characters, per comments above */
|
|
||||||
if (pg_regex_locale->is_default && c <= (pg_wchar) 127)
|
|
||||||
return pg_ascii_tolower((unsigned char) c);
|
|
||||||
if (c <= (pg_wchar) UCHAR_MAX)
|
|
||||||
return tolower_l((unsigned char) c, pg_regex_locale->info.lt);
|
|
||||||
return c;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
#ifdef USE_ICU
|
|
||||||
return u_tolower(c);
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
return 0; /* can't get here, but keep compiler quiet */
|
else
|
||||||
|
return pg_regex_locale->ctype->wc_tolower(c, pg_regex_locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -738,37 +475,25 @@ pg_ctype_get_cache(pg_wc_probefunc probefunc, int cclasscode)
|
|||||||
* would always be true for production values of MAX_SIMPLE_CHR, but it's
|
* would always be true for production values of MAX_SIMPLE_CHR, but it's
|
||||||
* useful to allow it to be small for testing purposes.)
|
* useful to allow it to be small for testing purposes.)
|
||||||
*/
|
*/
|
||||||
switch (pg_regex_strategy)
|
if (pg_regex_locale->ctype_is_c)
|
||||||
{
|
{
|
||||||
case PG_REGEX_STRATEGY_C:
|
|
||||||
#if MAX_SIMPLE_CHR >= 127
|
#if MAX_SIMPLE_CHR >= 127
|
||||||
max_chr = (pg_wchar) 127;
|
max_chr = (pg_wchar) 127;
|
||||||
pcc->cv.cclasscode = -1;
|
pcc->cv.cclasscode = -1;
|
||||||
#else
|
#else
|
||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
||||||
#endif
|
#endif
|
||||||
break;
|
}
|
||||||
case PG_REGEX_STRATEGY_BUILTIN:
|
else
|
||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
{
|
||||||
break;
|
if (pg_regex_locale->ctype->max_chr != 0 &&
|
||||||
case PG_REGEX_STRATEGY_LIBC_WIDE:
|
pg_regex_locale->ctype->max_chr <= MAX_SIMPLE_CHR)
|
||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
{
|
||||||
break;
|
max_chr = pg_regex_locale->ctype->max_chr;
|
||||||
case PG_REGEX_STRATEGY_LIBC_1BYTE:
|
|
||||||
#if MAX_SIMPLE_CHR >= UCHAR_MAX
|
|
||||||
max_chr = (pg_wchar) UCHAR_MAX;
|
|
||||||
pcc->cv.cclasscode = -1;
|
pcc->cv.cclasscode = -1;
|
||||||
#else
|
}
|
||||||
|
else
|
||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case PG_REGEX_STRATEGY_ICU:
|
|
||||||
max_chr = (pg_wchar) MAX_SIMPLE_CHR;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
Assert(false);
|
|
||||||
max_chr = 0; /* can't get here, but keep compiler quiet */
|
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -98,7 +98,7 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
|
|||||||
else if (locale->is_default)
|
else if (locale->is_default)
|
||||||
return pg_tolower(c);
|
return pg_tolower(c);
|
||||||
else
|
else
|
||||||
return tolower_l(c, locale->info.lt);
|
return char_tolower(c, locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -209,7 +209,17 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
|
|||||||
* way.
|
* way.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
if (pg_database_encoding_max_length() > 1 || (locale->provider == COLLPROVIDER_ICU))
|
if (locale->ctype_is_c ||
|
||||||
|
(char_tolower_enabled(locale) &&
|
||||||
|
pg_database_encoding_max_length() == 1))
|
||||||
|
{
|
||||||
|
p = VARDATA_ANY(pat);
|
||||||
|
plen = VARSIZE_ANY_EXHDR(pat);
|
||||||
|
s = VARDATA_ANY(str);
|
||||||
|
slen = VARSIZE_ANY_EXHDR(str);
|
||||||
|
return SB_IMatchText(s, slen, p, plen, locale);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
|
pat = DatumGetTextPP(DirectFunctionCall1Coll(lower, collation,
|
||||||
PointerGetDatum(pat)));
|
PointerGetDatum(pat)));
|
||||||
@ -224,14 +234,6 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
|
|||||||
else
|
else
|
||||||
return MB_MatchText(s, slen, p, plen, 0);
|
return MB_MatchText(s, slen, p, plen, 0);
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
p = VARDATA_ANY(pat);
|
|
||||||
plen = VARSIZE_ANY_EXHDR(pat);
|
|
||||||
s = VARDATA_ANY(str);
|
|
||||||
slen = VARSIZE_ANY_EXHDR(str);
|
|
||||||
return SB_IMatchText(s, slen, p, plen, locale);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1495,13 +1495,8 @@ pattern_char_isalpha(char c, bool is_multibyte,
|
|||||||
{
|
{
|
||||||
if (locale->ctype_is_c)
|
if (locale->ctype_is_c)
|
||||||
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
||||||
else if (is_multibyte && IS_HIGHBIT_SET(c))
|
|
||||||
return true;
|
|
||||||
else if (locale->provider != COLLPROVIDER_LIBC)
|
|
||||||
return IS_HIGHBIT_SET(c) ||
|
|
||||||
(c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
|
|
||||||
else
|
else
|
||||||
return isalpha_l((unsigned char) c, locale->info.lt);
|
return char_is_cased(c, locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -79,31 +79,6 @@ extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
|
|||||||
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
|
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
|
||||||
extern char *get_collation_actual_version_libc(const char *collcollate);
|
extern char *get_collation_actual_version_libc(const char *collcollate);
|
||||||
|
|
||||||
extern size_t strlower_builtin(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strtitle_builtin(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strupper_builtin(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strfold_builtin(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
|
|
||||||
extern size_t strlower_icu(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strtitle_icu(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strupper_icu(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strfold_icu(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
|
|
||||||
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
|
|
||||||
/* GUC settings */
|
/* GUC settings */
|
||||||
char *locale_messages;
|
char *locale_messages;
|
||||||
char *locale_monetary;
|
char *locale_monetary;
|
||||||
@ -1092,6 +1067,9 @@ create_pg_locale(Oid collid, MemoryContext context)
|
|||||||
Assert((result->collate_is_c && result->collate == NULL) ||
|
Assert((result->collate_is_c && result->collate == NULL) ||
|
||||||
(!result->collate_is_c && result->collate != NULL));
|
(!result->collate_is_c && result->collate != NULL));
|
||||||
|
|
||||||
|
Assert((result->ctype_is_c && result->ctype == NULL) ||
|
||||||
|
(!result->ctype_is_c && result->ctype != NULL));
|
||||||
|
|
||||||
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
|
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
|
||||||
&isnull);
|
&isnull);
|
||||||
if (!isnull)
|
if (!isnull)
|
||||||
@ -1256,77 +1234,31 @@ size_t
|
|||||||
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
pg_strlower(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
|
||||||
return strlower_builtin(dst, dstsize, src, srclen, locale);
|
|
||||||
#ifdef USE_ICU
|
|
||||||
else if (locale->provider == COLLPROVIDER_ICU)
|
|
||||||
return strlower_icu(dst, dstsize, src, srclen, locale);
|
|
||||||
#endif
|
|
||||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
|
||||||
return strlower_libc(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
|
||||||
/* shouldn't happen */
|
|
||||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
|
||||||
|
|
||||||
return 0; /* keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
pg_strtitle(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
return locale->ctype->strtitle(dst, dstsize, src, srclen, locale);
|
||||||
return strtitle_builtin(dst, dstsize, src, srclen, locale);
|
|
||||||
#ifdef USE_ICU
|
|
||||||
else if (locale->provider == COLLPROVIDER_ICU)
|
|
||||||
return strtitle_icu(dst, dstsize, src, srclen, locale);
|
|
||||||
#endif
|
|
||||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
|
||||||
return strtitle_libc(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
|
||||||
/* shouldn't happen */
|
|
||||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
|
||||||
|
|
||||||
return 0; /* keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
pg_strupper(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
return locale->ctype->strupper(dst, dstsize, src, srclen, locale);
|
||||||
return strupper_builtin(dst, dstsize, src, srclen, locale);
|
|
||||||
#ifdef USE_ICU
|
|
||||||
else if (locale->provider == COLLPROVIDER_ICU)
|
|
||||||
return strupper_icu(dst, dstsize, src, srclen, locale);
|
|
||||||
#endif
|
|
||||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
|
||||||
return strupper_libc(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
|
||||||
/* shouldn't happen */
|
|
||||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
|
||||||
|
|
||||||
return 0; /* keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
pg_strfold(char *dst, size_t dstsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
if (locale->provider == COLLPROVIDER_BUILTIN)
|
if (locale->ctype->strfold)
|
||||||
return strfold_builtin(dst, dstsize, src, srclen, locale);
|
return locale->ctype->strfold(dst, dstsize, src, srclen, locale);
|
||||||
#ifdef USE_ICU
|
|
||||||
else if (locale->provider == COLLPROVIDER_ICU)
|
|
||||||
return strfold_icu(dst, dstsize, src, srclen, locale);
|
|
||||||
#endif
|
|
||||||
/* for libc, just use strlower */
|
|
||||||
else if (locale->provider == COLLPROVIDER_LIBC)
|
|
||||||
return strlower_libc(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
else
|
||||||
/* shouldn't happen */
|
return locale->ctype->strlower(dst, dstsize, src, srclen, locale);
|
||||||
PGLOCALE_SUPPORT_ERROR(locale->provider);
|
|
||||||
|
|
||||||
return 0; /* keep compiler quiet */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1463,6 +1395,41 @@ pg_strnxfrm_prefix(char *dest, size_t destsize, const char *src,
|
|||||||
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
|
return locale->collate->strnxfrm_prefix(dest, destsize, src, srclen, locale);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* char_is_cased()
|
||||||
|
*
|
||||||
|
* Fuzzy test of whether the given char is case-varying or not. The argument
|
||||||
|
* is a single byte, so in a multibyte encoding, just assume any non-ASCII
|
||||||
|
* char is case-varying.
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
char_is_cased(char ch, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return locale->ctype->char_is_cased(ch, locale);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* char_tolower_enabled()
|
||||||
|
*
|
||||||
|
* Does the provider support char_tolower()?
|
||||||
|
*/
|
||||||
|
bool
|
||||||
|
char_tolower_enabled(pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return (locale->ctype->char_tolower != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* char_tolower()
|
||||||
|
*
|
||||||
|
* Convert char (single-byte encoding) to lowercase.
|
||||||
|
*/
|
||||||
|
char
|
||||||
|
char_tolower(unsigned char ch, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return locale->ctype->char_tolower(ch, locale);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Return required encoding ID for the given locale, or -1 if any encoding is
|
* Return required encoding ID for the given locale, or -1 if any encoding is
|
||||||
* valid for the locale.
|
* valid for the locale.
|
||||||
|
@ -24,15 +24,6 @@
|
|||||||
extern pg_locale_t create_pg_locale_builtin(Oid collid,
|
extern pg_locale_t create_pg_locale_builtin(Oid collid,
|
||||||
MemoryContext context);
|
MemoryContext context);
|
||||||
extern char *get_collation_actual_version_builtin(const char *collcollate);
|
extern char *get_collation_actual_version_builtin(const char *collcollate);
|
||||||
extern size_t strlower_builtin(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strtitle_builtin(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strupper_builtin(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strfold_builtin(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
|
|
||||||
|
|
||||||
struct WordBoundaryState
|
struct WordBoundaryState
|
||||||
{
|
{
|
||||||
@ -76,7 +67,7 @@ initcap_wbnext(void *state)
|
|||||||
return wbstate->len;
|
return wbstate->len;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -84,7 +75,7 @@ strlower_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
locale->info.builtin.casemap_full);
|
locale->info.builtin.casemap_full);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -102,7 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
initcap_wbnext, &wbstate);
|
initcap_wbnext, &wbstate);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -110,7 +101,7 @@ strupper_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
locale->info.builtin.casemap_full);
|
locale->info.builtin.casemap_full);
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -118,6 +109,98 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
locale->info.builtin.casemap_full);
|
locale->info.builtin.casemap_full);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isdigit(wc, !locale->info.builtin.casemap_full);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isalpha(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isalnum(wc, !locale->info.builtin.casemap_full);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isupper(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_islower(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isgraph(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isprint(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_ispunct(wc, !locale->info.builtin.casemap_full);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return pg_u_isspace(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
char_is_cased_builtin(char ch, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return IS_HIGHBIT_SET(ch) ||
|
||||||
|
(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return unicode_uppercase_simple(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return unicode_lowercase_simple(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct ctype_methods ctype_methods_builtin = {
|
||||||
|
.strlower = strlower_builtin,
|
||||||
|
.strtitle = strtitle_builtin,
|
||||||
|
.strupper = strupper_builtin,
|
||||||
|
.strfold = strfold_builtin,
|
||||||
|
.wc_isdigit = wc_isdigit_builtin,
|
||||||
|
.wc_isalpha = wc_isalpha_builtin,
|
||||||
|
.wc_isalnum = wc_isalnum_builtin,
|
||||||
|
.wc_isupper = wc_isupper_builtin,
|
||||||
|
.wc_islower = wc_islower_builtin,
|
||||||
|
.wc_isgraph = wc_isgraph_builtin,
|
||||||
|
.wc_isprint = wc_isprint_builtin,
|
||||||
|
.wc_ispunct = wc_ispunct_builtin,
|
||||||
|
.wc_isspace = wc_isspace_builtin,
|
||||||
|
.char_is_cased = char_is_cased_builtin,
|
||||||
|
.wc_tolower = wc_tolower_builtin,
|
||||||
|
.wc_toupper = wc_toupper_builtin,
|
||||||
|
};
|
||||||
|
|
||||||
pg_locale_t
|
pg_locale_t
|
||||||
create_pg_locale_builtin(Oid collid, MemoryContext context)
|
create_pg_locale_builtin(Oid collid, MemoryContext context)
|
||||||
{
|
{
|
||||||
@ -161,6 +244,8 @@ create_pg_locale_builtin(Oid collid, MemoryContext context)
|
|||||||
result->deterministic = true;
|
result->deterministic = true;
|
||||||
result->collate_is_c = true;
|
result->collate_is_c = true;
|
||||||
result->ctype_is_c = (strcmp(locstr, "C") == 0);
|
result->ctype_is_c = (strcmp(locstr, "C") == 0);
|
||||||
|
if (!result->ctype_is_c)
|
||||||
|
result->ctype = &ctype_methods_builtin;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -48,19 +48,22 @@
|
|||||||
#define TEXTBUFLEN 1024
|
#define TEXTBUFLEN 1024
|
||||||
|
|
||||||
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
|
extern pg_locale_t create_pg_locale_icu(Oid collid, MemoryContext context);
|
||||||
extern size_t strlower_icu(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strtitle_icu(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strupper_icu(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strfold_icu(char *dest, size_t destsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
|
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
|
|
||||||
extern UCollator *pg_ucol_open(const char *loc_str);
|
extern UCollator *pg_ucol_open(const char *loc_str);
|
||||||
|
|
||||||
|
static size_t strlower_icu(char *dest, size_t destsize, const char *src,
|
||||||
|
ssize_t srclen, pg_locale_t locale);
|
||||||
|
static size_t strtitle_icu(char *dest, size_t destsize, const char *src,
|
||||||
|
ssize_t srclen, pg_locale_t locale);
|
||||||
|
static size_t strupper_icu(char *dest, size_t destsize, const char *src,
|
||||||
|
ssize_t srclen, pg_locale_t locale);
|
||||||
|
static size_t strfold_icu(char *dest, size_t destsize, const char *src,
|
||||||
|
ssize_t srclen, pg_locale_t locale);
|
||||||
|
static int strncoll_icu(const char *arg1, ssize_t len1,
|
||||||
|
const char *arg2, ssize_t len2,
|
||||||
|
pg_locale_t locale);
|
||||||
static size_t strnxfrm_icu(char *dest, size_t destsize,
|
static size_t strnxfrm_icu(char *dest, size_t destsize,
|
||||||
const char *src, ssize_t srclen,
|
const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale);
|
pg_locale_t locale);
|
||||||
@ -118,6 +121,25 @@ static int32_t u_strFoldCase_default(UChar *dest, int32_t destCapacity,
|
|||||||
const char *locale,
|
const char *locale,
|
||||||
UErrorCode *pErrorCode);
|
UErrorCode *pErrorCode);
|
||||||
|
|
||||||
|
static bool
|
||||||
|
char_is_cased_icu(char ch, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return IS_HIGHBIT_SET(ch) ||
|
||||||
|
(ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
toupper_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_toupper(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
tolower_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_tolower(wc);
|
||||||
|
}
|
||||||
|
|
||||||
static const struct collate_methods collate_methods_icu = {
|
static const struct collate_methods collate_methods_icu = {
|
||||||
.strncoll = strncoll_icu,
|
.strncoll = strncoll_icu,
|
||||||
.strnxfrm = strnxfrm_icu,
|
.strnxfrm = strnxfrm_icu,
|
||||||
@ -136,6 +158,78 @@ static const struct collate_methods collate_methods_icu_utf8 = {
|
|||||||
.strxfrm_is_safe = true,
|
.strxfrm_is_safe = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isdigit_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isdigit(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalpha_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isalpha(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalnum_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isalnum(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isupper_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isupper(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_islower_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_islower(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isgraph_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isgraph(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isprint_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isprint(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_ispunct_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_ispunct(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isspace_icu(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return u_isspace(wc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct ctype_methods ctype_methods_icu = {
|
||||||
|
.strlower = strlower_icu,
|
||||||
|
.strtitle = strtitle_icu,
|
||||||
|
.strupper = strupper_icu,
|
||||||
|
.strfold = strfold_icu,
|
||||||
|
.wc_isdigit = wc_isdigit_icu,
|
||||||
|
.wc_isalpha = wc_isalpha_icu,
|
||||||
|
.wc_isalnum = wc_isalnum_icu,
|
||||||
|
.wc_isupper = wc_isupper_icu,
|
||||||
|
.wc_islower = wc_islower_icu,
|
||||||
|
.wc_isgraph = wc_isgraph_icu,
|
||||||
|
.wc_isprint = wc_isprint_icu,
|
||||||
|
.wc_ispunct = wc_ispunct_icu,
|
||||||
|
.wc_isspace = wc_isspace_icu,
|
||||||
|
.char_is_cased = char_is_cased_icu,
|
||||||
|
.wc_toupper = toupper_icu,
|
||||||
|
.wc_tolower = tolower_icu,
|
||||||
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
pg_locale_t
|
pg_locale_t
|
||||||
@ -206,6 +300,7 @@ create_pg_locale_icu(Oid collid, MemoryContext context)
|
|||||||
result->collate = &collate_methods_icu_utf8;
|
result->collate = &collate_methods_icu_utf8;
|
||||||
else
|
else
|
||||||
result->collate = &collate_methods_icu;
|
result->collate = &collate_methods_icu;
|
||||||
|
result->ctype = &ctype_methods_icu;
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
#else
|
#else
|
||||||
@ -379,7 +474,7 @@ make_icu_collator(const char *iculocstr, const char *icurules)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -399,7 +494,7 @@ strlower_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
return result_len;
|
return result_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -419,7 +514,7 @@ strtitle_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
return result_len;
|
return result_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
@ -439,7 +534,7 @@ strupper_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
|||||||
return result_len;
|
return result_len;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t
|
static size_t
|
||||||
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strfold_icu(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
{
|
{
|
||||||
|
@ -33,6 +33,46 @@
|
|||||||
#include <shlwapi.h>
|
#include <shlwapi.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For the libc provider, to provide as much functionality as possible on a
|
||||||
|
* variety of platforms without going so far as to implement everything from
|
||||||
|
* scratch, we use several implementation strategies depending on the
|
||||||
|
* situation:
|
||||||
|
*
|
||||||
|
* 1. In C/POSIX collations, we use hard-wired code. We can't depend on
|
||||||
|
* the <ctype.h> functions since those will obey LC_CTYPE. Note that these
|
||||||
|
* collations don't give a fig about multibyte characters.
|
||||||
|
*
|
||||||
|
* 2. When working in UTF8 encoding, we use the <wctype.h> functions.
|
||||||
|
* This assumes that every platform uses Unicode codepoints directly
|
||||||
|
* as the wchar_t representation of Unicode. (XXX: ICU makes this assumption
|
||||||
|
* even for non-UTF8 encodings, which may be a problem.) On some platforms
|
||||||
|
* wchar_t is only 16 bits wide, so we have to punt for codepoints > 0xFFFF.
|
||||||
|
*
|
||||||
|
* 3. In all other encodings, we use the <ctype.h> functions for pg_wchar
|
||||||
|
* values up to 255, and punt for values above that. This is 100% correct
|
||||||
|
* only in single-byte encodings such as LATINn. However, non-Unicode
|
||||||
|
* multibyte encodings are mostly Far Eastern character sets for which the
|
||||||
|
* properties being tested here aren't very relevant for higher code values
|
||||||
|
* anyway. The difficulty with using the <wctype.h> functions with
|
||||||
|
* non-Unicode multibyte encodings is that we can have no certainty that
|
||||||
|
* the platform's wchar_t representation matches what we do in pg_wchar
|
||||||
|
* conversions.
|
||||||
|
*
|
||||||
|
* As a special case, in the "default" collation, (2) and (3) force ASCII
|
||||||
|
* letters to follow ASCII upcase/downcase rules, while in a non-default
|
||||||
|
* collation we just let the library functions do what they will. The case
|
||||||
|
* where this matters is treatment of I/i in Turkish, and the behavior is
|
||||||
|
* meant to match the upper()/lower() SQL functions.
|
||||||
|
*
|
||||||
|
* We store the active collation setting in static variables. In principle
|
||||||
|
* it could be passed down to here via the regex library's "struct vars" data
|
||||||
|
* structure; but that would require somewhat invasive changes in the regex
|
||||||
|
* library, and right now there's no real benefit to be gained from that.
|
||||||
|
*
|
||||||
|
* NB: the coding here assumes pg_wchar is an unsigned type.
|
||||||
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Size of stack buffer to use for string transformations, used to avoid heap
|
* Size of stack buffer to use for string transformations, used to avoid heap
|
||||||
* allocations in typical cases. This should be large enough that most strings
|
* allocations in typical cases. This should be large enough that most strings
|
||||||
@ -43,13 +83,6 @@
|
|||||||
|
|
||||||
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
|
extern pg_locale_t create_pg_locale_libc(Oid collid, MemoryContext context);
|
||||||
|
|
||||||
extern size_t strlower_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strtitle_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
extern size_t strupper_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale);
|
|
||||||
|
|
||||||
static int strncoll_libc(const char *arg1, ssize_t len1,
|
static int strncoll_libc(const char *arg1, ssize_t len1,
|
||||||
const char *arg2, ssize_t len2,
|
const char *arg2, ssize_t len2,
|
||||||
pg_locale_t locale);
|
pg_locale_t locale);
|
||||||
@ -85,6 +118,251 @@ static size_t strupper_libc_mb(char *dest, size_t destsize,
|
|||||||
const char *src, ssize_t srclen,
|
const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale);
|
pg_locale_t locale);
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isdigit_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isdigit_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalpha_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isalpha_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalnum_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isalnum_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isupper_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isupper_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_islower_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return islower_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isgraph_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isgraph_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isprint_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isprint_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_ispunct_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return ispunct_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isspace_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return isspace_l((unsigned char) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswdigit_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalpha_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswalpha_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isalnum_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswalnum_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isupper_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswupper_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_islower_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswlower_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isgraph_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswgraph_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isprint_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswprint_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_ispunct_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswpunct_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
wc_isspace_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
return iswspace_l((wint_t) wc, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static char
|
||||||
|
char_tolower_libc(unsigned char ch, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
Assert(pg_database_encoding_max_length() == 1);
|
||||||
|
return tolower_l(ch, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool
|
||||||
|
char_is_cased_libc(char ch, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
bool is_multibyte = pg_database_encoding_max_length() > 1;
|
||||||
|
|
||||||
|
if (is_multibyte && IS_HIGHBIT_SET(ch))
|
||||||
|
return true;
|
||||||
|
else
|
||||||
|
return isalpha_l((unsigned char) ch, locale->info.lt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
toupper_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
Assert(GetDatabaseEncoding() != PG_UTF8);
|
||||||
|
|
||||||
|
/* force C behavior for ASCII characters, per comments above */
|
||||||
|
if (locale->is_default && wc <= (pg_wchar) 127)
|
||||||
|
return pg_ascii_toupper((unsigned char) wc);
|
||||||
|
if (wc <= (pg_wchar) UCHAR_MAX)
|
||||||
|
return toupper_l((unsigned char) wc, locale->info.lt);
|
||||||
|
else
|
||||||
|
return wc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
toupper_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||||
|
|
||||||
|
/* force C behavior for ASCII characters, per comments above */
|
||||||
|
if (locale->is_default && wc <= (pg_wchar) 127)
|
||||||
|
return pg_ascii_toupper((unsigned char) wc);
|
||||||
|
if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
|
||||||
|
return towupper_l((wint_t) wc, locale->info.lt);
|
||||||
|
else
|
||||||
|
return wc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
tolower_libc_sb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
Assert(GetDatabaseEncoding() != PG_UTF8);
|
||||||
|
|
||||||
|
/* force C behavior for ASCII characters, per comments above */
|
||||||
|
if (locale->is_default && wc <= (pg_wchar) 127)
|
||||||
|
return pg_ascii_tolower((unsigned char) wc);
|
||||||
|
if (wc <= (pg_wchar) UCHAR_MAX)
|
||||||
|
return tolower_l((unsigned char) wc, locale->info.lt);
|
||||||
|
else
|
||||||
|
return wc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static pg_wchar
|
||||||
|
tolower_libc_mb(pg_wchar wc, pg_locale_t locale)
|
||||||
|
{
|
||||||
|
Assert(GetDatabaseEncoding() == PG_UTF8);
|
||||||
|
|
||||||
|
/* force C behavior for ASCII characters, per comments above */
|
||||||
|
if (locale->is_default && wc <= (pg_wchar) 127)
|
||||||
|
return pg_ascii_tolower((unsigned char) wc);
|
||||||
|
if (sizeof(wchar_t) >= 4 || wc <= (pg_wchar) 0xFFFF)
|
||||||
|
return towlower_l((wint_t) wc, locale->info.lt);
|
||||||
|
else
|
||||||
|
return wc;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct ctype_methods ctype_methods_libc_sb = {
|
||||||
|
.strlower = strlower_libc_sb,
|
||||||
|
.strtitle = strtitle_libc_sb,
|
||||||
|
.strupper = strupper_libc_sb,
|
||||||
|
.wc_isdigit = wc_isdigit_libc_sb,
|
||||||
|
.wc_isalpha = wc_isalpha_libc_sb,
|
||||||
|
.wc_isalnum = wc_isalnum_libc_sb,
|
||||||
|
.wc_isupper = wc_isupper_libc_sb,
|
||||||
|
.wc_islower = wc_islower_libc_sb,
|
||||||
|
.wc_isgraph = wc_isgraph_libc_sb,
|
||||||
|
.wc_isprint = wc_isprint_libc_sb,
|
||||||
|
.wc_ispunct = wc_ispunct_libc_sb,
|
||||||
|
.wc_isspace = wc_isspace_libc_sb,
|
||||||
|
.char_is_cased = char_is_cased_libc,
|
||||||
|
.char_tolower = char_tolower_libc,
|
||||||
|
.wc_toupper = toupper_libc_sb,
|
||||||
|
.wc_tolower = tolower_libc_sb,
|
||||||
|
.max_chr = UCHAR_MAX,
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Non-UTF8 multibyte encodings use multibyte semantics for case mapping, but
|
||||||
|
* single-byte semantics for pattern matching.
|
||||||
|
*/
|
||||||
|
static const struct ctype_methods ctype_methods_libc_other_mb = {
|
||||||
|
.strlower = strlower_libc_mb,
|
||||||
|
.strtitle = strtitle_libc_mb,
|
||||||
|
.strupper = strupper_libc_mb,
|
||||||
|
.wc_isdigit = wc_isdigit_libc_sb,
|
||||||
|
.wc_isalpha = wc_isalpha_libc_sb,
|
||||||
|
.wc_isalnum = wc_isalnum_libc_sb,
|
||||||
|
.wc_isupper = wc_isupper_libc_sb,
|
||||||
|
.wc_islower = wc_islower_libc_sb,
|
||||||
|
.wc_isgraph = wc_isgraph_libc_sb,
|
||||||
|
.wc_isprint = wc_isprint_libc_sb,
|
||||||
|
.wc_ispunct = wc_ispunct_libc_sb,
|
||||||
|
.wc_isspace = wc_isspace_libc_sb,
|
||||||
|
.char_is_cased = char_is_cased_libc,
|
||||||
|
.char_tolower = char_tolower_libc,
|
||||||
|
.wc_toupper = toupper_libc_sb,
|
||||||
|
.wc_tolower = tolower_libc_sb,
|
||||||
|
.max_chr = UCHAR_MAX,
|
||||||
|
};
|
||||||
|
|
||||||
|
static const struct ctype_methods ctype_methods_libc_utf8 = {
|
||||||
|
.strlower = strlower_libc_mb,
|
||||||
|
.strtitle = strtitle_libc_mb,
|
||||||
|
.strupper = strupper_libc_mb,
|
||||||
|
.wc_isdigit = wc_isdigit_libc_mb,
|
||||||
|
.wc_isalpha = wc_isalpha_libc_mb,
|
||||||
|
.wc_isalnum = wc_isalnum_libc_mb,
|
||||||
|
.wc_isupper = wc_isupper_libc_mb,
|
||||||
|
.wc_islower = wc_islower_libc_mb,
|
||||||
|
.wc_isgraph = wc_isgraph_libc_mb,
|
||||||
|
.wc_isprint = wc_isprint_libc_mb,
|
||||||
|
.wc_ispunct = wc_ispunct_libc_mb,
|
||||||
|
.wc_isspace = wc_isspace_libc_mb,
|
||||||
|
.char_is_cased = char_is_cased_libc,
|
||||||
|
.char_tolower = char_tolower_libc,
|
||||||
|
.wc_toupper = toupper_libc_mb,
|
||||||
|
.wc_tolower = tolower_libc_mb,
|
||||||
|
};
|
||||||
|
|
||||||
static const struct collate_methods collate_methods_libc = {
|
static const struct collate_methods collate_methods_libc = {
|
||||||
.strncoll = strncoll_libc,
|
.strncoll = strncoll_libc,
|
||||||
.strnxfrm = strnxfrm_libc,
|
.strnxfrm = strnxfrm_libc,
|
||||||
@ -119,36 +397,6 @@ static const struct collate_methods collate_methods_libc_win32_utf8 = {
|
|||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
size_t
|
|
||||||
strlower_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale)
|
|
||||||
{
|
|
||||||
if (pg_database_encoding_max_length() > 1)
|
|
||||||
return strlower_libc_mb(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
|
||||||
return strlower_libc_sb(dst, dstsize, src, srclen, locale);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
strtitle_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale)
|
|
||||||
{
|
|
||||||
if (pg_database_encoding_max_length() > 1)
|
|
||||||
return strtitle_libc_mb(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
|
||||||
return strtitle_libc_sb(dst, dstsize, src, srclen, locale);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
|
||||||
strupper_libc(char *dst, size_t dstsize, const char *src,
|
|
||||||
ssize_t srclen, pg_locale_t locale)
|
|
||||||
{
|
|
||||||
if (pg_database_encoding_max_length() > 1)
|
|
||||||
return strupper_libc_mb(dst, dstsize, src, srclen, locale);
|
|
||||||
else
|
|
||||||
return strupper_libc_sb(dst, dstsize, src, srclen, locale);
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
static size_t
|
||||||
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
strlower_libc_sb(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale)
|
pg_locale_t locale)
|
||||||
@ -481,6 +729,15 @@ create_pg_locale_libc(Oid collid, MemoryContext context)
|
|||||||
#endif
|
#endif
|
||||||
result->collate = &collate_methods_libc;
|
result->collate = &collate_methods_libc;
|
||||||
}
|
}
|
||||||
|
if (!result->ctype_is_c)
|
||||||
|
{
|
||||||
|
if (GetDatabaseEncoding() == PG_UTF8)
|
||||||
|
result->ctype = &ctype_methods_libc_utf8;
|
||||||
|
else if (pg_database_encoding_max_length() > 1)
|
||||||
|
result->ctype = &ctype_methods_libc_other_mb;
|
||||||
|
else
|
||||||
|
result->ctype = &ctype_methods_libc_sb;
|
||||||
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
@ -12,6 +12,8 @@
|
|||||||
#ifndef _PG_LOCALE_
|
#ifndef _PG_LOCALE_
|
||||||
#define _PG_LOCALE_
|
#define _PG_LOCALE_
|
||||||
|
|
||||||
|
#include "mb/pg_wchar.h"
|
||||||
|
|
||||||
#ifdef USE_ICU
|
#ifdef USE_ICU
|
||||||
#include <unicode/ucol.h>
|
#include <unicode/ucol.h>
|
||||||
#endif
|
#endif
|
||||||
@ -77,6 +79,52 @@ struct collate_methods
|
|||||||
bool strxfrm_is_safe;
|
bool strxfrm_is_safe;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ctype_methods
|
||||||
|
{
|
||||||
|
/* case mapping: LOWER()/INITCAP()/UPPER() */
|
||||||
|
size_t (*strlower) (char *dest, size_t destsize,
|
||||||
|
const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale);
|
||||||
|
size_t (*strtitle) (char *dest, size_t destsize,
|
||||||
|
const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale);
|
||||||
|
size_t (*strupper) (char *dest, size_t destsize,
|
||||||
|
const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale);
|
||||||
|
size_t (*strfold) (char *dest, size_t destsize,
|
||||||
|
const char *src, ssize_t srclen,
|
||||||
|
pg_locale_t locale);
|
||||||
|
|
||||||
|
/* required */
|
||||||
|
bool (*wc_isdigit) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_isalpha) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_isalnum) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_isupper) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_islower) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_isgraph) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_isprint) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_ispunct) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
bool (*wc_isspace) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
pg_wchar (*wc_toupper) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
pg_wchar (*wc_tolower) (pg_wchar wc, pg_locale_t locale);
|
||||||
|
|
||||||
|
/* required */
|
||||||
|
bool (*char_is_cased) (char ch, pg_locale_t locale);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Optional. If defined, will only be called for single-byte encodings. If
|
||||||
|
* not defined, or if the encoding is multibyte, will fall back to
|
||||||
|
* pg_strlower().
|
||||||
|
*/
|
||||||
|
char (*char_tolower) (unsigned char ch, pg_locale_t locale);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For regex and pattern matching efficiency, the maximum char value
|
||||||
|
* supported by the above methods. If zero, limit is set by regex code.
|
||||||
|
*/
|
||||||
|
pg_wchar max_chr;
|
||||||
|
};
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We use a discriminated union to hold either a locale_t or an ICU collator.
|
* We use a discriminated union to hold either a locale_t or an ICU collator.
|
||||||
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
* pg_locale_t is occasionally checked for truth, so make it a pointer.
|
||||||
@ -102,6 +150,7 @@ struct pg_locale_struct
|
|||||||
bool is_default;
|
bool is_default;
|
||||||
|
|
||||||
const struct collate_methods *collate; /* NULL if collate_is_c */
|
const struct collate_methods *collate; /* NULL if collate_is_c */
|
||||||
|
const struct ctype_methods *ctype; /* NULL if ctype_is_c */
|
||||||
|
|
||||||
union
|
union
|
||||||
{
|
{
|
||||||
@ -125,6 +174,10 @@ extern void init_database_collation(void);
|
|||||||
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
|
extern pg_locale_t pg_newlocale_from_collation(Oid collid);
|
||||||
|
|
||||||
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
|
extern char *get_collation_actual_version(char collprovider, const char *collcollate);
|
||||||
|
|
||||||
|
extern bool char_is_cased(char ch, pg_locale_t locale);
|
||||||
|
extern bool char_tolower_enabled(pg_locale_t locale);
|
||||||
|
extern char char_tolower(unsigned char ch, pg_locale_t locale);
|
||||||
extern size_t pg_strlower(char *dst, size_t dstsize,
|
extern size_t pg_strlower(char *dst, size_t dstsize,
|
||||||
const char *src, ssize_t srclen,
|
const char *src, ssize_t srclen,
|
||||||
pg_locale_t locale);
|
pg_locale_t locale);
|
||||||
|
@ -1878,7 +1878,6 @@ PGTargetServerType
|
|||||||
PGTernaryBool
|
PGTernaryBool
|
||||||
PGTransactionStatusType
|
PGTransactionStatusType
|
||||||
PGVerbosity
|
PGVerbosity
|
||||||
PG_Locale_Strategy
|
|
||||||
PG_Lock_Status
|
PG_Lock_Status
|
||||||
PG_init_t
|
PG_init_t
|
||||||
PGauthData
|
PGauthData
|
||||||
|
Reference in New Issue
Block a user