1
0
mirror of https://github.com/postgres/postgres.git synced 2025-12-13 14:22:43 +03:00

Remove char_tolower() API.

It's only useful for an ILIKE optimization for the libc provider using
a single-byte encoding and a non-C locale, but it creates significant
internal complexity.

Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/450ceb6260cad30d7afdf155d991a9caafee7c0d.camel@j-davis.com
This commit is contained in:
Jeff Davis
2025-12-10 11:55:59 -08:00
parent 820343bab3
commit 1e493158d3
5 changed files with 28 additions and 81 deletions

View File

@@ -43,8 +43,8 @@ static text *MB_do_like_escape(text *pat, text *esc);
static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen, static int UTF8_MatchText(const char *t, int tlen, const char *p, int plen,
pg_locale_t locale); pg_locale_t locale);
static int SB_IMatchText(const char *t, int tlen, const char *p, int plen, static int C_IMatchText(const char *t, int tlen, const char *p, int plen,
pg_locale_t locale); pg_locale_t locale);
static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation); static int GenericMatchText(const char *s, int slen, const char *p, int plen, Oid collation);
static int Generic_Text_IC_like(text *str, text *pat, Oid collation); static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -84,22 +84,10 @@ wchareq(const char *p1, const char *p2)
* of getting a single character transformed to the system's wchar_t format. * of getting a single character transformed to the system's wchar_t format.
* So now, we just downcase the strings using lower() and apply regular LIKE * So now, we just downcase the strings using lower() and apply regular LIKE
* comparison. This should be revisited when we install better locale support. * comparison. This should be revisited when we install better locale support.
*/ *
* We do handle case-insensitive matching for the C locale using
/*
* We do handle case-insensitive matching for single-byte encodings using
* fold-on-the-fly processing, however. * fold-on-the-fly processing, however.
*/ */
static char
SB_lower_char(unsigned char c, pg_locale_t locale)
{
if (locale->ctype_is_c)
return pg_ascii_tolower(c);
else if (locale->is_default)
return pg_tolower(c);
else
return char_tolower(c, locale);
}
#define NextByte(p, plen) ((p)++, (plen)--) #define NextByte(p, plen) ((p)++, (plen)--)
@@ -130,10 +118,10 @@ SB_lower_char(unsigned char c, pg_locale_t locale)
#include "like_match.c" #include "like_match.c"
/* setup to compile like_match.c for single byte case insensitive matches */ /* setup to compile like_match.c for case-insensitive matches in C locale */
#define MATCH_LOWER(t, locale) SB_lower_char((unsigned char) (t), locale) #define MATCH_LOWER
#define NextChar(p, plen) NextByte((p), (plen)) #define NextChar(p, plen) NextByte((p), (plen))
#define MatchText SB_IMatchText #define MatchText C_IMatchText
#include "like_match.c" #include "like_match.c"
@@ -202,22 +190,19 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
errmsg("nondeterministic collations are not supported for ILIKE"))); errmsg("nondeterministic collations are not supported for ILIKE")));
/* /*
* For efficiency reasons, in the single byte case we don't call lower() * For efficiency reasons, in the C locale we don't call lower() on the
* on the pattern and text, but instead call SB_lower_char on each * pattern and text, but instead lowercase each character lazily.
* character. In the multi-byte case we don't have much choice :-(. Also, *
* ICU does not support single-character case folding, so we go the long * XXX: use casefolding instead?
* way.
*/ */
if (locale->ctype_is_c || if (locale->ctype_is_c)
(char_tolower_enabled(locale) &&
pg_database_encoding_max_length() == 1))
{ {
p = VARDATA_ANY(pat); p = VARDATA_ANY(pat);
plen = VARSIZE_ANY_EXHDR(pat); plen = VARSIZE_ANY_EXHDR(pat);
s = VARDATA_ANY(str); s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str); slen = VARSIZE_ANY_EXHDR(str);
return SB_IMatchText(s, slen, p, plen, locale); return C_IMatchText(s, slen, p, plen, locale);
} }
else else
{ {
@@ -229,10 +214,13 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
PointerGetDatum(str))); PointerGetDatum(str)));
s = VARDATA_ANY(str); s = VARDATA_ANY(str);
slen = VARSIZE_ANY_EXHDR(str); slen = VARSIZE_ANY_EXHDR(str);
if (GetDatabaseEncoding() == PG_UTF8) if (GetDatabaseEncoding() == PG_UTF8)
return UTF8_MatchText(s, slen, p, plen, 0); return UTF8_MatchText(s, slen, p, plen, 0);
else else if (pg_database_encoding_max_length() > 1)
return MB_MatchText(s, slen, p, plen, 0); return MB_MatchText(s, slen, p, plen, 0);
else
return SB_MatchText(s, slen, p, plen, 0);
} }
} }

View File

@@ -70,10 +70,14 @@
*-------------------- *--------------------
*/ */
/*
* MATCH_LOWER is defined for ILIKE in the C locale as an optimization. Other
* locales must lowercase the inputs with lower() before matching.
*/
#ifdef MATCH_LOWER #ifdef MATCH_LOWER
#define GETCHAR(t, locale) MATCH_LOWER(t, locale) #define GETCHAR(t) pg_ascii_tolower(t)
#else #else
#define GETCHAR(t, locale) (t) #define GETCHAR(t) (t)
#endif #endif
static int static int
@@ -105,7 +109,7 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern must not end with escape character"))); errmsg("LIKE pattern must not end with escape character")));
if (GETCHAR(*p, locale) != GETCHAR(*t, locale)) if (GETCHAR(*p) != GETCHAR(*t))
return LIKE_FALSE; return LIKE_FALSE;
} }
else if (*p == '%') else if (*p == '%')
@@ -167,14 +171,14 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE), (errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("LIKE pattern must not end with escape character"))); errmsg("LIKE pattern must not end with escape character")));
firstpat = GETCHAR(p[1], locale); firstpat = GETCHAR(p[1]);
} }
else else
firstpat = GETCHAR(*p, locale); firstpat = GETCHAR(*p);
while (tlen > 0) while (tlen > 0)
{ {
if (GETCHAR(*t, locale) == firstpat || (locale && !locale->deterministic)) if (GETCHAR(*t) == firstpat || (locale && !locale->deterministic))
{ {
int matched = MatchText(t, tlen, p, plen, locale); int matched = MatchText(t, tlen, p, plen, locale);
@@ -342,7 +346,7 @@ MatchText(const char *t, int tlen, const char *p, int plen, pg_locale_t locale)
NextChar(t1, t1len); NextChar(t1, t1len);
} }
} }
else if (GETCHAR(*p, locale) != GETCHAR(*t, locale)) else if (GETCHAR(*p) != GETCHAR(*t))
{ {
/* non-wildcard pattern char fails to match text char */ /* non-wildcard pattern char fails to match text char */
return LIKE_FALSE; return LIKE_FALSE;

View File

@@ -1629,32 +1629,6 @@ char_is_cased(char ch, pg_locale_t locale)
return locale->ctype->char_is_cased(ch, locale); return locale->ctype->char_is_cased(ch, locale);
} }
/*
* char_tolower_enabled()
*
* Does the provider support char_tolower()?
*/
bool
char_tolower_enabled(pg_locale_t locale)
{
if (locale->ctype == NULL)
return true;
return (locale->ctype->char_tolower != NULL);
}
/*
* char_tolower()
*
* Convert char (single-byte encoding) to lowercase.
*/
char
char_tolower(unsigned char ch, pg_locale_t locale)
{
if (locale->ctype == NULL)
return pg_ascii_tolower(ch);
return locale->ctype->char_tolower(ch, locale);
}
/* /*
* Return required encoding ID for the given locale, or -1 if any encoding is * Return required encoding ID for the given locale, or -1 if any encoding is
* valid for the locale. * valid for the locale.

View File

@@ -248,13 +248,6 @@ wc_isxdigit_libc_mb(pg_wchar wc, pg_locale_t locale)
#endif #endif
} }
static char
char_tolower_libc(unsigned char ch, pg_locale_t locale)
{
Assert(pg_database_encoding_max_length() == 1);
return tolower_l(ch, locale->lt);
}
static bool static bool
char_is_cased_libc(char ch, pg_locale_t locale) char_is_cased_libc(char ch, pg_locale_t locale)
{ {
@@ -339,7 +332,6 @@ static const struct ctype_methods ctype_methods_libc_sb = {
.wc_isspace = wc_isspace_libc_sb, .wc_isspace = wc_isspace_libc_sb,
.wc_isxdigit = wc_isxdigit_libc_sb, .wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc, .char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
.wc_toupper = toupper_libc_sb, .wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb, .wc_tolower = tolower_libc_sb,
}; };
@@ -365,7 +357,6 @@ static const struct ctype_methods ctype_methods_libc_other_mb = {
.wc_isspace = wc_isspace_libc_sb, .wc_isspace = wc_isspace_libc_sb,
.wc_isxdigit = wc_isxdigit_libc_sb, .wc_isxdigit = wc_isxdigit_libc_sb,
.char_is_cased = char_is_cased_libc, .char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
.wc_toupper = toupper_libc_sb, .wc_toupper = toupper_libc_sb,
.wc_tolower = tolower_libc_sb, .wc_tolower = tolower_libc_sb,
}; };
@@ -387,7 +378,6 @@ static const struct ctype_methods ctype_methods_libc_utf8 = {
.wc_isspace = wc_isspace_libc_mb, .wc_isspace = wc_isspace_libc_mb,
.wc_isxdigit = wc_isxdigit_libc_mb, .wc_isxdigit = wc_isxdigit_libc_mb,
.char_is_cased = char_is_cased_libc, .char_is_cased = char_is_cased_libc,
.char_tolower = char_tolower_libc,
.wc_toupper = toupper_libc_mb, .wc_toupper = toupper_libc_mb,
.wc_tolower = tolower_libc_mb, .wc_tolower = tolower_libc_mb,
}; };

View File

@@ -127,13 +127,6 @@ struct ctype_methods
/* required */ /* required */
bool (*char_is_cased) (char ch, pg_locale_t locale); bool (*char_is_cased) (char ch, pg_locale_t locale);
/*
* Optional. If defined, will only be called for single-byte encodings. If
* not defined, or if the encoding is multibyte, will fall back to
* pg_strlower().
*/
char (*char_tolower) (unsigned char ch, pg_locale_t locale);
}; };
/* /*
@@ -185,8 +178,6 @@ extern pg_locale_t pg_newlocale_from_collation(Oid collid);
extern char *get_collation_actual_version(char collprovider, const char *collcollate); extern char *get_collation_actual_version(char collprovider, const char *collcollate);
extern bool char_is_cased(char ch, pg_locale_t locale); extern bool char_is_cased(char ch, pg_locale_t locale);
extern bool char_tolower_enabled(pg_locale_t locale);
extern char char_tolower(unsigned char ch, pg_locale_t locale);
extern size_t pg_strlower(char *dst, size_t dstsize, extern size_t pg_strlower(char *dst, size_t dstsize,
const char *src, ssize_t srclen, const char *src, ssize_t srclen,
pg_locale_t locale); pg_locale_t locale);