mirror of
https://github.com/postgres/postgres.git
synced 2025-09-05 02:22:28 +03:00
Fix INITCAP() word boundaries for PG_UNICODE_FAST.
Word boundaries are based on whether a character is alphanumeric or not. For the PG_UNICODE_FAST collation, alphanumeric includes non-ASCII digits, whereas for the PG_C_UTF8 collation it only includes digits 0-9. Pass down the right information from the pg_locale_t into initcap_wbnext to differentiate the behavior.

Reported-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/20250417135841.33.nmisch@google.com
This commit is contained in:
@@ -40,6 +40,7 @@ struct WordBoundaryState
|
||||
const char *str;
|
||||
size_t len;
|
||||
size_t offset;
|
||||
bool posix;
|
||||
bool init;
|
||||
bool prev_alnum;
|
||||
};
|
||||
@@ -58,7 +59,7 @@ initcap_wbnext(void *state)
|
||||
{
|
||||
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
|
||||
wbstate->offset);
|
||||
- bool curr_alnum = pg_u_isalnum(u, true);
|
||||
+ bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
|
||||
|
||||
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
|
||||
{
|
||||
@@ -92,6 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
|
||||
.str = src,
|
||||
.len = srclen,
|
||||
.offset = 0,
|
||||
.posix = !locale->info.builtin.casemap_full,
|
||||
.init = false,
|
||||
.prev_alnum = false,
|
||||
};
|
||||
|
Reference in New Issue
Block a user