1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-05 02:22:28 +03:00

Fix INITCAP() word boundaries for PG_UNICODE_FAST.

Word boundaries are determined by whether a character is alphanumeric
or not. For the PG_UNICODE_FAST collation, alphanumeric characters
include non-ASCII digits, whereas for the PG_C_UTF8 collation they
include only the digits 0-9. Pass the right information down from the
pg_locale_t into initcap_wbnext to differentiate the behavior.

Reported-by: Noah Misch <noah@leadboat.com>
Reviewed-by: Noah Misch <noah@leadboat.com>
Discussion: https://postgr.es/m/20250417135841.33.nmisch@google.com
This commit is contained in:
Jeff Davis
2025-04-21 12:34:58 -07:00
parent 80b727eb9d
commit 90260e2ec6
4 changed files with 23 additions and 4 deletions

View File

@@ -40,6 +40,7 @@ struct WordBoundaryState
const char *str;
size_t len;
size_t offset;
+bool posix;
bool init;
bool prev_alnum;
};
@@ -58,7 +59,7 @@ initcap_wbnext(void *state)
{
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
wbstate->offset);
-bool curr_alnum = pg_u_isalnum(u, true);
+bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
if (!wbstate->init || curr_alnum != wbstate->prev_alnum)
{
@@ -92,6 +93,7 @@ strtitle_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
.str = src,
.len = srclen,
.offset = 0,
+.posix = !locale->info.builtin.casemap_full,
.init = false,
.prev_alnum = false,
};