mirror of
https://github.com/postgres/postgres.git
synced 2025-11-21 00:42:43 +03:00
Fix char2wchar/wchar2char to support collations properly.
These functions should take a pg_locale_t, not a collation OID, and should call mbstowcs_l/wcstombs_l where available. Where those functions are not available, temporarily select the correct locale with uselocale().

This change removes the bogus assumption that all locales selectable in a given database have the same wide-character conversion method; in particular, the collate.linux.utf8 regression test now passes with LC_CTYPE=C, so long as the database encoding is UTF8.

I decided to move the char2wchar/wchar2char functions out of mbutils.c and into pg_locale.c, because they work on wchar_t, not pg_wchar, and thus don't really belong with the mbutils.c functions. Keeping them where they were would have required importing pg_locale_t into pg_wchar.h somehow, which did not seem like a good plan.
This commit is contained in:
@@ -29,11 +29,12 @@ t_isdigit(const char *ptr)
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[2];
|
||||
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || lc_ctype_is_c(collation))
|
||||
return isdigit(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, 2, ptr, clen, collation);
|
||||
char2wchar(character, 2, ptr, clen, mylocale);
|
||||
|
||||
return iswdigit((wint_t) character[0]);
|
||||
}
|
||||
@@ -44,11 +45,12 @@ t_isspace(const char *ptr)
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[2];
|
||||
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || lc_ctype_is_c(collation))
|
||||
return isspace(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, 2, ptr, clen, collation);
|
||||
char2wchar(character, 2, ptr, clen, mylocale);
|
||||
|
||||
return iswspace((wint_t) character[0]);
|
||||
}
|
||||
@@ -59,11 +61,12 @@ t_isalpha(const char *ptr)
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[2];
|
||||
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || lc_ctype_is_c(collation))
|
||||
return isalpha(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, 2, ptr, clen, collation);
|
||||
char2wchar(character, 2, ptr, clen, mylocale);
|
||||
|
||||
return iswalpha((wint_t) character[0]);
|
||||
}
|
||||
@@ -74,11 +77,12 @@ t_isprint(const char *ptr)
|
||||
int clen = pg_mblen(ptr);
|
||||
wchar_t character[2];
|
||||
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
if (clen == 1 || lc_ctype_is_c(collation))
|
||||
return isprint(TOUCHAR(ptr));
|
||||
|
||||
char2wchar(character, 2, ptr, clen, collation);
|
||||
char2wchar(character, 2, ptr, clen, mylocale);
|
||||
|
||||
return iswprint((wint_t) character[0]);
|
||||
}
|
||||
@@ -246,6 +250,7 @@ lowerstr_with_len(const char *str, int len)
|
||||
|
||||
#ifdef USE_WIDE_UPPER_LOWER
|
||||
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
#endif
|
||||
|
||||
if (len == 0)
|
||||
@@ -272,7 +277,7 @@ lowerstr_with_len(const char *str, int len)
|
||||
*/
|
||||
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
|
||||
|
||||
wlen = char2wchar(wstr, len + 1, str, len, collation);
|
||||
wlen = char2wchar(wstr, len + 1, str, len, mylocale);
|
||||
Assert(wlen <= len);
|
||||
|
||||
while (*wptr)
|
||||
@@ -287,7 +292,7 @@ lowerstr_with_len(const char *str, int len)
|
||||
len = pg_database_encoding_max_length() * wlen + 1;
|
||||
out = (char *) palloc(len);
|
||||
|
||||
wlen = wchar2char(out, wstr, len, collation);
|
||||
wlen = wchar2char(out, wstr, len, mylocale);
|
||||
|
||||
pfree(wstr);
|
||||
|
||||
|
||||
@@ -300,13 +300,14 @@ TParserInit(char *str, int len)
|
||||
if (prs->charmaxlen > 1)
|
||||
{
|
||||
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
|
||||
pg_locale_t mylocale = 0; /* TODO */
|
||||
|
||||
prs->usewide = true;
|
||||
if (lc_ctype_is_c(collation))
|
||||
{
|
||||
/*
|
||||
* char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
|
||||
* be not equal to sizeof(wchar_t)
|
||||
* be different from sizeof(wchar_t)
|
||||
*/
|
||||
prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
|
||||
pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
|
||||
@@ -314,7 +315,8 @@ TParserInit(char *str, int len)
|
||||
else
|
||||
{
|
||||
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
|
||||
char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, collation);
|
||||
char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr,
|
||||
mylocale);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
Reference in New Issue
Block a user