1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-21 00:42:43 +03:00

Fix char2wchar/wchar2char to support collations properly.

These functions should take a pg_locale_t, not a collation OID, and should
call mbstowcs_l/wcstombs_l where available.  Where those functions are not
available, temporarily select the correct locale with uselocale().

This change removes the bogus assumption that all locales selectable in
a given database have the same wide-character conversion method; in
particular, the collate.linux.utf8 regression test now passes with
LC_CTYPE=C, so long as the database encoding is UTF8.

I decided to move the char2wchar/wchar2char functions out of mbutils.c and
into pg_locale.c, because they work on wchar_t, not pg_wchar, and thus
don't really belong with the mbutils.c functions.  Keeping them where they
were would have required importing pg_locale_t into pg_wchar.h somehow,
which did not seem like a good plan.
This commit is contained in:
Tom Lane
2011-04-23 12:35:41 -04:00
parent bb85030630
commit 2ab0796d7a
12 changed files with 217 additions and 144 deletions

View File

@@ -29,11 +29,12 @@ t_isdigit(const char *ptr)
int clen = pg_mblen(ptr);
wchar_t character[2];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isdigit(TOUCHAR(ptr));
char2wchar(character, 2, ptr, clen, collation);
char2wchar(character, 2, ptr, clen, mylocale);
return iswdigit((wint_t) character[0]);
}
@@ -44,11 +45,12 @@ t_isspace(const char *ptr)
int clen = pg_mblen(ptr);
wchar_t character[2];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isspace(TOUCHAR(ptr));
char2wchar(character, 2, ptr, clen, collation);
char2wchar(character, 2, ptr, clen, mylocale);
return iswspace((wint_t) character[0]);
}
@@ -59,11 +61,12 @@ t_isalpha(const char *ptr)
int clen = pg_mblen(ptr);
wchar_t character[2];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isalpha(TOUCHAR(ptr));
char2wchar(character, 2, ptr, clen, collation);
char2wchar(character, 2, ptr, clen, mylocale);
return iswalpha((wint_t) character[0]);
}
@@ -74,11 +77,12 @@ t_isprint(const char *ptr)
int clen = pg_mblen(ptr);
wchar_t character[2];
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
if (clen == 1 || lc_ctype_is_c(collation))
return isprint(TOUCHAR(ptr));
char2wchar(character, 2, ptr, clen, collation);
char2wchar(character, 2, ptr, clen, mylocale);
return iswprint((wint_t) character[0]);
}
@@ -246,6 +250,7 @@ lowerstr_with_len(const char *str, int len)
#ifdef USE_WIDE_UPPER_LOWER
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
#endif
if (len == 0)
@@ -272,7 +277,7 @@ lowerstr_with_len(const char *str, int len)
*/
wptr = wstr = (wchar_t *) palloc(sizeof(wchar_t) * (len + 1));
wlen = char2wchar(wstr, len + 1, str, len, collation);
wlen = char2wchar(wstr, len + 1, str, len, mylocale);
Assert(wlen <= len);
while (*wptr)
@@ -287,7 +292,7 @@ lowerstr_with_len(const char *str, int len)
len = pg_database_encoding_max_length() * wlen + 1;
out = (char *) palloc(len);
wlen = wchar2char(out, wstr, len, collation);
wlen = wchar2char(out, wstr, len, mylocale);
pfree(wstr);

View File

@@ -300,13 +300,14 @@ TParserInit(char *str, int len)
if (prs->charmaxlen > 1)
{
Oid collation = DEFAULT_COLLATION_OID; /* TODO */
pg_locale_t mylocale = 0; /* TODO */
prs->usewide = true;
if (lc_ctype_is_c(collation))
{
/*
* char2wchar doesn't work for C-locale and sizeof(pg_wchar) could
* be not equal to sizeof(wchar_t)
* be different from sizeof(wchar_t)
*/
prs->pgwstr = (pg_wchar *) palloc(sizeof(pg_wchar) * (prs->lenstr + 1));
pg_mb2wchar_with_len(prs->str, prs->pgwstr, prs->lenstr);
@@ -314,7 +315,8 @@ TParserInit(char *str, int len)
else
{
prs->wstr = (wchar_t *) palloc(sizeof(wchar_t) * (prs->lenstr + 1));
char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr, collation);
char2wchar(prs->wstr, prs->lenstr + 1, prs->str, prs->lenstr,
mylocale);
}
}
else