Fix char2wchar/wchar2char to support collations properly.

These functions should take a pg_locale_t, not a collation OID, and should call mbstowcs_l/wcstombs_l where available. Where those functions are not available, temporarily select the correct locale with uselocale(). This change removes the bogus assumption that all locales selectable in a given database have the same wide-character conversion method; in particular, the collate.linux.utf8 regression test now passes with LC_CTYPE=C, so long as the database encoding is UTF8. I decided to move the char2wchar/wchar2char functions out of mbutils.c and into pg_locale.c, because they work on wchar_t not pg_wchar and thus don't really belong with the mbutils.c functions. Keeping them where they were would have required importing pg_locale_t into pg_wchar.h somehow, which did not seem like a good plan.
2025-12-22 17:42:17 +03:00 · 2011-04-23 12:35:41 -04:00
parent bb85030630
commit 2ab0796d7a
12 changed files with 217 additions and 144 deletions
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1454,6 +1454,10 @@ str_numth(char *dest, char *num, int type)
 	return dest;
 }

+/*****************************************************************************
+ *			upper/lower/initcap functions
+ *****************************************************************************/
+
 /*
 * If the system provides the needed functions for wide-character manipulation
 * (which are all standardized by C99), then we implement upper/lower/initcap
@@ -1527,7 +1531,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		/* Output workspace cannot have more codes than input bytes */
 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

-		char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
+		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);

 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 		{
@@ -1543,7 +1547,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
 		result_size = curr_char * pg_database_encoding_max_length() + 1;
 		result = palloc(result_size);

-		wchar2char(result, workspace, result_size, collid);
+		wchar2char(result, workspace, result_size, mylocale);
 		pfree(workspace);
 	}
 #endif   /* USE_WIDE_UPPER_LOWER */
@@ -1648,7 +1652,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		/* Output workspace cannot have more codes than input bytes */
 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

-		char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
+		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);

 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 		{
@@ -1664,7 +1668,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
 		result_size = curr_char * pg_database_encoding_max_length() + 1;
 		result = palloc(result_size);

-		wchar2char(result, workspace, result_size, collid);
+		wchar2char(result, workspace, result_size, mylocale);
 		pfree(workspace);
 	}
 #endif   /* USE_WIDE_UPPER_LOWER */
@@ -1781,7 +1785,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		/* Output workspace cannot have more codes than input bytes */
 		workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));

-		char2wchar(workspace, nbytes + 1, buff, nbytes, collid);
+		char2wchar(workspace, nbytes + 1, buff, nbytes, mylocale);

 		for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
 		{
@@ -1809,7 +1813,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
 		result_size = curr_char * pg_database_encoding_max_length() + 1;
 		result = palloc(result_size);

-		wchar2char(result, workspace, result_size, collid);
+		wchar2char(result, workspace, result_size, mylocale);
 		pfree(workspace);
 	}
 #endif   /* USE_WIDE_UPPER_LOWER */