Use C11 char16_t and char32_t for Unicode code points.

Reviewed-by: Tatsuo Ishii <ishii@postgresql.org> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Reviewed-by: Peter Eisentraut <peter@eisentraut.org> Discussion: https://postgr.es/m/bedcc93d06203dfd89815b10f815ca2de8626e85.camel%40j-davis.com
2025-11-16 15:02:33 +03:00 · 2025-10-29 14:17:13 -07:00
parent 16edc1b94f
commit 3853a6956c
29 changed files with 284 additions and 244 deletions
--- a/src/common/unicode/case_test.c
+++ b/src/common/unicode/case_test.c
@@ -24,6 +24,7 @@
 #include "common/unicode_case.h"
 #include "common/unicode_category.h"
 #include "common/unicode_version.h"
+#include "mb/pg_wchar.h"

 /* enough to hold largest source or result string, including NUL */
 #define BUFSZ 256
@@ -54,7 +55,7 @@ initcap_wbnext(void *state)
 	while (wbstate->offset < wbstate->len &&
 		   wbstate->str[wbstate->offset] != '\0')
 	{
-		pg_wchar	u = utf8_to_unicode((unsigned char *) wbstate->str +
+		char32_t	u = utf8_to_unicode((unsigned char *) wbstate->str +
 										wbstate->offset);
 		bool		curr_alnum = pg_u_isalnum(u, wbstate->posix);

@@ -77,16 +78,16 @@ initcap_wbnext(void *state)
 #ifdef USE_ICU

 static void
-icu_test_simple(pg_wchar code)
+icu_test_simple(char32_t code)
 {
-	pg_wchar	lower = unicode_lowercase_simple(code);
-	pg_wchar	title = unicode_titlecase_simple(code);
-	pg_wchar	upper = unicode_uppercase_simple(code);
-	pg_wchar	fold = unicode_casefold_simple(code);
-	pg_wchar	iculower = u_tolower(code);
-	pg_wchar	icutitle = u_totitle(code);
-	pg_wchar	icuupper = u_toupper(code);
-	pg_wchar	icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
+	char32_t	lower = unicode_lowercase_simple(code);
+	char32_t	title = unicode_titlecase_simple(code);
+	char32_t	upper = unicode_uppercase_simple(code);
+	char32_t	fold = unicode_casefold_simple(code);
+	char32_t	iculower = u_tolower(code);
+	char32_t	icutitle = u_totitle(code);
+	char32_t	icuupper = u_toupper(code);
+	char32_t	icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);

 	if (lower != iculower || title != icutitle || upper != icuupper ||
 		fold != icufold)
@@ -172,7 +173,7 @@ test_icu(void)
 	int			successful = 0;
 	int			skipped_mismatch = 0;

-	for (pg_wchar code = 0; code <= 0x10ffff; code++)
+	for (char32_t code = 0; code <= 0x10ffff; code++)
 	{
 		pg_unicode_category category = unicode_category(code);

--- a/src/common/unicode/category_test.c
+++ b/src/common/unicode/category_test.c
@@ -22,6 +22,7 @@

 #include "common/unicode_category.h"
 #include "common/unicode_version.h"
+#include "mb/pg_wchar.h"

 static int	pg_unicode_version = 0;
 #ifdef USE_ICU
@@ -59,7 +60,7 @@ icu_test()
 	int			pg_skipped_codepoints = 0;
 	int			icu_skipped_codepoints = 0;

-	for (pg_wchar code = 0; code <= 0x10ffff; code++)
+	for (char32_t code = 0; code <= 0x10ffff; code++)
 	{
 		uint8_t		pg_category = unicode_category(code);
 		uint8_t		icu_category = u_charType(code);
--- a/src/common/unicode/generate-norm_test_table.pl
+++ b/src/common/unicode/generate-norm_test_table.pl
@@ -47,8 +47,8 @@ print $OUTPUT <<HEADER;
 typedef struct
 {
 	int			linenum;
-	pg_wchar	input[50];
-	pg_wchar	output[4][50];
+	char32_t	input[50];
+	char32_t	output[4][50];
 } pg_unicode_test;

 /* test table */
--- a/src/common/unicode/generate-unicode_case_table.pl
+++ b/src/common/unicode/generate-unicode_case_table.pl
@@ -270,7 +270,6 @@ print $OT <<"EOS";
 */

 #include "common/unicode_case.h"
-#include "mb/pg_wchar.h"

 /*
 * The maximum number of codepoints that can result from case mapping
@@ -297,7 +296,7 @@ typedef enum
 typedef struct
 {
 	int16		conditions;
-	pg_wchar	map[NCaseKind][MAX_CASE_EXPANSION];
+	char32_t	map[NCaseKind][MAX_CASE_EXPANSION];
 } pg_special_case;

 /*
@@ -430,7 +429,7 @@ foreach my $kind ('lower', 'title', 'upper', 'fold')
 * The entry case_map_${kind}[case_index(codepoint)] is the mapping for the
 * given codepoint.
 */
-static const pg_wchar case_map_$kind\[$index\] =
+static const char32_t case_map_$kind\[$index\] =
 {
 EOS

@@ -502,7 +501,7 @@ print $OT <<"EOS";
 * the offset into the mapping tables.
 */
 static inline uint16
-case_index(pg_wchar cp)
+case_index(char32_t cp)
 {
 	/* Fast path for codepoints < $fastpath_limit */
 	if (cp < $fastpath_limit)
--- a/src/common/unicode/generate-unicode_category_table.pl
+++ b/src/common/unicode/generate-unicode_category_table.pl
@@ -366,15 +366,15 @@ print $OT <<"EOS";
 */
 typedef struct
 {
-	uint32		first;			/* Unicode codepoint */
-	uint32		last;			/* Unicode codepoint */
+	char32_t	first;			/* Unicode codepoint */
+	char32_t	last;			/* Unicode codepoint */
 	uint8		category;		/* General Category */
 } pg_category_range;

 typedef struct
 {
-	uint32		first;			/* Unicode codepoint */
-	uint32		last;			/* Unicode codepoint */
+	char32_t	first;			/* Unicode codepoint */
+	char32_t	last;			/* Unicode codepoint */
 } pg_unicode_range;

 typedef struct
--- a/src/common/unicode/norm_test.c
+++ b/src/common/unicode/norm_test.c
@@ -20,7 +20,7 @@
 #include "norm_test_table.h"

 static char *
-print_wchar_str(const pg_wchar *s)
+print_wchar_str(const char32_t *s)
 {
 #define BUF_DIGITS 50
 	static char buf[BUF_DIGITS * 11 + 1];
@@ -41,7 +41,7 @@ print_wchar_str(const pg_wchar *s)
 }

 static int
-pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2)
+pg_wcscmp(const char32_t *s1, const char32_t *s2)
 {
 	for (;;)
 	{
@@ -65,7 +65,7 @@ main(int argc, char **argv)
 	{
 		for (int form = 0; form < 4; form++)
 		{
-			pg_wchar   *result;
+			char32_t   *result;

 			result = unicode_normalize(form, test->input);