1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-16 15:02:33 +03:00

Use C11 char16_t and char32_t for Unicode code points.

Reviewed-by: Tatsuo Ishii <ishii@postgresql.org>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/bedcc93d06203dfd89815b10f815ca2de8626e85.camel%40j-davis.com
This commit is contained in:
Jeff Davis
2025-10-29 14:17:13 -07:00
parent 16edc1b94f
commit 3853a6956c
29 changed files with 284 additions and 244 deletions

View File

@@ -24,6 +24,7 @@
#include "common/unicode_case.h"
#include "common/unicode_category.h"
#include "common/unicode_version.h"
#include "mb/pg_wchar.h"
/* enough to hold largest source or result string, including NUL */
#define BUFSZ 256
@@ -54,7 +55,7 @@ initcap_wbnext(void *state)
while (wbstate->offset < wbstate->len &&
wbstate->str[wbstate->offset] != '\0')
{
pg_wchar u = utf8_to_unicode((unsigned char *) wbstate->str +
char32_t u = utf8_to_unicode((unsigned char *) wbstate->str +
wbstate->offset);
bool curr_alnum = pg_u_isalnum(u, wbstate->posix);
@@ -77,16 +78,16 @@ initcap_wbnext(void *state)
#ifdef USE_ICU
static void
icu_test_simple(pg_wchar code)
icu_test_simple(char32_t code)
{
pg_wchar lower = unicode_lowercase_simple(code);
pg_wchar title = unicode_titlecase_simple(code);
pg_wchar upper = unicode_uppercase_simple(code);
pg_wchar fold = unicode_casefold_simple(code);
pg_wchar iculower = u_tolower(code);
pg_wchar icutitle = u_totitle(code);
pg_wchar icuupper = u_toupper(code);
pg_wchar icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
char32_t lower = unicode_lowercase_simple(code);
char32_t title = unicode_titlecase_simple(code);
char32_t upper = unicode_uppercase_simple(code);
char32_t fold = unicode_casefold_simple(code);
char32_t iculower = u_tolower(code);
char32_t icutitle = u_totitle(code);
char32_t icuupper = u_toupper(code);
char32_t icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
if (lower != iculower || title != icutitle || upper != icuupper ||
fold != icufold)
@@ -172,7 +173,7 @@ test_icu(void)
int successful = 0;
int skipped_mismatch = 0;
for (pg_wchar code = 0; code <= 0x10ffff; code++)
for (char32_t code = 0; code <= 0x10ffff; code++)
{
pg_unicode_category category = unicode_category(code);

View File

@@ -22,6 +22,7 @@
#include "common/unicode_category.h"
#include "common/unicode_version.h"
#include "mb/pg_wchar.h"
static int pg_unicode_version = 0;
#ifdef USE_ICU
@@ -59,7 +60,7 @@ icu_test()
int pg_skipped_codepoints = 0;
int icu_skipped_codepoints = 0;
for (pg_wchar code = 0; code <= 0x10ffff; code++)
for (char32_t code = 0; code <= 0x10ffff; code++)
{
uint8_t pg_category = unicode_category(code);
uint8_t icu_category = u_charType(code);

View File

@@ -47,8 +47,8 @@ print $OUTPUT <<HEADER;
typedef struct
{
int linenum;
pg_wchar input[50];
pg_wchar output[4][50];
char32_t input[50];
char32_t output[4][50];
} pg_unicode_test;
/* test table */

View File

@@ -270,7 +270,6 @@ print $OT <<"EOS";
*/
#include "common/unicode_case.h"
#include "mb/pg_wchar.h"
/*
* The maximum number of codepoints that can result from case mapping
@@ -297,7 +296,7 @@ typedef enum
typedef struct
{
int16 conditions;
pg_wchar map[NCaseKind][MAX_CASE_EXPANSION];
char32_t map[NCaseKind][MAX_CASE_EXPANSION];
} pg_special_case;
/*
@@ -430,7 +429,7 @@ foreach my $kind ('lower', 'title', 'upper', 'fold')
* The entry case_map_${kind}[case_index(codepoint)] is the mapping for the
* given codepoint.
*/
static const pg_wchar case_map_$kind\[$index\] =
static const char32_t case_map_$kind\[$index\] =
{
EOS
@@ -502,7 +501,7 @@ print $OT <<"EOS";
* the offset into the mapping tables.
*/
static inline uint16
case_index(pg_wchar cp)
case_index(char32_t cp)
{
/* Fast path for codepoints < $fastpath_limit */
if (cp < $fastpath_limit)

View File

@@ -366,15 +366,15 @@ print $OT <<"EOS";
*/
typedef struct
{
uint32 first; /* Unicode codepoint */
uint32 last; /* Unicode codepoint */
char32_t first; /* Unicode codepoint */
char32_t last; /* Unicode codepoint */
uint8 category; /* General Category */
} pg_category_range;
typedef struct
{
uint32 first; /* Unicode codepoint */
uint32 last; /* Unicode codepoint */
char32_t first; /* Unicode codepoint */
char32_t last; /* Unicode codepoint */
} pg_unicode_range;
typedef struct

View File

@@ -20,7 +20,7 @@
#include "norm_test_table.h"
static char *
print_wchar_str(const pg_wchar *s)
print_wchar_str(const char32_t *s)
{
#define BUF_DIGITS 50
static char buf[BUF_DIGITS * 11 + 1];
@@ -41,7 +41,7 @@ print_wchar_str(const pg_wchar *s)
}
static int
pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2)
pg_wcscmp(const char32_t *s1, const char32_t *s2)
{
for (;;)
{
@@ -65,7 +65,7 @@ main(int argc, char **argv)
{
for (int form = 0; form < 4; form++)
{
pg_wchar *result;
char32_t *result;
result = unicode_normalize(form, test->input);