mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Use C11 char16_t and char32_t for Unicode code points.
Reviewed-by: Tatsuo Ishii <ishii@postgresql.org>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Reviewed-by: Peter Eisentraut <peter@eisentraut.org>
Discussion: https://postgr.es/m/bedcc93d06203dfd89815b10f815ca2de8626e85.camel%40j-davis.com
This commit is contained in:
		
							
								
								
									
										2
									
								
								configure
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								configure
									
									
									
									
										vendored
									
									
								
							@@ -13627,7 +13627,7 @@ fi
 | 
			
		||||
## Header files
 | 
			
		||||
##
 | 
			
		||||
 | 
			
		||||
for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h mbarrier.h sys/epoll.h sys/event.h sys/personality.h sys/prctl.h sys/procctl.h sys/signalfd.h sys/ucred.h termios.h ucred.h xlocale.h
 | 
			
		||||
for ac_header in atomic.h copyfile.h execinfo.h getopt.h ifaddrs.h mbarrier.h sys/epoll.h sys/event.h sys/personality.h sys/prctl.h sys/procctl.h sys/signalfd.h sys/ucred.h termios.h uchar.h ucred.h xlocale.h
 | 
			
		||||
do :
 | 
			
		||||
  as_ac_Header=`$as_echo "ac_cv_header_$ac_header" | $as_tr_sh`
 | 
			
		||||
ac_fn_c_check_header_mongrel "$LINENO" "$ac_header" "$as_ac_Header" "$ac_includes_default"
 | 
			
		||||
 
 | 
			
		||||
@@ -1513,6 +1513,7 @@ AC_CHECK_HEADERS(m4_normalize([
 | 
			
		||||
	sys/signalfd.h
 | 
			
		||||
	sys/ucred.h
 | 
			
		||||
	termios.h
 | 
			
		||||
	uchar.h
 | 
			
		||||
	ucred.h
 | 
			
		||||
	xlocale.h
 | 
			
		||||
]))
 | 
			
		||||
 
 | 
			
		||||
@@ -2613,6 +2613,7 @@ header_checks = [
 | 
			
		||||
  'sys/signalfd.h',
 | 
			
		||||
  'sys/ucred.h',
 | 
			
		||||
  'termios.h',
 | 
			
		||||
  'uchar.h',
 | 
			
		||||
  'ucred.h',
 | 
			
		||||
  'xlocale.h',
 | 
			
		||||
]
 | 
			
		||||
 
 | 
			
		||||
@@ -339,7 +339,7 @@ hexval(unsigned char c)
 | 
			
		||||
 | 
			
		||||
/* is Unicode code point acceptable? */
 | 
			
		||||
static void
 | 
			
		||||
check_unicode_value(pg_wchar c)
 | 
			
		||||
check_unicode_value(char32_t c)
 | 
			
		||||
{
 | 
			
		||||
	if (!is_valid_unicode_codepoint(c))
 | 
			
		||||
		ereport(ERROR,
 | 
			
		||||
@@ -376,7 +376,7 @@ str_udeescape(const char *str, char escape,
 | 
			
		||||
	char	   *new,
 | 
			
		||||
			   *out;
 | 
			
		||||
	size_t		new_len;
 | 
			
		||||
	pg_wchar	pair_first = 0;
 | 
			
		||||
	char16_t	pair_first = 0;
 | 
			
		||||
	ScannerCallbackState scbstate;
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
@@ -420,7 +420,7 @@ str_udeescape(const char *str, char escape,
 | 
			
		||||
					 isxdigit((unsigned char) in[3]) &&
 | 
			
		||||
					 isxdigit((unsigned char) in[4]))
 | 
			
		||||
			{
 | 
			
		||||
				pg_wchar	unicode;
 | 
			
		||||
				char32_t	unicode;
 | 
			
		||||
 | 
			
		||||
				unicode = (hexval(in[1]) << 12) +
 | 
			
		||||
					(hexval(in[2]) << 8) +
 | 
			
		||||
@@ -457,7 +457,7 @@ str_udeescape(const char *str, char escape,
 | 
			
		||||
					 isxdigit((unsigned char) in[6]) &&
 | 
			
		||||
					 isxdigit((unsigned char) in[7]))
 | 
			
		||||
			{
 | 
			
		||||
				pg_wchar	unicode;
 | 
			
		||||
				char32_t	unicode;
 | 
			
		||||
 | 
			
		||||
				unicode = (hexval(in[2]) << 20) +
 | 
			
		||||
					(hexval(in[3]) << 16) +
 | 
			
		||||
 
 | 
			
		||||
@@ -121,7 +121,7 @@ static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner);
 | 
			
		||||
static char *litbufdup(core_yyscan_t yyscanner);
 | 
			
		||||
static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner);
 | 
			
		||||
static int	process_integer_literal(const char *token, YYSTYPE *lval, int base);
 | 
			
		||||
static void addunicode(pg_wchar c, yyscan_t yyscanner);
 | 
			
		||||
static void addunicode(char32_t c, yyscan_t yyscanner);
 | 
			
		||||
 | 
			
		||||
#define yyerror(msg)  scanner_yyerror(msg, yyscanner)
 | 
			
		||||
 | 
			
		||||
@@ -640,7 +640,7 @@ other			.
 | 
			
		||||
					addlit(yytext, yyleng, yyscanner);
 | 
			
		||||
				}
 | 
			
		||||
<xe>{xeunicode} {
 | 
			
		||||
					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
 | 
			
		||||
					char32_t	c = strtoul(yytext + 2, NULL, 16);
 | 
			
		||||
 | 
			
		||||
					/*
 | 
			
		||||
					 * For consistency with other productions, issue any
 | 
			
		||||
@@ -668,7 +668,7 @@ other			.
 | 
			
		||||
					POP_YYLLOC();
 | 
			
		||||
				}
 | 
			
		||||
<xeu>{xeunicode} {
 | 
			
		||||
					pg_wchar	c = strtoul(yytext + 2, NULL, 16);
 | 
			
		||||
					char32_t	c = strtoul(yytext + 2, NULL, 16);
 | 
			
		||||
 | 
			
		||||
					/* Remember start of overall string token ... */
 | 
			
		||||
					PUSH_YYLLOC();
 | 
			
		||||
@@ -1376,7 +1376,7 @@ process_integer_literal(const char *token, YYSTYPE *lval, int base)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
addunicode(pg_wchar c, core_yyscan_t yyscanner)
 | 
			
		||||
addunicode(char32_t c, core_yyscan_t yyscanner)
 | 
			
		||||
{
 | 
			
		||||
	ScannerCallbackState scbstate;
 | 
			
		||||
	char		buf[MAX_UNICODE_EQUIVALENT_STRING + 1];
 | 
			
		||||
 
 | 
			
		||||
@@ -574,7 +574,7 @@ hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
 | 
			
		||||
/* Add given unicode character to scanstring */
 | 
			
		||||
static bool
 | 
			
		||||
addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
addUnicodeChar(char32_t ch, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
{
 | 
			
		||||
	if (ch == 0)
 | 
			
		||||
	{
 | 
			
		||||
@@ -607,7 +607,7 @@ addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
 | 
			
		||||
/* Add unicode character, processing any surrogate pairs */
 | 
			
		||||
static bool
 | 
			
		||||
addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
addUnicode(char32_t ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
{
 | 
			
		||||
	if (is_utf16_surrogate_first(ch))
 | 
			
		||||
	{
 | 
			
		||||
@@ -655,7 +655,7 @@ parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner)
 | 
			
		||||
 | 
			
		||||
	for (i = 2; i < l; i += 2)	/* skip '\u' */
 | 
			
		||||
	{
 | 
			
		||||
		int			ch = 0;
 | 
			
		||||
		char32_t		ch = 0;
 | 
			
		||||
		int			j,
 | 
			
		||||
					si;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -15,7 +15,6 @@
 | 
			
		||||
#include "catalog/pg_collation.h"
 | 
			
		||||
#include "common/unicode_case.h"
 | 
			
		||||
#include "common/unicode_category.h"
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
#include "miscadmin.h"
 | 
			
		||||
#include "utils/builtins.h"
 | 
			
		||||
#include "utils/pg_locale.h"
 | 
			
		||||
@@ -35,6 +34,23 @@ struct WordBoundaryState
 | 
			
		||||
	bool		prev_alnum;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * In UTF-8, pg_wchar is guaranteed to be the code point value.
 | 
			
		||||
 */
 | 
			
		||||
static inline char32_t
 | 
			
		||||
to_char32(pg_wchar wc)
 | 
			
		||||
{
 | 
			
		||||
	Assert(GetDatabaseEncoding() == PG_UTF8);
 | 
			
		||||
	return (char32_t) wc;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline pg_wchar
 | 
			
		||||
to_pg_wchar(char32_t c32)
 | 
			
		||||
{
 | 
			
		||||
	Assert(GetDatabaseEncoding() == PG_UTF8);
 | 
			
		||||
	return (pg_wchar) c32;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Simple word boundary iterator that draws boundaries each time the result of
 | 
			
		||||
 * pg_u_isalnum() changes.
 | 
			
		||||
@@ -47,7 +63,7 @@ initcap_wbnext(void *state)
 | 
			
		||||
	while (wbstate->offset < wbstate->len &&
 | 
			
		||||
		   wbstate->str[wbstate->offset] != '\0')
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	u = utf8_to_unicode((unsigned char *) wbstate->str +
 | 
			
		||||
		char32_t	u = utf8_to_unicode((unsigned char *) wbstate->str +
 | 
			
		||||
										wbstate->offset);
 | 
			
		||||
		bool		curr_alnum = pg_u_isalnum(u, wbstate->posix);
 | 
			
		||||
 | 
			
		||||
@@ -112,61 +128,61 @@ strfold_builtin(char *dest, size_t destsize, const char *src, ssize_t srclen,
 | 
			
		||||
static bool
 | 
			
		||||
wc_isdigit_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isdigit(wc, !locale->builtin.casemap_full);
 | 
			
		||||
	return pg_u_isdigit(to_char32(wc), !locale->builtin.casemap_full);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isalpha_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isalpha(wc);
 | 
			
		||||
	return pg_u_isalpha(to_char32(wc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isalnum_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isalnum(wc, !locale->builtin.casemap_full);
 | 
			
		||||
	return pg_u_isalnum(to_char32(wc), !locale->builtin.casemap_full);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isupper_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isupper(wc);
 | 
			
		||||
	return pg_u_isupper(to_char32(wc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_islower_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_islower(wc);
 | 
			
		||||
	return pg_u_islower(to_char32(wc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isgraph_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isgraph(wc);
 | 
			
		||||
	return pg_u_isgraph(to_char32(wc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isprint_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isprint(wc);
 | 
			
		||||
	return pg_u_isprint(to_char32(wc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_ispunct_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_ispunct(wc, !locale->builtin.casemap_full);
 | 
			
		||||
	return pg_u_ispunct(to_char32(wc), !locale->builtin.casemap_full);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isspace_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isspace(wc);
 | 
			
		||||
	return pg_u_isspace(to_char32(wc));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
wc_isxdigit_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isxdigit(wc, !locale->builtin.casemap_full);
 | 
			
		||||
	return pg_u_isxdigit(to_char32(wc), !locale->builtin.casemap_full);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
@@ -179,13 +195,13 @@ char_is_cased_builtin(char ch, pg_locale_t locale)
 | 
			
		||||
static pg_wchar
 | 
			
		||||
wc_toupper_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return unicode_uppercase_simple(wc);
 | 
			
		||||
	return to_pg_wchar(unicode_uppercase_simple(to_char32(wc)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static pg_wchar
 | 
			
		||||
wc_tolower_builtin(pg_wchar wc, pg_locale_t locale)
 | 
			
		||||
{
 | 
			
		||||
	return unicode_lowercase_simple(wc);
 | 
			
		||||
	return to_pg_wchar(unicode_lowercase_simple(to_char32(wc)));
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static const struct ctype_methods ctype_methods_builtin = {
 | 
			
		||||
 
 | 
			
		||||
@@ -5419,12 +5419,12 @@ unicode_assigned(PG_FUNCTION_ARGS)
 | 
			
		||||
		ereport(ERROR,
 | 
			
		||||
				(errmsg("Unicode categorization can only be performed if server encoding is UTF8")));
 | 
			
		||||
 | 
			
		||||
	/* convert to pg_wchar */
 | 
			
		||||
	/* convert to char32_t */
 | 
			
		||||
	size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
 | 
			
		||||
	p = (unsigned char *) VARDATA_ANY(input);
 | 
			
		||||
	for (int i = 0; i < size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	uchar = utf8_to_unicode(p);
 | 
			
		||||
		char32_t	uchar = utf8_to_unicode(p);
 | 
			
		||||
		int			category = unicode_category(uchar);
 | 
			
		||||
 | 
			
		||||
		if (category == PG_U_UNASSIGNED)
 | 
			
		||||
@@ -5443,24 +5443,24 @@ unicode_normalize_func(PG_FUNCTION_ARGS)
 | 
			
		||||
	char	   *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
 | 
			
		||||
	UnicodeNormalizationForm form;
 | 
			
		||||
	int			size;
 | 
			
		||||
	pg_wchar   *input_chars;
 | 
			
		||||
	pg_wchar   *output_chars;
 | 
			
		||||
	char32_t   *input_chars;
 | 
			
		||||
	char32_t   *output_chars;
 | 
			
		||||
	unsigned char *p;
 | 
			
		||||
	text	   *result;
 | 
			
		||||
	int			i;
 | 
			
		||||
 | 
			
		||||
	form = unicode_norm_form_from_string(formstr);
 | 
			
		||||
 | 
			
		||||
	/* convert to pg_wchar */
 | 
			
		||||
	/* convert to char32_t */
 | 
			
		||||
	size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
 | 
			
		||||
	input_chars = palloc((size + 1) * sizeof(pg_wchar));
 | 
			
		||||
	input_chars = palloc((size + 1) * sizeof(char32_t));
 | 
			
		||||
	p = (unsigned char *) VARDATA_ANY(input);
 | 
			
		||||
	for (i = 0; i < size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		input_chars[i] = utf8_to_unicode(p);
 | 
			
		||||
		p += pg_utf_mblen(p);
 | 
			
		||||
	}
 | 
			
		||||
	input_chars[i] = (pg_wchar) '\0';
 | 
			
		||||
	input_chars[i] = (char32_t) '\0';
 | 
			
		||||
	Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
 | 
			
		||||
 | 
			
		||||
	/* action */
 | 
			
		||||
@@ -5468,7 +5468,7 @@ unicode_normalize_func(PG_FUNCTION_ARGS)
 | 
			
		||||
 | 
			
		||||
	/* convert back to UTF-8 string */
 | 
			
		||||
	size = 0;
 | 
			
		||||
	for (pg_wchar *wp = output_chars; *wp; wp++)
 | 
			
		||||
	for (char32_t *wp = output_chars; *wp; wp++)
 | 
			
		||||
	{
 | 
			
		||||
		unsigned char buf[4];
 | 
			
		||||
 | 
			
		||||
@@ -5480,7 +5480,7 @@ unicode_normalize_func(PG_FUNCTION_ARGS)
 | 
			
		||||
	SET_VARSIZE(result, size + VARHDRSZ);
 | 
			
		||||
 | 
			
		||||
	p = (unsigned char *) VARDATA_ANY(result);
 | 
			
		||||
	for (pg_wchar *wp = output_chars; *wp; wp++)
 | 
			
		||||
	for (char32_t *wp = output_chars; *wp; wp++)
 | 
			
		||||
	{
 | 
			
		||||
		unicode_to_utf8(*wp, p);
 | 
			
		||||
		p += pg_utf_mblen(p);
 | 
			
		||||
@@ -5509,8 +5509,8 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
 | 
			
		||||
	char	   *formstr = text_to_cstring(PG_GETARG_TEXT_PP(1));
 | 
			
		||||
	UnicodeNormalizationForm form;
 | 
			
		||||
	int			size;
 | 
			
		||||
	pg_wchar   *input_chars;
 | 
			
		||||
	pg_wchar   *output_chars;
 | 
			
		||||
	char32_t   *input_chars;
 | 
			
		||||
	char32_t   *output_chars;
 | 
			
		||||
	unsigned char *p;
 | 
			
		||||
	int			i;
 | 
			
		||||
	UnicodeNormalizationQC quickcheck;
 | 
			
		||||
@@ -5519,16 +5519,16 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
 | 
			
		||||
 | 
			
		||||
	form = unicode_norm_form_from_string(formstr);
 | 
			
		||||
 | 
			
		||||
	/* convert to pg_wchar */
 | 
			
		||||
	/* convert to char32_t */
 | 
			
		||||
	size = pg_mbstrlen_with_len(VARDATA_ANY(input), VARSIZE_ANY_EXHDR(input));
 | 
			
		||||
	input_chars = palloc((size + 1) * sizeof(pg_wchar));
 | 
			
		||||
	input_chars = palloc((size + 1) * sizeof(char32_t));
 | 
			
		||||
	p = (unsigned char *) VARDATA_ANY(input);
 | 
			
		||||
	for (i = 0; i < size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		input_chars[i] = utf8_to_unicode(p);
 | 
			
		||||
		p += pg_utf_mblen(p);
 | 
			
		||||
	}
 | 
			
		||||
	input_chars[i] = (pg_wchar) '\0';
 | 
			
		||||
	input_chars[i] = (char32_t) '\0';
 | 
			
		||||
	Assert((char *) p == VARDATA_ANY(input) + VARSIZE_ANY_EXHDR(input));
 | 
			
		||||
 | 
			
		||||
	/* quick check (see UAX #15) */
 | 
			
		||||
@@ -5542,11 +5542,11 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
 | 
			
		||||
	output_chars = unicode_normalize(form, input_chars);
 | 
			
		||||
 | 
			
		||||
	output_size = 0;
 | 
			
		||||
	for (pg_wchar *wp = output_chars; *wp; wp++)
 | 
			
		||||
	for (char32_t *wp = output_chars; *wp; wp++)
 | 
			
		||||
		output_size++;
 | 
			
		||||
 | 
			
		||||
	result = (size == output_size) &&
 | 
			
		||||
		(memcmp(input_chars, output_chars, size * sizeof(pg_wchar)) == 0);
 | 
			
		||||
		(memcmp(input_chars, output_chars, size * sizeof(char32_t)) == 0);
 | 
			
		||||
 | 
			
		||||
	PG_RETURN_BOOL(result);
 | 
			
		||||
}
 | 
			
		||||
@@ -5602,7 +5602,7 @@ unistr(PG_FUNCTION_ARGS)
 | 
			
		||||
	int			len;
 | 
			
		||||
	StringInfoData str;
 | 
			
		||||
	text	   *result;
 | 
			
		||||
	pg_wchar	pair_first = 0;
 | 
			
		||||
	char16_t	pair_first = 0;
 | 
			
		||||
	char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
 | 
			
		||||
 | 
			
		||||
	instr = VARDATA_ANY(input_text);
 | 
			
		||||
@@ -5626,7 +5626,7 @@ unistr(PG_FUNCTION_ARGS)
 | 
			
		||||
			else if ((len >= 5 && isxdigits_n(instr + 1, 4)) ||
 | 
			
		||||
					 (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4)))
 | 
			
		||||
			{
 | 
			
		||||
				pg_wchar	unicode;
 | 
			
		||||
				char32_t	unicode;
 | 
			
		||||
				int			offset = instr[1] == 'u' ? 2 : 1;
 | 
			
		||||
 | 
			
		||||
				unicode = hexval_n(instr + offset, 4);
 | 
			
		||||
@@ -5662,7 +5662,7 @@ unistr(PG_FUNCTION_ARGS)
 | 
			
		||||
			}
 | 
			
		||||
			else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
 | 
			
		||||
			{
 | 
			
		||||
				pg_wchar	unicode;
 | 
			
		||||
				char32_t	unicode;
 | 
			
		||||
 | 
			
		||||
				unicode = hexval_n(instr + 2, 6);
 | 
			
		||||
 | 
			
		||||
@@ -5697,7 +5697,7 @@ unistr(PG_FUNCTION_ARGS)
 | 
			
		||||
			}
 | 
			
		||||
			else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
 | 
			
		||||
			{
 | 
			
		||||
				pg_wchar	unicode;
 | 
			
		||||
				char32_t	unicode;
 | 
			
		||||
 | 
			
		||||
				unicode = hexval_n(instr + 2, 8);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -862,7 +862,7 @@ perform_default_encoding_conversion(const char *src, int len,
 | 
			
		||||
 * may call this outside any transaction, or in an aborted transaction.
 | 
			
		||||
 */
 | 
			
		||||
void
 | 
			
		||||
pg_unicode_to_server(pg_wchar c, unsigned char *s)
 | 
			
		||||
pg_unicode_to_server(char32_t c, unsigned char *s)
 | 
			
		||||
{
 | 
			
		||||
	unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
 | 
			
		||||
	int			c_as_utf8_len;
 | 
			
		||||
@@ -924,7 +924,7 @@ pg_unicode_to_server(pg_wchar c, unsigned char *s)
 | 
			
		||||
 * but simply return false on conversion failure.
 | 
			
		||||
 */
 | 
			
		||||
bool
 | 
			
		||||
pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s)
 | 
			
		||||
pg_unicode_to_server_noerror(char32_t c, unsigned char *s)
 | 
			
		||||
{
 | 
			
		||||
	unsigned char c_as_utf8[MAX_MULTIBYTE_CHAR_LEN + 1];
 | 
			
		||||
	int			c_as_utf8_len;
 | 
			
		||||
 
 | 
			
		||||
@@ -47,7 +47,7 @@
 | 
			
		||||
 | 
			
		||||
/* Prototypes for local functions */
 | 
			
		||||
static int	codepoint_range_cmp(const void *a, const void *b);
 | 
			
		||||
static bool is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize);
 | 
			
		||||
static bool is_code_in_table(char32_t code, const char32_t *map, int mapsize);
 | 
			
		||||
static int	pg_utf8_string_len(const char *source);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
@@ -64,7 +64,7 @@ static int	pg_utf8_string_len(const char *source);
 | 
			
		||||
 *
 | 
			
		||||
 * These are all mapped to the ASCII space character (U+0020).
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar non_ascii_space_ranges[] =
 | 
			
		||||
static const char32_t non_ascii_space_ranges[] =
 | 
			
		||||
{
 | 
			
		||||
	0x00A0, 0x00A0,
 | 
			
		||||
	0x1680, 0x1680,
 | 
			
		||||
@@ -79,7 +79,7 @@ static const pg_wchar non_ascii_space_ranges[] =
 | 
			
		||||
 *
 | 
			
		||||
 * If any of these appear in the input, they are removed.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar commonly_mapped_to_nothing_ranges[] =
 | 
			
		||||
static const char32_t commonly_mapped_to_nothing_ranges[] =
 | 
			
		||||
{
 | 
			
		||||
	0x00AD, 0x00AD,
 | 
			
		||||
	0x034F, 0x034F,
 | 
			
		||||
@@ -114,7 +114,7 @@ static const pg_wchar commonly_mapped_to_nothing_ranges[] =
 | 
			
		||||
 * tables, so one code might originate from multiple source tables.
 | 
			
		||||
 * Adjacent ranges have also been merged together, to save space.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar prohibited_output_ranges[] =
 | 
			
		||||
static const char32_t prohibited_output_ranges[] =
 | 
			
		||||
{
 | 
			
		||||
	0x0000, 0x001F,				/* C.2.1 */
 | 
			
		||||
	0x007F, 0x00A0,				/* C.1.2, C.2.1, C.2.2 */
 | 
			
		||||
@@ -155,7 +155,7 @@ static const pg_wchar prohibited_output_ranges[] =
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* A.1 Unassigned code points in Unicode 3.2 */
 | 
			
		||||
static const pg_wchar unassigned_codepoint_ranges[] =
 | 
			
		||||
static const char32_t unassigned_codepoint_ranges[] =
 | 
			
		||||
{
 | 
			
		||||
	0x0221, 0x0221,
 | 
			
		||||
	0x0234, 0x024F,
 | 
			
		||||
@@ -556,7 +556,7 @@ static const pg_wchar unassigned_codepoint_ranges[] =
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* D.1 Characters with bidirectional property "R" or "AL" */
 | 
			
		||||
static const pg_wchar RandALCat_codepoint_ranges[] =
 | 
			
		||||
static const char32_t RandALCat_codepoint_ranges[] =
 | 
			
		||||
{
 | 
			
		||||
	0x05BE, 0x05BE,
 | 
			
		||||
	0x05C0, 0x05C0,
 | 
			
		||||
@@ -595,7 +595,7 @@ static const pg_wchar RandALCat_codepoint_ranges[] =
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/* D.2 Characters with bidirectional property "L" */
 | 
			
		||||
static const pg_wchar LCat_codepoint_ranges[] =
 | 
			
		||||
static const char32_t LCat_codepoint_ranges[] =
 | 
			
		||||
{
 | 
			
		||||
	0x0041, 0x005A,
 | 
			
		||||
	0x0061, 0x007A,
 | 
			
		||||
@@ -968,8 +968,8 @@ static const pg_wchar LCat_codepoint_ranges[] =
 | 
			
		||||
static int
 | 
			
		||||
codepoint_range_cmp(const void *a, const void *b)
 | 
			
		||||
{
 | 
			
		||||
	const pg_wchar *key = (const pg_wchar *) a;
 | 
			
		||||
	const pg_wchar *range = (const pg_wchar *) b;
 | 
			
		||||
	const char32_t *key = (const char32_t *) a;
 | 
			
		||||
	const char32_t *range = (const char32_t *) b;
 | 
			
		||||
 | 
			
		||||
	if (*key < range[0])
 | 
			
		||||
		return -1;				/* less than lower bound */
 | 
			
		||||
@@ -980,14 +980,14 @@ codepoint_range_cmp(const void *a, const void *b)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static bool
 | 
			
		||||
is_code_in_table(pg_wchar code, const pg_wchar *map, int mapsize)
 | 
			
		||||
is_code_in_table(char32_t code, const char32_t *map, int mapsize)
 | 
			
		||||
{
 | 
			
		||||
	Assert(mapsize % 2 == 0);
 | 
			
		||||
 | 
			
		||||
	if (code < map[0] || code > map[mapsize - 1])
 | 
			
		||||
		return false;
 | 
			
		||||
 | 
			
		||||
	if (bsearch(&code, map, mapsize / 2, sizeof(pg_wchar) * 2,
 | 
			
		||||
	if (bsearch(&code, map, mapsize / 2, sizeof(char32_t) * 2,
 | 
			
		||||
				codepoint_range_cmp))
 | 
			
		||||
		return true;
 | 
			
		||||
	else
 | 
			
		||||
@@ -1046,8 +1046,8 @@ pg_utf8_string_len(const char *source)
 | 
			
		||||
pg_saslprep_rc
 | 
			
		||||
pg_saslprep(const char *input, char **output)
 | 
			
		||||
{
 | 
			
		||||
	pg_wchar   *input_chars = NULL;
 | 
			
		||||
	pg_wchar   *output_chars = NULL;
 | 
			
		||||
	char32_t   *input_chars = NULL;
 | 
			
		||||
	char32_t   *output_chars = NULL;
 | 
			
		||||
	int			input_size;
 | 
			
		||||
	char	   *result;
 | 
			
		||||
	int			result_size;
 | 
			
		||||
@@ -1055,7 +1055,7 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
	int			i;
 | 
			
		||||
	bool		contains_RandALCat;
 | 
			
		||||
	unsigned char *p;
 | 
			
		||||
	pg_wchar   *wp;
 | 
			
		||||
	char32_t   *wp;
 | 
			
		||||
 | 
			
		||||
	/* Ensure we return *output as NULL on failure */
 | 
			
		||||
	*output = NULL;
 | 
			
		||||
@@ -1080,10 +1080,10 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
	input_size = pg_utf8_string_len(input);
 | 
			
		||||
	if (input_size < 0)
 | 
			
		||||
		return SASLPREP_INVALID_UTF8;
 | 
			
		||||
	if (input_size >= MaxAllocSize / sizeof(pg_wchar))
 | 
			
		||||
	if (input_size >= MaxAllocSize / sizeof(char32_t))
 | 
			
		||||
		goto oom;
 | 
			
		||||
 | 
			
		||||
	input_chars = ALLOC((input_size + 1) * sizeof(pg_wchar));
 | 
			
		||||
	input_chars = ALLOC((input_size + 1) * sizeof(char32_t));
 | 
			
		||||
	if (!input_chars)
 | 
			
		||||
		goto oom;
 | 
			
		||||
 | 
			
		||||
@@ -1093,7 +1093,7 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
		input_chars[i] = utf8_to_unicode(p);
 | 
			
		||||
		p += pg_utf_mblen(p);
 | 
			
		||||
	}
 | 
			
		||||
	input_chars[i] = (pg_wchar) '\0';
 | 
			
		||||
	input_chars[i] = (char32_t) '\0';
 | 
			
		||||
 | 
			
		||||
	/*
 | 
			
		||||
	 * The steps below correspond to the steps listed in [RFC3454], Section
 | 
			
		||||
@@ -1107,7 +1107,7 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
	count = 0;
 | 
			
		||||
	for (i = 0; i < input_size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	code = input_chars[i];
 | 
			
		||||
		char32_t	code = input_chars[i];
 | 
			
		||||
 | 
			
		||||
		if (IS_CODE_IN_TABLE(code, non_ascii_space_ranges))
 | 
			
		||||
			input_chars[count++] = 0x0020;
 | 
			
		||||
@@ -1118,7 +1118,7 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
		else
 | 
			
		||||
			input_chars[count++] = code;
 | 
			
		||||
	}
 | 
			
		||||
	input_chars[count] = (pg_wchar) '\0';
 | 
			
		||||
	input_chars[count] = (char32_t) '\0';
 | 
			
		||||
	input_size = count;
 | 
			
		||||
 | 
			
		||||
	if (input_size == 0)
 | 
			
		||||
@@ -1138,7 +1138,7 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
	 */
 | 
			
		||||
	for (i = 0; i < input_size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	code = input_chars[i];
 | 
			
		||||
		char32_t	code = input_chars[i];
 | 
			
		||||
 | 
			
		||||
		if (IS_CODE_IN_TABLE(code, prohibited_output_ranges))
 | 
			
		||||
			goto prohibited;
 | 
			
		||||
@@ -1170,7 +1170,7 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
	contains_RandALCat = false;
 | 
			
		||||
	for (i = 0; i < input_size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	code = input_chars[i];
 | 
			
		||||
		char32_t	code = input_chars[i];
 | 
			
		||||
 | 
			
		||||
		if (IS_CODE_IN_TABLE(code, RandALCat_codepoint_ranges))
 | 
			
		||||
		{
 | 
			
		||||
@@ -1181,12 +1181,12 @@ pg_saslprep(const char *input, char **output)
 | 
			
		||||
 | 
			
		||||
	if (contains_RandALCat)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	first = input_chars[0];
 | 
			
		||||
		pg_wchar	last = input_chars[input_size - 1];
 | 
			
		||||
		char32_t	first = input_chars[0];
 | 
			
		||||
		char32_t	last = input_chars[input_size - 1];
 | 
			
		||||
 | 
			
		||||
		for (i = 0; i < input_size; i++)
 | 
			
		||||
		{
 | 
			
		||||
			pg_wchar	code = input_chars[i];
 | 
			
		||||
			char32_t	code = input_chars[i];
 | 
			
		||||
 | 
			
		||||
			if (IS_CODE_IN_TABLE(code, LCat_codepoint_ranges))
 | 
			
		||||
				goto prohibited;
 | 
			
		||||
 
 | 
			
		||||
@@ -24,6 +24,7 @@
 | 
			
		||||
#include "common/unicode_case.h"
 | 
			
		||||
#include "common/unicode_category.h"
 | 
			
		||||
#include "common/unicode_version.h"
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
/* enough to hold largest source or result string, including NUL */
 | 
			
		||||
#define BUFSZ 256
 | 
			
		||||
@@ -54,7 +55,7 @@ initcap_wbnext(void *state)
 | 
			
		||||
	while (wbstate->offset < wbstate->len &&
 | 
			
		||||
		   wbstate->str[wbstate->offset] != '\0')
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	u = utf8_to_unicode((unsigned char *) wbstate->str +
 | 
			
		||||
		char32_t	u = utf8_to_unicode((unsigned char *) wbstate->str +
 | 
			
		||||
										wbstate->offset);
 | 
			
		||||
		bool		curr_alnum = pg_u_isalnum(u, wbstate->posix);
 | 
			
		||||
 | 
			
		||||
@@ -77,16 +78,16 @@ initcap_wbnext(void *state)
 | 
			
		||||
#ifdef USE_ICU
 | 
			
		||||
 | 
			
		||||
static void
 | 
			
		||||
icu_test_simple(pg_wchar code)
 | 
			
		||||
icu_test_simple(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	pg_wchar	lower = unicode_lowercase_simple(code);
 | 
			
		||||
	pg_wchar	title = unicode_titlecase_simple(code);
 | 
			
		||||
	pg_wchar	upper = unicode_uppercase_simple(code);
 | 
			
		||||
	pg_wchar	fold = unicode_casefold_simple(code);
 | 
			
		||||
	pg_wchar	iculower = u_tolower(code);
 | 
			
		||||
	pg_wchar	icutitle = u_totitle(code);
 | 
			
		||||
	pg_wchar	icuupper = u_toupper(code);
 | 
			
		||||
	pg_wchar	icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
 | 
			
		||||
	char32_t	lower = unicode_lowercase_simple(code);
 | 
			
		||||
	char32_t	title = unicode_titlecase_simple(code);
 | 
			
		||||
	char32_t	upper = unicode_uppercase_simple(code);
 | 
			
		||||
	char32_t	fold = unicode_casefold_simple(code);
 | 
			
		||||
	char32_t	iculower = u_tolower(code);
 | 
			
		||||
	char32_t	icutitle = u_totitle(code);
 | 
			
		||||
	char32_t	icuupper = u_toupper(code);
 | 
			
		||||
	char32_t	icufold = u_foldCase(code, U_FOLD_CASE_DEFAULT);
 | 
			
		||||
 | 
			
		||||
	if (lower != iculower || title != icutitle || upper != icuupper ||
 | 
			
		||||
		fold != icufold)
 | 
			
		||||
@@ -172,7 +173,7 @@ test_icu(void)
 | 
			
		||||
	int			successful = 0;
 | 
			
		||||
	int			skipped_mismatch = 0;
 | 
			
		||||
 | 
			
		||||
	for (pg_wchar code = 0; code <= 0x10ffff; code++)
 | 
			
		||||
	for (char32_t code = 0; code <= 0x10ffff; code++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_unicode_category category = unicode_category(code);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -22,6 +22,7 @@
 | 
			
		||||
 | 
			
		||||
#include "common/unicode_category.h"
 | 
			
		||||
#include "common/unicode_version.h"
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
static int	pg_unicode_version = 0;
 | 
			
		||||
#ifdef USE_ICU
 | 
			
		||||
@@ -59,7 +60,7 @@ icu_test()
 | 
			
		||||
	int			pg_skipped_codepoints = 0;
 | 
			
		||||
	int			icu_skipped_codepoints = 0;
 | 
			
		||||
 | 
			
		||||
	for (pg_wchar code = 0; code <= 0x10ffff; code++)
 | 
			
		||||
	for (char32_t code = 0; code <= 0x10ffff; code++)
 | 
			
		||||
	{
 | 
			
		||||
		uint8_t		pg_category = unicode_category(code);
 | 
			
		||||
		uint8_t		icu_category = u_charType(code);
 | 
			
		||||
 
 | 
			
		||||
@@ -47,8 +47,8 @@ print $OUTPUT <<HEADER;
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	int			linenum;
 | 
			
		||||
	pg_wchar	input[50];
 | 
			
		||||
	pg_wchar	output[4][50];
 | 
			
		||||
	char32_t	input[50];
 | 
			
		||||
	char32_t	output[4][50];
 | 
			
		||||
} pg_unicode_test;
 | 
			
		||||
 | 
			
		||||
/* test table */
 | 
			
		||||
 
 | 
			
		||||
@@ -270,7 +270,6 @@ print $OT <<"EOS";
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "common/unicode_case.h"
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * The maximum number of codepoints that can result from case mapping
 | 
			
		||||
@@ -297,7 +296,7 @@ typedef enum
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	int16		conditions;
 | 
			
		||||
	pg_wchar	map[NCaseKind][MAX_CASE_EXPANSION];
 | 
			
		||||
	char32_t	map[NCaseKind][MAX_CASE_EXPANSION];
 | 
			
		||||
} pg_special_case;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
@@ -430,7 +429,7 @@ foreach my $kind ('lower', 'title', 'upper', 'fold')
 | 
			
		||||
 * The entry case_map_${kind}[case_index(codepoint)] is the mapping for the
 | 
			
		||||
 * given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar case_map_$kind\[$index\] =
 | 
			
		||||
static const char32_t case_map_$kind\[$index\] =
 | 
			
		||||
{
 | 
			
		||||
EOS
 | 
			
		||||
 | 
			
		||||
@@ -502,7 +501,7 @@ print $OT <<"EOS";
 | 
			
		||||
 * the offset into the mapping tables.
 | 
			
		||||
 */
 | 
			
		||||
static inline uint16
 | 
			
		||||
case_index(pg_wchar cp)
 | 
			
		||||
case_index(char32_t cp)
 | 
			
		||||
{
 | 
			
		||||
	/* Fast path for codepoints < $fastpath_limit */
 | 
			
		||||
	if (cp < $fastpath_limit)
 | 
			
		||||
 
 | 
			
		||||
@@ -366,15 +366,15 @@ print $OT <<"EOS";
 | 
			
		||||
 */
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	uint32		first;			/* Unicode codepoint */
 | 
			
		||||
	uint32		last;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	first;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	last;			/* Unicode codepoint */
 | 
			
		||||
	uint8		category;		/* General Category */
 | 
			
		||||
} pg_category_range;
 | 
			
		||||
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	uint32		first;			/* Unicode codepoint */
 | 
			
		||||
	uint32		last;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	first;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	last;			/* Unicode codepoint */
 | 
			
		||||
} pg_unicode_range;
 | 
			
		||||
 | 
			
		||||
typedef struct
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,7 @@
 | 
			
		||||
#include "norm_test_table.h"
 | 
			
		||||
 | 
			
		||||
static char *
 | 
			
		||||
print_wchar_str(const pg_wchar *s)
 | 
			
		||||
print_wchar_str(const char32_t *s)
 | 
			
		||||
{
 | 
			
		||||
#define BUF_DIGITS 50
 | 
			
		||||
	static char buf[BUF_DIGITS * 11 + 1];
 | 
			
		||||
@@ -41,7 +41,7 @@ print_wchar_str(const pg_wchar *s)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static int
 | 
			
		||||
pg_wcscmp(const pg_wchar *s1, const pg_wchar *s2)
 | 
			
		||||
pg_wcscmp(const char32_t *s1, const char32_t *s2)
 | 
			
		||||
{
 | 
			
		||||
	for (;;)
 | 
			
		||||
	{
 | 
			
		||||
@@ -65,7 +65,7 @@ main(int argc, char **argv)
 | 
			
		||||
	{
 | 
			
		||||
		for (int form = 0; form < 4; form++)
 | 
			
		||||
		{
 | 
			
		||||
			pg_wchar   *result;
 | 
			
		||||
			char32_t   *result;
 | 
			
		||||
 | 
			
		||||
			result = unicode_normalize(form, test->input);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -30,7 +30,7 @@ enum CaseMapResult
 | 
			
		||||
/*
 | 
			
		||||
 * Map for each case kind.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar *const casekind_map[NCaseKind] =
 | 
			
		||||
static const char32_t *const casekind_map[NCaseKind] =
 | 
			
		||||
{
 | 
			
		||||
	[CaseLower] = case_map_lower,
 | 
			
		||||
	[CaseTitle] = case_map_title,
 | 
			
		||||
@@ -38,42 +38,42 @@ static const pg_wchar *const casekind_map[NCaseKind] =
 | 
			
		||||
	[CaseFold] = case_map_fold,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
static pg_wchar find_case_map(pg_wchar ucs, const pg_wchar *map);
 | 
			
		||||
static char32_t find_case_map(char32_t ucs, const char32_t *map);
 | 
			
		||||
static size_t convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 | 
			
		||||
						   CaseKind str_casekind, bool full, WordBoundaryNext wbnext,
 | 
			
		||||
						   void *wbstate);
 | 
			
		||||
static enum CaseMapResult casemap(pg_wchar u1, CaseKind casekind, bool full,
 | 
			
		||||
static enum CaseMapResult casemap(char32_t u1, CaseKind casekind, bool full,
 | 
			
		||||
								  const char *src, size_t srclen, size_t srcoff,
 | 
			
		||||
								  pg_wchar *simple, const pg_wchar **special);
 | 
			
		||||
								  char32_t *simple, const char32_t **special);
 | 
			
		||||
 | 
			
		||||
pg_wchar
 | 
			
		||||
unicode_lowercase_simple(pg_wchar code)
 | 
			
		||||
char32_t
 | 
			
		||||
unicode_lowercase_simple(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	pg_wchar	cp = find_case_map(code, case_map_lower);
 | 
			
		||||
	char32_t	cp = find_case_map(code, case_map_lower);
 | 
			
		||||
 | 
			
		||||
	return cp != 0 ? cp : code;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pg_wchar
 | 
			
		||||
unicode_titlecase_simple(pg_wchar code)
 | 
			
		||||
char32_t
 | 
			
		||||
unicode_titlecase_simple(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	pg_wchar	cp = find_case_map(code, case_map_title);
 | 
			
		||||
	char32_t	cp = find_case_map(code, case_map_title);
 | 
			
		||||
 | 
			
		||||
	return cp != 0 ? cp : code;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pg_wchar
 | 
			
		||||
unicode_uppercase_simple(pg_wchar code)
 | 
			
		||||
char32_t
 | 
			
		||||
unicode_uppercase_simple(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	pg_wchar	cp = find_case_map(code, case_map_upper);
 | 
			
		||||
	char32_t	cp = find_case_map(code, case_map_upper);
 | 
			
		||||
 | 
			
		||||
	return cp != 0 ? cp : code;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
pg_wchar
 | 
			
		||||
unicode_casefold_simple(pg_wchar code)
 | 
			
		||||
char32_t
 | 
			
		||||
unicode_casefold_simple(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	pg_wchar	cp = find_case_map(code, case_map_fold);
 | 
			
		||||
	char32_t	cp = find_case_map(code, case_map_fold);
 | 
			
		||||
 | 
			
		||||
	return cp != 0 ? cp : code;
 | 
			
		||||
}
 | 
			
		||||
@@ -231,10 +231,10 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 | 
			
		||||
 | 
			
		||||
	while ((srclen < 0 || srcoff < srclen) && src[srcoff] != '\0')
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	u1 = utf8_to_unicode((unsigned char *) src + srcoff);
 | 
			
		||||
		char32_t	u1 = utf8_to_unicode((unsigned char *) src + srcoff);
 | 
			
		||||
		int			u1len = unicode_utf8len(u1);
 | 
			
		||||
		pg_wchar	simple = 0;
 | 
			
		||||
		const pg_wchar *special = NULL;
 | 
			
		||||
		char32_t	simple = 0;
 | 
			
		||||
		const char32_t *special = NULL;
 | 
			
		||||
		enum CaseMapResult casemap_result;
 | 
			
		||||
 | 
			
		||||
		if (str_casekind == CaseTitle)
 | 
			
		||||
@@ -265,8 +265,8 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 | 
			
		||||
			case CASEMAP_SIMPLE:
 | 
			
		||||
				{
 | 
			
		||||
					/* replace with single character */
 | 
			
		||||
					pg_wchar	u2 = simple;
 | 
			
		||||
					pg_wchar	u2len = unicode_utf8len(u2);
 | 
			
		||||
					char32_t	u2 = simple;
 | 
			
		||||
					char32_t	u2len = unicode_utf8len(u2);
 | 
			
		||||
 | 
			
		||||
					Assert(special == NULL);
 | 
			
		||||
					if (result_len + u2len <= dstsize)
 | 
			
		||||
@@ -280,7 +280,7 @@ convert_case(char *dst, size_t dstsize, const char *src, ssize_t srclen,
 | 
			
		||||
				Assert(simple == 0);
 | 
			
		||||
				for (int i = 0; i < MAX_CASE_EXPANSION && special[i]; i++)
 | 
			
		||||
				{
 | 
			
		||||
					pg_wchar	u2 = special[i];
 | 
			
		||||
					char32_t	u2 = special[i];
 | 
			
		||||
					size_t		u2len = unicode_utf8len(u2);
 | 
			
		||||
 | 
			
		||||
					if (result_len + u2len <= dstsize)
 | 
			
		||||
@@ -320,7 +320,7 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
 | 
			
		||||
	{
 | 
			
		||||
		if ((str[i] & 0x80) == 0 || (str[i] & 0xC0) == 0xC0)
 | 
			
		||||
		{
 | 
			
		||||
			pg_wchar	curr = utf8_to_unicode(str + i);
 | 
			
		||||
			char32_t	curr = utf8_to_unicode(str + i);
 | 
			
		||||
 | 
			
		||||
			if (pg_u_prop_case_ignorable(curr))
 | 
			
		||||
				continue;
 | 
			
		||||
@@ -344,7 +344,7 @@ check_final_sigma(const unsigned char *str, size_t len, size_t offset)
 | 
			
		||||
	{
 | 
			
		||||
		if ((str[i] & 0x80) == 0 || (str[i] & 0xC0) == 0xC0)
 | 
			
		||||
		{
 | 
			
		||||
			pg_wchar	curr = utf8_to_unicode(str + i);
 | 
			
		||||
			char32_t	curr = utf8_to_unicode(str + i);
 | 
			
		||||
 | 
			
		||||
			if (pg_u_prop_case_ignorable(curr))
 | 
			
		||||
				continue;
 | 
			
		||||
@@ -394,9 +394,9 @@ check_special_conditions(int conditions, const char *str, size_t len,
 | 
			
		||||
 * character without modification.
 | 
			
		||||
 */
 | 
			
		||||
static enum CaseMapResult
 | 
			
		||||
casemap(pg_wchar u1, CaseKind casekind, bool full,
 | 
			
		||||
casemap(char32_t u1, CaseKind casekind, bool full,
 | 
			
		||||
		const char *src, size_t srclen, size_t srcoff,
 | 
			
		||||
		pg_wchar *simple, const pg_wchar **special)
 | 
			
		||||
		char32_t *simple, const char32_t **special)
 | 
			
		||||
{
 | 
			
		||||
	uint16		idx;
 | 
			
		||||
 | 
			
		||||
@@ -434,8 +434,8 @@ casemap(pg_wchar u1, CaseKind casekind, bool full,
 | 
			
		||||
 * Find entry in simple case map.
 | 
			
		||||
 * If the entry does not exist, 0 will be returned.
 | 
			
		||||
 */
 | 
			
		||||
static pg_wchar
 | 
			
		||||
find_case_map(pg_wchar ucs, const pg_wchar *map)
 | 
			
		||||
static char32_t
 | 
			
		||||
find_case_map(char32_t ucs, const char32_t *map)
 | 
			
		||||
{
 | 
			
		||||
	/* Fast path for codepoints < 0x80 */
 | 
			
		||||
	if (ucs < 0x80)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
/*-------------------------------------------------------------------------
 | 
			
		||||
 * unicode_category.c
 | 
			
		||||
 *		Determine general category and character properties of Unicode
 | 
			
		||||
 *		characters. Encoding must be UTF8, where we assume that the pg_wchar
 | 
			
		||||
 *		characters. Encoding must be UTF8, where we assume that the char32_t
 | 
			
		||||
 *		representation is a code point.
 | 
			
		||||
 *
 | 
			
		||||
 * Portions Copyright (c) 2017-2025, PostgreSQL Global Development Group
 | 
			
		||||
@@ -76,13 +76,13 @@
 | 
			
		||||
#define PG_U_CHARACTER_TAB	0x09
 | 
			
		||||
 | 
			
		||||
static bool range_search(const pg_unicode_range *tbl, size_t size,
 | 
			
		||||
						 pg_wchar code);
 | 
			
		||||
						 char32_t code);
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Unicode general category for the given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
pg_unicode_category
 | 
			
		||||
unicode_category(pg_wchar code)
 | 
			
		||||
unicode_category(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	int			min = 0;
 | 
			
		||||
	int			mid;
 | 
			
		||||
@@ -108,7 +108,7 @@ unicode_category(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_alphabetic(pg_wchar code)
 | 
			
		||||
pg_u_prop_alphabetic(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_ALPHABETIC;
 | 
			
		||||
@@ -119,7 +119,7 @@ pg_u_prop_alphabetic(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_lowercase(pg_wchar code)
 | 
			
		||||
pg_u_prop_lowercase(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_LOWERCASE;
 | 
			
		||||
@@ -130,7 +130,7 @@ pg_u_prop_lowercase(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_uppercase(pg_wchar code)
 | 
			
		||||
pg_u_prop_uppercase(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_UPPERCASE;
 | 
			
		||||
@@ -141,7 +141,7 @@ pg_u_prop_uppercase(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_cased(pg_wchar code)
 | 
			
		||||
pg_u_prop_cased(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	uint32		category_mask;
 | 
			
		||||
 | 
			
		||||
@@ -156,7 +156,7 @@ pg_u_prop_cased(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_case_ignorable(pg_wchar code)
 | 
			
		||||
pg_u_prop_case_ignorable(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_CASE_IGNORABLE;
 | 
			
		||||
@@ -167,7 +167,7 @@ pg_u_prop_case_ignorable(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_white_space(pg_wchar code)
 | 
			
		||||
pg_u_prop_white_space(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_WHITE_SPACE;
 | 
			
		||||
@@ -178,7 +178,7 @@ pg_u_prop_white_space(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_hex_digit(pg_wchar code)
 | 
			
		||||
pg_u_prop_hex_digit(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_HEX_DIGIT;
 | 
			
		||||
@@ -189,7 +189,7 @@ pg_u_prop_hex_digit(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_prop_join_control(pg_wchar code)
 | 
			
		||||
pg_u_prop_join_control(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	if (code < 0x80)
 | 
			
		||||
		return unicode_opt_ascii[code].properties & PG_U_PROP_JOIN_CONTROL;
 | 
			
		||||
@@ -208,7 +208,7 @@ pg_u_prop_join_control(pg_wchar code)
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isdigit(pg_wchar code, bool posix)
 | 
			
		||||
pg_u_isdigit(char32_t code, bool posix)
 | 
			
		||||
{
 | 
			
		||||
	if (posix)
 | 
			
		||||
		return ('0' <= code && code <= '9');
 | 
			
		||||
@@ -217,19 +217,19 @@ pg_u_isdigit(pg_wchar code, bool posix)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isalpha(pg_wchar code)
 | 
			
		||||
pg_u_isalpha(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_prop_alphabetic(code);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isalnum(pg_wchar code, bool posix)
 | 
			
		||||
pg_u_isalnum(char32_t code, bool posix)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_isalpha(code) || pg_u_isdigit(code, posix);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isword(pg_wchar code)
 | 
			
		||||
pg_u_isword(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	uint32		category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
 | 
			
		||||
 | 
			
		||||
@@ -240,32 +240,32 @@ pg_u_isword(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isupper(pg_wchar code)
 | 
			
		||||
pg_u_isupper(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_prop_uppercase(code);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_islower(pg_wchar code)
 | 
			
		||||
pg_u_islower(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_prop_lowercase(code);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isblank(pg_wchar code)
 | 
			
		||||
pg_u_isblank(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	return code == PG_U_CHARACTER_TAB ||
 | 
			
		||||
		unicode_category(code) == PG_U_SPACE_SEPARATOR;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_iscntrl(pg_wchar code)
 | 
			
		||||
pg_u_iscntrl(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	return unicode_category(code) == PG_U_CONTROL;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isgraph(pg_wchar code)
 | 
			
		||||
pg_u_isgraph(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	uint32		category_mask = PG_U_CATEGORY_MASK(unicode_category(code));
 | 
			
		||||
 | 
			
		||||
@@ -276,7 +276,7 @@ pg_u_isgraph(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isprint(pg_wchar code)
 | 
			
		||||
pg_u_isprint(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	pg_unicode_category category = unicode_category(code);
 | 
			
		||||
 | 
			
		||||
@@ -287,7 +287,7 @@ pg_u_isprint(pg_wchar code)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_ispunct(pg_wchar code, bool posix)
 | 
			
		||||
pg_u_ispunct(char32_t code, bool posix)
 | 
			
		||||
{
 | 
			
		||||
	uint32		category_mask;
 | 
			
		||||
 | 
			
		||||
@@ -308,13 +308,13 @@ pg_u_ispunct(pg_wchar code, bool posix)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isspace(pg_wchar code)
 | 
			
		||||
pg_u_isspace(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	return pg_u_prop_white_space(code);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool
 | 
			
		||||
pg_u_isxdigit(pg_wchar code, bool posix)
 | 
			
		||||
pg_u_isxdigit(char32_t code, bool posix)
 | 
			
		||||
{
 | 
			
		||||
	if (posix)
 | 
			
		||||
		return (('0' <= code && code <= '9') ||
 | 
			
		||||
@@ -478,7 +478,7 @@ unicode_category_abbrev(pg_unicode_category category)
 | 
			
		||||
 * given table.
 | 
			
		||||
 */
 | 
			
		||||
static bool
 | 
			
		||||
range_search(const pg_unicode_range *tbl, size_t size, pg_wchar code)
 | 
			
		||||
range_search(const pg_unicode_range *tbl, size_t size, char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	int			min = 0;
 | 
			
		||||
	int			mid;
 | 
			
		||||
 
 | 
			
		||||
@@ -69,7 +69,7 @@ conv_compare(const void *p1, const void *p2)
 | 
			
		||||
 * lookup, while the frontend version uses a binary search.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_unicode_decomposition *
 | 
			
		||||
get_code_entry(pg_wchar code)
 | 
			
		||||
get_code_entry(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
#ifndef FRONTEND
 | 
			
		||||
	int			h;
 | 
			
		||||
@@ -109,7 +109,7 @@ get_code_entry(pg_wchar code)
 | 
			
		||||
 * Get the combining class of the given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
static uint8
 | 
			
		||||
get_canonical_class(pg_wchar code)
 | 
			
		||||
get_canonical_class(char32_t code)
 | 
			
		||||
{
 | 
			
		||||
	const pg_unicode_decomposition *entry = get_code_entry(code);
 | 
			
		||||
 | 
			
		||||
@@ -130,15 +130,15 @@ get_canonical_class(pg_wchar code)
 | 
			
		||||
 * Note: the returned pointer can point to statically allocated buffer, and
 | 
			
		||||
 * is only valid until next call to this function!
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar *
 | 
			
		||||
static const char32_t *
 | 
			
		||||
get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
 | 
			
		||||
{
 | 
			
		||||
	static pg_wchar x;
 | 
			
		||||
	static char32_t x;
 | 
			
		||||
 | 
			
		||||
	if (DECOMPOSITION_IS_INLINE(entry))
 | 
			
		||||
	{
 | 
			
		||||
		Assert(DECOMPOSITION_SIZE(entry) == 1);
 | 
			
		||||
		x = (pg_wchar) entry->dec_index;
 | 
			
		||||
		x = (char32_t) entry->dec_index;
 | 
			
		||||
		*dec_size = 1;
 | 
			
		||||
		return &x;
 | 
			
		||||
	}
 | 
			
		||||
@@ -156,7 +156,7 @@ get_code_decomposition(const pg_unicode_decomposition *entry, int *dec_size)
 | 
			
		||||
 * are, in turn, decomposable.
 | 
			
		||||
 */
 | 
			
		||||
static int
 | 
			
		||||
get_decomposed_size(pg_wchar code, bool compat)
 | 
			
		||||
get_decomposed_size(char32_t code, bool compat)
 | 
			
		||||
{
 | 
			
		||||
	const pg_unicode_decomposition *entry;
 | 
			
		||||
	int			size = 0;
 | 
			
		||||
@@ -318,7 +318,7 @@ recompose_code(uint32 start, uint32 code, uint32 *result)
 | 
			
		||||
 * in the array result.
 | 
			
		||||
 */
 | 
			
		||||
static void
 | 
			
		||||
decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
 | 
			
		||||
decompose_code(char32_t code, bool compat, char32_t **result, int *current)
 | 
			
		||||
{
 | 
			
		||||
	const pg_unicode_decomposition *entry;
 | 
			
		||||
	int			i;
 | 
			
		||||
@@ -337,7 +337,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
 | 
			
		||||
					v,
 | 
			
		||||
					tindex,
 | 
			
		||||
					sindex;
 | 
			
		||||
		pg_wchar   *res = *result;
 | 
			
		||||
		char32_t   *res = *result;
 | 
			
		||||
 | 
			
		||||
		sindex = code - SBASE;
 | 
			
		||||
		l = LBASE + sindex / (VCOUNT * TCOUNT);
 | 
			
		||||
@@ -369,7 +369,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
 | 
			
		||||
	if (entry == NULL || DECOMPOSITION_SIZE(entry) == 0 ||
 | 
			
		||||
		(!compat && DECOMPOSITION_IS_COMPAT(entry)))
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar   *res = *result;
 | 
			
		||||
		char32_t   *res = *result;
 | 
			
		||||
 | 
			
		||||
		res[*current] = code;
 | 
			
		||||
		(*current)++;
 | 
			
		||||
@@ -382,7 +382,7 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
 | 
			
		||||
	decomp = get_code_decomposition(entry, &dec_size);
 | 
			
		||||
	for (i = 0; i < dec_size; i++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	lcode = (pg_wchar) decomp[i];
 | 
			
		||||
		char32_t	lcode = (char32_t) decomp[i];
 | 
			
		||||
 | 
			
		||||
		/* Leave if no more decompositions */
 | 
			
		||||
		decompose_code(lcode, compat, result, current);
 | 
			
		||||
@@ -398,17 +398,17 @@ decompose_code(pg_wchar code, bool compat, pg_wchar **result, int *current)
 | 
			
		||||
 * malloc. Or NULL if we run out of memory. In backend, the returned
 | 
			
		||||
 * string is palloc'd instead, and OOM is reported with ereport().
 | 
			
		||||
 */
 | 
			
		||||
pg_wchar *
 | 
			
		||||
unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
char32_t *
 | 
			
		||||
unicode_normalize(UnicodeNormalizationForm form, const char32_t *input)
 | 
			
		||||
{
 | 
			
		||||
	bool		compat = (form == UNICODE_NFKC || form == UNICODE_NFKD);
 | 
			
		||||
	bool		recompose = (form == UNICODE_NFC || form == UNICODE_NFKC);
 | 
			
		||||
	pg_wchar   *decomp_chars;
 | 
			
		||||
	pg_wchar   *recomp_chars;
 | 
			
		||||
	char32_t   *decomp_chars;
 | 
			
		||||
	char32_t   *recomp_chars;
 | 
			
		||||
	int			decomp_size,
 | 
			
		||||
				current_size;
 | 
			
		||||
	int			count;
 | 
			
		||||
	const pg_wchar *p;
 | 
			
		||||
	const char32_t *p;
 | 
			
		||||
 | 
			
		||||
	/* variables for recomposition */
 | 
			
		||||
	int			last_class;
 | 
			
		||||
@@ -425,7 +425,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
	for (p = input; *p; p++)
 | 
			
		||||
		decomp_size += get_decomposed_size(*p, compat);
 | 
			
		||||
 | 
			
		||||
	decomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
 | 
			
		||||
	decomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));
 | 
			
		||||
	if (decomp_chars == NULL)
 | 
			
		||||
		return NULL;
 | 
			
		||||
 | 
			
		||||
@@ -448,9 +448,9 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
	 */
 | 
			
		||||
	for (count = 1; count < decomp_size; count++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	prev = decomp_chars[count - 1];
 | 
			
		||||
		pg_wchar	next = decomp_chars[count];
 | 
			
		||||
		pg_wchar	tmp;
 | 
			
		||||
		char32_t	prev = decomp_chars[count - 1];
 | 
			
		||||
		char32_t	next = decomp_chars[count];
 | 
			
		||||
		char32_t	tmp;
 | 
			
		||||
		const uint8 prevClass = get_canonical_class(prev);
 | 
			
		||||
		const uint8 nextClass = get_canonical_class(next);
 | 
			
		||||
 | 
			
		||||
@@ -487,7 +487,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
	 * longer than the decomposed one, so make the allocation of the output
 | 
			
		||||
	 * string based on that assumption.
 | 
			
		||||
	 */
 | 
			
		||||
	recomp_chars = (pg_wchar *) ALLOC((decomp_size + 1) * sizeof(pg_wchar));
 | 
			
		||||
	recomp_chars = (char32_t *) ALLOC((decomp_size + 1) * sizeof(char32_t));
 | 
			
		||||
	if (!recomp_chars)
 | 
			
		||||
	{
 | 
			
		||||
		FREE(decomp_chars);
 | 
			
		||||
@@ -501,9 +501,9 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
 | 
			
		||||
	for (count = 1; count < decomp_size; count++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	ch = decomp_chars[count];
 | 
			
		||||
		char32_t	ch = decomp_chars[count];
 | 
			
		||||
		int			ch_class = get_canonical_class(ch);
 | 
			
		||||
		pg_wchar	composite;
 | 
			
		||||
		char32_t	composite;
 | 
			
		||||
 | 
			
		||||
		if (last_class < ch_class &&
 | 
			
		||||
			recompose_code(starter_ch, ch, &composite))
 | 
			
		||||
@@ -524,7 +524,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
			recomp_chars[target_pos++] = ch;
 | 
			
		||||
		}
 | 
			
		||||
	}
 | 
			
		||||
	recomp_chars[target_pos] = (pg_wchar) '\0';
 | 
			
		||||
	recomp_chars[target_pos] = (char32_t) '\0';
 | 
			
		||||
 | 
			
		||||
	FREE(decomp_chars);
 | 
			
		||||
 | 
			
		||||
@@ -540,7 +540,7 @@ unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
#ifndef FRONTEND
 | 
			
		||||
 | 
			
		||||
static const pg_unicode_normprops *
 | 
			
		||||
qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
 | 
			
		||||
qc_hash_lookup(char32_t ch, const pg_unicode_norminfo *norminfo)
 | 
			
		||||
{
 | 
			
		||||
	int			h;
 | 
			
		||||
	uint32		hashkey;
 | 
			
		||||
@@ -571,7 +571,7 @@ qc_hash_lookup(pg_wchar ch, const pg_unicode_norminfo *norminfo)
 | 
			
		||||
 * Look up the normalization quick check character property
 | 
			
		||||
 */
 | 
			
		||||
static UnicodeNormalizationQC
 | 
			
		||||
qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
 | 
			
		||||
qc_is_allowed(UnicodeNormalizationForm form, char32_t ch)
 | 
			
		||||
{
 | 
			
		||||
	const pg_unicode_normprops *found = NULL;
 | 
			
		||||
 | 
			
		||||
@@ -595,7 +595,7 @@ qc_is_allowed(UnicodeNormalizationForm form, pg_wchar ch)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
UnicodeNormalizationQC
 | 
			
		||||
unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input)
 | 
			
		||||
unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input)
 | 
			
		||||
{
 | 
			
		||||
	uint8		lastCanonicalClass = 0;
 | 
			
		||||
	UnicodeNormalizationQC result = UNICODE_NORM_QC_YES;
 | 
			
		||||
@@ -610,9 +610,9 @@ unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *
 | 
			
		||||
	if (form == UNICODE_NFD || form == UNICODE_NFKD)
 | 
			
		||||
		return UNICODE_NORM_QC_MAYBE;
 | 
			
		||||
 | 
			
		||||
	for (const pg_wchar *p = input; *p; p++)
 | 
			
		||||
	for (const char32_t *p = input; *p; p++)
 | 
			
		||||
	{
 | 
			
		||||
		pg_wchar	ch = *p;
 | 
			
		||||
		char32_t	ch = *p;
 | 
			
		||||
		uint8		canonicalClass;
 | 
			
		||||
		UnicodeNormalizationQC check;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -49,20 +49,20 @@ pg_get_utf8_id(void)
 | 
			
		||||
 *
 | 
			
		||||
 * No error checks here, c must point to a long-enough string.
 | 
			
		||||
 */
 | 
			
		||||
static pg_wchar
 | 
			
		||||
static char32_t
 | 
			
		||||
utf8_to_unicode(const unsigned char *c)
 | 
			
		||||
{
 | 
			
		||||
	if ((*c & 0x80) == 0)
 | 
			
		||||
		return (pg_wchar) c[0];
 | 
			
		||||
		return (char32_t) c[0];
 | 
			
		||||
	else if ((*c & 0xe0) == 0xc0)
 | 
			
		||||
		return (pg_wchar) (((c[0] & 0x1f) << 6) |
 | 
			
		||||
		return (char32_t) (((c[0] & 0x1f) << 6) |
 | 
			
		||||
						   (c[1] & 0x3f));
 | 
			
		||||
	else if ((*c & 0xf0) == 0xe0)
 | 
			
		||||
		return (pg_wchar) (((c[0] & 0x0f) << 12) |
 | 
			
		||||
		return (char32_t) (((c[0] & 0x0f) << 12) |
 | 
			
		||||
						   ((c[1] & 0x3f) << 6) |
 | 
			
		||||
						   (c[2] & 0x3f));
 | 
			
		||||
	else if ((*c & 0xf8) == 0xf0)
 | 
			
		||||
		return (pg_wchar) (((c[0] & 0x07) << 18) |
 | 
			
		||||
		return (char32_t) (((c[0] & 0x07) << 18) |
 | 
			
		||||
						   ((c[1] & 0x3f) << 12) |
 | 
			
		||||
						   ((c[2] & 0x3f) << 6) |
 | 
			
		||||
						   (c[3] & 0x3f));
 | 
			
		||||
 
 | 
			
		||||
@@ -1376,6 +1376,29 @@ typedef intptr_t sigjmp_buf[5];
 | 
			
		||||
/* /port compatibility functions */
 | 
			
		||||
#include "port.h"
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * char16_t and char32_t
 | 
			
		||||
 *      Unicode code points.
 | 
			
		||||
 *
 | 
			
		||||
 * uchar.h should always be available in C11, but it's not available on
 | 
			
		||||
 * Mac. However, these types are keywords in C++11, so when using C++, we
 | 
			
		||||
 * can't redefine the types.
 | 
			
		||||
 *
 | 
			
		||||
 * XXX: when uchar.h is available everywhere, we can remove this check and
 | 
			
		||||
 * just include uchar.h unconditionally.
 | 
			
		||||
 *
 | 
			
		||||
 * XXX: this section is out of place because uchar.h needs to be included
 | 
			
		||||
 * after port.h, due to an interaction with win32_port.h in some cases.
 | 
			
		||||
 */
 | 
			
		||||
#ifdef HAVE_UCHAR_H
 | 
			
		||||
#include <uchar.h>
 | 
			
		||||
#else
 | 
			
		||||
#ifndef __cplusplus
 | 
			
		||||
typedef uint16_t char16_t;
 | 
			
		||||
typedef uint32_t char32_t;
 | 
			
		||||
#endif
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
/* IWYU pragma: end_exports */
 | 
			
		||||
 | 
			
		||||
#endif							/* C_H */
 | 
			
		||||
 
 | 
			
		||||
@@ -14,14 +14,12 @@
 | 
			
		||||
#ifndef UNICODE_CASE_H
 | 
			
		||||
#define UNICODE_CASE_H
 | 
			
		||||
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
typedef size_t (*WordBoundaryNext) (void *wbstate);
 | 
			
		||||
 | 
			
		||||
pg_wchar	unicode_lowercase_simple(pg_wchar code);
 | 
			
		||||
pg_wchar	unicode_titlecase_simple(pg_wchar code);
 | 
			
		||||
pg_wchar	unicode_uppercase_simple(pg_wchar code);
 | 
			
		||||
pg_wchar	unicode_casefold_simple(pg_wchar code);
 | 
			
		||||
char32_t	unicode_lowercase_simple(char32_t code);
 | 
			
		||||
char32_t	unicode_titlecase_simple(char32_t code);
 | 
			
		||||
char32_t	unicode_uppercase_simple(char32_t code);
 | 
			
		||||
char32_t	unicode_casefold_simple(char32_t code);
 | 
			
		||||
size_t		unicode_strlower(char *dst, size_t dstsize, const char *src,
 | 
			
		||||
							 ssize_t srclen, bool full);
 | 
			
		||||
size_t		unicode_strtitle(char *dst, size_t dstsize, const char *src,
 | 
			
		||||
 
 | 
			
		||||
@@ -18,7 +18,6 @@
 | 
			
		||||
 */
 | 
			
		||||
 | 
			
		||||
#include "common/unicode_case.h"
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * The maximum number of codepoints that can result from case mapping
 | 
			
		||||
@@ -45,7 +44,7 @@ typedef enum
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	int16		conditions;
 | 
			
		||||
	pg_wchar	map[NCaseKind][MAX_CASE_EXPANSION];
 | 
			
		||||
	char32_t	map[NCaseKind][MAX_CASE_EXPANSION];
 | 
			
		||||
} pg_special_case;
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
@@ -166,7 +165,7 @@ static const pg_special_case special_case[106] =
 | 
			
		||||
 * The entry case_map_lower[case_index(codepoint)] is the mapping for the
 | 
			
		||||
 * given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar case_map_lower[1704] =
 | 
			
		||||
static const char32_t case_map_lower[1704] =
 | 
			
		||||
{
 | 
			
		||||
	0x000000,					/* reserved */
 | 
			
		||||
	0x000000,					/* U+000000 */
 | 
			
		||||
@@ -1879,7 +1878,7 @@ static const pg_wchar case_map_lower[1704] =
 | 
			
		||||
 * The entry case_map_title[case_index(codepoint)] is the mapping for the
 | 
			
		||||
 * given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar case_map_title[1704] =
 | 
			
		||||
static const char32_t case_map_title[1704] =
 | 
			
		||||
{
 | 
			
		||||
	0x000000,					/* reserved */
 | 
			
		||||
	0x000000,					/* U+000000 */
 | 
			
		||||
@@ -3592,7 +3591,7 @@ static const pg_wchar case_map_title[1704] =
 | 
			
		||||
 * The entry case_map_upper[case_index(codepoint)] is the mapping for the
 | 
			
		||||
 * given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar case_map_upper[1704] =
 | 
			
		||||
static const char32_t case_map_upper[1704] =
 | 
			
		||||
{
 | 
			
		||||
	0x000000,					/* reserved */
 | 
			
		||||
	0x000000,					/* U+000000 */
 | 
			
		||||
@@ -5305,7 +5304,7 @@ static const pg_wchar case_map_upper[1704] =
 | 
			
		||||
 * The entry case_map_fold[case_index(codepoint)] is the mapping for the
 | 
			
		||||
 * given codepoint.
 | 
			
		||||
 */
 | 
			
		||||
static const pg_wchar case_map_fold[1704] =
 | 
			
		||||
static const char32_t case_map_fold[1704] =
 | 
			
		||||
{
 | 
			
		||||
	0x000000,					/* reserved */
 | 
			
		||||
	0x000000,					/* U+000000 */
 | 
			
		||||
@@ -13522,7 +13521,7 @@ static const uint16 case_map[4778] =
 | 
			
		||||
 * the offset into the mapping tables.
 | 
			
		||||
 */
 | 
			
		||||
static inline uint16
 | 
			
		||||
case_index(pg_wchar cp)
 | 
			
		||||
case_index(char32_t cp)
 | 
			
		||||
{
 | 
			
		||||
	/* Fast path for codepoints < 0x0588 */
 | 
			
		||||
	if (cp < 0x0588)
 | 
			
		||||
 
 | 
			
		||||
@@ -14,8 +14,6 @@
 | 
			
		||||
#ifndef UNICODE_CATEGORY_H
 | 
			
		||||
#define UNICODE_CATEGORY_H
 | 
			
		||||
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
 * Unicode General Category Values
 | 
			
		||||
 *
 | 
			
		||||
@@ -61,31 +59,31 @@ typedef enum pg_unicode_category
 | 
			
		||||
	PG_U_FINAL_PUNCTUATION = 29 /* Pf */
 | 
			
		||||
} pg_unicode_category;
 | 
			
		||||
 | 
			
		||||
extern pg_unicode_category unicode_category(pg_wchar code);
 | 
			
		||||
extern pg_unicode_category unicode_category(char32_t code);
 | 
			
		||||
extern const char *unicode_category_string(pg_unicode_category category);
 | 
			
		||||
extern const char *unicode_category_abbrev(pg_unicode_category category);
 | 
			
		||||
 | 
			
		||||
extern bool pg_u_prop_alphabetic(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_lowercase(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_uppercase(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_cased(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_case_ignorable(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_white_space(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_hex_digit(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_join_control(pg_wchar code);
 | 
			
		||||
extern bool pg_u_prop_alphabetic(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_lowercase(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_uppercase(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_cased(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_case_ignorable(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_white_space(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_hex_digit(char32_t code);
 | 
			
		||||
extern bool pg_u_prop_join_control(char32_t code);
 | 
			
		||||
 | 
			
		||||
extern bool pg_u_isdigit(pg_wchar code, bool posix);
 | 
			
		||||
extern bool pg_u_isalpha(pg_wchar code);
 | 
			
		||||
extern bool pg_u_isalnum(pg_wchar code, bool posix);
 | 
			
		||||
extern bool pg_u_isword(pg_wchar code);
 | 
			
		||||
extern bool pg_u_isupper(pg_wchar code);
 | 
			
		||||
extern bool pg_u_islower(pg_wchar code);
 | 
			
		||||
extern bool pg_u_isblank(pg_wchar code);
 | 
			
		||||
extern bool pg_u_iscntrl(pg_wchar code);
 | 
			
		||||
extern bool pg_u_isgraph(pg_wchar code);
 | 
			
		||||
extern bool pg_u_isprint(pg_wchar code);
 | 
			
		||||
extern bool pg_u_ispunct(pg_wchar code, bool posix);
 | 
			
		||||
extern bool pg_u_isspace(pg_wchar code);
 | 
			
		||||
extern bool pg_u_isxdigit(pg_wchar code, bool posix);
 | 
			
		||||
extern bool pg_u_isdigit(char32_t code, bool posix);
 | 
			
		||||
extern bool pg_u_isalpha(char32_t code);
 | 
			
		||||
extern bool pg_u_isalnum(char32_t code, bool posix);
 | 
			
		||||
extern bool pg_u_isword(char32_t code);
 | 
			
		||||
extern bool pg_u_isupper(char32_t code);
 | 
			
		||||
extern bool pg_u_islower(char32_t code);
 | 
			
		||||
extern bool pg_u_isblank(char32_t code);
 | 
			
		||||
extern bool pg_u_iscntrl(char32_t code);
 | 
			
		||||
extern bool pg_u_isgraph(char32_t code);
 | 
			
		||||
extern bool pg_u_isprint(char32_t code);
 | 
			
		||||
extern bool pg_u_ispunct(char32_t code, bool posix);
 | 
			
		||||
extern bool pg_u_isspace(char32_t code);
 | 
			
		||||
extern bool pg_u_isxdigit(char32_t code, bool posix);
 | 
			
		||||
 | 
			
		||||
#endif							/* UNICODE_CATEGORY_H */
 | 
			
		||||
 
 | 
			
		||||
@@ -20,15 +20,15 @@
 | 
			
		||||
 */
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	uint32		first;			/* Unicode codepoint */
 | 
			
		||||
	uint32		last;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	first;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	last;			/* Unicode codepoint */
 | 
			
		||||
	uint8		category;		/* General Category */
 | 
			
		||||
} pg_category_range;
 | 
			
		||||
 | 
			
		||||
typedef struct
 | 
			
		||||
{
 | 
			
		||||
	uint32		first;			/* Unicode codepoint */
 | 
			
		||||
	uint32		last;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	first;			/* Unicode codepoint */
 | 
			
		||||
	char32_t	last;			/* Unicode codepoint */
 | 
			
		||||
} pg_unicode_range;
 | 
			
		||||
 | 
			
		||||
typedef struct
 | 
			
		||||
 
 | 
			
		||||
@@ -14,8 +14,6 @@
 | 
			
		||||
#ifndef UNICODE_NORM_H
 | 
			
		||||
#define UNICODE_NORM_H
 | 
			
		||||
 | 
			
		||||
#include "mb/pg_wchar.h"
 | 
			
		||||
 | 
			
		||||
typedef enum
 | 
			
		||||
{
 | 
			
		||||
	UNICODE_NFC = 0,
 | 
			
		||||
@@ -32,8 +30,8 @@ typedef enum
 | 
			
		||||
	UNICODE_NORM_QC_MAYBE = -1,
 | 
			
		||||
} UnicodeNormalizationQC;
 | 
			
		||||
 | 
			
		||||
extern pg_wchar *unicode_normalize(UnicodeNormalizationForm form, const pg_wchar *input);
 | 
			
		||||
extern char32_t *unicode_normalize(UnicodeNormalizationForm form, const char32_t *input);
 | 
			
		||||
 | 
			
		||||
extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const pg_wchar *input);
 | 
			
		||||
extern UnicodeNormalizationQC unicode_is_normalized_quickcheck(UnicodeNormalizationForm form, const char32_t *input);
 | 
			
		||||
 | 
			
		||||
#endif							/* UNICODE_NORM_H */
 | 
			
		||||
 
 | 
			
		||||
@@ -532,25 +532,25 @@ typedef uint32 (*utf_local_conversion_func) (uint32 code);
 | 
			
		||||
 * Some handy functions for Unicode-specific tests.
 | 
			
		||||
 */
 | 
			
		||||
static inline bool
 | 
			
		||||
is_valid_unicode_codepoint(pg_wchar c)
 | 
			
		||||
is_valid_unicode_codepoint(char32_t c)
 | 
			
		||||
{
 | 
			
		||||
	return (c > 0 && c <= 0x10FFFF);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool
 | 
			
		||||
is_utf16_surrogate_first(pg_wchar c)
 | 
			
		||||
is_utf16_surrogate_first(char32_t c)
 | 
			
		||||
{
 | 
			
		||||
	return (c >= 0xD800 && c <= 0xDBFF);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline bool
 | 
			
		||||
is_utf16_surrogate_second(pg_wchar c)
 | 
			
		||||
is_utf16_surrogate_second(char32_t c)
 | 
			
		||||
{
 | 
			
		||||
	return (c >= 0xDC00 && c <= 0xDFFF);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static inline pg_wchar
 | 
			
		||||
surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
 | 
			
		||||
static inline char32_t
 | 
			
		||||
surrogate_pair_to_codepoint(char16_t first, char16_t second)
 | 
			
		||||
{
 | 
			
		||||
	return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF);
 | 
			
		||||
}
 | 
			
		||||
@@ -561,20 +561,20 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second)
 | 
			
		||||
 *
 | 
			
		||||
 * No error checks here, c must point to a long-enough string.
 | 
			
		||||
 */
 | 
			
		||||
static inline pg_wchar
 | 
			
		||||
static inline char32_t
 | 
			
		||||
utf8_to_unicode(const unsigned char *c)
 | 
			
		||||
{
 | 
			
		||||
	if ((*c & 0x80) == 0)
 | 
			
		||||
		return (pg_wchar) c[0];
 | 
			
		||||
		return (char32_t) c[0];
 | 
			
		||||
	else if ((*c & 0xe0) == 0xc0)
 | 
			
		||||
		return (pg_wchar) (((c[0] & 0x1f) << 6) |
 | 
			
		||||
		return (char32_t) (((c[0] & 0x1f) << 6) |
 | 
			
		||||
						   (c[1] & 0x3f));
 | 
			
		||||
	else if ((*c & 0xf0) == 0xe0)
 | 
			
		||||
		return (pg_wchar) (((c[0] & 0x0f) << 12) |
 | 
			
		||||
		return (char32_t) (((c[0] & 0x0f) << 12) |
 | 
			
		||||
						   ((c[1] & 0x3f) << 6) |
 | 
			
		||||
						   (c[2] & 0x3f));
 | 
			
		||||
	else if ((*c & 0xf8) == 0xf0)
 | 
			
		||||
		return (pg_wchar) (((c[0] & 0x07) << 18) |
 | 
			
		||||
		return (char32_t) (((c[0] & 0x07) << 18) |
 | 
			
		||||
						   ((c[1] & 0x3f) << 12) |
 | 
			
		||||
						   ((c[2] & 0x3f) << 6) |
 | 
			
		||||
						   (c[3] & 0x3f));
 | 
			
		||||
@@ -588,7 +588,7 @@ utf8_to_unicode(const unsigned char *c)
 | 
			
		||||
 * unicode_utf8len(c) bytes available.
 | 
			
		||||
 */
 | 
			
		||||
static inline unsigned char *
 | 
			
		||||
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
 | 
			
		||||
unicode_to_utf8(char32_t c, unsigned char *utf8string)
 | 
			
		||||
{
 | 
			
		||||
	if (c <= 0x7F)
 | 
			
		||||
	{
 | 
			
		||||
@@ -620,7 +620,7 @@ unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
 | 
			
		||||
 * Number of bytes needed to represent the given char in UTF8.
 | 
			
		||||
 */
 | 
			
		||||
static inline int
 | 
			
		||||
unicode_utf8len(pg_wchar c)
 | 
			
		||||
unicode_utf8len(char32_t c)
 | 
			
		||||
{
 | 
			
		||||
	if (c <= 0x7F)
 | 
			
		||||
		return 1;
 | 
			
		||||
@@ -676,8 +676,8 @@ extern int	pg_valid_server_encoding(const char *name);
 | 
			
		||||
extern bool is_encoding_supported_by_icu(int encoding);
 | 
			
		||||
extern const char *get_encoding_name_for_icu(int encoding);
 | 
			
		||||
 | 
			
		||||
extern unsigned char *unicode_to_utf8(pg_wchar c, unsigned char *utf8string);
 | 
			
		||||
extern pg_wchar utf8_to_unicode(const unsigned char *c);
 | 
			
		||||
extern unsigned char *unicode_to_utf8(char32_t c, unsigned char *utf8string);
 | 
			
		||||
extern char32_t utf8_to_unicode(const unsigned char *c);
 | 
			
		||||
extern bool pg_utf8_islegal(const unsigned char *source, int length);
 | 
			
		||||
extern int	pg_utf_mblen(const unsigned char *s);
 | 
			
		||||
extern int	pg_mule_mblen(const unsigned char *s);
 | 
			
		||||
@@ -739,8 +739,8 @@ extern char *pg_server_to_client(const char *s, int len);
 | 
			
		||||
extern char *pg_any_to_server(const char *s, int len, int encoding);
 | 
			
		||||
extern char *pg_server_to_any(const char *s, int len, int encoding);
 | 
			
		||||
 | 
			
		||||
extern void pg_unicode_to_server(pg_wchar c, unsigned char *s);
 | 
			
		||||
extern bool pg_unicode_to_server_noerror(pg_wchar c, unsigned char *s);
 | 
			
		||||
extern void pg_unicode_to_server(char32_t c, unsigned char *s);
 | 
			
		||||
extern bool pg_unicode_to_server_noerror(char32_t c, unsigned char *s);
 | 
			
		||||
 | 
			
		||||
extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
 | 
			
		||||
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
 | 
			
		||||
 
 | 
			
		||||
@@ -463,6 +463,9 @@
 | 
			
		||||
/* Define to 1 if you have the <termios.h> header file. */
 | 
			
		||||
#undef HAVE_TERMIOS_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if you have the <uchar.h> header file. */
 | 
			
		||||
#undef HAVE_UCHAR_H
 | 
			
		||||
 | 
			
		||||
/* Define to 1 if curl_global_init() is guaranteed to be thread-safe. */
 | 
			
		||||
#undef HAVE_THREADSAFE_CURL_GLOBAL_INIT
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -3505,6 +3505,8 @@ cb_cleanup_dir
 | 
			
		||||
cb_options
 | 
			
		||||
cb_tablespace
 | 
			
		||||
cb_tablespace_mapping
 | 
			
		||||
char16_t
 | 
			
		||||
char32_t
 | 
			
		||||
check_agg_arguments_context
 | 
			
		||||
check_function_callback
 | 
			
		||||
check_network_data
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user