diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 19285ae1360..fbf6062d0a8 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -3551,6 +3551,52 @@ repeat('Pg', 4) PgPgPgPg
+
+
+
+ unistr
+
+ unistr ( text )
+ text
+
+
+ Evaluates escaped Unicode characters in the argument. Unicode characters
+ can be specified as
+ \XXXX (4 hexadecimal
+ digits), \+XXXXXX (6
+ hexadecimal digits),
+ \uXXXX (4 hexadecimal
+ digits), or \UXXXXXXXX
+ (8 hexadecimal digits). To specify a backslash, write two
+ backslashes. All other characters are taken literally.
+
+
+
+ If the server encoding is not UTF-8, the Unicode code point identified
+ by one of these escape sequences is converted to the actual server
+ encoding; an error is reported if that's not possible.
+
+
+
+ This function provides a (non-standard) alternative to string
+ constants with Unicode escapes (see <xref linkend="sql-syntax-strings-uescape"/>).
+
+
+
+ unistr('\0441\043B\043E\043D')
+ слон
+
+
+ unistr('d\0061t\+000061')
+ data
+
+
+ unistr('d\u0061t\U00000061')
+ data
+
+
+
diff --git a/src/backend/utils/adt/varlena.c b/src/backend/utils/adt/varlena.c
index 640e3fd4c04..efc74e8f2d7 100644
--- a/src/backend/utils/adt/varlena.c
+++ b/src/backend/utils/adt/varlena.c
@@ -6380,3 +6380,213 @@ unicode_is_normalized(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(result);
}
+
+/*
+ * Return true iff each of the first n bytes at instr is a hexadecimal digit
+ */
+static bool
+isxdigits_n(const char *instr, size_t n)
+{
+	const char *end = instr + n;
+
+	while (instr < end && isxdigit((unsigned char) *instr))
+		instr++;
+	return instr == end;
+}
+
+static unsigned int
+hexval(unsigned char c)
+{
+ if (c >= '0' && c <= '9')
+ return c - '0';
+ if (c >= 'a' && c <= 'f')
+ return c - 'a' + 0xA;
+ if (c >= 'A' && c <= 'F')
+ return c - 'A' + 0xA;
+ elog(ERROR, "invalid hexadecimal digit");
+ return 0; /* not reached */
+}
+
+/*
+ * Translate the first n bytes of instr, which must all be hexadecimal
+ * digits (most significant first), to a number.  Four bits are shifted in
+ * per digit, so no shift count ever depends on n.
+ */
+static unsigned int
+hexval_n(const char *instr, size_t n)
+{
+	unsigned int result = 0;
+
+	for (size_t i = 0; i < n; i++)
+		result = (result << 4) | hexval((unsigned char) instr[i]);
+	return result;
+}
+
+/*
+ * Handle one code point decoded from an escape sequence in unistr().
+ *
+ *	str			output buffer that receives the converted characters
+ *	unicode		code point decoded from the escape sequence
+ *	pair_first	in/out: pending first surrogate, or 0 if there is none
+ *
+ * The code point is validated first.  A first UTF-16 surrogate is not
+ * output but remembered in *pair_first; a second surrogate following it
+ * is combined with it into a single code point.  Anything that is not a
+ * first surrogate is converted to the server encoding and appended to
+ * str.
+ *
+ * Returns false if surrogate pairing rules are violated, i.e. a pending
+ * first surrogate is not followed by a second one, or a second surrogate
+ * shows up with no first surrogate pending.  The caller reports the error.
+ */
+static bool
+unistr_append_codepoint(StringInfo str, pg_wchar unicode, pg_wchar *pair_first)
+{
+	char		cbuf[MAX_UNICODE_EQUIVALENT_STRING + 1];
+
+	if (!is_valid_unicode_codepoint(unicode))
+		ereport(ERROR,
+				errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				errmsg("invalid Unicode code point: %04X", unicode));
+
+	if (*pair_first)
+	{
+		/* a first surrogate is pending: only a second surrogate may follow */
+		if (!is_utf16_surrogate_second(unicode))
+			return false;
+		unicode = surrogate_pair_to_codepoint(*pair_first, unicode);
+		*pair_first = 0;
+	}
+	else if (is_utf16_surrogate_second(unicode))
+		return false;			/* second surrogate without a first one */
+
+	if (is_utf16_surrogate_first(unicode))
+		*pair_first = unicode;	/* wait for the second half of the pair */
+	else
+	{
+		/* a complete code point: convert to the server encoding and emit */
+		pg_unicode_to_server(unicode, (unsigned char *) cbuf);
+		appendStringInfoString(str, cbuf);
+	}
+
+	return true;
+}
+
+/*
+ * unistr(text) returns text
+ *
+ * Replaces Unicode escape sequences by Unicode characters.  The recognized
+ * escape sequences are (each X being one hexadecimal digit):
+ *
+ *		\XXXX			4 digits
+ *		\uXXXX			4 digits
+ *		\+XXXXXX		6 digits
+ *		\UXXXXXXXX		8 digits
+ *
+ * Two backslashes produce one backslash, and every other input byte is
+ * copied to the result unchanged.
+ *
+ * A UTF-16 surrogate pair may be written as two consecutive escapes, which
+ * are combined into a single code point.  Each resulting code point is
+ * converted to the server encoding before being appended to the result.
+ *
+ * Errors are raised for a backslash that does not start one of the above
+ * sequences, for an invalid code point, and for an incomplete or misordered
+ * surrogate pair.
+ */
+Datum
+unistr(PG_FUNCTION_ARGS)
+{
+	text	   *input_text = PG_GETARG_TEXT_PP(0);
+	char	   *instr;
+	int			len;
+	StringInfoData str;
+	text	   *result;
+	pg_wchar	pair_first = 0; /* pending first surrogate, or 0 if none */
+
+	instr = VARDATA_ANY(input_text);
+	len = VARSIZE_ANY_EXHDR(input_text);
+
+	initStringInfo(&str);
+
+	while (len > 0)
+	{
+		int			consumed = 0;	/* input bytes used up by this iteration */
+
+		if (instr[0] != '\\')
+		{
+			/* ordinary byte; not allowed in the middle of a surrogate pair */
+			if (pair_first)
+				goto invalid_pair;
+
+			appendStringInfoChar(&str, instr[0]);
+			consumed = 1;
+		}
+		else if (len >= 2 && instr[1] == '\\')
+		{
+			/* doubled backslash stands for a literal backslash */
+			if (pair_first)
+				goto invalid_pair;
+
+			appendStringInfoChar(&str, '\\');
+			consumed = 2;
+		}
+		else
+		{
+			pg_wchar	unicode = 0;
+
+			/* determine which escape format this is and decode its digits */
+			if (len >= 5 && isxdigits_n(instr + 1, 4))
+			{
+				/* \XXXX */
+				unicode = hexval_n(instr + 1, 4);
+				consumed = 5;
+			}
+			else if (len >= 6 && instr[1] == 'u' && isxdigits_n(instr + 2, 4))
+			{
+				/* \uXXXX */
+				unicode = hexval_n(instr + 2, 4);
+				consumed = 6;
+			}
+			else if (len >= 8 && instr[1] == '+' && isxdigits_n(instr + 2, 6))
+			{
+				/* \+XXXXXX */
+				unicode = hexval_n(instr + 2, 6);
+				consumed = 8;
+			}
+			else if (len >= 10 && instr[1] == 'U' && isxdigits_n(instr + 2, 8))
+			{
+				/* \UXXXXXXXX */
+				unicode = hexval_n(instr + 2, 8);
+				consumed = 10;
+			}
+			else
+				ereport(ERROR,
+						(errcode(ERRCODE_SYNTAX_ERROR),
+						 errmsg("invalid Unicode escape"),
+						 errhint("Unicode escapes must be \\XXXX, \\+XXXXXX, \\uXXXX, or \\UXXXXXXXX.")));
+
+			if (!unistr_append_codepoint(&str, unicode, &pair_first))
+				goto invalid_pair;
+		}
+
+		/* advance past whatever was just processed */
+		instr += consumed;
+		len -= consumed;
+	}
+
+	/* unfinished surrogate pair? */
+	if (pair_first)
+		goto invalid_pair;
+
+	result = cstring_to_text_with_len(str.data, str.len);
+	pfree(str.data);
+
+	PG_RETURN_TEXT_P(result);
+
+invalid_pair:
+	ereport(ERROR,
+			(errcode(ERRCODE_SYNTAX_ERROR),
+			 errmsg("invalid Unicode surrogate pair")));
+	/* ereport(ERROR) does not return, but not every compiler knows that */
+	PG_RETURN_NULL();
+}
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 4a39da3c9d4..489f5be427f 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 202103266
+#define CATALOG_VERSION_NO 202103291
#endif
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index cc7d90d2b0b..bfb89e0575d 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -11527,6 +11527,10 @@
proname => 'is_normalized', prorettype => 'bool', proargtypes => 'text text',
prosrc => 'unicode_is_normalized' },
+{ oid => '9822', descr => 'unescape Unicode characters',
+ proname => 'unistr', prorettype => 'text', proargtypes => 'text',
+ prosrc => 'unistr' },
+
{ oid => '4596', descr => 'I/O',
proname => 'brin_bloom_summary_in', prorettype => 'pg_brin_bloom_summary',
proargtypes => 'cstring', prosrc => 'brin_bloom_summary_in' },
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index afd84249c82..91aa8198045 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2234,3 +2234,39 @@ SELECT bit_count('\x1234567890'::bytea);
15
(1 row)
+SELECT unistr('\0064at\+0000610');
+ unistr
+--------
+ data0
+(1 row)
+
+SELECT unistr('d\u0061t\U000000610');
+ unistr
+--------
+ data0
+(1 row)
+
+SELECT unistr('a\\b');
+ unistr
+--------
+ a\b
+(1 row)
+
+-- errors:
+SELECT unistr('wrong: \db99');
+ERROR: invalid Unicode surrogate pair
+SELECT unistr('wrong: \db99\0061');
+ERROR: invalid Unicode surrogate pair
+SELECT unistr('wrong: \+00db99\+000061');
+ERROR: invalid Unicode surrogate pair
+SELECT unistr('wrong: \+2FFFFF');
+ERROR: invalid Unicode code point: 2FFFFF
+SELECT unistr('wrong: \udb99\u0061');
+ERROR: invalid Unicode surrogate pair
+SELECT unistr('wrong: \U0000db99\U00000061');
+ERROR: invalid Unicode surrogate pair
+SELECT unistr('wrong: \U002FFFFF');
+ERROR: invalid Unicode code point: 2FFFFF
+SELECT unistr('wrong: \xyz');
+ERROR: invalid Unicode escape
+HINT: Unicode escapes must be \XXXX, \+XXXXXX, \uXXXX, or \UXXXXXXXX.
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 9aa1825f921..2c502534c2b 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -746,3 +746,16 @@ SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 8)
SELECT encode(overlay(E'Th\\000omas'::bytea placing E'\\002\\003'::bytea from 5 for 3),'escape');
SELECT bit_count('\x1234567890'::bytea);
+
+SELECT unistr('\0064at\+0000610');
+SELECT unistr('d\u0061t\U000000610');
+SELECT unistr('a\\b');
+-- errors:
+SELECT unistr('wrong: \db99');
+SELECT unistr('wrong: \db99\0061');
+SELECT unistr('wrong: \+00db99\+000061');
+SELECT unistr('wrong: \+2FFFFF');
+SELECT unistr('wrong: \udb99\u0061');
+SELECT unistr('wrong: \U0000db99\U00000061');
+SELECT unistr('wrong: \U002FFFFF');
+SELECT unistr('wrong: \xyz');