mirror of
https://github.com/postgres/postgres.git
synced 2025-11-29 23:43:17 +03:00
Unicode escapes in strings and identifiers
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.79 2008/10/14 17:12:33 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/xml.c,v 1.80 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1497,28 +1497,7 @@ unicode_to_sqlchar(pg_wchar c)
|
||||
{
|
||||
static unsigned char utf8string[5]; /* need trailing zero */
|
||||
|
||||
if (c <= 0x7F)
|
||||
{
|
||||
utf8string[0] = c;
|
||||
}
|
||||
else if (c <= 0x7FF)
|
||||
{
|
||||
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
||||
utf8string[1] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else if (c <= 0xFFFF)
|
||||
{
|
||||
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
||||
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[2] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else
|
||||
{
|
||||
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
||||
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
||||
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[3] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
unicode_to_utf8(c, utf8string);
|
||||
|
||||
return (char *) pg_do_encoding_conversion(utf8string,
|
||||
pg_mblen((char *) utf8string),
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
/*
|
||||
* conversion functions between pg_wchar and multibyte streams.
|
||||
* Tatsuo Ishii
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.67 2008/10/27 19:37:22 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.68 2008/10/29 08:04:53 petere Exp $
|
||||
*
|
||||
*/
|
||||
/* can be used in either frontend or backend */
|
||||
@@ -419,6 +419,41 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
|
||||
return cnt;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Map a Unicode code point to UTF-8. utf8string must have 4 bytes of
|
||||
* space allocated.
|
||||
*/
|
||||
unsigned char *
|
||||
unicode_to_utf8(pg_wchar c, unsigned char *utf8string)
|
||||
{
|
||||
if (c <= 0x7F)
|
||||
{
|
||||
utf8string[0] = c;
|
||||
}
|
||||
else if (c <= 0x7FF)
|
||||
{
|
||||
utf8string[0] = 0xC0 | ((c >> 6) & 0x1F);
|
||||
utf8string[1] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else if (c <= 0xFFFF)
|
||||
{
|
||||
utf8string[0] = 0xE0 | ((c >> 12) & 0x0F);
|
||||
utf8string[1] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[2] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
else
|
||||
{
|
||||
utf8string[0] = 0xF0 | ((c >> 18) & 0x07);
|
||||
utf8string[1] = 0x80 | ((c >> 12) & 0x3F);
|
||||
utf8string[2] = 0x80 | ((c >> 6) & 0x3F);
|
||||
utf8string[3] = 0x80 | (c & 0x3F);
|
||||
}
|
||||
|
||||
return utf8string;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Return the byte length of a UTF8 character pointed to by s
|
||||
*
|
||||
|
||||
Reference in New Issue
Block a user