1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Unicode escapes in E'...' strings

Author: Marko Kreen <markokr@gmail.com>
This commit is contained in:
Peter Eisentraut
2009-09-22 23:52:53 +00:00
parent 9048b73184
commit c2bb0378cf
3 changed files with 98 additions and 9 deletions

View File

@ -24,7 +24,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.158 2009/09/21 22:22:07 petere Exp $
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.159 2009/09/22 23:52:53 petere Exp $
*
*-------------------------------------------------------------------------
*/
@ -80,6 +80,9 @@ static void addlitchar(unsigned char ychar, base_yyscan_t yyscanner);
static char *litbufdup(base_yyscan_t yyscanner);
static char *litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner);
static unsigned char unescape_single_char(unsigned char c, base_yyscan_t yyscanner);
/*
 * UTF-16 surrogate helpers for Unicode escapes in E'...' strings.  The two
 * predicates are applied to a decoded escape value to tell whether it is the
 * first or the second part of a surrogate pair; surrogate_pair_to_codepoint()
 * takes both parts and yields the single value that is then added to the
 * literal.
 */
static bool is_utf16_surrogate_first(pg_wchar c);
static bool is_utf16_surrogate_second(pg_wchar c);
static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second);
/*
 * Shorthand for reporting a scanner error.  Note that the expansion refers
 * to a variable named yyscanner, so one must be in scope wherever yyerror()
 * is used.
 */
#define yyerror(msg) scanner_yyerror(msg, yyscanner)
@ -97,6 +100,8 @@ static void check_escape_warning(base_yyscan_t yyscanner);
extern int base_yyget_column(yyscan_t yyscanner);
extern void base_yyset_column(int column_no, yyscan_t yyscanner);
static void addunicode(pg_wchar c, yyscan_t yyscanner);
%}
%option reentrant
@ -134,6 +139,7 @@ extern void base_yyset_column(int column_no, yyscan_t yyscanner);
* <xdolq> $foo$ quoted strings
* <xui> quoted identifier with Unicode escapes
* <xus> quoted string with Unicode escapes
* <xeu> Unicode surrogate pair in extended quoted string
*/
%x xb
@ -145,6 +151,7 @@ extern void base_yyset_column(int column_no, yyscan_t yyscanner);
%x xdolq
%x xui
%x xus
%x xeu
/*
* In order to make the world safe for Windows and Mac clients as well as
@ -223,6 +230,8 @@ xeinside [^\\']+
xeescape [\\][^0-7]
xeoctesc [\\][0-7]{1,3}
xehexesc [\\]x[0-9A-Fa-f]{1,2}
/*
 * xeunicode matches a complete Unicode escape: a backslash, then either
 * a lower-case u with exactly four hex digits or an upper-case U with
 * exactly eight.  xeunicodebad matches only the backslash plus u/U prefix;
 * being shorter, it is selected only when xeunicode cannot match, i.e. when
 * the required hex digits are missing.
 */
xeunicode [\\](u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})
xeunicodebad [\\]([uU])
/* Extended quote
* xqdouble implements embedded quote, ''''
@ -535,6 +544,45 @@ other .
<xe>{xeinside} {
/* A run containing no backslash and no quote: hand it to addlit() unchanged. */
addlit(yytext, yyleng, yyscanner);
}
<xe>{xeunicode} {
					/* Skip the backslash and the u/U, then decode the hex digits. */
					pg_wchar	codepoint = strtoul(yytext + 2, NULL, 16);

					check_escape_warning(yyscanner);

					if (is_utf16_surrogate_first(codepoint))
					{
						/*
						 * First part of a surrogate pair: remember it and
						 * switch to <xeu>, which requires the second part
						 * to follow immediately.
						 */
						yyextra->utf16_first_part = codepoint;
						BEGIN(xeu);
					}
					else if (!is_utf16_surrogate_second(codepoint))
					{
						/* Ordinary value: add it to the literal directly. */
						addunicode(codepoint, yyscanner);
					}
					else
					{
						/* Second part with no first part before it. */
						yyerror("invalid Unicode surrogate pair");
					}
				}
<xeu>{xeunicode} {
					/*
					 * We are right after the first part of a surrogate pair,
					 * so this escape has to be the second part.
					 */
					pg_wchar	second_part = strtoul(yytext + 2, NULL, 16);
					pg_wchar	combined;

					if (!is_utf16_surrogate_second(second_part))
						yyerror("invalid Unicode surrogate pair");

					/* Merge the saved first part with this one and emit it. */
					combined = surrogate_pair_to_codepoint(yyextra->utf16_first_part,
														   second_part);
					addunicode(combined, yyscanner);

					/* Pair complete: resume normal extended-string scanning. */
					BEGIN(xe);
				}
<xeu>. |
<xeu>\n |
<xeu><<EOF>> { /* in <xeu>, any other character, a newline, or end of input instead of a second Unicode escape is an error */ yyerror("invalid Unicode surrogate pair"); }
<xe>{xeunicodebad} {
/*
 * A backslash followed by u or U that did not form a complete
 * xeunicode escape: report it as an error, with a hint that
 * spells out the two accepted forms.
 */
ereport(ERROR,
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
errmsg("invalid Unicode escape"),
errhint("Unicode escapes must be \\uXXXX or \\UXXXXXXXX."),
lexer_errposition()));
}
<xe>{xeescape} {
if (yytext[1] == '\'')
{
@ -1330,3 +1378,21 @@ base_yyfree(void *ptr, base_yyscan_t yyscanner)
if (ptr)
pfree(ptr);
}
/*
 * addunicode
 *		Add the Unicode code point c to the literal being scanned, encoded
 *		as UTF-8.
 *
 * Zero and values above 0x10FFFF are reported through yyerror().  Values
 * above 0x7F are accepted only when GetDatabaseEncoding() is PG_UTF8; in
 * that case saw_non_ascii is also set in the scanner's extra data.
 */
static void
addunicode(pg_wchar c, base_yyscan_t yyscanner)
{
	char		utf8_bytes[8];
	bool		out_of_range = (c == 0 || c > 0x10FFFF);

	if (out_of_range)
		yyerror("invalid Unicode escape value");

	if (c > 0x7F)
	{
		/* Non-ASCII code points are usable only with a UTF8 server encoding. */
		if (GetDatabaseEncoding() != PG_UTF8)
			yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8");
		yyextra->saw_non_ascii = true;
	}

	/* Encode, then let pg_mblen() tell us how many bytes were produced. */
	unicode_to_utf8(c, (unsigned char *) utf8_bytes);
	addlit(utf8_bytes, pg_mblen(utf8_bytes), yyscanner);
}

View File

@ -11,7 +11,7 @@
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.47 2009/07/14 20:24:10 tgl Exp $
* $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.48 2009/09/22 23:52:53 petere Exp $
*
*-------------------------------------------------------------------------
*/
@ -71,6 +71,9 @@ typedef struct base_yy_extra_type
int xcdepth; /* depth of nesting in slash-star comments */
char *dolqstart; /* current $foo$ quote start string */
/* first part of UTF16 surrogate pair for Unicode escapes */
int32 utf16_first_part;
/* state variables for literal-lexing warnings */
bool warn_on_first_escape;
bool saw_non_ascii;