diff --git a/src/backend/parser/gram.y b/src/backend/parser/gram.y index d3c7c356d9f..4325e4d0eda 100644 --- a/src/backend/parser/gram.y +++ b/src/backend/parser/gram.y @@ -11,7 +11,7 @@ * * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.689 2009/11/09 02:36:56 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/gram.y,v 2.690 2009/11/09 18:38:48 tgl Exp $ * * HISTORY * AUTHOR DATE MAJOR EVENT @@ -75,12 +75,6 @@ (Current) = (Rhs)[0]; \ } while (0) -/* - * The %name-prefix option below will make bison call base_yylex, but we - * really want it to call filtered_base_yylex (see parser.c). - */ -#define base_yylex filtered_base_yylex - /* * Bison doesn't allocate anything that needs to live across parser calls, * so we can easily have it use palloc instead of malloc. This prevents @@ -104,10 +98,10 @@ typedef struct PrivTarget #define parser_yyerror(msg) scanner_yyerror(msg, yyscanner) #define parser_errposition(pos) scanner_errposition(pos, yyscanner) -static void base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, +static void base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg); static Node *makeColumnRef(char *colname, List *indirection, - int location, base_yyscan_t yyscanner); + int location, core_yyscan_t yyscanner); static Node *makeTypeCast(Node *arg, TypeName *typename, int location); static Node *makeStringConst(char *str, int location); static Node *makeStringConstCast(char *str, int location, TypeName *typename); @@ -118,17 +112,17 @@ static Node *makeNullAConst(int location); static Node *makeAConst(Value *v, int location); static Node *makeBoolAConst(bool state, int location); static FuncCall *makeOverlaps(List *largs, List *rargs, - int location, base_yyscan_t yyscanner); -static void check_qualified_name(List *names, base_yyscan_t yyscanner); -static List *check_func_name(List *names, base_yyscan_t yyscanner); -static List *check_indirection(List *indirection, base_yyscan_t yyscanner); + int location, core_yyscan_t yyscanner); +static void check_qualified_name(List *names, core_yyscan_t yyscanner); +static List *check_func_name(List *names, core_yyscan_t yyscanner); +static List *check_indirection(List *indirection, core_yyscan_t yyscanner); static List *extractArgTypes(List *parameters); static SelectStmt *findLeftmostSelect(SelectStmt *node); static void insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, WithClause *withClause, - base_yyscan_t yyscanner); + core_yyscan_t yyscanner); static Node *makeSetOp(SetOperation op, bool all, Node *larg, Node *rarg); static Node *doNegate(Node *n, int location); static void doNegateFloat(Value *v); @@ -145,15 +139,18 @@ static TypeName *TableFuncTypeName(List *columns); %name-prefix="base_yy" %locations -%parse-param {base_yyscan_t yyscanner} -%lex-param {base_yyscan_t yyscanner} +%parse-param {core_yyscan_t yyscanner} +%lex-param {core_yyscan_t yyscanner} %union { + core_YYSTYPE core_yystype; + /* these fields must match core_YYSTYPE: */ int ival; - char chr; char *str; const char *keyword; + + char chr; bool boolean; JoinType jtype; DropBehavior dbehavior; @@ -162,7 +159,6 @@ static TypeName *TableFuncTypeName(List *columns); Node *node; Value *value; ObjectType objtype; - TypeName *typnam; FunctionParameter *fun_param; FunctionParameterMode fun_param_mode; @@ -180,7 +176,6 @@ static TypeName *TableFuncTypeName(List *columns); ResTarget *target; struct PrivTarget *privtarget; AccessPriv *accesspriv; - InsertStmt *istmt; VariableSetStmt *vsetstmt; } @@ -602,6 +597,7 @@ static TypeName *TableFuncTypeName(List *columns); %left JOIN CROSS LEFT FULL RIGHT INNER_P NATURAL /* kluge to keep xml_whitespace_option from causing shift/reduce conflicts */ %right PRESERVE STRIP_P + %% /* @@ -10932,14 +10928,14 @@ reserved_keyword: * available from the scanner. */ static void -base_yyerror(YYLTYPE *yylloc, base_yyscan_t yyscanner, const char *msg) +base_yyerror(YYLTYPE *yylloc, core_yyscan_t yyscanner, const char *msg) { parser_yyerror(msg); } static Node * makeColumnRef(char *colname, List *indirection, - int location, base_yyscan_t yyscanner) + int location, core_yyscan_t yyscanner) { /* * Generate a ColumnRef node, with an A_Indirection node added if there @@ -11109,7 +11105,7 @@ makeBoolAConst(bool state, int location) * Create and populate a FuncCall node to support the OVERLAPS operator. */ static FuncCall * -makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner) +makeOverlaps(List *largs, List *rargs, int location, core_yyscan_t yyscanner) { FuncCall *n = makeNode(FuncCall); @@ -11143,7 +11139,7 @@ makeOverlaps(List *largs, List *rargs, int location, base_yyscan_t yyscanner) * subscripts and '*', which we then must reject here. */ static void -check_qualified_name(List *names, base_yyscan_t yyscanner) +check_qualified_name(List *names, core_yyscan_t yyscanner) { ListCell *i; @@ -11160,7 +11156,7 @@ check_qualified_name(List *names, base_yyscan_t yyscanner) * and '*', which we then must reject here. */ static List * -check_func_name(List *names, base_yyscan_t yyscanner) +check_func_name(List *names, core_yyscan_t yyscanner) { ListCell *i; @@ -11178,7 +11174,7 @@ check_func_name(List *names, base_yyscan_t yyscanner) * in the grammar, so do it here. */ static List * -check_indirection(List *indirection, base_yyscan_t yyscanner) +check_indirection(List *indirection, core_yyscan_t yyscanner) { ListCell *l; @@ -11237,7 +11233,7 @@ insertSelectOptions(SelectStmt *stmt, List *sortClause, List *lockingClause, Node *limitOffset, Node *limitCount, WithClause *withClause, - base_yyscan_t yyscanner) + core_yyscan_t yyscanner) { Assert(IsA(stmt, SelectStmt)); @@ -11463,12 +11459,9 @@ TableFuncTypeName(List *columns) } /* - * Must undefine base_yylex before including scan.c, since we want it - * to create the function base_yylex not filtered_base_yylex. + * Must undefine this stuff before including scan.c, since it has different + * definitions for these macros. */ -#undef base_yylex - -/* Undefine some other stuff that would conflict in scan.c, too */ #undef yyerror #undef yylval #undef yylloc diff --git a/src/backend/parser/parser.c b/src/backend/parser/parser.c index 93632c88114..354e335ce91 100644 --- a/src/backend/parser/parser.c +++ b/src/backend/parser/parser.c @@ -14,7 +14,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.81 2009/07/14 20:24:10 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/parser.c,v 1.82 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -34,14 +34,15 @@ List * raw_parser(const char *str) { - base_yyscan_t yyscanner; + core_yyscan_t yyscanner; base_yy_extra_type yyextra; int yyresult; /* initialize the flex scanner */ - yyscanner = scanner_init(str, &yyextra, ScanKeywords, NumScanKeywords); + yyscanner = scanner_init(str, &yyextra.core_yy_extra, + ScanKeywords, NumScanKeywords); - /* filtered_base_yylex() only needs this much initialization */ + /* base_yylex() only needs this much initialization */ yyextra.have_lookahead = false; /* initialize the bison parser */ @@ -73,15 +74,16 @@ raw_parser(const char *str) char * pg_parse_string_token(const char *token) { - base_yyscan_t yyscanner; + core_yyscan_t yyscanner; base_yy_extra_type yyextra; int ctoken; - YYSTYPE yylval; + core_YYSTYPE yylval; YYLTYPE yylloc; - yyscanner = scanner_init(token, &yyextra, ScanKeywords, NumScanKeywords); + yyscanner = scanner_init(token, &yyextra.core_yy_extra, + ScanKeywords, NumScanKeywords); - ctoken = base_yylex(&yylval, &yylloc, yyscanner); + ctoken = core_yylex(&yylval, &yylloc, yyscanner); if (ctoken != SCONST) /* caller error */ elog(ERROR, "expected string constant, got token code %d", ctoken); @@ -93,7 +95,7 @@ pg_parse_string_token(const char *token) /* - * Intermediate filter between parser and base lexer (base_yylex in scan.l). + * Intermediate filter between parser and core lexer (core_yylex in scan.l). * * The filter is needed because in some cases the standard SQL grammar * requires more than one token lookahead. We reduce these cases to one-token @@ -104,26 +106,30 @@ pg_parse_string_token(const char *token) * words. Furthermore it's not clear how to do it without re-introducing * scanner backtrack, which would cost more performance than this filter * layer does. + * + * The filter also provides a convenient place to translate between + * the core_YYSTYPE and YYSTYPE representations (which are really the + * same thing anyway, but notationally they're different). */ int -filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) +base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, core_yyscan_t yyscanner) { base_yy_extra_type *yyextra = pg_yyget_extra(yyscanner); int cur_token; int next_token; - YYSTYPE cur_yylval; + core_YYSTYPE cur_yylval; YYLTYPE cur_yylloc; /* Get next token --- we might already have it */ if (yyextra->have_lookahead) { cur_token = yyextra->lookahead_token; - *lvalp = yyextra->lookahead_yylval; + lvalp->core_yystype = yyextra->lookahead_yylval; *llocp = yyextra->lookahead_yylloc; yyextra->have_lookahead = false; } else - cur_token = base_yylex(lvalp, llocp, yyscanner); + cur_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); /* Do we need to look ahead for a possible multiword token? */ switch (cur_token) @@ -133,9 +139,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) /* * NULLS FIRST and NULLS LAST must be reduced to one token */ - cur_yylval = *lvalp; + cur_yylval = lvalp->core_yystype; cur_yylloc = *llocp; - next_token = base_yylex(lvalp, llocp, yyscanner); + next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); switch (next_token) { case FIRST_P: @@ -147,11 +153,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) default: /* save the lookahead token for next time */ yyextra->lookahead_token = next_token; - yyextra->lookahead_yylval = *lvalp; + yyextra->lookahead_yylval = lvalp->core_yystype; yyextra->lookahead_yylloc = *llocp; yyextra->have_lookahead = true; /* and back up the output info to cur_token */ - *lvalp = cur_yylval; + lvalp->core_yystype = cur_yylval; *llocp = cur_yylloc; break; } @@ -162,9 +168,9 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) /* * WITH TIME must be reduced to one token */ - cur_yylval = *lvalp; + cur_yylval = lvalp->core_yystype; cur_yylloc = *llocp; - next_token = base_yylex(lvalp, llocp, yyscanner); + next_token = core_yylex(&(lvalp->core_yystype), llocp, yyscanner); switch (next_token) { case TIME: @@ -173,11 +179,11 @@ filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, base_yyscan_t yyscanner) default: /* save the lookahead token for next time */ yyextra->lookahead_token = next_token; - yyextra->lookahead_yylval = *lvalp; + yyextra->lookahead_yylval = lvalp->core_yystype; yyextra->lookahead_yylloc = *llocp; yyextra->have_lookahead = true; /* and back up the output info to cur_token */ - *lvalp = cur_yylval; + lvalp->core_yystype = cur_yylval; *llocp = cur_yylloc; break; } diff --git a/src/backend/parser/scan.l b/src/backend/parser/scan.l index 150202e77ce..8a53221930c 100644 --- a/src/backend/parser/scan.l +++ b/src/backend/parser/scan.l @@ -24,7 +24,7 @@ * Portions Copyright (c) 1994, Regents of the University of California * * IDENTIFICATION - * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.162 2009/09/27 03:27:23 tgl Exp $ + * $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.163 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -33,8 +33,8 @@ #include #include -#include "parser/gramparse.h" -#include "parser/keywords.h" +#include "parser/parser.h" /* only needed for GUC variables */ +#include "parser/scanner.h" #include "parser/scansup.h" #include "mb/pg_wchar.h" @@ -54,11 +54,16 @@ int backslash_quote = BACKSLASH_QUOTE_SAFE_ENCODING; bool escape_string_warning = true; bool standard_conforming_strings = false; +/* + * Set the type of YYSTYPE. + */ +#define YYSTYPE core_YYSTYPE + /* * Set the type of yyextra. All state variables used by the scanner should * be in yyextra, *not* statically allocated. */ -#define YY_EXTRA_TYPE base_yy_extra_type * +#define YY_EXTRA_TYPE core_yy_extra_type * /* * Each call to yylex must set yylloc to the location of the found token @@ -75,21 +80,22 @@ bool standard_conforming_strings = false; #define ADVANCE_YYLLOC(delta) ( *(yylloc) += (delta) ) #define startlit() ( yyextra->literallen = 0 ) -static void addlit(char *ytext, int yleng, base_yyscan_t yyscanner); -static void addlitchar(unsigned char ychar, base_yyscan_t yyscanner); -static char *litbufdup(base_yyscan_t yyscanner); -static char *litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner); -static unsigned char unescape_single_char(unsigned char c, base_yyscan_t yyscanner); +static void addlit(char *ytext, int yleng, core_yyscan_t yyscanner); +static void addlitchar(unsigned char ychar, core_yyscan_t yyscanner); +static char *litbufdup(core_yyscan_t yyscanner); +static char *litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner); +static unsigned char unescape_single_char(unsigned char c, core_yyscan_t yyscanner); static bool is_utf16_surrogate_first(pg_wchar c); static bool is_utf16_surrogate_second(pg_wchar c); static pg_wchar surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second); +static void addunicode(pg_wchar c, yyscan_t yyscanner); #define yyerror(msg) scanner_yyerror(msg, yyscanner) #define lexer_errposition() scanner_errposition(*(yylloc), yyscanner) -static void check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner); -static void check_escape_warning(base_yyscan_t yyscanner); +static void check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner); +static void check_escape_warning(core_yyscan_t yyscanner); /* * Work around a bug in flex 2.5.35: it emits a couple of functions that @@ -97,10 +103,8 @@ static void check_escape_warning(base_yyscan_t yyscanner); * this would cause warnings. Providing our own declarations should be * harmless even when the bug gets fixed. */ -extern int base_yyget_column(yyscan_t yyscanner); -extern void base_yyset_column(int column_no, yyscan_t yyscanner); - -static void addunicode(pg_wchar c, yyscan_t yyscanner); +extern int core_yyget_column(yyscan_t yyscanner); +extern void core_yyset_column(int column_no, yyscan_t yyscanner); %} @@ -117,7 +121,7 @@ static void addunicode(pg_wchar c, yyscan_t yyscanner); %option noyyrealloc %option noyyfree %option warn -%option prefix="base_yy" +%option prefix="core_yy" /* * OK, here is a short description of lex/flex rules behavior. @@ -958,7 +962,7 @@ other . * to still be available. */ int -scanner_errposition(int location, base_yyscan_t yyscanner) +scanner_errposition(int location, core_yyscan_t yyscanner) { int pos; @@ -984,7 +988,7 @@ scanner_errposition(int location, base_yyscan_t yyscanner) * be misleading! */ void -scanner_yyerror(const char *message, base_yyscan_t yyscanner) +scanner_yyerror(const char *message, core_yyscan_t yyscanner) { const char *loc = yyextra->scanbuf + *yylloc; @@ -1010,9 +1014,9 @@ scanner_yyerror(const char *message, base_yyscan_t yyscanner) /* * Called before any actual parsing is done */ -base_yyscan_t +core_yyscan_t scanner_init(const char *str, - base_yy_extra_type *yyext, + core_yy_extra_type *yyext, const ScanKeyword *keywords, int num_keywords) { @@ -1022,7 +1026,7 @@ scanner_init(const char *str, if (yylex_init(&scanner) != 0) elog(ERROR, "yylex_init() failed: %m"); - base_yyset_extra(yyext, scanner); + core_yyset_extra(yyext, scanner); yyext->keywords = keywords; yyext->num_keywords = num_keywords; @@ -1049,7 +1053,7 @@ scanner_init(const char *str, * Called after parsing is done to clean up after scanner_init() */ void -scanner_finish(base_yyscan_t yyscanner) +scanner_finish(core_yyscan_t yyscanner) { /* * We don't bother to call yylex_destroy(), because all it would do @@ -1069,7 +1073,7 @@ scanner_finish(base_yyscan_t yyscanner) static void -addlit(char *ytext, int yleng, base_yyscan_t yyscanner) +addlit(char *ytext, int yleng, core_yyscan_t yyscanner) { /* enlarge buffer if needed */ if ((yyextra->literallen + yleng) >= yyextra->literalalloc) @@ -1087,7 +1091,7 @@ addlit(char *ytext, int yleng, base_yyscan_t yyscanner) static void -addlitchar(unsigned char ychar, base_yyscan_t yyscanner) +addlitchar(unsigned char ychar, core_yyscan_t yyscanner) { /* enlarge buffer if needed */ if ((yyextra->literallen + 1) >= yyextra->literalalloc) @@ -1106,7 +1110,7 @@ addlitchar(unsigned char ychar, base_yyscan_t yyscanner) * Create a palloc'd copy of literalbuf, adding a trailing null. */ static char * -litbufdup(base_yyscan_t yyscanner) +litbufdup(core_yyscan_t yyscanner) { int llen = yyextra->literallen; char *new; @@ -1131,7 +1135,7 @@ hexval(unsigned char c) } static void -check_unicode_value(pg_wchar c, char *loc, base_yyscan_t yyscanner) +check_unicode_value(pg_wchar c, char *loc, core_yyscan_t yyscanner) { if (GetDatabaseEncoding() == PG_UTF8) return; @@ -1161,8 +1165,25 @@ surrogate_pair_to_codepoint(pg_wchar first, pg_wchar second) return ((first & 0x3FF) << 10) + 0x10000 + (second & 0x3FF); } +static void +addunicode(pg_wchar c, core_yyscan_t yyscanner) +{ + char buf[8]; + + if (c == 0 || c > 0x10FFFF) + yyerror("invalid Unicode escape value"); + if (c > 0x7F) + { + if (GetDatabaseEncoding() != PG_UTF8) + yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); + yyextra->saw_non_ascii = true; + } + unicode_to_utf8(c, (unsigned char *)buf); + addlit(buf, pg_mblen(buf), yyscanner); +} + static char * -litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner) +litbuf_udeescape(unsigned char escape, core_yyscan_t yyscanner) { char *new; char *litbuf, *in, *out; @@ -1294,7 +1315,7 @@ litbuf_udeescape(unsigned char escape, base_yyscan_t yyscanner) } static unsigned char -unescape_single_char(unsigned char c, base_yyscan_t yyscanner) +unescape_single_char(unsigned char c, core_yyscan_t yyscanner) { switch (c) { @@ -1318,7 +1339,7 @@ unescape_single_char(unsigned char c, base_yyscan_t yyscanner) } static void -check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner) +check_string_escape_warning(unsigned char ychar, core_yyscan_t yyscanner) { if (ychar == '\'') { @@ -1345,7 +1366,7 @@ check_string_escape_warning(unsigned char ychar, base_yyscan_t yyscanner) } static void -check_escape_warning(base_yyscan_t yyscanner) +check_escape_warning(core_yyscan_t yyscanner) { if (yyextra->warn_on_first_escape && escape_string_warning) ereport(WARNING, @@ -1362,13 +1383,13 @@ check_escape_warning(base_yyscan_t yyscanner) */ void * -base_yyalloc(yy_size_t bytes, base_yyscan_t yyscanner) +core_yyalloc(yy_size_t bytes, core_yyscan_t yyscanner) { return palloc(bytes); } void * -base_yyrealloc(void *ptr, yy_size_t bytes, base_yyscan_t yyscanner) +core_yyrealloc(void *ptr, yy_size_t bytes, core_yyscan_t yyscanner) { if (ptr) return repalloc(ptr, bytes); @@ -1377,26 +1398,8 @@ base_yyrealloc(void *ptr, yy_size_t bytes, base_yyscan_t yyscanner) } void -base_yyfree(void *ptr, base_yyscan_t yyscanner) +core_yyfree(void *ptr, core_yyscan_t yyscanner) { if (ptr) pfree(ptr); } - -static void -addunicode(pg_wchar c, base_yyscan_t yyscanner) -{ - char buf[8]; - - if (c == 0 || c > 0x10FFFF) - yyerror("invalid Unicode escape value"); - if (c > 0x7F) - { - if (GetDatabaseEncoding() != PG_UTF8) - yyerror("Unicode escape values cannot be used for code point values above 007F when the server encoding is not UTF8"); - yyextra->saw_non_ascii = true; - } - unicode_to_utf8(c, (unsigned char *)buf); - addlit(buf, pg_mblen(buf), yyscanner); -} - diff --git a/src/include/parser/gramparse.h b/src/include/parser/gramparse.h index 09c99091361..41774028b5b 100644 --- a/src/include/parser/gramparse.h +++ b/src/include/parser/gramparse.h @@ -11,7 +11,7 @@ * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.49 2009/11/05 23:24:26 tgl Exp $ + * $PostgreSQL: pgsql/src/include/parser/gramparse.h,v 1.50 2009/11/09 18:38:48 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -20,20 +20,11 @@ #define GRAMPARSE_H #include "nodes/parsenodes.h" -#include "parser/keywords.h" +#include "parser/scanner.h" /* - * We track token locations in terms of byte offsets from the start of the - * source string, not the column number/line number representation that - * bison uses by default. Also, to minimize overhead we track only one - * location (usually the first token location) for each construct, not - * the beginning and ending locations as bison does by default. It's - * therefore sufficient to make YYLTYPE an int. - */ -#define YYLTYPE int - -/* - * After defining YYLTYPE, it's safe to include gram.h. + * NB: include gram.h only AFTER including scanner.h, because scanner.h + * is what #defines YYLTYPE. */ #include "parser/gram.h" @@ -44,62 +35,24 @@ typedef struct base_yy_extra_type { /* - * The string the lexer is physically scanning. We keep this mainly so - * that we can cheaply compute the offset of the current token (yytext). + * Fields used by the core scanner. */ - char *scanbuf; - Size scanbuflen; + core_yy_extra_type core_yy_extra; /* - * The keyword list to use. - */ - const ScanKeyword *keywords; - int num_keywords; - - /* - * literalbuf is used to accumulate literal values when multiple rules - * are needed to parse a single literal. Call startlit() to reset buffer - * to empty, addlit() to add text. NOTE: the string in literalbuf is - * NOT necessarily null-terminated, but there always IS room to add a - * trailing null at offset literallen. We store a null only when we - * need it. - */ - char *literalbuf; /* palloc'd expandable buffer */ - int literallen; /* actual current string length */ - int literalalloc; /* current allocated buffer size */ - - int xcdepth; /* depth of nesting in slash-star comments */ - char *dolqstart; /* current $foo$ quote start string */ - - /* first part of UTF16 surrogate pair for Unicode escapes */ - int32 utf16_first_part; - - /* state variables for literal-lexing warnings */ - bool warn_on_first_escape; - bool saw_non_ascii; - - /* - * State variables for filtered_base_yylex(). + * State variables for base_yylex(). */ bool have_lookahead; /* is lookahead info valid? */ int lookahead_token; /* one-token lookahead */ - YYSTYPE lookahead_yylval; /* yylval for lookahead token */ + core_YYSTYPE lookahead_yylval; /* yylval for lookahead token */ YYLTYPE lookahead_yylloc; /* yylloc for lookahead token */ /* - * State variables that belong to the grammar, not the lexer. It's - * simpler to keep these here than to invent a separate structure. - * These fields are unused/undefined if the lexer is invoked on its own. + * State variables that belong to the grammar. */ - List *parsetree; /* final parse result is delivered here */ } base_yy_extra_type; -/* - * The type of yyscanner is opaque outside scan.l. - */ -typedef void *base_yyscan_t; - /* * In principle we should use yyget_extra() to fetch the yyextra field * from a yyscanner struct. However, flex always puts that field first, @@ -110,22 +63,11 @@ typedef void *base_yyscan_t; /* from parser.c */ -extern int filtered_base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, - base_yyscan_t yyscanner); - -/* from scan.l */ -extern base_yyscan_t scanner_init(const char *str, - base_yy_extra_type *yyext, - const ScanKeyword *keywords, - int num_keywords); -extern void scanner_finish(base_yyscan_t yyscanner); extern int base_yylex(YYSTYPE *lvalp, YYLTYPE *llocp, - base_yyscan_t yyscanner); -extern int scanner_errposition(int location, base_yyscan_t yyscanner); -extern void scanner_yyerror(const char *message, base_yyscan_t yyscanner); + core_yyscan_t yyscanner); /* from gram.y */ extern void parser_init(base_yy_extra_type *yyext); -extern int base_yyparse(base_yyscan_t yyscanner); +extern int base_yyparse(core_yyscan_t yyscanner); #endif /* GRAMPARSE_H */ diff --git a/src/include/parser/scanner.h b/src/include/parser/scanner.h new file mode 100644 index 00000000000..ccab1db862c --- /dev/null +++ b/src/include/parser/scanner.h @@ -0,0 +1,120 @@ +/*------------------------------------------------------------------------- + * + * scanner.h + * API for the core scanner (flex machine) + * + * The core scanner is also used by PL/pgsql, so we provide a public API + * for it. However, the rest of the backend is only expected to use the + * higher-level API provided by parser.h. + * + * + * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * $PostgreSQL: pgsql/src/include/parser/scanner.h,v 1.1 2009/11/09 18:38:48 tgl Exp $ + * + *------------------------------------------------------------------------- + */ + +#ifndef SCANNER_H +#define SCANNER_H + +#include "parser/keywords.h" + +/* + * The scanner returns extra data about scanned tokens in this union type. + * Note that this is a subset of the fields used in YYSTYPE of the bison + * parsers built atop the scanner. + */ +typedef union core_YYSTYPE +{ + int ival; /* for integer literals */ + char *str; /* for identifiers and non-integer literals */ + const char *keyword; /* canonical spelling of keywords */ +} core_YYSTYPE; + +/* + * We track token locations in terms of byte offsets from the start of the + * source string, not the column number/line number representation that + * bison uses by default. Also, to minimize overhead we track only one + * location (usually the first token location) for each construct, not + * the beginning and ending locations as bison does by default. It's + * therefore sufficient to make YYLTYPE an int. + */ +#define YYLTYPE int + +/* + * Another important component of the scanner's API is the token code numbers. + * However, those are not defined in this file, because bison insists on + * defining them for itself. The token codes used by the core scanner are + * the ASCII characters plus these: + * %token IDENT FCONST SCONST BCONST XCONST Op + * %token ICONST PARAM + * %token TYPECAST DOT_DOT COLON_EQUALS + * The above token definitions *must* be the first ones declared in any + * bison parser built atop this scanner, so that they will have consistent + * numbers assigned to them (specifically, IDENT = 258 and so on). + */ + +/* + * The YY_EXTRA data that a flex scanner allows us to pass around. + * Private state needed by the core scanner goes here. Note that the actual + * yy_extra struct may be larger and have this as its first component, thus + * allowing the calling parser to keep some fields of its own in YY_EXTRA. + */ +typedef struct core_yy_extra_type +{ + /* + * The string the scanner is physically scanning. We keep this mainly so + * that we can cheaply compute the offset of the current token (yytext). + */ + char *scanbuf; + Size scanbuflen; + + /* + * The keyword list to use. + */ + const ScanKeyword *keywords; + int num_keywords; + + /* + * literalbuf is used to accumulate literal values when multiple rules + * are needed to parse a single literal. Call startlit() to reset buffer + * to empty, addlit() to add text. NOTE: the string in literalbuf is + * NOT necessarily null-terminated, but there always IS room to add a + * trailing null at offset literallen. We store a null only when we + * need it. + */ + char *literalbuf; /* palloc'd expandable buffer */ + int literallen; /* actual current string length */ + int literalalloc; /* current allocated buffer size */ + + int xcdepth; /* depth of nesting in slash-star comments */ + char *dolqstart; /* current $foo$ quote start string */ + + /* first part of UTF16 surrogate pair for Unicode escapes */ + int32 utf16_first_part; + + /* state variables for literal-lexing warnings */ + bool warn_on_first_escape; + bool saw_non_ascii; +} core_yy_extra_type; + +/* + * The type of yyscanner is opaque outside scan.l. + */ +typedef void *core_yyscan_t; + + +/* Entry points in parser/scan.l */ +extern core_yyscan_t scanner_init(const char *str, + core_yy_extra_type *yyext, + const ScanKeyword *keywords, + int num_keywords); +extern void scanner_finish(core_yyscan_t yyscanner); +extern int core_yylex(core_YYSTYPE *lvalp, YYLTYPE *llocp, + core_yyscan_t yyscanner); +extern int scanner_errposition(int location, core_yyscan_t yyscanner); +extern void scanner_yyerror(const char *message, core_yyscan_t yyscanner); + +#endif /* SCANNER_H */