/*
 * NOTE(review): the following provenance banner was scraped from a repository
 * browser; preserved here as a comment so the file remains valid C.
 * Source: mirror of https://github.com/postgres/postgres.git
 * (synced 2025-06-13 07:41:39 +03:00; 164 lines, 4.2 KiB, C)
 */
/*-------------------------------------------------------------------------
 *
 * parser.c
 *		Main entry point/driver for PostgreSQL grammar
 *
 * This should match src/backend/parser/parser.c, except that we do not
 * need to bother with re-entrant interfaces.
 *
 * Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
 *	  src/interfaces/ecpg/preproc/parser.c
 *
 *-------------------------------------------------------------------------
 */
|
|
|
|
#include "postgres_fe.h"

#include "preproc_extern.h"
#include "preproc.h"

/*
 * One-token lookahead state for filtered_base_yylex() below.
 *
 * When the filter has to peek at the next token to decide whether to
 * replace the current one, the peeked token (and the lexer output
 * variables it produced) is stashed here and handed back on the next call.
 */
static bool have_lookahead;		/* is lookahead info valid? */
static int	lookahead_token;	/* one-token lookahead */
static YYSTYPE lookahead_yylval;	/* yylval for lookahead token */
static YYLTYPE lookahead_yylloc;	/* yylloc for lookahead token */
static char *lookahead_yytext;	/* start current token */
static char *lookahead_end;		/* end of current token */
static char lookahead_hold_char;	/* to be put back at *lookahead_end */
|
|
|
|
|
|
/*
 * Intermediate filter between parser and base lexer (base_yylex in scan.l).
 *
 * This filter is needed because in some cases the standard SQL grammar
 * requires more than one token lookahead.  We reduce these cases to one-token
 * lookahead by replacing tokens here, in order to keep the grammar LALR(1).
 *
 * Using a filter is simpler than trying to recognize multiword tokens
 * directly in scan.l, because we'd have to allow for comments between the
 * words.  Furthermore it's not clear how to do that without re-introducing
 * scanner backtrack, which would cost more performance than this filter
 * layer does.
 *
 * Returns the (possibly replaced) current token; leaves the lexer output
 * variables (base_yylval, base_yylloc, base_yytext) describing that token.
 * Any token consumed while peeking ahead is saved in the lookahead_*
 * statics above and returned by the next call.
 */
int
filtered_base_yylex(void)
{
	int			cur_token;
	int			next_token;
	int			cur_token_length;
	YYSTYPE		cur_yylval;
	YYLTYPE		cur_yylloc;
	char	   *cur_yytext;

	/* Get next token --- we might already have it */
	if (have_lookahead)
	{
		cur_token = lookahead_token;
		base_yylval = lookahead_yylval;
		base_yylloc = lookahead_yylloc;
		base_yytext = lookahead_yytext;
		/* restore the character we overwrote with '\0' (see below) */
		*lookahead_end = lookahead_hold_char;
		have_lookahead = false;
	}
	else
		cur_token = base_yylex();

	/*
	 * If this token isn't one that requires lookahead, just return it. If it
	 * does, determine the token length. (We could get that via strlen(), but
	 * since we have such a small set of possibilities, hardwiring seems
	 * feasible and more efficient.)
	 */
	switch (cur_token)
	{
		case NOT:
			cur_token_length = 3;
			break;
		case NULLS_P:
			cur_token_length = 5;
			break;
		case WITH:
			cur_token_length = 4;
			break;
		default:
			return cur_token;
	}

	/*
	 * Identify end+1 of current token. base_yylex() has temporarily stored a
	 * '\0' here, and will undo that when we call it again. We need to redo
	 * it to fully revert the lookahead call for error reporting purposes.
	 */
	lookahead_end = base_yytext + cur_token_length;
	Assert(*lookahead_end == '\0');

	/* Save and restore lexer output variables around the call */
	cur_yylval = base_yylval;
	cur_yylloc = base_yylloc;
	cur_yytext = base_yytext;

	/* Get next token, saving outputs into lookahead variables */
	next_token = base_yylex();

	lookahead_token = next_token;
	lookahead_yylval = base_yylval;
	lookahead_yylloc = base_yylloc;
	lookahead_yytext = base_yytext;

	/* put the current token's outputs back for our caller */
	base_yylval = cur_yylval;
	base_yylloc = cur_yylloc;
	base_yytext = cur_yytext;

	/* Now revert the un-truncation of the current token */
	lookahead_hold_char = *lookahead_end;
	*lookahead_end = '\0';

	have_lookahead = true;

	/* Replace cur_token if needed, based on lookahead */
	switch (cur_token)
	{
		case NOT:
			/* Replace NOT by NOT_LA if it's followed by BETWEEN, IN, etc */
			switch (next_token)
			{
				case BETWEEN:
				case IN_P:
				case LIKE:
				case ILIKE:
				case SIMILAR:
					cur_token = NOT_LA;
					break;
			}
			break;

		case NULLS_P:
			/* Replace NULLS_P by NULLS_LA if it's followed by FIRST or LAST */
			switch (next_token)
			{
				case FIRST_P:
				case LAST_P:
					cur_token = NULLS_LA;
					break;
			}
			break;

		case WITH:
			/* Replace WITH by WITH_LA if it's followed by TIME or ORDINALITY */
			switch (next_token)
			{
				case TIME:
				case ORDINALITY:
					cur_token = WITH_LA;
					break;
			}
			break;
	}

	return cur_token;
}
|