mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Split psql's lexer into two separate .l files for SQL and backslash cases.
This gets us to a point where psqlscan.l can be used by other frontend programs for the same purpose psql uses it for, ie to detect when it's collected a complete SQL command from input that is divided across line boundaries. Moreover, other programs can supply their own lexers for backslash commands of their own choosing. A follow-on patch will use this in pgbench. The end result here is roughly the same as in Kyotaro Horiguchi's 0001-Make-SQL-parser-part-of-psqlscan-independent-from-ps.patch, although the details of the method for switching between lexers are quite different. Basically, in this patch we share the entire PsqlScanState, YY_BUFFER_STATE stack, *and* yyscan_t between different lexers. The only thing we need to do to switch to a different lexer is to make sure the start_state is valid for the new lexer. This works because flex doesn't keep any other persistent state that depends on the specific lexing tables generated for a particular .l file. (We are assuming that both lexers are built with the same flex version, or at least versions that are compatible with respect to the contents of yyscan_t; but that doesn't seem likely to be a big problem in practice, considering how slowly flex changes.) Aside from being more efficient than Horiguchi-san's original solution, this avoids possible corner-case changes in semantics: the original code was capable of popping the input buffer stack while still staying in backslash-related parsing states. I'm not sure that that equates to any useful user-visible behaviors, but I'm not sure it doesn't either, so I'm loath to assume that we only need to consider the topmost buffer when parsing a backslash command. I've attempted to update the MSVC build scripts for the added .l file, but will rely on the buildfarm to see if I missed anything. Kyotaro Horiguchi and Tom Lane
This commit is contained in:
parent
27199058d9
commit
0ea9efbe9e
1
src/bin/psql/.gitignore
vendored
1
src/bin/psql/.gitignore
vendored
@ -1,4 +1,5 @@
|
||||
/psqlscan.c
|
||||
/psqlscanslash.c
|
||||
/sql_help.h
|
||||
/sql_help.c
|
||||
/dumputils.c
|
||||
|
@ -23,7 +23,7 @@ override CPPFLAGS := -I. -I$(srcdir) -I$(libpq_srcdir) -I$(top_srcdir)/src/bin/p
|
||||
OBJS= command.o common.o help.o input.o stringutils.o mainloop.o copy.o \
|
||||
startup.o prompt.o variables.o large_obj.o print.o describe.o \
|
||||
tab-complete.o mbprint.o dumputils.o keywords.o kwlookup.o \
|
||||
sql_help.o psqlscan.o \
|
||||
sql_help.o psqlscan.o psqlscanslash.o \
|
||||
$(WIN32RES)
|
||||
|
||||
|
||||
@ -47,12 +47,16 @@ sql_help.h: create_help.pl $(wildcard $(REFDOCDIR)/*.sgml)
|
||||
psqlscan.c: FLEXFLAGS = -Cfe -p -p
|
||||
psqlscan.c: FLEX_NO_BACKUP=yes
|
||||
|
||||
# Latest flex causes warnings in this file.
|
||||
psqlscanslash.c: FLEXFLAGS = -Cfe -p -p
|
||||
psqlscanslash.c: FLEX_NO_BACKUP=yes
|
||||
|
||||
# Latest flex causes warnings in these files.
|
||||
ifeq ($(GCC),yes)
|
||||
psqlscan.o: CFLAGS += -Wno-error
|
||||
psqlscanslash.o: CFLAGS += -Wno-error
|
||||
endif
|
||||
|
||||
distprep: sql_help.h psqlscan.c
|
||||
distprep: sql_help.h psqlscan.c psqlscanslash.c
|
||||
|
||||
install: all installdirs
|
||||
$(INSTALL_PROGRAM) psql$(X) '$(DESTDIR)$(bindir)/psql$(X)'
|
||||
@ -64,9 +68,10 @@ installdirs:
|
||||
uninstall:
|
||||
rm -f '$(DESTDIR)$(bindir)/psql$(X)' '$(DESTDIR)$(datadir)/psqlrc.sample'
|
||||
|
||||
# psqlscan.c is in the distribution tarball, so is not cleaned here
|
||||
clean distclean:
|
||||
rm -f psql$(X) $(OBJS) dumputils.c keywords.c kwlookup.c lex.backup
|
||||
|
||||
# files removed here are supposed to be in the distribution tarball,
|
||||
# so do not clean them in the clean/distclean rules
|
||||
maintainer-clean: distclean
|
||||
rm -f sql_help.h sql_help.c psqlscan.c
|
||||
rm -f sql_help.h sql_help.c psqlscan.c psqlscanslash.c
|
||||
|
@ -45,7 +45,7 @@
|
||||
#include "large_obj.h"
|
||||
#include "mainloop.h"
|
||||
#include "print.h"
|
||||
#include "psqlscan.h"
|
||||
#include "psqlscanslash.h"
|
||||
#include "settings.h"
|
||||
#include "variables.h"
|
||||
|
||||
|
@ -2,7 +2,8 @@
|
||||
CATALOG_NAME = psql
|
||||
AVAIL_LANGUAGES = cs de es fr it ja pl pt_BR ru zh_CN zh_TW
|
||||
GETTEXT_FILES = command.c common.c copy.c help.c input.c large_obj.c \
|
||||
mainloop.c print.c psqlscan.c startup.c describe.c sql_help.h sql_help.c \
|
||||
mainloop.c print.c psqlscan.c psqlscanslash.c startup.c \
|
||||
describe.c sql_help.h sql_help.c \
|
||||
tab-complete.c variables.c \
|
||||
../../common/exec.c ../../common/fe_memutils.c ../../common/username.c \
|
||||
../../common/wait_error.c
|
||||
|
@ -25,17 +25,6 @@ typedef enum
|
||||
PSCAN_EOL /* end of line, SQL possibly complete */
|
||||
} PsqlScanResult;
|
||||
|
||||
/* Different ways for scan_slash_option to handle parameter words */
|
||||
enum slash_option_type
|
||||
{
|
||||
OT_NORMAL, /* normal case */
|
||||
OT_SQLID, /* treat as SQL identifier */
|
||||
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
|
||||
OT_FILEPIPE, /* it's a filename or pipe */
|
||||
OT_WHOLE_LINE, /* just snarf the rest of the line */
|
||||
OT_NO_EVAL /* no expansion of backticks or variables */
|
||||
};
|
||||
|
||||
/* Callback functions to be used by the lexer */
|
||||
typedef struct PsqlScanCallbacks
|
||||
{
|
||||
@ -61,15 +50,8 @@ extern PsqlScanResult psql_scan(PsqlScanState state,
|
||||
|
||||
extern void psql_scan_reset(PsqlScanState state);
|
||||
|
||||
extern void psql_scan_reselect_sql_lexer(PsqlScanState state);
|
||||
|
||||
extern bool psql_scan_in_quote(PsqlScanState state);
|
||||
|
||||
extern char *psql_scan_slash_command(PsqlScanState state);
|
||||
|
||||
extern char *psql_scan_slash_option(PsqlScanState state,
|
||||
enum slash_option_type type,
|
||||
char *quote,
|
||||
bool semicolon);
|
||||
|
||||
extern void psql_scan_slash_command_end(PsqlScanState state);
|
||||
|
||||
#endif /* PSQLSCAN_H */
|
||||
|
File diff suppressed because it is too large
Load Diff
129
src/bin/psql/psqlscan_int.h
Normal file
129
src/bin/psql/psqlscan_int.h
Normal file
@ -0,0 +1,129 @@
|
||||
/*
|
||||
* psqlscan_int.h
|
||||
* lexical scanner internal declarations
|
||||
*
|
||||
* This file declares the PsqlScanStateData structure used by psqlscan.l
|
||||
* and shared by other lexers compatible with it, such as psqlscanslash.l.
|
||||
*
|
||||
* One difficult aspect of this code is that we need to work in multibyte
|
||||
* encodings that are not ASCII-safe. A "safe" encoding is one in which each
|
||||
* byte of a multibyte character has the high bit set (it's >= 0x80). Since
|
||||
* all our lexing rules treat all high-bit-set characters alike, we don't
|
||||
* really need to care whether such a byte is part of a sequence or not.
|
||||
* In an "unsafe" encoding, we still expect the first byte of a multibyte
|
||||
* sequence to be >= 0x80, but later bytes might not be. If we scan such
|
||||
* a sequence as-is, the lexing rules could easily be fooled into matching
|
||||
* such bytes to ordinary ASCII characters. Our solution for this is to
|
||||
* substitute 0xFF for each non-first byte within the data presented to flex.
|
||||
* The flex rules will then pass the FF's through unmolested. The
|
||||
* psqlscan_emit() subroutine is responsible for looking back to the original
|
||||
* string and replacing FF's with the corresponding original bytes.
|
||||
*
|
||||
* Another interesting thing we do here is scan different parts of the same
|
||||
* input with physically separate flex lexers (ie, lexers written in separate
|
||||
* .l files). We can get away with this because the only part of the
|
||||
* persistent state of a flex lexer that depends on its parsing rule tables
|
||||
* is the start state number, which is easy enough to manage --- usually,
|
||||
* in fact, we just need to set it to INITIAL when changing lexers. But to
|
||||
* make that work at all, we must use re-entrant lexers, so that all the
|
||||
* relevant state is in the yyscan_t attached to the PsqlScanState;
|
||||
* if we were using lexers with separate static state we would soon end up
|
||||
* with dangling buffer pointers in one or the other. Also note that this
|
||||
* is unlikely to work very nicely if the lexers aren't all built with the
|
||||
* same flex version.
|
||||
*
|
||||
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
|
||||
*
|
||||
* src/bin/psql/psqlscan_int.h
|
||||
*/
|
||||
#ifndef PSQLSCAN_INT_H
|
||||
#define PSQLSCAN_INT_H
|
||||
|
||||
#include "psqlscan.h"
|
||||
|
||||
/* This is just to allow this file to be compilable standalone */
|
||||
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
|
||||
#define YY_TYPEDEF_YY_BUFFER_STATE
|
||||
typedef struct yy_buffer_state *YY_BUFFER_STATE;
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We use a stack of flex buffers to handle substitution of psql variables.
|
||||
* Each stacked buffer contains the as-yet-unread text from one psql variable.
|
||||
* When we pop the stack all the way, we resume reading from the outer buffer
|
||||
* identified by scanbufhandle.
|
||||
*/
|
||||
/* One entry in the stack of flex buffers used for psql variable expansion. */
typedef struct StackElem
|
||||
{
|
||||
YY_BUFFER_STATE buf; /* flex input control structure */
|
||||
char *bufstring; /* data actually being scanned by flex */
|
||||
/* NOTE(review): presumably only set when bufstring had 0xFF substitutions made for an unsafe encoding -- confirm in psqlscan.l */
char *origstring; /* copy of original data, if needed */
|
||||
char *varname; /* name of variable providing data, or NULL */
|
||||
struct StackElem *next; /* link to the next entry in the stack */
|
||||
} StackElem;
|
||||
|
||||
/*
|
||||
* All working state of the lexer must be stored in PsqlScanStateData
|
||||
* between calls. This allows us to have multiple open lexer operations,
|
||||
* which is needed for nested include files. The lexer itself is not
|
||||
* recursive, but it must be re-entrant.
|
||||
*/
|
||||
/* Working state shared by psqlscan.l and compatible lexers such as psqlscanslash.l. */
typedef struct PsqlScanStateData
|
||||
{
|
||||
yyscan_t scanner; /* Flex's state for this PsqlScanState */
|
||||
|
||||
/*
 * The backslash-command entry points in psqlscanslash.l point output_buf at
 * a buffer local to the call before invoking yylex() (or set it to NULL when
 * nothing is to be emitted), so it is only meaningful during a lexer call.
 */
PQExpBuffer output_buf; /* current output buffer */
|
||||
|
||||
/* buffer_stack is NULL when no variable expansion is in progress */
StackElem *buffer_stack; /* stack of variable expansion buffers */
|
||||
|
||||
/*
|
||||
* These variables always refer to the outer buffer, never to any stacked
|
||||
* variable-expansion buffer.
|
||||
*/
|
||||
/* the slash-command functions Assert this is non-NULL ("must be scanning already") */
YY_BUFFER_STATE scanbufhandle; /* flex buffer for the outer-level input */
|
||||
char *scanbuf; /* start of outer-level input buffer */
|
||||
const char *scanline; /* current input line at outer level */
|
||||
|
||||
/* safe_encoding, curline, refline are used by emit() to replace FFs */
|
||||
int encoding; /* encoding being used now */
|
||||
bool safe_encoding; /* is current encoding "safe"? */
|
||||
bool std_strings; /* are string literals standard? */
|
||||
const char *curline; /* actual flex input string for cur buf */
|
||||
const char *refline; /* original data for cur buffer */
|
||||
|
||||
/*
|
||||
* All this state lives across successive input lines, until explicitly
|
||||
* reset by psql_scan_reset. start_state is adopted by yylex() on entry,
|
||||
* and updated with its finishing state on exit.
|
||||
*/
|
||||
/* start_state is also the only thing that must be made valid when switching between lexers */
int start_state; /* yylex's starting/finishing state */
|
||||
int paren_depth; /* depth of nesting in parentheses */
|
||||
int xcdepth; /* depth of nesting in slash-star comments */
|
||||
char *dolqstart; /* current $foo$ quote start string */
|
||||
|
||||
/*
|
||||
* Callback functions provided by the program making use of the lexer.
|
||||
*/
|
||||
/* psqlscanslash.l uses the get_variable (may be NULL) and write_error members */
const PsqlScanCallbacks *callbacks;
|
||||
} PsqlScanStateData;
|
||||
|
||||
|
||||
/*
|
||||
* Functions exported by psqlscan.l, but only meant for use within
|
||||
* compatible lexers.
|
||||
*/
|
||||
extern void psqlscan_push_new_buffer(PsqlScanState state,
|
||||
const char *newstr, const char *varname);
|
||||
extern void psqlscan_pop_buffer_stack(PsqlScanState state);
|
||||
extern void psqlscan_select_top_buffer(PsqlScanState state);
|
||||
extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
|
||||
const char *txt, int len,
|
||||
char **txtcopy);
|
||||
extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
|
||||
extern char *psqlscan_extract_substring(PsqlScanState state,
|
||||
const char *txt, int len);
|
||||
extern void psqlscan_escape_variable(PsqlScanState state,
|
||||
const char *txt, int len,
|
||||
bool as_ident);
|
||||
|
||||
#endif /* PSQLSCAN_INT_H */
|
35
src/bin/psql/psqlscanslash.h
Normal file
35
src/bin/psql/psqlscanslash.h
Normal file
@ -0,0 +1,35 @@
|
||||
/*
|
||||
* psql - the PostgreSQL interactive terminal
|
||||
*
|
||||
* Copyright (c) 2000-2016, PostgreSQL Global Development Group
|
||||
*
|
||||
* src/bin/psql/psqlscanslash.h
|
||||
*/
|
||||
#ifndef PSQLSCANSLASH_H
|
||||
#define PSQLSCANSLASH_H
|
||||
|
||||
#include "psqlscan.h"
|
||||
|
||||
|
||||
/* Different ways for scan_slash_option to handle parameter words */
|
||||
/* Passed as the "type" argument of psql_scan_slash_option(). */
enum slash_option_type
|
||||
{
|
||||
OT_NORMAL, /* normal case */
|
||||
/* double quotes are stripped and unquoted letters are downcased */
OT_SQLID, /* treat as SQL identifier */
|
||||
/* double quotes are stripped as for OT_SQLID, but letter case is kept */
OT_SQLIDHACK, /* SQL identifier, but don't downcase */
|
||||
/* a leading "|" makes the rest of the line be taken as the argument */
OT_FILEPIPE, /* it's a filename or pipe */
|
||||
/* lexing starts directly in the whole-line state; leading whitespace is dropped */
OT_WHOLE_LINE, /* just snarf the rest of the line */
|
||||
/* :name, :'name', :"name" and `cmd` text are copied through unevaluated */
OT_NO_EVAL /* no expansion of backticks or variables */
|
||||
};
|
||||
|
||||
|
||||
extern char *psql_scan_slash_command(PsqlScanState state);
|
||||
|
||||
extern char *psql_scan_slash_option(PsqlScanState state,
|
||||
enum slash_option_type type,
|
||||
char *quote,
|
||||
bool semicolon);
|
||||
|
||||
extern void psql_scan_slash_command_end(PsqlScanState state);
|
||||
|
||||
#endif /* PSQLSCANSLASH_H */
|
735
src/bin/psql/psqlscanslash.l
Normal file
735
src/bin/psql/psqlscanslash.l
Normal file
@ -0,0 +1,735 @@
|
||||
%top{
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* psqlscanslash.l
|
||||
* lexical scanner for psql backslash commands
|
||||
*
|
||||
* XXX Avoid creating backtracking cases --- see the backend lexer for info.
|
||||
*
|
||||
* See psqlscan_int.h for additional commentary.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* src/bin/psql/psqlscanslash.l
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres_fe.h"
|
||||
|
||||
#include "psqlscanslash.h"
|
||||
|
||||
#include "libpq-fe.h"
|
||||
}
|
||||
|
||||
%{
|
||||
#include "psqlscan_int.h"
|
||||
|
||||
/*
|
||||
* Set the type of yyextra; we use it as a pointer back to the containing
|
||||
* PsqlScanState.
|
||||
*/
|
||||
#define YY_EXTRA_TYPE PsqlScanState
|
||||
|
||||
/*
|
||||
* These variables do not need to be saved across calls. Yeah, it's a bit
|
||||
* of a hack, but putting them into PsqlScanStateData would be klugy too.
|
||||
*/
|
||||
static enum slash_option_type option_type;
|
||||
static char *option_quote;
|
||||
static int unquoted_option_chars;
|
||||
static int backtick_start_offset;
|
||||
|
||||
|
||||
/* Return values from yylex() */
|
||||
#define LEXRES_EOL 0 /* end of input */
|
||||
#define LEXRES_OK 1 /* OK completion of backslash argument */
|
||||
|
||||
|
||||
static void evaluate_backtick(PsqlScanState state);
|
||||
|
||||
#define ECHO psqlscan_emit(cur_state, yytext, yyleng)
|
||||
|
||||
/*
|
||||
* Work around a bug in flex 2.5.35: it emits a couple of functions that
|
||||
* it forgets to emit declarations for. Since we use -Wmissing-prototypes,
|
||||
* this would cause warnings. Providing our own declarations should be
|
||||
* harmless even when the bug gets fixed.
|
||||
*/
|
||||
extern int slash_yyget_column(yyscan_t yyscanner);
|
||||
extern void slash_yyset_column(int column_no, yyscan_t yyscanner);
|
||||
|
||||
%}
|
||||
|
||||
%option reentrant
|
||||
%option 8bit
|
||||
%option never-interactive
|
||||
%option nodefault
|
||||
%option noinput
|
||||
%option nounput
|
||||
%option noyywrap
|
||||
%option warn
|
||||
%option prefix="slash_yy"
|
||||
|
||||
/*
|
||||
* OK, here is a short description of lex/flex rules behavior.
|
||||
* The longest pattern which matches an input string is always chosen.
|
||||
* For equal-length patterns, the first occurring in the rules list is chosen.
|
||||
* INITIAL is the starting state, to which all non-conditional rules apply.
|
||||
* Exclusive states change parsing rules while the state is active. When in
|
||||
* an exclusive state, only those rules defined for that state apply.
|
||||
*/
|
||||
|
||||
/* Exclusive states for lexing backslash commands */
|
||||
%x xslashcmd
|
||||
%x xslashargstart
|
||||
%x xslasharg
|
||||
%x xslashquote
|
||||
%x xslashbackquote
|
||||
%x xslashdquote
|
||||
%x xslashwholeline
|
||||
%x xslashend
|
||||
|
||||
/*
|
||||
* Assorted character class definitions that should match psqlscan.l.
|
||||
*/
|
||||
space [ \t\n\r\f]
|
||||
quote '
|
||||
xeoctesc [\\][0-7]{1,3}
|
||||
xehexesc [\\]x[0-9A-Fa-f]{1,2}
|
||||
xqdouble {quote}{quote}
|
||||
dquote \"
|
||||
variable_char [A-Za-z\200-\377_0-9]
|
||||
|
||||
other .
|
||||
|
||||
%%
|
||||
|
||||
%{
|
||||
/* Declare some local variables inside yylex(), for convenience */
|
||||
PsqlScanState cur_state = yyextra;
|
||||
PQExpBuffer output_buf = cur_state->output_buf;
|
||||
|
||||
/*
|
||||
* Force flex into the state indicated by start_state. This has a
|
||||
* couple of purposes: it lets some of the functions below set a
|
||||
* new starting state without ugly direct access to flex variables,
|
||||
* and it allows us to transition from one flex lexer to another
|
||||
* so that we can lex different parts of the source string using
|
||||
* separate lexers.
|
||||
*/
|
||||
BEGIN(cur_state->start_state);
|
||||
%}
|
||||
|
||||
/*
|
||||
* We don't really expect to be invoked in the INITIAL state in this
|
||||
* lexer; but if we are, just spit data to the output_buf until EOF.
|
||||
*/
|
||||
|
||||
{other}|\n { ECHO; }
|
||||
|
||||
/*
|
||||
* Exclusive lexer states to handle backslash command lexing
|
||||
*/
|
||||
|
||||
<xslashcmd>{
|
||||
/* command name ends at whitespace or backslash; eat all else */
|
||||
|
||||
{space}|"\\" {
|
||||
yyless(0);
|
||||
cur_state->start_state = YY_START;
|
||||
return LEXRES_OK;
|
||||
}
|
||||
|
||||
{other} { ECHO; }
|
||||
|
||||
}
|
||||
|
||||
<xslashargstart>{
|
||||
/*
|
||||
* Discard any whitespace before argument, then go to xslasharg state.
|
||||
* An exception is that "|" is only special at start of argument, so we
|
||||
* check for it here.
|
||||
*/
|
||||
|
||||
{space}+ { }
|
||||
|
||||
"|" {
|
||||
if (option_type == OT_FILEPIPE)
|
||||
{
|
||||
/* treat like whole-string case */
|
||||
ECHO;
|
||||
BEGIN(xslashwholeline);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* vertical bar is not special otherwise */
|
||||
yyless(0);
|
||||
BEGIN(xslasharg);
|
||||
}
|
||||
}
|
||||
|
||||
{other} {
|
||||
yyless(0);
|
||||
BEGIN(xslasharg);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
<xslasharg>{
|
||||
/*
|
||||
* Default processing of text in a slash command's argument.
|
||||
*
|
||||
* Note: unquoted_option_chars counts the number of characters at the
|
||||
* end of the argument that were not subject to any form of quoting.
|
||||
* psql_scan_slash_option needs this to strip trailing semicolons safely.
|
||||
*/
|
||||
|
||||
{space}|"\\" {
|
||||
/*
|
||||
* Unquoted space is end of arg; do not eat. Likewise
|
||||
* backslash is end of command or next command, do not eat
|
||||
*
|
||||
* XXX this means we can't conveniently accept options
|
||||
* that include unquoted backslashes; therefore, option
|
||||
* processing that encourages use of backslashes is rather
|
||||
* broken.
|
||||
*/
|
||||
yyless(0);
|
||||
cur_state->start_state = YY_START;
|
||||
return LEXRES_OK;
|
||||
}
|
||||
|
||||
{quote} {
|
||||
*option_quote = '\'';
|
||||
unquoted_option_chars = 0;
|
||||
BEGIN(xslashquote);
|
||||
}
|
||||
|
||||
"`" {
|
||||
backtick_start_offset = output_buf->len;
|
||||
*option_quote = '`';
|
||||
unquoted_option_chars = 0;
|
||||
BEGIN(xslashbackquote);
|
||||
}
|
||||
|
||||
{dquote} {
|
||||
ECHO;
|
||||
*option_quote = '"';
|
||||
unquoted_option_chars = 0;
|
||||
BEGIN(xslashdquote);
|
||||
}
|
||||
|
||||
:{variable_char}+ {
|
||||
/* Possible psql variable substitution */
|
||||
if (option_type == OT_NO_EVAL ||
|
||||
cur_state->callbacks->get_variable == NULL)
|
||||
ECHO;
|
||||
else
|
||||
{
|
||||
char *varname;
|
||||
char *value;
|
||||
|
||||
varname = psqlscan_extract_substring(cur_state,
|
||||
yytext + 1,
|
||||
yyleng - 1);
|
||||
value = cur_state->callbacks->get_variable(varname,
|
||||
false,
|
||||
false);
|
||||
free(varname);
|
||||
|
||||
/*
|
||||
* The variable value is just emitted without any
|
||||
* further examination. This is consistent with the
|
||||
* pre-8.0 code behavior, if not with the way that
|
||||
* variables are handled outside backslash commands.
|
||||
* Note that we needn't guard against recursion here.
|
||||
*/
|
||||
if (value)
|
||||
{
|
||||
appendPQExpBufferStr(output_buf, value);
|
||||
free(value);
|
||||
}
|
||||
else
|
||||
ECHO;
|
||||
|
||||
*option_quote = ':';
|
||||
}
|
||||
unquoted_option_chars = 0;
|
||||
}
|
||||
|
||||
:'{variable_char}+' {
|
||||
if (option_type == OT_NO_EVAL)
|
||||
ECHO;
|
||||
else
|
||||
{
|
||||
psqlscan_escape_variable(cur_state, yytext, yyleng, false);
|
||||
*option_quote = ':';
|
||||
}
|
||||
unquoted_option_chars = 0;
|
||||
}
|
||||
|
||||
|
||||
:\"{variable_char}+\" {
|
||||
if (option_type == OT_NO_EVAL)
|
||||
ECHO;
|
||||
else
|
||||
{
|
||||
psqlscan_escape_variable(cur_state, yytext, yyleng, true);
|
||||
*option_quote = ':';
|
||||
}
|
||||
unquoted_option_chars = 0;
|
||||
}
|
||||
|
||||
:'{variable_char}* {
|
||||
/* Throw back everything but the colon */
|
||||
yyless(1);
|
||||
unquoted_option_chars++;
|
||||
ECHO;
|
||||
}
|
||||
|
||||
:\"{variable_char}* {
|
||||
/* Throw back everything but the colon */
|
||||
yyless(1);
|
||||
unquoted_option_chars++;
|
||||
ECHO;
|
||||
}
|
||||
|
||||
{other} {
|
||||
unquoted_option_chars++;
|
||||
ECHO;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
<xslashquote>{
|
||||
/*
|
||||
* single-quoted text: copy literally except for '' and backslash
|
||||
* sequences
|
||||
*/
|
||||
|
||||
{quote} { BEGIN(xslasharg); }
|
||||
|
||||
{xqdouble} { appendPQExpBufferChar(output_buf, '\''); }
|
||||
|
||||
"\\n" { appendPQExpBufferChar(output_buf, '\n'); }
|
||||
"\\t" { appendPQExpBufferChar(output_buf, '\t'); }
|
||||
"\\b" { appendPQExpBufferChar(output_buf, '\b'); }
|
||||
"\\r" { appendPQExpBufferChar(output_buf, '\r'); }
|
||||
"\\f" { appendPQExpBufferChar(output_buf, '\f'); }
|
||||
|
||||
{xeoctesc} {
|
||||
/* octal case */
|
||||
appendPQExpBufferChar(output_buf,
|
||||
(char) strtol(yytext + 1, NULL, 8));
|
||||
}
|
||||
|
||||
{xehexesc} {
|
||||
/* hex case */
|
||||
appendPQExpBufferChar(output_buf,
|
||||
(char) strtol(yytext + 2, NULL, 16));
|
||||
}
|
||||
|
||||
"\\". { psqlscan_emit(cur_state, yytext + 1, 1); }
|
||||
|
||||
{other}|\n { ECHO; }
|
||||
|
||||
}
|
||||
|
||||
<xslashbackquote>{
|
||||
/*
|
||||
* backticked text: copy everything until next backquote, then evaluate.
|
||||
*
|
||||
* XXX Possible future behavioral change: substitute for :VARIABLE?
|
||||
*/
|
||||
|
||||
"`" {
|
||||
/* In NO_EVAL mode, don't evaluate the command */
|
||||
if (option_type != OT_NO_EVAL)
|
||||
evaluate_backtick(cur_state);
|
||||
BEGIN(xslasharg);
|
||||
}
|
||||
|
||||
{other}|\n { ECHO; }
|
||||
|
||||
}
|
||||
|
||||
<xslashdquote>{
|
||||
/* double-quoted text: copy verbatim, including the double quotes */
|
||||
|
||||
{dquote} {
|
||||
ECHO;
|
||||
BEGIN(xslasharg);
|
||||
}
|
||||
|
||||
{other}|\n { ECHO; }
|
||||
|
||||
}
|
||||
|
||||
<xslashwholeline>{
|
||||
/* copy everything until end of input line */
|
||||
/* but suppress leading whitespace */
|
||||
|
||||
{space}+ {
|
||||
if (output_buf->len > 0)
|
||||
ECHO;
|
||||
}
|
||||
|
||||
{other} { ECHO; }
|
||||
|
||||
}
|
||||
|
||||
<xslashend>{
|
||||
/* at end of command, eat a double backslash, but not anything else */
|
||||
|
||||
"\\\\" {
|
||||
cur_state->start_state = YY_START;
|
||||
return LEXRES_OK;
|
||||
}
|
||||
|
||||
{other}|\n {
|
||||
yyless(0);
|
||||
cur_state->start_state = YY_START;
|
||||
return LEXRES_OK;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* psql uses a single <<EOF>> rule, unlike the backend.
|
||||
*/
|
||||
|
||||
<<EOF>> {
|
||||
if (cur_state->buffer_stack == NULL)
|
||||
{
|
||||
cur_state->start_state = YY_START;
|
||||
return LEXRES_EOL; /* end of input reached */
|
||||
}
|
||||
|
||||
/*
|
||||
* We were expanding a variable, so pop the inclusion
|
||||
* stack and keep lexing
|
||||
*/
|
||||
psqlscan_pop_buffer_stack(cur_state);
|
||||
psqlscan_select_top_buffer(cur_state);
|
||||
}
|
||||
|
||||
%%
|
||||
|
||||
/*
|
||||
* Scan the command name of a psql backslash command. This should be called
|
||||
* after psql_scan() returns PSCAN_BACKSLASH. It is assumed that the input
|
||||
* has been consumed through the leading backslash.
|
||||
*
|
||||
* The return value is a malloc'd copy of the command name, as parsed off
|
||||
* from the input.
|
||||
*/
|
||||
char *
|
||||
psql_scan_slash_command(PsqlScanState state)
|
||||
{
|
||||
PQExpBufferData mybuf;
|
||||
|
||||
/* Must be scanning already */
|
||||
Assert(state->scanbufhandle != NULL);
|
||||
|
||||
/* Build a local buffer that we'll return the data of */
|
||||
initPQExpBuffer(&mybuf);
|
||||
|
||||
/* Set current output target */
|
||||
state->output_buf = &mybuf;
|
||||
|
||||
/* Set input source */
|
||||
if (state->buffer_stack != NULL)
|
||||
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
|
||||
else
|
||||
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
|
||||
|
||||
/*
|
||||
* Set lexer start state. Note that this is sufficient to switch
|
||||
* state->scanner over to using the tables in this lexer file.
|
||||
*/
|
||||
state->start_state = xslashcmd;
|
||||
|
||||
/* And lex. */
|
||||
yylex(state->scanner);
|
||||
|
||||
/* There are no possible errors in this lex state... */
|
||||
|
||||
/*
|
||||
* In case the caller returns to using the regular SQL lexer, reselect the
|
||||
* appropriate initial state.
|
||||
*/
|
||||
psql_scan_reselect_sql_lexer(state);
|
||||
|
||||
return mybuf.data;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse off the next argument for a backslash command, and return it as a
|
||||
* malloc'd string. If there are no more arguments, returns NULL.
|
||||
*
|
||||
* type tells what processing, if any, to perform on the option string;
|
||||
* for example, if it's a SQL identifier, we want to downcase any unquoted
|
||||
* letters.
|
||||
*
|
||||
* if quote is not NULL, *quote is set to 0 if no quoting was found, else
|
||||
* the last quote symbol used in the argument.
|
||||
*
|
||||
* if semicolon is true, unquoted trailing semicolon(s) that would otherwise
|
||||
* be taken as part of the option string will be stripped.
|
||||
*
|
||||
* NOTE: the only possible syntax errors for backslash options are unmatched
|
||||
* quotes, which are detected when we run out of input. Therefore, on a
|
||||
* syntax error we just throw away the string and return NULL; there is no
|
||||
* need to worry about flushing remaining input.
|
||||
*/
|
||||
char *
|
||||
psql_scan_slash_option(PsqlScanState state,
|
||||
enum slash_option_type type,
|
||||
char *quote,
|
||||
bool semicolon)
|
||||
{
|
||||
PQExpBufferData mybuf;
|
||||
int lexresult PG_USED_FOR_ASSERTS_ONLY;
|
||||
int final_state;
|
||||
char local_quote;
|
||||
|
||||
/* Must be scanning already */
|
||||
Assert(state->scanbufhandle != NULL);
|
||||
|
||||
if (quote == NULL)
|
||||
quote = &local_quote;
|
||||
*quote = 0;
|
||||
|
||||
/* Build a local buffer that we'll return the data of */
|
||||
initPQExpBuffer(&mybuf);
|
||||
|
||||
/* Set up static variables that will be used by yylex */
|
||||
option_type = type;
|
||||
option_quote = quote;
|
||||
unquoted_option_chars = 0;
|
||||
|
||||
/* Set current output target */
|
||||
state->output_buf = &mybuf;
|
||||
|
||||
/* Set input source */
|
||||
if (state->buffer_stack != NULL)
|
||||
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
|
||||
else
|
||||
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
|
||||
|
||||
/* Set lexer start state */
|
||||
if (type == OT_WHOLE_LINE)
|
||||
state->start_state = xslashwholeline;
|
||||
else
|
||||
state->start_state = xslashargstart;
|
||||
|
||||
/* And lex. */
|
||||
lexresult = yylex(state->scanner);
|
||||
|
||||
/* Save final state for a moment... */
|
||||
final_state = state->start_state;
|
||||
|
||||
/*
|
||||
* In case the caller returns to using the regular SQL lexer, reselect the
|
||||
* appropriate initial state.
|
||||
*/
|
||||
psql_scan_reselect_sql_lexer(state);
|
||||
|
||||
/*
|
||||
* Check the lex result: we should have gotten back either LEXRES_OK
|
||||
* or LEXRES_EOL (the latter indicating end of string). If we were inside
|
||||
* a quoted string, as indicated by final_state, EOL is an error.
|
||||
*/
|
||||
Assert(lexresult == LEXRES_EOL || lexresult == LEXRES_OK);
|
||||
|
||||
switch (final_state)
|
||||
{
|
||||
case xslashargstart:
|
||||
/* empty arg */
|
||||
break;
|
||||
case xslasharg:
|
||||
/* Strip any unquoted trailing semi-colons if requested */
|
||||
if (semicolon)
|
||||
{
|
||||
while (unquoted_option_chars-- > 0 &&
|
||||
mybuf.len > 0 &&
|
||||
mybuf.data[mybuf.len - 1] == ';')
|
||||
{
|
||||
mybuf.data[--mybuf.len] = '\0';
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If SQL identifier processing was requested, then we strip out
|
||||
* excess double quotes and downcase unquoted letters.
|
||||
* Doubled double-quotes become output double-quotes, per spec.
|
||||
*
|
||||
* Note that a string like FOO"BAR"BAZ will be converted to
|
||||
* fooBARbaz; this is somewhat inconsistent with the SQL spec,
|
||||
* which would have us parse it as several identifiers. But
|
||||
* for psql's purposes, we want a string like "foo"."bar" to
|
||||
* be treated as one option, so there's little choice.
|
||||
*/
|
||||
if (type == OT_SQLID || type == OT_SQLIDHACK)
|
||||
{
|
||||
bool inquotes = false;
|
||||
char *cp = mybuf.data;
|
||||
|
||||
while (*cp)
|
||||
{
|
||||
if (*cp == '"')
|
||||
{
|
||||
if (inquotes && cp[1] == '"')
|
||||
{
|
||||
/* Keep the first quote, remove the second */
|
||||
cp++;
|
||||
}
|
||||
inquotes = !inquotes;
|
||||
/* Collapse out quote at *cp */
|
||||
memmove(cp, cp + 1, strlen(cp));
|
||||
mybuf.len--;
|
||||
/* do not advance cp */
|
||||
}
|
||||
else
|
||||
{
|
||||
if (!inquotes && type == OT_SQLID)
|
||||
*cp = pg_tolower((unsigned char) *cp);
|
||||
cp += PQmblen(cp, state->encoding);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
case xslashquote:
|
||||
case xslashbackquote:
|
||||
case xslashdquote:
|
||||
/* must have hit EOL inside quotes */
|
||||
state->callbacks->write_error("unterminated quoted string\n");
|
||||
termPQExpBuffer(&mybuf);
|
||||
return NULL;
|
||||
case xslashwholeline:
|
||||
/* always okay */
|
||||
break;
|
||||
default:
|
||||
/* can't get here */
|
||||
fprintf(stderr, "invalid YY_START\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
/*
|
||||
* An unquoted empty argument isn't possible unless we are at end of
|
||||
* command. Return NULL instead.
|
||||
*/
|
||||
if (mybuf.len == 0 && *quote == 0)
|
||||
{
|
||||
termPQExpBuffer(&mybuf);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Else return the completed string. */
|
||||
return mybuf.data;
|
||||
}
|
||||
|
||||
/*
|
||||
* Eat up any unused \\ to complete a backslash command.
|
||||
*/
|
||||
void
|
||||
psql_scan_slash_command_end(PsqlScanState state)
|
||||
{
|
||||
/* Must be scanning already */
|
||||
Assert(state->scanbufhandle != NULL);
|
||||
|
||||
/* Set current output target */
|
||||
state->output_buf = NULL; /* we won't output anything */
|
||||
|
||||
/* Set input source */
|
||||
if (state->buffer_stack != NULL)
|
||||
yy_switch_to_buffer(state->buffer_stack->buf, state->scanner);
|
||||
else
|
||||
yy_switch_to_buffer(state->scanbufhandle, state->scanner);
|
||||
|
||||
/* Set lexer start state */
|
||||
state->start_state = xslashend;
|
||||
|
||||
/* And lex. */
|
||||
yylex(state->scanner);
|
||||
|
||||
/* There are no possible errors in this lex state... */
|
||||
|
||||
/*
|
||||
* We expect the caller to return to using the regular SQL lexer, so
|
||||
* reselect the appropriate initial state.
|
||||
*/
|
||||
psql_scan_reselect_sql_lexer(state);
|
||||
}
|
||||
|
||||
/*
|
||||
* Evaluate a backticked substring of a slash command's argument.
|
||||
*
|
||||
* The portion of output_buf starting at backtick_start_offset is evaluated
|
||||
* as a shell command and then replaced by the command's output.
|
||||
*/
|
||||
static void
|
||||
evaluate_backtick(PsqlScanState state)
|
||||
{
|
||||
PQExpBuffer output_buf = state->output_buf;
|
||||
char *cmd = output_buf->data + backtick_start_offset;
|
||||
PQExpBufferData cmd_output;
|
||||
FILE *fd;
|
||||
bool error = false;
|
||||
char buf[512];
|
||||
size_t result;
|
||||
|
||||
initPQExpBuffer(&cmd_output);
|
||||
|
||||
fd = popen(cmd, PG_BINARY_R);
|
||||
if (!fd)
|
||||
{
|
||||
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
|
||||
error = true;
|
||||
}
|
||||
|
||||
if (!error)
|
||||
{
|
||||
do
|
||||
{
|
||||
result = fread(buf, 1, sizeof(buf), fd);
|
||||
if (ferror(fd))
|
||||
{
|
||||
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
|
||||
error = true;
|
||||
break;
|
||||
}
|
||||
appendBinaryPQExpBuffer(&cmd_output, buf, result);
|
||||
} while (!feof(fd));
|
||||
}
|
||||
|
||||
if (fd && pclose(fd) == -1)
|
||||
{
|
||||
state->callbacks->write_error("%s: %s\n", cmd, strerror(errno));
|
||||
error = true;
|
||||
}
|
||||
|
||||
if (PQExpBufferDataBroken(cmd_output))
|
||||
{
|
||||
state->callbacks->write_error("%s: out of memory\n", cmd);
|
||||
error = true;
|
||||
}
|
||||
|
||||
/* Now done with cmd, delete it from output_buf */
|
||||
output_buf->len = backtick_start_offset;
|
||||
output_buf->data[output_buf->len] = '\0';
|
||||
|
||||
/* If no error, transfer result to output_buf */
|
||||
if (!error)
|
||||
{
|
||||
/* strip any trailing newline */
|
||||
if (cmd_output.len > 0 &&
|
||||
cmd_output.data[cmd_output.len - 1] == '\n')
|
||||
cmd_output.len--;
|
||||
appendBinaryPQExpBuffer(output_buf, cmd_output.data, cmd_output.len);
|
||||
}
|
||||
|
||||
termPQExpBuffer(&cmd_output);
|
||||
}
|
@ -16,7 +16,7 @@
|
||||
*
|
||||
* We allow any non-ASCII character, as well as ASCII letters, digits, and
|
||||
* underscore. Keep this in sync with the definition of variable_char in
|
||||
* psqlscan.l.
|
||||
* psqlscan.l and psqlscanslash.l.
|
||||
*/
|
||||
static bool
|
||||
valid_variable_name(const char *name)
|
||||
|
@ -64,7 +64,7 @@ my $frontend_extraincludes = {
|
||||
'initdb' => ['src/timezone'],
|
||||
'psql' => [ 'src/bin/pg_dump', 'src/backend' ] };
|
||||
my $frontend_extrasource = {
|
||||
'psql' => ['src/bin/psql/psqlscan.l'],
|
||||
'psql' => ['src/bin/psql/psqlscan.l', 'src/bin/psql/psqlscanslash.l'],
|
||||
'pgbench' =>
|
||||
[ 'src/bin/pgbench/exprscan.l', 'src/bin/pgbench/exprparse.y' ], };
|
||||
my @frontend_excludes = (
|
||||
|
@ -76,6 +76,7 @@ if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.c del /q src\pl\plpgsql\src\pl_
|
||||
if %DIST%==1 if exist src\pl\plpgsql\src\pl_gram.h del /q src\pl\plpgsql\src\pl_gram.h
|
||||
|
||||
if %DIST%==1 if exist src\bin\psql\psqlscan.c del /q src\bin\psql\psqlscan.c
|
||||
if %DIST%==1 if exist src\bin\psql\psqlscanslash.c del /q src\bin\psql\psqlscanslash.c
|
||||
|
||||
if %DIST%==1 if exist contrib\cube\cubescan.c del /q contrib\cube\cubescan.c
|
||||
if %DIST%==1 if exist contrib\cube\cubeparse.c del /q contrib\cube\cubeparse.c
|
||||
|
Loading…
x
Reference in New Issue
Block a user