mirror of
https://github.com/postgres/postgres.git
synced 2025-04-22 23:02:54 +03:00
This gets us to a point where psqlscan.l can be used by other frontend programs for the same purpose psql uses it for, ie to detect when it's collected a complete SQL command from input that is divided across line boundaries. Moreover, other programs can supply their own lexers for backslash commands of their own choosing. A follow-on patch will use this in pgbench. The end result here is roughly the same as in Kyotaro Horiguchi's 0001-Make-SQL-parser-part-of-psqlscan-independent-from-ps.patch, although the details of the method for switching between lexers are quite different. Basically, in this patch we share the entire PsqlScanState, YY_BUFFER_STATE stack, *and* yyscan_t between different lexers. The only thing we need to do to switch to a different lexer is to make sure the start_state is valid for the new lexer. This works because flex doesn't keep any other persistent state that depends on the specific lexing tables generated for a particular .l file. (We are assuming that both lexers are built with the same flex version, or at least versions that are compatible with respect to the contents of yyscan_t; but that doesn't seem likely to be a big problem in practice, considering how slowly flex changes.) Aside from being more efficient than Horiguchi-san's original solution, this avoids possible corner-case changes in semantics: the original code was capable of popping the input buffer stack while still staying in backslash-related parsing states. I'm not sure that that equates to any useful user-visible behaviors, but I'm not sure it doesn't either, so I'm loath to assume that we only need to consider the topmost buffer when parsing a backslash command. I've attempted to update the MSVC build scripts for the added .l file, but will rely on the buildfarm to see if I missed anything. Kyotaro Horiguchi and Tom Lane
130 lines
5.3 KiB
C
130 lines
5.3 KiB
C
/*
 * psqlscan_int.h
 *	  lexical scanner internal declarations
 *
 * This file declares the PsqlScanStateData structure used by psqlscan.l
 * and shared by other lexers compatible with it, such as psqlscanslash.l.
 *
 * One difficult aspect of this code is that we need to work in multibyte
 * encodings that are not ASCII-safe.  A "safe" encoding is one in which each
 * byte of a multibyte character has the high bit set (it's >= 0x80).  Since
 * all our lexing rules treat all high-bit-set characters alike, we don't
 * really need to care whether such a byte is part of a sequence or not.
 * In an "unsafe" encoding, we still expect the first byte of a multibyte
 * sequence to be >= 0x80, but later bytes might not be.  If we scan such
 * a sequence as-is, the lexing rules could easily be fooled into matching
 * such bytes to ordinary ASCII characters.  Our solution for this is to
 * substitute 0xFF for each non-first byte within the data presented to flex.
 * The flex rules will then pass the FF's through unmolested.  The
 * psqlscan_emit() subroutine is responsible for looking back to the original
 * string and replacing FF's with the corresponding original bytes.
 *
 * Another interesting thing we do here is scan different parts of the same
 * input with physically separate flex lexers (ie, lexers written in separate
 * .l files).  We can get away with this because the only part of the
 * persistent state of a flex lexer that depends on its parsing rule tables
 * is the start state number, which is easy enough to manage --- usually,
 * in fact, we just need to set it to INITIAL when changing lexers.  But to
 * make that work at all, we must use re-entrant lexers, so that all the
 * relevant state is in the yyscan_t attached to the PsqlScanState;
 * if we were using lexers with separate static state we would soon end up
 * with dangling buffer pointers in one or the other.  Also note that this
 * is unlikely to work very nicely if the lexers aren't all built with the
 * same flex version.
 *
 * Copyright (c) 2000-2016, PostgreSQL Global Development Group
 *
 * src/bin/psql/psqlscan_int.h
 */
|
|
#ifndef PSQLSCAN_INT_H
|
|
#define PSQLSCAN_INT_H
|
|
|
|
#include "psqlscan.h"
|
|
|
|
/*
 * This is just to allow this file to be compilable standalone.
 *
 * YY_BUFFER_STATE is declared here as a pointer to an incomplete struct, so
 * this header never needs to see the layout of struct yy_buffer_state.  The
 * typedef is skipped when YY_TYPEDEF_YY_BUFFER_STATE is already defined,
 * i.e. when something included earlier has already supplied it.
 */
#ifndef YY_TYPEDEF_YY_BUFFER_STATE
#define YY_TYPEDEF_YY_BUFFER_STATE
typedef struct yy_buffer_state *YY_BUFFER_STATE;
#endif
|
|
|
|
/*
|
|
* We use a stack of flex buffers to handle substitution of psql variables.
|
|
* Each stacked buffer contains the as-yet-unread text from one psql variable.
|
|
* When we pop the stack all the way, we resume reading from the outer buffer
|
|
* identified by scanbufhandle.
|
|
*/
|
|
typedef struct StackElem
|
|
{
|
|
YY_BUFFER_STATE buf; /* flex input control structure */
|
|
char *bufstring; /* data actually being scanned by flex */
|
|
char *origstring; /* copy of original data, if needed */
|
|
char *varname; /* name of variable providing data, or NULL */
|
|
struct StackElem *next;
|
|
} StackElem;
|
|
|
|
/*
|
|
* All working state of the lexer must be stored in PsqlScanStateData
|
|
* between calls. This allows us to have multiple open lexer operations,
|
|
* which is needed for nested include files. The lexer itself is not
|
|
* recursive, but it must be re-entrant.
|
|
*/
|
|
typedef struct PsqlScanStateData
|
|
{
|
|
yyscan_t scanner; /* Flex's state for this PsqlScanState */
|
|
|
|
PQExpBuffer output_buf; /* current output buffer */
|
|
|
|
StackElem *buffer_stack; /* stack of variable expansion buffers */
|
|
|
|
/*
|
|
* These variables always refer to the outer buffer, never to any stacked
|
|
* variable-expansion buffer.
|
|
*/
|
|
YY_BUFFER_STATE scanbufhandle;
|
|
char *scanbuf; /* start of outer-level input buffer */
|
|
const char *scanline; /* current input line at outer level */
|
|
|
|
/* safe_encoding, curline, refline are used by emit() to replace FFs */
|
|
int encoding; /* encoding being used now */
|
|
bool safe_encoding; /* is current encoding "safe"? */
|
|
bool std_strings; /* are string literals standard? */
|
|
const char *curline; /* actual flex input string for cur buf */
|
|
const char *refline; /* original data for cur buffer */
|
|
|
|
/*
|
|
* All this state lives across successive input lines, until explicitly
|
|
* reset by psql_scan_reset. start_state is adopted by yylex() on entry,
|
|
* and updated with its finishing state on exit.
|
|
*/
|
|
int start_state; /* yylex's starting/finishing state */
|
|
int paren_depth; /* depth of nesting in parentheses */
|
|
int xcdepth; /* depth of nesting in slash-star comments */
|
|
char *dolqstart; /* current $foo$ quote start string */
|
|
|
|
/*
|
|
* Callback functions provided by the program making use of the lexer.
|
|
*/
|
|
const PsqlScanCallbacks *callbacks;
|
|
} PsqlScanStateData;
|
|
|
|
|
|
/*
|
|
* Functions exported by psqlscan.l, but only meant for use within
|
|
* compatible lexers.
|
|
*/
|
|
extern void psqlscan_push_new_buffer(PsqlScanState state,
|
|
const char *newstr, const char *varname);
|
|
extern void psqlscan_pop_buffer_stack(PsqlScanState state);
|
|
extern void psqlscan_select_top_buffer(PsqlScanState state);
|
|
extern YY_BUFFER_STATE psqlscan_prepare_buffer(PsqlScanState state,
|
|
const char *txt, int len,
|
|
char **txtcopy);
|
|
extern void psqlscan_emit(PsqlScanState state, const char *txt, int len);
|
|
extern char *psqlscan_extract_substring(PsqlScanState state,
|
|
const char *txt, int len);
|
|
extern void psqlscan_escape_variable(PsqlScanState state,
|
|
const char *txt, int len,
|
|
bool as_ident);
|
|
|
|
#endif /* PSQLSCAN_INT_H */
|