1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-26 12:21:12 +03:00

replication parser: pure parser and reentrant scanner

Use the flex %option reentrant and the bison option %pure-parser to
make the generated scanner and parser pure, reentrant, and
thread-safe.

Make the generated scanner use palloc() etc. instead of malloc() etc.
Previously, we only used palloc() for the buffer, but flex would still
use malloc() for its internal structures.  As a result, there could be
some small memory leaks in case of uncaught errors.  Now, all the
memory is under palloc() control, so there are no more such issues.

Simplify flex scan buffer management: Instead of constructing the
buffer from pieces and then using yy_scan_buffer(), we can just use
yy_scan_string(), which internally copies the string, appends the two
end-of-buffer bytes, and then calls yy_scan_buffer() itself.

The previous code was necessary because we allocated the buffer with
palloc() and the rest of the state was handled by malloc().  But this
is no longer the case; everything is under palloc() now.

Use flex yyextra to handle context information, instead of global
variables.  This complements the other changes to make the scanner
reentrant.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Co-authored-by: Andreas Karlsson <andreas@proxel.se>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Discussion: https://www.postgresql.org/message-id/flat/eb6faeac-2a8a-4b69-9189-c33c520e5b7b@eisentraut.org
This commit is contained in:
Peter Eisentraut
2024-12-02 10:35:37 +01:00
parent 5af699066f
commit e4a8fb8fef
5 changed files with 113 additions and 77 deletions

View File

@ -9,7 +9,7 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
yyerror \ yyerror \
jsonpath_yyerror:3 \ jsonpath_yyerror:3 \
parser_yyerror \ parser_yyerror \
replication_yyerror \ replication_yyerror:2 \
scanner_yyerror \ scanner_yyerror \
syncrep_yyerror \ syncrep_yyerror \
report_invalid_record:2 \ report_invalid_record:2 \

View File

@ -24,10 +24,6 @@
#include "repl_gram.h" #include "repl_gram.h"
/* silence -Wmissing-variable-declarations */
extern int replication_yychar;
extern int replication_yynerrs;
/* Result of the parsing is returned here */ /* Result of the parsing is returned here */
Node *replication_parse_result; Node *replication_parse_result;
@ -43,6 +39,9 @@ Node *replication_parse_result;
%} %}
%parse-param {yyscan_t yyscanner}
%lex-param {yyscan_t yyscanner}
%pure-parser
%expect 0 %expect 0
%name-prefix="replication_yy" %name-prefix="replication_yy"
@ -106,6 +105,8 @@ Node *replication_parse_result;
firstcmd: command opt_semicolon firstcmd: command opt_semicolon
{ {
replication_parse_result = $1; replication_parse_result = $1;
(void) yynerrs; /* suppress compiler warning */
} }
; ;

View File

@ -38,30 +38,36 @@ fprintf_to_ereport(const char *fmt, const char *msg)
ereport(ERROR, (errmsg_internal("%s", msg))); ereport(ERROR, (errmsg_internal("%s", msg)));
} }
/* Handle to the buffer that the lexer uses internally */ struct replication_yy_extra_type
static YY_BUFFER_STATE scanbufhandle; {
/* Pushed-back token (we only handle one) */
int repl_pushed_back_token;
/* Pushed-back token (we only handle one) */ /* Work area for collecting literals */
static int repl_pushed_back_token; StringInfoData litbuf;
};
#define YY_EXTRA_TYPE struct replication_yy_extra_type *
/* Work area for collecting literals */ static void startlit(yyscan_t yyscanner);
static StringInfoData litbuf; static char *litbufdup(yyscan_t yyscanner);
static void addlit(char *ytext, int yleng, yyscan_t yyscanner);
static void startlit(void); static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
static char *litbufdup(void);
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char ychar);
/* LCOV_EXCL_START */ /* LCOV_EXCL_START */
%} %}
%option reentrant
%option bison-bridge
%option 8bit %option 8bit
%option never-interactive %option never-interactive
%option nodefault %option nodefault
%option noinput %option noinput
%option nounput %option nounput
%option noyywrap %option noyywrap
%option noyyalloc
%option noyyrealloc
%option noyyfree
%option warn %option warn
%option prefix="replication_yy" %option prefix="replication_yy"
@ -108,11 +114,11 @@ identifier {ident_start}{ident_cont}*
/* This code is inserted at the start of replication_yylex() */ /* This code is inserted at the start of replication_yylex() */
/* If we have a pushed-back token, return that. */ /* If we have a pushed-back token, return that. */
if (repl_pushed_back_token) if (yyextra->repl_pushed_back_token)
{ {
int result = repl_pushed_back_token; int result = yyextra->repl_pushed_back_token;
repl_pushed_back_token = 0; yyextra->repl_pushed_back_token = 0;
return result; return result;
} }
%} %}
@ -142,7 +148,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
{space}+ { /* do nothing */ } {space}+ { /* do nothing */ }
{digit}+ { {digit}+ {
replication_yylval.uintval = strtoul(yytext, NULL, 10); yylval->uintval = strtoul(yytext, NULL, 10);
return UCONST; return UCONST;
} }
@ -150,34 +156,34 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
uint32 hi, uint32 hi,
lo; lo;
if (sscanf(yytext, "%X/%X", &hi, &lo) != 2) if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
replication_yyerror("invalid streaming start location"); replication_yyerror(yyscanner, "invalid streaming start location");
replication_yylval.recptr = ((uint64) hi) << 32 | lo; yylval->recptr = ((uint64) hi) << 32 | lo;
return RECPTR; return RECPTR;
} }
{xqstart} { {xqstart} {
BEGIN(xq); BEGIN(xq);
startlit(); startlit(yyscanner);
} }
<xq>{quotestop} { <xq>{quotestop} {
yyless(1); yyless(1);
BEGIN(INITIAL); BEGIN(INITIAL);
replication_yylval.str = litbufdup(); yylval->str = litbufdup(yyscanner);
return SCONST; return SCONST;
} }
<xq>{xqdouble} { <xq>{xqdouble} {
addlitchar('\''); addlitchar('\'', yyscanner);
} }
<xq>{xqinside} { <xq>{xqinside} {
addlit(yytext, yyleng); addlit(yytext, yyleng, yyscanner);
} }
{xdstart} { {xdstart} {
BEGIN(xd); BEGIN(xd);
startlit(); startlit(yyscanner);
} }
<xd>{xdstop} { <xd>{xdstop} {
@ -185,20 +191,20 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
yyless(1); yyless(1);
BEGIN(INITIAL); BEGIN(INITIAL);
replication_yylval.str = litbufdup(); yylval->str = litbufdup(yyscanner);
len = strlen(replication_yylval.str); len = strlen(yylval->str);
truncate_identifier(replication_yylval.str, len, true); truncate_identifier(yylval->str, len, true);
return IDENT; return IDENT;
} }
<xd>{xdinside} { <xd>{xdinside} {
addlit(yytext, yyleng); addlit(yytext, yyleng, yyscanner);
} }
{identifier} { {identifier} {
int len = strlen(yytext); int len = strlen(yytext);
replication_yylval.str = downcase_truncate_identifier(yytext, len, true); yylval->str = downcase_truncate_identifier(yytext, len, true);
return IDENT; return IDENT;
} }
@ -207,7 +213,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
return yytext[0]; return yytext[0];
} }
<xq,xd><<EOF>> { replication_yyerror("unterminated quoted string"); } <xq,xd><<EOF>> { replication_yyerror(yyscanner, "unterminated quoted string"); }
<<EOF>> { <<EOF>> {
@ -218,32 +224,36 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
/* LCOV_EXCL_STOP */ /* LCOV_EXCL_STOP */
/* see scan.l */
#undef yyextra
#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
static void static void
startlit(void) startlit(yyscan_t yyscanner)
{ {
initStringInfo(&litbuf); initStringInfo(&yyextra->litbuf);
} }
static char * static char *
litbufdup(void) litbufdup(yyscan_t yyscanner)
{ {
return litbuf.data; return yyextra->litbuf.data;
} }
static void static void
addlit(char *ytext, int yleng) addlit(char *ytext, int yleng, yyscan_t yyscanner)
{ {
appendBinaryStringInfo(&litbuf, ytext, yleng); appendBinaryStringInfo(&yyextra->litbuf, ytext, yleng);
} }
static void static void
addlitchar(unsigned char ychar) addlitchar(unsigned char ychar, yyscan_t yyscanner)
{ {
appendStringInfoChar(&litbuf, ychar); appendStringInfoChar(&yyextra->litbuf, ychar);
} }
void void
replication_yyerror(const char *message) replication_yyerror(yyscan_t yyscanner, const char *message)
{ {
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
@ -251,35 +261,26 @@ replication_yyerror(const char *message)
} }
void void
replication_scanner_init(const char *str) replication_scanner_init(const char *str, yyscan_t *yyscannerp)
{ {
Size slen = strlen(str); yyscan_t yyscanner;
char *scanbuf; struct replication_yy_extra_type *yyext = palloc0_object(struct replication_yy_extra_type);
/* if (yylex_init(yyscannerp) != 0)
* Might be left over after ereport() elog(ERROR, "yylex_init() failed: %m");
*/
if (YY_CURRENT_BUFFER)
yy_delete_buffer(YY_CURRENT_BUFFER);
/* yyscanner = *yyscannerp;
* Make a scan buffer with special termination needed by flex.
*/
scanbuf = (char *) palloc(slen + 2);
memcpy(scanbuf, str, slen);
scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
/* Make sure we start in proper state */ yyset_extra(yyext, yyscanner);
BEGIN(INITIAL);
repl_pushed_back_token = 0; yy_scan_string(str, yyscanner);
} }
void void
replication_scanner_finish(void) replication_scanner_finish(yyscan_t yyscanner)
{ {
yy_delete_buffer(scanbufhandle); pfree(yyextra);
scanbufhandle = NULL; yylex_destroy(yyscanner);
} }
/* /*
@ -291,9 +292,10 @@ replication_scanner_finish(void)
* IDENT token here, although some other cases are possible. * IDENT token here, although some other cases are possible.
*/ */
bool bool
replication_scanner_is_replication_command(void) replication_scanner_is_replication_command(yyscan_t yyscanner)
{ {
int first_token = replication_yylex(); YYSTYPE dummy;
int first_token = replication_yylex(&dummy, yyscanner);
switch (first_token) switch (first_token)
{ {
@ -308,10 +310,37 @@ replication_scanner_is_replication_command(void)
case K_UPLOAD_MANIFEST: case K_UPLOAD_MANIFEST:
case K_SHOW: case K_SHOW:
/* Yes; push back the first token so we can parse later. */ /* Yes; push back the first token so we can parse later. */
repl_pushed_back_token = first_token; yyextra->repl_pushed_back_token = first_token;
return true; return true;
default: default:
/* Nope; we don't bother to push back the token. */ /* Nope; we don't bother to push back the token. */
return false; return false;
} }
} }
/*
* Interface functions to make flex use palloc() instead of malloc().
* It'd be better to make these static, but flex insists otherwise.
*/
void *
yyalloc(yy_size_t size, yyscan_t yyscanner)
{
return palloc(size);
}
void *
yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
{
if (ptr)
return repalloc(ptr, size);
else
return palloc(size);
}
void
yyfree(void *ptr, yyscan_t yyscanner)
{
if (ptr)
pfree(ptr);
}

View File

@ -1951,6 +1951,7 @@ WalSndWaitForWal(XLogRecPtr loc)
bool bool
exec_replication_command(const char *cmd_string) exec_replication_command(const char *cmd_string)
{ {
yyscan_t scanner;
int parse_rc; int parse_rc;
Node *cmd_node; Node *cmd_node;
const char *cmdtag; const char *cmdtag;
@ -1990,15 +1991,15 @@ exec_replication_command(const char *cmd_string)
ALLOCSET_DEFAULT_SIZES); ALLOCSET_DEFAULT_SIZES);
old_context = MemoryContextSwitchTo(cmd_context); old_context = MemoryContextSwitchTo(cmd_context);
replication_scanner_init(cmd_string); replication_scanner_init(cmd_string, &scanner);
/* /*
* Is it a WalSender command? * Is it a WalSender command?
*/ */
if (!replication_scanner_is_replication_command()) if (!replication_scanner_is_replication_command(scanner))
{ {
/* Nope; clean up and get out. */ /* Nope; clean up and get out. */
replication_scanner_finish(); replication_scanner_finish(scanner);
MemoryContextSwitchTo(old_context); MemoryContextSwitchTo(old_context);
MemoryContextDelete(cmd_context); MemoryContextDelete(cmd_context);
@ -2016,13 +2017,13 @@ exec_replication_command(const char *cmd_string)
/* /*
* Looks like a WalSender command, so parse it. * Looks like a WalSender command, so parse it.
*/ */
parse_rc = replication_yyparse(); parse_rc = replication_yyparse(scanner);
if (parse_rc != 0) if (parse_rc != 0)
ereport(ERROR, ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR), (errcode(ERRCODE_SYNTAX_ERROR),
errmsg_internal("replication command parser returned %d", errmsg_internal("replication command parser returned %d",
parse_rc))); parse_rc)));
replication_scanner_finish(); replication_scanner_finish(scanner);
cmd_node = replication_parse_result; cmd_node = replication_parse_result;

View File

@ -125,12 +125,17 @@ extern void WalSndSetState(WalSndState state);
* Internal functions for parsing the replication grammar, in repl_gram.y and * Internal functions for parsing the replication grammar, in repl_gram.y and
* repl_scanner.l * repl_scanner.l
*/ */
extern int replication_yyparse(void); union YYSTYPE;
extern int replication_yylex(void); #ifndef YY_TYPEDEF_YY_SCANNER_T
extern void replication_yyerror(const char *message) pg_attribute_noreturn(); #define YY_TYPEDEF_YY_SCANNER_T
extern void replication_scanner_init(const char *str); typedef void *yyscan_t;
extern void replication_scanner_finish(void); #endif
extern bool replication_scanner_is_replication_command(void); extern int replication_yyparse(yyscan_t yyscanner);
extern int replication_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner);
extern void replication_yyerror(yyscan_t yyscanner, const char *message) pg_attribute_noreturn();
extern void replication_scanner_init(const char *str, yyscan_t *yyscannerp);
extern void replication_scanner_finish(yyscan_t yyscanner);
extern bool replication_scanner_is_replication_command(yyscan_t yyscanner);
extern PGDLLIMPORT Node *replication_parse_result; extern PGDLLIMPORT Node *replication_parse_result;