1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-26 12:21:12 +03:00

replication parser: pure parser and reentrant scanner

Use the flex %option reentrant and the bison option %pure-parser to
make the generated scanner and parser pure, reentrant, and
thread-safe.

Make the generated scanner use palloc() etc. instead of malloc() etc.
Previously, we only used palloc() for the buffer, but flex would still
use malloc() for its internal structures.  As a result, there could be
some small memory leaks in case of uncaught errors.  Now, all the
memory is under palloc() control, so there are no more such issues.

Simplify flex scan buffer management: Instead of constructing the
buffer from pieces and then using yy_scan_buffer(), we can just use
yy_scan_string(), which does the same thing internally.

The previous code was necessary because we allocated the buffer with
palloc() and the rest of the state was handled by malloc().  But this
is no longer the case; everything is under palloc() now.

Use flex yyextra to handle context information, instead of global
variables.  This complements the other changes to make the scanner
reentrant.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Co-authored-by: Andreas Karlsson <andreas@proxel.se>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Discussion: https://www.postgresql.org/message-id/flat/eb6faeac-2a8a-4b69-9189-c33c520e5b7b@eisentraut.org
This commit is contained in:
Peter Eisentraut
2024-12-02 10:35:37 +01:00
parent 5af699066f
commit e4a8fb8fef
5 changed files with 113 additions and 77 deletions

View File

@ -9,7 +9,7 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
yyerror \
jsonpath_yyerror:3 \
parser_yyerror \
replication_yyerror \
replication_yyerror:2 \
scanner_yyerror \
syncrep_yyerror \
report_invalid_record:2 \

View File

@ -24,10 +24,6 @@
#include "repl_gram.h"
/* silence -Wmissing-variable-declarations */
extern int replication_yychar;
extern int replication_yynerrs;
/* Result of the parsing is returned here */
Node *replication_parse_result;
@ -43,6 +39,9 @@ Node *replication_parse_result;
%}
%parse-param {yyscan_t yyscanner}
%lex-param {yyscan_t yyscanner}
%pure-parser
%expect 0
%name-prefix="replication_yy"
@ -106,6 +105,8 @@ Node *replication_parse_result;
firstcmd: command opt_semicolon
{
replication_parse_result = $1;
(void) yynerrs; /* suppress compiler warning */
}
;

View File

@ -38,30 +38,36 @@ fprintf_to_ereport(const char *fmt, const char *msg)
ereport(ERROR, (errmsg_internal("%s", msg)));
}
/* Handle to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
struct replication_yy_extra_type
{
/* Pushed-back token (we only handle one) */
int repl_pushed_back_token;
/* Pushed-back token (we only handle one) */
static int repl_pushed_back_token;
/* Work area for collecting literals */
StringInfoData litbuf;
};
#define YY_EXTRA_TYPE struct replication_yy_extra_type *
/* Work area for collecting literals */
static StringInfoData litbuf;
static void startlit(void);
static char *litbufdup(void);
static void addlit(char *ytext, int yleng);
static void addlitchar(unsigned char ychar);
static void startlit(yyscan_t yyscanner);
static char *litbufdup(yyscan_t yyscanner);
static void addlit(char *ytext, int yleng, yyscan_t yyscanner);
static void addlitchar(unsigned char ychar, yyscan_t yyscanner);
/* LCOV_EXCL_START */
%}
%option reentrant
%option bison-bridge
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option noyyalloc
%option noyyrealloc
%option noyyfree
%option warn
%option prefix="replication_yy"
@ -108,11 +114,11 @@ identifier {ident_start}{ident_cont}*
/* This code is inserted at the start of replication_yylex() */
/* If we have a pushed-back token, return that. */
if (repl_pushed_back_token)
if (yyextra->repl_pushed_back_token)
{
int result = repl_pushed_back_token;
int result = yyextra->repl_pushed_back_token;
repl_pushed_back_token = 0;
yyextra->repl_pushed_back_token = 0;
return result;
}
%}
@ -142,7 +148,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
{space}+ { /* do nothing */ }
{digit}+ {
replication_yylval.uintval = strtoul(yytext, NULL, 10);
yylval->uintval = strtoul(yytext, NULL, 10);
return UCONST;
}
@ -150,34 +156,34 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
uint32 hi,
lo;
if (sscanf(yytext, "%X/%X", &hi, &lo) != 2)
replication_yyerror("invalid streaming start location");
replication_yylval.recptr = ((uint64) hi) << 32 | lo;
replication_yyerror(yyscanner, "invalid streaming start location");
yylval->recptr = ((uint64) hi) << 32 | lo;
return RECPTR;
}
{xqstart} {
BEGIN(xq);
startlit();
startlit(yyscanner);
}
<xq>{quotestop} {
yyless(1);
BEGIN(INITIAL);
replication_yylval.str = litbufdup();
yylval->str = litbufdup(yyscanner);
return SCONST;
}
<xq>{xqdouble} {
addlitchar('\'');
addlitchar('\'', yyscanner);
}
<xq>{xqinside} {
addlit(yytext, yyleng);
addlit(yytext, yyleng, yyscanner);
}
{xdstart} {
BEGIN(xd);
startlit();
startlit(yyscanner);
}
<xd>{xdstop} {
@ -185,20 +191,20 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
yyless(1);
BEGIN(INITIAL);
replication_yylval.str = litbufdup();
len = strlen(replication_yylval.str);
truncate_identifier(replication_yylval.str, len, true);
yylval->str = litbufdup(yyscanner);
len = strlen(yylval->str);
truncate_identifier(yylval->str, len, true);
return IDENT;
}
<xd>{xdinside} {
addlit(yytext, yyleng);
addlit(yytext, yyleng, yyscanner);
}
{identifier} {
int len = strlen(yytext);
replication_yylval.str = downcase_truncate_identifier(yytext, len, true);
yylval->str = downcase_truncate_identifier(yytext, len, true);
return IDENT;
}
@ -207,7 +213,7 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
return yytext[0];
}
<xq,xd><<EOF>> { replication_yyerror("unterminated quoted string"); }
<xq,xd><<EOF>> { replication_yyerror(yyscanner, "unterminated quoted string"); }
<<EOF>> {
@ -218,32 +224,36 @@ UPLOAD_MANIFEST { return K_UPLOAD_MANIFEST; }
/* LCOV_EXCL_STOP */
/* see scan.l */
#undef yyextra
#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
static void
startlit(void)
startlit(yyscan_t yyscanner)
{
initStringInfo(&litbuf);
initStringInfo(&yyextra->litbuf);
}
static char *
litbufdup(void)
litbufdup(yyscan_t yyscanner)
{
return litbuf.data;
return yyextra->litbuf.data;
}
static void
addlit(char *ytext, int yleng)
addlit(char *ytext, int yleng, yyscan_t yyscanner)
{
appendBinaryStringInfo(&litbuf, ytext, yleng);
appendBinaryStringInfo(&yyextra->litbuf, ytext, yleng);
}
static void
addlitchar(unsigned char ychar)
addlitchar(unsigned char ychar, yyscan_t yyscanner)
{
appendStringInfoChar(&litbuf, ychar);
appendStringInfoChar(&yyextra->litbuf, ychar);
}
void
replication_yyerror(const char *message)
replication_yyerror(yyscan_t yyscanner, const char *message)
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@ -251,35 +261,26 @@ replication_yyerror(const char *message)
}
void
replication_scanner_init(const char *str)
replication_scanner_init(const char *str, yyscan_t *yyscannerp)
{
Size slen = strlen(str);
char *scanbuf;
yyscan_t yyscanner;
struct replication_yy_extra_type *yyext = palloc0_object(struct replication_yy_extra_type);
/*
* Might be left over after ereport()
*/
if (YY_CURRENT_BUFFER)
yy_delete_buffer(YY_CURRENT_BUFFER);
if (yylex_init(yyscannerp) != 0)
elog(ERROR, "yylex_init() failed: %m");
/*
* Make a scan buffer with special termination needed by flex.
*/
scanbuf = (char *) palloc(slen + 2);
memcpy(scanbuf, str, slen);
scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
yyscanner = *yyscannerp;
/* Make sure we start in proper state */
BEGIN(INITIAL);
repl_pushed_back_token = 0;
yyset_extra(yyext, yyscanner);
yy_scan_string(str, yyscanner);
}
void
replication_scanner_finish(void)
replication_scanner_finish(yyscan_t yyscanner)
{
yy_delete_buffer(scanbufhandle);
scanbufhandle = NULL;
pfree(yyextra);
yylex_destroy(yyscanner);
}
/*
@ -291,9 +292,10 @@ replication_scanner_finish(void)
* IDENT token here, although some other cases are possible.
*/
bool
replication_scanner_is_replication_command(void)
replication_scanner_is_replication_command(yyscan_t yyscanner)
{
int first_token = replication_yylex();
YYSTYPE dummy;
int first_token = replication_yylex(&dummy, yyscanner);
switch (first_token)
{
@ -308,10 +310,37 @@ replication_scanner_is_replication_command(void)
case K_UPLOAD_MANIFEST:
case K_SHOW:
/* Yes; push back the first token so we can parse later. */
repl_pushed_back_token = first_token;
yyextra->repl_pushed_back_token = first_token;
return true;
default:
/* Nope; we don't bother to push back the token. */
return false;
}
}
/*
* Interface functions to make flex use palloc() instead of malloc().
* It'd be better to make these static, but flex insists otherwise.
*/
/*
 * yyalloc: allocation hook called by the flex-generated scanner.
 *
 * The request for "size" bytes is handed straight to palloc().  The
 * scanner handle is part of the signature but is not consulted here.
 */
void *
yyalloc(yy_size_t size, yyscan_t yyscanner)
{
	void	   *chunk = palloc(size);

	return chunk;
}
/*
 * yyrealloc: resize hook called by the flex-generated scanner.
 *
 * A null "ptr" means there is no existing chunk, so the call is served
 * as a fresh palloc() of "size" bytes; otherwise the chunk is resized
 * with repalloc().  The scanner handle is not consulted here.
 */
void *
yyrealloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
{
	/* No existing chunk to grow: behave like a plain allocation. */
	if (ptr == NULL)
		return palloc(size);

	return repalloc(ptr, size);
}
/*
 * yyfree: release hook called by the flex-generated scanner.
 *
 * Null pointers are silently ignored; anything else is released with
 * pfree().  The scanner handle is not consulted here.
 */
void
yyfree(void *ptr, yyscan_t yyscanner)
{
	/* Nothing to release for a null pointer. */
	if (ptr == NULL)
		return;

	pfree(ptr);
}

View File

@ -1951,6 +1951,7 @@ WalSndWaitForWal(XLogRecPtr loc)
bool
exec_replication_command(const char *cmd_string)
{
yyscan_t scanner;
int parse_rc;
Node *cmd_node;
const char *cmdtag;
@ -1990,15 +1991,15 @@ exec_replication_command(const char *cmd_string)
ALLOCSET_DEFAULT_SIZES);
old_context = MemoryContextSwitchTo(cmd_context);
replication_scanner_init(cmd_string);
replication_scanner_init(cmd_string, &scanner);
/*
* Is it a WalSender command?
*/
if (!replication_scanner_is_replication_command())
if (!replication_scanner_is_replication_command(scanner))
{
/* Nope; clean up and get out. */
replication_scanner_finish();
replication_scanner_finish(scanner);
MemoryContextSwitchTo(old_context);
MemoryContextDelete(cmd_context);
@ -2016,13 +2017,13 @@ exec_replication_command(const char *cmd_string)
/*
* Looks like a WalSender command, so parse it.
*/
parse_rc = replication_yyparse();
parse_rc = replication_yyparse(scanner);
if (parse_rc != 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg_internal("replication command parser returned %d",
parse_rc)));
replication_scanner_finish();
replication_scanner_finish(scanner);
cmd_node = replication_parse_result;

View File

@ -125,12 +125,17 @@ extern void WalSndSetState(WalSndState state);
* Internal functions for parsing the replication grammar, in repl_gram.y and
* repl_scanner.l
*/
extern int replication_yyparse(void);
extern int replication_yylex(void);
extern void replication_yyerror(const char *message) pg_attribute_noreturn();
extern void replication_scanner_init(const char *str);
extern void replication_scanner_finish(void);
extern bool replication_scanner_is_replication_command(void);
union YYSTYPE;
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
#endif
extern int replication_yyparse(yyscan_t yyscanner);
extern int replication_yylex(union YYSTYPE *yylval_param, yyscan_t yyscanner);
extern void replication_yyerror(yyscan_t yyscanner, const char *message) pg_attribute_noreturn();
extern void replication_scanner_init(const char *str, yyscan_t *yyscannerp);
extern void replication_scanner_finish(yyscan_t yyscanner);
extern bool replication_scanner_is_replication_command(yyscan_t yyscanner);
extern PGDLLIMPORT Node *replication_parse_result;