1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-26 12:21:12 +03:00

jsonpath scanner: reentrant scanner

Use the flex %option reentrant to make the generated scanner
reentrant and thread-safe.  Note: The parser was already pure.

Simplify flex scan buffer management: Instead of constructing the
buffer from pieces and then using yy_scan_buffer(), we can just use
yy_scan_string(), which does the same thing internally.  (Actually, we
use yy_scan_bytes() here because we already have the length.)

Use flex yyextra to handle context information, instead of global
variables.  This complements the other changes to make the scanner
reentrant.

Reviewed-by: Heikki Linnakangas <hlinnaka@iki.fi>
Reviewed-by: Andreas Karlsson <andreas@proxel.se>
Discussion: https://www.postgresql.org/message-id/flat/eb6faeac-2a8a-4b69-9189-c33c520e5b7b@eisentraut.org
This commit is contained in:
Peter Eisentraut
2024-12-24 23:42:41 +01:00
parent 9b254895c3
commit 2a7425d7ee
4 changed files with 145 additions and 154 deletions

View File

@ -7,7 +7,7 @@ GETTEXT_TRIGGERS = $(BACKEND_COMMON_GETTEXT_TRIGGERS) \
GUC_check_errhint \
write_stderr \
yyerror \
jsonpath_yyerror:3 \
jsonpath_yyerror:4 \
parser_yyerror \
replication_yyerror:2 \
scanner_yyerror \

View File

@ -60,8 +60,10 @@ static bool makeItemLikeRegex(JsonPathParseItem *expr,
%name-prefix="jsonpath_yy"
%parse-param {JsonPathParseResult **result}
%parse-param {struct Node *escontext}
%parse-param {yyscan_t yyscanner}
%lex-param {JsonPathParseResult **result}
%lex-param {struct Node *escontext}
%lex-param {yyscan_t yyscanner}
%union
{

View File

@ -22,17 +22,25 @@ typedef struct JsonPathString
int total;
} JsonPathString;
#ifndef YY_TYPEDEF_YY_SCANNER_T
#define YY_TYPEDEF_YY_SCANNER_T
typedef void *yyscan_t;
#endif
#include "utils/jsonpath.h"
#include "jsonpath_gram.h"
#define YY_DECL extern int jsonpath_yylex(YYSTYPE *yylval_param, \
JsonPathParseResult **result, \
struct Node *escontext)
struct Node *escontext, \
yyscan_t yyscanner)
YY_DECL;
extern int jsonpath_yyparse(JsonPathParseResult **result,
struct Node *escontext);
struct Node *escontext,
yyscan_t yyscanner);
extern void jsonpath_yyerror(JsonPathParseResult **result,
struct Node *escontext,
yyscan_t yyscanner,
const char *message);
#endif /* JSONPATH_INTERNAL_H */

View File

@ -30,18 +30,17 @@
}
%{
static JsonPathString scanstring;
struct jsonpath_yy_extra_type
{
JsonPathString scanstring;
};
#define YY_EXTRA_TYPE struct jsonpath_yy_extra_type *
/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;
static int scanbuflen;
static void addstring(bool init, char *s, int l);
static void addchar(bool init, char c);
static enum yytokentype checkKeyword(void);
static bool parseUnicode(char *s, int l, struct Node *escontext);
static bool parseHexChar(char *s, struct Node *escontext);
static void addstring(bool init, char *s, int l, yyscan_t yyscanner);
static void addchar(bool init, char c, yyscan_t yyscanner);
static enum yytokentype checkKeyword(yyscan_t yyscanner);
static bool parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner);
static bool parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner);
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
@ -65,6 +64,7 @@ fprintf_to_ereport(const char *fmt, const char *msg)
%option noyywrap
%option warn
%option prefix="jsonpath_yy"
%option reentrant
%option bison-bridge
%option noyyalloc
%option noyyrealloc
@ -120,63 +120,63 @@ hex_fail \\x{hexdigit}{0,1}
%%
<xnq>{other}+ {
addstring(false, yytext, yyleng);
addstring(false, yytext, yyleng, yyscanner);
}
<xnq>{blank}+ {
yylval->str = scanstring;
yylval->str = yyextra->scanstring;
BEGIN INITIAL;
return checkKeyword();
return checkKeyword(yyscanner);
}
<xnq>\/\* {
yylval->str = scanstring;
yylval->str = yyextra->scanstring;
BEGIN xc;
}
<xnq>({special}|\") {
yylval->str = scanstring;
yylval->str = yyextra->scanstring;
yyless(0);
BEGIN INITIAL;
return checkKeyword();
return checkKeyword(yyscanner);
}
<xnq><<EOF>> {
yylval->str = scanstring;
yylval->str = yyextra->scanstring;
BEGIN INITIAL;
return checkKeyword();
return checkKeyword(yyscanner);
}
<xnq,xq,xvq>\\b { addchar(false, '\b'); }
<xnq,xq,xvq>\\b { addchar(false, '\b', yyscanner); }
<xnq,xq,xvq>\\f { addchar(false, '\f'); }
<xnq,xq,xvq>\\f { addchar(false, '\f', yyscanner); }
<xnq,xq,xvq>\\n { addchar(false, '\n'); }
<xnq,xq,xvq>\\n { addchar(false, '\n', yyscanner); }
<xnq,xq,xvq>\\r { addchar(false, '\r'); }
<xnq,xq,xvq>\\r { addchar(false, '\r', yyscanner); }
<xnq,xq,xvq>\\t { addchar(false, '\t'); }
<xnq,xq,xvq>\\t { addchar(false, '\t', yyscanner); }
<xnq,xq,xvq>\\v { addchar(false, '\v'); }
<xnq,xq,xvq>\\v { addchar(false, '\v', yyscanner); }
<xnq,xq,xvq>{unicode}+ {
if (!parseUnicode(yytext, yyleng, escontext))
if (!parseUnicode(yytext, yyleng, escontext, yyscanner))
yyterminate();
}
<xnq,xq,xvq>{hex_char} {
if (!parseHexChar(yytext, escontext))
if (!parseHexChar(yytext, escontext, yyscanner))
yyterminate();
}
<xnq,xq,xvq>{unicode}*{unicodefail} {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"invalid Unicode escape sequence");
yyterminate();
}
<xnq,xq,xvq>{hex_fail} {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"invalid hexadecimal character sequence");
yyterminate();
}
@ -184,37 +184,37 @@ hex_fail \\x{hexdigit}{0,1}
<xnq,xq,xvq>{unicode}+\\ {
/* throw back the \\, and treat as unicode */
yyless(yyleng - 1);
if (!parseUnicode(yytext, yyleng, escontext))
if (!parseUnicode(yytext, yyleng, escontext, yyscanner))
yyterminate();
}
<xnq,xq,xvq>\\. { addchar(false, yytext[1]); }
<xnq,xq,xvq>\\. { addchar(false, yytext[1], yyscanner); }
<xnq,xq,xvq>\\ {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"unexpected end after backslash");
yyterminate();
}
<xq,xvq><<EOF>> {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"unterminated quoted string");
yyterminate();
}
<xq>\" {
yylval->str = scanstring;
yylval->str = yyextra->scanstring;
BEGIN INITIAL;
return STRING_P;
}
<xvq>\" {
yylval->str = scanstring;
yylval->str = yyextra->scanstring;
BEGIN INITIAL;
return VARIABLE_P;
}
<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng); }
<xq,xvq>[^\\\"]+ { addstring(false, yytext, yyleng, yyscanner); }
<xc>\*\/ { BEGIN INITIAL; }
@ -223,7 +223,7 @@ hex_fail \\x{hexdigit}{0,1}
<xc>\* { }
<xc><<EOF>> {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"unexpected end of comment");
yyterminate();
}
@ -250,14 +250,14 @@ hex_fail \\x{hexdigit}{0,1}
\> { return GREATER_P; }
\${other}+ {
addstring(true, yytext + 1, yyleng - 1);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext + 1, yyleng - 1, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return VARIABLE_P;
}
\$\" {
addchar(true, '\0');
addchar(true, '\0', yyscanner);
BEGIN xvq;
}
@ -266,85 +266,85 @@ hex_fail \\x{hexdigit}{0,1}
{blank}+ { /* ignore */ }
\/\* {
addchar(true, '\0');
addchar(true, '\0', yyscanner);
BEGIN xc;
}
{real} {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext, yyleng, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return NUMERIC_P;
}
{decimal} {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext, yyleng, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return NUMERIC_P;
}
{decinteger} {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext, yyleng, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return INT_P;
}
{hexinteger} {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext, yyleng, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return INT_P;
}
{octinteger} {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext, yyleng, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return INT_P;
}
{bininteger} {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
addstring(true, yytext, yyleng, yyscanner);
addchar(false, '\0', yyscanner);
yylval->str = yyextra->scanstring;
return INT_P;
}
{realfail} {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"invalid numeric literal");
yyterminate();
}
{decinteger_junk} {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"trailing junk after numeric literal");
yyterminate();
}
{decimal_junk} {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"trailing junk after numeric literal");
yyterminate();
}
{real_junk} {
jsonpath_yyerror(NULL, escontext,
jsonpath_yyerror(NULL, escontext, yyscanner,
"trailing junk after numeric literal");
yyterminate();
}
\" {
addchar(true, '\0');
addchar(true, '\0', yyscanner);
BEGIN xq;
}
\\ {
yyless(0);
addchar(true, '\0');
addchar(true, '\0', yyscanner);
BEGIN xnq;
}
{other}+ {
addstring(true, yytext, yyleng);
addstring(true, yytext, yyleng, yyscanner);
BEGIN xnq;
}
@ -354,10 +354,17 @@ hex_fail \\x{hexdigit}{0,1}
/* LCOV_EXCL_STOP */
/* see scan.l */
#undef yyextra
#define yyextra (((struct yyguts_t *) yyscanner)->yyextra_r)
void
jsonpath_yyerror(JsonPathParseResult **result, struct Node *escontext,
yyscan_t yyscanner,
const char *message)
{
struct yyguts_t * yyg = (struct yyguts_t *) yyscanner; /* needed for yytext macro */
/* don't overwrite escontext if it's already been set */
if (SOFT_ERROR_OCCURRED(escontext))
return;
@ -427,9 +434,11 @@ static const JsonPathKeyword keywords[] = {
{ 12,false, TIMESTAMP_TZ_P, "timestamp_tz"},
};
/* Check if current scanstring value is a keyword */
/*
* Check if current scanstring value is a keyword
*/
static enum yytokentype
checkKeyword()
checkKeyword(yyscan_t yyscanner)
{
int res = IDENT_P;
int diff;
@ -437,18 +446,18 @@ checkKeyword()
*StopHigh = keywords + lengthof(keywords),
*StopMiddle;
if (scanstring.len > keywords[lengthof(keywords) - 1].len)
if (yyextra->scanstring.len > keywords[lengthof(keywords) - 1].len)
return res;
while (StopLow < StopHigh)
{
StopMiddle = StopLow + ((StopHigh - StopLow) >> 1);
if (StopMiddle->len == scanstring.len)
diff = pg_strncasecmp(StopMiddle->keyword, scanstring.val,
scanstring.len);
if (StopMiddle->len == yyextra->scanstring.len)
diff = pg_strncasecmp(StopMiddle->keyword, yyextra->scanstring.val,
yyextra->scanstring.len);
else
diff = StopMiddle->len - scanstring.len;
diff = StopMiddle->len - yyextra->scanstring.len;
if (diff < 0)
StopLow = StopMiddle + 1;
@ -457,8 +466,8 @@ checkKeyword()
else
{
if (StopMiddle->lowercase)
diff = strncmp(StopMiddle->keyword, scanstring.val,
scanstring.len);
diff = strncmp(StopMiddle->keyword, yyextra->scanstring.val,
yyextra->scanstring.len);
if (diff == 0)
res = StopMiddle->val;
@ -470,85 +479,47 @@ checkKeyword()
return res;
}
/*
* Called before any actual parsing is done
*/
static void
jsonpath_scanner_init(const char *str, int slen)
{
if (slen <= 0)
slen = strlen(str);
/*
* Might be left over after ereport()
*/
yy_init_globals();
/*
* Make a scan buffer with special termination needed by flex.
*/
scanbuflen = slen;
scanbuf = palloc(slen + 2);
memcpy(scanbuf, str, slen);
scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
BEGIN(INITIAL);
}
/*
* Called after parsing is done to clean up after jsonpath_scanner_init()
*/
static void
jsonpath_scanner_finish(void)
{
yy_delete_buffer(scanbufhandle);
pfree(scanbuf);
}
/*
* Resize scanstring so that it can append string of given length.
* Reinitialize if required.
*/
static void
resizeString(bool init, int appendLen)
resizeString(bool init, int appendLen, yyscan_t yyscanner)
{
if (init)
{
scanstring.total = Max(32, appendLen);
scanstring.val = (char *) palloc(scanstring.total);
scanstring.len = 0;
yyextra->scanstring.total = Max(32, appendLen);
yyextra->scanstring.val = (char *) palloc(yyextra->scanstring.total);
yyextra->scanstring.len = 0;
}
else
{
if (scanstring.len + appendLen >= scanstring.total)
if (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total)
{
while (scanstring.len + appendLen >= scanstring.total)
scanstring.total *= 2;
scanstring.val = repalloc(scanstring.val, scanstring.total);
while (yyextra->scanstring.len + appendLen >= yyextra->scanstring.total)
yyextra->scanstring.total *= 2;
yyextra->scanstring.val = repalloc(yyextra->scanstring.val, yyextra->scanstring.total);
}
}
}
/* Add set of bytes at "s" of length "l" to scanstring */
static void
addstring(bool init, char *s, int l)
addstring(bool init, char *s, int l, yyscan_t yyscanner)
{
resizeString(init, l + 1);
memcpy(scanstring.val + scanstring.len, s, l);
scanstring.len += l;
resizeString(init, l + 1, yyscanner);
memcpy(yyextra->scanstring.val + yyextra->scanstring.len, s, l);
yyextra->scanstring.len += l;
}
/* Add single byte "c" to scanstring */
static void
addchar(bool init, char c)
addchar(bool init, char c, yyscan_t yyscanner)
{
resizeString(init, 1);
scanstring.val[scanstring.len] = c;
resizeString(init, 1, yyscanner);
yyextra->scanstring.val[yyextra->scanstring.len] = c;
if (c != '\0')
scanstring.len++;
yyextra->scanstring.len++;
}
/* Interface to jsonpath parser */
@ -556,20 +527,30 @@ JsonPathParseResult *
parsejsonpath(const char *str, int len, struct Node *escontext)
{
JsonPathParseResult *parseresult;
yyscan_t scanner;
struct jsonpath_yy_extra_type yyext;
jsonpath_scanner_init(str, len);
if (jsonpath_yylex_init(&scanner) != 0)
elog(ERROR, "yylex_init() failed: %m");
if (jsonpath_yyparse(&parseresult, escontext) != 0)
jsonpath_yyerror(NULL, escontext, "invalid input"); /* shouldn't happen */
yyset_extra(&yyext, scanner);
jsonpath_scanner_finish();
if (len <= 0)
len = strlen(str);
jsonpath_yy_scan_bytes(str, len, scanner);
if (jsonpath_yyparse(&parseresult, escontext, scanner) != 0)
jsonpath_yyerror(NULL, escontext, scanner, "invalid input"); /* shouldn't happen */
jsonpath_yylex_destroy(scanner);
return parseresult;
}
/* Turn hex character into integer */
static bool
hexval(char c, int *result, struct Node *escontext)
hexval(char c, int *result, struct Node *escontext, yyscan_t yyscanner)
{
if (c >= '0' && c <= '9')
{
@ -586,13 +567,13 @@ hexval(char c, int *result, struct Node *escontext)
*result = c - 'A' + 0xA;
return true;
}
jsonpath_yyerror(NULL, escontext, "invalid hexadecimal digit");
jsonpath_yyerror(NULL, escontext, yyscanner, "invalid hexadecimal digit");
return false;
}
/* Add given unicode character to scanstring */
static bool
addUnicodeChar(int ch, struct Node *escontext)
addUnicodeChar(int ch, struct Node *escontext, yyscan_t yyscanner)
{
if (ch == 0)
{
@ -618,14 +599,14 @@ addUnicodeChar(int ch, struct Node *escontext)
ereturn(escontext, false,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("could not convert Unicode to server encoding")));
addstring(false, cbuf, strlen(cbuf));
addstring(false, cbuf, strlen(cbuf), yyscanner);
}
return true;
}
/* Add unicode character, processing any surrogate pairs */
static bool
addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
addUnicode(int ch, int *hi_surrogate, struct Node *escontext, yyscan_t yyscanner)
{
if (is_utf16_surrogate_first(ch))
{
@ -658,7 +639,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
"surrogate.")));
}
return addUnicodeChar(ch, escontext);
return addUnicodeChar(ch, escontext, yyscanner);
}
/*
@ -666,7 +647,7 @@ addUnicode(int ch, int *hi_surrogate, struct Node *escontext)
* src/backend/utils/adt/json.c
*/
static bool
parseUnicode(char *s, int l, struct Node *escontext)
parseUnicode(char *s, int l, struct Node *escontext, yyscan_t yyscanner)
{
int i = 2;
int hi_surrogate = -1;
@ -680,7 +661,7 @@ parseUnicode(char *s, int l, struct Node *escontext)
{
while (s[++i] != '}' && i < l)
{
if (!hexval(s[i], &si, escontext))
if (!hexval(s[i], &si, escontext, yyscanner))
return false;
ch = (ch << 4) | si;
}
@ -690,13 +671,13 @@ parseUnicode(char *s, int l, struct Node *escontext)
{
for (j = 0; j < 4 && i < l; j++)
{
if (!hexval(s[i++], &si, escontext))
if (!hexval(s[i++], &si, escontext, yyscanner))
return false;
ch = (ch << 4) | si;
}
}
if (! addUnicode(ch, &hi_surrogate, escontext))
if (! addUnicode(ch, &hi_surrogate, escontext, yyscanner))
return false;
}
@ -714,17 +695,17 @@ parseUnicode(char *s, int l, struct Node *escontext)
/* Parse sequence of hex-encoded characters */
static bool
parseHexChar(char *s, struct Node *escontext)
parseHexChar(char *s, struct Node *escontext, yyscan_t yyscanner)
{
int s2, s3, ch;
if (!hexval(s[2], &s2, escontext))
if (!hexval(s[2], &s2, escontext, yyscanner))
return false;
if (!hexval(s[3], &s3, escontext))
if (!hexval(s[3], &s3, escontext, yyscanner))
return false;
ch = (s2 << 4) | s3;
return addUnicodeChar(ch, escontext);
return addUnicodeChar(ch, escontext, yyscanner);
}
/*
@ -733,13 +714,13 @@ parseHexChar(char *s, struct Node *escontext)
*/
void *
jsonpath_yyalloc(yy_size_t bytes)
jsonpath_yyalloc(yy_size_t bytes, yyscan_t yyscanner)
{
return palloc(bytes);
}
void *
jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
jsonpath_yyrealloc(void *ptr, yy_size_t bytes, yyscan_t yyscanner)
{
if (ptr)
return repalloc(ptr, bytes);
@ -748,7 +729,7 @@ jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
}
void
jsonpath_yyfree(void *ptr)
jsonpath_yyfree(void *ptr, yyscan_t yyscanner)
{
if (ptr)
pfree(ptr);