diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 47ebb2a0e09..76155963fc6 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -4,6 +4,8 @@ * jsonpath_gram.y * Grammar definitions for jsonpath datatype * + * Transforms tokenized jsonpath into tree of JsonPathParseItem structs. + * * Copyright (c) 2019, PostgreSQL Global Development Group * * IDENTIFICATION @@ -37,15 +39,17 @@ int jsonpath_yylex(union YYSTYPE *yylval_param); int jsonpath_yyparse(JsonPathParseResult **result); void jsonpath_yyerror(JsonPathParseResult **result, const char *message); -static JsonPathParseItem *makeItemType(int type); +static JsonPathParseItem *makeItemType(JsonPathItemType type); static JsonPathParseItem *makeItemString(JsonPathString *s); static JsonPathParseItem *makeItemVariable(JsonPathString *s); static JsonPathParseItem *makeItemKey(JsonPathString *s); static JsonPathParseItem *makeItemNumeric(JsonPathString *s); static JsonPathParseItem *makeItemBool(bool val); -static JsonPathParseItem *makeItemBinary(int type, JsonPathParseItem *la, +static JsonPathParseItem *makeItemBinary(JsonPathItemType type, + JsonPathParseItem *la, JsonPathParseItem *ra); -static JsonPathParseItem *makeItemUnary(int type, JsonPathParseItem *a); +static JsonPathParseItem *makeItemUnary(JsonPathItemType type, + JsonPathParseItem *a); static JsonPathParseItem *makeItemList(List *list); static JsonPathParseItem *makeIndexArray(List *list); static JsonPathParseItem *makeAny(int first, int last); @@ -75,9 +79,9 @@ static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr, %union { JsonPathString str; - List *elems; /* list of JsonPathParseItem */ - List *indexs; /* list of integers */ - JsonPathParseItem *value; + List *elems; /* list of JsonPathParseItem */ + List *indexs; /* list of integers */ + JsonPathParseItem *value; JsonPathParseResult *result; JsonPathItemType optype; bool boolean; @@ -160,7 +164,7 
@@ comp_op: ; delimited_predicate: - '(' predicate ')' { $$ = $2; } + '(' predicate ')' { $$ = $2; } | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); } ; @@ -170,9 +174,10 @@ predicate: | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); } | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); } | NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); } - | '(' predicate ')' IS_P UNKNOWN_P { $$ = makeItemUnary(jpiIsUnknown, $2); } + | '(' predicate ')' IS_P UNKNOWN_P + { $$ = makeItemUnary(jpiIsUnknown, $2); } | expr STARTS_P WITH_P starts_with_initial - { $$ = makeItemBinary(jpiStartsWith, $1, $4); } + { $$ = makeItemBinary(jpiStartsWith, $1, $4); } | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); } | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P { $$ = makeItemLikeRegex($1, &$3, &$5); } @@ -232,7 +237,8 @@ any_level: any_path: ANY_P { $$ = makeAny(0, -1); } | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); } - | ANY_P '{' any_level TO_P any_level '}' { $$ = makeAny($3, $5); } + | ANY_P '{' any_level TO_P any_level '}' + { $$ = makeAny($3, $5); } ; accessor_op: @@ -285,10 +291,15 @@ method: ; %% -static JsonPathParseItem* -makeItemType(int type) +/* + * The helper functions below allocate and fill JsonPathParseItem's of various + * types. 
+ */ + +static JsonPathParseItem * +makeItemType(JsonPathItemType type) { - JsonPathParseItem* v = palloc(sizeof(*v)); + JsonPathParseItem *v = palloc(sizeof(*v)); CHECK_FOR_INTERRUPTS(); @@ -298,10 +309,10 @@ makeItemType(int type) return v; } -static JsonPathParseItem* +static JsonPathParseItem * makeItemString(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; if (s == NULL) { @@ -320,7 +331,7 @@ makeItemString(JsonPathString *s) static JsonPathParseItem * makeItemVariable(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; v = makeItemType(jpiVariable); v->value.string.val = s->val; @@ -332,7 +343,7 @@ makeItemVariable(JsonPathString *s) static JsonPathParseItem * makeItemKey(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; v = makeItemString(s); v->type = jpiKey; @@ -343,7 +354,7 @@ makeItemKey(JsonPathString *s) static JsonPathParseItem * makeItemNumeric(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; v = makeItemType(jpiNumeric); v->value.numeric = @@ -356,7 +367,7 @@ makeItemNumeric(JsonPathString *s) static JsonPathParseItem * makeItemBool(bool val) { - JsonPathParseItem *v = makeItemType(jpiBool); + JsonPathParseItem *v = makeItemType(jpiBool); v->value.boolean = val; @@ -364,7 +375,7 @@ makeItemBool(bool val) } static JsonPathParseItem * -makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra) +makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra) { JsonPathParseItem *v = makeItemType(type); @@ -375,7 +386,7 @@ makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra) } static JsonPathParseItem * -makeItemUnary(int type, JsonPathParseItem* a) +makeItemUnary(JsonPathItemType type, JsonPathParseItem *a) { JsonPathParseItem *v; @@ -401,8 +412,9 @@ makeItemUnary(int type, JsonPathParseItem* a) static JsonPathParseItem * makeItemList(List *list) { - JsonPathParseItem *head, *end; - ListCell *cell = list_head(list); + 
JsonPathParseItem *head, + *end; + ListCell *cell = list_head(list); head = end = (JsonPathParseItem *) lfirst(cell); @@ -427,8 +439,8 @@ makeItemList(List *list) static JsonPathParseItem * makeIndexArray(List *list) { - JsonPathParseItem *v = makeItemType(jpiIndexArray); - ListCell *cell; + JsonPathParseItem *v = makeItemType(jpiIndexArray); + ListCell *cell; int i = 0; Assert(list_length(list) > 0); @@ -439,7 +451,7 @@ makeIndexArray(List *list) foreach(cell, list) { - JsonPathParseItem *jpi = lfirst(cell); + JsonPathParseItem *jpi = lfirst(cell); Assert(jpi->type == jpiSubscript); @@ -453,7 +465,7 @@ makeIndexArray(List *list) static JsonPathParseItem * makeAny(int first, int last) { - JsonPathParseItem *v = makeItemType(jpiAny); + JsonPathParseItem *v = makeItemType(jpiAny); v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX; v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX; @@ -465,9 +477,9 @@ static JsonPathParseItem * makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, JsonPathString *flags) { - JsonPathParseItem *v = makeItemType(jpiLikeRegex); - int i; - int cflags = REG_ADVANCED; + JsonPathParseItem *v = makeItemType(jpiLikeRegex); + int i; + int cflags = REG_ADVANCED; v->value.like_regex.expr = expr; v->value.like_regex.pattern = pattern->val; @@ -510,4 +522,12 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, return v; } +/* + * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is + * unavoidable because jsonpath_gram does not create a .h file to export its + * token symbols. If these files ever grow large enough to be worth compiling + * separately, that could be fixed; but for now it seems like useless + * complication. 
+ */ + #include "jsonpath_scan.c" diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 02cb54ee7f9..e93307f4073 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -4,6 +4,9 @@ * jsonpath_scan.l * Lexical parser for jsonpath datatype * + * Splits jsonpath string into tokens represented as JsonPathString structs. + * Decodes unicode and hex escaped strings. + * * Copyright (c) 2019, PostgreSQL Global Development Group * * IDENTIFICATION @@ -19,9 +22,6 @@ static JsonPathString scanstring; -/* No reason to constrain amount of data slurped */ -/* #define YY_READ_BUF_SIZE 16777216 */ - /* Handles to the buffer that the lexer uses internally */ static YY_BUFFER_STATE scanbufhandle; static char *scanbuf; @@ -29,9 +29,7 @@ static int scanbuflen; static void addstring(bool init, char *s, int l); static void addchar(bool init, char s); -static int checkSpecialVal(void); /* examine scanstring for the special - * value */ - +static enum yytokentype checkKeyword(void); static void parseUnicode(char *s, int l); static void parseHexChars(char *s, int l); @@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg) %option noyyrealloc %option noyyfree -%x xQUOTED -%x xNONQUOTED -%x xVARQUOTED -%x xSINGLEQUOTED -%x xCOMMENT +/* + * We use exclusive states for quoted, single-quoted and non-quoted strings, + * quoted variable names and C-style comments. 
+ * Exclusive states: + * - quoted strings + * - non-quoted strings + * - quoted variable names + * - single-quoted strings + * - C-style comment + */ + +%x xq +%x xnq +%x xvq +%x xsq +%x xc special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] @@ -73,189 +82,188 @@ hex_dig [0-9A-Fa-f] unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) hex_char \\x{hex_dig}{2} - %% -\&\& { return AND_P; } +{any}+ { + addstring(false, yytext, yyleng); + } -\|\| { return OR_P; } +{blank}+ { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } -\! { return NOT_P; } -\*\* { return ANY_P; } +\/\* { + yylval->str = scanstring; + BEGIN xc; + } -\< { return LESS_P; } +({special}|\"|\') { + yylval->str = scanstring; + yyless(0); + BEGIN INITIAL; + return checkKeyword(); + } -\<\= { return LESSEQUAL_P; } +<> { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } -\=\= { return EQUAL_P; } +\\[\"\'\\] { addchar(false, yytext[1]); } -\<\> { return NOTEQUAL_P; } +\\b { addchar(false, '\b'); } -\!\= { return NOTEQUAL_P; } +\\f { addchar(false, '\f'); } -\>\= { return GREATEREQUAL_P; } +\\n { addchar(false, '\n'); } -\> { return GREATER_P; } +\\r { addchar(false, '\r'); } -\${any}+ { +\\t { addchar(false, '\t'); } + +\\v { addchar(false, '\v'); } + +{unicode}+ { parseUnicode(yytext, yyleng); } + +{hex_char}+ { parseHexChars(yytext, yyleng); } + +\\x { yyerror(NULL, "Hex character sequence is invalid"); } + +\\u { yyerror(NULL, "Unicode sequence is invalid"); } + +\\. 
{ yyerror(NULL, "Escape sequence is invalid"); } + +\\ { yyerror(NULL, "Unexpected end after backslash"); } + +<> { yyerror(NULL, "Unexpected end of quoted string"); } + +\" { + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + +\" { + yylval->str = scanstring; + BEGIN INITIAL; + return VARIABLE_P; + } + +\' { + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + +[^\\\"]+ { addstring(false, yytext, yyleng); } + +[^\\\']+ { addstring(false, yytext, yyleng); } + +\*\/ { BEGIN INITIAL; } + +[^\*]+ { } + +\* { } + +<> { yyerror(NULL, "Unexpected end of comment"); } + +\&\& { return AND_P; } + +\|\| { return OR_P; } + +\! { return NOT_P; } + +\*\* { return ANY_P; } + +\< { return LESS_P; } + +\<\= { return LESSEQUAL_P; } + +\=\= { return EQUAL_P; } + +\<\> { return NOTEQUAL_P; } + +\!\= { return NOTEQUAL_P; } + +\>\= { return GREATEREQUAL_P; } + +\> { return GREATER_P; } + +\${any}+ { addstring(true, yytext + 1, yyleng - 1); addchar(false, '\0'); yylval->str = scanstring; return VARIABLE_P; } -\$\" { +\$\" { addchar(true, '\0'); - BEGIN xVARQUOTED; + BEGIN xvq; } -{special} { return *yytext; } +{special} { return *yytext; } -{blank}+ { /* ignore */ } +{blank}+ { /* ignore */ } -\/\* { +\/\* { addchar(true, '\0'); - BEGIN xCOMMENT; + BEGIN xc; } -[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ { +[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */ addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -\.[0-9]+[eE][+-]?[0-9]+ /* float */ { +\.[0-9]+[eE][+-]?[0-9]+ { /* float */ addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -([0-9]+)?\.[0-9]+ { +([0-9]+)?\.[0-9]+ { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -[0-9]+ { +[0-9]+ { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return INT_P; } -{any}+ { +{any}+ { addstring(true, yytext, yyleng); - BEGIN 
xNONQUOTED; + BEGIN xnq; } -\" { +\" { addchar(true, '\0'); - BEGIN xQUOTED; + BEGIN xq; } -\' { +\' { addchar(true, '\0'); - BEGIN xSINGLEQUOTED; + BEGIN xsq; } -\\ { +\\ { yyless(0); addchar(true, '\0'); - BEGIN xNONQUOTED; + BEGIN xnq; } -{any}+ { - addstring(false, yytext, yyleng); - } - -{blank}+ { - yylval->str = scanstring; - BEGIN INITIAL; - return checkSpecialVal(); - } - - -\/\* { - yylval->str = scanstring; - BEGIN xCOMMENT; - } - -({special}|\"|\') { - yylval->str = scanstring; - yyless(0); - BEGIN INITIAL; - return checkSpecialVal(); - } - -<> { - yylval->str = scanstring; - BEGIN INITIAL; - return checkSpecialVal(); - } - -\\[\"\'\\] { addchar(false, yytext[1]); } - -\\b { addchar(false, '\b'); } - -\\f { addchar(false, '\f'); } - -\\n { addchar(false, '\n'); } - -\\r { addchar(false, '\r'); } - -\\t { addchar(false, '\t'); } - -\\v { addchar(false, '\v'); } - -{unicode}+ { parseUnicode(yytext, yyleng); } - -{hex_char}+ { parseHexChars(yytext, yyleng); } - -\\x { yyerror(NULL, "Hex character sequence is invalid"); } - -\\u { yyerror(NULL, "Unicode sequence is invalid"); } - -\\. 
{ yyerror(NULL, "Escape sequence is invalid"); } - -\\ { yyerror(NULL, "Unexpected end after backslash"); } - -<> { yyerror(NULL, "Unexpected end of quoted string"); } - -\" { - yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; - } - -\" { - yylval->str = scanstring; - BEGIN INITIAL; - return VARIABLE_P; - } - -\' { - yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; - } - -[^\\\"]+ { addstring(false, yytext, yyleng); } - -[^\\\']+ { addstring(false, yytext, yyleng); } - -<> { yyterminate(); } - -\*\/ { BEGIN INITIAL; } - -[^\*]+ { } - -\* { } - -<> { yyerror(NULL, "Unexpected end of comment"); } +<> { yyterminate(); } %% @@ -292,7 +300,6 @@ typedef struct JsonPathKeyword * Array of key words should be sorted by length and then * alphabetical order */ - static const JsonPathKeyword keywords[] = { { 2, false, IS_P, "is"}, { 2, false, TO_P, "to"}, @@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = { { 10,false, LIKE_REGEX_P, "like_regex"}, }; -static int -checkSpecialVal() +/* Check if current scanstring value is a keyword */ +static enum yytokentype +checkKeyword() { int res = IDENT_P; int diff; @@ -329,7 +337,7 @@ checkSpecialVal() if (scanstring.len > keywords[lengthof(keywords) - 1].len) return res; - while(StopLow < StopHigh) + while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); @@ -397,49 +405,50 @@ jsonpath_scanner_finish(void) pfree(scanbuf); } +/* + * Resize scanstring so that it can append string of given length. + * Reinitialize if required. 
+ */ +static void +resizeString(bool init, int appendLen) +{ + if (init) + { + scanstring.total = Max(32, appendLen); + scanstring.val = (char *) palloc(scanstring.total); + scanstring.len = 0; + } + else + { + if (scanstring.len + appendLen >= scanstring.total) + { + while (scanstring.len + appendLen >= scanstring.total) + scanstring.total *= 2; + scanstring.val = repalloc(scanstring.val, scanstring.total); + } + } +} + +/* Add set of bytes at "s" of length "l" to scanstring */ static void addstring(bool init, char *s, int l) { - if (init) - { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); - scanstring.len = 0; - } - - if (s && l) - { - while(scanstring.len + l + 1 >= scanstring.total) - { - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); - } - - memcpy(scanstring.val + scanstring.len, s, l); - scanstring.len += l; - } + resizeString(init, l + 1); + memcpy(scanstring.val + scanstring.len, s, l); + scanstring.len += l; } +/* Add single byte "c" to scanstring */ static void -addchar(bool init, char s) +addchar(bool init, char c) { - if (init) - { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); - scanstring.len = 0; - } - else if(scanstring.len + 1 >= scanstring.total) - { - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); - } - - scanstring.val[ scanstring.len ] = s; - if (s != '\0') + resizeString(init, 1); + scanstring.val[scanstring.len] = c; + if (c != '\0') scanstring.len++; } +/* Interface to jsonpath parser */ JsonPathParseResult * parsejsonpath(const char *str, int len) { @@ -447,7 +456,7 @@ parsejsonpath(const char *str, int len) jsonpath_scanner_init(str, len); - if (jsonpath_yyparse((void*)&parseresult) != 0) + if (jsonpath_yyparse((void *) &parseresult) != 0) jsonpath_yyerror(NULL, "bugus input"); jsonpath_scanner_finish(); @@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len) return parseresult; } +/* Turn hex character into 
integer */ static int hexval(char c) { @@ -468,6 +478,7 @@ hexval(char c) return 0; /* not reached */ } +/* Add given unicode character to scanstring */ static void addUnicodeChar(int ch) { @@ -515,6 +526,7 @@ addUnicodeChar(int ch) } } +/* Add unicode character and process its hi surrogate */ static void addUnicode(int ch, int *hi_surrogate) { @@ -592,6 +604,7 @@ parseUnicode(char *s, int l) } } +/* Parse sequence of hex-encoded characters */ static void parseHexChars(char *s, int l) { @@ -601,7 +614,8 @@ parseHexChars(char *s, int l) for (i = 0; i < l / 4; i++) { - int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]); + int ch = (hexval(s[i * 4 + 2]) << 4) | + hexval(s[i * 4 + 3]); addUnicodeChar(ch); }