From 8b17298f0b6bb2a64b55fab0339c8fd6ec2d74fb Mon Sep 17 00:00:00 2001 From: Alexander Korotkov Date: Mon, 25 Mar 2019 15:42:51 +0300 Subject: [PATCH] Cosmetic changes for jsonpath_gram.y and jsonpath_scan.l This commit includes formatting improvements, renamings and comments. Also, it makes jsonpath_scan.l more uniform with our other lexers. Firstly, state names are renamed to shorter alternatives. Secondly, the INITIAL state prefix is removed from the rules. The corresponding rules are moved to the tail, so they still apply only in the initial state. Author: Alexander Korotkov Reviewed-by: John Naylor --- src/backend/utils/adt/jsonpath_gram.y | 80 +++--- src/backend/utils/adt/jsonpath_scan.l | 352 +++++++++++++------------- 2 files changed, 233 insertions(+), 199 deletions(-) diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y index 47ebb2a0e09..76155963fc6 100644 --- a/src/backend/utils/adt/jsonpath_gram.y +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -4,6 +4,8 @@ * jsonpath_gram.y * Grammar definitions for jsonpath datatype * + * Transforms tokenized jsonpath into tree of JsonPathParseItem structs. 
+ * * Copyright (c) 2019, PostgreSQL Global Development Group * * IDENTIFICATION @@ -37,15 +39,17 @@ int jsonpath_yylex(union YYSTYPE *yylval_param); int jsonpath_yyparse(JsonPathParseResult **result); void jsonpath_yyerror(JsonPathParseResult **result, const char *message); -static JsonPathParseItem *makeItemType(int type); +static JsonPathParseItem *makeItemType(JsonPathItemType type); static JsonPathParseItem *makeItemString(JsonPathString *s); static JsonPathParseItem *makeItemVariable(JsonPathString *s); static JsonPathParseItem *makeItemKey(JsonPathString *s); static JsonPathParseItem *makeItemNumeric(JsonPathString *s); static JsonPathParseItem *makeItemBool(bool val); -static JsonPathParseItem *makeItemBinary(int type, JsonPathParseItem *la, +static JsonPathParseItem *makeItemBinary(JsonPathItemType type, + JsonPathParseItem *la, JsonPathParseItem *ra); -static JsonPathParseItem *makeItemUnary(int type, JsonPathParseItem *a); +static JsonPathParseItem *makeItemUnary(JsonPathItemType type, + JsonPathParseItem *a); static JsonPathParseItem *makeItemList(List *list); static JsonPathParseItem *makeIndexArray(List *list); static JsonPathParseItem *makeAny(int first, int last); @@ -75,9 +79,9 @@ static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr, %union { JsonPathString str; - List *elems; /* list of JsonPathParseItem */ - List *indexs; /* list of integers */ - JsonPathParseItem *value; + List *elems; /* list of JsonPathParseItem */ + List *indexs; /* list of integers */ + JsonPathParseItem *value; JsonPathParseResult *result; JsonPathItemType optype; bool boolean; @@ -160,7 +164,7 @@ comp_op: ; delimited_predicate: - '(' predicate ')' { $$ = $2; } + '(' predicate ')' { $$ = $2; } | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); } ; @@ -170,9 +174,10 @@ predicate: | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); } | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); } | NOT_P delimited_predicate { $$ = 
makeItemUnary(jpiNot, $2); } - | '(' predicate ')' IS_P UNKNOWN_P { $$ = makeItemUnary(jpiIsUnknown, $2); } + | '(' predicate ')' IS_P UNKNOWN_P + { $$ = makeItemUnary(jpiIsUnknown, $2); } | expr STARTS_P WITH_P starts_with_initial - { $$ = makeItemBinary(jpiStartsWith, $1, $4); } + { $$ = makeItemBinary(jpiStartsWith, $1, $4); } | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); } | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P { $$ = makeItemLikeRegex($1, &$3, &$5); } @@ -232,7 +237,8 @@ any_level: any_path: ANY_P { $$ = makeAny(0, -1); } | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); } - | ANY_P '{' any_level TO_P any_level '}' { $$ = makeAny($3, $5); } + | ANY_P '{' any_level TO_P any_level '}' + { $$ = makeAny($3, $5); } ; accessor_op: @@ -285,10 +291,15 @@ method: ; %% -static JsonPathParseItem* -makeItemType(int type) +/* + * The helper functions below allocate and fill JsonPathParseItem's of various + * types. + */ + +static JsonPathParseItem * +makeItemType(JsonPathItemType type) { - JsonPathParseItem* v = palloc(sizeof(*v)); + JsonPathParseItem *v = palloc(sizeof(*v)); CHECK_FOR_INTERRUPTS(); @@ -298,10 +309,10 @@ makeItemType(int type) return v; } -static JsonPathParseItem* +static JsonPathParseItem * makeItemString(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; if (s == NULL) { @@ -320,7 +331,7 @@ makeItemString(JsonPathString *s) static JsonPathParseItem * makeItemVariable(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; v = makeItemType(jpiVariable); v->value.string.val = s->val; @@ -332,7 +343,7 @@ makeItemVariable(JsonPathString *s) static JsonPathParseItem * makeItemKey(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; v = makeItemString(s); v->type = jpiKey; @@ -343,7 +354,7 @@ makeItemKey(JsonPathString *s) static JsonPathParseItem * makeItemNumeric(JsonPathString *s) { - JsonPathParseItem *v; + JsonPathParseItem *v; v = makeItemType(jpiNumeric); v->value.numeric = @@ 
-356,7 +367,7 @@ makeItemNumeric(JsonPathString *s) static JsonPathParseItem * makeItemBool(bool val) { - JsonPathParseItem *v = makeItemType(jpiBool); + JsonPathParseItem *v = makeItemType(jpiBool); v->value.boolean = val; @@ -364,7 +375,7 @@ makeItemBool(bool val) } static JsonPathParseItem * -makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra) +makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra) { JsonPathParseItem *v = makeItemType(type); @@ -375,7 +386,7 @@ makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra) } static JsonPathParseItem * -makeItemUnary(int type, JsonPathParseItem* a) +makeItemUnary(JsonPathItemType type, JsonPathParseItem *a) { JsonPathParseItem *v; @@ -401,8 +412,9 @@ makeItemUnary(int type, JsonPathParseItem* a) static JsonPathParseItem * makeItemList(List *list) { - JsonPathParseItem *head, *end; - ListCell *cell = list_head(list); + JsonPathParseItem *head, + *end; + ListCell *cell = list_head(list); head = end = (JsonPathParseItem *) lfirst(cell); @@ -427,8 +439,8 @@ makeItemList(List *list) static JsonPathParseItem * makeIndexArray(List *list) { - JsonPathParseItem *v = makeItemType(jpiIndexArray); - ListCell *cell; + JsonPathParseItem *v = makeItemType(jpiIndexArray); + ListCell *cell; int i = 0; Assert(list_length(list) > 0); @@ -439,7 +451,7 @@ makeIndexArray(List *list) foreach(cell, list) { - JsonPathParseItem *jpi = lfirst(cell); + JsonPathParseItem *jpi = lfirst(cell); Assert(jpi->type == jpiSubscript); @@ -453,7 +465,7 @@ makeIndexArray(List *list) static JsonPathParseItem * makeAny(int first, int last) { - JsonPathParseItem *v = makeItemType(jpiAny); + JsonPathParseItem *v = makeItemType(jpiAny); v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX; v->value.anybounds.last = (last >= 0) ? 
last : PG_UINT32_MAX; @@ -465,9 +477,9 @@ static JsonPathParseItem * makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, JsonPathString *flags) { - JsonPathParseItem *v = makeItemType(jpiLikeRegex); - int i; - int cflags = REG_ADVANCED; + JsonPathParseItem *v = makeItemType(jpiLikeRegex); + int i; + int cflags = REG_ADVANCED; v->value.like_regex.expr = expr; v->value.like_regex.pattern = pattern->val; @@ -510,4 +522,12 @@ makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, return v; } +/* + * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is + * unavoidable because jsonpath_gram does not create a .h file to export its + * token symbols. If these files ever grow large enough to be worth compiling + * separately, that could be fixed; but for now it seems like useless + * complication. + */ + #include "jsonpath_scan.c" diff --git a/src/backend/utils/adt/jsonpath_scan.l b/src/backend/utils/adt/jsonpath_scan.l index 02cb54ee7f9..e93307f4073 100644 --- a/src/backend/utils/adt/jsonpath_scan.l +++ b/src/backend/utils/adt/jsonpath_scan.l @@ -4,6 +4,9 @@ * jsonpath_scan.l * Lexical parser for jsonpath datatype * + * Splits jsonpath string into tokens represented as JsonPathString structs. + * Decodes unicode and hex escaped strings. 
+ * * Copyright (c) 2019, PostgreSQL Global Development Group * * IDENTIFICATION @@ -19,9 +22,6 @@ static JsonPathString scanstring; -/* No reason to constrain amount of data slurped */ -/* #define YY_READ_BUF_SIZE 16777216 */ - /* Handles to the buffer that the lexer uses internally */ static YY_BUFFER_STATE scanbufhandle; static char *scanbuf; @@ -29,9 +29,7 @@ static int scanbuflen; static void addstring(bool init, char *s, int l); static void addchar(bool init, char s); -static int checkSpecialVal(void); /* examine scanstring for the special - * value */ - +static enum yytokentype checkKeyword(void); static void parseUnicode(char *s, int l); static void parseHexChars(char *s, int l); @@ -60,11 +58,22 @@ fprintf_to_ereport(const char *fmt, const char *msg) %option noyyrealloc %option noyyfree -%x xQUOTED -%x xNONQUOTED -%x xVARQUOTED -%x xSINGLEQUOTED -%x xCOMMENT +/* + * We use exclusive states for quoted, single-quoted and non-quoted strings, + * quoted variable names and C-style comments. + * Exclusive states: + * - quoted strings + * - non-quoted strings + * - quoted variable names + * - single-quoted strings + * - C-style comment + */ + +%x xq +%x xnq +%x xvq +%x xsq +%x xc special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/] any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f] @@ -73,189 +82,188 @@ hex_dig [0-9A-Fa-f] unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\}) hex_char \\x{hex_dig}{2} - %% -\&\& { return AND_P; } +{any}+ { + addstring(false, yytext, yyleng); + } -\|\| { return OR_P; } +{blank}+ { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } -\! 
{ return NOT_P; } -\*\* { return ANY_P; } +\/\* { + yylval->str = scanstring; + BEGIN xc; + } -\< { return LESS_P; } +({special}|\"|\') { + yylval->str = scanstring; + yyless(0); + BEGIN INITIAL; + return checkKeyword(); + } -\<\= { return LESSEQUAL_P; } +<> { + yylval->str = scanstring; + BEGIN INITIAL; + return checkKeyword(); + } -\=\= { return EQUAL_P; } +\\[\"\'\\] { addchar(false, yytext[1]); } -\<\> { return NOTEQUAL_P; } +\\b { addchar(false, '\b'); } -\!\= { return NOTEQUAL_P; } +\\f { addchar(false, '\f'); } -\>\= { return GREATEREQUAL_P; } +\\n { addchar(false, '\n'); } -\> { return GREATER_P; } +\\r { addchar(false, '\r'); } -\${any}+ { +\\t { addchar(false, '\t'); } + +\\v { addchar(false, '\v'); } + +{unicode}+ { parseUnicode(yytext, yyleng); } + +{hex_char}+ { parseHexChars(yytext, yyleng); } + +\\x { yyerror(NULL, "Hex character sequence is invalid"); } + +\\u { yyerror(NULL, "Unicode sequence is invalid"); } + +\\. { yyerror(NULL, "Escape sequence is invalid"); } + +\\ { yyerror(NULL, "Unexpected end after backslash"); } + +<> { yyerror(NULL, "Unexpected end of quoted string"); } + +\" { + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + +\" { + yylval->str = scanstring; + BEGIN INITIAL; + return VARIABLE_P; + } + +\' { + yylval->str = scanstring; + BEGIN INITIAL; + return STRING_P; + } + +[^\\\"]+ { addstring(false, yytext, yyleng); } + +[^\\\']+ { addstring(false, yytext, yyleng); } + +\*\/ { BEGIN INITIAL; } + +[^\*]+ { } + +\* { } + +<> { yyerror(NULL, "Unexpected end of comment"); } + +\&\& { return AND_P; } + +\|\| { return OR_P; } + +\! 
{ return NOT_P; } + +\*\* { return ANY_P; } + +\< { return LESS_P; } + +\<\= { return LESSEQUAL_P; } + +\=\= { return EQUAL_P; } + +\<\> { return NOTEQUAL_P; } + +\!\= { return NOTEQUAL_P; } + +\>\= { return GREATEREQUAL_P; } + +\> { return GREATER_P; } + +\${any}+ { addstring(true, yytext + 1, yyleng - 1); addchar(false, '\0'); yylval->str = scanstring; return VARIABLE_P; } -\$\" { +\$\" { addchar(true, '\0'); - BEGIN xVARQUOTED; + BEGIN xvq; } -{special} { return *yytext; } +{special} { return *yytext; } -{blank}+ { /* ignore */ } +{blank}+ { /* ignore */ } -\/\* { +\/\* { addchar(true, '\0'); - BEGIN xCOMMENT; + BEGIN xc; } -[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ { +[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ { /* float */ addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -\.[0-9]+[eE][+-]?[0-9]+ /* float */ { +\.[0-9]+[eE][+-]?[0-9]+ { /* float */ addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -([0-9]+)?\.[0-9]+ { +([0-9]+)?\.[0-9]+ { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return NUMERIC_P; } -[0-9]+ { +[0-9]+ { addstring(true, yytext, yyleng); addchar(false, '\0'); yylval->str = scanstring; return INT_P; } -{any}+ { +{any}+ { addstring(true, yytext, yyleng); - BEGIN xNONQUOTED; + BEGIN xnq; } -\" { +\" { addchar(true, '\0'); - BEGIN xQUOTED; + BEGIN xq; } -\' { +\' { addchar(true, '\0'); - BEGIN xSINGLEQUOTED; + BEGIN xsq; } -\\ { +\\ { yyless(0); addchar(true, '\0'); - BEGIN xNONQUOTED; + BEGIN xnq; } -{any}+ { - addstring(false, yytext, yyleng); - } - -{blank}+ { - yylval->str = scanstring; - BEGIN INITIAL; - return checkSpecialVal(); - } - - -\/\* { - yylval->str = scanstring; - BEGIN xCOMMENT; - } - -({special}|\"|\') { - yylval->str = scanstring; - yyless(0); - BEGIN INITIAL; - return checkSpecialVal(); - } - -<> { - yylval->str = scanstring; - BEGIN INITIAL; - return checkSpecialVal(); - } - -\\[\"\'\\] { 
addchar(false, yytext[1]); } - -\\b { addchar(false, '\b'); } - -\\f { addchar(false, '\f'); } - -\\n { addchar(false, '\n'); } - -\\r { addchar(false, '\r'); } - -\\t { addchar(false, '\t'); } - -\\v { addchar(false, '\v'); } - -{unicode}+ { parseUnicode(yytext, yyleng); } - -{hex_char}+ { parseHexChars(yytext, yyleng); } - -\\x { yyerror(NULL, "Hex character sequence is invalid"); } - -\\u { yyerror(NULL, "Unicode sequence is invalid"); } - -\\. { yyerror(NULL, "Escape sequence is invalid"); } - -\\ { yyerror(NULL, "Unexpected end after backslash"); } - -<> { yyerror(NULL, "Unexpected end of quoted string"); } - -\" { - yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; - } - -\" { - yylval->str = scanstring; - BEGIN INITIAL; - return VARIABLE_P; - } - -\' { - yylval->str = scanstring; - BEGIN INITIAL; - return STRING_P; - } - -[^\\\"]+ { addstring(false, yytext, yyleng); } - -[^\\\']+ { addstring(false, yytext, yyleng); } - -<> { yyterminate(); } - -\*\/ { BEGIN INITIAL; } - -[^\*]+ { } - -\* { } - -<> { yyerror(NULL, "Unexpected end of comment"); } +<> { yyterminate(); } %% @@ -292,7 +300,6 @@ typedef struct JsonPathKeyword * Array of key words should be sorted by length and then * alphabetical order */ - static const JsonPathKeyword keywords[] = { { 2, false, IS_P, "is"}, { 2, false, TO_P, "to"}, @@ -317,8 +324,9 @@ static const JsonPathKeyword keywords[] = { { 10,false, LIKE_REGEX_P, "like_regex"}, }; -static int -checkSpecialVal() +/* Check if current scanstring value is a keyword */ +static enum yytokentype +checkKeyword() { int res = IDENT_P; int diff; @@ -329,7 +337,7 @@ checkSpecialVal() if (scanstring.len > keywords[lengthof(keywords) - 1].len) return res; - while(StopLow < StopHigh) + while (StopLow < StopHigh) { StopMiddle = StopLow + ((StopHigh - StopLow) >> 1); @@ -397,49 +405,50 @@ jsonpath_scanner_finish(void) pfree(scanbuf); } +/* + * Resize scanstring so that it can append string of given length. + * Reinitialize if required. 
+ */ +static void +resizeString(bool init, int appendLen) +{ + if (init) + { + scanstring.total = Max(32, appendLen); + scanstring.val = (char *) palloc(scanstring.total); + scanstring.len = 0; + } + else + { + if (scanstring.len + appendLen >= scanstring.total) + { + while (scanstring.len + appendLen >= scanstring.total) + scanstring.total *= 2; + scanstring.val = repalloc(scanstring.val, scanstring.total); + } + } +} + +/* Add set of bytes at "s" of length "l" to scanstring */ static void addstring(bool init, char *s, int l) { - if (init) - { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); - scanstring.len = 0; - } - - if (s && l) - { - while(scanstring.len + l + 1 >= scanstring.total) - { - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); - } - - memcpy(scanstring.val + scanstring.len, s, l); - scanstring.len += l; - } + resizeString(init, l + 1); + memcpy(scanstring.val + scanstring.len, s, l); + scanstring.len += l; } +/* Add single byte "c" to scanstring */ static void -addchar(bool init, char s) +addchar(bool init, char c) { - if (init) - { - scanstring.total = 32; - scanstring.val = palloc(scanstring.total); - scanstring.len = 0; - } - else if(scanstring.len + 1 >= scanstring.total) - { - scanstring.total *= 2; - scanstring.val = repalloc(scanstring.val, scanstring.total); - } - - scanstring.val[ scanstring.len ] = s; - if (s != '\0') + resizeString(init, 1); + scanstring.val[scanstring.len] = c; + if (c != '\0') scanstring.len++; } +/* Interface to jsonpath parser */ JsonPathParseResult * parsejsonpath(const char *str, int len) { @@ -447,7 +456,7 @@ parsejsonpath(const char *str, int len) jsonpath_scanner_init(str, len); - if (jsonpath_yyparse((void*)&parseresult) != 0) + if (jsonpath_yyparse((void *) &parseresult) != 0) jsonpath_yyerror(NULL, "bugus input"); jsonpath_scanner_finish(); @@ -455,6 +464,7 @@ parsejsonpath(const char *str, int len) return parseresult; } +/* Turn hex character into 
integer */ static int hexval(char c) { @@ -468,6 +478,7 @@ hexval(char c) return 0; /* not reached */ } +/* Add given unicode character to scanstring */ static void addUnicodeChar(int ch) { @@ -515,6 +526,7 @@ addUnicodeChar(int ch) } } +/* Add unicode character and process its hi surrogate */ static void addUnicode(int ch, int *hi_surrogate) { @@ -592,6 +604,7 @@ parseUnicode(char *s, int l) } } +/* Parse sequence of hex-encoded characters */ static void parseHexChars(char *s, int l) { @@ -601,7 +614,8 @@ parseHexChars(char *s, int l) for (i = 0; i < l / 4; i++) { - int ch = (hexval(s[i * 4 + 2]) << 4) | hexval(s[i * 4 + 3]); + int ch = (hexval(s[i * 4 + 2]) << 4) | + hexval(s[i * 4 + 3]); addUnicodeChar(ch); }