1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-08 11:42:09 +03:00
Files
postgres/src/backend/utils/adt/jsonpath_gram.y
Tom Lane 16e3ad5d14 Avoid using %c printf format for potentially non-ASCII characters.
Since %c only passes a C "char" to printf, it's incapable of dealing
with multibyte characters.  Passing just the first byte of such a
character leads to an output string that is visibly not correctly
encoded, resulting in undesirable behavior such as encoding conversion
failures while sending error messages to clients.

We've lived with this issue for a long time because it was inconvenient
to avoid in a portable fashion.  However, now that we always use our own
snprintf code, it's reasonable to use the %.*s format to print just one
possibly-multibyte character in a string.  (We previously avoided that
obvious-looking answer in order to work around glibc's bug #6530, cf
commits 54cd4f045 and ed437e2b2.)

Hence, run around and fix a bunch of places that used %c to report
a character found in a user-supplied string.  For simplicity, I did
not touch places that were emitting non-user-facing debug messages,
or reporting catalog data that should always be ASCII.  (It's also
unclear how useful this approach could be in frontend code, where
it's less certain that we know what encoding we're dealing with.)

In passing, improve a couple of poorly-written error messages in
pageinspect/heapfuncs.c.

This is a longstanding issue, but I'm hesitant to back-patch because
of the impact on translatable message strings.  In any case this fix
would not work reliably before v12.

Tom Lane and Quan Zongliang

Discussion: https://postgr.es/m/a120087c-4c88-d9d4-1ec5-808d7a7f133d@gmail.com
2020-06-29 11:41:19 -04:00

598 lines
14 KiB
Plaintext

%{
/*-------------------------------------------------------------------------
*
* jsonpath_gram.y
* Grammar definitions for jsonpath datatype
*
* Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
*
* Copyright (c) 2019-2020, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/utils/adt/jsonpath_gram.y
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/pg_collation.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/jsonpath.h"
/* struct JsonPathString is shared between scan and gram */
typedef struct JsonPathString
{
char *val;
int len;
int total;
} JsonPathString;
union YYSTYPE;
/* flex 2.5.4 doesn't bother with a decl for this */
int jsonpath_yylex(union YYSTYPE *yylval_param);
int jsonpath_yyparse(JsonPathParseResult **result);
void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
static JsonPathParseItem *makeItemType(JsonPathItemType type);
static JsonPathParseItem *makeItemString(JsonPathString *s);
static JsonPathParseItem *makeItemVariable(JsonPathString *s);
static JsonPathParseItem *makeItemKey(JsonPathString *s);
static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
static JsonPathParseItem *makeItemBool(bool val);
static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
JsonPathParseItem *la,
JsonPathParseItem *ra);
static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
JsonPathParseItem *a);
static JsonPathParseItem *makeItemList(List *list);
static JsonPathParseItem *makeIndexArray(List *list);
static JsonPathParseItem *makeAny(int first, int last);
static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
JsonPathString *pattern,
JsonPathString *flags);
/*
* Bison doesn't allocate anything that needs to live across parser calls,
* so we can easily have it use palloc instead of malloc. This prevents
* memory leaks if we error out during parsing. Note this only works with
* bison >= 2.0. However, in bison 1.875 the default is to use alloca()
* if possible, so there's not really much problem anyhow, at least if
* you're building with gcc.
*/
#define YYMALLOC palloc
#define YYFREE pfree
%}
/* BISON Declarations */
%pure-parser
%expect 0
%name-prefix="jsonpath_yy"
%error-verbose
%parse-param {JsonPathParseResult **result}
%union {
JsonPathString str;
List *elems; /* list of JsonPathParseItem */
List *indexs; /* list of integers */
JsonPathParseItem *value;
JsonPathParseResult *result;
JsonPathItemType optype;
bool boolean;
int integer;
}
%token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
%token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
%token <str> OR_P AND_P NOT_P
%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
%token <str> DATETIME_P
%type <result> result
%type <value> scalar_value path_primary expr array_accessor
any_path accessor_op key predicate delimited_predicate
index_elem starts_with_initial expr_or_predicate
datetime_template opt_datetime_template
%type <elems> accessor_expr
%type <indexs> index_list
%type <optype> comp_op method
%type <boolean> mode
%type <str> key_name
%type <integer> any_level
%left OR_P
%left AND_P
%right NOT_P
%left '+' '-'
%left '*' '/' '%'
%left UMINUS
%nonassoc '(' ')'
/* Grammar follows */
%%
result:
mode expr_or_predicate {
*result = palloc(sizeof(JsonPathParseResult));
(*result)->expr = $2;
(*result)->lax = $1;
}
| /* EMPTY */ { *result = NULL; }
;
expr_or_predicate:
expr { $$ = $1; }
| predicate { $$ = $1; }
;
mode:
STRICT_P { $$ = false; }
| LAX_P { $$ = true; }
| /* EMPTY */ { $$ = true; }
;
scalar_value:
STRING_P { $$ = makeItemString(&$1); }
| NULL_P { $$ = makeItemString(NULL); }
| TRUE_P { $$ = makeItemBool(true); }
| FALSE_P { $$ = makeItemBool(false); }
| NUMERIC_P { $$ = makeItemNumeric(&$1); }
| INT_P { $$ = makeItemNumeric(&$1); }
| VARIABLE_P { $$ = makeItemVariable(&$1); }
;
comp_op:
EQUAL_P { $$ = jpiEqual; }
| NOTEQUAL_P { $$ = jpiNotEqual; }
| LESS_P { $$ = jpiLess; }
| GREATER_P { $$ = jpiGreater; }
| LESSEQUAL_P { $$ = jpiLessOrEqual; }
| GREATEREQUAL_P { $$ = jpiGreaterOrEqual; }
;
delimited_predicate:
'(' predicate ')' { $$ = $2; }
| EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
;
predicate:
delimited_predicate { $$ = $1; }
| expr comp_op expr { $$ = makeItemBinary($2, $1, $3); }
| predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
| predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
| NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
| '(' predicate ')' IS_P UNKNOWN_P
{ $$ = makeItemUnary(jpiIsUnknown, $2); }
| expr STARTS_P WITH_P starts_with_initial
{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
| expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
{ $$ = makeItemLikeRegex($1, &$3, &$5); }
;
starts_with_initial:
STRING_P { $$ = makeItemString(&$1); }
| VARIABLE_P { $$ = makeItemVariable(&$1); }
;
path_primary:
scalar_value { $$ = $1; }
| '$' { $$ = makeItemType(jpiRoot); }
| '@' { $$ = makeItemType(jpiCurrent); }
| LAST_P { $$ = makeItemType(jpiLast); }
;
accessor_expr:
path_primary { $$ = list_make1($1); }
| '(' expr ')' accessor_op { $$ = list_make2($2, $4); }
| '(' predicate ')' accessor_op { $$ = list_make2($2, $4); }
| accessor_expr accessor_op { $$ = lappend($1, $2); }
;
expr:
accessor_expr { $$ = makeItemList($1); }
| '(' expr ')' { $$ = $2; }
| '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); }
| '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); }
| expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); }
| expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); }
| expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); }
| expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); }
| expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); }
;
index_elem:
expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); }
| expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); }
;
index_list:
index_elem { $$ = list_make1($1); }
| index_list ',' index_elem { $$ = lappend($1, $3); }
;
array_accessor:
'[' '*' ']' { $$ = makeItemType(jpiAnyArray); }
| '[' index_list ']' { $$ = makeIndexArray($2); }
;
any_level:
INT_P { $$ = pg_atoi($1.val, 4, 0); }
| LAST_P { $$ = -1; }
;
any_path:
ANY_P { $$ = makeAny(0, -1); }
| ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
| ANY_P '{' any_level TO_P any_level '}'
{ $$ = makeAny($3, $5); }
;
accessor_op:
'.' key { $$ = $2; }
| '.' '*' { $$ = makeItemType(jpiAnyKey); }
| array_accessor { $$ = $1; }
| '.' any_path { $$ = $2; }
| '.' method '(' ')' { $$ = makeItemType($2); }
| '.' DATETIME_P '(' opt_datetime_template ')'
{ $$ = makeItemUnary(jpiDatetime, $4); }
| '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); }
;
datetime_template:
STRING_P { $$ = makeItemString(&$1); }
;
opt_datetime_template:
datetime_template { $$ = $1; }
| /* EMPTY */ { $$ = NULL; }
;
key:
key_name { $$ = makeItemKey(&$1); }
;
key_name:
IDENT_P
| STRING_P
| TO_P
| NULL_P
| TRUE_P
| FALSE_P
| IS_P
| UNKNOWN_P
| EXISTS_P
| STRICT_P
| LAX_P
| ABS_P
| SIZE_P
| TYPE_P
| FLOOR_P
| DOUBLE_P
| CEILING_P
| DATETIME_P
| KEYVALUE_P
| LAST_P
| STARTS_P
| WITH_P
| LIKE_REGEX_P
| FLAG_P
;
method:
ABS_P { $$ = jpiAbs; }
| SIZE_P { $$ = jpiSize; }
| TYPE_P { $$ = jpiType; }
| FLOOR_P { $$ = jpiFloor; }
| DOUBLE_P { $$ = jpiDouble; }
| CEILING_P { $$ = jpiCeiling; }
| KEYVALUE_P { $$ = jpiKeyValue; }
;
%%
/*
* The helper functions below allocate and fill JsonPathParseItem's of various
* types.
*/
static JsonPathParseItem *
makeItemType(JsonPathItemType type)
{
JsonPathParseItem *v = palloc(sizeof(*v));
CHECK_FOR_INTERRUPTS();
v->type = type;
v->next = NULL;
return v;
}
static JsonPathParseItem *
makeItemString(JsonPathString *s)
{
JsonPathParseItem *v;
if (s == NULL)
{
v = makeItemType(jpiNull);
}
else
{
v = makeItemType(jpiString);
v->value.string.val = s->val;
v->value.string.len = s->len;
}
return v;
}
static JsonPathParseItem *
makeItemVariable(JsonPathString *s)
{
JsonPathParseItem *v;
v = makeItemType(jpiVariable);
v->value.string.val = s->val;
v->value.string.len = s->len;
return v;
}
static JsonPathParseItem *
makeItemKey(JsonPathString *s)
{
JsonPathParseItem *v;
v = makeItemString(s);
v->type = jpiKey;
return v;
}
static JsonPathParseItem *
makeItemNumeric(JsonPathString *s)
{
JsonPathParseItem *v;
v = makeItemType(jpiNumeric);
v->value.numeric =
DatumGetNumeric(DirectFunctionCall3(numeric_in,
CStringGetDatum(s->val),
ObjectIdGetDatum(InvalidOid),
Int32GetDatum(-1)));
return v;
}
static JsonPathParseItem *
makeItemBool(bool val)
{
JsonPathParseItem *v = makeItemType(jpiBool);
v->value.boolean = val;
return v;
}
static JsonPathParseItem *
makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
{
JsonPathParseItem *v = makeItemType(type);
v->value.args.left = la;
v->value.args.right = ra;
return v;
}
static JsonPathParseItem *
makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
{
JsonPathParseItem *v;
if (type == jpiPlus && a->type == jpiNumeric && !a->next)
return a;
if (type == jpiMinus && a->type == jpiNumeric && !a->next)
{
v = makeItemType(jpiNumeric);
v->value.numeric =
DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
NumericGetDatum(a->value.numeric)));
return v;
}
v = makeItemType(type);
v->value.arg = a;
return v;
}
static JsonPathParseItem *
makeItemList(List *list)
{
JsonPathParseItem *head,
*end;
ListCell *cell;
head = end = (JsonPathParseItem *) linitial(list);
if (list_length(list) == 1)
return head;
/* append items to the end of already existing list */
while (end->next)
end = end->next;
for_each_cell(cell, list, list_second_cell(list))
{
JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
end->next = c;
end = c;
}
return head;
}
static JsonPathParseItem *
makeIndexArray(List *list)
{
JsonPathParseItem *v = makeItemType(jpiIndexArray);
ListCell *cell;
int i = 0;
Assert(list_length(list) > 0);
v->value.array.nelems = list_length(list);
v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
v->value.array.nelems);
foreach(cell, list)
{
JsonPathParseItem *jpi = lfirst(cell);
Assert(jpi->type == jpiSubscript);
v->value.array.elems[i].from = jpi->value.args.left;
v->value.array.elems[i++].to = jpi->value.args.right;
}
return v;
}
static JsonPathParseItem *
makeAny(int first, int last)
{
JsonPathParseItem *v = makeItemType(jpiAny);
v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
return v;
}
static JsonPathParseItem *
makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
JsonPathString *flags)
{
JsonPathParseItem *v = makeItemType(jpiLikeRegex);
int i;
int cflags;
v->value.like_regex.expr = expr;
v->value.like_regex.pattern = pattern->val;
v->value.like_regex.patternlen = pattern->len;
/* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
v->value.like_regex.flags = 0;
for (i = 0; flags && i < flags->len; i++)
{
switch (flags->val[i])
{
case 'i':
v->value.like_regex.flags |= JSP_REGEX_ICASE;
break;
case 's':
v->value.like_regex.flags |= JSP_REGEX_DOTALL;
break;
case 'm':
v->value.like_regex.flags |= JSP_REGEX_MLINE;
break;
case 'x':
v->value.like_regex.flags |= JSP_REGEX_WSPACE;
break;
case 'q':
v->value.like_regex.flags |= JSP_REGEX_QUOTE;
break;
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("invalid input syntax for type %s", "jsonpath"),
errdetail("unrecognized flag character \"%.*s\" in LIKE_REGEX predicate",
pg_mblen(flags->val + i), flags->val + i)));
break;
}
}
/* Convert flags to what RE_compile_and_cache needs */
cflags = jspConvertRegexFlags(v->value.like_regex.flags);
/* check regex validity */
(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
pattern->len),
cflags, DEFAULT_COLLATION_OID);
return v;
}
/*
* Convert from XQuery regex flags to those recognized by our regex library.
*/
int
jspConvertRegexFlags(uint32 xflags)
{
/* By default, XQuery is very nearly the same as Spencer's AREs */
int cflags = REG_ADVANCED;
/* Ignore-case means the same thing, too, modulo locale issues */
if (xflags & JSP_REGEX_ICASE)
cflags |= REG_ICASE;
/* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
if (xflags & JSP_REGEX_QUOTE)
{
cflags &= ~REG_ADVANCED;
cflags |= REG_QUOTE;
}
else
{
/* Note that dotall mode is the default in POSIX */
if (!(xflags & JSP_REGEX_DOTALL))
cflags |= REG_NLSTOP;
if (xflags & JSP_REGEX_MLINE)
cflags |= REG_NLANCH;
/*
* XQuery's 'x' mode is related to Spencer's expanded mode, but it's
* not really enough alike to justify treating JSP_REGEX_WSPACE as
* REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
* future we'll modify the regex library to have an option for
* XQuery-style ignore-whitespace mode.
*/
if (xflags & JSP_REGEX_WSPACE)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
}
return cflags;
}
/*
* jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
* unavoidable because jsonpath_gram does not create a .h file to export its
* token symbols. If these files ever grow large enough to be worth compiling
* separately, that could be fixed; but for now it seems like useless
* complication.
*/
#include "jsonpath_scan.c"