1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Partial implementation of SQL/JSON path language

The SQL:2016 standard, among other things, contains a set of SQL/JSON features
for JSON processing inside a relational database.  The core of SQL/JSON is the
JSON path language, which allows accessing parts of JSON documents and making
computations over them.  This commit implements partial support for the JSON
path language as a separate datatype called "jsonpath".  The implementation is
partial because it lacks datetime support and suppression of numeric errors.
The missing features will be added later by separate commits.

Support for SQL/JSON features requires the implementation of separate nodes,
which will be considered in subsequent patches.  This commit includes the
following set of plain functions, which allow executing jsonpath over jsonb
values:

 * jsonb_path_exists(jsonb, jsonpath[, jsonb, bool]),
 * jsonb_path_match(jsonb, jsonpath[, jsonb, bool]),
 * jsonb_path_query(jsonb, jsonpath[, jsonb, bool]),
 * jsonb_path_query_array(jsonb, jsonpath[, jsonb, bool]),
 * jsonb_path_query_first(jsonb, jsonpath[, jsonb, bool]).

This commit also implements the operators "jsonb @? jsonpath" and
"jsonb @@ jsonpath", which are wrappers over jsonpath_exists(jsonb, jsonpath)
and jsonpath_predicate(jsonb, jsonpath), respectively.  These operators will
get index support (to be implemented in subsequent patches).

Catversion bumped, to add new functions and operators.

Code was written by Nikita Glukhov and Teodor Sigaev, revised by me.
Documentation was written by Oleg Bartunov and Liudmila Mantrova.  The work
was inspired by Oleg Bartunov.

Discussion: https://postgr.es/m/fcc6fc6a-b497-f39a-923d-aa34d0c588e8%402ndQuadrant.com
Author: Nikita Glukhov, Teodor Sigaev, Alexander Korotkov, Oleg Bartunov, Liudmila Mantrova
Reviewed-by: Tomas Vondra, Andrew Dunstan, Pavel Stehule, Alexander Korotkov
This commit is contained in:
Alexander Korotkov
2019-03-16 12:15:37 +03:00
parent 893d6f8a1f
commit 72b6460336
33 changed files with 9079 additions and 55 deletions

View File

@ -136,6 +136,9 @@ parser/gram.h: parser/gram.y
storage/lmgr/lwlocknames.h: storage/lmgr/generate-lwlocknames.pl storage/lmgr/lwlocknames.txt
$(MAKE) -C storage/lmgr lwlocknames.h lwlocknames.c
utils/adt/jsonpath_gram.h: utils/adt/jsonpath_gram.y
$(MAKE) -C utils/adt jsonpath_gram.h
# run this unconditionally to avoid needing to know its dependencies here:
submake-catalog-headers:
$(MAKE) -C catalog distprep generated-header-symlinks
@ -159,7 +162,7 @@ submake-utils-headers:
.PHONY: generated-headers
generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-utils-headers
generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h $(top_builddir)/src/include/utils/jsonpath_gram.h submake-catalog-headers submake-utils-headers
$(top_builddir)/src/include/parser/gram.h: parser/gram.h
prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
@ -171,6 +174,10 @@ $(top_builddir)/src/include/storage/lwlocknames.h: storage/lmgr/lwlocknames.h
cd '$(dir $@)' && rm -f $(notdir $@) && \
$(LN_S) "$$prereqdir/$(notdir $<)" .
$(top_builddir)/src/include/utils/jsonpath_gram.h: utils/adt/jsonpath_gram.h
prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
cd '$(dir $@)' && rm -f $(notdir $@) && \
$(LN_S) "$$prereqdir/$(notdir $<)" .
utils/probes.o: utils/probes.d $(SUBDIROBJS)
$(DTRACE) $(DTRACEFLAGS) -C -G -s $(call expand_subsys,$^) -o $@
@ -186,6 +193,7 @@ distprep:
$(MAKE) -C replication repl_gram.c repl_scanner.c syncrep_gram.c syncrep_scanner.c
$(MAKE) -C storage/lmgr lwlocknames.h lwlocknames.c
$(MAKE) -C utils distprep
$(MAKE) -C utils/adt jsonpath_gram.c jsonpath_gram.h jsonpath_scan.c
$(MAKE) -C utils/misc guc-file.c
$(MAKE) -C utils/sort qsort_tuple.c
@ -308,6 +316,7 @@ maintainer-clean: distclean
storage/lmgr/lwlocknames.c \
storage/lmgr/lwlocknames.h \
utils/misc/guc-file.c \
utils/adt/jsonpath_gram.h \
utils/sort/qsort_tuple.c

View File

@ -1128,6 +1128,46 @@ LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_insert';
-- jsonb_path_exists: SQL-level declaration bound to the internal function
-- 'jsonb_path_exists'; returns boolean.  "vars" defaults to an empty JSON
-- object and "silent" defaults to false, so callers may omit both.
CREATE OR REPLACE FUNCTION
jsonb_path_exists(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
silent boolean DEFAULT false)
RETURNS boolean
LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_path_exists';
-- jsonb_path_match: SQL-level declaration bound to the internal function
-- 'jsonb_path_match'; returns boolean.  "vars" defaults to an empty JSON
-- object and "silent" defaults to false, so callers may omit both.
CREATE OR REPLACE FUNCTION
jsonb_path_match(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
silent boolean DEFAULT false)
RETURNS boolean
LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_path_match';
-- jsonb_path_query: SQL-level declaration bound to the internal function
-- 'jsonb_path_query'; set-returning (SETOF jsonb).  "vars" defaults to an
-- empty JSON object and "silent" defaults to false, so callers may omit both.
CREATE OR REPLACE FUNCTION
jsonb_path_query(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
silent boolean DEFAULT false)
RETURNS SETOF jsonb
LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_path_query';
-- jsonb_path_query_array: SQL-level declaration bound to the internal
-- function 'jsonb_path_query_array'; returns a single jsonb value.  "vars"
-- defaults to an empty JSON object and "silent" defaults to false, so callers
-- may omit both.
CREATE OR REPLACE FUNCTION
jsonb_path_query_array(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
silent boolean DEFAULT false)
RETURNS jsonb
LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_path_query_array';
-- jsonb_path_query_first: SQL-level declaration bound to the internal
-- function 'jsonb_path_query_first'; returns a single jsonb value.  "vars"
-- defaults to an empty JSON object and "silent" defaults to false, so callers
-- may omit both.
CREATE OR REPLACE FUNCTION
jsonb_path_query_first(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
silent boolean DEFAULT false)
RETURNS jsonb
LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_path_query_first';
--
-- The default permissions for functions mean that anyone can execute them.
-- A number of functions shouldn't be executable by just anyone, but rather

3
src/backend/utils/adt/.gitignore vendored Normal file
View File

@ -0,0 +1,3 @@
/jsonpath_gram.h
/jsonpath_gram.c
/jsonpath_scan.c

View File

@ -17,8 +17,8 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \
float.o format_type.o formatting.o genfile.o \
geo_ops.o geo_selfuncs.o geo_spgist.o inet_cidr_ntop.o inet_net_pton.o \
int.o int8.o json.o jsonb.o jsonb_gin.o jsonb_op.o jsonb_util.o \
jsonfuncs.o like.o like_support.o lockfuncs.o \
mac.o mac8.o misc.o name.o \
jsonfuncs.o jsonpath_gram.o jsonpath_scan.o jsonpath.o jsonpath_exec.o \
like.o like_support.o lockfuncs.o mac.o mac8.o misc.o name.o \
network.o network_gist.o network_selfuncs.o network_spgist.o \
numeric.o numutils.o oid.o oracle_compat.o \
orderedsetaggs.o partitionfuncs.o pg_locale.o pg_lsn.o \
@ -33,6 +33,21 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \
txid.o uuid.o varbit.o varchar.o varlena.o version.o \
windowfuncs.o xid.o xml.o
jsonpath_gram.c: BISONFLAGS += -d
jsonpath_scan.c: FLEXFLAGS = -CF -p -p
jsonpath_gram.h: jsonpath_gram.c ;
# Force these dependencies to be known even without dependency info built.
# Only objects that are actually listed in OBJS are named here.
jsonpath_gram.o jsonpath_scan.o: jsonpath_gram.h
# jsonpath_gram.c, jsonpath_gram.h, and jsonpath_scan.c are in the
# distribution tarball, so they are not cleaned here.
clean distclean maintainer-clean:
rm -f lex.backup
like.o: like.c like_match.c
varlena.o: varlena.c levenshtein.c

View File

@ -163,6 +163,55 @@ jsonb_send(PG_FUNCTION_ARGS)
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
/*
 * Return the type name of a jsonb container.
 *
 * If the container yields a scalar through JsonbExtractScalar(), the name of
 * that scalar's type is returned; otherwise the result is "array" or
 * "object".  A container that is none of these is reported as an error.
 */
static const char *
JsonbContainerTypeName(JsonbContainer *jbc)
{
	JsonbValue	scalar;

	/* The scalar check comes first, ahead of the array/object tests. */
	if (JsonbExtractScalar(jbc, &scalar))
		return JsonbTypeName(&scalar);

	if (JsonContainerIsArray(jbc))
		return "array";

	if (JsonContainerIsObject(jbc))
		return "object";

	elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header);
	return "unknown";
}
/*
 * Return the type name of a jsonb value.
 *
 * Binary values are resolved through the container they point to; every
 * other known value type maps to a fixed name.  An unknown value type is
 * reported as an error.
 */
const char *
JsonbTypeName(JsonbValue *jbv)
{
	const char *name;

	switch (jbv->type)
	{
		case jbvBinary:
			name = JsonbContainerTypeName(jbv->val.binary.data);
			break;
		case jbvObject:
			name = "object";
			break;
		case jbvArray:
			name = "array";
			break;
		case jbvNumeric:
			name = "number";
			break;
		case jbvString:
			name = "string";
			break;
		case jbvBool:
			name = "boolean";
			break;
		case jbvNull:
			name = "null";
			break;
		default:
			elog(ERROR, "unrecognized jsonb value type: %d", jbv->type);
			name = "unknown";
			break;
	}

	return name;
}
/*
* SQL function jsonb_typeof(jsonb) -> text
*
@ -173,45 +222,7 @@ Datum
jsonb_typeof(PG_FUNCTION_ARGS)
{
Jsonb *in = PG_GETARG_JSONB_P(0);
JsonbIterator *it;
JsonbValue v;
char *result;
if (JB_ROOT_IS_OBJECT(in))
result = "object";
else if (JB_ROOT_IS_ARRAY(in) && !JB_ROOT_IS_SCALAR(in))
result = "array";
else
{
Assert(JB_ROOT_IS_SCALAR(in));
it = JsonbIteratorInit(&in->root);
/*
* A root scalar is stored as an array of one element, so we get the
* array and then its first (and only) member.
*/
(void) JsonbIteratorNext(&it, &v, true);
Assert(v.type == jbvArray);
(void) JsonbIteratorNext(&it, &v, true);
switch (v.type)
{
case jbvNull:
result = "null";
break;
case jbvString:
result = "string";
break;
case jbvNumeric:
result = "number";
break;
case jbvBool:
result = "boolean";
break;
default:
elog(ERROR, "unknown jsonb scalar type");
}
}
const char *result = JsonbContainerTypeName(&in->root);
PG_RETURN_TEXT_P(cstring_to_text(result));
}
@ -1857,7 +1868,7 @@ jsonb_object_agg_finalfn(PG_FUNCTION_ARGS)
/*
* Extract scalar value from raw-scalar pseudo-array jsonb.
*/
static bool
bool
JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res)
{
JsonbIterator *it;

View File

@ -1728,6 +1728,14 @@ convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
break;
case jbvNumeric:
/* replace numeric NaN with string "NaN" */
if (numeric_is_nan(scalarVal->val.numeric))
{
appendToBuffer(buffer, "NaN", 3);
*jentry = 3;
break;
}
numlen = VARSIZE_ANY(scalarVal->val.numeric);
padlen = padBufferToInt(buffer);

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,480 @@
/*-------------------------------------------------------------------------
*
* jsonpath_gram.y
* Grammar definitions for jsonpath datatype
*
* Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/utils/adt/jsonpath_gram.y
*
*-------------------------------------------------------------------------
*/
%{
#include "postgres.h"
#include "catalog/pg_collation.h"
#include "fmgr.h"
#include "miscadmin.h"
#include "nodes/pg_list.h"
#include "regex/regex.h"
#include "utils/builtins.h"
#include "utils/jsonpath.h"
#include "utils/jsonpath_scanner.h"
/*
* Bison doesn't allocate anything that needs to live across parser calls,
* so we can easily have it use palloc instead of malloc. This prevents
* memory leaks if we error out during parsing. Note this only works with
* bison >= 2.0. However, in bison 1.875 the default is to use alloca()
* if possible, so there's not really much problem anyhow, at least if
* you're building with gcc.
*/
#define YYMALLOC palloc
#define YYFREE pfree
static JsonPathParseItem*
makeItemType(int type)
{
JsonPathParseItem* v = palloc(sizeof(*v));
CHECK_FOR_INTERRUPTS();
v->type = type;
v->next = NULL;
return v;
}
/*
 * Build a string item, or a jpiNull item when "s" is NULL.
 *
 * The item stores the pointer and length taken from "s"; the bytes are not
 * copied.
 */
static JsonPathParseItem *
makeItemString(string *s)
{
	JsonPathParseItem *item;

	if (s == NULL)
		return makeItemType(jpiNull);

	item = makeItemType(jpiString);
	item->value.string.len = s->len;
	item->value.string.val = s->val;

	return item;
}
/*
 * Build a jpiVariable item whose name is the given string.
 *
 * The item stores the pointer and length taken from "s"; the bytes are not
 * copied.
 */
static JsonPathParseItem *
makeItemVariable(string *s)
{
	JsonPathParseItem *item = makeItemType(jpiVariable);

	item->value.string.len = s->len;
	item->value.string.val = s->val;

	return item;
}
/*
 * Build a jpiKey item: constructed exactly like a string item, then
 * retagged as a key.
 */
static JsonPathParseItem *
makeItemKey(string *s)
{
	JsonPathParseItem *item = makeItemString(s);

	item->type = jpiKey;

	return item;
}
/*
 * Build a jpiNumeric item by converting the text in "s" with numeric_in().
 *
 * s->val is handed to numeric_in() as a C string.  The second and third
 * arguments are passed through the explicit Datum conversion macros rather
 * than as bare integer literals; the values are still 0 and -1.
 */
static JsonPathParseItem *
makeItemNumeric(string *s)
{
	JsonPathParseItem *v;

	v = makeItemType(jpiNumeric);
	v->value.numeric =
		DatumGetNumeric(DirectFunctionCall3(numeric_in,
											CStringGetDatum(s->val),
											ObjectIdGetDatum(InvalidOid),
											Int32GetDatum(-1)));

	return v;
}
/*
 * Build a jpiBool item holding the given truth value.
 */
static JsonPathParseItem *
makeItemBool(bool val)
{
	JsonPathParseItem *item;

	item = makeItemType(jpiBool);
	item->value.boolean = val;

	return item;
}
/*
 * Build a two-operand item of the given type.
 *
 * "la" and "ra" are stored as the left and right arguments as-is.
 */
static JsonPathParseItem *
makeItemBinary(int type, JsonPathParseItem *la, JsonPathParseItem *ra)
{
	JsonPathParseItem *item;

	item = makeItemType(type);
	item->value.args.right = ra;
	item->value.args.left = la;

	return item;
}
/*
 * Build a one-operand item of the given type.
 *
 * Unary plus or minus applied to a standalone numeric literal (a jpiNumeric
 * item with no successor) is folded right away: plus returns the operand
 * itself, minus returns a new numeric item holding the negated value.
 */
static JsonPathParseItem *
makeItemUnary(int type, JsonPathParseItem *a)
{
	JsonPathParseItem *item;

	if (type == jpiPlus || type == jpiMinus)
	{
		if (a->type == jpiNumeric && a->next == NULL)
		{
			if (type == jpiPlus)
				return a;

			item = makeItemType(jpiNumeric);
			item->value.numeric =
				DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
													NumericGetDatum(a->value.numeric)));
			return item;
		}
	}

	item = makeItemType(type);
	item->value.arg = a;

	return item;
}
/*
 * Chain the items of "list" together through their "next" links and return
 * the first one.
 *
 * The first item may already carry a chain of its own; the remaining list
 * members are appended after the end of that chain.
 */
static JsonPathParseItem *
makeItemList(List *list)
{
	ListCell   *first = list_head(list);
	ListCell   *lc;
	JsonPathParseItem *head = (JsonPathParseItem *) lfirst(first);
	JsonPathParseItem *tail = head;

	if (lnext(first) != NULL)
	{
		/* find the end of the chain already hanging off the head */
		while (tail->next != NULL)
			tail = tail->next;

		for_each_cell(lc, lnext(first))
		{
			JsonPathParseItem *item = (JsonPathParseItem *) lfirst(lc);

			tail->next = item;
			tail = item;
		}
	}

	return head;
}
/*
 * Build a jpiIndexArray item from a non-empty list of jpiSubscript items.
 *
 * Each subscript's left and right arguments become the "from" and "to"
 * members of the corresponding array element.
 */
static JsonPathParseItem *
makeIndexArray(List *list)
{
	JsonPathParseItem *arr = makeItemType(jpiIndexArray);
	ListCell   *lc;
	int			pos;

	Assert(list_length(list) > 0);

	arr->value.array.nelems = list_length(list);
	arr->value.array.elems = palloc(sizeof(arr->value.array.elems[0]) *
									arr->value.array.nelems);

	pos = 0;
	foreach(lc, list)
	{
		JsonPathParseItem *sub = lfirst(lc);

		Assert(sub->type == jpiSubscript);

		arr->value.array.elems[pos].from = sub->value.args.left;
		arr->value.array.elems[pos].to = sub->value.args.right;
		pos++;
	}

	return arr;
}
/*
 * Build a jpiAny item with the given level bounds.
 *
 * A negative bound is stored as PG_UINT32_MAX; non-negative bounds are
 * stored unchanged.
 */
static JsonPathParseItem *
makeAny(int first, int last)
{
	JsonPathParseItem *item = makeItemType(jpiAny);

	if (first < 0)
		item->value.anybounds.first = PG_UINT32_MAX;
	else
		item->value.anybounds.first = first;

	if (last < 0)
		item->value.anybounds.last = PG_UINT32_MAX;
	else
		item->value.anybounds.last = last;

	return item;
}
/*
 * Build a jpiLikeRegex item.
 *
 * expr    - item the regular expression is applied to
 * pattern - regex text; the item keeps the pointer and length, not a copy
 * flags   - flag characters, or NULL when none were given
 *
 * Each flag character is recorded twice: as a JSP_REGEX_* bit stored in the
 * item, and as a REG_* bit in the local cflags, which is used only for the
 * validation compile at the end.  An unrecognized flag character is reported
 * through yyerror().
 */
static JsonPathParseItem *
makeItemLikeRegex(JsonPathParseItem *expr, string *pattern, string *flags)
{
JsonPathParseItem *v = makeItemType(jpiLikeRegex);
int i;
int cflags = REG_ADVANCED;
v->value.like_regex.expr = expr;
v->value.like_regex.pattern = pattern->val;
v->value.like_regex.patternlen = pattern->len;
v->value.like_regex.flags = 0;
/* flags are applied left to right, so a later 's'/'m' overrides an earlier one */
for (i = 0; flags && i < flags->len; i++)
{
switch (flags->val[i])
{
case 'i':
v->value.like_regex.flags |= JSP_REGEX_ICASE;
cflags |= REG_ICASE;
break;
case 's':
/*
 * 's' and 'm' each clear the other's JSP_REGEX_* bit.
 * NOTE(review): 's' turns REG_NEWLINE on and 'm' turns it off in
 * cflags -- confirm this is the intended mapping for the two flags.
 */
v->value.like_regex.flags &= ~JSP_REGEX_MLINE;
v->value.like_regex.flags |= JSP_REGEX_SLINE;
cflags |= REG_NEWLINE;
break;
case 'm':
v->value.like_regex.flags &= ~JSP_REGEX_SLINE;
v->value.like_regex.flags |= JSP_REGEX_MLINE;
cflags &= ~REG_NEWLINE;
break;
case 'x':
v->value.like_regex.flags |= JSP_REGEX_WSPACE;
cflags |= REG_EXPANDED;
break;
default:
yyerror(NULL, "unrecognized flag of LIKE_REGEX predicate");
break;
}
}
/* check regex validity; the compiled regex returned here is not kept */
(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
pattern->len),
cflags, DEFAULT_COLLATION_OID);
return v;
}
%}
/* BISON Declarations */
%pure-parser
%expect 0
%name-prefix="jsonpath_yy"
%error-verbose
%parse-param {JsonPathParseResult **result}
%union {
string str;
List *elems; /* list of JsonPathParseItem */
List *indexs; /* list of JsonPathParseItem (jpiSubscript items built by index_elem) */
JsonPathParseItem *value;
JsonPathParseResult *result;
JsonPathItemType optype;
bool boolean;
int integer;
}
%token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
%token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
%token <str> OR_P AND_P NOT_P
%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
%type <result> result
%type <value> scalar_value path_primary expr array_accessor
any_path accessor_op key predicate delimited_predicate
index_elem starts_with_initial expr_or_predicate
%type <elems> accessor_expr
%type <indexs> index_list
%type <optype> comp_op method
%type <boolean> mode
%type <str> key_name
%type <integer> any_level
%left OR_P
%left AND_P
%right NOT_P
%left '+' '-'
%left '*' '/' '%'
%left UMINUS
%nonassoc '(' ')'
/* Grammar follows */
%%
/*
 * Top-level rule: a mode followed by an expression or predicate.  The outcome
 * is stored through the "result" parse parameter; empty input stores NULL.
 */
result:
mode expr_or_predicate {
*result = palloc(sizeof(JsonPathParseResult));
(*result)->expr = $2;
(*result)->lax = $1;
}
| /* EMPTY */ { *result = NULL; }
;
expr_or_predicate:
expr { $$ = $1; }
| predicate { $$ = $1; }
;
/*
 * Path mode; the value is true for lax and false for strict.  Lax is the
 * default when no mode keyword is given.
 */
mode:
STRICT_P { $$ = false; }
| LAX_P { $$ = true; }
| /* EMPTY */ { $$ = true; }
;
scalar_value:
STRING_P { $$ = makeItemString(&$1); }
| NULL_P { $$ = makeItemString(NULL); }
| TRUE_P { $$ = makeItemBool(true); }
| FALSE_P { $$ = makeItemBool(false); }
| NUMERIC_P { $$ = makeItemNumeric(&$1); }
| INT_P { $$ = makeItemNumeric(&$1); }
| VARIABLE_P { $$ = makeItemVariable(&$1); }
;
comp_op:
EQUAL_P { $$ = jpiEqual; }
| NOTEQUAL_P { $$ = jpiNotEqual; }
| LESS_P { $$ = jpiLess; }
| GREATER_P { $$ = jpiGreater; }
| LESSEQUAL_P { $$ = jpiLessOrEqual; }
| GREATEREQUAL_P { $$ = jpiGreaterOrEqual; }
;
delimited_predicate:
'(' predicate ')' { $$ = $2; }
| EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
;
predicate:
delimited_predicate { $$ = $1; }
| expr comp_op expr { $$ = makeItemBinary($2, $1, $3); }
| predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
| predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
| NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
| '(' predicate ')' IS_P UNKNOWN_P { $$ = makeItemUnary(jpiIsUnknown, $2); }
| expr STARTS_P WITH_P starts_with_initial
{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
| expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
{ $$ = makeItemLikeRegex($1, &$3, &$5); }
;
starts_with_initial:
STRING_P { $$ = makeItemString(&$1); }
| VARIABLE_P { $$ = makeItemVariable(&$1); }
;
path_primary:
scalar_value { $$ = $1; }
| '$' { $$ = makeItemType(jpiRoot); }
| '@' { $$ = makeItemType(jpiCurrent); }
| LAST_P { $$ = makeItemType(jpiLast); }
;
accessor_expr:
path_primary { $$ = list_make1($1); }
| '(' expr ')' accessor_op { $$ = list_make2($2, $4); }
| '(' predicate ')' accessor_op { $$ = list_make2($2, $4); }
| accessor_expr accessor_op { $$ = lappend($1, $2); }
;
expr:
accessor_expr { $$ = makeItemList($1); }
| '(' expr ')' { $$ = $2; }
| '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); }
| '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); }
| expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); }
| expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); }
| expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); }
| expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); }
| expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); }
;
index_elem:
expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); }
| expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); }
;
index_list:
index_elem { $$ = list_make1($1); }
| index_list ',' index_elem { $$ = lappend($1, $3); }
;
array_accessor:
'[' '*' ']' { $$ = makeItemType(jpiAnyArray); }
| '[' index_list ']' { $$ = makeIndexArray($2); }
;
/*
 * A level bound inside ".**{...}": an integer literal, or "last", which is
 * passed on as -1 (makeAny() stores negative bounds as PG_UINT32_MAX).
 */
any_level:
INT_P { $$ = pg_atoi($1.val, 4, 0); }
| LAST_P { $$ = -1; }
;
/*
 * The "**" accessor.  With no braces the bounds are (0, -1); a single level
 * uses the same value for both bounds; "a to b" gives both explicitly.
 */
any_path:
ANY_P { $$ = makeAny(0, -1); }
| ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
| ANY_P '{' any_level TO_P any_level '}' { $$ = makeAny($3, $5); }
;
accessor_op:
'.' key { $$ = $2; }
| '.' '*' { $$ = makeItemType(jpiAnyKey); }
| array_accessor { $$ = $1; }
| '.' any_path { $$ = $2; }
| '.' method '(' ')' { $$ = makeItemType($2); }
| '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); }
;
key:
key_name { $$ = makeItemKey(&$1); }
;
/*
 * Tokens accepted as a member name after '.'.  Besides identifiers and
 * quoted strings, this lists the word tokens of the language (the same
 * tokens that appear in the scanner's keyword table), so those words can
 * also be used as key names.
 */
key_name:
IDENT_P
| STRING_P
| TO_P
| NULL_P
| TRUE_P
| FALSE_P
| IS_P
| UNKNOWN_P
| EXISTS_P
| STRICT_P
| LAX_P
| ABS_P
| SIZE_P
| TYPE_P
| FLOOR_P
| DOUBLE_P
| CEILING_P
| KEYVALUE_P
| LAST_P
| STARTS_P
| WITH_P
| LIKE_REGEX_P
| FLAG_P
;
method:
ABS_P { $$ = jpiAbs; }
| SIZE_P { $$ = jpiSize; }
| TYPE_P { $$ = jpiType; }
| FLOOR_P { $$ = jpiFloor; }
| DOUBLE_P { $$ = jpiDouble; }
| CEILING_P { $$ = jpiCeiling; }
| KEYVALUE_P { $$ = jpiKeyValue; }
;
%%

View File

@ -0,0 +1,638 @@
/*-------------------------------------------------------------------------
*
* jsonpath_scan.l
* Lexical parser for jsonpath datatype
*
* Copyright (c) 2019, PostgreSQL Global Development Group
*
* IDENTIFICATION
* src/backend/utils/adt/jsonpath_scan.l
*
*-------------------------------------------------------------------------
*/
%{
#include "postgres.h"
#include "mb/pg_wchar.h"
#include "nodes/pg_list.h"
#include "utils/jsonpath_scanner.h"
static string scanstring;
/* No reason to constrain amount of data slurped */
/* #define YY_READ_BUF_SIZE 16777216 */
/* Handles to the buffer that the lexer uses internally */
static YY_BUFFER_STATE scanbufhandle;
static char *scanbuf;
static int scanbuflen;
static void addstring(bool init, char *s, int l);
static void addchar(bool init, char s);
static int checkSpecialVal(void); /* examine scanstring for the special
* value */
static void parseUnicode(char *s, int l);
static void parseHexChars(char *s, int l);
/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
#undef fprintf
#define fprintf(file, fmt, msg) fprintf_to_ereport(fmt, msg)
/*
 * Replacement target for fprintf() in this file (see the #define just
 * above): raises an ERROR carrying "msg".  The "fmt" argument is ignored.
 */
static void
fprintf_to_ereport(const char *fmt, const char *msg)
{
ereport(ERROR, (errmsg_internal("%s", msg)));
}
#define yyerror jsonpath_yyerror
%}
%option 8bit
%option never-interactive
%option nodefault
%option noinput
%option nounput
%option noyywrap
%option warn
%option prefix="jsonpath_yy"
%option bison-bridge
%option noyyalloc
%option noyyrealloc
%option noyyfree
%x xQUOTED
%x xNONQUOTED
%x xVARQUOTED
%x xSINGLEQUOTED
%x xCOMMENT
special [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
any [^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
blank [ \t\n\r\f]
hex_dig [0-9A-Fa-f]
unicode \\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
hex_char \\x{hex_dig}{2}
%%
<INITIAL>\&\& { return AND_P; }
<INITIAL>\|\| { return OR_P; }
<INITIAL>\! { return NOT_P; }
<INITIAL>\*\* { return ANY_P; }
<INITIAL>\< { return LESS_P; }
<INITIAL>\<\= { return LESSEQUAL_P; }
<INITIAL>\=\= { return EQUAL_P; }
<INITIAL>\<\> { return NOTEQUAL_P; }
<INITIAL>\!\= { return NOTEQUAL_P; }
<INITIAL>\>\= { return GREATEREQUAL_P; }
<INITIAL>\> { return GREATER_P; }
<INITIAL>\${any}+ {
addstring(true, yytext + 1, yyleng - 1);
addchar(false, '\0');
yylval->str = scanstring;
return VARIABLE_P;
}
<INITIAL>\$\" {
addchar(true, '\0');
BEGIN xVARQUOTED;
}
<INITIAL>{special} { return *yytext; }
<INITIAL>{blank}+ { /* ignore */ }
<INITIAL>\/\* {
addchar(true, '\0');
BEGIN xCOMMENT;
}
<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+ /* float */ {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<INITIAL>\.[0-9]+[eE][+-]?[0-9]+ /* float */ {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<INITIAL>([0-9]+)?\.[0-9]+ {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return NUMERIC_P;
}
<INITIAL>[0-9]+ {
addstring(true, yytext, yyleng);
addchar(false, '\0');
yylval->str = scanstring;
return INT_P;
}
<INITIAL>{any}+ {
addstring(true, yytext, yyleng);
BEGIN xNONQUOTED;
}
<INITIAL>\" {
addchar(true, '\0');
BEGIN xQUOTED;
}
<INITIAL>\' {
addchar(true, '\0');
BEGIN xSINGLEQUOTED;
}
<INITIAL>\\ {
yyless(0);
addchar(true, '\0');
BEGIN xNONQUOTED;
}
<xNONQUOTED>{any}+ {
addstring(false, yytext, yyleng);
}
<xNONQUOTED>{blank}+ {
yylval->str = scanstring;
BEGIN INITIAL;
return checkSpecialVal();
}
<xNONQUOTED>\/\* {
/*
 * NOTE(review): unlike the other rules that leave xNONQUOTED, this
 * one stores the accumulated string in yylval but returns no token
 * before switching to xCOMMENT -- confirm that a word immediately
 * followed by a comment is not silently dropped.
 */
yylval->str = scanstring;
BEGIN xCOMMENT;
}
<xNONQUOTED>({special}|\"|\') {
yylval->str = scanstring;
yyless(0);
BEGIN INITIAL;
return checkSpecialVal();
}
<xNONQUOTED><<EOF>> {
yylval->str = scanstring;
BEGIN INITIAL;
return checkSpecialVal();
}
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\] { addchar(false, yytext[1]); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b { addchar(false, '\b'); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f { addchar(false, '\f'); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n { addchar(false, '\n'); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r { addchar(false, '\r'); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t { addchar(false, '\t'); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v { addchar(false, '\v'); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+ { parseUnicode(yytext, yyleng); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+ { parseHexChars(yytext, yyleng); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x { yyerror(NULL, "Hex character sequence is invalid"); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u { yyerror(NULL, "Unicode sequence is invalid"); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\. { yyerror(NULL, "Escape sequence is invalid"); }
<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\ { yyerror(NULL, "Unexpected end after backslash"); }
<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>> { yyerror(NULL, "Unexpected end of quoted string"); }
<xQUOTED>\" {
yylval->str = scanstring;
BEGIN INITIAL;
return STRING_P;
}
<xVARQUOTED>\" {
yylval->str = scanstring;
BEGIN INITIAL;
return VARIABLE_P;
}
<xSINGLEQUOTED>\' {
yylval->str = scanstring;
BEGIN INITIAL;
return STRING_P;
}
<xQUOTED,xVARQUOTED>[^\\\"]+ { addstring(false, yytext, yyleng); }
<xSINGLEQUOTED>[^\\\']+ { addstring(false, yytext, yyleng); }
<INITIAL><<EOF>> { yyterminate(); }
<xCOMMENT>\*\/ { BEGIN INITIAL; }
<xCOMMENT>[^\*]+ { }
<xCOMMENT>\* { }
<xCOMMENT><<EOF>> { yyerror(NULL, "Unexpected end of comment"); }
%%
/*
 * Report a jsonpath syntax error.
 *
 * "message" becomes part of the error detail, together with either the
 * current token text or a note that the end of input was reached.  The
 * "result" argument is not used.
 */
void
jsonpath_yyerror(JsonPathParseResult **result, const char *message)
{
	bool		at_end = (*yytext == YY_END_OF_BUFFER_CHAR);

	if (!at_end)
	{
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("bad jsonpath representation"),
				 /* translator: first %s is typically "syntax error" */
				 errdetail("%s at or near \"%s\"", message, yytext)));
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("bad jsonpath representation"),
				 /* translator: %s is typically "syntax error" */
				 errdetail("%s at end of input", message)));
	}
}
/*
 * One entry of the jsonpath keyword table.
 */
typedef struct keyword
{
int16 len; /* length of the keyword text */
bool lowercase; /* if true, only the exact table spelling is recognized; otherwise matching ignores case */
int val; /* token code returned to the parser */
char *keyword; /* keyword text */
} keyword;
/*
 * Keyword table, searched by checkSpecialVal() with a binary search.
 *
 * The array must be sorted by length and then in alphabetical order.
 * The last entry must also be the longest keyword, because
 * checkSpecialVal() uses its length as an upper bound before searching.
 */
static keyword keywords[] = {
{ 2, false, IS_P, "is"},
{ 2, false, TO_P, "to"},
{ 3, false, ABS_P, "abs"},
{ 3, false, LAX_P, "lax"},
{ 4, false, FLAG_P, "flag"},
{ 4, false, LAST_P, "last"},
{ 4, true, NULL_P, "null"},
{ 4, false, SIZE_P, "size"},
{ 4, true, TRUE_P, "true"},
{ 4, false, TYPE_P, "type"},
{ 4, false, WITH_P, "with"},
{ 5, true, FALSE_P, "false"},
{ 5, false, FLOOR_P, "floor"},
{ 6, false, DOUBLE_P, "double"},
{ 6, false, EXISTS_P, "exists"},
{ 6, false, STARTS_P, "starts"},
{ 6, false, STRICT_P, "strict"},
{ 7, false, CEILING_P, "ceiling"},
{ 7, false, UNKNOWN_P, "unknown"},
{ 8, false, KEYVALUE_P, "keyvalue"},
{ 10,false, LIKE_REGEX_P, "like_regex"},
};
/*
 * Classify the word currently held in scanstring.
 *
 * Returns the token code of the matching keyword, or IDENT_P when the word
 * is not a keyword.  The table is binary-searched by length first and then
 * by case-insensitive comparison; entries flagged "lowercase" additionally
 * require an exact, case-sensitive match.
 */
static int
checkSpecialVal()
{
	keyword    *lo = keywords;
	keyword    *hi = keywords + lengthof(keywords);

	/* anything longer than the last table entry cannot be a keyword */
	if (scanstring.len > keywords[lengthof(keywords) - 1].len)
		return IDENT_P;

	while (lo < hi)
	{
		keyword    *mid = lo + ((hi - lo) >> 1);
		int			cmp;

		if (mid->len != scanstring.len)
			cmp = mid->len - scanstring.len;
		else
			cmp = pg_strncasecmp(mid->keyword, scanstring.val,
								 scanstring.len);

		if (cmp < 0)
		{
			lo = mid + 1;
			continue;
		}
		if (cmp > 0)
		{
			hi = mid;
			continue;
		}

		/* case-insensitive hit; some keywords must also match exactly */
		if (mid->lowercase &&
			strncmp(mid->keyword, scanstring.val, scanstring.len) != 0)
			return IDENT_P;

		return mid->val;
	}

	return IDENT_P;
}
/*
 * Prepare the scanner to read "str"; called before any parsing is done.
 *
 * A non-positive "slen" means the length is taken with strlen().  The input
 * is copied into a private buffer carrying the two end-of-buffer bytes that
 * flex requires, and the scanner is put into its INITIAL state.
 */
static void
jsonpath_scanner_init(const char *str, int slen)
{
	if (slen <= 0)
		slen = strlen(str);

	/* scanner state might be left over after ereport() */
	yy_init_globals();

	/* make a scan buffer with the special termination needed by flex */
	scanbuflen = slen;
	scanbuf = palloc(slen + 2);
	memcpy(scanbuf, str, slen);
	scanbuf[slen] = YY_END_OF_BUFFER_CHAR;
	scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;

	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);

	BEGIN(INITIAL);
}
/*
 * Called after parsing is done to clean up after jsonpath_scanner_init():
 * deletes the flex buffer handle and frees the scan buffer allocated there.
 */
static void
jsonpath_scanner_finish(void)
{
yy_delete_buffer(scanbufhandle);
pfree(scanbuf);
}
/*
 * Append "l" bytes from "s" to scanstring.
 *
 * When "init" is true, scanstring is first reset to a fresh, empty 32-byte
 * buffer.  The buffer is doubled until the new bytes fit with room to spare.
 * No terminating NUL is written here.
 */
static void
addstring(bool init, char *s, int l)
{
	if (init)
	{
		scanstring.len = 0;
		scanstring.total = 32;
		scanstring.val = palloc(scanstring.total);
	}

	if (s == NULL || l == 0)
		return;

	while (scanstring.total <= scanstring.len + l + 1)
	{
		scanstring.total *= 2;
		scanstring.val = repalloc(scanstring.val, scanstring.total);
	}

	memcpy(scanstring.val + scanstring.len, s, l);
	scanstring.len += l;
}
/*
 * Store one character at the end of scanstring.
 *
 * When "init" is true, scanstring is first reset to a fresh, empty 32-byte
 * buffer; otherwise the buffer is doubled if it is about to fill up.  A NUL
 * character is written but not counted in the length, so it acts as a
 * terminator that later additions overwrite.
 */
static void
addchar(bool init, char s)
{
	if (init)
	{
		scanstring.len = 0;
		scanstring.total = 32;
		scanstring.val = palloc(scanstring.total);
	}
	else if (scanstring.total <= scanstring.len + 1)
	{
		scanstring.total *= 2;
		scanstring.val = repalloc(scanstring.val, scanstring.total);
	}

	scanstring.val[scanstring.len] = s;

	if (s == '\0')
		return;

	scanstring.len++;
}
/*
 * Parse a jsonpath string.
 *
 * str - input text
 * len - length of the input; a value <= 0 makes the scanner use strlen(str)
 *
 * Returns the result stored by the grammar's top-level rule, which is NULL
 * for empty input.  Syntax errors are reported through jsonpath_yyerror().
 */
JsonPathParseResult *
parsejsonpath(const char *str, int len)
{
	JsonPathParseResult *parseresult;

	jsonpath_scanner_init(str, len);

	/* the parser returned failure without reporting an error itself */
	if (jsonpath_yyparse((void *) &parseresult) != 0)
		jsonpath_yyerror(NULL, "bogus input");

	jsonpath_scanner_finish();

	return parseresult;
}
/*
 * Return the numeric value (0..15) of a hexadecimal digit character.
 * Any other character is reported as an error.
 */
static int
hexval(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';

	if ('a' <= c && c <= 'f')
		return 0xA + (c - 'a');

	if ('A' <= c && c <= 'F')
		return 0xA + (c - 'A');

	elog(ERROR, "invalid hexadecimal digit");
	return 0;					/* not reached */
}
/*
 * Append the character with code point "ch" to scanstring.
 *
 * Code point 0 is rejected.  In a UTF8 database the character is appended in
 * its UTF-8 encoding; in other encodings only code points up to 0x7F are
 * accepted and everything else is an error.
 */
static void
addUnicodeChar(int ch)
{
	if (ch == 0)
	{
		/* We can't allow this, since our TEXT type doesn't */
		ereport(ERROR,
				(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
				 errmsg("unsupported Unicode escape sequence"),
				 errdetail("\\u0000 cannot be converted to text.")));
		return;
	}

	if (GetDatabaseEncoding() == PG_UTF8)
	{
		char		utf8str[5];
		int			utf8len;

		unicode_to_utf8(ch, (unsigned char *) utf8str);
		utf8len = pg_utf_mblen((unsigned char *) utf8str);
		addstring(false, utf8str, utf8len);
		return;
	}

	if (ch <= 0x007f)
	{
		/*
		 * This is the only way to designate things like a form feed
		 * character in JSON, so it's useful in all encodings.
		 */
		addchar(false, (char) ch);
		return;
	}

	ereport(ERROR,
			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
			 errmsg("invalid input syntax for type jsonpath"),
			 errdetail("Unicode escape values cannot be used for code "
					   "point values above 007F when the server encoding "
					   "is not UTF8.")));
}
static void
addUnicode(int ch, int *hi_surrogate)
{
if (ch >= 0xd800 && ch <= 0xdbff)
{
if (*hi_surrogate != -1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type jsonpath"),
errdetail("Unicode high surrogate must not follow "
"a high surrogate.")));
*hi_surrogate = (ch & 0x3ff) << 10;
return;
}
else if (ch >= 0xdc00 && ch <= 0xdfff)
{
if (*hi_surrogate == -1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type jsonpath"),
errdetail("Unicode low surrogate must follow a high "
"surrogate.")));
ch = 0x10000 + *hi_surrogate + (ch & 0x3ff);
*hi_surrogate = -1;
}
else if (*hi_surrogate != -1)
{
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type jsonpath"),
errdetail("Unicode low surrogate must follow a high "
"surrogate.")));
}
addUnicodeChar(ch);
}
/*
 * Decode a run of one or more Unicode escapes, each written as "\uXXXX" or
 * "\u{X...}", and append the resulting characters to scanstring.
 *
 * s/l is the text and length of the escape run.  Surrogate pairs are
 * combined by addUnicode(); a high surrogate still pending at the end of the
 * run is an error.
 *
 * parseUnicode was adopted from json_lex_string() in
 * src/backend/utils/adt/json.c
 */
static void
parseUnicode(char *s, int l)
{
	int			i;
	int			hi_surrogate = -1;

	for (i = 2; i < l; i += 2)	/* skip '\u' */
	{
		int			ch = 0;
		int			j;

		if (s[i] == '{')		/* parse '\u{XX...}' */
		{
			/* test the bound before looking at s[i] */
			for (i++; i < l && s[i] != '}'; i++)
				ch = (ch << 4) | hexval(s[i]);
			i++;				/* skip '}' */
		}
		else					/* parse '\uXXXX' */
		{
			for (j = 0; j < 4 && i < l; j++)
				ch = (ch << 4) | hexval(s[i++]);
		}

		addUnicode(ch, &hi_surrogate);
	}

	if (hi_surrogate != -1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("invalid input syntax for type jsonpath"),
				 errdetail("Unicode low surrogate must follow a high "
						   "surrogate.")));
	}
}
/*
 * Decode a run of "\xXX" escapes and append each resulting character to
 * scanstring.  The length is expected to be a multiple of four.
 */
static void
parseHexChars(char *s, int l)
{
	int			pos;

	Assert(l % 4 /* \xXX */ == 0);

	for (pos = 0; pos + 4 <= l; pos += 4)
	{
		int			hi = hexval(s[pos + 2]);
		int			lo = hexval(s[pos + 3]);

		addUnicodeChar((hi << 4) | lo);
	}
}
/*
 * Interface functions to make flex use palloc() instead of malloc().
 * It'd be better to make these static, but flex insists otherwise.
 */

/* Allocation hook for the scanner: forwards to palloc(). */
void *
jsonpath_yyalloc(yy_size_t bytes)
{
return palloc(bytes);
}
/*
 * Reallocation hook for the scanner: a NULL pointer is treated as a fresh
 * allocation, anything else is resized with repalloc().
 */
void *
jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
{
	return (ptr != NULL) ? repalloc(ptr, bytes) : palloc(bytes);
}
void
jsonpath_yyfree(void *ptr)
{
if (ptr)
pfree(ptr);
}

View File

@ -133,7 +133,7 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
* Pattern is given in the database encoding. We internally convert to
* an array of pg_wchar, which is what Spencer's regex package wants.
*/
static regex_t *
regex_t *
RE_compile_and_cache(text *text_re, int cflags, Oid collation)
{
int text_re_len = VARSIZE_ANY_EXHDR(text_re);
@ -339,7 +339,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
* Both pattern and data are given in the database encoding. We internally
* convert to array of pg_wchar which is what Spencer's regex package wants.
*/
static bool
bool
RE_compile_and_execute(text *text_re, char *dat, int dat_len,
int cflags, Oid collation,
int nmatch, regmatch_t *pmatch)

View File

@ -206,6 +206,21 @@ Section: Class 22 - Data Exception
2200N E ERRCODE_INVALID_XML_CONTENT invalid_xml_content
2200S E ERRCODE_INVALID_XML_COMMENT invalid_xml_comment
2200T E ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION invalid_xml_processing_instruction
22030 E ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE duplicate_json_object_key_value
22032 E ERRCODE_INVALID_JSON_TEXT invalid_json_text
22033 E ERRCODE_INVALID_JSON_SUBSCRIPT invalid_json_subscript
22034 E ERRCODE_MORE_THAN_ONE_JSON_ITEM more_than_one_json_item
22035 E ERRCODE_NO_JSON_ITEM no_json_item
22036 E ERRCODE_NON_NUMERIC_JSON_ITEM non_numeric_json_item
22037 E ERRCODE_NON_UNIQUE_KEYS_IN_JSON_OBJECT non_unique_keys_in_json_object
22038 E ERRCODE_SINGLETON_JSON_ITEM_REQUIRED singleton_json_item_required
22039 E ERRCODE_JSON_ARRAY_NOT_FOUND json_array_not_found
2203A E ERRCODE_JSON_MEMBER_NOT_FOUND json_member_not_found
2203B E ERRCODE_JSON_NUMBER_NOT_FOUND json_number_not_found
2203C E ERRCODE_JSON_OBJECT_NOT_FOUND object_not_found
2203F E ERRCODE_JSON_SCALAR_REQUIRED json_scalar_required
2203D E ERRCODE_TOO_MANY_JSON_ARRAY_ELEMENTS too_many_json_array_elements
2203E E ERRCODE_TOO_MANY_JSON_OBJECT_MEMBERS too_many_json_object_members
Section: Class 23 - Integrity Constraint Violation