Partial implementation of SQL/JSON path language

The SQL:2016 standard, among other things, contains a set of SQL/JSON features for JSON processing inside a relational database. The core of SQL/JSON is the JSON path language, which allows accessing parts of JSON documents and making computations over them. This commit implements partial support for the JSON path language as a separate datatype called "jsonpath". The implementation is partial because it lacks datetime support and suppression of numeric errors. Missing features will be added later by separate commits. Support of SQL/JSON features requires implementation of separate nodes, and it will be considered in subsequent patches. This commit includes the following set of plain functions, which allow executing jsonpath over jsonb values: * jsonb_path_exists(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_match(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_query(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_query_array(jsonb, jsonpath[, jsonb, bool]), * jsonb_path_query_first(jsonb, jsonpath[, jsonb, bool]). This commit also implements "jsonb @? jsonpath" and "jsonb @@ jsonpath", which are wrappers over jsonpath_exists(jsonb, jsonpath) and jsonpath_predicate(jsonb, jsonpath) respectively. These operators will have index support (implemented in subsequent patches). Catversion bumped, to add new functions and operators. Code was written by Nikita Glukhov and Teodor Sigaev, revised by me. Documentation was written by Oleg Bartunov and Liudmila Mantrova. The work was inspired by Oleg Bartunov. Discussion: https://postgr.es/m/fcc6fc6a-b497-f39a-923d-aa34d0c588e8%402ndQuadrant.com Author: Nikita Glukhov, Teodor Sigaev, Alexander Korotkov, Oleg Bartunov, Liudmila Mantrova Reviewed-by: Tomas Vondra, Andrew Dunstan, Pavel Stehule, Alexander Korotkov
2025-07-07 00:36:50 +03:00 · 2019-03-16 12:15:37 +03:00
parent 893d6f8a1f
commit 72b6460336
33 changed files with 9079 additions and 55 deletions
--- a/src/backend/Makefile
+++ b/src/backend/Makefile
@ -136,6 +136,9 @@ parser/gram.h: parser/gram.y
 storage/lmgr/lwlocknames.h: storage/lmgr/generate-lwlocknames.pl storage/lmgr/lwlocknames.txt
 	$(MAKE) -C storage/lmgr lwlocknames.h lwlocknames.c

+# The jsonpath grammar header is built by the utils/adt makefile; re-run it
+# whenever the grammar source changes.
+utils/adt/jsonpath_gram.h: utils/adt/jsonpath_gram.y
+	$(MAKE) -C utils/adt jsonpath_gram.h
+
 # run this unconditionally to avoid needing to know its dependencies here:
 submake-catalog-headers:
 	$(MAKE) -C catalog distprep generated-header-symlinks
@ -159,7 +162,7 @@ submake-utils-headers:

 .PHONY: generated-headers

-generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h submake-catalog-headers submake-utils-headers
+generated-headers: $(top_builddir)/src/include/parser/gram.h $(top_builddir)/src/include/storage/lwlocknames.h $(top_builddir)/src/include/utils/jsonpath_gram.h submake-catalog-headers submake-utils-headers

 $(top_builddir)/src/include/parser/gram.h: parser/gram.h
 	prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
@ -171,6 +174,10 @@ $(top_builddir)/src/include/storage/lwlocknames.h: storage/lmgr/lwlocknames.h
 	  cd '$(dir $@)' && rm -f $(notdir $@) && \
 	  $(LN_S) "$$prereqdir/$(notdir $<)" .

+# Make the generated header reachable under src/include/utils: resolve the
+# absolute directory of the prerequisite, drop any previous entry with the
+# same name in the include directory, then link to the generated file with
+# $(LN_S).
+$(top_builddir)/src/include/utils/jsonpath_gram.h: utils/adt/jsonpath_gram.h
+	prereqdir=`cd '$(dir $<)' >/dev/null && pwd` && \
+	  cd '$(dir $@)' && rm -f $(notdir $@) && \
+	  $(LN_S) "$$prereqdir/$(notdir $<)" .

 utils/probes.o: utils/probes.d $(SUBDIROBJS)
 	$(DTRACE) $(DTRACEFLAGS) -C -G -s $(call expand_subsys,$^) -o $@
@ -186,6 +193,7 @@ distprep:
 	$(MAKE) -C replication	repl_gram.c repl_scanner.c syncrep_gram.c syncrep_scanner.c
 	$(MAKE) -C storage/lmgr	lwlocknames.h lwlocknames.c
 	$(MAKE) -C utils	distprep
+	$(MAKE) -C utils/adt	jsonpath_gram.c jsonpath_gram.h jsonpath_scan.c
 	$(MAKE) -C utils/misc	guc-file.c
 	$(MAKE) -C utils/sort	qsort_tuple.c

@ -308,6 +316,7 @@ maintainer-clean: distclean
 	      storage/lmgr/lwlocknames.c \
 	      storage/lmgr/lwlocknames.h \
 	      utils/misc/guc-file.c \
+	      utils/adt/jsonpath_gram.c \
+	      utils/adt/jsonpath_gram.h \
+	      utils/adt/jsonpath_scan.c \
 	      utils/sort/qsort_tuple.c


--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@ -1128,6 +1128,46 @@ LANGUAGE INTERNAL
 STRICT IMMUTABLE PARALLEL SAFE
 AS 'jsonb_insert';

+--
+-- jsonpath evaluation functions over jsonb.  Each takes the target document
+-- and a jsonpath, plus two optional arguments: "vars" (defaults to '{}') and
+-- "silent" (defaults to false).  All are STRICT, IMMUTABLE and PARALLEL SAFE
+-- and map onto internal functions of the same name.
+--
+
+-- boolean result
+CREATE OR REPLACE FUNCTION
+  jsonb_path_exists(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
+                    silent boolean DEFAULT false)
+RETURNS boolean
+LANGUAGE INTERNAL
+STRICT IMMUTABLE PARALLEL SAFE
+AS 'jsonb_path_exists';
+
+-- boolean result
+CREATE OR REPLACE FUNCTION
+  jsonb_path_match(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
+                   silent boolean DEFAULT false)
+RETURNS boolean
+LANGUAGE INTERNAL
+STRICT IMMUTABLE PARALLEL SAFE
+AS 'jsonb_path_match';
+
+-- set-returning: yields jsonb rows
+CREATE OR REPLACE FUNCTION
+  jsonb_path_query(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
+                   silent boolean DEFAULT false)
+RETURNS SETOF jsonb
+LANGUAGE INTERNAL
+STRICT IMMUTABLE PARALLEL SAFE
+AS 'jsonb_path_query';
+
+-- single jsonb result
+CREATE OR REPLACE FUNCTION
+  jsonb_path_query_array(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
+                         silent boolean DEFAULT false)
+RETURNS jsonb
+LANGUAGE INTERNAL
+STRICT IMMUTABLE PARALLEL SAFE
+AS 'jsonb_path_query_array';
+
+-- single jsonb result
+CREATE OR REPLACE FUNCTION
+  jsonb_path_query_first(target jsonb, path jsonpath, vars jsonb DEFAULT '{}',
+                         silent boolean DEFAULT false)
+RETURNS jsonb
+LANGUAGE INTERNAL
+STRICT IMMUTABLE PARALLEL SAFE
+AS 'jsonb_path_query_first';
+
 --
 -- The default permissions for functions mean that anyone can execute them.
 -- A number of functions shouldn't be executable by just anyone, but rather
--- a/src/backend/utils/adt/.gitignore
+++ b/src/backend/utils/adt/.gitignore
@ -0,0 +1,3 @@
+/jsonpath_gram.h
+/jsonpath_gram.c
+/jsonpath_scan.c
--- a/src/backend/utils/adt/Makefile
+++ b/src/backend/utils/adt/Makefile
@ -17,8 +17,8 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \
 	float.o format_type.o formatting.o genfile.o \
 	geo_ops.o geo_selfuncs.o geo_spgist.o inet_cidr_ntop.o inet_net_pton.o \
 	int.o int8.o json.o jsonb.o jsonb_gin.o jsonb_op.o jsonb_util.o \
-	jsonfuncs.o like.o like_support.o lockfuncs.o \
-	mac.o mac8.o misc.o name.o \
+	jsonfuncs.o jsonpath_gram.o jsonpath_scan.o jsonpath.o jsonpath_exec.o \
+	like.o like_support.o lockfuncs.o mac.o mac8.o misc.o name.o \
 	network.o network_gist.o network_selfuncs.o network_spgist.o \
 	numeric.o numutils.o oid.o oracle_compat.o \
 	orderedsetaggs.o partitionfuncs.o pg_locale.o pg_lsn.o \
@ -33,6 +33,21 @@ OBJS = acl.o amutils.o arrayfuncs.o array_expanded.o array_selfuncs.o \
 	txid.o uuid.o varbit.o varchar.o varlena.o version.o \
 	windowfuncs.o xid.o xml.o

+# Have bison also write the header that accompanies the generated parser.
+jsonpath_gram.c: BISONFLAGS += -d
+
+jsonpath_scan.c: FLEXFLAGS = -CF -p -p
+
+# The header is a by-product of generating jsonpath_gram.c, so this rule has
+# an empty recipe.
+jsonpath_gram.h: jsonpath_gram.c ;
+
+# Force these dependencies to be known even without dependency info built.
+# Only objects that are actually listed in OBJS are named here.
+jsonpath_gram.o jsonpath_scan.o: jsonpath_gram.h
+
+# jsonpath_gram.c, jsonpath_gram.h, and jsonpath_scan.c are in the
+# distribution tarball, so they are not cleaned here.
+clean distclean maintainer-clean:
+	rm -f lex.backup
+
+
 like.o: like.c like_match.c

 varlena.o: varlena.c levenshtein.c
--- a/src/backend/utils/adt/jsonb.c
+++ b/src/backend/utils/adt/jsonb.c
@ -163,6 +163,55 @@ jsonb_send(PG_FUNCTION_ARGS)
 	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
 }

+/*
+ * Return the type name of the value held in a jsonb container: the scalar's
+ * own type name when the container wraps a raw scalar, otherwise "array" or
+ * "object".  Any other container header is reported as an error.
+ */
+static const char *
+JsonbContainerTypeName(JsonbContainer *jbc)
+{
+	JsonbValue	scalar;
+
+	/* a successfully extracted scalar is reported under its own type */
+	if (JsonbExtractScalar(jbc, &scalar))
+		return JsonbTypeName(&scalar);
+
+	if (JsonContainerIsArray(jbc))
+		return "array";
+
+	if (JsonContainerIsObject(jbc))
+		return "object";
+
+	elog(ERROR, "invalid jsonb container type: 0x%08x", jbc->header);
+	return "unknown";
+}
+
+/*
+ * Get the type name of a jsonb value.
+ *
+ * Returns one of the constant strings "object", "array", "number",
+ * "string", "boolean" or "null".  A jbvBinary value is resolved by
+ * inspecting the container it points to.  Any other value type raises an
+ * error.
+ */
+const char *
+JsonbTypeName(JsonbValue *jbv)
+{
+	switch (jbv->type)
+	{
+		case jbvBinary:
+			/* defer to the container stored in the binary value */
+			return JsonbContainerTypeName(jbv->val.binary.data);
+		case jbvObject:
+			return "object";
+		case jbvArray:
+			return "array";
+		case jbvNumeric:
+			return "number";
+		case jbvString:
+			return "string";
+		case jbvBool:
+			return "boolean";
+		case jbvNull:
+			return "null";
+		default:
+			elog(ERROR, "unrecognized jsonb value type: %d", jbv->type);
+			/* fallback return after the error report */
+			return "unknown";
+	}
+}
+
 /*
 * SQL function jsonb_typeof(jsonb) -> text
 *
@ -173,45 +222,7 @@ Datum
 jsonb_typeof(PG_FUNCTION_ARGS)
 {
 	Jsonb	   *in = PG_GETARG_JSONB_P(0);
-	JsonbIterator *it;
-	JsonbValue	v;
-	char	   *result;
-
-	if (JB_ROOT_IS_OBJECT(in))
-		result = "object";
-	else if (JB_ROOT_IS_ARRAY(in) && !JB_ROOT_IS_SCALAR(in))
-		result = "array";
-	else
-	{
-		Assert(JB_ROOT_IS_SCALAR(in));
-
-		it = JsonbIteratorInit(&in->root);
-
-		/*
-		 * A root scalar is stored as an array of one element, so we get the
-		 * array and then its first (and only) member.
-		 */
-		(void) JsonbIteratorNext(&it, &v, true);
-		Assert(v.type == jbvArray);
-		(void) JsonbIteratorNext(&it, &v, true);
-		switch (v.type)
-		{
-			case jbvNull:
-				result = "null";
-				break;
-			case jbvString:
-				result = "string";
-				break;
-			case jbvNumeric:
-				result = "number";
-				break;
-			case jbvBool:
-				result = "boolean";
-				break;
-			default:
-				elog(ERROR, "unknown jsonb scalar type");
-		}
-	}
+	const char *result = JsonbContainerTypeName(&in->root);

 	PG_RETURN_TEXT_P(cstring_to_text(result));
 }
@ -1857,7 +1868,7 @@ jsonb_object_agg_finalfn(PG_FUNCTION_ARGS)
 /*
 * Extract scalar value from raw-scalar pseudo-array jsonb.
 */
-static bool
+bool
 JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res)
 {
 	JsonbIterator *it;
--- a/src/backend/utils/adt/jsonb_util.c
+++ b/src/backend/utils/adt/jsonb_util.c
@ -1728,6 +1728,14 @@ convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal)
 			break;

 		case jbvNumeric:
+			/* replace numeric NaN with string "NaN" */
+			if (numeric_is_nan(scalarVal->val.numeric))
+			{
+				appendToBuffer(buffer, "NaN", 3);
+				*jentry = 3;
+				break;
+			}
+
 			numlen = VARSIZE_ANY(scalarVal->val.numeric);
 			padlen = padBufferToInt(buffer);

--- a/src/backend/utils/adt/jsonpath.c
+++ b/src/backend/utils/adt/jsonpath.c
--- a/src/backend/utils/adt/jsonpath_exec.c
+++ b/src/backend/utils/adt/jsonpath_exec.c
--- a/src/backend/utils/adt/jsonpath_gram.y
+++ b/src/backend/utils/adt/jsonpath_gram.y
@ -0,0 +1,480 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_gram.y
+ *	 Grammar definitions for jsonpath datatype
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	src/backend/utils/adt/jsonpath_gram.y
+ *
+ *-------------------------------------------------------------------------
+ */
+
+%{
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/jsonpath.h"
+#include "utils/jsonpath_scanner.h"
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc.  This prevents
+ * memory leaks if we error out during parsing.  Note this only works with
+ * bison >= 2.0.  However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE   pfree
+
+/*
+ * Allocate a parse item of the given type with an empty "next" link.
+ */
+static JsonPathParseItem *
+makeItemType(int type)
+{
+	JsonPathParseItem *item;
+
+	item = palloc(sizeof(*item));
+
+	CHECK_FOR_INTERRUPTS();
+
+	item->type = type;
+	item->next = NULL;
+
+	return item;
+}
+
+/*
+ * Build a string item that refers to the scanned text, or a jpiNull item
+ * when no string is supplied.
+ */
+static JsonPathParseItem *
+makeItemString(string *s)
+{
+	JsonPathParseItem *item;
+
+	if (s == NULL)
+		return makeItemType(jpiNull);
+
+	item = makeItemType(jpiString);
+	item->value.string.val = s->val;
+	item->value.string.len = s->len;
+
+	return item;
+}
+
+/*
+ * Build a variable-reference item carrying the variable's name.
+ */
+static JsonPathParseItem *
+makeItemVariable(string *s)
+{
+	JsonPathParseItem *item = makeItemType(jpiVariable);
+
+	item->value.string.val = s->val;
+	item->value.string.len = s->len;
+
+	return item;
+}
+
+/*
+ * Build a key accessor item: constructed like a string item, then retagged
+ * as jpiKey.
+ */
+static JsonPathParseItem *
+makeItemKey(string *s)
+{
+	JsonPathParseItem *item = makeItemString(s);
+
+	item->type = jpiKey;
+
+	return item;
+}
+
+/*
+ * Build a numeric item from the text of a scanned number.
+ *
+ * The scanner NUL-terminates numeric tokens, so s->val can be handed to
+ * numeric_in() as a C string.  The remaining two arguments are passed
+ * through the proper Datum conversion macros rather than as bare integers.
+ */
+static JsonPathParseItem *
+makeItemNumeric(string *s)
+{
+	JsonPathParseItem *v;
+
+	v = makeItemType(jpiNumeric);
+	v->value.numeric =
+		DatumGetNumeric(DirectFunctionCall3(numeric_in,
+											CStringGetDatum(s->val),
+											ObjectIdGetDatum(InvalidOid),
+											Int32GetDatum(-1)));
+
+	return v;
+}
+
+/*
+ * Build a boolean literal item.
+ */
+static JsonPathParseItem *
+makeItemBool(bool val)
+{
+	JsonPathParseItem *item;
+
+	item = makeItemType(jpiBool);
+	item->value.boolean = val;
+
+	return item;
+}
+
+/*
+ * Build a two-operand item of the given type from its left and right
+ * arguments.
+ */
+static JsonPathParseItem *
+makeItemBinary(int type, JsonPathParseItem* la, JsonPathParseItem *ra)
+{
+	JsonPathParseItem *item;
+
+	item = makeItemType(type);
+	item->value.args.left = la;
+	item->value.args.right = ra;
+
+	return item;
+}
+
+/*
+ * Build a one-operand item of the given type.
+ *
+ * A unary plus or minus applied to a standalone numeric literal (one with no
+ * following path item) is folded at parse time: plus returns the literal
+ * itself, minus returns a new literal holding the negated value.
+ */
+static JsonPathParseItem *
+makeItemUnary(int type, JsonPathParseItem* a)
+{
+	JsonPathParseItem *item;
+
+	if (a->type == jpiNumeric && !a->next)
+	{
+		if (type == jpiPlus)
+			return a;
+
+		if (type == jpiMinus)
+		{
+			item = makeItemType(jpiNumeric);
+			item->value.numeric =
+				DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
+													NumericGetDatum(a->value.numeric)));
+			return item;
+		}
+	}
+
+	item = makeItemType(type);
+	item->value.arg = a;
+
+	return item;
+}
+
+/*
+ * Chain the parse items of a List together through their "next" links and
+ * return the first item.
+ *
+ * The first list element may already head a chain of its own; later
+ * elements are appended after that chain's last item.  The list is
+ * dereferenced without an emptiness check, so callers must pass at least
+ * one element.
+ */
+static JsonPathParseItem*
+makeItemList(List *list)
+{
+	JsonPathParseItem *head, *end;
+	ListCell   *cell = list_head(list);
+
+	head = end = (JsonPathParseItem *) lfirst(cell);
+
+	/* single-element list: nothing to link */
+	if (!lnext(cell))
+		return head;
+
+	/* append items to the end of already existing list */
+	while (end->next)
+		end = end->next;
+
+	/* link every remaining list element after the current tail */
+	for_each_cell(cell, lnext(cell))
+	{
+		JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
+
+		end->next = c;
+		end = c;
+	}
+
+	return head;
+}
+
+/*
+ * Build an array-subscript item from a non-empty list of jpiSubscript
+ * items, copying each subscript's bounds into the "from"/"to" slots of the
+ * new item's element array.
+ */
+static JsonPathParseItem *
+makeIndexArray(List *list)
+{
+	JsonPathParseItem *v = makeItemType(jpiIndexArray);
+	ListCell   *lc;
+	int			idx = 0;
+
+	Assert(list_length(list) > 0);
+	v->value.array.nelems = list_length(list);
+
+	v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
+								  v->value.array.nelems);
+
+	foreach(lc, list)
+	{
+		JsonPathParseItem *sub = lfirst(lc);
+
+		Assert(sub->type == jpiSubscript);
+
+		v->value.array.elems[idx].from = sub->value.args.left;
+		v->value.array.elems[idx].to = sub->value.args.right;
+		idx++;
+	}
+
+	return v;
+}
+
+/*
+ * Build a jpiAny item with the given level bounds.  A negative bound is
+ * stored as PG_UINT32_MAX.
+ */
+static JsonPathParseItem *
+makeAny(int first, int last)
+{
+	JsonPathParseItem *item = makeItemType(jpiAny);
+
+	if (first >= 0)
+		item->value.anybounds.first = first;
+	else
+		item->value.anybounds.first = PG_UINT32_MAX;
+
+	if (last >= 0)
+		item->value.anybounds.last = last;
+	else
+		item->value.anybounds.last = PG_UINT32_MAX;
+
+	return item;
+}
+
+/*
+ * Build a LIKE_REGEX predicate item.
+ *
+ * expr is the expression being matched, pattern the regular expression text
+ * and flags an optional string of single-character flags (NULL when no FLAG
+ * clause was given).  Recognized flag characters are 'i', 's', 'm' and 'x';
+ * any other character is reported through yyerror().  The pattern is
+ * compiled once here purely to validate it; the compiled form is discarded.
+ */
+static JsonPathParseItem *
+makeItemLikeRegex(JsonPathParseItem *expr, string *pattern, string *flags)
+{
+	JsonPathParseItem *v = makeItemType(jpiLikeRegex);
+	int			i;
+	int			cflags = REG_ADVANCED;
+
+	v->value.like_regex.expr = expr;
+	v->value.like_regex.pattern = pattern->val;
+	v->value.like_regex.patternlen = pattern->len;
+	v->value.like_regex.flags = 0;
+
+	/*
+	 * Translate each flag character into both the jsonpath flag bits stored
+	 * in the item and the regex compile flags used for the validity check.
+	 * 's' and 'm' are mutually exclusive: the later one wins.
+	 *
+	 * NOTE(review): 's' turns REG_NEWLINE on and 'm' turns it off -- confirm
+	 * this is the intended mapping for single-line vs. multi-line mode.
+	 */
+	for (i = 0; flags && i < flags->len; i++)
+	{
+		switch (flags->val[i])
+		{
+			case 'i':
+				v->value.like_regex.flags |= JSP_REGEX_ICASE;
+				cflags |= REG_ICASE;
+				break;
+			case 's':
+				v->value.like_regex.flags &= ~JSP_REGEX_MLINE;
+				v->value.like_regex.flags |= JSP_REGEX_SLINE;
+				cflags |= REG_NEWLINE;
+				break;
+			case 'm':
+				v->value.like_regex.flags &= ~JSP_REGEX_SLINE;
+				v->value.like_regex.flags |= JSP_REGEX_MLINE;
+				cflags &= ~REG_NEWLINE;
+				break;
+			case 'x':
+				v->value.like_regex.flags |= JSP_REGEX_WSPACE;
+				cflags |= REG_EXPANDED;
+				break;
+			default:
+				yyerror(NULL, "unrecognized flag of LIKE_REGEX predicate");
+				break;
+		}
+	}
+
+	/* check regex validity */
+	(void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
+														 pattern->len),
+								cflags, DEFAULT_COLLATION_OID);
+
+	return v;
+}
+
+%}
+
+/* BISON Declarations */
+%pure-parser
+%expect 0
+%name-prefix="jsonpath_yy"
+%error-verbose
+%parse-param {JsonPathParseResult **result}
+
+%union {
+	string				str;
+	List				*elems;		/* list of JsonPathParseItem */
+	List				*indexs;	/* list of integers */
+	JsonPathParseItem	*value;
+	JsonPathParseResult *result;
+	JsonPathItemType	optype;
+	bool				boolean;
+	int					integer;
+}
+
+%token	<str>		TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
+%token	<str>		IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
+%token	<str>		OR_P AND_P NOT_P
+%token	<str>		LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
+%token	<str>		ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
+%token	<str>		ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
+
+%type	<result>	result
+
+%type	<value>		scalar_value path_primary expr array_accessor
+					any_path accessor_op key predicate delimited_predicate
+					index_elem starts_with_initial expr_or_predicate
+
+%type	<elems>		accessor_expr
+
+%type	<indexs>	index_list
+
+%type	<optype>	comp_op method
+
+%type	<boolean>	mode
+
+%type	<str>		key_name
+
+%type	<integer>	any_level
+
+%left	OR_P
+%left	AND_P
+%right	NOT_P
+%left	'+' '-'
+%left	'*' '/' '%'
+%left	UMINUS
+%nonassoc '(' ')'
+
+/* Grammar follows */
+%%
+
+result:
+	mode expr_or_predicate			{
+										*result = palloc(sizeof(JsonPathParseResult));
+										(*result)->expr = $2;
+										(*result)->lax = $1;
+									}
+	| /* EMPTY */					{ *result = NULL; }
+	;
+
+expr_or_predicate:
+	expr							{ $$ = $1; }
+	| predicate						{ $$ = $1; }
+	;
+
+/*
+ * The value of "mode" becomes the "lax" flag of the parse result: false for
+ * an explicit "strict", true for an explicit "lax" or when no mode is given.
+ */
+mode:
+	STRICT_P						{ $$ = false; }
+	| LAX_P							{ $$ = true; }
+	| /* EMPTY */					{ $$ = true; }
+	;
+
+scalar_value:
+	STRING_P						{ $$ = makeItemString(&$1); }
+	| NULL_P						{ $$ = makeItemString(NULL); }
+	| TRUE_P						{ $$ = makeItemBool(true); }
+	| FALSE_P						{ $$ = makeItemBool(false); }
+	| NUMERIC_P						{ $$ = makeItemNumeric(&$1); }
+	| INT_P							{ $$ = makeItemNumeric(&$1); }
+	| VARIABLE_P 					{ $$ = makeItemVariable(&$1); }
+	;
+
+comp_op:
+	EQUAL_P							{ $$ = jpiEqual; }
+	| NOTEQUAL_P					{ $$ = jpiNotEqual; }
+	| LESS_P						{ $$ = jpiLess; }
+	| GREATER_P						{ $$ = jpiGreater; }
+	| LESSEQUAL_P					{ $$ = jpiLessOrEqual; }
+	| GREATEREQUAL_P				{ $$ = jpiGreaterOrEqual; }
+	;
+
+delimited_predicate:
+	'(' predicate ')'						{ $$ = $2; }
+	| EXISTS_P '(' expr ')'			{ $$ = makeItemUnary(jpiExists, $3); }
+	;
+
+predicate:
+	delimited_predicate				{ $$ = $1; }
+	| expr comp_op expr				{ $$ = makeItemBinary($2, $1, $3); }
+	| predicate AND_P predicate		{ $$ = makeItemBinary(jpiAnd, $1, $3); }
+	| predicate OR_P predicate		{ $$ = makeItemBinary(jpiOr, $1, $3); }
+	| NOT_P delimited_predicate 	{ $$ = makeItemUnary(jpiNot, $2); }
+	| '(' predicate ')' IS_P UNKNOWN_P	{ $$ = makeItemUnary(jpiIsUnknown, $2); }
+	| expr STARTS_P WITH_P starts_with_initial
+		{ $$ = makeItemBinary(jpiStartsWith, $1, $4); }
+	| expr LIKE_REGEX_P STRING_P 	{ $$ = makeItemLikeRegex($1, &$3, NULL); }
+	| expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
+									{ $$ = makeItemLikeRegex($1, &$3, &$5); }
+	;
+
+starts_with_initial:
+	STRING_P						{ $$ = makeItemString(&$1); }
+	| VARIABLE_P					{ $$ = makeItemVariable(&$1); }
+	;
+
+path_primary:
+	scalar_value					{ $$ = $1; }
+	| '$'							{ $$ = makeItemType(jpiRoot); }
+	| '@'							{ $$ = makeItemType(jpiCurrent); }
+	| LAST_P						{ $$ = makeItemType(jpiLast); }
+	;
+
+accessor_expr:
+	path_primary					{ $$ = list_make1($1); }
+	| '(' expr ')' accessor_op		{ $$ = list_make2($2, $4); }
+	| '(' predicate ')' accessor_op	{ $$ = list_make2($2, $4); }
+	| accessor_expr accessor_op		{ $$ = lappend($1, $2); }
+	;
+
+expr:
+	accessor_expr					{ $$ = makeItemList($1); }
+	| '(' expr ')'					{ $$ = $2; }
+	| '+' expr %prec UMINUS			{ $$ = makeItemUnary(jpiPlus, $2); }
+	| '-' expr %prec UMINUS			{ $$ = makeItemUnary(jpiMinus, $2); }
+	| expr '+' expr					{ $$ = makeItemBinary(jpiAdd, $1, $3); }
+	| expr '-' expr					{ $$ = makeItemBinary(jpiSub, $1, $3); }
+	| expr '*' expr					{ $$ = makeItemBinary(jpiMul, $1, $3); }
+	| expr '/' expr					{ $$ = makeItemBinary(jpiDiv, $1, $3); }
+	| expr '%' expr					{ $$ = makeItemBinary(jpiMod, $1, $3); }
+	;
+
+index_elem:
+	expr							{ $$ = makeItemBinary(jpiSubscript, $1, NULL); }
+	| expr TO_P expr				{ $$ = makeItemBinary(jpiSubscript, $1, $3); }
+	;
+
+index_list:
+	index_elem						{ $$ = list_make1($1); }
+	| index_list ',' index_elem		{ $$ = lappend($1, $3); }
+	;
+
+array_accessor:
+	'[' '*' ']'						{ $$ = makeItemType(jpiAnyArray); }
+	| '[' index_list ']'			{ $$ = makeIndexArray($2); }
+	;
+
+/*
+ * A level bound for the "**" accessor: either an integer literal or the
+ * keyword "last", which is passed on as -1 (makeAny() stores negative
+ * bounds as PG_UINT32_MAX).
+ */
+any_level:
+	INT_P							{ $$ = pg_atoi($1.val, 4, 0); }
+	| LAST_P						{ $$ = -1; }
+	;
+
+/*
+ * "**" alone covers levels 0 through "last"; "**{n}" uses n for both
+ * bounds; "**{n to m}" gives both bounds explicitly.
+ */
+any_path:
+	ANY_P							{ $$ = makeAny(0, -1); }
+	| ANY_P '{' any_level '}'		{ $$ = makeAny($3, $3); }
+	| ANY_P '{' any_level TO_P any_level '}'	{ $$ = makeAny($3, $5); }
+	;
+
+accessor_op:
+	'.' key							{ $$ = $2; }
+	| '.' '*'						{ $$ = makeItemType(jpiAnyKey); }
+	| array_accessor				{ $$ = $1; }
+	| '.' any_path					{ $$ = $2; }
+	| '.' method '(' ')'			{ $$ = makeItemType($2); }
+	| '?' '(' predicate ')'			{ $$ = makeItemUnary(jpiFilter, $3); }
+	;
+
+key:
+	key_name						{ $$ = makeItemKey(&$1); }
+	;
+
+key_name:
+	IDENT_P
+	| STRING_P
+	| TO_P
+	| NULL_P
+	| TRUE_P
+	| FALSE_P
+	| IS_P
+	| UNKNOWN_P
+	| EXISTS_P
+	| STRICT_P
+	| LAX_P
+	| ABS_P
+	| SIZE_P
+	| TYPE_P
+	| FLOOR_P
+	| DOUBLE_P
+	| CEILING_P
+	| KEYVALUE_P
+	| LAST_P
+	| STARTS_P
+	| WITH_P
+	| LIKE_REGEX_P
+	| FLAG_P
+	;
+
+method:
+	ABS_P							{ $$ = jpiAbs; }
+	| SIZE_P						{ $$ = jpiSize; }
+	| TYPE_P						{ $$ = jpiType; }
+	| FLOOR_P						{ $$ = jpiFloor; }
+	| DOUBLE_P						{ $$ = jpiDouble; }
+	| CEILING_P						{ $$ = jpiCeiling; }
+	| KEYVALUE_P					{ $$ = jpiKeyValue; }
+	;
+%%
+
--- a/src/backend/utils/adt/jsonpath_scan.l
+++ b/src/backend/utils/adt/jsonpath_scan.l
@ -0,0 +1,638 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_scan.l
+ *	Lexical parser for jsonpath datatype
+ *
+ * Copyright (c) 2019, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ *	src/backend/utils/adt/jsonpath_scan.l
+ *
+ *-------------------------------------------------------------------------
+ */
+
+%{
+#include "postgres.h"
+
+#include "mb/pg_wchar.h"
+#include "nodes/pg_list.h"
+#include "utils/jsonpath_scanner.h"
+
+static string scanstring;
+
+/* No reason to constrain amount of data slurped */
+/* #define YY_READ_BUF_SIZE 16777216 */
+
+/* Handles to the buffer that the lexer uses internally */
+static YY_BUFFER_STATE scanbufhandle;
+static char *scanbuf;
+static int	scanbuflen;
+
+static void addstring(bool init, char *s, int l);
+static void addchar(bool init, char s);
+static int checkSpecialVal(void); /* examine scanstring for the special
+								   * value */
+
+static void parseUnicode(char *s, int l);
+static void parseHexChars(char *s, int l);
+
+/* Avoid exit() on fatal scanner errors (a bit ugly -- see yy_fatal_error) */
+#undef fprintf
+#define fprintf(file, fmt, msg)  fprintf_to_ereport(fmt, msg)
+
+/*
+ * Replacement target for the scanner's fprintf() calls (see the #define
+ * above): reports msg through ereport(ERROR).  The fmt argument is accepted
+ * only to match the macro's argument list and is not used.
+ */
+static void
+fprintf_to_ereport(const char *fmt, const char *msg)
+{
+	ereport(ERROR, (errmsg_internal("%s", msg)));
+}
+
+#define yyerror jsonpath_yyerror
+%}
+
+%option 8bit
+%option never-interactive
+%option nodefault
+%option noinput
+%option nounput
+%option noyywrap
+%option warn
+%option prefix="jsonpath_yy"
+%option bison-bridge
+%option noyyalloc
+%option noyyrealloc
+%option noyyfree
+
+%x xQUOTED
+%x xNONQUOTED
+%x xVARQUOTED
+%x xSINGLEQUOTED
+%x xCOMMENT
+
+special		 [\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/]
+any			[^\?\%\$\.\[\]\{\}\(\)\|\&\!\=\<\>\@\#\,\*:\-\+\/\\\"\' \t\n\r\f]
+blank		[ \t\n\r\f]
+hex_dig		[0-9A-Fa-f]
+unicode		\\u({hex_dig}{4}|\{{hex_dig}{1,6}\})
+hex_char	\\x{hex_dig}{2}
+
+
+%%
+
+<INITIAL>\&\&					{ return AND_P; }
+
+<INITIAL>\|\|					{ return OR_P; }
+
+<INITIAL>\!						{ return NOT_P; }
+
+<INITIAL>\*\*					{ return ANY_P; }
+
+<INITIAL>\<						{ return LESS_P; }
+
+<INITIAL>\<\=					{ return LESSEQUAL_P; }
+
+<INITIAL>\=\=					{ return EQUAL_P; }
+
+<INITIAL>\<\>					{ return NOTEQUAL_P; }
+
+<INITIAL>\!\=					{ return NOTEQUAL_P; }
+
+<INITIAL>\>\=					{ return GREATEREQUAL_P; }
+
+<INITIAL>\>						{ return GREATER_P; }
+
+<INITIAL>\${any}+				{
+									addstring(true, yytext + 1, yyleng - 1);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return VARIABLE_P;
+								}
+
+<INITIAL>\$\"					{
+									addchar(true, '\0');
+									BEGIN xVARQUOTED;
+								}
+
+<INITIAL>{special}				{ return *yytext; }
+
+<INITIAL>{blank}+				{ /* ignore */ }
+
+<INITIAL>\/\*					{
+									addchar(true, '\0');
+									BEGIN xCOMMENT;
+								}
+
+<INITIAL>[0-9]+(\.[0-9]+)?[eE][+-]?[0-9]+  /* float */  {
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return NUMERIC_P;
+								}
+
+<INITIAL>\.[0-9]+[eE][+-]?[0-9]+  /* float */  {
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return NUMERIC_P;
+								}
+
+<INITIAL>([0-9]+)?\.[0-9]+		{
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return NUMERIC_P;
+								}
+
+<INITIAL>[0-9]+					{
+									addstring(true, yytext, yyleng);
+									addchar(false, '\0');
+									yylval->str = scanstring;
+									return INT_P;
+								}
+
+<INITIAL>{any}+					{
+									addstring(true, yytext, yyleng);
+									BEGIN xNONQUOTED;
+								}
+
+<INITIAL>\"						{
+									addchar(true, '\0');
+									BEGIN xQUOTED;
+								}
+
+<INITIAL>\'						{
+									addchar(true, '\0');
+									BEGIN xSINGLEQUOTED;
+								}
+
+<INITIAL>\\						{
+									yyless(0);
+									addchar(true, '\0');
+									BEGIN xNONQUOTED;
+								}
+
+<xNONQUOTED>{any}+				{
+									addstring(false, yytext, yyleng);
+								}
+
+<xNONQUOTED>{blank}+			{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return checkSpecialVal();
+								}
+
+
+<xNONQUOTED>\/\*				{
+									/*
+									 * A comment opens directly after an unquoted
+									 * identifier.  Return the identifier first and
+									 * push the comment opener back, so that the
+									 * INITIAL-state comment rule consumes it on the
+									 * next call.  Switching straight to xCOMMENT
+									 * here would lose the identifier, because no
+									 * token is returned when the comment ends.
+									 */
+									yylval->str = scanstring;
+									yyless(0);
+									BEGIN INITIAL;
+									return checkSpecialVal();
+								}
+
+<xNONQUOTED>({special}|\"|\')	{
+									yylval->str = scanstring;
+									yyless(0);
+									BEGIN INITIAL;
+									return checkSpecialVal();
+								}
+
+<xNONQUOTED><<EOF>>				{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return checkSpecialVal();
+								}
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\[\"\'\\]	{ addchar(false, yytext[1]); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\b	{ addchar(false, '\b'); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\f	{ addchar(false, '\f'); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\n	{ addchar(false, '\n'); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\r	{ addchar(false, '\r'); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\t	{ addchar(false, '\t'); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\v	{ addchar(false, '\v'); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{unicode}+		{ parseUnicode(yytext, yyleng); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>{hex_char}+	{ parseHexChars(yytext, yyleng); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\x	{ yyerror(NULL, "Hex character sequence is invalid"); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\u	{ yyerror(NULL, "Unicode sequence is invalid"); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\.	{ yyerror(NULL, "Escape sequence is invalid"); }
+
+<xNONQUOTED,xQUOTED,xVARQUOTED,xSINGLEQUOTED>\\		{ yyerror(NULL, "Unexpected end after backslash"); }
+
+<xQUOTED,xVARQUOTED,xSINGLEQUOTED><<EOF>>			{ yyerror(NULL, "Unexpected end of quoted string"); }
+
+<xQUOTED>\"						{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return STRING_P;
+								}
+
+<xVARQUOTED>\"					{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return VARIABLE_P;
+								}
+
+<xSINGLEQUOTED>\'				{
+									yylval->str = scanstring;
+									BEGIN INITIAL;
+									return STRING_P;
+								}
+
+<xQUOTED,xVARQUOTED>[^\\\"]+	{ addstring(false, yytext, yyleng); }
+
+<xSINGLEQUOTED>[^\\\']+			{ addstring(false, yytext, yyleng); }
+
+<INITIAL><<EOF>>				{ yyterminate(); }
+
+<xCOMMENT>\*\/					{ BEGIN INITIAL; }
+
+<xCOMMENT>[^\*]+				{ }
+
+<xCOMMENT>\*					{ }
+
+<xCOMMENT><<EOF>>				{ yyerror(NULL, "Unexpected end of comment"); }
+
+%%
+
+/*
+ * Report a jsonpath scanning or parsing error as a syntax error.
+ *
+ * The detail text says either that the problem is at the end of the input
+ * or quotes the text of the current token.  The result argument is unused.
+ */
+void
+jsonpath_yyerror(JsonPathParseResult **result, const char *message)
+{
+	bool		at_end = (*yytext == YY_END_OF_BUFFER_CHAR);
+
+	if (!at_end)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("bad jsonpath representation"),
+				 /* translator: first %s is typically "syntax error" */
+				 errdetail("%s at or near \"%s\"", message, yytext)));
+	}
+	else
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_SYNTAX_ERROR),
+				 errmsg("bad jsonpath representation"),
+				 /* translator: %s is typically "syntax error" */
+				 errdetail("%s at end of input", message)));
+	}
+}
+
+/*
+ * One entry of the keyword lookup table used by checkSpecialVal().
+ */
+typedef struct keyword
+{
+	int16	len;		/* length of the keyword text */
+	bool	lowercase;	/* if true, matches only when spelled exactly as
+						 * listed (all lower case) */
+	int		val;		/* token code returned for this keyword */
+	char	*keyword;	/* keyword text */
+} keyword;
+
+/*
+ * Array of key words should be sorted by length and then
+ * alphabetical order
+ *
+ * checkSpecialVal() binary-searches this table using exactly that ordering,
+ * and takes the last entry's length as the maximum keyword length.
+ */
+
+static keyword keywords[] = {
+	{ 2, false,	IS_P,		"is"},
+	{ 2, false,	TO_P,		"to"},
+	{ 3, false,	ABS_P,		"abs"},
+	{ 3, false,	LAX_P,		"lax"},
+	{ 4, false,	FLAG_P,		"flag"},
+	{ 4, false,	LAST_P,		"last"},
+	{ 4, true,	NULL_P,		"null"},
+	{ 4, false,	SIZE_P,		"size"},
+	{ 4, true,	TRUE_P,		"true"},
+	{ 4, false,	TYPE_P,		"type"},
+	{ 4, false,	WITH_P,		"with"},
+	{ 5, true,	FALSE_P,	"false"},
+	{ 5, false,	FLOOR_P,	"floor"},
+	{ 6, false,	DOUBLE_P,	"double"},
+	{ 6, false,	EXISTS_P,	"exists"},
+	{ 6, false,	STARTS_P,	"starts"},
+	{ 6, false,	STRICT_P,	"strict"},
+	{ 7, false,	CEILING_P,	"ceiling"},
+	{ 7, false,	UNKNOWN_P,	"unknown"},
+	{ 8, false,	KEYVALUE_P,	"keyvalue"},
+	{ 10,false, LIKE_REGEX_P, "like_regex"},
+};
+
+/*
+ * Decide whether the identifier currently held in scanstring is a keyword.
+ *
+ * Returns the keyword's token code, or IDENT_P when the text is not a
+ * keyword.  The keywords table is binary-searched by length first and then
+ * by case-insensitive comparison; entries flagged "lowercase" additionally
+ * require an exact, case-sensitive match.
+ */
+static int
+checkSpecialVal(void)
+{
+	keyword    *lo = keywords;
+	keyword    *hi = keywords + lengthof(keywords);
+
+	/* the table is sorted by length, so its last entry is the longest */
+	if (scanstring.len > keywords[lengthof(keywords) - 1].len)
+		return IDENT_P;
+
+	while (lo < hi)
+	{
+		keyword    *mid = lo + ((hi - lo) >> 1);
+		int			diff;
+
+		if (mid->len == scanstring.len)
+			diff = pg_strncasecmp(mid->keyword, scanstring.val,
+								  scanstring.len);
+		else
+			diff = mid->len - scanstring.len;
+
+		if (diff < 0)
+			lo = mid + 1;
+		else if (diff > 0)
+			hi = mid;
+		else
+		{
+			/* case-insensitive hit; some keywords must match exactly */
+			if (mid->lowercase &&
+				strncmp(mid->keyword, scanstring.val, scanstring.len) != 0)
+				return IDENT_P;
+
+			return mid->val;
+		}
+	}
+
+	return IDENT_P;
+}
+
+/*
+ * Called before any actual parsing is done
+ *
+ * Sets up the flex input buffer for the given text.  str is the jsonpath
+ * text; slen is its length in bytes, or a value <= 0 to have the length
+ * taken with strlen().  The text is copied into a palloc'd buffer that
+ * jsonpath_scanner_finish() releases.
+ */
+static void
+jsonpath_scanner_init(const char *str, int slen)
+{
+	if (slen <= 0)
+		slen = strlen(str);
+
+	/*
+	 * Might be left over after ereport()
+	 */
+	yy_init_globals();
+
+	/*
+	 * Make a scan buffer with special termination needed by flex.
+	 */
+
+	scanbuflen = slen;
+	scanbuf = palloc(slen + 2);
+	memcpy(scanbuf, str, slen);
+	/* two end-of-buffer characters follow the copied text */
+	scanbuf[slen] = scanbuf[slen + 1] = YY_END_OF_BUFFER_CHAR;
+	scanbufhandle = yy_scan_buffer(scanbuf, slen + 2);
+
+	BEGIN(INITIAL);
+}
+
+
+/*
+ * Called after parsing is done to clean up after jsonpath_scanner_init()
+ *
+ * Deletes the flex buffer handle and frees the copy of the input text.
+ */
+static void
+jsonpath_scanner_finish(void)
+{
+	yy_delete_buffer(scanbufhandle);
+	pfree(scanbuf);
+}
+
+/*
+ * Append l bytes starting at s to scanstring.  With init set, scanstring
+ * is first reset to a freshly allocated, empty 32-byte buffer.  A NULL s or
+ * a zero l appends nothing.
+ */
+static void
+addstring(bool init, char *s, int l)
+{
+	if (init)
+	{
+		scanstring.len = 0;
+		scanstring.total = 32;
+		scanstring.val = palloc(scanstring.total);
+	}
+
+	if (s == NULL || l == 0)
+		return;
+
+	/* keep doubling until the new bytes fit with at least one byte to spare */
+	while (scanstring.len + l + 1 >= scanstring.total)
+	{
+		scanstring.total *= 2;
+		scanstring.val = repalloc(scanstring.val, scanstring.total);
+	}
+
+	memcpy(scanstring.val + scanstring.len, s, l);
+	scanstring.len += l;
+}
+
+/*
+ * Store the single byte s at the end of the scanstring accumulator.
+ *
+ * When init is true the accumulator is first reset to an empty 32-byte
+ * buffer; otherwise the buffer is doubled if it is about to fill up.
+ * A '\0' byte is written but not counted in scanstring.len, so it acts
+ * as a terminator that later appends overwrite.
+ */
+static void
+addchar(bool init, char s)
+{
+	if (!init)
+	{
+		if (scanstring.len + 1 >= scanstring.total)
+		{
+			scanstring.total *= 2;
+			scanstring.val = repalloc(scanstring.val, scanstring.total);
+		}
+	}
+	else
+	{
+		scanstring.len = 0;
+		scanstring.total = 32;
+		scanstring.val = palloc(scanstring.total);
+	}
+
+	scanstring.val[scanstring.len] = s;
+
+	/* a NUL only terminates the string; it does not extend it */
+	if (s != '\0')
+		scanstring.len++;
+}
+
+/*
+ * Parse the jsonpath text in str and return the parser's result.
+ *
+ * len is the length of str in bytes; it is handed to
+ * jsonpath_scanner_init(), which falls back to strlen() when len <= 0.
+ */
+JsonPathParseResult *
+parsejsonpath(const char *str, int len)
+{
+	JsonPathParseResult	*parseresult;
+
+	jsonpath_scanner_init(str, len);
+
+	/*
+	 * The address of parseresult is passed so the grammar can store its
+	 * result there.  A nonzero return is reported through
+	 * jsonpath_yyerror() -- presumably this does not return; confirm.
+	 */
+	if (jsonpath_yyparse((void*)&parseresult) != 0)
+		jsonpath_yyerror(NULL, "bogus input");
+
+	jsonpath_scanner_finish();
+
+	return parseresult;
+}
+
+/*
+ * Return the numeric value (0..15) of the hexadecimal digit c, accepting
+ * both upper- and lower-case letters.  Any other character raises an
+ * error through elog(ERROR).
+ */
+static int
+hexval(char c)
+{
+	int			digit;
+
+	if ('0' <= c && c <= '9')
+		digit = c - '0';
+	else if ('a' <= c && c <= 'f')
+		digit = 0xA + (c - 'a');
+	else if ('A' <= c && c <= 'F')
+		digit = 0xA + (c - 'A');
+	else
+	{
+		elog(ERROR, "invalid hexadecimal digit");
+		digit = 0;				/* not reached */
+	}
+
+	return digit;
+}
+
+/*
+ * Append the character with code point ch to scanstring.
+ *
+ * In a UTF8 database the code point is converted to its UTF-8 byte
+ * sequence.  In any other encoding only ASCII code points (<= 0x7f) are
+ * accepted and stored as a single byte; anything above raises an error.
+ * Code point zero is always rejected.
+ */
+static void
+addUnicodeChar(int ch)
+{
+	if (ch == 0)
+	{
+		/* We can't allow this, since our TEXT type doesn't */
+		ereport(ERROR,
+				(errcode(ERRCODE_UNTRANSLATABLE_CHARACTER),
+				 errmsg("unsupported Unicode escape sequence"),
+				 errdetail("\\u0000 cannot be converted to text.")));
+	}
+	else if (GetDatabaseEncoding() != PG_UTF8)
+	{
+		if (ch > 0x007f)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type jsonpath"),
+					 errdetail("Unicode escape values cannot be used for code "
+							   "point values above 007F when the server encoding "
+							   "is not UTF8.")));
+		else
+		{
+			/*
+			 * An escape is the only way to designate things like a form
+			 * feed character in JSON, so ASCII is useful in all encodings.
+			 */
+			addchar(false, (char) ch);
+		}
+	}
+	else
+	{
+		char		encoded[5];
+		int			nbytes;
+
+		/* replace the escape by the actual UTF-8 character */
+		unicode_to_utf8(ch, (unsigned char *) encoded);
+		nbytes = pg_utf_mblen((unsigned char *) encoded);
+		addstring(false, encoded, nbytes);
+	}
+}
+
+/*
+ * Process one decoded \u escape value, combining UTF-16 surrogate pairs.
+ *
+ * *hi_surrogate carries state between calls: -1 when no high surrogate
+ * is pending, otherwise the already-shifted upper bits of the pair.  A
+ * high surrogate is only remembered; a low surrogate is merged with the
+ * pending high one; any other value is appended as is.  Surrogates out
+ * of sequence raise an error.
+ */
+static void
+addUnicode(int ch, int *hi_surrogate)
+{
+	bool		is_high = (ch >= 0xd800 && ch <= 0xdbff);
+	bool		is_low = (ch >= 0xdc00 && ch <= 0xdfff);
+	bool		pending = (*hi_surrogate != -1);
+
+	if (is_high)
+	{
+		if (pending)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type jsonpath"),
+					 errdetail("Unicode high surrogate must not follow "
+							   "a high surrogate.")));
+
+		/* remember the top ten bits; nothing to emit yet */
+		*hi_surrogate = (ch & 0x3ff) << 10;
+		return;
+	}
+
+	if (is_low)
+	{
+		if (!pending)
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type jsonpath"),
+					 errdetail("Unicode low surrogate must follow a high "
+							   "surrogate.")));
+
+		/* combine the pair into one supplementary-plane code point */
+		ch = 0x10000 + *hi_surrogate + (ch & 0x3ff);
+		*hi_surrogate = -1;
+	}
+	else if (pending)
+	{
+		/* an ordinary character arrived where a low surrogate was due */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type jsonpath"),
+				 errdetail("Unicode low surrogate must follow a high "
+						   "surrogate.")));
+	}
+
+	addUnicodeChar(ch);
+}
+
+/*
+ * Decode a run of Unicode escapes and append the resulting characters to
+ * scanstring.
+ *
+ * s holds l bytes made of one or more escapes, each written as '\uXXXX'
+ * or '\u{XX...}' (presumably the scanner's matched text -- confirm
+ * against the lexer rules).  Surrogate pairs spanning two consecutive
+ * escapes are combined by addUnicode(); a high surrogate left pending at
+ * the end of the run raises an error.
+ *
+ * parseUnicode was adopted from json_lex_string() in
+ * src/backend/utils/adt/json.c
+ */
+static void
+parseUnicode(char *s, int l)
+{
+	int			i;
+	int			hi_surrogate = -1;
+
+	for (i = 2; i < l; i += 2)	/* skip '\u' */
+	{
+		int			ch = 0;
+		int			j;
+
+		if (s[i] == '{')	/* parse '\u{XX...}' */
+		{
+			/*
+			 * Check the bound before looking at s[i], so a missing '}'
+			 * can never cause a read past the end of the input.
+			 */
+			while (++i < l && s[i] != '}')
+				ch = (ch << 4) | hexval(s[i]);
+			i++;	/* skip '}' */
+		}
+		else		/* parse '\uXXXX' */
+		{
+			for (j = 0; j < 4 && i < l; j++)
+				ch = (ch << 4) | hexval(s[i++]);
+		}
+
+		addUnicode(ch, &hi_surrogate);
+	}
+
+	/* the run must not end in the middle of a surrogate pair */
+	if (hi_surrogate != -1)
+	{
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type jsonpath"),
+				 errdetail("Unicode low surrogate must follow a high "
+						   "surrogate.")));
+	}
+}
+
+/*
+ * Decode a run of '\xXX' escapes and append each resulting character to
+ * scanstring.
+ *
+ * s holds l bytes; l is asserted to be a multiple of four, the size of
+ * one escape.  Each two-digit value is passed to addUnicodeChar().
+ */
+static void
+parseHexChars(char *s, int l)
+{
+	int			pos;
+
+	Assert(l % 4 /* \xXX */ == 0);
+
+	/* walk the input one four-byte escape at a time */
+	for (pos = 0; pos + 4 <= l; pos += 4)
+	{
+		int			high = hexval(s[pos + 2]);
+		int			low = hexval(s[pos + 3]);
+
+		addUnicodeChar((high << 4) | low);
+	}
+}
+
+/*
+ * Interface functions to make flex use palloc() instead of malloc().
+ * It'd be better to make these static, but flex insists otherwise.
+ */
+
+/* Allocation hook for flex: hand out "bytes" bytes obtained from palloc(). */
+void *
+jsonpath_yyalloc(yy_size_t bytes)
+{
+	return palloc(bytes);
+}
+
+/*
+ * Reallocation hook for flex: resize ptr to "bytes" bytes with repalloc().
+ * A NULL ptr is treated as a fresh allocation via palloc().
+ */
+void *
+jsonpath_yyrealloc(void *ptr, yy_size_t bytes)
+{
+	if (ptr == NULL)
+		return palloc(bytes);
+
+	return repalloc(ptr, bytes);
+}
+
+/* Free hook for flex: release ptr with pfree(); a NULL ptr is ignored. */
+void
+jsonpath_yyfree(void *ptr)
+{
+	if (ptr == NULL)
+		return;
+
+	pfree(ptr);
+}
+
--- a/src/backend/utils/adt/regexp.c
+++ b/src/backend/utils/adt/regexp.c
@ -133,7 +133,7 @@ static Datum build_regexp_split_result(regexp_matches_ctx *splitctx);
 * Pattern is given in the database encoding.  We internally convert to
 * an array of pg_wchar, which is what Spencer's regex package wants.
 */
-static regex_t *
+regex_t *
 RE_compile_and_cache(text *text_re, int cflags, Oid collation)
 {
 	int			text_re_len = VARSIZE_ANY_EXHDR(text_re);
@ -339,7 +339,7 @@ RE_execute(regex_t *re, char *dat, int dat_len,
 * Both pattern and data are given in the database encoding.  We internally
 * convert to array of pg_wchar which is what Spencer's regex package wants.
 */
-static bool
+bool
 RE_compile_and_execute(text *text_re, char *dat, int dat_len,
 					   int cflags, Oid collation,
 					   int nmatch, regmatch_t *pmatch)
--- a/src/backend/utils/errcodes.txt
+++ b/src/backend/utils/errcodes.txt
@ -206,6 +206,21 @@ Section: Class 22 - Data Exception
 2200N    E    ERRCODE_INVALID_XML_CONTENT                                    invalid_xml_content
 2200S    E    ERRCODE_INVALID_XML_COMMENT                                    invalid_xml_comment
 2200T    E    ERRCODE_INVALID_XML_PROCESSING_INSTRUCTION                     invalid_xml_processing_instruction
+22030    E    ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE                        duplicate_json_object_key_value
+22032    E    ERRCODE_INVALID_JSON_TEXT                                      invalid_json_text
+22033    E    ERRCODE_INVALID_JSON_SUBSCRIPT                                 invalid_json_subscript
+22034    E    ERRCODE_MORE_THAN_ONE_JSON_ITEM                                more_than_one_json_item
+22035    E    ERRCODE_NO_JSON_ITEM                                           no_json_item
+22036    E    ERRCODE_NON_NUMERIC_JSON_ITEM                                  non_numeric_json_item
+22037    E    ERRCODE_NON_UNIQUE_KEYS_IN_JSON_OBJECT                         non_unique_keys_in_json_object
+22038    E    ERRCODE_SINGLETON_JSON_ITEM_REQUIRED                           singleton_json_item_required
+22039    E    ERRCODE_JSON_ARRAY_NOT_FOUND                                   json_array_not_found
+2203A    E    ERRCODE_JSON_MEMBER_NOT_FOUND                                  json_member_not_found
+2203B    E    ERRCODE_JSON_NUMBER_NOT_FOUND                                  json_number_not_found
+2203C    E    ERRCODE_JSON_OBJECT_NOT_FOUND                                  json_object_not_found
+2203F    E    ERRCODE_JSON_SCALAR_REQUIRED                                   json_scalar_required
+2203D    E    ERRCODE_TOO_MANY_JSON_ARRAY_ELEMENTS                           too_many_json_array_elements
+2203E    E    ERRCODE_TOO_MANY_JSON_OBJECT_MEMBERS                           too_many_json_object_members

 Section: Class 23 - Integrity Constraint Violation