1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

SQL/JSON constructors

This patch introduces the SQL/JSON standard constructors for JSON:

JSON()
JSON_ARRAY()
JSON_ARRAYAGG()
JSON_OBJECT()
JSON_OBJECTAGG()

For the most part these functions provide facilities that mimic
existing json/jsonb functions. However, they also offer some useful
additional functionality. In addition to text input, the JSON() function
accepts bytea input, which it will decode and constuct a json value from.
The other functions provide useful options for handling duplicate keys
and null values.

This series of patches will be followed by a consolidated documentation
patch.

Nikita Glukhov

Reviewers have included (in no particular order) Andres Freund, Alexander
Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu,
Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby.

Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru
This commit is contained in:
Andrew Dunstan
2022-03-03 13:02:10 -05:00
parent f79b803dcc
commit f4fb45d15c
37 changed files with 3615 additions and 130 deletions

View File

@ -13,7 +13,9 @@
*/
#include "postgres.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
@ -42,6 +44,42 @@ typedef enum /* type categories for datum_to_json */
JSONTYPE_OTHER /* all else */
} JsonTypeCategory;
/* Common context for key uniqueness check */
typedef struct HTAB *JsonUniqueCheckState; /* hash table for key names */
/* Hash entry for JsonUniqueCheckState */
typedef struct JsonUniqueHashEntry
{
const char *key;
int key_len;
int object_id;
} JsonUniqueHashEntry;
/* Context for key uniqueness check in builder functions */
typedef struct JsonUniqueBuilderState
{
JsonUniqueCheckState check; /* unique check */
StringInfoData skipped_keys; /* skipped keys with NULL values */
MemoryContext mcxt; /* context for saving skipped keys */
} JsonUniqueBuilderState;
/* Element of object stack for key uniqueness check during json parsing */
typedef struct JsonUniqueStackEntry
{
struct JsonUniqueStackEntry *parent;
int object_id;
} JsonUniqueStackEntry;
/* State for key uniqueness check during json parsing */
typedef struct JsonUniqueParsingState
{
JsonLexContext *lex;
JsonUniqueCheckState check;
JsonUniqueStackEntry *stack;
int id_counter;
bool unique;
} JsonUniqueParsingState;
typedef struct JsonAggState
{
StringInfo str;
@ -49,6 +87,7 @@ typedef struct JsonAggState
Oid key_output_func;
JsonTypeCategory val_category;
Oid val_output_func;
JsonUniqueBuilderState unique_check;
} JsonAggState;
static void composite_to_json(Datum composite, StringInfo result,
@ -722,6 +761,38 @@ row_to_json_pretty(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
bool
to_json_is_immutable(Oid typoid)
{
JsonTypeCategory tcategory;
Oid outfuncoid;
json_categorize_type(typoid, &tcategory, &outfuncoid);
switch (tcategory)
{
case JSONTYPE_BOOL:
case JSONTYPE_JSON:
return true;
case JSONTYPE_DATE:
case JSONTYPE_TIMESTAMP:
case JSONTYPE_TIMESTAMPTZ:
return false;
case JSONTYPE_ARRAY:
return false; /* TODO recurse into elements */
case JSONTYPE_COMPOSITE:
return false; /* TODO recurse into fields */
case JSONTYPE_NUMERIC:
case JSONTYPE_CAST:
default:
return func_volatile(outfuncoid) == PROVOLATILE_IMMUTABLE;
}
}
/*
* SQL function to_json(anyvalue)
*/
@ -754,8 +825,8 @@ to_json(PG_FUNCTION_ARGS)
*
* aggregate input column as a json array value.
*/
Datum
json_agg_transfn(PG_FUNCTION_ARGS)
static Datum
json_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null)
{
MemoryContext aggcontext,
oldcontext;
@ -795,9 +866,14 @@ json_agg_transfn(PG_FUNCTION_ARGS)
else
{
state = (JsonAggState *) PG_GETARG_POINTER(0);
appendStringInfoString(state->str, ", ");
}
if (absent_on_null && PG_ARGISNULL(1))
PG_RETURN_POINTER(state);
if (state->str->len > 1)
appendStringInfoString(state->str, ", ");
/* fast path for NULLs */
if (PG_ARGISNULL(1))
{
@ -809,7 +885,7 @@ json_agg_transfn(PG_FUNCTION_ARGS)
val = PG_GETARG_DATUM(1);
/* add some whitespace if structured type and not first item */
if (!PG_ARGISNULL(0) &&
if (!PG_ARGISNULL(0) && state->str->len > 1 &&
(state->val_category == JSONTYPE_ARRAY ||
state->val_category == JSONTYPE_COMPOSITE))
{
@ -827,6 +903,25 @@ json_agg_transfn(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state);
}
/*
* json_agg aggregate function
*/
Datum
json_agg_transfn(PG_FUNCTION_ARGS)
{
return json_agg_transfn_worker(fcinfo, false);
}
/*
* json_agg_strict aggregate function
*/
Datum
json_agg_strict_transfn(PG_FUNCTION_ARGS)
{
return json_agg_transfn_worker(fcinfo, true);
}
/*
* json_agg final function
*/
@ -850,18 +945,122 @@ json_agg_finalfn(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
}
/* Functions implementing hash table for key uniqueness check */
static uint32
json_unique_hash(const void *key, Size keysize)
{
const JsonUniqueHashEntry *entry = (JsonUniqueHashEntry *) key;
uint32 hash = hash_bytes_uint32(entry->object_id);
hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len);
return DatumGetUInt32(hash);
}
static int
json_unique_hash_match(const void *key1, const void *key2, Size keysize)
{
const JsonUniqueHashEntry *entry1 = (const JsonUniqueHashEntry *) key1;
const JsonUniqueHashEntry *entry2 = (const JsonUniqueHashEntry *) key2;
if (entry1->object_id != entry2->object_id)
return entry1->object_id > entry2->object_id ? 1 : -1;
if (entry1->key_len != entry2->key_len)
return entry1->key_len > entry2->key_len ? 1 : -1;
return strncmp(entry1->key, entry2->key, entry1->key_len);
}
/* Functions implementing object key uniqueness check */
static void
json_unique_check_init(JsonUniqueCheckState *cxt)
{
HASHCTL ctl;
memset(&ctl, 0, sizeof(ctl));
ctl.keysize = sizeof(JsonUniqueHashEntry);
ctl.entrysize = sizeof(JsonUniqueHashEntry);
ctl.hcxt = CurrentMemoryContext;
ctl.hash = json_unique_hash;
ctl.match = json_unique_hash_match;
*cxt = hash_create("json object hashtable",
32,
&ctl,
HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE);
}
static void
json_unique_check_free(JsonUniqueCheckState *cxt)
{
hash_destroy(*cxt);
}
static bool
json_unique_check_key(JsonUniqueCheckState *cxt, const char *key, int object_id)
{
JsonUniqueHashEntry entry;
bool found;
entry.key = key;
entry.key_len = strlen(key);
entry.object_id = object_id;
(void) hash_search(*cxt, &entry, HASH_ENTER, &found);
return !found;
}
static void
json_unique_builder_init(JsonUniqueBuilderState *cxt)
{
json_unique_check_init(&cxt->check);
cxt->mcxt = CurrentMemoryContext;
cxt->skipped_keys.data = NULL;
}
static void
json_unique_builder_free(JsonUniqueBuilderState *cxt)
{
json_unique_check_free(&cxt->check);
if (cxt->skipped_keys.data)
pfree(cxt->skipped_keys.data);
}
/* On-demand initialization of skipped_keys StringInfo structure */
static StringInfo
json_unique_builder_get_skipped_keys(JsonUniqueBuilderState *cxt)
{
StringInfo out = &cxt->skipped_keys;
if (!out->data)
{
MemoryContext oldcxt = MemoryContextSwitchTo(cxt->mcxt);
initStringInfo(out);
MemoryContextSwitchTo(oldcxt);
}
return out;
}
/*
* json_object_agg transition function.
*
* aggregate two input columns as a single json object value.
*/
Datum
json_object_agg_transfn(PG_FUNCTION_ARGS)
static Datum
json_object_agg_transfn_worker(FunctionCallInfo fcinfo,
bool absent_on_null, bool unique_keys)
{
MemoryContext aggcontext,
oldcontext;
JsonAggState *state;
StringInfo out;
Datum arg;
bool skip;
int key_offset;
if (!AggCheckCallContext(fcinfo, &aggcontext))
{
@ -882,6 +1081,10 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
oldcontext = MemoryContextSwitchTo(aggcontext);
state = (JsonAggState *) palloc(sizeof(JsonAggState));
state->str = makeStringInfo();
if (unique_keys)
json_unique_builder_init(&state->unique_check);
else
memset(&state->unique_check, 0, sizeof(state->unique_check));
MemoryContextSwitchTo(oldcontext);
arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
@ -909,7 +1112,6 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
else
{
state = (JsonAggState *) PG_GETARG_POINTER(0);
appendStringInfoString(state->str, ", ");
}
/*
@ -925,11 +1127,49 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("field name must not be null")));
/* Skip null values if absent_on_null */
skip = absent_on_null && PG_ARGISNULL(2);
if (skip)
{
/* If key uniqueness check is needed we must save skipped keys */
if (!unique_keys)
PG_RETURN_POINTER(state);
out = json_unique_builder_get_skipped_keys(&state->unique_check);
}
else
{
out = state->str;
/*
* Append comma delimiter only if we have already outputted some fields
* after the initial string "{ ".
*/
if (out->len > 2)
appendStringInfoString(out, ", ");
}
arg = PG_GETARG_DATUM(1);
datum_to_json(arg, false, state->str, state->key_category,
key_offset = out->len;
datum_to_json(arg, false, out, state->key_category,
state->key_output_func, true);
if (unique_keys)
{
const char *key = &out->data[key_offset];
if (!json_unique_check_key(&state->unique_check.check, key, 0))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
errmsg("duplicate JSON key %s", key)));
if (skip)
PG_RETURN_POINTER(state);
}
appendStringInfoString(state->str, " : ");
if (PG_ARGISNULL(2))
@ -943,6 +1183,42 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state);
}
/*
* json_object_agg aggregate function
*/
Datum
json_object_agg_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, false, false);
}
/*
* json_object_agg_strict aggregate function
*/
Datum
json_object_agg_strict_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, true, false);
}
/*
* json_object_agg_unique aggregate function
*/
Datum
json_object_agg_unique_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, false, true);
}
/*
* json_object_agg_unique_strict aggregate function
*/
Datum
json_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, true, true);
}
/*
* json_object_agg final function.
*/
@ -960,6 +1236,8 @@ json_object_agg_finalfn(PG_FUNCTION_ARGS)
if (state == NULL)
PG_RETURN_NULL();
json_unique_builder_free(&state->unique_check);
/* Else return state with appropriate object terminator added */
PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, " }"));
}
@ -984,25 +1262,14 @@ catenate_stringinfo_string(StringInfo buffer, const char *addon)
return result;
}
/*
* SQL function json_build_object(variadic "any")
*/
Datum
json_build_object(PG_FUNCTION_ARGS)
json_build_object_worker(int nargs, Datum *args, bool *nulls, Oid *types,
bool absent_on_null, bool unique_keys)
{
int nargs;
int i;
const char *sep = "";
StringInfo result;
Datum *args;
bool *nulls;
Oid *types;
/* fetch argument values to build the object */
nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
if (nargs < 0)
PG_RETURN_NULL();
JsonUniqueBuilderState unique_check;
if (nargs % 2 != 0)
ereport(ERROR,
@ -1016,19 +1283,58 @@ json_build_object(PG_FUNCTION_ARGS)
appendStringInfoChar(result, '{');
if (unique_keys)
json_unique_builder_init(&unique_check);
for (i = 0; i < nargs; i += 2)
{
appendStringInfoString(result, sep);
sep = ", ";
StringInfo out;
bool skip;
int key_offset;
/* Skip null values if absent_on_null */
skip = absent_on_null && nulls[i + 1];
if (skip)
{
/* If key uniqueness check is needed we must save skipped keys */
if (!unique_keys)
continue;
out = json_unique_builder_get_skipped_keys(&unique_check);
}
else
{
appendStringInfoString(result, sep);
sep = ", ";
out = result;
}
/* process key */
if (nulls[i])
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument %d cannot be null", i + 1),
errmsg("argument %d cannot be null", i + 1),
errhint("Object keys should be text.")));
add_json(args[i], false, result, types[i], true);
/* save key offset before key appending */
key_offset = out->len;
add_json(args[i], false, out, types[i], true);
if (unique_keys)
{
/* check key uniqueness after key appending */
const char *key = &out->data[key_offset];
if (!json_unique_check_key(&unique_check.check, key, 0))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
errmsg("duplicate JSON key %s", key)));
if (skip)
continue;
}
appendStringInfoString(result, " : ");
@ -1038,7 +1344,29 @@ json_build_object(PG_FUNCTION_ARGS)
appendStringInfoChar(result, '}');
PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
if (unique_keys)
json_unique_builder_free(&unique_check);
return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
}
/*
* SQL function json_build_object(variadic "any")
*/
Datum
json_build_object(PG_FUNCTION_ARGS)
{
Datum *args;
bool *nulls;
Oid *types;
/* build argument values to build the object */
int nargs = extract_variadic_args(fcinfo, 0, true,
&args, &types, &nulls);
if (nargs < 0)
PG_RETURN_NULL();
PG_RETURN_DATUM(json_build_object_worker(nargs, args, nulls, types, false, false));
}
/*
@ -1050,25 +1378,13 @@ json_build_object_noargs(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
}
/*
* SQL function json_build_array(variadic "any")
*/
Datum
json_build_array(PG_FUNCTION_ARGS)
json_build_array_worker(int nargs, Datum *args, bool *nulls, Oid *types,
bool absent_on_null)
{
int nargs;
int i;
const char *sep = "";
StringInfo result;
Datum *args;
bool *nulls;
Oid *types;
/* fetch argument values to build the array */
nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
if (nargs < 0)
PG_RETURN_NULL();
result = makeStringInfo();
@ -1076,6 +1392,9 @@ json_build_array(PG_FUNCTION_ARGS)
for (i = 0; i < nargs; i++)
{
if (absent_on_null && nulls[i])
continue;
appendStringInfoString(result, sep);
sep = ", ";
add_json(args[i], nulls[i], result, types[i], false);
@ -1083,7 +1402,26 @@ json_build_array(PG_FUNCTION_ARGS)
appendStringInfoChar(result, ']');
PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
}
/*
* SQL function json_build_array(variadic "any")
*/
Datum
json_build_array(PG_FUNCTION_ARGS)
{
Datum *args;
bool *nulls;
Oid *types;
/* build argument values to build the object */
int nargs = extract_variadic_args(fcinfo, 0, true,
&args, &types, &nulls);
if (nargs < 0)
PG_RETURN_NULL();
PG_RETURN_DATUM(json_build_array_worker(nargs, args, nulls, types, false));
}
/*