1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

SQL/JSON: add standard JSON constructor functions

This commit introduces the SQL/JSON standard-conforming constructors for
JSON types:

JSON_ARRAY()
JSON_ARRAYAGG()
JSON_OBJECT()
JSON_OBJECTAGG()

Most of the functionality was already present in PostgreSQL-specific
functions, but these include some new functionality such as the ability
to skip or include NULL values, and to allow duplicate keys or throw an
error when they are found, as well as the standard-specified syntax to
specify output type and format.

Author: Nikita Glukhov <n.gluhov@postgrespro.ru>
Author: Teodor Sigaev <teodor@sigaev.ru>
Author: Oleg Bartunov <obartunov@gmail.com>
Author: Alexander Korotkov <aekorotkov@gmail.com>
Author: Amit Langote <amitlangote09@gmail.com>

Reviewers have included (in no particular order) Andres Freund, Alexander
Korotkov, Pavel Stehule, Andrew Alsup, Erik Rijkers, Zihong Yu,
Himanshu Upadhyaya, Daniel Gustafsson, Justin Pryzby.

Discussion: https://postgr.es/m/CAF4Au4w2x-5LTnN_bxky-mq4=WOqsGsxSpENCzHRAzSnEd8+WQ@mail.gmail.com
Discussion: https://postgr.es/m/cd0bb935-0158-78a7-08b5-904886deac4b@postgrespro.ru
Discussion: https://postgr.es/m/20220616233130.rparivafipt6doj3@alap3.anarazel.de
Discussion: https://postgr.es/m/abd9b83b-aa66-f230-3d6d-734817f0995d%40postgresql.org
This commit is contained in:
Alvaro Herrera
2023-03-29 12:11:36 +02:00
parent 38b7437b90
commit 7081ac46ac
42 changed files with 4472 additions and 143 deletions

View File

@ -13,7 +13,9 @@
*/
#include "postgres.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_type.h"
#include "common/hashfn.h"
#include "funcapi.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
@ -42,6 +44,34 @@ typedef enum /* type categories for datum_to_json */
JSONTYPE_OTHER /* all else */
} JsonTypeCategory;
/*
* Support for fast key uniqueness checking.
*
* We maintain a hash table of used keys in JSON objects for fast detection
* of duplicates.
*/
/*
 * Common context for key uniqueness check.
 *
 * This is a pointer to a dynahash table; json_unique_check_init() creates
 * the table and json_unique_check_key() inserts JsonUniqueHashEntry items
 * into it.
 */
typedef struct HTAB *JsonUniqueCheckState; /* hash table for key names */
/*
 * Hash entry for JsonUniqueCheckState.
 *
 * The whole struct serves as the hash key: json_unique_check_init() sets
 * both keysize and entrysize to sizeof(JsonUniqueHashEntry).
 */
typedef struct JsonUniqueHashEntry
{
/* key text; json_unique_check_key() stores the caller's pointer as-is */
const char *key;
/* length of key in bytes, taken with strlen() by json_unique_check_key() */
int key_len;
/*
 * Compared before the key text, so equal keys with different ids do not
 * collide.  The callers visible here always pass 0.
 * NOTE(review): presumably identifies the enclosing object - confirm.
 */
int object_id;
} JsonUniqueHashEntry;
/*
 * Context struct for key uniqueness check during JSON building.
 *
 * Set up by json_unique_builder_init().  skipped_keys starts with a NULL
 * data pointer and is only allocated, inside mcxt, the first time
 * json_unique_builder_get_throwawaybuf() is called.
 */
typedef struct JsonUniqueBuilderState
{
JsonUniqueCheckState check; /* unique check */
StringInfoData skipped_keys; /* skipped keys with NULL values */
MemoryContext mcxt; /* context for saving skipped keys */
} JsonUniqueBuilderState;
/* State struct for JSON aggregation */
typedef struct JsonAggState
{
StringInfo str;
@ -49,6 +79,7 @@ typedef struct JsonAggState
Oid key_output_func;
JsonTypeCategory val_category;
Oid val_output_func;
JsonUniqueBuilderState unique_check;
} JsonAggState;
static void composite_to_json(Datum composite, StringInfo result,
@ -723,6 +754,48 @@ row_to_json_pretty(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
}
/*
 * Report whether converting a value of the given type to JSON is immutable.
 *
 * The type is classified with json_categorize_type() and the answer depends
 * only on the resulting category:
 *
 *	- booleans, JSON values and NULLs are always immutable;
 *	- date/time categories are always treated as mutable, because they
 *	  depend on timezone;
 *	- arrays and composites are conservatively reported as mutable.
 *	  XXX we should drill down into their elements/fields, but do not;
 *	- everything else is as immutable as its output function.
 */
bool
to_json_is_immutable(Oid typoid)
{
	JsonTypeCategory category;
	Oid			outfunc;
	bool		immutable = false;

	json_categorize_type(typoid, &category, &outfunc);

	/* No default: case, so the compiler can warn about unhandled categories */
	switch (category)
	{
		case JSONTYPE_BOOL:
		case JSONTYPE_JSON:
		case JSONTYPE_NULL:
			immutable = true;
			break;

		case JSONTYPE_NUMERIC:
		case JSONTYPE_CAST:
		case JSONTYPE_OTHER:
			immutable = (func_volatile(outfunc) == PROVOLATILE_IMMUTABLE);
			break;

		case JSONTYPE_DATE:
		case JSONTYPE_TIMESTAMP:
		case JSONTYPE_TIMESTAMPTZ:
			immutable = false;
			break;

		case JSONTYPE_ARRAY:
			immutable = false;	/* TODO recurse into elements */
			break;

		case JSONTYPE_COMPOSITE:
			immutable = false;	/* TODO recurse into fields */
			break;
	}

	return immutable;
}
/*
* SQL function to_json(anyvalue)
*/
@ -755,8 +828,8 @@ to_json(PG_FUNCTION_ARGS)
*
* aggregate input column as a json array value.
*/
Datum
json_agg_transfn(PG_FUNCTION_ARGS)
static Datum
json_agg_transfn_worker(FunctionCallInfo fcinfo, bool absent_on_null)
{
MemoryContext aggcontext,
oldcontext;
@ -796,9 +869,14 @@ json_agg_transfn(PG_FUNCTION_ARGS)
else
{
state = (JsonAggState *) PG_GETARG_POINTER(0);
appendStringInfoString(state->str, ", ");
}
if (absent_on_null && PG_ARGISNULL(1))
PG_RETURN_POINTER(state);
if (state->str->len > 1)
appendStringInfoString(state->str, ", ");
/* fast path for NULLs */
if (PG_ARGISNULL(1))
{
@ -810,7 +888,7 @@ json_agg_transfn(PG_FUNCTION_ARGS)
val = PG_GETARG_DATUM(1);
/* add some whitespace if structured type and not first item */
if (!PG_ARGISNULL(0) &&
if (!PG_ARGISNULL(0) && state->str->len > 1 &&
(state->val_category == JSONTYPE_ARRAY ||
state->val_category == JSONTYPE_COMPOSITE))
{
@ -828,6 +906,25 @@ json_agg_transfn(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state);
}
/*
* json_agg aggregate function
*/
Datum
json_agg_transfn(PG_FUNCTION_ARGS)
{
return json_agg_transfn_worker(fcinfo, false);
}
/*
* json_agg_strict aggregate function
*/
Datum
json_agg_strict_transfn(PG_FUNCTION_ARGS)
{
return json_agg_transfn_worker(fcinfo, true);
}
/*
* json_agg final function
*/
@ -851,18 +948,120 @@ json_agg_finalfn(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(catenate_stringinfo_string(state->str, "]"));
}
/* Functions implementing hash table for key uniqueness check */

/*
 * Hash function for JsonUniqueHashEntry keys.
 *
 * The hash combines the object id with the key_len bytes of key text the
 * entry points to.  The struct itself is never hashed byte-wise, so keysize
 * is unused.
 *
 * The argument is cast without dropping its const qualifier, and the result
 * is returned directly: it is a plain uint32, not a Datum, so no Datum
 * conversion macro applies.
 */
static uint32
json_unique_hash(const void *key, Size keysize)
{
	const JsonUniqueHashEntry *entry = (const JsonUniqueHashEntry *) key;
	uint32		hash = hash_bytes_uint32(entry->object_id);

	hash ^= hash_bytes((const unsigned char *) entry->key, entry->key_len);

	return hash;
}
/*
 * Comparison function for JsonUniqueHashEntry keys.
 *
 * Entries are ordered by object id first, then by key length, and finally
 * by the key bytes themselves.  Returns 0 only when all three agree.
 * keysize is unused.
 */
static int
json_unique_hash_match(const void *key1, const void *key2, Size keysize)
{
	const JsonUniqueHashEntry *lhs = key1;
	const JsonUniqueHashEntry *rhs = key2;

	if (lhs->object_id > rhs->object_id)
		return 1;
	if (lhs->object_id < rhs->object_id)
		return -1;

	if (lhs->key_len > rhs->key_len)
		return 1;
	if (lhs->key_len < rhs->key_len)
		return -1;

	/* same object and same length: compare the text */
	return strncmp(lhs->key, rhs->key, lhs->key_len);
}
/*
 * Uniqueness detection support.
 *
 * In order to detect uniqueness during building or parsing of a JSON
 * object, we maintain a hash table of key names already seen.
 */

/*
 * Create the hash table used for duplicate-key detection and store it in
 * *cxt.
 *
 * Each element is a JsonUniqueHashEntry that is also its own key.  Hashing
 * and comparison go through json_unique_hash() and json_unique_hash_match(),
 * and the table is requested in the memory context that is current at the
 * time of the call.
 */
static void
json_unique_check_init(JsonUniqueCheckState *cxt)
{
	HASHCTL		hctl;
	int			flags = HASH_ELEM | HASH_CONTEXT | HASH_FUNCTION | HASH_COMPARE;

	memset(&hctl, 0, sizeof(hctl));
	hctl.hash = json_unique_hash;
	hctl.match = json_unique_hash_match;
	hctl.hcxt = CurrentMemoryContext;
	hctl.keysize = sizeof(JsonUniqueHashEntry);
	hctl.entrysize = sizeof(JsonUniqueHashEntry);

	*cxt = hash_create("json object hashtable", 32, &hctl, flags);
}
/*
 * Initialize the uniqueness-check state used while building a JSON object.
 *
 * The current memory context is remembered for the skipped-keys buffer,
 * which is left unallocated (NULL data pointer) until it is first needed.
 */
static void
json_unique_builder_init(JsonUniqueBuilderState *cxt)
{
	cxt->skipped_keys.data = NULL;
	cxt->mcxt = CurrentMemoryContext;

	json_unique_check_init(&cxt->check);
}
/*
 * Record a key in the uniqueness hash table.
 *
 * Returns true if (object_id, key) was not in the table yet, false if an
 * equal entry was already present.  key must be NUL-terminated; its length
 * is taken with strlen().
 *
 * NOTE(review): only the pointer is stored in the table, the key text is
 * not copied.  Callers in this file pass pointers into StringInfo buffers;
 * confirm those bytes stay valid and unmodified for as long as the table is
 * consulted.
 */
static bool
json_unique_check_key(JsonUniqueCheckState *cxt, const char *key, int object_id)
{
	bool		seen;
	JsonUniqueHashEntry probe = {
		.key = key,
		.key_len = strlen(key),
		.object_id = object_id
	};

	(void) hash_search(*cxt, &probe, HASH_ENTER, &seen);

	return !seen;
}
/*
 * Hand out the scratch StringInfo kept in the builder state.
 *
 * The buffer holds a key name that will not be stored in the output object
 * (its value is NULL and is being skipped) but still has to take part in
 * duplicate key detection.  It is allocated in cxt->mcxt on first use; on
 * later calls only its length is set back to zero, so earlier contents are
 * overwritten by whatever the caller appends next.
 */
static StringInfo
json_unique_builder_get_throwawaybuf(JsonUniqueBuilderState *cxt)
{
	StringInfo	buf = &cxt->skipped_keys;
	MemoryContext saved;

	if (buf->data != NULL)
	{
		/* Already allocated: just reset the string to empty */
		buf->len = 0;
		return buf;
	}

	/* First use: allocate the buffer in the builder's own context */
	saved = MemoryContextSwitchTo(cxt->mcxt);
	initStringInfo(buf);
	MemoryContextSwitchTo(saved);

	return buf;
}
/*
* json_object_agg transition function.
*
* aggregate two input columns as a single json object value.
*/
Datum
json_object_agg_transfn(PG_FUNCTION_ARGS)
static Datum
json_object_agg_transfn_worker(FunctionCallInfo fcinfo,
bool absent_on_null, bool unique_keys)
{
MemoryContext aggcontext,
oldcontext;
JsonAggState *state;
StringInfo out;
Datum arg;
bool skip;
int key_offset;
if (!AggCheckCallContext(fcinfo, &aggcontext))
{
@ -877,12 +1076,16 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
/*
* Make the StringInfo in a context where it will persist for the
* duration of the aggregate call. Switching context is only needed
* for this initial step, as the StringInfo routines make sure they
* use the right context to enlarge the object if necessary.
* for this initial step, as the StringInfo and dynahash routines make
* sure they use the right context to enlarge the object if necessary.
*/
oldcontext = MemoryContextSwitchTo(aggcontext);
state = (JsonAggState *) palloc(sizeof(JsonAggState));
state->str = makeStringInfo();
if (unique_keys)
json_unique_builder_init(&state->unique_check);
else
memset(&state->unique_check, 0, sizeof(state->unique_check));
MemoryContextSwitchTo(oldcontext);
arg_type = get_fn_expr_argtype(fcinfo->flinfo, 1);
@ -910,7 +1113,6 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
else
{
state = (JsonAggState *) PG_GETARG_POINTER(0);
appendStringInfoString(state->str, ", ");
}
/*
@ -923,14 +1125,56 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
if (PG_ARGISNULL(1))
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("field name must not be null")));
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null value not allowed for object key")));
/* Skip null values if absent_on_null */
skip = absent_on_null && PG_ARGISNULL(2);
if (skip)
{
/*
* We got a NULL value and we're not storing those; if we're not
* testing key uniqueness, we're done. If we are, use the throwaway
* buffer to store the key name so that we can check it.
*/
if (!unique_keys)
PG_RETURN_POINTER(state);
out = json_unique_builder_get_throwawaybuf(&state->unique_check);
}
else
{
out = state->str;
/*
* Append comma delimiter only if we have already output some fields
* after the initial string "{ ".
*/
if (out->len > 2)
appendStringInfoString(out, ", ");
}
arg = PG_GETARG_DATUM(1);
datum_to_json(arg, false, state->str, state->key_category,
key_offset = out->len;
datum_to_json(arg, false, out, state->key_category,
state->key_output_func, true);
if (unique_keys)
{
const char *key = &out->data[key_offset];
if (!json_unique_check_key(&state->unique_check.check, key, 0))
ereport(ERROR,
errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
errmsg("duplicate JSON key %s", key));
if (skip)
PG_RETURN_POINTER(state);
}
appendStringInfoString(state->str, " : ");
if (PG_ARGISNULL(2))
@ -944,6 +1188,42 @@ json_object_agg_transfn(PG_FUNCTION_ARGS)
PG_RETURN_POINTER(state);
}
/*
* json_object_agg aggregate function
*/
Datum
json_object_agg_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, false, false);
}
/*
* json_object_agg_strict aggregate function
*/
Datum
json_object_agg_strict_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, true, false);
}
/*
* json_object_agg_unique aggregate function
*/
Datum
json_object_agg_unique_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, false, true);
}
/*
* json_object_agg_unique_strict aggregate function
*/
Datum
json_object_agg_unique_strict_transfn(PG_FUNCTION_ARGS)
{
return json_object_agg_transfn_worker(fcinfo, true, true);
}
/*
* json_object_agg final function.
*/
@ -985,25 +1265,14 @@ catenate_stringinfo_string(StringInfo buffer, const char *addon)
return result;
}
/*
* SQL function json_build_object(variadic "any")
*/
Datum
json_build_object(PG_FUNCTION_ARGS)
json_build_object_worker(int nargs, Datum *args, bool *nulls, Oid *types,
bool absent_on_null, bool unique_keys)
{
int nargs;
int i;
const char *sep = "";
StringInfo result;
Datum *args;
bool *nulls;
Oid *types;
/* fetch argument values to build the object */
nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
if (nargs < 0)
PG_RETURN_NULL();
JsonUniqueBuilderState unique_check;
if (nargs % 2 != 0)
ereport(ERROR,
@ -1017,19 +1286,57 @@ json_build_object(PG_FUNCTION_ARGS)
appendStringInfoChar(result, '{');
if (unique_keys)
json_unique_builder_init(&unique_check);
for (i = 0; i < nargs; i += 2)
{
appendStringInfoString(result, sep);
sep = ", ";
StringInfo out;
bool skip;
int key_offset;
/* Skip null values if absent_on_null */
skip = absent_on_null && nulls[i + 1];
if (skip)
{
/* If key uniqueness check is needed we must save skipped keys */
if (!unique_keys)
continue;
out = json_unique_builder_get_throwawaybuf(&unique_check);
}
else
{
appendStringInfoString(result, sep);
sep = ", ";
out = result;
}
/* process key */
if (nulls[i])
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("argument %d cannot be null", i + 1),
errhint("Object keys should be text.")));
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("null value not allowed for object key")));
add_json(args[i], false, result, types[i], true);
/* save key offset before appending it */
key_offset = out->len;
add_json(args[i], false, out, types[i], true);
if (unique_keys)
{
/* check key uniqueness after key appending */
const char *key = &out->data[key_offset];
if (!json_unique_check_key(&unique_check.check, key, 0))
ereport(ERROR,
errcode(ERRCODE_DUPLICATE_JSON_OBJECT_KEY_VALUE),
errmsg("duplicate JSON key %s", key));
if (skip)
continue;
}
appendStringInfoString(result, " : ");
@ -1039,7 +1346,27 @@ json_build_object(PG_FUNCTION_ARGS)
appendStringInfoChar(result, '}');
PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
}
/*
 * SQL function json_build_object(variadic "any")
 *
 * Collects the variadic arguments and passes them to
 * json_build_object_worker() with NULL-skipping and key uniqueness checking
 * both disabled.
 */
Datum
json_build_object(PG_FUNCTION_ARGS)
{
	Datum	   *values;
	bool	   *isnull;
	Oid		   *typeoids;
	int			count;

	/* fetch the argument values used to build the object */
	count = extract_variadic_args(fcinfo, 0, true,
								  &values, &typeoids, &isnull);

	/* a negative count means there is nothing to build: return SQL NULL */
	if (count < 0)
		PG_RETURN_NULL();

	PG_RETURN_DATUM(json_build_object_worker(count, values, isnull, typeoids,
											 false, false));
}
/*
@ -1051,25 +1378,13 @@ json_build_object_noargs(PG_FUNCTION_ARGS)
PG_RETURN_TEXT_P(cstring_to_text_with_len("{}", 2));
}
/*
* SQL function json_build_array(variadic "any")
*/
Datum
json_build_array(PG_FUNCTION_ARGS)
json_build_array_worker(int nargs, Datum *args, bool *nulls, Oid *types,
bool absent_on_null)
{
int nargs;
int i;
const char *sep = "";
StringInfo result;
Datum *args;
bool *nulls;
Oid *types;
/* fetch argument values to build the array */
nargs = extract_variadic_args(fcinfo, 0, false, &args, &types, &nulls);
if (nargs < 0)
PG_RETURN_NULL();
result = makeStringInfo();
@ -1077,6 +1392,9 @@ json_build_array(PG_FUNCTION_ARGS)
for (i = 0; i < nargs; i++)
{
if (absent_on_null && nulls[i])
continue;
appendStringInfoString(result, sep);
sep = ", ";
add_json(args[i], nulls[i], result, types[i], false);
@ -1084,7 +1402,27 @@ json_build_array(PG_FUNCTION_ARGS)
appendStringInfoChar(result, ']');
PG_RETURN_TEXT_P(cstring_to_text_with_len(result->data, result->len));
return PointerGetDatum(cstring_to_text_with_len(result->data, result->len));
}
/*
 * SQL function json_build_array(variadic "any")
 *
 * Collects the variadic arguments and passes them to
 * json_build_array_worker() with NULL-skipping disabled.
 */
Datum
json_build_array(PG_FUNCTION_ARGS)
{
	Datum	   *values;
	bool	   *isnull;
	Oid		   *typeoids;
	int			count;

	/* fetch the argument values used to build the array */
	count = extract_variadic_args(fcinfo, 0, true,
								  &values, &typeoids, &isnull);

	/* a negative count means there is nothing to build: return SQL NULL */
	if (count < 0)
		PG_RETURN_NULL();

	PG_RETURN_DATUM(json_build_array_worker(count, values, isnull, typeoids,
											false));
}
/*