|
|
|
@ -14,6 +14,7 @@
|
|
|
|
|
#include "postgres.h"
|
|
|
|
|
|
|
|
|
|
#include "access/gin.h"
|
|
|
|
|
#include "access/hash.h"
|
|
|
|
|
#include "access/skey.h"
|
|
|
|
|
#include "catalog/pg_collation.h"
|
|
|
|
|
#include "catalog/pg_type.h"
|
|
|
|
@ -26,14 +27,15 @@ typedef struct PathHashStack
|
|
|
|
|
struct PathHashStack *parent;
|
|
|
|
|
} PathHashStack;
|
|
|
|
|
|
|
|
|
|
static text *make_text_key(const char *str, int len, char flag);
|
|
|
|
|
static text *make_scalar_key(const JsonbValue *scalarVal, char flag);
|
|
|
|
|
static Datum make_text_key(char flag, const char *str, int len);
|
|
|
|
|
static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
*
|
|
|
|
|
* jsonb_ops GIN opclass support functions
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
|
gin_compare_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
@ -65,80 +67,49 @@ gin_extract_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
|
Jsonb *jb = (Jsonb *) PG_GETARG_JSONB(0);
|
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
|
Datum *entries = NULL;
|
|
|
|
|
int total = 2 * JB_ROOT_COUNT(jb);
|
|
|
|
|
int i = 0,
|
|
|
|
|
r;
|
|
|
|
|
JsonbIterator *it;
|
|
|
|
|
JsonbValue v;
|
|
|
|
|
int i = 0,
|
|
|
|
|
r;
|
|
|
|
|
Datum *entries;
|
|
|
|
|
|
|
|
|
|
/* If the root level is empty, we certainly have no keys */
|
|
|
|
|
if (total == 0)
|
|
|
|
|
{
|
|
|
|
|
*nentries = 0;
|
|
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Otherwise, use 2 * root count as initial estimate of result size */
|
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * total);
|
|
|
|
|
|
|
|
|
|
it = JsonbIteratorInit(&jb->root);
|
|
|
|
|
|
|
|
|
|
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
|
|
|
|
|
{
|
|
|
|
|
/* Since we recurse into the object, we might need more space */
|
|
|
|
|
if (i >= total)
|
|
|
|
|
{
|
|
|
|
|
total *= 2;
|
|
|
|
|
entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Serialize keys and elements equivalently, but only when elements
|
|
|
|
|
* are Jsonb strings. Otherwise, serialize elements as values. Array
|
|
|
|
|
* elements are indexed as keys, for the benefit of
|
|
|
|
|
* JsonbExistsStrategyNumber. Our definition of existence does not
|
|
|
|
|
* allow for checking the existence of a non-jbvString element (just
|
|
|
|
|
* like the definition of the underlying operator), because the
|
|
|
|
|
* operator takes a text rhs argument (which is taken as a proxy for
|
|
|
|
|
* an equivalent Jsonb string).
|
|
|
|
|
*
|
|
|
|
|
* The way existence is represented does not preclude an alternative
|
|
|
|
|
* existence operator, that takes as its rhs value an arbitrarily
|
|
|
|
|
* internally-typed Jsonb. The only reason that isn't the case here
|
|
|
|
|
* is that the existence operator is only really intended to determine
|
|
|
|
|
* if an object has a certain key (object pair keys are of course
|
|
|
|
|
* invariably strings), which is extended to jsonb arrays. You could
|
|
|
|
|
* think of the default Jsonb definition of existence as being
|
|
|
|
|
* equivalent to a definition where all types of scalar array elements
|
|
|
|
|
* are keys that we can check the existence of, while just forbidding
|
|
|
|
|
* non-string notation. This inflexibility prevents the user from
|
|
|
|
|
* having to qualify that the rhs string is a raw scalar string (that
|
|
|
|
|
* is, naturally no internal string quoting is required for the text
|
|
|
|
|
* argument), and allows us to not set the reset flag for
|
|
|
|
|
* JsonbExistsStrategyNumber, since we know that keys are strings for
|
|
|
|
|
* both objects and arrays, and don't have to further account for type
|
|
|
|
|
* mismatch. Not having to set the reset flag makes it less than
|
|
|
|
|
* tempting to tighten up the definition of existence to preclude
|
|
|
|
|
* array elements entirely, which would arguably be a simpler
|
|
|
|
|
* alternative. In any case the infrastructure used to implement the
|
|
|
|
|
* existence operator could trivially support this hypothetical,
|
|
|
|
|
* slightly distinct definition of existence.
|
|
|
|
|
*/
|
|
|
|
|
switch (r)
|
|
|
|
|
{
|
|
|
|
|
case WJB_KEY:
|
|
|
|
|
/* Serialize key separately, for existence strategies */
|
|
|
|
|
entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM));
|
|
|
|
|
entries[i++] = make_scalar_key(&v, true);
|
|
|
|
|
break;
|
|
|
|
|
case WJB_ELEM:
|
|
|
|
|
if (v.type == jbvString)
|
|
|
|
|
entries[i++] = PointerGetDatum(make_scalar_key(&v, JKEYELEM));
|
|
|
|
|
else
|
|
|
|
|
entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL));
|
|
|
|
|
/* Pretend string array elements are keys, see jsonb.h */
|
|
|
|
|
entries[i++] = make_scalar_key(&v, (v.type == jbvString));
|
|
|
|
|
break;
|
|
|
|
|
case WJB_VALUE:
|
|
|
|
|
entries[i++] = PointerGetDatum(make_scalar_key(&v, JVAL));
|
|
|
|
|
entries[i++] = make_scalar_key(&v, false);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
continue;
|
|
|
|
|
/* we can ignore structural items */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
@ -163,30 +134,30 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS)
|
|
|
|
|
PG_GETARG_DATUM(0),
|
|
|
|
|
PointerGetDatum(nentries)));
|
|
|
|
|
/* ...although "contains {}" requires a full index scan */
|
|
|
|
|
if (entries == NULL)
|
|
|
|
|
if (*nentries == 0)
|
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Query is a text string, which we treat as a key */
|
|
|
|
|
text *query = PG_GETARG_TEXT_PP(0);
|
|
|
|
|
text *item;
|
|
|
|
|
|
|
|
|
|
*nentries = 1;
|
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum));
|
|
|
|
|
item = make_text_key(VARDATA_ANY(query), VARSIZE_ANY_EXHDR(query),
|
|
|
|
|
JKEYELEM);
|
|
|
|
|
entries[0] = PointerGetDatum(item);
|
|
|
|
|
entries[0] = make_text_key(JGINFLAG_KEY,
|
|
|
|
|
VARDATA_ANY(query),
|
|
|
|
|
VARSIZE_ANY_EXHDR(query));
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsAnyStrategyNumber ||
|
|
|
|
|
strategy == JsonbExistsAllStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Query is a text array; each element is treated as a key */
|
|
|
|
|
ArrayType *query = PG_GETARG_ARRAYTYPE_P(0);
|
|
|
|
|
Datum *key_datums;
|
|
|
|
|
bool *key_nulls;
|
|
|
|
|
int key_count;
|
|
|
|
|
int i,
|
|
|
|
|
j;
|
|
|
|
|
text *item;
|
|
|
|
|
|
|
|
|
|
deconstruct_array(query,
|
|
|
|
|
TEXTOID, -1, false, 'i',
|
|
|
|
@ -194,15 +165,14 @@ gin_extract_jsonb_query(PG_FUNCTION_ARGS)
|
|
|
|
|
|
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * key_count);
|
|
|
|
|
|
|
|
|
|
for (i = 0, j = 0; i < key_count; ++i)
|
|
|
|
|
for (i = 0, j = 0; i < key_count; i++)
|
|
|
|
|
{
|
|
|
|
|
/* Nulls in the array are ignored */
|
|
|
|
|
if (key_nulls[i])
|
|
|
|
|
continue;
|
|
|
|
|
item = make_text_key(VARDATA(key_datums[i]),
|
|
|
|
|
VARSIZE(key_datums[i]) - VARHDRSZ,
|
|
|
|
|
JKEYELEM);
|
|
|
|
|
entries[j++] = PointerGetDatum(item);
|
|
|
|
|
entries[j++] = make_text_key(JGINFLAG_KEY,
|
|
|
|
|
VARDATA_ANY(key_datums[i]),
|
|
|
|
|
VARSIZE_ANY_EXHDR(key_datums[i]));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*nentries = j;
|
|
|
|
@ -236,13 +206,12 @@ gin_consistent_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
if (strategy == JsonbContainsStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* Index doesn't have information about correspondence of Jsonb keys
|
|
|
|
|
* and values (as distinct from GIN keys, which a key/value pair is
|
|
|
|
|
* stored as), so invariably we recheck. Besides, there are some
|
|
|
|
|
* special rules around the containment of raw scalar arrays and
|
|
|
|
|
* regular arrays that are not represented here. However, if all of
|
|
|
|
|
* the keys are not present, that's sufficient reason to return false
|
|
|
|
|
* and finish immediately.
|
|
|
|
|
* We must always recheck, since we can't tell from the index whether
|
|
|
|
|
* the positions of the matched items match the structure of the query
|
|
|
|
|
* object. (Even if we could, we'd also have to worry about hashed
|
|
|
|
|
* keys and the index's failure to distinguish keys from string array
|
|
|
|
|
* elements.) However, the tuple certainly doesn't match unless it
|
|
|
|
|
* contains all the query keys.
|
|
|
|
|
*/
|
|
|
|
|
*recheck = true;
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
@ -256,20 +225,27 @@ gin_consistent_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Existence of key guaranteed in default search mode */
|
|
|
|
|
*recheck = false;
|
|
|
|
|
/*
|
|
|
|
|
* Although the key is certainly present in the index, we must recheck
|
|
|
|
|
* because (1) the key might be hashed, and (2) the index match might
|
|
|
|
|
* be for a key that's not at top level of the JSON object. For (1),
|
|
|
|
|
* we could look at the query key to see if it's hashed and not
|
|
|
|
|
* recheck if not, but the index lacks enough info to tell about (2).
|
|
|
|
|
*/
|
|
|
|
|
*recheck = true;
|
|
|
|
|
res = true;
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsAnyStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Existence of key guaranteed in default search mode */
|
|
|
|
|
*recheck = false;
|
|
|
|
|
/* As for plain exists, we must recheck */
|
|
|
|
|
*recheck = true;
|
|
|
|
|
res = true;
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsAllStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Testing for the presence of all keys gives an exact result */
|
|
|
|
|
*recheck = false;
|
|
|
|
|
/* As for plain exists, we must recheck */
|
|
|
|
|
*recheck = true;
|
|
|
|
|
/* ... but unless all the keys are present, we can say "false" */
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
|
{
|
|
|
|
|
if (!check[i])
|
|
|
|
@ -295,19 +271,18 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
int32 nkeys = PG_GETARG_INT32(3);
|
|
|
|
|
|
|
|
|
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
|
|
|
|
GinTernaryValue res = GIN_TRUE;
|
|
|
|
|
|
|
|
|
|
GinTernaryValue res = GIN_MAYBE;
|
|
|
|
|
int32 i;
|
|
|
|
|
|
|
|
|
|
if (strategy == JsonbContainsStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
bool has_maybe = false;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* All extracted keys must be present. Combination of GIN_MAYBE and
|
|
|
|
|
* GIN_TRUE gives GIN_MAYBE result because then all keys may be
|
|
|
|
|
* present.
|
|
|
|
|
* Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
|
|
|
|
|
* corresponds to always forcing recheck in the regular consistent
|
|
|
|
|
* function, for the reasons listed there.
|
|
|
|
|
*/
|
|
|
|
|
if (strategy == JsonbContainsStrategyNumber ||
|
|
|
|
|
strategy == JsonbExistsAllStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* All extracted keys must be present */
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
|
{
|
|
|
|
|
if (check[i] == GIN_FALSE)
|
|
|
|
@ -315,55 +290,21 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
res = GIN_FALSE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (check[i] == GIN_MAYBE)
|
|
|
|
|
{
|
|
|
|
|
res = GIN_MAYBE;
|
|
|
|
|
has_maybe = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Index doesn't have information about correspondence of Jsonb keys
|
|
|
|
|
* and values (as distinct from GIN keys, which a key/value pair is
|
|
|
|
|
* stored as), so invariably we recheck. This is also reflected in
|
|
|
|
|
* how GIN_MAYBE is given in response to there being no GIN_MAYBE
|
|
|
|
|
* input.
|
|
|
|
|
*/
|
|
|
|
|
if (!has_maybe && res == GIN_TRUE)
|
|
|
|
|
res = GIN_MAYBE;
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsStrategyNumber ||
|
|
|
|
|
strategy == JsonbExistsAnyStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Existence of key guaranteed in default search mode */
|
|
|
|
|
/* At least one extracted key must be present */
|
|
|
|
|
res = GIN_FALSE;
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
|
{
|
|
|
|
|
if (check[i] == GIN_TRUE)
|
|
|
|
|
{
|
|
|
|
|
res = GIN_TRUE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (check[i] == GIN_MAYBE)
|
|
|
|
|
if (check[i] == GIN_TRUE ||
|
|
|
|
|
check[i] == GIN_MAYBE)
|
|
|
|
|
{
|
|
|
|
|
res = GIN_MAYBE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (strategy == JsonbExistsAllStrategyNumber)
|
|
|
|
|
{
|
|
|
|
|
/* Testing for the presence of all keys gives an exact result */
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
|
{
|
|
|
|
|
if (check[i] == GIN_FALSE)
|
|
|
|
|
{
|
|
|
|
|
res = GIN_FALSE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (check[i] == GIN_MAYBE)
|
|
|
|
|
{
|
|
|
|
|
res = GIN_MAYBE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
@ -376,7 +317,151 @@ gin_triconsistent_jsonb(PG_FUNCTION_ARGS)
|
|
|
|
|
*
|
|
|
|
|
* jsonb_hash_ops GIN opclass support functions
|
|
|
|
|
*
|
|
|
|
|
* In a jsonb_hash_ops index, the GIN keys are uint32 hashes, one per JSON
|
|
|
|
|
* value; but the JSON key(s) leading to each value are also included in its
|
|
|
|
|
* hash computation. This means we can only support containment queries,
|
|
|
|
|
* but the index can distinguish, for example, {"foo": 42} from {"bar": 42}
|
|
|
|
|
* since different hashes will be generated.
|
|
|
|
|
*
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
|
gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
|
Jsonb *jb = PG_GETARG_JSONB(0);
|
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
|
int total = 2 * JB_ROOT_COUNT(jb);
|
|
|
|
|
JsonbIterator *it;
|
|
|
|
|
JsonbValue v;
|
|
|
|
|
PathHashStack tail;
|
|
|
|
|
PathHashStack *stack;
|
|
|
|
|
int i = 0,
|
|
|
|
|
r;
|
|
|
|
|
Datum *entries;
|
|
|
|
|
|
|
|
|
|
/* If the root level is empty, we certainly have no keys */
|
|
|
|
|
if (total == 0)
|
|
|
|
|
{
|
|
|
|
|
*nentries = 0;
|
|
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Otherwise, use 2 * root count as initial estimate of result size */
|
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * total);
|
|
|
|
|
|
|
|
|
|
/* We keep a stack of hashes corresponding to parent key levels */
|
|
|
|
|
tail.parent = NULL;
|
|
|
|
|
tail.hash = 0;
|
|
|
|
|
stack = &tail;
|
|
|
|
|
|
|
|
|
|
it = JsonbIteratorInit(&jb->root);
|
|
|
|
|
|
|
|
|
|
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
|
|
|
|
|
{
|
|
|
|
|
PathHashStack *parent;
|
|
|
|
|
|
|
|
|
|
/* Since we recurse into the object, we might need more space */
|
|
|
|
|
if (i >= total)
|
|
|
|
|
{
|
|
|
|
|
total *= 2;
|
|
|
|
|
entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (r)
|
|
|
|
|
{
|
|
|
|
|
case WJB_BEGIN_ARRAY:
|
|
|
|
|
case WJB_BEGIN_OBJECT:
|
|
|
|
|
/* Push a stack level for this object */
|
|
|
|
|
parent = stack;
|
|
|
|
|
stack = (PathHashStack *) palloc(sizeof(PathHashStack));
|
|
|
|
|
|
|
|
|
|
if (parent->parent)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* We pass forward hashes from previous container nesting
|
|
|
|
|
* levels so that nested arrays with an outermost nested
|
|
|
|
|
* object will have element hashes mixed with the
|
|
|
|
|
* outermost key. It's also somewhat useful to have
|
|
|
|
|
* nested objects' innermost values have hashes that are a
|
|
|
|
|
* function of not just their own key, but outer keys too.
|
|
|
|
|
*
|
|
|
|
|
* Nesting an array within another array will not alter
|
|
|
|
|
* innermost scalar element hash values, but that seems
|
|
|
|
|
* inconsequential.
|
|
|
|
|
*/
|
|
|
|
|
stack->hash = parent->hash;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* At the outermost level, initialize hash with container
|
|
|
|
|
* type proxy value. Note that this makes JB_FARRAY and
|
|
|
|
|
* JB_FOBJECT part of the on-disk representation, but they
|
|
|
|
|
* are that in the base jsonb object storage already.
|
|
|
|
|
*/
|
|
|
|
|
stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
|
|
|
|
|
}
|
|
|
|
|
stack->parent = parent;
|
|
|
|
|
break;
|
|
|
|
|
case WJB_KEY:
|
|
|
|
|
/* initialize hash from parent */
|
|
|
|
|
stack->hash = stack->parent->hash;
|
|
|
|
|
/* and mix in this key */
|
|
|
|
|
JsonbHashScalarValue(&v, &stack->hash);
|
|
|
|
|
/* hash is now ready to incorporate the value */
|
|
|
|
|
break;
|
|
|
|
|
case WJB_ELEM:
|
|
|
|
|
/* array elements use parent hash mixed with element's hash */
|
|
|
|
|
stack->hash = stack->parent->hash;
|
|
|
|
|
/* FALL THRU */
|
|
|
|
|
case WJB_VALUE:
|
|
|
|
|
/* mix the element or value's hash into the prepared hash */
|
|
|
|
|
JsonbHashScalarValue(&v, &stack->hash);
|
|
|
|
|
/* and emit an index entry */
|
|
|
|
|
entries[i++] = UInt32GetDatum(stack->hash);
|
|
|
|
|
/* Note: we assume we'll see KEY before another VALUE */
|
|
|
|
|
break;
|
|
|
|
|
case WJB_END_ARRAY:
|
|
|
|
|
case WJB_END_OBJECT:
|
|
|
|
|
/* Pop the stack */
|
|
|
|
|
parent = stack->parent;
|
|
|
|
|
pfree(stack);
|
|
|
|
|
stack = parent;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
elog(ERROR, "invalid JsonbIteratorNext rc: %d", r);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*nentries = i;
|
|
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
|
gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
|
|
|
|
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
|
|
|
|
Datum *entries;
|
|
|
|
|
|
|
|
|
|
if (strategy != JsonbContainsStrategyNumber)
|
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
|
|
|
|
|
|
|
|
/* Query is a jsonb, so just apply gin_extract_jsonb_hash ... */
|
|
|
|
|
entries = (Datum *)
|
|
|
|
|
DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_hash,
|
|
|
|
|
PG_GETARG_DATUM(0),
|
|
|
|
|
PointerGetDatum(nentries)));
|
|
|
|
|
|
|
|
|
|
/* ... although "contains {}" requires a full index scan */
|
|
|
|
|
if (*nentries == 0)
|
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
|
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
|
gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
@ -395,13 +480,13 @@ gin_consistent_jsonb_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* jsonb_hash_ops index doesn't have information about correspondence of
|
|
|
|
|
* Jsonb keys and values (as distinct from GIN keys, which a key/value
|
|
|
|
|
* pair is stored as), so invariably we recheck. Besides, there are some
|
|
|
|
|
* jsonb_hash_ops is necessarily lossy, not only because of hash
|
|
|
|
|
* collisions but also because it doesn't preserve complete information
|
|
|
|
|
* about the structure of the JSON object. Besides, there are some
|
|
|
|
|
* special rules around the containment of raw scalar arrays and regular
|
|
|
|
|
* arrays that are not represented here. However, if all of the keys are
|
|
|
|
|
* not present, that's sufficient reason to return false and finish
|
|
|
|
|
* immediately.
|
|
|
|
|
* arrays that are not handled here. So we must always recheck a match.
|
|
|
|
|
* However, if not all of the keys are present, the tuple certainly
|
|
|
|
|
* doesn't match.
|
|
|
|
|
*/
|
|
|
|
|
*recheck = true;
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
@ -426,17 +511,16 @@ gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
int32 nkeys = PG_GETARG_INT32(3);
|
|
|
|
|
|
|
|
|
|
/* Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); */
|
|
|
|
|
GinTernaryValue res = GIN_TRUE;
|
|
|
|
|
GinTernaryValue res = GIN_MAYBE;
|
|
|
|
|
int32 i;
|
|
|
|
|
bool has_maybe = false;
|
|
|
|
|
|
|
|
|
|
if (strategy != JsonbContainsStrategyNumber)
|
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* All extracted keys must be present. A combination of GIN_MAYBE and
|
|
|
|
|
* GIN_TRUE induces a GIN_MAYBE result, because then all keys may be
|
|
|
|
|
* present.
|
|
|
|
|
* Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this
|
|
|
|
|
* corresponds to always forcing recheck in the regular consistent
|
|
|
|
|
* function, for the reasons listed there.
|
|
|
|
|
*/
|
|
|
|
|
for (i = 0; i < nkeys; i++)
|
|
|
|
|
{
|
|
|
|
@ -445,161 +529,39 @@ gin_triconsistent_jsonb_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
res = GIN_FALSE;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (check[i] == GIN_MAYBE)
|
|
|
|
|
{
|
|
|
|
|
res = GIN_MAYBE;
|
|
|
|
|
has_maybe = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* jsonb_hash_ops index doesn't have information about correspondence of
|
|
|
|
|
* Jsonb keys and values (as distinct from GIN keys, which for this
|
|
|
|
|
* opclass are a hash of a pair, or a hash of just an element), so
|
|
|
|
|
* invariably we recheck. This is also reflected in how GIN_MAYBE is
|
|
|
|
|
* given in response to there being no GIN_MAYBE input.
|
|
|
|
|
*/
|
|
|
|
|
if (!has_maybe && res == GIN_TRUE)
|
|
|
|
|
res = GIN_MAYBE;
|
|
|
|
|
|
|
|
|
|
PG_RETURN_GIN_TERNARY_VALUE(res);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
|
gin_extract_jsonb_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
|
Jsonb *jb = PG_GETARG_JSONB(0);
|
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
|
int total = 2 * JB_ROOT_COUNT(jb);
|
|
|
|
|
JsonbIterator *it;
|
|
|
|
|
JsonbValue v;
|
|
|
|
|
PathHashStack tail;
|
|
|
|
|
PathHashStack *stack;
|
|
|
|
|
int i = 0,
|
|
|
|
|
r;
|
|
|
|
|
Datum *entries = NULL;
|
|
|
|
|
|
|
|
|
|
if (total == 0)
|
|
|
|
|
{
|
|
|
|
|
*nentries = 0;
|
|
|
|
|
PG_RETURN_POINTER(NULL);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
entries = (Datum *) palloc(sizeof(Datum) * total);
|
|
|
|
|
|
|
|
|
|
it = JsonbIteratorInit(&jb->root);
|
|
|
|
|
|
|
|
|
|
tail.parent = NULL;
|
|
|
|
|
tail.hash = 0;
|
|
|
|
|
stack = &tail;
|
|
|
|
|
|
|
|
|
|
while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
|
|
|
|
|
{
|
|
|
|
|
PathHashStack *tmp;
|
|
|
|
|
|
|
|
|
|
if (i >= total)
|
|
|
|
|
{
|
|
|
|
|
total *= 2;
|
|
|
|
|
entries = (Datum *) repalloc(entries, sizeof(Datum) * total);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
switch (r)
|
|
|
|
|
{
|
|
|
|
|
case WJB_BEGIN_ARRAY:
|
|
|
|
|
case WJB_BEGIN_OBJECT:
|
|
|
|
|
tmp = stack;
|
|
|
|
|
stack = (PathHashStack *) palloc(sizeof(PathHashStack));
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Nesting an array within another array will not alter
|
|
|
|
|
* innermost scalar element hash values, but that seems
|
|
|
|
|
* inconsequential
|
|
|
|
|
* Construct a jsonb_ops GIN key from a flag byte and a textual representation
|
|
|
|
|
* (which need not be null-terminated). This function is responsible
|
|
|
|
|
* for hashing overlength text representations; it will add the
|
|
|
|
|
* JGINFLAG_HASHED bit to the flag value if it does that.
|
|
|
|
|
*/
|
|
|
|
|
if (tmp->parent)
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* We pass forward hashes from previous container nesting
|
|
|
|
|
* levels so that nested arrays with an outermost nested
|
|
|
|
|
* object will have element hashes mixed with the
|
|
|
|
|
* outermost key. It's also somewhat useful to have
|
|
|
|
|
* nested objects innermost values have hashes that are a
|
|
|
|
|
* function of not just their own key, but outer keys too.
|
|
|
|
|
*/
|
|
|
|
|
stack->hash = tmp->hash;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
/*
|
|
|
|
|
* At least nested level, initialize with stable container
|
|
|
|
|
* type proxy value
|
|
|
|
|
*/
|
|
|
|
|
stack->hash = (r == WJB_BEGIN_ARRAY) ? JB_FARRAY : JB_FOBJECT;
|
|
|
|
|
}
|
|
|
|
|
stack->parent = tmp;
|
|
|
|
|
break;
|
|
|
|
|
case WJB_KEY:
|
|
|
|
|
/* Initialize hash from parent */
|
|
|
|
|
stack->hash = stack->parent->hash;
|
|
|
|
|
JsonbHashScalarValue(&v, &stack->hash);
|
|
|
|
|
break;
|
|
|
|
|
case WJB_ELEM:
|
|
|
|
|
/* Elements have parent hash mixed in separately */
|
|
|
|
|
stack->hash = stack->parent->hash;
|
|
|
|
|
case WJB_VALUE:
|
|
|
|
|
/* Element/value case */
|
|
|
|
|
JsonbHashScalarValue(&v, &stack->hash);
|
|
|
|
|
entries[i++] = UInt32GetDatum(stack->hash);
|
|
|
|
|
break;
|
|
|
|
|
case WJB_END_ARRAY:
|
|
|
|
|
case WJB_END_OBJECT:
|
|
|
|
|
/* Pop the stack */
|
|
|
|
|
tmp = stack->parent;
|
|
|
|
|
pfree(stack);
|
|
|
|
|
stack = tmp;
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
elog(ERROR, "invalid JsonbIteratorNext rc: %d", r);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
*nentries = i;
|
|
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Datum
|
|
|
|
|
gin_extract_jsonb_query_hash(PG_FUNCTION_ARGS)
|
|
|
|
|
{
|
|
|
|
|
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
|
|
|
|
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
|
|
|
|
int32 *searchMode = (int32 *) PG_GETARG_POINTER(6);
|
|
|
|
|
Datum *entries;
|
|
|
|
|
|
|
|
|
|
if (strategy != JsonbContainsStrategyNumber)
|
|
|
|
|
elog(ERROR, "unrecognized strategy number: %d", strategy);
|
|
|
|
|
|
|
|
|
|
/* Query is a jsonb, so just apply gin_extract_jsonb... */
|
|
|
|
|
entries = (Datum *)
|
|
|
|
|
DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_hash,
|
|
|
|
|
PG_GETARG_DATUM(0),
|
|
|
|
|
PointerGetDatum(nentries)));
|
|
|
|
|
|
|
|
|
|
/* ...although "contains {}" requires a full index scan */
|
|
|
|
|
if (entries == NULL)
|
|
|
|
|
*searchMode = GIN_SEARCH_MODE_ALL;
|
|
|
|
|
|
|
|
|
|
PG_RETURN_POINTER(entries);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
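/*
* Build a GIN key (a text value) from a flag byte and a textual
* representation given as a pointer plus length, so the string need not be
* null-terminated.  Text longer than JGIN_MAXLENGTH is replaced by the
* 8-hex-digit form of its hash, and JGINFLAG_HASHED is then added to the
* flag value.
*/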
|
|
|
|
|
static text *
|
|
|
|
|
make_text_key(const char *str, int len, char flag)
|
|
|
|
|
static Datum
|
|
|
|
|
make_text_key(char flag, const char *str, int len)
|
|
|
|
|
{
|
|
|
|
|
text *item;
|
|
|
|
|
char hashbuf[10];
|
|
|
|
|
|
|
|
|
|
if (len > JGIN_MAXLENGTH)
|
|
|
|
|
{
|
|
|
|
|
uint32 hashval;
|
|
|
|
|
|
|
|
|
|
hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len));
|
|
|
|
|
snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval);
|
|
|
|
|
str = hashbuf;
|
|
|
|
|
len = 8;
|
|
|
|
|
flag |= JGINFLAG_HASHED;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Now build the text Datum. For simplicity we build a 4-byte-header
|
|
|
|
|
* varlena text Datum here, but we expect it will get converted to short
|
|
|
|
|
* header format when stored in the index.
|
|
|
|
|
*/
|
|
|
|
|
item = (text *) palloc(VARHDRSZ + len + 1);
|
|
|
|
|
SET_VARSIZE(item, VARHDRSZ + len + 1);
|
|
|
|
|
|
|
|
|
@ -607,31 +569,39 @@ make_text_key(const char *str, int len, char flag)
|
|
|
|
|
|
|
|
|
|
memcpy(VARDATA(item) + 1, str, len);
|
|
|
|
|
|
|
|
|
|
return item;
|
|
|
|
|
return PointerGetDatum(item);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Create a textual representation of a jsonbValue for GIN storage.
|
|
|
|
|
* Create a textual representation of a JsonbValue that will serve as a GIN
|
|
|
|
|
* key in a jsonb_ops index. is_key is true if the JsonbValue is a key,
|
|
|
|
|
* or if it is a string array element (since we pretend those are keys,
|
|
|
|
|
* see jsonb.h).
|
|
|
|
|
*/
|
|
|
|
|
static text *
|
|
|
|
|
make_scalar_key(const JsonbValue *scalarVal, char flag)
|
|
|
|
|
static Datum
|
|
|
|
|
make_scalar_key(const JsonbValue *scalarVal, bool is_key)
|
|
|
|
|
{
|
|
|
|
|
text *item;
|
|
|
|
|
Datum item;
|
|
|
|
|
char *cstr;
|
|
|
|
|
|
|
|
|
|
switch (scalarVal->type)
|
|
|
|
|
{
|
|
|
|
|
case jbvNull:
|
|
|
|
|
item = make_text_key("n", 1, flag);
|
|
|
|
|
Assert(!is_key);
|
|
|
|
|
item = make_text_key(JGINFLAG_NULL, "", 0);
|
|
|
|
|
break;
|
|
|
|
|
case jbvBool:
|
|
|
|
|
item = make_text_key(scalarVal->val.boolean ? "t" : "f", 1, flag);
|
|
|
|
|
Assert(!is_key);
|
|
|
|
|
item = make_text_key(JGINFLAG_BOOL,
|
|
|
|
|
scalarVal->val.boolean ? "t" : "f", 1);
|
|
|
|
|
break;
|
|
|
|
|
case jbvNumeric:
|
|
|
|
|
Assert(!is_key);
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* A normalized textual representation, free of trailing zeroes,
|
|
|
|
|
* is required.
|
|
|
|
|
* A normalized textual representation, free of trailing zeroes,
|
|
|
|
|
* is required so that numerically equal values will produce equal
|
|
|
|
|
* strings.
|
|
|
|
|
*
|
|
|
|
|
* It isn't ideal that numerics are stored in a relatively bulky
|
|
|
|
|
* textual format. However, it's a notationally convenient way of
|
|
|
|
@ -639,15 +609,18 @@ make_scalar_key(const JsonbValue *scalarVal, char flag)
|
|
|
|
|
* strings takes precedence.
|
|
|
|
|
*/
|
|
|
|
|
cstr = numeric_normalize(scalarVal->val.numeric);
|
|
|
|
|
item = make_text_key(cstr, strlen(cstr), flag);
|
|
|
|
|
item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr));
|
|
|
|
|
pfree(cstr);
|
|
|
|
|
break;
|
|
|
|
|
case jbvString:
|
|
|
|
|
item = make_text_key(scalarVal->val.string.val, scalarVal->val.string.len,
|
|
|
|
|
flag);
|
|
|
|
|
item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR,
|
|
|
|
|
scalarVal->val.string.val,
|
|
|
|
|
scalarVal->val.string.len);
|
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
elog(ERROR, "invalid jsonb scalar type");
|
|
|
|
|
elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type);
|
|
|
|
|
item = 0; /* keep compiler quiet */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return item;
|
|
|
|
|