1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-02 09:02:37 +03:00

Add json(b)_to_tsvector function

Jsonb has a complex nature, so there isn't a best-for-everything way to convert
it to tsvector for full text search. The current to_tsvector(json(b)) converts
only string values, but it's also possible to index keys, numerics and even
boolean values. To support that, json(b)_to_tsvector has a second required
argument containing a list of the desired types of json fields. The second
argument is currently a jsonb scalar or array, which leaves the possibility of
adding new options in the future.

Bump catalog version

Author: Dmitry Dolgov with some editorization by me
Reviewed by: Teodor Sigaev
Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
This commit is contained in:
Teodor Sigaev
2018-04-07 20:58:03 +03:00
parent 529ab7bd1f
commit 1c1791e000
10 changed files with 630 additions and 50 deletions

View File

@ -60,6 +60,7 @@ typedef struct IterateJsonStringValuesState
JsonIterateStringValuesAction action; /* an action that will be applied
* to each json value */
void *action_state; /* any necessary context for iteration */
uint32 flags; /* what kind of elements from a json we want to iterate */
} IterateJsonStringValuesState;
/* state for transform_json_string_values function */
@ -474,8 +475,9 @@ static void setPathArray(JsonbIterator **it, Datum *path_elems,
int level, Jsonb *newval, uint32 nelems, int op_type);
static void addJsonbToParseState(JsonbParseState **jbps, Jsonb *jb);
/* function supporting iterate_json_string_values */
static void iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype);
/* function supporting iterate_json_values */
static void iterate_values_scalar(void *state, char *token, JsonTokenType tokentype);
static void iterate_values_object_field_start(void *state, char *fname, bool isnull);
/* functions supporting transform_json_string_values */
static void transform_string_values_object_start(void *state);
@ -4939,11 +4941,79 @@ setPathArray(JsonbIterator **it, Datum *path_elems, bool *path_nulls,
}
/*
 * Parse information about what elements of a jsonb document we want to iterate
 * in functions iterate_json(b)_values. This information is presented in jsonb
 * format, so that it can be easily extended in the future.
 *
 * jb must be a jsonb array (or a scalar, which the iterator reports as an
 * array) whose elements are all strings.  Each string is matched
 * case-insensitively against "all", "key", "string", "numeric" and "boolean",
 * and the corresponding jti* bit is OR-ed into the result.
 *
 * Returns the accumulated flag bitmask (0 if the array is empty).  Raises an
 * ERROR with ERRCODE_INVALID_PARAMETER_VALUE if jb is not an array/scalar, if
 * an element is not a string, or if a string is not one of the known names.
 */
uint32
parse_jsonb_index_flags(Jsonb *jb)
{
	JsonbIterator *it;
	JsonbValue	v;
	JsonbIteratorToken type;
	uint32		flags = 0;

	it = JsonbIteratorInit(&jb->root);

	type = JsonbIteratorNext(&it, &v, false);

	/*
	 * We iterate over array (scalar internally is represented as array, so, we
	 * will accept it too) to check all its elements. Flag names are chosen
	 * the same as jsonb_typeof uses.
	 */
	if (type != WJB_BEGIN_ARRAY)
		ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
						errmsg("wrong flag type, only arrays and scalars are allowed")));

	while ((type = JsonbIteratorNext(&it, &v, false)) == WJB_ELEM)
	{
		if (v.type != jbvString)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("flag array element is not a string"),
					 errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\" and \"all\"")));

		/*
		 * The jsonb string is not NUL-terminated, so each test checks the
		 * exact length first and then compares exactly that many bytes.  The
		 * compared byte count must equal the literal's length, otherwise a
		 * value that merely shares a prefix would be accepted.
		 */
		if (v.val.string.len == 3 &&
			pg_strncasecmp(v.val.string.val, "all", 3) == 0)
			flags |= jtiAll;
		else if (v.val.string.len == 3 &&
				 pg_strncasecmp(v.val.string.val, "key", 3) == 0)
			flags |= jtiKey;
		else if (v.val.string.len == 6 &&
				 pg_strncasecmp(v.val.string.val, "string", 6) == 0)
			flags |= jtiString;
		else if (v.val.string.len == 7 &&
				 pg_strncasecmp(v.val.string.val, "numeric", 7) == 0)
			flags |= jtiNumeric;
		else if (v.val.string.len == 7 &&
				 pg_strncasecmp(v.val.string.val, "boolean", 7) == 0)
			flags |= jtiBool;
		else
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
					 errmsg("wrong flag in flag array: \"%s\"",
							pnstrdup(v.val.string.val, v.val.string.len)),
					 errhint("Possible values are: \"string\", \"numeric\", \"boolean\", \"key\" and \"all\"")));
	}

	/* user should not get it */
	if (type != WJB_END_ARRAY)
		elog(ERROR, "unexpected end of flag array");

	/* get final WJB_DONE and free iterator */
	JsonbIteratorNext(&it, &v, false);

	return flags;
}
/*
* Iterate over jsonb values or elements, specified by flags, and pass them
* together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesAction action)
iterate_jsonb_values(Jsonb *jb, uint32 flags, void *state,
JsonIterateStringValuesAction action)
{
JsonbIterator *it;
JsonbValue v;
@ -4951,21 +5021,67 @@ iterate_jsonb_string_values(Jsonb *jb, void *state, JsonIterateStringValuesActio
it = JsonbIteratorInit(&jb->root);
/*
* Just recursively iterate over the jsonb and call the callback on all
* corresponding elements
*/
while ((type = JsonbIteratorNext(&it, &v, false)) != WJB_DONE)
{
if ((type == WJB_VALUE || type == WJB_ELEM) && v.type == jbvString)
if (type == WJB_KEY)
{
action(state, v.val.string.val, v.val.string.len);
if (flags & jtiKey)
action(state, v.val.string.val, v.val.string.len);
continue;
}
else if (!(type == WJB_VALUE || type == WJB_ELEM))
{
/* do not call callback for composite JsonbValue */
continue;
}
/* JsonbValue is a value of object or element of array */
switch(v.type)
{
case jbvString:
if (flags & jtiString)
action(state, v.val.string.val, v.val.string.len);
break;
case jbvNumeric:
if (flags & jtiNumeric)
{
char *val;
val = DatumGetCString(DirectFunctionCall1(numeric_out,
NumericGetDatum(v.val.numeric)));
action(state, val, strlen(val));
pfree(val);
}
break;
case jbvBool:
if (flags & jtiBool)
{
if (v.val.boolean)
action(state, "true", 4);
else
action(state, "false", 5);
}
break;
default:
/* do not call callback for composite JsonbValue */
break;
}
}
}
/*
* Iterate over json string values or elements, and pass them together with an
* iteration state to a specified JsonIterateStringValuesAction.
* Iterate over json values and elements, specified by flags, and pass them
* together with an iteration state to a specified JsonIterateStringValuesAction.
*/
void
iterate_json_string_values(text *json, void *action_state, JsonIterateStringValuesAction action)
iterate_json_values(text *json, uint32 flags, void *action_state,
JsonIterateStringValuesAction action)
{
JsonLexContext *lex = makeJsonLexContext(json, true);
JsonSemAction *sem = palloc0(sizeof(JsonSemAction));
@ -4974,24 +5090,55 @@ iterate_json_string_values(text *json, void *action_state, JsonIterateStringValu
state->lex = lex;
state->action = action;
state->action_state = action_state;
state->flags = flags;
sem->semstate = (void *) state;
sem->scalar = iterate_string_values_scalar;
sem->scalar = iterate_values_scalar;
sem->object_field_start = iterate_values_object_field_start;
pg_parse_json(lex, sem);
}
/*
* An auxiliary function for iterate_json_string_values to invoke a specified
* JsonIterateStringValuesAction.
* An auxiliary function for iterate_json_values to invoke a specified
* JsonIterateStringValuesAction for specified values.
*/
static void
iterate_string_values_scalar(void *state, char *token, JsonTokenType tokentype)
iterate_values_scalar(void *state, char *token, JsonTokenType tokentype)
{
IterateJsonStringValuesState *_state = (IterateJsonStringValuesState *) state;
if (tokentype == JSON_TOKEN_STRING)
_state->action(_state->action_state, token, strlen(token));
switch(tokentype)
{
case JSON_TOKEN_STRING:
if (_state->flags & jtiString)
_state->action(_state->action_state, token, strlen(token));
break;
case JSON_TOKEN_NUMBER:
if (_state->flags & jtiNumeric)
_state->action(_state->action_state, token, strlen(token));
break;
case JSON_TOKEN_TRUE:
case JSON_TOKEN_FALSE:
if (_state->flags & jtiBool)
_state->action(_state->action_state, token, strlen(token));
break;
default:
/* do not call callback for any other token */
break;
}
}
/*
 * Object-field-start callback used by iterate_json_values.
 *
 * When the iteration flags include jtiKey, a palloc'd copy of the field name
 * is handed to the configured action together with its length; otherwise the
 * field name is ignored.  The isnull argument is not consulted.
 */
static void
iterate_values_object_field_start(void *state, char *fname, bool isnull)
{
	IterateJsonStringValuesState *iter_state = (IterateJsonStringValuesState *) state;
	char	   *key_copy;

	/* keys were not requested: nothing to report for this field */
	if ((iter_state->flags & jtiKey) == 0)
		return;

	key_copy = pstrdup(fname);
	iter_state->action(iter_state->action_state, key_copy, strlen(key_copy));
}
/*