1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-22 21:53:06 +03:00

Add json(b)_to_tsvector function

Jsonb has a complex nature, so there isn't a best-for-everything way to convert
it to tsvector for full text search. The current to_tsvector(json(b)) converts
only string values, but it's also possible to index keys, numerics and even
boolean values. To support that, json(b)_to_tsvector has a second, required
argument containing a list of the desired types of json fields. The second
argument is currently a jsonb scalar or array, leaving the possibility of
adding new options in the future.

Bump catalog version

Author: Dmitry Dolgov with some editorization by me
Reviewed by: Teodor Sigaev
Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
This commit is contained in:
Teodor Sigaev
2018-04-07 20:58:03 +03:00
parent 529ab7bd1f
commit 1c1791e000
10 changed files with 630 additions and 50 deletions

View File

@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
Datum
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
/*
* Worker function for jsonb(_string)_to_tsvector(_byid)
*
* Builds a TSVector from the jsonb document "jb" on behalf of the SQL-callable
* wrappers.  "cfgId" is stored in the TSVectorBuildState that is passed, along
* with the ParsedText accumulator, as callback state.  "flags" is forwarded
* unchanged to iterate_jsonb_values(), which is given add_to_tsvector as its
* per-element callback.  The return value is make_tsvector() applied to the
* accumulated ParsedText.
*/
static TSVector
jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
{
Oid cfgId = PG_GETARG_OID(0);
Jsonb *jb = PG_GETARG_JSONB_P(1);
TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -281,11 +281,50 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
iterate_jsonb_string_values(jb, &state, add_to_tsvector);
iterate_jsonb_values(jb, flags, &state, add_to_tsvector);
return make_tsvector(&prs);
}
/*
 * jsonb_string_to_tsvector_byid
 *
 * SQL-callable wrapper: argument 0 is the text search configuration OID,
 * argument 1 is the jsonb document.  The conversion is delegated to
 * jsonb_to_tsvector_worker() with jtiString as the flags value, and the
 * worker's result is returned as-is.
 */
Datum
jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	Jsonb	   *jb = PG_GETARG_JSONB_P(1);
	TSVector	result;

	result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);

	/* Free jb if fetching the argument produced a separate copy. */
	PG_FREE_IF_COPY(jb, 1);

	/*
	 * The worker already returns the finished TSVector; there is no local
	 * ParsedText here to run make_tsvector() on again.
	 */
	PG_RETURN_TSVECTOR(result);
}
/*
 * jsonb_string_to_tsvector
 *
 * SQL-callable wrapper taking only the jsonb document (argument 0).  The
 * configuration OID comes from getTSCurrentConfig(true); the conversion is
 * delegated to jsonb_to_tsvector_worker() with jtiString as the flags value.
 */
Datum
jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
{
	Jsonb	   *doc = PG_GETARG_JSONB_P(0);
	Oid			config = getTSCurrentConfig(true);
	TSVector	vector = jsonb_to_tsvector_worker(config, doc, jtiString);

	/* Free the document if fetching the argument produced a separate copy. */
	PG_FREE_IF_COPY(doc, 0);

	PG_RETURN_TSVECTOR(vector);
}
/*
 * jsonb_to_tsvector_byid
 *
 * SQL-callable wrapper: argument 0 is the text search configuration OID,
 * argument 1 the jsonb document, argument 2 a jsonb value that
 * parse_jsonb_index_flags() turns into the uint32 flags handed to
 * jsonb_to_tsvector_worker().
 */
Datum
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			config = PG_GETARG_OID(0);
	Jsonb	   *doc = PG_GETARG_JSONB_P(1);
	Jsonb	   *flagSpec = PG_GETARG_JSONB_P(2);
	uint32		indexFlags;
	TSVector	vector;

	indexFlags = parse_jsonb_index_flags(flagSpec);
	vector = jsonb_to_tsvector_worker(config, doc, indexFlags);

	/* Free either argument if fetching it produced a separate copy. */
	PG_FREE_IF_COPY(doc, 1);
	PG_FREE_IF_COPY(flagSpec, 2);

	PG_RETURN_TSVECTOR(vector);
}
@@ -294,20 +333,25 @@ Datum
jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
	/*
	 * SQL-callable wrapper: argument 0 is the jsonb document, argument 1 a
	 * jsonb value that parse_jsonb_index_flags() turns into the uint32 flags.
	 * The configuration OID comes from getTSCurrentConfig(true).
	 *
	 * The conversion goes straight to jsonb_to_tsvector_worker() so that the
	 * parsed flags are honoured; re-dispatching to jsonb_to_tsvector_byid
	 * with only two arguments would drop them and return before the
	 * arguments are freed.
	 */
	Jsonb	   *jb = PG_GETARG_JSONB_P(0);
	Jsonb	   *jbFlags = PG_GETARG_JSONB_P(1);
	Oid			cfgId;
	TSVector	result;
	uint32		flags = parse_jsonb_index_flags(jbFlags);

	cfgId = getTSCurrentConfig(true);
	result = jsonb_to_tsvector_worker(cfgId, jb, flags);

	/* Free either argument if fetching it produced a separate copy. */
	PG_FREE_IF_COPY(jb, 0);
	PG_FREE_IF_COPY(jbFlags, 1);

	PG_RETURN_TSVECTOR(result);
}
Datum
json_to_tsvector_byid(PG_FUNCTION_ARGS)
/*
* Worker function for json(_string)_to_tsvector(_byid)
*
* Builds a TSVector from the json text "json" on behalf of the SQL-callable
* wrappers.  "cfgId" is stored in the TSVectorBuildState that is passed, along
* with the ParsedText accumulator, as callback state.  "flags" is forwarded
* unchanged to iterate_json_values(), which is given add_to_tsvector as its
* per-element callback.  The return value is make_tsvector() applied to the
* accumulated ParsedText.
*/
static TSVector
json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
{
Oid cfgId = PG_GETARG_OID(0);
text *json = PG_GETARG_TEXT_P(1);
TSVector result;
TSVectorBuildState state;
ParsedText prs;
@@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
state.prs = &prs;
state.cfgId = cfgId;
iterate_json_string_values(json, &state, add_to_tsvector);
iterate_json_values(json, flags, &state, add_to_tsvector);
return make_tsvector(&prs);
}
/*
 * json_string_to_tsvector_byid
 *
 * SQL-callable wrapper: argument 0 is the text search configuration OID,
 * argument 1 is the json text.  The conversion is delegated to
 * json_to_tsvector_worker() with jtiString as the flags value, and the
 * worker's result is returned as-is.
 */
Datum
json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	text	   *json = PG_GETARG_TEXT_P(1);
	TSVector	result;

	result = json_to_tsvector_worker(cfgId, json, jtiString);

	/* Free json if fetching the argument produced a separate copy. */
	PG_FREE_IF_COPY(json, 1);

	/*
	 * The worker already returns the finished TSVector; there is no local
	 * ParsedText here to run make_tsvector() on again.
	 */
	PG_RETURN_TSVECTOR(result);
}
/*
 * json_string_to_tsvector
 *
 * SQL-callable wrapper taking only the json text (argument 0).  The
 * configuration OID comes from getTSCurrentConfig(true); the conversion is
 * delegated to json_to_tsvector_worker() with jtiString as the flags value.
 */
Datum
json_string_to_tsvector(PG_FUNCTION_ARGS)
{
	text	   *doc = PG_GETARG_TEXT_P(0);
	Oid			config = getTSCurrentConfig(true);
	TSVector	vector = json_to_tsvector_worker(config, doc, jtiString);

	/* Free the document if fetching the argument produced a separate copy. */
	PG_FREE_IF_COPY(doc, 0);

	PG_RETURN_TSVECTOR(vector);
}
/*
 * json_to_tsvector_byid
 *
 * SQL-callable wrapper: argument 0 is the text search configuration OID,
 * argument 1 the json text, argument 2 a jsonb value that
 * parse_jsonb_index_flags() turns into the uint32 flags handed to
 * json_to_tsvector_worker().
 */
Datum
json_to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			config = PG_GETARG_OID(0);
	text	   *doc = PG_GETARG_TEXT_P(1);
	Jsonb	   *flagSpec = PG_GETARG_JSONB_P(2);
	uint32		indexFlags;
	TSVector	vector;

	indexFlags = parse_jsonb_index_flags(flagSpec);
	vector = json_to_tsvector_worker(config, doc, indexFlags);

	/* Free either argument if fetching it produced a separate copy. */
	PG_FREE_IF_COPY(doc, 1);
	PG_FREE_IF_COPY(flagSpec, 2);

	PG_RETURN_TSVECTOR(vector);
}
@@ -329,12 +412,17 @@ Datum
json_to_tsvector(PG_FUNCTION_ARGS)
{
	/*
	 * SQL-callable wrapper: argument 0 is the json text, argument 1 a jsonb
	 * value that parse_jsonb_index_flags() turns into the uint32 flags.  The
	 * configuration OID comes from getTSCurrentConfig(true).
	 *
	 * The conversion goes straight to json_to_tsvector_worker() so that the
	 * parsed flags are honoured; re-dispatching to json_to_tsvector_byid with
	 * only two arguments would drop them and return before the arguments are
	 * freed.
	 */
	text	   *json = PG_GETARG_TEXT_P(0);
	Jsonb	   *jbFlags = PG_GETARG_JSONB_P(1);
	Oid			cfgId;
	TSVector	result;
	uint32		flags = parse_jsonb_index_flags(jbFlags);

	cfgId = getTSCurrentConfig(true);
	result = json_to_tsvector_worker(cfgId, json, flags);

	/* Free either argument if fetching it produced a separate copy. */
	PG_FREE_IF_COPY(json, 0);
	PG_FREE_IF_COPY(jbFlags, 1);

	PG_RETURN_TSVECTOR(result);
}
/*
@@ -353,7 +441,7 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
* First time through: initialize words array to a reasonable size.
* (parsetext() will realloc it bigger as needed.)
*/
prs->lenwords = Max(elem_len / 6, 64);
prs->lenwords = 16;
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
prs->curwords = 0;
prs->pos = 0;