mirror of
https://github.com/postgres/postgres.git
synced 2025-08-22 21:53:06 +03:00
Add json(b)_to_tsvector function
Jsonb has a complex nature, so there isn't a best-for-everything way to convert it to tsvector for full text search. The current to_tsvector(json(b)) converts only string values, but it's also possible to index keys, numerics and even boolean values. To solve that, json(b)_to_tsvector has a second, required argument containing a list of the desired types of json fields. The second argument is currently a jsonb scalar or array, with the possibility of adding new options in the future. Bump catalog version. Author: Dmitry Dolgov, with some editing by me. Reviewed by: Teodor Sigaev. Discussion: https://www.postgresql.org/message-id/CA+q6zcXJQbS1b4kJ_HeAOoOc=unfnOrUEL=KGgE32QKDww7d8g@mail.gmail.com
This commit is contained in:
@@ -267,12 +267,12 @@ to_tsvector(PG_FUNCTION_ARGS)
|
||||
PointerGetDatum(in)));
|
||||
}
|
||||
|
||||
Datum
|
||||
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
/*
|
||||
* Worker function for jsonb(_string)_to_tsvector(_byid)
|
||||
*/
|
||||
static TSVector
|
||||
jsonb_to_tsvector_worker(Oid cfgId, Jsonb *jb, uint32 flags)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
Jsonb *jb = PG_GETARG_JSONB_P(1);
|
||||
TSVector result;
|
||||
TSVectorBuildState state;
|
||||
ParsedText prs;
|
||||
|
||||
@@ -281,11 +281,50 @@ jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
state.prs = &prs;
|
||||
state.cfgId = cfgId;
|
||||
|
||||
iterate_jsonb_string_values(jb, &state, add_to_tsvector);
|
||||
iterate_jsonb_values(jb, flags, &state, add_to_tsvector);
|
||||
|
||||
return make_tsvector(&prs);
|
||||
}
|
||||
|
||||
Datum
|
||||
jsonb_string_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
Jsonb *jb = PG_GETARG_JSONB_P(1);
|
||||
TSVector result;
|
||||
|
||||
result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
|
||||
PG_FREE_IF_COPY(jb, 1);
|
||||
|
||||
result = make_tsvector(&prs);
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
jsonb_string_to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Jsonb *jb = PG_GETARG_JSONB_P(0);
|
||||
Oid cfgId;
|
||||
TSVector result;
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
result = jsonb_to_tsvector_worker(cfgId, jb, jtiString);
|
||||
PG_FREE_IF_COPY(jb, 0);
|
||||
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
Jsonb *jb = PG_GETARG_JSONB_P(1);
|
||||
Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
|
||||
TSVector result;
|
||||
uint32 flags = parse_jsonb_index_flags(jbFlags);
|
||||
|
||||
result = jsonb_to_tsvector_worker(cfgId, jb, flags);
|
||||
PG_FREE_IF_COPY(jb, 1);
|
||||
PG_FREE_IF_COPY(jbFlags, 2);
|
||||
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
@@ -294,20 +333,25 @@ Datum
|
||||
jsonb_to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Jsonb *jb = PG_GETARG_JSONB_P(0);
|
||||
Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
|
||||
Oid cfgId;
|
||||
TSVector result;
|
||||
uint32 flags = parse_jsonb_index_flags(jbFlags);
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
|
||||
ObjectIdGetDatum(cfgId),
|
||||
JsonbPGetDatum(jb)));
|
||||
result = jsonb_to_tsvector_worker(cfgId, jb, flags);
|
||||
PG_FREE_IF_COPY(jb, 0);
|
||||
PG_FREE_IF_COPY(jbFlags, 1);
|
||||
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
json_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
/*
|
||||
* Worker function for json(_string)_to_tsvector(_byid)
|
||||
*/
|
||||
static TSVector
|
||||
json_to_tsvector_worker(Oid cfgId, text *json, uint32 flags)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
text *json = PG_GETARG_TEXT_P(1);
|
||||
TSVector result;
|
||||
TSVectorBuildState state;
|
||||
ParsedText prs;
|
||||
|
||||
@@ -316,11 +360,50 @@ json_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
state.prs = &prs;
|
||||
state.cfgId = cfgId;
|
||||
|
||||
iterate_json_string_values(json, &state, add_to_tsvector);
|
||||
iterate_json_values(json, flags, &state, add_to_tsvector);
|
||||
|
||||
return make_tsvector(&prs);
|
||||
}
|
||||
|
||||
Datum
|
||||
json_string_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
text *json = PG_GETARG_TEXT_P(1);
|
||||
TSVector result;
|
||||
|
||||
result = json_to_tsvector_worker(cfgId, json, jtiString);
|
||||
PG_FREE_IF_COPY(json, 1);
|
||||
|
||||
result = make_tsvector(&prs);
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
json_string_to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *json = PG_GETARG_TEXT_P(0);
|
||||
Oid cfgId;
|
||||
TSVector result;
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
result = json_to_tsvector_worker(cfgId, json, jtiString);
|
||||
PG_FREE_IF_COPY(json, 0);
|
||||
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
json_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
text *json = PG_GETARG_TEXT_P(1);
|
||||
Jsonb *jbFlags = PG_GETARG_JSONB_P(2);
|
||||
TSVector result;
|
||||
uint32 flags = parse_jsonb_index_flags(jbFlags);
|
||||
|
||||
result = json_to_tsvector_worker(cfgId, json, flags);
|
||||
PG_FREE_IF_COPY(json, 1);
|
||||
PG_FREE_IF_COPY(jbFlags, 2);
|
||||
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
@@ -329,12 +412,17 @@ Datum
|
||||
json_to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *json = PG_GETARG_TEXT_P(0);
|
||||
Jsonb *jbFlags = PG_GETARG_JSONB_P(1);
|
||||
Oid cfgId;
|
||||
TSVector result;
|
||||
uint32 flags = parse_jsonb_index_flags(jbFlags);
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
|
||||
ObjectIdGetDatum(cfgId),
|
||||
PointerGetDatum(json)));
|
||||
result = json_to_tsvector_worker(cfgId, json, flags);
|
||||
PG_FREE_IF_COPY(json, 0);
|
||||
PG_FREE_IF_COPY(jbFlags, 1);
|
||||
|
||||
PG_RETURN_TSVECTOR(result);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -353,7 +441,7 @@ add_to_tsvector(void *_state, char *elem_value, int elem_len)
|
||||
* First time through: initialize words array to a reasonable size.
|
||||
* (parsetext() will realloc it bigger as needed.)
|
||||
*/
|
||||
prs->lenwords = Max(elem_len / 6, 64);
|
||||
prs->lenwords = 16;
|
||||
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
|
||||
prs->curwords = 0;
|
||||
prs->pos = 0;
|
||||
|
Reference in New Issue
Block a user