1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-05 07:21:24 +03:00

Full Text Search support for json and jsonb

The new functions are ts_headline() and to_tsvector.

Dmitry Dolgov, edited and documented by me.
This commit is contained in:
Andrew Dunstan
2017-03-31 14:26:03 -04:00
parent c80b9920fc
commit e306df7f9c
9 changed files with 613 additions and 0 deletions

View File

@ -16,6 +16,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/jsonapi.h"
typedef struct MorphOpaque
@ -24,6 +25,14 @@ typedef struct MorphOpaque
int qoperator; /* query operator */
} MorphOpaque;
typedef struct TSVectorBuildState
{
ParsedText *prs;
TSVector result;
Oid cfgId;
} TSVectorBuildState;
static void add_to_tsvector(void *state, char *elem_value, int elem_len);
Datum
get_current_ts_config(PG_FUNCTION_ARGS)
@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
PointerGetDatum(in)));
}
Datum
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
Jsonb *jb = PG_GETARG_JSONB(1);
TSVectorBuildState state;
ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
prs->words = NULL;
state.result = NULL;
state.cfgId = cfgId;
state.prs = prs;
iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
PG_FREE_IF_COPY(jb, 1);
if (state.result == NULL)
{
/* There weren't any string elements in jsonb,
* so wee need to return an empty vector */
if (prs->words != NULL)
pfree(prs->words);
state.result = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
state.result->size = 0;
}
PG_RETURN_TSVECTOR(state.result);
}
Datum
jsonb_to_tsvector(PG_FUNCTION_ARGS)
{
Jsonb *jb = PG_GETARG_JSONB(0);
Oid cfgId;
cfgId = getTSCurrentConfig(true);
PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
ObjectIdGetDatum(cfgId),
JsonbGetDatum(jb)));
}
Datum
json_to_tsvector_byid(PG_FUNCTION_ARGS)
{
Oid cfgId = PG_GETARG_OID(0);
text *json = PG_GETARG_TEXT_P(1);
TSVectorBuildState state;
ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
prs->words = NULL;
state.result = NULL;
state.cfgId = cfgId;
state.prs = prs;
iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
PG_FREE_IF_COPY(json, 1);
if (state.result == NULL)
{
/* There weren't any string elements in json,
* so wee need to return an empty vector */
if (prs->words != NULL)
pfree(prs->words);
state.result = palloc(CALCDATASIZE(0, 0));
SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
state.result->size = 0;
}
PG_RETURN_TSVECTOR(state.result);
}
Datum
json_to_tsvector(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(0);
Oid cfgId;
cfgId = getTSCurrentConfig(true);
PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
ObjectIdGetDatum(cfgId),
PointerGetDatum(json)));
}
/*
* Extend current TSVector from _state with a new one,
* build over a json(b) element.
*/
static void
add_to_tsvector(void *_state, char *elem_value, int elem_len)
{
TSVectorBuildState *state = (TSVectorBuildState *) _state;
ParsedText *prs = state->prs;
TSVector item_vector;
int i;
prs->lenwords = elem_len / 6;
if (prs->lenwords == 0)
prs->lenwords = 2;
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
prs->curwords = 0;
prs->pos = 0;
parsetext(state->cfgId, prs, elem_value, elem_len);
if (prs->curwords)
{
if (state->result != NULL)
{
for (i = 0; i < prs->curwords; i++)
prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
item_vector = make_tsvector(prs);
state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
TSVectorGetDatum(state->result),
PointerGetDatum(item_vector));
}
else
state->result = make_tsvector(prs);
}
}
/*
* to_tsquery
*/

View File

@ -20,6 +20,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/jsonapi.h"
#include "utils/varlena.h"
@ -31,6 +32,19 @@ typedef struct
LexDescr *list;
} TSTokenTypeStorage;
/* state for ts_headline_json_* */
typedef struct HeadlineJsonState
{
HeadlineParsedText *prs;
TSConfigCacheEntry *cfg;
TSParserCacheEntry *prsobj;
TSQuery query;
List *prsoptions;
bool transformed;
} HeadlineJsonState;
static text * headline_json_value(void *_state, char *elem_value, int elem_len);
static void
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
{
Jsonb *out, *jb = PG_GETARG_JSONB(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_jsonb_string_values(jb, state, action);
PG_FREE_IF_COPY(jb, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_JSONB(out);
}
Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
}
Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
text *out;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_json_string_values(json, state, action);
PG_FREE_IF_COPY(json, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_TEXT_P(out);
}
Datum
ts_headline_json(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1)));
}
Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)
{
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
ObjectIdGetDatum(getTSCurrentConfig(true)),
PG_GETARG_DATUM(0),
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
/*
* Return headline in text from, generated from a json(b) element
*/
static text *
headline_json_value(void *_state, char *elem_value, int elem_len)
{
HeadlineJsonState *state = (HeadlineJsonState *) _state;
HeadlineParsedText *prs = state->prs;
TSConfigCacheEntry *cfg = state->cfg;
TSParserCacheEntry *prsobj = state->prsobj;
TSQuery query = state->query;
List *prsoptions = state->prsoptions;
prs->curwords = 0;
hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
FunctionCall3(&(prsobj->prsheadline),
PointerGetDatum(prs),
PointerGetDatum(prsoptions),
PointerGetDatum(query));
state->transformed = true;
return generateHeadline(prs);
}