1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-11 10:01:57 +03:00

Full Text Search support for json and jsonb

The new functions are ts_headline() and to_tsvector().

Dmitry Dolgov, edited and documented by me.
This commit is contained in:
Andrew Dunstan
2017-03-31 14:26:03 -04:00
parent c80b9920fc
commit e306df7f9c
9 changed files with 613 additions and 0 deletions

View File

@ -20,6 +20,7 @@
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/jsonapi.h"
#include "utils/varlena.h"
@ -31,6 +32,19 @@ typedef struct
LexDescr *list;
} TSTokenTypeStorage;
/*
 * State for the ts_headline_json and ts_headline_jsonb functions.
 *
 * One instance is filled in by the *_byid_opt entry points and handed, as an
 * opaque pointer, to headline_json_value(), which reads it back to build the
 * headline for a single element value.
 */
typedef struct HeadlineJsonState
{
/* parsed-text work area; the entry points point this at a local variable */
HeadlineParsedText *prs;
/* text search configuration, from lookup_ts_config_cache() */
TSConfigCacheEntry *cfg;
/* parser belonging to cfg, from lookup_ts_parser_cache(cfg->prsId) */
TSParserCacheEntry *prsobj;
/* the query the headline is generated for */
TSQuery query;
/* result of deserialize_deflist() on the options text, or NIL if none given */
List *prsoptions;
/* set to true by headline_json_value() once it has processed a value */
bool transformed;
} HeadlineJsonState;
/* forward declaration: used as the transform action before its definition */
static text * headline_json_value(void *_state, char *elem_value, int elem_len);
static void
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
{
@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
PG_GETARG_DATUM(1),
PG_GETARG_DATUM(2)));
}
Datum
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
{
Jsonb *out, *jb = PG_GETARG_JSONB(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_jsonb_string_values(jb, state, action);
PG_FREE_IF_COPY(jb, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_JSONB(out);
}
/*
 * ts_headline_jsonb
 *
 * Two-argument form (jsonb document, tsquery): delegates to
 * ts_headline_jsonb_byid_opt() using the configuration returned by
 * getTSCurrentConfig(true) and no options argument.
 */
Datum
ts_headline_jsonb(PG_FUNCTION_ARGS)
{
	Datum		cfg_oid = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		doc = PG_GETARG_DATUM(0);
	Datum		tsq = PG_GETARG_DATUM(1);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
										cfg_oid, doc, tsq));
}
/*
 * ts_headline_jsonb_byid
 *
 * Three-argument form (configuration OID, jsonb document, tsquery): passes
 * its arguments unchanged to ts_headline_jsonb_byid_opt(), without an
 * options argument.
 */
Datum
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
{
	Datum		cfg_oid = PG_GETARG_DATUM(0);
	Datum		doc = PG_GETARG_DATUM(1);
	Datum		tsq = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
										cfg_oid, doc, tsq));
}
/*
 * ts_headline_jsonb_opt
 *
 * Three-argument form (jsonb document, tsquery, options): delegates to
 * ts_headline_jsonb_byid_opt() using the configuration returned by
 * getTSCurrentConfig(true).
 */
Datum
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
{
	Datum		cfg_oid = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		doc = PG_GETARG_DATUM(0);
	Datum		tsq = PG_GETARG_DATUM(1);
	Datum		options = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
										cfg_oid, doc, tsq, options));
}
Datum
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
{
text *json = PG_GETARG_TEXT_P(1);
TSQuery query = PG_GETARG_TSQUERY(2);
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
text *out;
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
HeadlineParsedText prs;
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
memset(&prs, 0, sizeof(HeadlineParsedText));
prs.lenwords = 32;
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
state->prs = &prs;
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
state->query = query;
if (opt)
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
else
state->prsoptions = NIL;
if (!OidIsValid(state->prsobj->headlineOid))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("text search parser does not support headline creation")));
out = transform_json_string_values(json, state, action);
PG_FREE_IF_COPY(json, 1);
PG_FREE_IF_COPY(query, 2);
if (opt)
PG_FREE_IF_COPY(opt, 3);
pfree(prs.words);
if (state->transformed)
{
pfree(prs.startsel);
pfree(prs.stopsel);
}
PG_RETURN_TEXT_P(out);
}
/*
 * ts_headline_json
 *
 * Two-argument form (json document, tsquery): delegates to
 * ts_headline_json_byid_opt() using the configuration returned by
 * getTSCurrentConfig(true) and no options argument.
 */
Datum
ts_headline_json(PG_FUNCTION_ARGS)
{
	Datum		cfg_oid = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		doc = PG_GETARG_DATUM(0);
	Datum		tsq = PG_GETARG_DATUM(1);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
										cfg_oid, doc, tsq));
}
/*
 * ts_headline_json_byid
 *
 * Three-argument form (configuration OID, json document, tsquery): passes
 * its arguments unchanged to ts_headline_json_byid_opt(), without an
 * options argument.
 */
Datum
ts_headline_json_byid(PG_FUNCTION_ARGS)
{
	Datum		cfg_oid = PG_GETARG_DATUM(0);
	Datum		doc = PG_GETARG_DATUM(1);
	Datum		tsq = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
										cfg_oid, doc, tsq));
}
/*
 * ts_headline_json_opt
 *
 * Three-argument form (json document, tsquery, options): delegates to
 * ts_headline_json_byid_opt() using the configuration returned by
 * getTSCurrentConfig(true).
 */
Datum
ts_headline_json_opt(PG_FUNCTION_ARGS)
{
	Datum		cfg_oid = ObjectIdGetDatum(getTSCurrentConfig(true));
	Datum		doc = PG_GETARG_DATUM(0);
	Datum		tsq = PG_GETARG_DATUM(1);
	Datum		options = PG_GETARG_DATUM(2);

	PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
										cfg_oid, doc, tsq, options));
}
/*
 * headline_json_value
 *
 * Produce the headline, as text, for one json(b) element value.
 *
 * _state must point to a HeadlineJsonState; elem_value and elem_len give the
 * value to process.  The value is parsed with the state's configuration, the
 * parser's headline function is invoked on the parsed words, and the text
 * built by generateHeadline() is returned.  The state's "transformed" flag is
 * set so callers can tell that at least one value was processed.
 */
static text *
headline_json_value(void *_state, char *elem_value, int elem_len)
{
	HeadlineJsonState *hstate = (HeadlineJsonState *) _state;
	HeadlineParsedText *parsed = hstate->prs;

	/* the work area is shared between calls: begin with an empty word list */
	parsed->curwords = 0;

	hlparsetext(hstate->cfg->cfgId, parsed, hstate->query,
				elem_value, elem_len);

	/* let the parser's headline function work on the parsed words */
	FunctionCall3(&(hstate->prsobj->prsheadline),
				  PointerGetDatum(parsed),
				  PointerGetDatum(hstate->prsoptions),
				  PointerGetDatum(hstate->query));

	hstate->transformed = true;

	return generateHeadline(parsed);
}