mirror of
https://github.com/postgres/postgres.git
synced 2025-07-05 07:21:24 +03:00
Full Text Search support for json and jsonb
The new functions are ts_headline() and to_tsvector. Dmitry Dolgov, edited and documented by me.
This commit is contained in:
@ -16,6 +16,7 @@
|
||||
#include "tsearch/ts_cache.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/jsonapi.h"
|
||||
|
||||
|
||||
typedef struct MorphOpaque
|
||||
@ -24,6 +25,14 @@ typedef struct MorphOpaque
|
||||
int qoperator; /* query operator */
|
||||
} MorphOpaque;
|
||||
|
||||
typedef struct TSVectorBuildState
|
||||
{
|
||||
ParsedText *prs;
|
||||
TSVector result;
|
||||
Oid cfgId;
|
||||
} TSVectorBuildState;
|
||||
|
||||
static void add_to_tsvector(void *state, char *elem_value, int elem_len);
|
||||
|
||||
Datum
|
||||
get_current_ts_config(PG_FUNCTION_ARGS)
|
||||
@ -256,6 +265,135 @@ to_tsvector(PG_FUNCTION_ARGS)
|
||||
PointerGetDatum(in)));
|
||||
}
|
||||
|
||||
Datum
|
||||
jsonb_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
Jsonb *jb = PG_GETARG_JSONB(1);
|
||||
TSVectorBuildState state;
|
||||
ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
|
||||
|
||||
prs->words = NULL;
|
||||
state.result = NULL;
|
||||
state.cfgId = cfgId;
|
||||
state.prs = prs;
|
||||
|
||||
iterate_jsonb_string_values(jb, &state, (JsonIterateStringValuesAction) add_to_tsvector);
|
||||
|
||||
PG_FREE_IF_COPY(jb, 1);
|
||||
|
||||
if (state.result == NULL)
|
||||
{
|
||||
/* There weren't any string elements in jsonb,
|
||||
* so wee need to return an empty vector */
|
||||
|
||||
if (prs->words != NULL)
|
||||
pfree(prs->words);
|
||||
|
||||
state.result = palloc(CALCDATASIZE(0, 0));
|
||||
SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
|
||||
state.result->size = 0;
|
||||
}
|
||||
|
||||
PG_RETURN_TSVECTOR(state.result);
|
||||
}
|
||||
|
||||
Datum
|
||||
jsonb_to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Jsonb *jb = PG_GETARG_JSONB(0);
|
||||
Oid cfgId;
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
PG_RETURN_DATUM(DirectFunctionCall2(jsonb_to_tsvector_byid,
|
||||
ObjectIdGetDatum(cfgId),
|
||||
JsonbGetDatum(jb)));
|
||||
}
|
||||
|
||||
Datum
|
||||
json_to_tsvector_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgId = PG_GETARG_OID(0);
|
||||
text *json = PG_GETARG_TEXT_P(1);
|
||||
TSVectorBuildState state;
|
||||
ParsedText *prs = (ParsedText *) palloc(sizeof(ParsedText));
|
||||
|
||||
prs->words = NULL;
|
||||
state.result = NULL;
|
||||
state.cfgId = cfgId;
|
||||
state.prs = prs;
|
||||
|
||||
iterate_json_string_values(json, &state, (JsonIterateStringValuesAction) add_to_tsvector);
|
||||
|
||||
PG_FREE_IF_COPY(json, 1);
|
||||
if (state.result == NULL)
|
||||
{
|
||||
/* There weren't any string elements in json,
|
||||
* so wee need to return an empty vector */
|
||||
|
||||
if (prs->words != NULL)
|
||||
pfree(prs->words);
|
||||
|
||||
state.result = palloc(CALCDATASIZE(0, 0));
|
||||
SET_VARSIZE(state.result, CALCDATASIZE(0, 0));
|
||||
state.result->size = 0;
|
||||
}
|
||||
|
||||
PG_RETURN_TSVECTOR(state.result);
|
||||
}
|
||||
|
||||
Datum
|
||||
json_to_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *json = PG_GETARG_TEXT_P(0);
|
||||
Oid cfgId;
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
PG_RETURN_DATUM(DirectFunctionCall2(json_to_tsvector_byid,
|
||||
ObjectIdGetDatum(cfgId),
|
||||
PointerGetDatum(json)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Extend current TSVector from _state with a new one,
|
||||
* build over a json(b) element.
|
||||
*/
|
||||
static void
|
||||
add_to_tsvector(void *_state, char *elem_value, int elem_len)
|
||||
{
|
||||
TSVectorBuildState *state = (TSVectorBuildState *) _state;
|
||||
ParsedText *prs = state->prs;
|
||||
TSVector item_vector;
|
||||
int i;
|
||||
|
||||
prs->lenwords = elem_len / 6;
|
||||
if (prs->lenwords == 0)
|
||||
prs->lenwords = 2;
|
||||
|
||||
prs->words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs->lenwords);
|
||||
prs->curwords = 0;
|
||||
prs->pos = 0;
|
||||
|
||||
parsetext(state->cfgId, prs, elem_value, elem_len);
|
||||
|
||||
if (prs->curwords)
|
||||
{
|
||||
if (state->result != NULL)
|
||||
{
|
||||
for (i = 0; i < prs->curwords; i++)
|
||||
prs->words[i].pos.pos = prs->words[i].pos.pos + TS_JUMP;
|
||||
|
||||
item_vector = make_tsvector(prs);
|
||||
|
||||
state->result = (TSVector) DirectFunctionCall2(tsvector_concat,
|
||||
TSVectorGetDatum(state->result),
|
||||
PointerGetDatum(item_vector));
|
||||
}
|
||||
else
|
||||
state->result = make_tsvector(prs);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* to_tsquery
|
||||
*/
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "tsearch/ts_cache.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/jsonapi.h"
|
||||
#include "utils/varlena.h"
|
||||
|
||||
|
||||
@ -31,6 +32,19 @@ typedef struct
|
||||
LexDescr *list;
|
||||
} TSTokenTypeStorage;
|
||||
|
||||
/* state for ts_headline_json_* */
|
||||
typedef struct HeadlineJsonState
|
||||
{
|
||||
HeadlineParsedText *prs;
|
||||
TSConfigCacheEntry *cfg;
|
||||
TSParserCacheEntry *prsobj;
|
||||
TSQuery query;
|
||||
List *prsoptions;
|
||||
bool transformed;
|
||||
} HeadlineJsonState;
|
||||
|
||||
static text * headline_json_value(void *_state, char *elem_value, int elem_len);
|
||||
|
||||
static void
|
||||
tt_setup_firstcall(FuncCallContext *funcctx, Oid prsid)
|
||||
{
|
||||
@ -363,3 +377,179 @@ ts_headline_opt(PG_FUNCTION_ARGS)
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2)));
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_jsonb_byid_opt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Jsonb *out, *jb = PG_GETARG_JSONB(1);
|
||||
TSQuery query = PG_GETARG_TSQUERY(2);
|
||||
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
|
||||
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
|
||||
|
||||
HeadlineParsedText prs;
|
||||
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
|
||||
|
||||
memset(&prs, 0, sizeof(HeadlineParsedText));
|
||||
prs.lenwords = 32;
|
||||
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
|
||||
|
||||
state->prs = &prs;
|
||||
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
|
||||
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
|
||||
state->query = query;
|
||||
if (opt)
|
||||
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
|
||||
else
|
||||
state->prsoptions = NIL;
|
||||
|
||||
if (!OidIsValid(state->prsobj->headlineOid))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("text search parser does not support headline creation")));
|
||||
|
||||
out = transform_jsonb_string_values(jb, state, action);
|
||||
|
||||
PG_FREE_IF_COPY(jb, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
if (opt)
|
||||
PG_FREE_IF_COPY(opt, 3);
|
||||
|
||||
pfree(prs.words);
|
||||
|
||||
if (state->transformed)
|
||||
{
|
||||
pfree(prs.startsel);
|
||||
pfree(prs.stopsel);
|
||||
}
|
||||
|
||||
PG_RETURN_JSONB(out);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_jsonb(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
|
||||
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1)));
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_jsonb_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_jsonb_byid_opt,
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2)));
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_jsonb_opt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_jsonb_byid_opt,
|
||||
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2)));
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_json_byid_opt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *json = PG_GETARG_TEXT_P(1);
|
||||
TSQuery query = PG_GETARG_TSQUERY(2);
|
||||
text *opt = (PG_NARGS() > 3 && PG_GETARG_POINTER(3)) ? PG_GETARG_TEXT_P(3) : NULL;
|
||||
text *out;
|
||||
JsonTransformStringValuesAction action = (JsonTransformStringValuesAction) headline_json_value;
|
||||
|
||||
HeadlineParsedText prs;
|
||||
HeadlineJsonState *state = palloc0(sizeof(HeadlineJsonState));
|
||||
|
||||
memset(&prs, 0, sizeof(HeadlineParsedText));
|
||||
prs.lenwords = 32;
|
||||
prs.words = (HeadlineWordEntry *) palloc(sizeof(HeadlineWordEntry) * prs.lenwords);
|
||||
|
||||
state->prs = &prs;
|
||||
state->cfg = lookup_ts_config_cache(PG_GETARG_OID(0));
|
||||
state->prsobj = lookup_ts_parser_cache(state->cfg->prsId);
|
||||
state->query = query;
|
||||
if (opt)
|
||||
state->prsoptions = deserialize_deflist(PointerGetDatum(opt));
|
||||
else
|
||||
state->prsoptions = NIL;
|
||||
|
||||
if (!OidIsValid(state->prsobj->headlineOid))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("text search parser does not support headline creation")));
|
||||
|
||||
out = transform_json_string_values(json, state, action);
|
||||
|
||||
PG_FREE_IF_COPY(json, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
if (opt)
|
||||
PG_FREE_IF_COPY(opt, 3);
|
||||
pfree(prs.words);
|
||||
|
||||
if (state->transformed)
|
||||
{
|
||||
pfree(prs.startsel);
|
||||
pfree(prs.stopsel);
|
||||
}
|
||||
|
||||
PG_RETURN_TEXT_P(out);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_json(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
|
||||
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1)));
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_json_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall3(ts_headline_json_byid_opt,
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2)));
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_headline_json_opt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall4(ts_headline_json_byid_opt,
|
||||
ObjectIdGetDatum(getTSCurrentConfig(true)),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(2)));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Return headline in text from, generated from a json(b) element
|
||||
*/
|
||||
static text *
|
||||
headline_json_value(void *_state, char *elem_value, int elem_len)
|
||||
{
|
||||
HeadlineJsonState *state = (HeadlineJsonState *) _state;
|
||||
|
||||
HeadlineParsedText *prs = state->prs;
|
||||
TSConfigCacheEntry *cfg = state->cfg;
|
||||
TSParserCacheEntry *prsobj = state->prsobj;
|
||||
TSQuery query = state->query;
|
||||
List *prsoptions = state->prsoptions;
|
||||
|
||||
prs->curwords = 0;
|
||||
hlparsetext(cfg->cfgId, prs, query, elem_value, elem_len);
|
||||
FunctionCall3(&(prsobj->prsheadline),
|
||||
PointerGetDatum(prs),
|
||||
PointerGetDatum(prsoptions),
|
||||
PointerGetDatum(query));
|
||||
|
||||
state->transformed = true;
|
||||
return generateHeadline(prs);
|
||||
}
|
||||
|
Reference in New Issue
Block a user