mirror of
https://github.com/postgres/postgres.git
synced 2025-08-31 17:02:12 +03:00
432 lines
9.3 KiB
C
432 lines
9.3 KiB
C
/*-------------------------------------------------------------------------
|
|
*
|
|
* to_tsany.c
|
|
* to_ts* function definitions
|
|
*
|
|
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
|
|
*
|
|
*
|
|
* IDENTIFICATION
|
|
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.14 2009/06/11 14:49:03 momjian Exp $
|
|
*
|
|
*-------------------------------------------------------------------------
|
|
*/
|
|
#include "postgres.h"
|
|
|
|
#include "catalog/namespace.h"
|
|
#include "tsearch/ts_cache.h"
|
|
#include "tsearch/ts_utils.h"
|
|
#include "utils/builtins.h"
|
|
#include "utils/syscache.h"
|
|
|
|
|
|
Datum
|
|
get_current_ts_config(PG_FUNCTION_ARGS)
|
|
{
|
|
PG_RETURN_OID(getTSCurrentConfig(true));
|
|
}
|
|
|
|
/*
|
|
* to_tsvector
|
|
*/
|
|
static int
|
|
compareWORD(const void *a, const void *b)
|
|
{
|
|
int res;
|
|
|
|
res = tsCompareString(
|
|
((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
|
|
((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
|
|
false);
|
|
|
|
if (res == 0)
|
|
{
|
|
if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
|
|
return 0;
|
|
|
|
res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
static int
|
|
uniqueWORD(ParsedWord *a, int4 l)
|
|
{
|
|
ParsedWord *ptr,
|
|
*res;
|
|
int tmppos;
|
|
|
|
if (l == 1)
|
|
{
|
|
tmppos = LIMITPOS(a->pos.pos);
|
|
a->alen = 2;
|
|
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
|
|
a->pos.apos[0] = 1;
|
|
a->pos.apos[1] = tmppos;
|
|
return l;
|
|
}
|
|
|
|
res = a;
|
|
ptr = a + 1;
|
|
|
|
/*
|
|
* Sort words with its positions
|
|
*/
|
|
qsort((void *) a, l, sizeof(ParsedWord), compareWORD);
|
|
|
|
/*
|
|
* Initialize first word and its first position
|
|
*/
|
|
tmppos = LIMITPOS(a->pos.pos);
|
|
a->alen = 2;
|
|
a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
|
|
a->pos.apos[0] = 1;
|
|
a->pos.apos[1] = tmppos;
|
|
|
|
/*
|
|
* Summarize position information for each word
|
|
*/
|
|
while (ptr - a < l)
|
|
{
|
|
if (!(ptr->len == res->len &&
|
|
strncmp(ptr->word, res->word, res->len) == 0))
|
|
{
|
|
/*
|
|
* Got a new word, so put it in result
|
|
*/
|
|
res++;
|
|
res->len = ptr->len;
|
|
res->word = ptr->word;
|
|
tmppos = LIMITPOS(ptr->pos.pos);
|
|
res->alen = 2;
|
|
res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
|
|
res->pos.apos[0] = 1;
|
|
res->pos.apos[1] = tmppos;
|
|
}
|
|
else
|
|
{
|
|
/*
|
|
* The word already exists, so adjust position information. But
|
|
* before we should check size of position's array, max allowed
|
|
* value for position and uniqueness of position
|
|
*/
|
|
pfree(ptr->word);
|
|
if (res->pos.apos[0] < MAXNUMPOS - 1 && res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
|
|
res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
|
|
{
|
|
if (res->pos.apos[0] + 1 >= res->alen)
|
|
{
|
|
res->alen *= 2;
|
|
res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
|
|
}
|
|
if (res->pos.apos[0] == 0 || res->pos.apos[res->pos.apos[0]] != LIMITPOS(ptr->pos.pos))
|
|
{
|
|
res->pos.apos[res->pos.apos[0] + 1] = LIMITPOS(ptr->pos.pos);
|
|
res->pos.apos[0]++;
|
|
}
|
|
}
|
|
}
|
|
ptr++;
|
|
}
|
|
|
|
return res + 1 - a;
|
|
}
|
|
|
|
/*
|
|
* make value of tsvector, given parsed text
|
|
*/
|
|
TSVector
|
|
make_tsvector(ParsedText *prs)
|
|
{
|
|
int i,
|
|
j,
|
|
lenstr = 0,
|
|
totallen;
|
|
TSVector in;
|
|
WordEntry *ptr;
|
|
char *str;
|
|
int stroff;
|
|
|
|
prs->curwords = uniqueWORD(prs->words, prs->curwords);
|
|
for (i = 0; i < prs->curwords; i++)
|
|
{
|
|
lenstr += prs->words[i].len;
|
|
if (prs->words[i].alen)
|
|
{
|
|
lenstr = SHORTALIGN(lenstr);
|
|
lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
|
|
}
|
|
}
|
|
|
|
if (lenstr > MAXSTRPOS)
|
|
ereport(ERROR,
|
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));
|
|
|
|
totallen = CALCDATASIZE(prs->curwords, lenstr);
|
|
in = (TSVector) palloc0(totallen);
|
|
SET_VARSIZE(in, totallen);
|
|
in->size = prs->curwords;
|
|
|
|
ptr = ARRPTR(in);
|
|
str = STRPTR(in);
|
|
stroff = 0;
|
|
for (i = 0; i < prs->curwords; i++)
|
|
{
|
|
ptr->len = prs->words[i].len;
|
|
ptr->pos = stroff;
|
|
memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
|
|
stroff += prs->words[i].len;
|
|
pfree(prs->words[i].word);
|
|
if (prs->words[i].alen)
|
|
{
|
|
int k = prs->words[i].pos.apos[0];
|
|
WordEntryPos *wptr;
|
|
|
|
if (k > 0xFFFF)
|
|
elog(ERROR, "positions array too long");
|
|
|
|
ptr->haspos = 1;
|
|
stroff = SHORTALIGN(stroff);
|
|
*(uint16 *) (str + stroff) = (uint16) k;
|
|
wptr = POSDATAPTR(in, ptr);
|
|
for (j = 0; j < k; j++)
|
|
{
|
|
WEP_SETWEIGHT(wptr[j], 0);
|
|
WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
|
|
}
|
|
stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
|
|
pfree(prs->words[i].pos.apos);
|
|
}
|
|
else
|
|
ptr->haspos = 0;
|
|
ptr++;
|
|
}
|
|
pfree(prs->words);
|
|
return in;
|
|
}
|
|
|
|
Datum
|
|
to_tsvector_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
Oid cfgId = PG_GETARG_OID(0);
|
|
text *in = PG_GETARG_TEXT_P(1);
|
|
ParsedText prs;
|
|
TSVector out;
|
|
|
|
prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6; /* just estimation of
|
|
* word's number */
|
|
if (prs.lenwords == 0)
|
|
prs.lenwords = 2;
|
|
prs.curwords = 0;
|
|
prs.pos = 0;
|
|
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
|
|
|
|
parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
|
|
PG_FREE_IF_COPY(in, 1);
|
|
|
|
if (prs.curwords)
|
|
out = make_tsvector(&prs);
|
|
else
|
|
{
|
|
pfree(prs.words);
|
|
out = palloc(CALCDATASIZE(0, 0));
|
|
SET_VARSIZE(out, CALCDATASIZE(0, 0));
|
|
out->size = 0;
|
|
}
|
|
|
|
PG_RETURN_POINTER(out);
|
|
}
|
|
|
|
Datum
|
|
to_tsvector(PG_FUNCTION_ARGS)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(0);
|
|
Oid cfgId;
|
|
|
|
cfgId = getTSCurrentConfig(true);
|
|
PG_RETURN_DATUM(DirectFunctionCall2(to_tsvector_byid,
|
|
ObjectIdGetDatum(cfgId),
|
|
PointerGetDatum(in)));
|
|
}
|
|
|
|
/*
|
|
* to_tsquery
|
|
*/
|
|
|
|
|
|
/*
|
|
* This function is used for morph parsing.
|
|
*
|
|
* The value is passed to parsetext which will call the right dictionary to
|
|
* lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
|
|
* to the stack.
|
|
*
|
|
* All words belonging to the same variant are pushed as an ANDed list,
|
|
* and different variants are ORred together.
|
|
*/
|
|
static void
|
|
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
|
|
{
|
|
int4 count = 0;
|
|
ParsedText prs;
|
|
uint32 variant,
|
|
pos,
|
|
cntvar = 0,
|
|
cntpos = 0,
|
|
cnt = 0;
|
|
Oid cfg_id = DatumGetObjectId(opaque); /* the input is actually
|
|
* an Oid, not a pointer */
|
|
|
|
prs.lenwords = 4;
|
|
prs.curwords = 0;
|
|
prs.pos = 0;
|
|
prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);
|
|
|
|
parsetext(cfg_id, &prs, strval, lenval);
|
|
|
|
if (prs.curwords > 0)
|
|
{
|
|
|
|
while (count < prs.curwords)
|
|
{
|
|
pos = prs.words[count].pos.pos;
|
|
cntvar = 0;
|
|
while (count < prs.curwords && pos == prs.words[count].pos.pos)
|
|
{
|
|
variant = prs.words[count].nvariant;
|
|
|
|
cnt = 0;
|
|
while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
|
|
{
|
|
|
|
pushValue(state, prs.words[count].word, prs.words[count].len, weight,
|
|
((prs.words[count].flags & TSL_PREFIX) || prefix) ? true : false);
|
|
pfree(prs.words[count].word);
|
|
if (cnt)
|
|
pushOperator(state, OP_AND);
|
|
cnt++;
|
|
count++;
|
|
}
|
|
|
|
if (cntvar)
|
|
pushOperator(state, OP_OR);
|
|
cntvar++;
|
|
}
|
|
|
|
if (cntpos)
|
|
pushOperator(state, OP_AND);
|
|
|
|
cntpos++;
|
|
}
|
|
|
|
pfree(prs.words);
|
|
|
|
}
|
|
else
|
|
pushStop(state);
|
|
}
|
|
|
|
Datum
|
|
to_tsquery_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
Oid cfgid = PG_GETARG_OID(0);
|
|
text *in = PG_GETARG_TEXT_P(1);
|
|
TSQuery query;
|
|
QueryItem *res;
|
|
int4 len;
|
|
|
|
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);
|
|
|
|
if (query->size == 0)
|
|
PG_RETURN_TSQUERY(query);
|
|
|
|
res = clean_fakeval(GETQUERY(query), &len);
|
|
if (!res)
|
|
{
|
|
SET_VARSIZE(query, HDRSIZETQ);
|
|
query->size = 0;
|
|
PG_RETURN_POINTER(query);
|
|
}
|
|
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
|
|
|
|
if (len != query->size)
|
|
{
|
|
char *oldoperand = GETOPERAND(query);
|
|
int4 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
|
|
|
|
Assert(len < query->size);
|
|
|
|
query->size = len;
|
|
memcpy((void *) GETOPERAND(query), oldoperand, VARSIZE(query) - (oldoperand - (char *) query));
|
|
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
|
|
}
|
|
|
|
pfree(res);
|
|
PG_RETURN_TSQUERY(query);
|
|
}
|
|
|
|
Datum
|
|
to_tsquery(PG_FUNCTION_ARGS)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(0);
|
|
Oid cfgId;
|
|
|
|
cfgId = getTSCurrentConfig(true);
|
|
PG_RETURN_DATUM(DirectFunctionCall2(to_tsquery_byid,
|
|
ObjectIdGetDatum(cfgId),
|
|
PointerGetDatum(in)));
|
|
}
|
|
|
|
Datum
|
|
plainto_tsquery_byid(PG_FUNCTION_ARGS)
|
|
{
|
|
Oid cfgid = PG_GETARG_OID(0);
|
|
text *in = PG_GETARG_TEXT_P(1);
|
|
TSQuery query;
|
|
QueryItem *res;
|
|
int4 len;
|
|
|
|
query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);
|
|
|
|
if (query->size == 0)
|
|
PG_RETURN_TSQUERY(query);
|
|
|
|
res = clean_fakeval(GETQUERY(query), &len);
|
|
if (!res)
|
|
{
|
|
SET_VARSIZE(query, HDRSIZETQ);
|
|
query->size = 0;
|
|
PG_RETURN_POINTER(query);
|
|
}
|
|
memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));
|
|
|
|
if (len != query->size)
|
|
{
|
|
char *oldoperand = GETOPERAND(query);
|
|
int4 lenoperand = VARSIZE(query) - (oldoperand - (char *) query);
|
|
|
|
Assert(len < query->size);
|
|
|
|
query->size = len;
|
|
memcpy((void *) GETOPERAND(query), oldoperand, lenoperand);
|
|
SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
|
|
}
|
|
|
|
pfree(res);
|
|
PG_RETURN_POINTER(query);
|
|
}
|
|
|
|
Datum
|
|
plainto_tsquery(PG_FUNCTION_ARGS)
|
|
{
|
|
text *in = PG_GETARG_TEXT_P(0);
|
|
Oid cfgId;
|
|
|
|
cfgId = getTSCurrentConfig(true);
|
|
PG_RETURN_DATUM(DirectFunctionCall2(plainto_tsquery_byid,
|
|
ObjectIdGetDatum(cfgId),
|
|
PointerGetDatum(in)));
|
|
}
|