1
0
mirror of https://github.com/postgres/postgres.git synced 2025-08-31 17:02:12 +03:00
Files
postgres/src/backend/tsearch/to_tsany.c
2009-06-11 14:49:15 +00:00

432 lines
9.3 KiB
C

/*-------------------------------------------------------------------------
*
* to_tsany.c
* to_ts* function definitions
*
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/tsearch/to_tsany.c,v 1.14 2009/06/11 14:49:03 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "catalog/namespace.h"
#include "tsearch/ts_cache.h"
#include "tsearch/ts_utils.h"
#include "utils/builtins.h"
#include "utils/syscache.h"
Datum
get_current_ts_config(PG_FUNCTION_ARGS)
{
PG_RETURN_OID(getTSCurrentConfig(true));
}
/*
* to_tsvector
*/
static int
compareWORD(const void *a, const void *b)
{
int res;
res = tsCompareString(
((ParsedWord *) a)->word, ((ParsedWord *) a)->len,
((ParsedWord *) b)->word, ((ParsedWord *) b)->len,
false);
if (res == 0)
{
if (((ParsedWord *) a)->pos.pos == ((ParsedWord *) b)->pos.pos)
return 0;
res = (((ParsedWord *) a)->pos.pos > ((ParsedWord *) b)->pos.pos) ? 1 : -1;
}
return res;
}
/*
 * uniqueWORD
 *
 * Sort an array of parsed words and collapse entries with identical text
 * into one entry that carries a list of positions.
 *
 *	a	array of l ParsedWord entries; rewritten in place
 *	l	number of entries in a
 *
 * On return, the first N entries of a (N being the return value) hold the
 * distinct words.  For each of them pos.apos is a palloc'd uint16 array in
 * which apos[0] is the number of stored positions and apos[1..apos[0]] are
 * the positions themselves (each passed through LIMITPOS); alen is the
 * allocated length of that array.  The word strings of entries that were
 * merged into an earlier entry are pfree'd.
 *
 * Returns the number of distinct words; 0 when l <= 0.
 *
 * NOTE(review): throughout, pos.pos is copied into a local before pos.apos
 * of the same entry is assigned.  Keep that order: the two appear to be
 * alternative uses of one field (confirm against ParsedWord's declaration).
 */
static int
uniqueWORD(ParsedWord *a, int4 l)
{
	ParsedWord *ptr,
			   *res;
	int			tmppos;

	/*
	 * An empty array has no distinct words.  Without this check the code
	 * below would initialize a[0], which is not valid input, and report one
	 * word to the caller.
	 */
	if (l <= 0)
		return 0;

	if (l == 1)
	{
		/* Nothing to sort or merge; just convert to position-list form */
		tmppos = LIMITPOS(a->pos.pos);
		a->alen = 2;
		a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
		a->pos.apos[0] = 1;
		a->pos.apos[1] = tmppos;
		return l;
	}

	/* res: last distinct word emitted so far; ptr: next entry to examine */
	res = a;
	ptr = a + 1;

	/*
	 * Sort words together with their positions, so that equal words are
	 * adjacent and their positions arrive in ascending order
	 */
	qsort((void *) a, l, sizeof(ParsedWord), compareWORD);

	/*
	 * Initialize first word and its first position
	 */
	tmppos = LIMITPOS(a->pos.pos);
	a->alen = 2;
	a->pos.apos = (uint16 *) palloc(sizeof(uint16) * a->alen);
	a->pos.apos[0] = 1;
	a->pos.apos[1] = tmppos;

	/*
	 * Summarize position information for each word
	 */
	while (ptr - a < l)
	{
		if (!(ptr->len == res->len &&
			  strncmp(ptr->word, res->word, res->len) == 0))
		{
			/*
			 * Got a new word, so put it in result.  res may now be the same
			 * entry as ptr, hence the position is fetched before pos.apos is
			 * stored.
			 */
			res++;
			res->len = ptr->len;
			res->word = ptr->word;
			tmppos = LIMITPOS(ptr->pos.pos);
			res->alen = 2;
			res->pos.apos = (uint16 *) palloc(sizeof(uint16) * res->alen);
			res->pos.apos[0] = 1;
			res->pos.apos[1] = tmppos;
		}
		else
		{
			/*
			 * The word already exists, so its text is no longer needed and
			 * only the position has to be recorded.  A position is appended
			 * only if the list has room left (MAXNUMPOS), the last stored
			 * position has not already hit the maximum (MAXENTRYPOS - 1),
			 * and the new position differs from the last stored one.
			 */
			pfree(ptr->word);
			tmppos = LIMITPOS(ptr->pos.pos);

			if (res->pos.apos[0] < MAXNUMPOS - 1 &&
				res->pos.apos[res->pos.apos[0]] != MAXENTRYPOS - 1 &&
				res->pos.apos[res->pos.apos[0]] != tmppos)
			{
				/* Double the array when the next slot would not fit */
				if (res->pos.apos[0] + 1 >= res->alen)
				{
					res->alen *= 2;
					res->pos.apos = (uint16 *) repalloc(res->pos.apos, sizeof(uint16) * res->alen);
				}

				res->pos.apos[res->pos.apos[0] + 1] = tmppos;
				res->pos.apos[0]++;
			}
		}
		ptr++;
	}

	return res + 1 - a;
}
/*
 * make_tsvector
 *
 * Make value of tsvector, given parsed text.
 *
 *	prs		parsed text; prs->words[0 .. prs->curwords - 1] are the words
 *
 * The words are first merged with uniqueWORD(), and prs->curwords is
 * updated to the number of distinct words.  The result is a freshly
 * palloc0'd TSVector holding one WordEntry per distinct word, followed by
 * the word texts and, for words that have a position array (alen != 0),
 * a uint16 count plus that many WordEntryPos values with weight 0.
 *
 * Side effects: every word string, every position array and the
 * prs->words array itself are pfree'd.
 *
 * Raises ERROR (ERRCODE_PROGRAM_LIMIT_EXCEEDED) if the space needed for
 * texts and positions exceeds MAXSTRPOS.
 *
 * An input with no words (prs->curwords == 0) yields an empty tsvector.
 */
TSVector
make_tsvector(ParsedText *prs)
{
	int			i,
				j,
				lenstr = 0,
				totallen;
	TSVector	in;
	WordEntry  *ptr;
	char	   *str;
	int			stroff;

	/*
	 * Merge duplicate words.  Skip this for empty input: uniqueWORD()
	 * expects at least one word and would otherwise fabricate an entry from
	 * uninitialized data.
	 */
	if (prs->curwords > 0)
		prs->curwords = uniqueWORD(prs->words, prs->curwords);

	/* Determine space needed for word texts and position lists */
	for (i = 0; i < prs->curwords; i++)
	{
		lenstr += prs->words[i].len;
		if (prs->words[i].alen)
		{
			lenstr = SHORTALIGN(lenstr);
			lenstr += sizeof(uint16) + prs->words[i].pos.apos[0] * sizeof(WordEntryPos);
		}
	}

	if (lenstr > MAXSTRPOS)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("string is too long for tsvector (%d bytes, max %d bytes)", lenstr, MAXSTRPOS)));

	totallen = CALCDATASIZE(prs->curwords, lenstr);
	in = (TSVector) palloc0(totallen);
	SET_VARSIZE(in, totallen);
	in->size = prs->curwords;

	ptr = ARRPTR(in);
	str = STRPTR(in);
	stroff = 0;

	/* Fill in one WordEntry per word, consuming the parsed data as we go */
	for (i = 0; i < prs->curwords; i++)
	{
		ptr->len = prs->words[i].len;
		ptr->pos = stroff;
		memcpy(str + stroff, prs->words[i].word, prs->words[i].len);
		stroff += prs->words[i].len;
		pfree(prs->words[i].word);

		if (prs->words[i].alen)
		{
			/* apos[0] is the number of positions that follow it */
			int			k = prs->words[i].pos.apos[0];
			WordEntryPos *wptr;

			if (k > 0xFFFF)
				elog(ERROR, "positions array too long");

			ptr->haspos = 1;
			stroff = SHORTALIGN(stroff);
			*(uint16 *) (str + stroff) = (uint16) k;
			wptr = POSDATAPTR(in, ptr);
			for (j = 0; j < k; j++)
			{
				WEP_SETWEIGHT(wptr[j], 0);
				WEP_SETPOS(wptr[j], prs->words[i].pos.apos[j + 1]);
			}
			stroff += sizeof(uint16) + k * sizeof(WordEntryPos);
			pfree(prs->words[i].pos.apos);
		}
		else
			ptr->haspos = 0;

		ptr++;
	}

	/* Tolerate a caller that never allocated the words array */
	if (prs->words)
		pfree(prs->words);

	return in;
}
/*
 * to_tsvector_byid
 *
 * Argument 0 is the OID of a text search configuration, argument 1 the
 * text to convert.  The text is run through parsetext() and the resulting
 * words are turned into a tsvector by make_tsvector().  If parsing yields
 * no words at all, an empty tsvector (size 0) is returned instead.
 */
Datum
to_tsvector_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgId = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	ParsedText	prs;
	TSVector	out;

	/*
	 * Set up the parse state.  The initial size of the words array is only
	 * an estimate (one word per six bytes of input), with a fallback of two
	 * slots when that estimate comes out as zero.
	 */
	prs.pos = 0;
	prs.curwords = 0;
	prs.lenwords = (VARSIZE(in) - VARHDRSZ) / 6;
	if (prs.lenwords == 0)
		prs.lenwords = 2;
	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

	parsetext(cfgId, &prs, VARDATA(in), VARSIZE(in) - VARHDRSZ);
	PG_FREE_IF_COPY(in, 1);

	if (prs.curwords == 0)
	{
		/* No words found: hand back a header-only tsvector */
		pfree(prs.words);
		out = palloc(CALCDATASIZE(0, 0));
		SET_VARSIZE(out, CALCDATASIZE(0, 0));
		out->size = 0;
		PG_RETURN_POINTER(out);
	}

	out = make_tsvector(&prs);
	PG_RETURN_POINTER(out);
}
/*
 * to_tsvector
 *
 * One-argument form: takes the text as argument 0, obtains the
 * configuration OID from getTSCurrentConfig(true), and delegates to
 * to_tsvector_byid().
 */
Datum
to_tsvector(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(0);
	Oid			cfgId = getTSCurrentConfig(true);
	Datum		result;

	result = DirectFunctionCall2(to_tsvector_byid,
								 ObjectIdGetDatum(cfgId),
								 PointerGetDatum(in));

	PG_RETURN_DATUM(result);
}
/*
* to_tsquery
*/
/*
 * This function is used for morph parsing.
 *
 * The value is passed to parsetext which will call the right dictionary to
 * lexize the word. If it turns out to be a stopword, we push a QI_VALSTOP
 * to the stack.
 *
 * All words belonging to the same variant are pushed as an ANDed list,
 * and different variants are ORred together.  Groups of words found at
 * different positions are ANDed together.
 *
 *	opaque	the text search configuration, passed as an Oid datum
 *	state	parser state handed on to pushValue/pushOperator/pushStop
 *	strval	text of the operand to lexize
 *	lenval	length of strval
 *	weight	passed through unchanged to pushValue
 *	prefix	if true, every pushed value is marked as a prefix match;
 *			otherwise only words flagged TSL_PREFIX are
 *
 * Operators are pushed after their operands: the second and later items
 * of each group are each followed by the operator joining them to what was
 * pushed before.
 */
static void
pushval_morph(Datum opaque, TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
{
	int4		count = 0;
	ParsedText	prs;
	uint32		variant,
				pos,
				cntvar = 0,
				cntpos = 0,
				cnt = 0;
	Oid			cfg_id = DatumGetObjectId(opaque);		/* the input is actually
														 * an Oid, not a pointer */

	prs.lenwords = 4;
	prs.curwords = 0;
	prs.pos = 0;
	prs.words = (ParsedWord *) palloc(sizeof(ParsedWord) * prs.lenwords);

	parsetext(cfg_id, &prs, strval, lenval);

	if (prs.curwords > 0)
	{
		while (count < prs.curwords)
		{
			/* Start of a run of words sharing one position */
			pos = prs.words[count].pos.pos;
			cntvar = 0;
			while (count < prs.curwords && pos == prs.words[count].pos.pos)
			{
				/* Start of a run of words sharing one variant */
				variant = prs.words[count].nvariant;
				cnt = 0;
				while (count < prs.curwords && pos == prs.words[count].pos.pos && variant == prs.words[count].nvariant)
				{
					pushValue(state, prs.words[count].word, prs.words[count].len, weight,
							  (prs.words[count].flags & TSL_PREFIX) || prefix);
					pfree(prs.words[count].word);
					if (cnt)
						pushOperator(state, OP_AND);
					cnt++;
					count++;
				}

				if (cntvar)
					pushOperator(state, OP_OR);
				cntvar++;
			}

			if (cntpos)
				pushOperator(state, OP_AND);
			cntpos++;
		}
	}
	else
		pushStop(state);

	/*
	 * Release the words array whether or not anything was parsed; it was
	 * allocated above and nothing else keeps a reference to it.
	 */
	pfree(prs.words);
}
/*
 * to_tsquery_byid
 *
 * Argument 0 is the OID of a text search configuration, argument 1 the
 * query text.  The text is parsed with parse_tsquery(), using
 * pushval_morph() to process each operand under the given configuration
 * (the final parse_tsquery argument is false here), and the parsed items
 * are then filtered through clean_fakeval().
 *
 * Returns the resulting TSQuery.  It is empty (size 0) if parsing produced
 * no items or clean_fakeval() left nothing.
 */
Datum
to_tsquery_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgid = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	TSQuery		query;
	QueryItem  *res;
	int4		len;

	query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), false);

	if (query->size == 0)
		PG_RETURN_TSQUERY(query);

	/* res is a separate array of len items, or NULL if none survive */
	res = clean_fakeval(GETQUERY(query), &len);
	if (!res)
	{
		/* Nothing left: shrink the value to a bare header */
		SET_VARSIZE(query, HDRSIZETQ);
		query->size = 0;
		PG_RETURN_TSQUERY(query);
	}
	memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));

	if (len != query->size)
	{
		/*
		 * The item array got shorter, so the operand area that follows it
		 * has to be relocated.  Its old address and length must be captured
		 * before query->size is changed, because GETOPERAND() is evaluated
		 * again afterwards to obtain the new location.
		 */
		char	   *oldoperand = GETOPERAND(query);
		int4		lenoperand = VARSIZE(query) - (oldoperand - (char *) query);

		Assert(len < query->size);

		query->size = len;

		/*
		 * Old and new operand areas live in the same allocation and can
		 * overlap, so memmove() is required here; memcpy() would be
		 * undefined.
		 */
		memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
		SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
	}

	pfree(res);
	PG_RETURN_TSQUERY(query);
}
/*
 * to_tsquery
 *
 * One-argument form: takes the query text as argument 0, obtains the
 * configuration OID from getTSCurrentConfig(true), and delegates to
 * to_tsquery_byid().
 */
Datum
to_tsquery(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(0);
	Oid			cfgId = getTSCurrentConfig(true);
	Datum		result;

	result = DirectFunctionCall2(to_tsquery_byid,
								 ObjectIdGetDatum(cfgId),
								 PointerGetDatum(in));

	PG_RETURN_DATUM(result);
}
/*
 * plainto_tsquery_byid
 *
 * Argument 0 is the OID of a text search configuration, argument 1 the
 * query text.  Works like to_tsquery_byid() except that the final
 * parse_tsquery argument is true.
 * NOTE(review): that flag presumably selects plain-text parsing of the
 * input; confirm against parse_tsquery().
 *
 * Returns the resulting TSQuery.  It is empty (size 0) if parsing produced
 * no items or clean_fakeval() left nothing.
 */
Datum
plainto_tsquery_byid(PG_FUNCTION_ARGS)
{
	Oid			cfgid = PG_GETARG_OID(0);
	text	   *in = PG_GETARG_TEXT_P(1);
	TSQuery		query;
	QueryItem  *res;
	int4		len;

	query = parse_tsquery(text_to_cstring(in), pushval_morph, ObjectIdGetDatum(cfgid), true);

	if (query->size == 0)
		PG_RETURN_TSQUERY(query);

	/* res is a separate array of len items, or NULL if none survive */
	res = clean_fakeval(GETQUERY(query), &len);
	if (!res)
	{
		/* Nothing left: shrink the value to a bare header */
		SET_VARSIZE(query, HDRSIZETQ);
		query->size = 0;
		PG_RETURN_TSQUERY(query);
	}
	memcpy((void *) GETQUERY(query), (void *) res, len * sizeof(QueryItem));

	if (len != query->size)
	{
		/*
		 * The item array got shorter, so the operand area that follows it
		 * has to be relocated.  Its old address and length must be captured
		 * before query->size is changed, because GETOPERAND() is evaluated
		 * again afterwards to obtain the new location.
		 */
		char	   *oldoperand = GETOPERAND(query);
		int4		lenoperand = VARSIZE(query) - (oldoperand - (char *) query);

		Assert(len < query->size);

		query->size = len;

		/*
		 * Old and new operand areas live in the same allocation and can
		 * overlap, so memmove() is required here; memcpy() would be
		 * undefined.
		 */
		memmove((void *) GETOPERAND(query), oldoperand, lenoperand);
		SET_VARSIZE(query, COMPUTESIZE(len, lenoperand));
	}

	pfree(res);
	PG_RETURN_TSQUERY(query);
}
/*
 * plainto_tsquery
 *
 * One-argument form: takes the query text as argument 0, obtains the
 * configuration OID from getTSCurrentConfig(true), and delegates to
 * plainto_tsquery_byid().
 */
Datum
plainto_tsquery(PG_FUNCTION_ARGS)
{
	text	   *in = PG_GETARG_TEXT_P(0);
	Oid			cfgId = getTSCurrentConfig(true);
	Datum		result;

	result = DirectFunctionCall2(plainto_tsquery_byid,
								 ObjectIdGetDatum(cfgId),
								 PointerGetDatum(in));

	PG_RETURN_DATUM(result);
}