1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-29 23:43:17 +03:00

Tsearch2 functionality migrates to core. The bulk of this work is by
Oleg Bartunov and Teodor Sigaev, but I did a lot of editorializing,
so anything that's broken is probably my fault.

Documentation is nonexistent as yet, but let's land the patch so we can
get some portability testing done.
This commit is contained in:
Tom Lane
2007-08-21 01:11:32 +00:00
parent 4e94d1f952
commit 140d4ebcb4
200 changed files with 54388 additions and 147 deletions

View File

@@ -1,7 +1,7 @@
#
# Makefile for utils/adt
#
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.64 2007/04/02 03:49:39 tgl Exp $
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.65 2007/08/21 01:11:18 tgl Exp $
#
subdir = src/backend/utils/adt
@@ -25,8 +25,11 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
network.o mac.o inet_net_ntop.o inet_net_pton.o \
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o \
uuid.o
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
tsvector.o tsvector_op.o \
uuid.o xml.o
like.o: like.c like_match.c

View File

@@ -13,7 +13,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.102 2007/06/26 16:48:09 alvherre Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.103 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -27,6 +27,8 @@
#include "catalog/namespace.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_proc.h"
#include "catalog/pg_ts_config.h"
#include "catalog/pg_ts_dict.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "parser/parse_type.h"
@@ -1065,6 +1067,231 @@ regtypesend(PG_FUNCTION_ARGS)
}
/*
 * regconfigin - text input function for regconfig
 *
 * Three spellings are accepted:
 *   "-"             the unknown configuration, returned as InvalidOid
 *   all digits      a numeric OID, passed through oidin unchanged (kept
 *                   for symmetry with the output routine)
 *   anything else   a possibly-qualified name, which is split into its
 *                   components and looked up through TSConfigGetCfgid(),
 *                   i.e. it must match an existing pg_ts_config entry
 *
 * This function is not needed in bootstrap mode, so no attempt is made
 * to make it work there.
 */
Datum
regconfigin(PG_FUNCTION_ARGS)
{
    char       *cfg_name_or_oid = PG_GETARG_CSTRING(0);
    Oid         cfgoid;

    if (strcmp(cfg_name_or_oid, "-") == 0)
    {
        /* '-' stands for "unknown" */
        cfgoid = InvalidOid;
    }
    else if (cfg_name_or_oid[0] != '\0' &&
             strspn(cfg_name_or_oid, "0123456789") == strlen(cfg_name_or_oid))
    {
        /* non-empty and made up of digits only: treat as a numeric OID */
        cfgoid = DatumGetObjectId(DirectFunctionCall1(oidin,
                                        CStringGetDatum(cfg_name_or_oid)));
    }
    else
    {
        /* ordinary case: resolve the (possibly qualified) name */
        List       *qualname = stringToQualifiedNameList(cfg_name_or_oid);

        cfgoid = TSConfigGetCfgid(qualname, false);
    }

    PG_RETURN_OID(cfgoid);
}
/*
 * regconfigout - text output function for regconfig
 *
 * InvalidOid is printed as "-".  An OID that has a pg_ts_config row is
 * printed as the configuration name, schema-qualified only when
 * TSConfigIsVisible() reports that the bare name would not be found.
 * An OID without a matching row is printed as a plain number.
 */
Datum
regconfigout(PG_FUNCTION_ARGS)
{
    Oid         cfgid = PG_GETARG_OID(0);
    HeapTuple   tuple;
    char       *str;

    if (cfgid == InvalidOid)
        PG_RETURN_CSTRING(pstrdup("-"));

    tuple = SearchSysCache(TSCONFIGOID,
                           ObjectIdGetDatum(cfgid),
                           0, 0, 0);

    if (!HeapTupleIsValid(tuple))
    {
        /* no such pg_ts_config row: fall back to the numeric form */
        str = (char *) palloc(NAMEDATALEN);
        snprintf(str, NAMEDATALEN, "%u", cfgid);
    }
    else
    {
        Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(tuple);
        char       *schema;

        /* qualify the name only if regconfigin would not find it bare */
        schema = TSConfigIsVisible(cfgid)
            ? NULL
            : get_namespace_name(cfgform->cfgnamespace);

        str = quote_qualified_identifier(schema, NameStr(cfgform->cfgname));

        ReleaseSysCache(tuple);
    }

    PG_RETURN_CSTRING(str);
}
/*
 * regconfigrecv - converts external binary format to regconfig
 *
 * All work is delegated to oidrecv(), which is invoked with this
 * function's own fcinfo; its result is returned unchanged.
 */
Datum
regconfigrecv(PG_FUNCTION_ARGS)
{
/* Exactly the same as oidrecv, so share code */
return oidrecv(fcinfo);
}
/*
 * regconfigsend - converts regconfig to binary format
 *
 * All work is delegated to oidsend(), which is invoked with this
 * function's own fcinfo; its result is returned unchanged.
 */
Datum
regconfigsend(PG_FUNCTION_ARGS)
{
/* Exactly the same as oidsend, so share code */
return oidsend(fcinfo);
}
/*
 * regdictionaryin - text input function for regdictionary
 *
 * Three spellings are accepted:
 *   "-"             the unknown dictionary, returned as InvalidOid
 *   all digits      a numeric OID, passed through oidin unchanged (kept
 *                   for symmetry with the output routine)
 *   anything else   a possibly-qualified name, which is split into its
 *                   components and looked up through
 *                   TSDictionaryGetDictid(), i.e. it must match an
 *                   existing pg_ts_dict entry
 *
 * This function is not needed in bootstrap mode, so no attempt is made
 * to make it work there.
 */
Datum
regdictionaryin(PG_FUNCTION_ARGS)
{
    char       *dict_name_or_oid = PG_GETARG_CSTRING(0);
    Oid         dictoid;

    if (strcmp(dict_name_or_oid, "-") == 0)
    {
        /* '-' stands for "unknown" */
        dictoid = InvalidOid;
    }
    else if (dict_name_or_oid[0] != '\0' &&
             strspn(dict_name_or_oid, "0123456789") == strlen(dict_name_or_oid))
    {
        /* non-empty and made up of digits only: treat as a numeric OID */
        dictoid = DatumGetObjectId(DirectFunctionCall1(oidin,
                                        CStringGetDatum(dict_name_or_oid)));
    }
    else
    {
        /* ordinary case: resolve the (possibly qualified) name */
        List       *qualname = stringToQualifiedNameList(dict_name_or_oid);

        dictoid = TSDictionaryGetDictid(qualname, false);
    }

    PG_RETURN_OID(dictoid);
}
/*
 * regdictionaryout - text output function for regdictionary
 *
 * InvalidOid is printed as "-".  An OID that has a pg_ts_dict row is
 * printed as the dictionary name, schema-qualified only when
 * TSDictionaryIsVisible() reports that the bare name would not be found.
 * An OID without a matching row is printed as a plain number.
 */
Datum
regdictionaryout(PG_FUNCTION_ARGS)
{
    Oid         dictid = PG_GETARG_OID(0);
    HeapTuple   tuple;
    char       *str;

    if (dictid == InvalidOid)
        PG_RETURN_CSTRING(pstrdup("-"));

    tuple = SearchSysCache(TSDICTOID,
                           ObjectIdGetDatum(dictid),
                           0, 0, 0);

    if (!HeapTupleIsValid(tuple))
    {
        /* no such pg_ts_dict row: fall back to the numeric form */
        str = (char *) palloc(NAMEDATALEN);
        snprintf(str, NAMEDATALEN, "%u", dictid);
    }
    else
    {
        Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(tuple);
        char       *schema;

        /* qualify the name only if regdictionaryin would not find it bare */
        schema = TSDictionaryIsVisible(dictid)
            ? NULL
            : get_namespace_name(dictform->dictnamespace);

        str = quote_qualified_identifier(schema, NameStr(dictform->dictname));

        ReleaseSysCache(tuple);
    }

    PG_RETURN_CSTRING(str);
}
/*
 * regdictionaryrecv - converts external binary format to regdictionary
 *
 * All work is delegated to oidrecv(), which is invoked with this
 * function's own fcinfo; its result is returned unchanged.
 */
Datum
regdictionaryrecv(PG_FUNCTION_ARGS)
{
/* Exactly the same as oidrecv, so share code */
return oidrecv(fcinfo);
}
/*
 * regdictionarysend - converts regdictionary to binary format
 *
 * All work is delegated to oidsend(), which is invoked with this
 * function's own fcinfo; its result is returned unchanged.
 */
Datum
regdictionarysend(PG_FUNCTION_ARGS)
{
/* Exactly the same as oidsend, so share code */
return oidsend(fcinfo);
}
/*
* text_regclass: convert text to regclass
*

View File

@@ -15,7 +15,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.234 2007/05/05 17:05:48 mha Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.235 2007/08/21 01:11:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -2822,6 +2822,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
case REGOPERATOROID:
case REGCLASSOID:
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
@@ -2925,6 +2927,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
case REGOPERATOROID:
case REGCLASSOID:
case REGTYPEOID:
case REGCONFIGOID:
case REGDICTIONARYOID:
/* we can treat OIDs as integers... */
return (double) DatumGetObjectId(value);
}

View File

@@ -0,0 +1,157 @@
/*-------------------------------------------------------------------------
*
* tsginidx.c
* GIN support functions for tsvector_ops
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/skey.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
Datum
gin_extract_tsvector(PG_FUNCTION_ARGS)
{
TSVector vector = PG_GETARG_TSVECTOR(0);
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
Datum *entries = NULL;
*nentries = 0;
if (vector->size > 0)
{
int i;
WordEntry *we = ARRPTR(vector);
*nentries = (uint32) vector->size;
entries = (Datum *) palloc(sizeof(Datum) * vector->size);
for (i = 0; i < vector->size; i++)
{
text *txt = (text *) palloc(VARHDRSZ + we->len);
SET_VARSIZE(txt, VARHDRSZ + we->len);
memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len);
entries[i] = PointerGetDatum(txt);
we++;
}
}
PG_FREE_IF_COPY(vector, 0);
PG_RETURN_POINTER(entries);
}
Datum
gin_extract_query(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
StrategyNumber strategy = PG_GETARG_UINT16(2);
Datum *entries = NULL;
*nentries = 0;
if (query->size > 0)
{
int4 i,
j = 0,
len;
QueryItem *item;
item = clean_NOT(GETQUERY(query), &len);
if (!item)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("query requires full scan, which is not supported by GIN indexes")));
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
(*nentries)++;
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
{
text *txt;
txt = (text *) palloc(VARHDRSZ + item[i].length);
SET_VARSIZE(txt, VARHDRSZ + item[i].length);
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
entries[j++] = PointerGetDatum(txt);
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("@@ operator does not support lexeme class restrictions"),
errhint("Use the @@@ operator instead.")));
}
}
else
*nentries = -1; /* nothing can be found */
PG_FREE_IF_COPY(query, 0);
PG_RETURN_POINTER(entries);
}
/*
 * State handed to checkcondition_gin() through its opaque checkval argument.
 */
typedef struct
{
QueryItem *frst; /* first element of the query's item array */
bool *mapped_check; /* check flags, indexed by (item - frst) */
} GinChkVal;
/*
 * TS_execute() callback for GIN: report the check flag that was recorded
 * for query item 'val'.  'checkval' points to a GinChkVal; the flag is
 * found at the item's offset from the first query item.
 */
static bool
checkcondition_gin(void *checkval, QueryItem * val)
{
    GinChkVal  *state = (GinChkVal *) checkval;
    bool       *flags = state->mapped_check;

    return flags[val - state->frst];
}
Datum
gin_ts_consistent(PG_FUNCTION_ARGS)
{
bool *check = (bool *) PG_GETARG_POINTER(0);
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
TSQuery query = PG_GETARG_TSQUERY(2);
bool res = FALSE;
if (query->size > 0)
{
int4 i,
j = 0;
QueryItem *item;
GinChkVal gcv;
gcv.frst = item = GETQUERY(query);
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
for (i = 0; i < query->size; i++)
if (item[i].type == VAL)
gcv.mapped_check[i] = check[j++];
res = TS_execute(
GETQUERY(query),
&gcv,
true,
checkcondition_gin
);
}
PG_RETURN_BOOL(res);
}

View File

@@ -0,0 +1,784 @@
/*-------------------------------------------------------------------------
*
* tsgistidx.c
* GiST support functions for tsvector_ops
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/gist.h"
#include "access/tuptoaster.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/pg_crc.h"
/*
 * Signature geometry.  SIGLENINT is the signature length in int4 units,
 * SIGLEN the same length in bytes and SIGLENBIT the length in bits.
 */
#define SIGLENINT 31 /* values above 121 make the key toast, and then
* it will not work !!! */
#define SIGLEN ( sizeof(int4) * SIGLENINT )
#define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
/* A signature is an array of SIGLEN bytes; BITVECP points to its bytes. */
typedef char BITVEC[SIGLEN];
typedef char *BITVECP;
/*
 * Run statement 'a' once for every byte index of a signature.  The caller
 * must have an integer variable named 'i' in scope; it is the loop index.
 */
#define LOOPBYTE(a) \
for(i=0;i<SIGLEN;i++) {\
a;\
}
/* GETBYTE: the byte of bit vector x that holds bit number i (an lvalue) */
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
/* GETBITBYTE: bit number i of the single byte value x */
#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
/* CLRBIT / SETBIT / GETBIT: clear, set or read bit number i of bit vector x */
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
/* HASHVAL: map a value to a bit number in the range 0 .. SIGLENBIT-1 */
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
/* HASH: set the signature bit that 'val' maps to */
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
/* GETENTRY: key of entry number 'pos' of a GistEntryVector, as SignTSVector* */
#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
/*
 * type of GiST index key
 *
 * A key is a varlena whose payload starts right after the 'flag' word.
 * Depending on the flag bits (ARRKEY, SIGNKEY, ALLISTRUE) the payload is
 * an array of int4 hash values, a bit signature, or absent; it is reached
 * through the GETARR()/GETSIGN() macros rather than through 'data'.
 */
typedef struct
{
    int32       vl_len_;        /* varlena header (do not touch directly!) */
    int4        flag;           /* ARRKEY, or SIGNKEY optionally with ALLISTRUE */
    char        data[1];        /* first byte of the payload */
} SignTSVector;
/* Bits of SignTSVector.flag */
#define ARRKEY 0x01 /* payload is an array of int4 values */
#define SIGNKEY 0x02 /* payload is a bit signature */
#define ALLISTRUE 0x04 /* every signature bit is set; no payload stored */
/* Flag tests; each yields the masked flag bit (nonzero when set) */
#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x) ( ((SignTSVector*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x) ( ((SignTSVector*)(x))->flag & ALLISTRUE )
/* GTHDRSIZE: bytes in front of the payload (varlena header plus flag word) */
#define GTHDRSIZE ( VARHDRSZ + sizeof(int4) )
/*
 * CALCGTSIZE: total key size for the given flag.  An array key adds 'len'
 * int4 elements, an ALLISTRUE key adds nothing, any other key adds a full
 * SIGLEN-byte signature.
 */
#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )
/* GETSIGN / GETARR: payload of key x viewed as signature or as int4 array */
#define GETSIGN(x) ( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
#define GETARR(x) ( (int4*)( (char*)(x)+GTHDRSIZE ) )
/* ARRNELEM: number of int4 elements in array key x, derived from VARSIZE */
#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) )
/*
 * Number of one-bits in an unsigned byte.
 *
 * Lookup table indexed by the byte value (0..255); used to count the set
 * bits of a signature one byte at a time.
 */
static const uint8 number_of_ones[256] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
static int4 sizebitvec(BITVECP sign);
/*
 * gtsvectorin - input function for the GiST key type
 *
 * Text input is not supported: the function always raises
 * ERRCODE_FEATURE_NOT_SUPPORTED.  The return statement after ereport()
 * is never expected to be reached at ERROR level.
 */
Datum
gtsvectorin(PG_FUNCTION_ARGS)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("gtsvector_in not implemented")));
PG_RETURN_DATUM(0);
}
#define SINGOUTSTR "%d true bits, %d false bits"
#define ARROUTSTR "%d unique words"
#define EXTRALEN ( 2*13 )
static int outbuf_maxlen = 0;

/*
 * gtsvectorout - output function for the GiST key type
 *
 * Prints a summary rather than the key contents: the element count for an
 * array key, or the numbers of set and unset bits for a signature key (an
 * ALLISTRUE key counts as SIGLENBIT set bits).  The buffer size is worked
 * out from the two format strings on the first call and remembered in
 * outbuf_maxlen.
 */
Datum
gtsvectorout(PG_FUNCTION_ARGS)
{
    SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0)));
    char       *outbuf;

    if (outbuf_maxlen == 0)
        outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1;

    outbuf = palloc(outbuf_maxlen);

    if (!ISARRKEY(key))
    {
        int         cnttrue;

        if (ISALLTRUE(key))
            cnttrue = SIGLENBIT;
        else
            cnttrue = sizebitvec(GETSIGN(key));

        sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue);
    }
    else
        sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));

    PG_FREE_IF_COPY(key, 0);
    PG_RETURN_POINTER(outbuf);
}
static int
compareint(const void *a, const void *b)
{
if (*((int4 *) a) == *((int4 *) b))
return 0;
return (*((int4 *) a) > *((int4 *) b)) ? 1 : -1;
}
/*
 * Sort the int4 array 'a' of length 'l' in ascending order and remove
 * duplicates in place.
 *
 * Returns the number of distinct values, which are left at the front of
 * the array.  Arrays with fewer than two elements are returned untouched
 * with their own length; in particular an empty array yields 0 (without
 * this guard the compaction below would report one element for it).
 */
static int
uniqueint(int4 *a, int4 l)
{
    int4       *ptr,
               *res;

    if (l <= 1)
        return l;

    ptr = res = a;

    qsort((void *) a, l, sizeof(int4), compareint);

    /* 'res' marks the last value kept, 'ptr' scans the sorted input */
    while (ptr - a < l)
        if (*ptr != *res)
            *(++res) = *ptr++;
        else
            ptr++;

    return res + 1 - a;
}
/*
 * Build a bit signature from an array key: clear 'sign', then set the bit
 * that each array element hashes to.
 */
static void
makesign(BITVECP sign, SignTSVector * a)
{
    int4       *hashes = GETARR(a);
    int4        nhashes = ARRNELEM(a);
    int4        idx;

    MemSet((void *) sign, 0, sizeof(BITVEC));

    for (idx = 0; idx < nhashes; idx++)
    {
        HASH(sign, hashes[idx]);
    }
}
/*
 * gtsvector_compress - GiST compress method
 *
 * For a leaf key (a tsvector) a new entry is built whose key is the sorted,
 * duplicate-free array of the CRC32 values of the lexemes; if that array
 * key is larger than TOAST_INDEX_TARGET it is replaced by a bit signature.
 *
 * For a non-leaf signature key that is not yet flagged ALLISTRUE, the
 * signature is scanned; if every byte is 0xff the key is replaced by the
 * compact ALLISTRUE form, otherwise the entry is returned unchanged.
 *
 * In all other cases the input entry itself is returned.
 */
Datum
gtsvector_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{ /* tsvector */
SignTSVector *res;
TSVector val = DatumGetTSVector(entry->key);
int4 len;
int4 *arr;
WordEntry *ptr = ARRPTR(val);
char *words = STRPTR(val);
/* start with room for one array element per lexeme */
len = CALCGTSIZE(ARRKEY, val->size);
res = (SignTSVector *) palloc(len);
SET_VARSIZE(res, len);
res->flag = ARRKEY;
arr = GETARR(res);
len = val->size;
/* store the CRC32 of each lexeme */
while (len--)
{
pg_crc32 c;
INIT_CRC32(c);
COMP_CRC32(c, words + ptr->pos, ptr->len);
FIN_CRC32(c);
*arr = *(int4 *) &c;
arr++;
ptr++;
}
/* sort the values and drop duplicates */
len = uniqueint(GETARR(res), val->size);
if (len != val->size)
{
/*
 * Some lexemes collided under the hash function; len is expected
 * to be less than val->size here, so the key is shrunk to fit.
 */
len = CALCGTSIZE(ARRKEY, len);
res = (SignTSVector *) repalloc((void *) res, len);
SET_VARSIZE(res, len);
}
/* make signature, if array is too long */
if (VARSIZE(res) > TOAST_INDEX_TARGET)
{
SignTSVector *ressign;
len = CALCGTSIZE(SIGNKEY, 0);
ressign = (SignTSVector *) palloc(len);
SET_VARSIZE(ressign, len);
ressign->flag = SIGNKEY;
makesign(GETSIGN(ressign), res);
res = ressign;
}
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
entry->offset, FALSE);
}
else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
!ISALLTRUE(DatumGetPointer(entry->key)))
{
int4 i,
len;
SignTSVector *res;
BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
/*
 * Note the return hidden inside the loop macro: as soon as one byte is
 * not 0xff the function returns the original entry.
 */
LOOPBYTE(
if ((sign[i] & 0xff) != 0xff)
PG_RETURN_POINTER(retval);
);
/* every bit is set: switch to the payload-free ALLISTRUE key */
len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
res = (SignTSVector *) palloc(len);
SET_VARSIZE(res, len);
res->flag = SIGNKEY | ALLISTRUE;
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(res),
entry->rel, entry->page,
entry->offset, FALSE);
}
PG_RETURN_POINTER(retval);
}
Datum
gtsvector_decompress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
if (key != (SignTSVector *) DatumGetPointer(entry->key))
{
GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
gistentryinit(*retval, PointerGetDatum(key),
entry->rel, entry->page,
entry->offset, FALSE);
PG_RETURN_POINTER(retval);
}
PG_RETURN_POINTER(entry);
}
/*
 * Search range handed to checkcondition_arr() through its opaque checkval
 * argument: the half-open span [arrb, arre) of an int4 array.
 */
typedef struct
{
int4 *arrb; /* first element of the array */
int4 *arre; /* one past the last element */
} CHKVAL;
/*
 * TS_execute() callback for array keys: binary search for val->val in the
 * int4 range described by the CHKVAL that 'checkval' points to.  The
 * search relies on the range being in ascending order.
 */
static bool
checkcondition_arr(void *checkval, QueryItem * val)
{
    CHKVAL     *range = (CHKVAL *) checkval;
    int4       *lo = range->arrb;
    int4       *hi = range->arre;

    /* the wanted value, if present, always lies within [lo, hi) */
    while (lo < hi)
    {
        int4       *mid = lo + (hi - lo) / 2;

        if (*mid < val->val)
            lo = mid + 1;
        else if (*mid == val->val)
            return (true);
        else
            hi = mid;
    }

    return (false);
}
/*
 * TS_execute() callback for signature keys: 'checkval' is the bit
 * signature itself; return the bit that val->val hashes to.
 */
static bool
checkcondition_bit(void *checkval, QueryItem * val)
{
return GETBIT(checkval, HASHVAL(val->val));
}
Datum
gtsvector_consistent(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(1);
SignTSVector *key = (SignTSVector *) DatumGetPointer(
((GISTENTRY *) PG_GETARG_POINTER(0))->key
);
if (!query->size)
PG_RETURN_BOOL(false);
if (ISSIGNKEY(key))
{
if (ISALLTRUE(key))
PG_RETURN_BOOL(true);
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) GETSIGN(key), false,
checkcondition_bit
));
}
else
{ /* only leaf pages */
CHKVAL chkval;
chkval.arrb = GETARR(key);
chkval.arre = chkval.arrb + ARRNELEM(key);
PG_RETURN_BOOL(TS_execute(
GETQUERY(query),
(void *) &chkval, true,
checkcondition_arr
));
}
}
/*
 * Merge key 'add' into the signature 'sbase'.
 *
 * Returns 1 if 'add' is an ALLISTRUE key (sbase is then left untouched and
 * the caller has to treat the union as all-true), 0 otherwise.
 */
static int4
unionkey(BITVECP sbase, SignTSVector * add)
{
    int4        i;

    if (!ISSIGNKEY(add))
    {
        /* array key: set the bit of every stored hash value */
        int4       *hashes = GETARR(add);

        for (i = 0; i < ARRNELEM(add); i++)
            HASH(sbase, hashes[i]);
        return 0;
    }

    if (ISALLTRUE(add))
        return 1;

    {
        /* plain signature: OR it in byte by byte */
        BITVECP     sadd = GETSIGN(add);

        for (i = 0; i < SIGLEN; i++)
            sbase[i] |= sadd[i];
    }

    return 0;
}
/*
 * gtsvector_union - GiST union method
 *
 * Argument 0 is the entry vector, argument 1 receives the size of the
 * result.  The result is always a signature key: the OR of all input keys,
 * or the payload-free ALLISTRUE form as soon as any input is ALLISTRUE.
 */
Datum
gtsvector_union(PG_FUNCTION_ARGS)
{
    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
    int        *size = (int *) PG_GETARG_POINTER(1);
    BITVEC      merged;
    int4        flag = SIGNKEY;
    int4        nbytes;
    int4        idx;
    SignTSVector *result;

    MemSet((void *) merged, 0, sizeof(BITVEC));

    for (idx = 0; idx < entryvec->n; idx++)
    {
        if (unionkey(merged, GETENTRY(entryvec, idx)))
        {
            /* one all-true member makes the whole union all-true */
            flag |= ALLISTRUE;
            break;
        }
    }

    nbytes = CALCGTSIZE(flag, 0);
    result = (SignTSVector *) palloc(nbytes);
    *size = nbytes;
    SET_VARSIZE(result, nbytes);
    result->flag = flag;

    if (!(flag & ALLISTRUE))
        memcpy((void *) GETSIGN(result), (void *) merged, sizeof(BITVEC));

    PG_RETURN_POINTER(result);
}
Datum
gtsvector_same(PG_FUNCTION_ARGS)
{
SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0);
SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1);
bool *result = (bool *) PG_GETARG_POINTER(2);
if (ISSIGNKEY(a))
{ /* then b also ISSIGNKEY */
if (ISALLTRUE(a) && ISALLTRUE(b))
*result = true;
else if (ISALLTRUE(a))
*result = false;
else if (ISALLTRUE(b))
*result = false;
else
{
int4 i;
BITVECP sa = GETSIGN(a),
sb = GETSIGN(b);
*result = true;
LOOPBYTE(
if (sa[i] != sb[i])
{
*result = false;
break;
}
);
}
}
else
{ /* a and b ISARRKEY */
int4 lena = ARRNELEM(a),
lenb = ARRNELEM(b);
if (lena != lenb)
*result = false;
else
{
int4 *ptra = GETARR(a),
*ptrb = GETARR(b);
int4 i;
*result = true;
for (i = 0; i < lena; i++)
if (ptra[i] != ptrb[i])
{
*result = false;
break;
}
}
}
PG_RETURN_POINTER(result);
}
/*
 * Count the set bits of a SIGLEN-byte signature, one table lookup per byte.
 */
static int4
sizebitvec(BITVECP sign)
{
    int4        nbits = 0;
    int4        i;

    for (i = 0; i < SIGLEN; i++)
        nbits += number_of_ones[(unsigned char) sign[i]];

    return nbits;
}
/*
 * Hamming distance between two SIGLEN-byte signatures: the number of bit
 * positions in which they differ.
 */
static int
hemdistsign(BITVECP a, BITVECP b)
{
    int         dist = 0;
    int         i;

    for (i = 0; i < SIGLEN; i++)
    {
        int         diff = (unsigned char) (a[i] ^ b[i]);

        dist += number_of_ones[diff];
    }

    return dist;
}
/*
 * Hamming distance between two signature keys.  An ALLISTRUE key is
 * handled as a signature with every bit set, so its distance to another
 * key is the number of unset bits in that key.
 */
static int
hemdist(SignTSVector * a, SignTSVector * b)
{
    if (ISALLTRUE(a) && ISALLTRUE(b))
        return 0;

    if (ISALLTRUE(a))
        return SIGLENBIT - sizebitvec(GETSIGN(b));

    if (ISALLTRUE(b))
        return SIGLENBIT - sizebitvec(GETSIGN(a));

    return hemdistsign(GETSIGN(a), GETSIGN(b));
}
/*
 * gtsvector_penalty - GiST penalty method
 *
 * Argument 0 is the existing entry (always a signature key), argument 1
 * the entry to insert, argument 2 points to the float that receives the
 * penalty.  The penalty is the Hamming distance between the two keys.  An
 * array key is first converted to a signature; if the existing key is
 * ALLISTRUE in that case, the count of unset bits is additionally scaled
 * by 1 / (SIGLENBIT + 1).
 */
Datum
gtsvector_penalty(PG_FUNCTION_ARGS)
{
    GISTENTRY  *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
    GISTENTRY  *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
    float      *penalty = (float *) PG_GETARG_POINTER(2);
    SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key);
    SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key);

    *penalty = 0.0;

    if (!ISARRKEY(newval))
        *penalty = hemdist(origval, newval);
    else
    {
        BITVEC      newsign;

        makesign(newsign, newval);

        if (ISALLTRUE(origval))
            *penalty = ((float) (SIGLENBIT - sizebitvec(newsign))) / (float) (SIGLENBIT + 1);
        else
            *penalty = hemdistsign(newsign, GETSIGN(origval));
    }

    PG_RETURN_POINTER(penalty);
}
/*
 * Signature of one entry, kept in expanded form (see fillcache()).
 */
typedef struct
{
bool allistrue; /* true if the key was ALLISTRUE; 'sign' is then not filled */
BITVEC sign; /* the signature bits otherwise */
} CACHESIGN;
/*
 * Fill a cache slot from an index key: an array key is hashed into a
 * signature, an ALLISTRUE key is only flagged (its sign is left as is),
 * and a plain signature key is copied.
 */
static void
fillcache(CACHESIGN * item, SignTSVector * key)
{
    if (ISARRKEY(key))
    {
        item->allistrue = false;
        makesign(item->sign, key);
    }
    else if (ISALLTRUE(key))
    {
        item->allistrue = true;
    }
    else
    {
        item->allistrue = false;
        memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
    }
}
/*
 * WISH_F(a,b,c): -(a-b)^3 * c as a double.  The picksplit code adds it to
 * one side of its distance comparison with a and b being the current
 * left and right entry counts.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/*
 * Sort record used while splitting a page: an entry's offset together
 * with the cost by which the entries are ordered.
 */
typedef struct
{
OffsetNumber pos; /* offset of the entry within the entry vector */
int4 cost; /* sort key; smaller values sort first */
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
return 0;
else
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}
/*
 * Hamming distance between two cached signatures.  An all-true slot is
 * handled as a signature with every bit set.
 */
static int
hemdistcache(CACHESIGN * a, CACHESIGN * b)
{
    if (a->allistrue && b->allistrue)
        return 0;

    if (a->allistrue)
        return SIGLENBIT - sizebitvec(b->sign);

    if (b->allistrue)
        return SIGLENBIT - sizebitvec(a->sign);

    return hemdistsign(a->sign, b->sign);
}
/*
 * gtsvector_picksplit - GiST picksplit method
 *
 * Argument 0 is the entry vector of the page to split, argument 1 the
 * GIST_SPLITVEC to fill in; the latter is also the return value.
 *
 * Outline:
 *  1. Expand the keys into a cache of signatures and pick as seeds the
 *     pair of entries with the largest Hamming distance.  The last entry
 *     of the vector is not considered in this search.
 *  2. Start the left and right union keys from the two seeds.
 *  3. Order all entries by |distance to seed 1 - distance to seed 2|,
 *     smallest first.
 *  4. Hand out the entries in that order to the side whose current union
 *     is nearer, with a WISH_F() bias computed from the current sizes of
 *     the two sides, and OR each entry into the union of its side.
 *
 * Distances involving an all-true signature are computed as the number of
 * unset bits of the other signature.
 */
Datum
gtsvector_picksplit(PG_FUNCTION_ARGS)
{
    GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
    GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
    OffsetNumber k,
                j;
    SignTSVector *datum_l,
               *datum_r;
    BITVECP     union_l,
                union_r;
    int4        size_alpha,
                size_beta;
    int4        size_waste,
                waste = -1;
    int4        nbytes;
    OffsetNumber seed_1 = 0,
                seed_2 = 0;
    OffsetNumber *left,
               *right;
    OffsetNumber maxoff;
    BITVECP     ptr;
    int         i;
    CACHESIGN  *cache;
    SPLITCOST  *costvector;

    maxoff = entryvec->n - 2;
    nbytes = (maxoff + 2) * sizeof(OffsetNumber);
    v->spl_left = (OffsetNumber *) palloc(nbytes);
    v->spl_right = (OffsetNumber *) palloc(nbytes);

    cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
    fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));

    /*
     * Seed search: try every pair and keep the most distant one.  Cache
     * slots are filled lazily during the first pass of the outer loop.
     */
    for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
    {
        for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
        {
            if (k == FirstOffsetNumber)
                fillcache(&cache[j], GETENTRY(entryvec, j));

            size_waste = hemdistcache(&(cache[j]), &(cache[k]));
            if (size_waste > waste)
            {
                waste = size_waste;
                seed_1 = k;
                seed_2 = j;
            }
        }
    }

    left = v->spl_left;
    v->spl_nleft = 0;
    right = v->spl_right;
    v->spl_nright = 0;

    /* no pair was examined at all: fall back to the first two entries */
    if (seed_1 == 0 || seed_2 == 0)
    {
        seed_1 = 1;
        seed_2 = 2;
    }

    /* form initial .. */
    if (cache[seed_1].allistrue)
    {
        datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        datum_l->flag = SIGNKEY | ALLISTRUE;
    }
    else
    {
        datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
        SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
        datum_l->flag = SIGNKEY;
        memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
    }
    if (cache[seed_2].allistrue)
    {
        datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
        datum_r->flag = SIGNKEY | ALLISTRUE;
    }
    else
    {
        datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
        SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
        datum_r->flag = SIGNKEY;
        memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
    }

    union_l = GETSIGN(datum_l);
    union_r = GETSIGN(datum_r);

    /* from here on the last entry of the vector takes part as well */
    maxoff = OffsetNumberNext(maxoff);
    fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));

    /* sort before ... */
    costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
    for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
    {
        costvector[j - 1].pos = j;
        size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
        size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
        costvector[j - 1].cost = Abs(size_alpha - size_beta);
    }
    qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);

    for (k = 0; k < maxoff; k++)
    {
        j = costvector[k].pos;

        /* the seeds go to their own side; their bits are already in place */
        if (j == seed_1)
        {
            *left++ = j;
            v->spl_nleft++;
            continue;
        }
        else if (j == seed_2)
        {
            *right++ = j;
            v->spl_nright++;
            continue;
        }

        /*
         * Distance of entry j to the left union.  cache[j].sign is a bare
         * BITVEC and must be used directly: GETSIGN() skips a key header
         * and is only valid on a SignTSVector such as datum_l.
         */
        if (ISALLTRUE(datum_l) || cache[j].allistrue)
        {
            if (ISALLTRUE(datum_l) && cache[j].allistrue)
                size_alpha = 0;
            else
                size_alpha = SIGLENBIT - sizebitvec(
                    (cache[j].allistrue) ? GETSIGN(datum_l) : cache[j].sign
                    );
        }
        else
            size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));

        /* likewise for the right union */
        if (ISALLTRUE(datum_r) || cache[j].allistrue)
        {
            if (ISALLTRUE(datum_r) && cache[j].allistrue)
                size_beta = 0;
            else
                size_beta = SIGLENBIT - sizebitvec(
                    (cache[j].allistrue) ? GETSIGN(datum_r) : cache[j].sign
                    );
        }
        else
            size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));

        if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
        {
            if (ISALLTRUE(datum_l) || cache[j].allistrue)
            {
                /* an all-true member saturates the union's signature */
                if (!ISALLTRUE(datum_l))
                    MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
            }
            else
            {
                ptr = cache[j].sign;
                LOOPBYTE(
                         union_l[i] |= ptr[i];
                    );
            }
            *left++ = j;
            v->spl_nleft++;
        }
        else
        {
            if (ISALLTRUE(datum_r) || cache[j].allistrue)
            {
                /* an all-true member saturates the union's signature */
                if (!ISALLTRUE(datum_r))
                    MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
            }
            else
            {
                ptr = cache[j].sign;
                LOOPBYTE(
                         union_r[i] |= ptr[i];
                    );
            }
            *right++ = j;
            v->spl_nright++;
        }
    }

    /* store FirstOffsetNumber in the slot after the last entry of each list */
    *right = *left = FirstOffsetNumber;
    v->spl_ldatum = PointerGetDatum(datum_l);
    v->spl_rdatum = PointerGetDatum(datum_r);

    PG_RETURN_POINTER(v);
}

View File

@@ -0,0 +1,767 @@
/*-------------------------------------------------------------------------
*
* tsquery.c
* I/O functions for tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"
/* parser's states (stored in the parser state's 'state' field) */
#define WAITOPERAND 1 /* an operand, '!' or '(' must come next */
#define WAITOPERATOR 2 /* an operator, ')' or end of input must come next */
#define WAITFIRSTOPERAND 3 /* like WAITOPERAND, but finding no operand
* ends the parse instead of raising an error */
#define WAITSINGLEOPERAND 4 /* the rest of the input is taken as one operand */
/*
 * node of query tree, also used
 * for storing polish notation in parser
 *
 * Nodes are chained through 'next'; pushquery() puts each new node at the
 * head of the chain.
 */
typedef struct ParseQueryNode
{
int2 weight; /* weight bitmask given for an operand */
int2 type; /* item type, e.g. VAL or OPR */
int4 val; /* operator character, or CRC32 of the operand */
int2 distance; /* operand's offset within the operand buffer */
int2 length; /* operand's length in bytes */
struct ParseQueryNode *next;
} ParseQueryNode;
/*
 * Parse an optional weight suffix of the form ":abcd" at 'buf'.
 *
 * *weight is set to a bitmask with bit 3 for A, bit 2 for B, bit 1 for C
 * and bit 0 for D (letters are accepted in either case), or to 0 if 'buf'
 * does not start with ':'.  Returns a pointer to the first character that
 * is not part of the suffix.
 */
static char *
get_weight(char *buf, int2 *weight)
{
    *weight = 0;

    if (!t_iseq(buf, ':'))
        return buf;

    /* only single-byte characters can be weight letters */
    for (buf++; *buf && pg_mblen(buf) == 1; buf++)
    {
        char        ch = *buf;

        if (ch == 'a' || ch == 'A')
            *weight |= 1 << 3;
        else if (ch == 'b' || ch == 'B')
            *weight |= 1 << 2;
        else if (ch == 'c' || ch == 'C')
            *weight |= 1 << 1;
        else if (ch == 'd' || ch == 'D')
            *weight |= 1;
        else
            return buf;
    }

    return buf;
}
/*
 * get token from query string
 *
 * Scans forward from state->buf and returns the type of the next token:
 * VAL, OPR, OPEN, CLOSE, END or ERR.  For OPR the operator character is
 * stored in *val; for VAL the operand text is described by *strval and
 * *lenval, and (except in WAITSINGLEOPERAND state) *weight receives the
 * parsed weight suffix.  state->state is updated to what is expected next,
 * and state->count tracks the nesting depth of parentheses.
 */
static int4
gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
{
while (1)
{
switch (state->state)
{
case WAITFIRSTOPERAND:
case WAITOPERAND:
if (t_iseq(state->buf, '!'))
{
(state->buf)++; /* can safely ++, t_iseq guarantees
* that pg_mblen()==1 */
*val = (int4) '!';
state->state = WAITOPERAND;
return OPR;
}
else if (t_iseq(state->buf, '('))
{
state->count++;
(state->buf)++;
state->state = WAITOPERAND;
return OPEN;
}
else if (t_iseq(state->buf, ':'))
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error at start of operand in tsearch query: \"%s\"",
state->buffer)));
}
else if (!t_isspace(state->buf))
{
/* let the tsvector tokenizer extract the operand */
state->valstate.prsbuf = state->buf;
if (gettoken_tsvector(&(state->valstate)))
{
*strval = state->valstate.word;
*lenval = state->valstate.curpos - state->valstate.word;
state->buf = get_weight(state->valstate.prsbuf, weight);
state->state = WAITOPERATOR;
return VAL;
}
else if (state->state == WAITFIRSTOPERAND)
return END;
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("no operand in tsearch query: \"%s\"",
state->buffer)));
}
break;
case WAITOPERATOR:
if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
{
state->state = WAITOPERAND;
*val = (int4) *(state->buf);
(state->buf)++;
return OPR;
}
else if (t_iseq(state->buf, ')'))
{
(state->buf)++;
state->count--;
/* more ')' than '(' is an error */
return (state->count < 0) ? ERR : CLOSE;
}
else if (*(state->buf) == '\0')
/* end of input is only valid with all parentheses closed */
return (state->count) ? ERR : END;
else if (!t_isspace(state->buf))
return ERR;
break;
case WAITSINGLEOPERAND:
if (*(state->buf) == '\0')
return END;
/* everything up to the end of the string is one operand */
*strval = state->buf;
*lenval = strlen(state->buf);
state->buf += strlen(state->buf);
state->count++;
return VAL;
default:
return ERR;
break;
}
/* nothing recognized at this position: skip one (multibyte) character */
state->buf += pg_mblen(state->buf);
}
return END;
}
/*
 * Push a new item onto the parser's list, which holds the query in
 * reverse polish order (newest item first).
 *
 * An error is raised if 'distance' is not below MAXSTRPOS or 'lenval' is
 * not below MAXSTRLEN.  state->num counts the items pushed so far.
 */
void
pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
{
    ParseQueryNode *node = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));

    if (distance >= MAXSTRPOS)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("value is too big in tsearch query: \"%s\"",
                        state->buffer)));
    if (lenval >= MAXSTRLEN)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("operand is too long in tsearch query: \"%s\"",
                        state->buffer)));

    node->type = type;
    node->val = val;
    node->weight = weight;
    node->distance = distance;
    node->length = lenval;

    /* link in at the head of the list */
    node->next = state->str;
    state->str = node;
    state->num++;
}
/*
 * Operand callback used for tsquery parsing: take the operand text as is.
 *
 * The item pushed carries the CRC32 of the text and the offset the text
 * will have in the operand buffer.  The text, followed by a terminating
 * zero byte, is then appended to that buffer, which is doubled in size as
 * often as needed.  An operand of MAXSTRLEN bytes or more is rejected.
 */
void
pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
{
    pg_crc32    crc;

    if (lenval >= MAXSTRLEN)
        ereport(ERROR,
                (errcode(ERRCODE_SYNTAX_ERROR),
                 errmsg("word is too long in tsearch query: \"%s\"",
                        state->buffer)));

    INIT_CRC32(crc);
    COMP_CRC32(crc, strval, lenval);
    FIN_CRC32(crc);

    pushquery(state, type, *(int4 *) &crc,
              state->curop - state->op, lenval, weight);

    /* grow the operand buffer until text plus terminator fit */
    while (state->curop - state->op + lenval + 1 >= state->lenop)
    {
        int4        used = state->curop - state->op;

        state->lenop *= 2;
        state->op = (char *) repalloc((void *) state->op, state->lenop);
        state->curop = state->op + used;
    }

    memcpy((void *) state->curop, (void *) strval, lenval);
    state->curop += lenval;
    *(state->curop)++ = '\0';

    state->sumlen += lenval + 1 /* \0 */ ;
}
/* capacity of makepol()'s operator stack (per recursion level) */
#define STACKDEPTH 32
/*
 * make polish notation of query
 *
 * Reads tokens with gettoken_query() and emits query items through
 * pushval (for operands) and pushquery (for operators).  Operators that
 * cannot be emitted yet wait on a local stack.  A '(' is handled by a
 * recursive call that returns at the matching ')'.
 *
 * Returns END when the input or the current parenthesized group is
 * finished.  A syntax error is reported with ereport(ERROR).
 */
static int4
makepol(TSQueryParserState * state, void (*pushval) (TSQueryParserState *, int, char *, int, int2))
{
int4 val = 0,
type;
int4 lenval = 0;
char *strval = NULL;
int4 stack[STACKDEPTH];
int4 lenstack = 0;
int2 weight = 0;
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
{
switch (type)
{
case VAL:
pushval(state, VAL, strval, lenval, weight);
/* emit every '&' and '!' waiting on top of the stack */
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
}
break;
case OPR:
/* a '|' arriving while operators are stacked is emitted at once */
if (lenstack && val == (int4) '|')
pushquery(state, OPR, val, 0, 0, 0);
else
{
if (lenstack == STACKDEPTH) /* internal error */
elog(ERROR, "tsquery stack too small");
stack[lenstack] = val;
lenstack++;
}
break;
case OPEN:
/* parse the parenthesized group recursively */
if (makepol(state, pushval) == ERR)
return ERR;
/* the group acts as one operand: emit one waiting '&' or '!' */
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
stack[lenstack - 1] == (int4) '!'))
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
}
break;
case CLOSE:
/* end of this group: flush whatever is still stacked */
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
break;
case ERR:
default:
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsearch query: \"%s\"",
state->buffer)));
return ERR;
}
}
/* end of input: flush whatever is still stacked */
while (lenstack)
{
lenstack--;
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
};
return END;
}
/*
 * Walk the item array starting at *pos and fill in the "left" field of
 * every item of the subtree rooted there; *pos is advanced past the
 * subtree.
 *
 * Operands get left = 0, a '!' operator gets left = 1, and any other
 * operator gets the distance from itself to the subtree that follows its
 * first one.
 */
static void
findoprnd(QueryItem * ptr, int4 *pos)
{
	int4		start = *pos;
	QueryItem  *item = &ptr[start];

	(*pos)++;

	if (item->type == VAL || item->type == VALSTOP)
	{
		item->left = 0;
		return;
	}

	if (item->val == (int4) '!')
	{
		item->left = 1;
		findoprnd(ptr, pos);
		return;
	}

	/* two-operand operator */
	findoprnd(ptr, pos);
	item->left = *pos - start;
	findoprnd(ptr, pos);
}
/*
 * Parse a query string into a TSQuery value.
 *
 * buf is the input text and pushval the callback that makepol() invokes
 * for every operand.  cfg_id is stored in the parser state.  isplain
 * makes the scanner start in WAITSINGLEOPERAND state instead of
 * WAITFIRSTOPERAND.
 *
 * If parsing produced no items at all, a NOTICE is issued and an empty
 * query (size 0) is returned.
 */
TSQuery
parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
{
	TSQueryParserState state;
	TSQuery		result;
	QueryItem  *items;
	int4		totalsize;
	int4		n;
	int4		cursor = 0;

	/* scanner state */
	state.buffer = buf;
	state.buf = buf;
	if (isplain)
		state.state = WAITSINGLEOPERAND;
	else
		state.state = WAITFIRSTOPERAND;
	state.count = 0;
	state.num = 0;
	state.str = NULL;
	state.cfg_id = cfg_id;

	/* state of the value parser */
	state.valstate.oprisdelim = true;
	state.valstate.len = 32;
	state.valstate.word = (char *) palloc(state.valstate.len);

	/* buffer collecting the operand strings */
	state.sumlen = 0;
	state.lenop = 64;
	state.op = (char *) palloc(state.lenop);
	state.curop = state.op;
	state.curop[0] = '\0';

	/* parse query & make polish notation (postfix, but in reverse order) */
	makepol(&state, pushval);
	pfree(state.valstate.word);

	if (state.num == 0)
	{
		ereport(NOTICE,
				(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
						state.buffer)));
		result = (TSQuery) palloc(HDRSIZETQ);
		SET_VARSIZE(result, HDRSIZETQ);
		result->size = 0;
		return result;
	}

	/* allocate the final structure */
	totalsize = COMPUTESIZE(state.num, state.sumlen);
	result = (TSQuery) palloc(totalsize);
	SET_VARSIZE(result, totalsize);
	result->size = state.num;
	items = GETQUERY(result);

	/* move the list nodes into the item array, freeing them as we go */
	n = 0;
	while (n < state.num)
	{
		ParseQueryNode *node = state.str;

		items[n].weight = node->weight;
		items[n].type = node->type;
		items[n].val = node->val;
		items[n].distance = node->distance;
		items[n].length = node->length;

		state.str = node->next;
		pfree(node);
		n++;
	}

	/* copy the operand strings behind the items */
	memcpy((void *) GETOPERAND(result), (void *) state.op, state.sumlen);
	pfree(state.op);

	/* set left operand's position for every operator */
	findoprnd(items, &cursor);

	return result;
}
/*
* in without morphology
*/
Datum
tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
pg_verifymbstr(in, strlen(in), false);
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
}
/*
* out function
*/
/* Working state of infix(), which prints a query in human-readable form */
typedef struct
{
QueryItem *curpol;	/* item to be printed next; advanced by infix() */
char *buf;	/* start of the output buffer */
char *cur;	/* current write position within buf */
char *op;	/* base of the operand strings; items refer to it by offset */
int4 buflen;	/* allocated size of buf, in bytes */
} INFIX;
/*
 * Make sure the output buffer of an INFIX has room for addsize more bytes
 * plus a terminator after the current write position.  The buffer is
 * doubled (repalloc) as often as needed and the write position is
 * re-based onto the possibly moved buffer.
 */
#define RESIZEBUF(inf,addsize) \
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
{ \
int4 len = (inf)->cur - (inf)->buf; \
(inf)->buflen *= 2; \
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
(inf)->cur = (inf)->buf + len; \
}
/*
 * Recursively walk the query tree starting at in->curpol and print it
 * in infix (human-readable) form into in->buf.
 *
 * in->curpol is advanced past every item that has been printed.
 * "first" is true for the outermost call (and false for operands of a
 * two-operand operator); an OR expression is wrapped in parentheses
 * unless first is true.
 */
static void
infix(INFIX * in, bool first)
{
if (in->curpol->type == VAL)
{
/* operand: print it as a quoted string, optionally followed by weights */
char *op = in->op + in->curpol->distance;
int clen;
/*
 * Reserve room up front.  Presumably "+ 2" covers the two enclosing
 * quotes and "+ 5" the ':' plus up to four weight letters written
 * below -- TODO confirm.
 */
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
*(in->cur) = '\'';
in->cur++;
while (*op)
{
/* a quote inside the operand is doubled */
if (t_iseq(op, '\''))
{
*(in->cur) = '\'';
in->cur++;
}
/* copy one (possibly multibyte) character */
COPYCHAR(in->cur, op);
clen = pg_mblen(op);
op += clen;
in->cur += clen;
}
*(in->cur) = '\'';
in->cur++;
if (in->curpol->weight)
{
/* weight bitmask: bit 3 = A, bit 2 = B, bit 1 = C, bit 0 = D */
*(in->cur) = ':';
in->cur++;
if (in->curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
in->cur++;
}
if (in->curpol->weight & (1 << 2))
{
*(in->cur) = 'B';
in->cur++;
}
if (in->curpol->weight & (1 << 1))
{
*(in->cur) = 'C';
in->cur++;
}
if (in->curpol->weight & 1)
{
*(in->cur) = 'D';
in->cur++;
}
}
*(in->cur) = '\0';
in->curpol++;
}
else if (in->curpol->val == (int4) '!')
{
/* NOT: print '!' and then its single operand */
bool isopr = false;
RESIZEBUF(in, 1);
*(in->cur) = '!';
in->cur++;
*(in->cur) = '\0';
in->curpol++;
/* an operator as operand of '!' is wrapped in parentheses */
if (in->curpol->type == OPR)
{
isopr = true;
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
infix(in, isopr);
if (isopr)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
in->cur = strchr(in->cur, '\0');
}
}
else
{
/* two-operand operator; its code is printed with %c below */
int4 op = in->curpol->val;
INFIX nrm;
in->curpol++;
if (op == (int4) '|' && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, "( ");
in->cur = strchr(in->cur, '\0');
}
/*
 * The right operand is stored first in the item array but has to be
 * printed last, so it is rendered into a scratch buffer of its own.
 */
nrm.curpol = in->curpol;
nrm.op = in->op;
nrm.buflen = 16;
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
/* get right operand */
infix(&nrm, false);
/* get & print left operand */
in->curpol = nrm.curpol;
infix(in, false);
/* print operator & right operand */
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
sprintf(in->cur, " %c %s", op, nrm.buf);
in->cur = strchr(in->cur, '\0');
pfree(nrm.buf);
if (op == (int4) '|' && !first)
{
RESIZEBUF(in, 2);
sprintf(in->cur, " )");
in->cur = strchr(in->cur, '\0');
}
}
}
/*
 * Output function of type tsquery: returns the query in infix notation.
 * An empty query is printed as an empty string.
 */
Datum
tsqueryout(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	INFIX		out;

	if (query->size == 0)
	{
		char	   *empty = palloc(1);

		empty[0] = '\0';
		PG_RETURN_POINTER(empty);
	}

	/* start with a small buffer; infix() enlarges it as needed */
	out.buflen = 32;
	out.buf = (char *) palloc(sizeof(char) * out.buflen);
	out.cur = out.buf;
	out.cur[0] = '\0';
	out.curpol = GETQUERY(query);
	out.op = GETOPERAND(query);

	infix(&out, true);

	PG_FREE_IF_COPY(query, 0);
	PG_RETURN_CSTRING(out.buf);
}
/*
 * Binary send function of type tsquery.
 *
 * Message layout as produced below: the number of items, then for every
 * item its type, weight, left and val fields followed by one 32-bit word
 * read from offset HDRSIZEQI within the item; after all items, the
 * operand bytes of every VAL item (item->length bytes each, no
 * terminator).
 */
Datum
tsquerysend(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
StringInfoData buf;
int i;
QueryItem *item = GETQUERY(query);
pq_begintypsend(&buf);
pq_sendint(&buf, query->size, sizeof(int32));
/* first pass: the fixed-size part of every item */
for (i = 0; i < query->size; i++)
{
int tmp;
pq_sendint(&buf, item->type, sizeof(item->type));
pq_sendint(&buf, item->weight, sizeof(item->weight));
pq_sendint(&buf, item->left, sizeof(item->left));
pq_sendint(&buf, item->val, sizeof(item->val));
/*
 * We are sure that sizeof(WordEntry) == sizeof(int32), and about
 * layout of QueryItem
 *
 * NOTE(review): the word at HDRSIZEQI is sent as one raw int32;
 * tsqueryrecv reads item->length after storing it back, so it
 * presumably holds the item's length/distance fields -- confirm
 * against the QueryItem declaration.
 */
tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
pq_sendint(&buf, tmp, sizeof(tmp));
item++;
}
/* second pass: the operand text of every VAL item */
item = GETQUERY(query);
for (i = 0; i < query->size; i++)
{
if (item->type == VAL)
pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
item++;
}
PG_FREE_IF_COPY(query, 0);
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
Datum
tsqueryrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSQuery query;
int i,
size,
tmp,
len = HDRSIZETQ;
QueryItem *item;
int datalen = 0;
char *ptr;
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
elog(ERROR, "invalid size of tsquery");
len += sizeof(QueryItem) * size;
query = (TSQuery) palloc(len);
query->size = size;
item = GETQUERY(query);
for (i = 0; i < size; i++)
{
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
item->left = (int16) pq_getmsgint(buf, sizeof(int16));
item->val = (int32) pq_getmsgint(buf, sizeof(int32));
tmp = pq_getmsgint(buf, sizeof(int32));
memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
/*
* Sanity checks
*/
if (item->type == VAL)
{
datalen += item->length + 1; /* \0 */
}
else if (item->type == OPR)
{
if (item->val == '|' || item->val == '&')
{
if (item->left <= 0 || i + item->left >= size)
elog(ERROR, "invalid pointer to left operand");
}
if (i == size - 1)
elog(ERROR, "invalid pointer to right operand");
}
else
elog(ERROR, "unknown tsquery node type");
item++;
}
query = (TSQuery) repalloc(query, len + datalen);
item = GETQUERY(query);
ptr = GETOPERAND(query);
for (i = 0; i < size; i++)
{
if (item->type == VAL)
{
item->distance = ptr - GETOPERAND(query);
memcpy(ptr,
pq_getmsgbytes(buf, item->length),
item->length);
ptr += item->length;
*ptr++ = '\0';
}
item++;
}
Assert(ptr - GETOPERAND(query) == datalen);
SET_VARSIZE(query, len + datalen);
PG_RETURN_TSVECTOR(query);
}
/*
 * Debug function: show the query as it is left after clean_NOT() has
 * been applied to it, i.e. the form that will be executed in non-leaf
 * pages of an index.
 *
 * Returns an empty text for an empty query, and the single letter 'T'
 * when clean_NOT() leaves nothing.
 */
Datum
tsquerytree(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	INFIX		nrm;
	text	   *res;
	QueryItem  *q;
	int4		len;

	if (query->size == 0)
	{
		res = (text *) palloc(VARHDRSZ);
		SET_VARSIZE(res, VARHDRSZ);
		/* release a detoasted copy on this path as well */
		PG_FREE_IF_COPY(query, 0);
		PG_RETURN_POINTER(res);
	}

	q = clean_NOT(GETQUERY(query), &len);

	if (!q)
	{
		res = (text *) palloc(1 + VARHDRSZ);
		SET_VARSIZE(res, 1 + VARHDRSZ);
		*((char *) VARDATA(res)) = 'T';
	}
	else
	{
		int4		outlen;

		nrm.curpol = q;
		nrm.buflen = 32;
		nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
		*(nrm.cur) = '\0';
		nrm.op = GETOPERAND(query);
		infix(&nrm, true);

		outlen = nrm.cur - nrm.buf;
		res = (text *) palloc(outlen + VARHDRSZ);
		SET_VARSIZE(res, outlen + VARHDRSZ);
		/* text values carry no terminator: copy exactly outlen bytes */
		memcpy(VARDATA(res), nrm.buf, outlen);

		/* the scratch buffer is no longer needed */
		pfree(nrm.buf);
		pfree(q);
	}

	PG_FREE_IF_COPY(query, 0);

	PG_RETURN_POINTER(res);
}

View File

@@ -0,0 +1,261 @@
/*-------------------------------------------------------------------------
*
* tsquery_cleanup.c
* Cleanup query from NOT values and/or stopword
* Utility functions to correct work.
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/* Node of the pointer-linked tree that maketree() builds from an item array */
typedef struct NODE
{
struct NODE *left;	/* left operand; NULL for operands and for '!' */
struct NODE *right;	/* right (or only) operand; NULL for operands */
QueryItem *valnode;	/* the query item this node stands for (not a copy) */
} NODE;
/*
 * Build a pointer-linked tree from the plain (array) form of a query.
 *
 * The right operand of an operator is the item directly behind it; the
 * left operand of a two-operand operator is found via the item's "left"
 * offset.  '!' has a right operand only.
 */
static NODE *
maketree(QueryItem * in)
{
	NODE	   *node = (NODE *) palloc(sizeof(NODE));

	node->valnode = in;
	node->left = NULL;
	node->right = NULL;

	if (in->type != OPR)
		return node;

	node->right = maketree(in + 1);
	if (in->val != (int4) '!')
		node->left = maketree(in + in->left);

	return node;
}
/* Output state used while flattening a NODE tree back into an item array */
typedef struct
{
QueryItem *ptr;	/* the item array being filled */
int4 len;	/* number of items allocated in ptr */
int4 cur;	/* number of items used so far */
} PLAINTREE;
/*
 * Append the subtree rooted at node to the item array in state, and free
 * the tree nodes on the way.
 *
 * The item is copied first, then the right subtree is emitted, then (for
 * two-operand operators) the left one; the "left" field of the copied
 * item is recomputed to match the new layout.
 */
static void
plainnode(PLAINTREE * state, NODE * node)
{
	int4		slot;

	/* enlarge the array when it is full */
	if (state->cur == state->len)
	{
		state->len *= 2;
		state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
	}

	slot = state->cur;
	memcpy((void *) &(state->ptr[slot]), (void *) node->valnode, sizeof(QueryItem));
	state->cur++;

	if (node->valnode->type != VAL)
	{
		if (node->valnode->val == (int4) '!')
		{
			state->ptr[slot].left = 1;
			plainnode(state, node->right);
		}
		else
		{
			plainnode(state, node->right);
			/* index through state->ptr again: the array may have moved */
			state->ptr[slot].left = state->cur - slot;
			plainnode(state, node->left);
		}
	}

	pfree(node);
}
/*
 * Convert a NODE tree back into the plain (array) form of a query.
 *
 * Returns the new item array and stores its number of items in *len.
 * When root is NULL, or its item is neither VAL nor OPR, NULL is
 * returned and *len is set to 0.
 */
static QueryItem *
plaintree(NODE * root, int4 *len)
{
	PLAINTREE	pl;

	pl.cur = 0;
	pl.len = 16;
	pl.ptr = NULL;

	if (root != NULL &&
		(root->valnode->type == VAL || root->valnode->type == OPR))
	{
		pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
		plainnode(&pl, root);
	}

	*len = pl.cur;
	return pl.ptr;
}
/*
 * Free a NODE tree.  Only the tree nodes are released; the query items
 * they point to are left alone.  A NULL argument is allowed.
 */
static void
freetree(NODE * node)
{
	if (node == NULL)
		return;

	/* the recursive calls cope with missing children themselves */
	freetree(node->left);
	freetree(node->right);
	pfree(node);
}
/*
 * Remove '!' operators from a tree.
 *
 * This is useful for debugging, and the resulting form is also the one
 * used when searching in an index.  A '!' subtree is treated as always
 * TRUE, which is represented by returning NULL:
 *	- an OR with such an operand becomes NULL as a whole;
 *	- an AND drops such an operand and is replaced by the other one
 *	  (or becomes NULL when both operands are dropped).
 */
static NODE *
clean_NOT_intree(NODE * node)
{
	NODE	   *survivor;

	if (node->valnode->type == VAL)
		return node;

	if (node->valnode->val == (int4) '!')
	{
		freetree(node);
		return NULL;
	}

	if (node->valnode->val == (int4) '|')
	{
		/* the right side need not be cleaned once the left one is gone */
		node->left = clean_NOT_intree(node->left);
		if (node->left != NULL)
			node->right = clean_NOT_intree(node->right);

		if (node->left == NULL || node->right == NULL)
		{
			freetree(node);
			return NULL;
		}
		return node;
	}

	/* operator & */
	node->left = clean_NOT_intree(node->left);
	node->right = clean_NOT_intree(node->right);

	if (node->left != NULL && node->right != NULL)
		return node;

	/* at most one operand is left; it (or NULL) replaces the operator */
	survivor = (node->left != NULL) ? node->left : node->right;
	pfree(node);
	return survivor;
}
/*
 * Return a copy of the query (plain form) with all '!' operators cleaned
 * away; *len receives the number of items.  The result is NULL when
 * nothing is left.
 */
QueryItem *
clean_NOT(QueryItem * ptr, int4 *len)
{
	NODE	   *cleaned = clean_NOT_intree(maketree(ptr));

	return plaintree(cleaned, len);
}
/*
 * Result codes reported by clean_fakeval_intree().  Of these, the code
 * visible in this file only ever stores V_UNKNOWN and V_STOP.
 */
#ifdef V_UNKNOWN /* exists in Windows headers */
#undef V_UNKNOWN
#endif
#define V_UNKNOWN 0
#define V_TRUE 1
#define V_FALSE 2
#define V_STOP 3
/*
 * Remove from a tree the values that are always present in a text
 * (stopwords, marked as VALSTOP items).
 *
 * Returns the cleaned subtree, or NULL with *result set to V_STOP when
 * the whole subtree consisted of stopwords.  An operator that loses one
 * operand is replaced by the remaining one.
 */
static NODE *
clean_fakeval_intree(NODE * node, char *result)
{
	char		lresult = V_UNKNOWN,
				rresult = V_UNKNOWN;
	NODE	   *survivor;

	if (node->valnode->type == VAL)
		return node;

	if (node->valnode->type == VALSTOP)
	{
		pfree(node);
		*result = V_STOP;
		return NULL;
	}

	if (node->valnode->val == (int4) '!')
	{
		node->right = clean_fakeval_intree(node->right, &rresult);
		if (node->right != NULL)
			return node;

		/* nothing left to negate */
		*result = V_STOP;
		freetree(node);
		return NULL;
	}

	/* two-operand operator */
	node->left = clean_fakeval_intree(node->left, &lresult);
	node->right = clean_fakeval_intree(node->right, &rresult);

	if (lresult == V_STOP)
	{
		if (rresult == V_STOP)
		{
			freetree(node);
			*result = V_STOP;
			return NULL;
		}
		survivor = node->right;
		pfree(node);
		return survivor;
	}

	if (rresult == V_STOP)
	{
		survivor = node->left;
		pfree(node);
		return survivor;
	}

	return node;
}
/*
 * Return a copy of the query (plain form) without stopword items;
 * *len receives the number of items.
 *
 * When nothing but stopwords was found, a NOTICE is issued, *len is set
 * to 0 and NULL is returned.
 */
QueryItem *
clean_fakeval(QueryItem * ptr, int4 *len)
{
	char		status = V_UNKNOWN;
	NODE	   *cleaned;

	cleaned = clean_fakeval_intree(maketree(ptr), &status);

	if (status == V_UNKNOWN)
		return plaintree(cleaned, len);

	elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored");
	*len = 0;
	return NULL;
}

View File

@@ -0,0 +1,259 @@
/*-------------------------------------------------------------------------
*
* tsquery_gist.c
* GiST index support for tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_gist.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/skey.h"
#include "access/gist.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#define GETENTRY(vec,pos) ((TSQuerySign *) DatumGetPointer((vec)->vector[(pos)].key))
Datum
gtsquery_compress(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
GISTENTRY *retval = entry;
if (entry->leafkey)
{
TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
*sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
gistentryinit(*retval, PointerGetDatum(sign),
entry->rel, entry->page,
entry->offset, FALSE);
}
PG_RETURN_POINTER(retval);
}
/*
 * GiST decompress method: nothing to do, the entry is returned as is.
 */
Datum
gtsquery_decompress(PG_FUNCTION_ARGS)
{
	Datum		entry = PG_GETARG_DATUM(0);

	PG_RETURN_DATUM(entry);
}
Datum
gtsquery_consistent(PG_FUNCTION_ARGS)
{
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
TSQuerySign *key = (TSQuerySign *) DatumGetPointer(entry->key);
TSQuery query = PG_GETARG_TSQUERY(1);
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
TSQuerySign sq = makeTSQuerySign(query);
bool retval;
switch (strategy)
{
case RTContainsStrategyNumber:
if (GIST_LEAF(entry))
retval = (*key & sq) == sq;
else
retval = (*key & sq) != 0;
break;
case RTContainedByStrategyNumber:
if (GIST_LEAF(entry))
retval = (*key & sq) == *key;
else
retval = (*key & sq) != 0;
break;
default:
retval = FALSE;
}
PG_RETURN_BOOL(retval);
}
Datum
gtsquery_union(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
int *size = (int *) PG_GETARG_POINTER(1);
TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
int i;
memset(sign, 0, sizeof(TSQuerySign));
for (i = 0; i < entryvec->n; i++)
*sign |= *GETENTRY(entryvec, i);
*size = sizeof(TSQuerySign);
PG_RETURN_POINTER(sign);
}
Datum
gtsquery_same(PG_FUNCTION_ARGS)
{
TSQuerySign *a = (TSQuerySign *) PG_GETARG_POINTER(0);
TSQuerySign *b = (TSQuerySign *) PG_GETARG_POINTER(1);
PG_RETURN_POINTER(*a == *b);
}
/*
 * Count the bits that are set among the TSQS_SIGLEN low-order bits of a
 * signature.
 */
static int
sizebitvec(TSQuerySign sign)
{
	int			nbits = 0;
	int			bit = TSQS_SIGLEN;

	while (bit-- > 0)
	{
		if ((sign >> bit) & 0x01)
			nbits++;
	}

	return nbits;
}
/*
 * Hamming distance of two signatures: the number of bit positions in
 * which they differ.
 */
static int
hemdist(TSQuerySign a, TSQuerySign b)
{
	return sizebitvec(a ^ b);
}
Datum
gtsquery_penalty(PG_FUNCTION_ARGS)
{
TSQuerySign *origval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
TSQuerySign *newval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
float *penalty = (float *) PG_GETARG_POINTER(2);
*penalty = hemdist(*origval, *newval);
PG_RETURN_POINTER(penalty);
}
/* Sort element used by gtsquery_picksplit() to order the entries */
typedef struct
{
OffsetNumber pos;	/* offset of the entry within the entry vector */
int4 cost;	/* sort key, see gtsquery_picksplit() */
} SPLITCOST;
static int
comparecost(const void *a, const void *b)
{
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
return 0;
else
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
}
/*
 * Balancing term used by gtsquery_picksplit(): minus the cube of (a - b),
 * scaled by c.  With a and b being the current sizes of the left and the
 * right group, the term is negative while the left group is the bigger
 * one and positive while the right group is.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
/*
 * GiST picksplit method for tsquery signatures.
 *
 * Outline, as implemented below:
 *	1. pick as seeds the pair of entries with the largest Hamming distance;
 *	2. sort all entries by |distance to seed 1 - distance to seed 2|;
 *	3. walk the entries in that order and add each one to the group whose
 *	   current union signature is closer, with WISH_F biasing the choice
 *	   against the group that is already bigger.
 * The union signatures of the two groups are returned as spl_ldatum and
 * spl_rdatum.
 */
Datum
gtsquery_picksplit(PG_FUNCTION_ARGS)
{
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
/*
 * NOTE(review): maxoff starts out as n - 2 for the seed search and is
 * advanced by one (OffsetNumberNext) before the distribution phase, so
 * the seed search leaves out the last entry that is distributed later.
 * Presumably that is the entry being inserted -- confirm against the
 * GiST picksplit conventions.
 */
OffsetNumber maxoff = entryvec->n - 2;
OffsetNumber k,
j;
TSQuerySign *datum_l,
*datum_r;
int4 size_alpha,
size_beta;
int4 size_waste,
waste = -1;
int4 nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
SPLITCOST *costvector;
/* room for every entry in either group, plus one trailing slot each */
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
v->spl_nleft = v->spl_nright = 0;
/* step 1: the two entries farthest apart become the seeds */
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
{
size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k));
if (size_waste > waste)
{
waste = size_waste;
seed_1 = k;
seed_2 = j;
}
}
/* no pair was examined at all: fall back to the first two entries */
if (seed_1 == 0 || seed_2 == 0)
{
seed_1 = 1;
seed_2 = 2;
}
/* the group signatures start out as copies of the seeds */
datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign));
*datum_l = *GETENTRY(entryvec, seed_1);
datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign));
*datum_r = *GETENTRY(entryvec, seed_2);
maxoff = OffsetNumberNext(maxoff);
/* step 2: compute the sort key of every entry and sort */
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
{
costvector[j - 1].pos = j;
size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j));
size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j));
costvector[j - 1].cost = abs(size_alpha - size_beta);
}
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
/* step 3: distribute the entries */
for (k = 0; k < maxoff; k++)
{
j = costvector[k].pos;
/* the seeds themselves go to their own group unconditionally */
if (j == seed_1)
{
*left++ = j;
v->spl_nleft++;
continue;
}
else if (j == seed_2)
{
*right++ = j;
v->spl_nright++;
continue;
}
/* distances to the current union signatures of the two groups */
size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j));
size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j));
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
{
*datum_l |= *GETENTRY(entryvec, j);
*left++ = j;
v->spl_nleft++;
}
else
{
*datum_r |= *GETENTRY(entryvec, j);
*right++ = j;
v->spl_nright++;
}
}
/* fill the slot behind the last used one of each list */
*right = *left = FirstOffsetNumber;
v->spl_ldatum = PointerGetDatum(datum_l);
v->spl_rdatum = PointerGetDatum(datum_r);
PG_RETURN_POINTER(v);
}

View File

@@ -0,0 +1,289 @@
/*-------------------------------------------------------------------------
*
* tsquery_op.c
* Various operations with tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/pg_crc.h"
/*
 * Return the number of items (operands plus operators) of a tsquery.
 */
Datum
tsquery_numnode(PG_FUNCTION_ARGS)
{
	TSQuery		query = PG_GETARG_TSQUERY(0);
	int			count;

	count = query->size;
	PG_FREE_IF_COPY(query, 0);

	PG_RETURN_INT32(count);
}
/*
 * Build an operator node that has the trees of queries b and a (in this
 * order) as its two children.  The operator code itself is left zero;
 * the caller sets it.
 */
static QTNode *
join_tsqueries(TSQuery a, TSQuery b)
{
	QTNode	   *joined = (QTNode *) palloc0(sizeof(QTNode));

	joined->flags |= QTN_NEEDFREE;

	joined->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
	joined->valnode->type = OPR;

	joined->nchild = 2;
	joined->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
	joined->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
	joined->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a));

	return joined;
}
/*
 * AND of two tsqueries.
 *
 * If one of the arguments is an empty query, the other one is returned
 * unchanged.
 */
Datum
tsquery_and(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		b = PG_GETARG_TSQUERY_COPY(1);
	QTNode	   *res;
	TSQuery		query;

	if (a->size == 0)
	{
		/* "a" came from argument 0, so that is the index to test against */
		PG_FREE_IF_COPY(a, 0);
		PG_RETURN_POINTER(b);
	}
	else if (b->size == 0)
	{
		PG_FREE_IF_COPY(b, 1);
		PG_RETURN_POINTER(a);
	}

	res = join_tsqueries(a, b);

	res->valnode->val = '&';

	query = QTN2QT(res);

	QTNFree(res);
	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);

	PG_RETURN_TSQUERY(query);
}
/*
 * OR of two tsqueries.
 *
 * If one of the arguments is an empty query, the other one is returned
 * unchanged.
 */
Datum
tsquery_or(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		b = PG_GETARG_TSQUERY_COPY(1);
	QTNode	   *res;
	TSQuery		query;

	if (a->size == 0)
	{
		/* "a" came from argument 0, so that is the index to test against */
		PG_FREE_IF_COPY(a, 0);
		PG_RETURN_POINTER(b);
	}
	else if (b->size == 0)
	{
		PG_FREE_IF_COPY(b, 1);
		PG_RETURN_POINTER(a);
	}

	res = join_tsqueries(a, b);

	res->valnode->val = '|';

	query = QTN2QT(res);

	QTNFree(res);
	PG_FREE_IF_COPY(a, 0);
	PG_FREE_IF_COPY(b, 1);

	PG_RETURN_POINTER(query);
}
/*
 * Negation of a tsquery.  An empty query is returned unchanged.
 */
Datum
tsquery_not(PG_FUNCTION_ARGS)
{
	TSQuery		a = PG_GETARG_TSQUERY_COPY(0);
	QTNode	   *negation;
	TSQuery		result;

	if (a->size == 0)
		PG_RETURN_POINTER(a);

	/* a '!' operator node with the argument's tree as its only child */
	negation = (QTNode *) palloc0(sizeof(QTNode));
	negation->flags |= QTN_NEEDFREE;

	negation->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
	negation->valnode->type = OPR;
	negation->valnode->val = '!';

	negation->nchild = 1;
	negation->child = (QTNode **) palloc0(sizeof(QTNode *));
	negation->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));

	result = QTN2QT(negation);

	QTNFree(negation);
	PG_FREE_IF_COPY(a, 0);

	PG_RETURN_POINTER(result);
}
/*
 * Three-way comparison of two tsqueries.
 *
 * Queries are ordered by number of items first, then by total size, and
 * only when both agree by comparing their trees with QTNodeCompare().
 */
static int
CompareTSQ(TSQuery a, TSQuery b)
{
	QTNode	   *atree;
	QTNode	   *btree;
	int			cmp;

	if (a->size != b->size)
		return (a->size < b->size) ? -1 : 1;

	if (VARSIZE(a) != VARSIZE(b))
		return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1;

	atree = QT2QTN(GETQUERY(a), GETOPERAND(a));
	btree = QT2QTN(GETQUERY(b), GETOPERAND(b));

	cmp = QTNodeCompare(atree, btree);

	QTNFree(atree);
	QTNFree(btree);

	return cmp;
}
/*
 * Comparison support function for tsquery: returns the result of
 * CompareTSQ() for its two arguments.
 */
Datum
tsquery_cmp(PG_FUNCTION_ARGS)
{
	TSQuery		lhs = PG_GETARG_TSQUERY_COPY(0);
	TSQuery		rhs = PG_GETARG_TSQUERY_COPY(1);
	int			cmp;

	cmp = CompareTSQ(lhs, rhs);

	PG_FREE_IF_COPY(lhs, 0);
	PG_FREE_IF_COPY(rhs, 1);

	PG_RETURN_INT32(cmp);
}
/*
 * Template for the boolean comparison functions of tsquery.
 *
 * NAME is the name of the function to define; CONDITION is an expression
 * over "res", the result of CompareTSQ() for the two arguments, and is
 * what the function returns.
 */
#define CMPFUNC( NAME, CONDITION ) \
Datum \
NAME(PG_FUNCTION_ARGS) { \
TSQuery a = PG_GETARG_TSQUERY_COPY(0); \
TSQuery b = PG_GETARG_TSQUERY_COPY(1); \
int res = CompareTSQ(a,b); \
\
PG_FREE_IF_COPY(a,0); \
PG_FREE_IF_COPY(b,1); \
\
PG_RETURN_BOOL( CONDITION ); \
}
/* the six comparison functions: <, <=, =, >=, >, <> */
CMPFUNC(tsquery_lt, res < 0);
CMPFUNC(tsquery_le, res <= 0);
CMPFUNC(tsquery_eq, res == 0);
CMPFUNC(tsquery_ge, res >= 0);
CMPFUNC(tsquery_gt, res > 0);
CMPFUNC(tsquery_ne, res != 0);
/*
 * Compute the signature of a tsquery: one bit is set for every operand,
 * at the position given by the operand's val modulo TSQS_SIGLEN.
 * Operators do not contribute to the signature.
 */
TSQuerySign
makeTSQuerySign(TSQuery a)
{
	int			i;
	QueryItem  *ptr = GETQUERY(a);
	TSQuerySign sign = 0;

	for (i = 0; i < a->size; i++)
	{
		/*
		 * val is a signed integer holding a CRC, so it can be negative.
		 * The remainder of a negative value is negative too, and shifting
		 * by a negative count is undefined; take the remainder of the
		 * unsigned value to always get a position in 0 .. TSQS_SIGLEN-1.
		 */
		if (ptr->type == VAL)
			sign |= ((TSQuerySign) 1) << (((unsigned int) ptr->val) % TSQS_SIGLEN);
		ptr++;
	}

	return sign;
}
Datum
tsq_mcontains(PG_FUNCTION_ARGS)
{
TSQuery query = PG_GETARG_TSQUERY(0);
TSQuery ex = PG_GETARG_TSQUERY(1);
TSQuerySign sq,
se;
int i,
j;
QueryItem *iq,
*ie;
if (query->size < ex->size)
{
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(false);
}
sq = makeTSQuerySign(query);
se = makeTSQuerySign(ex);
if ((sq & se) != se)
{
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(false);
}
ie = GETQUERY(ex);
for (i = 0; i < ex->size; i++)
{
iq = GETQUERY(query);
if (ie[i].type != VAL)
continue;
for (j = 0; j < query->size; j++)
if (iq[j].type == VAL && ie[i].val == iq[j].val)
{
j = query->size + 1;
break;
}
if (j == query->size)
{
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(false);
}
}
PG_FREE_IF_COPY(query, 0);
PG_FREE_IF_COPY(ex, 1);
PG_RETURN_BOOL(true);
}
/*
 * Is the first query contained in the second one?  Implemented as
 * tsq_mcontains() with the arguments swapped.
 */
Datum
tsq_mcontained(PG_FUNCTION_ARGS)
{
	Datum		container = PG_GETARG_DATUM(1);
	Datum		contained = PG_GETARG_DATUM(0);

	PG_RETURN_DATUM(DirectFunctionCall2(tsq_mcontains,
										container,
										contained));
}

View File

@@ -0,0 +1,524 @@
/*-------------------------------------------------------------------------
*
* tsquery_rewrite.c
* Utilities for reconstructing tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "executor/spi.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/*
 * Advance an ascending array of indexes, counters[0 .. last], to the next
 * combination.
 *
 * counters[last] must stay below total, counters[last - 1] below
 * total - 1, and so on.  The rightmost counter that can still be
 * incremented is incremented and every counter to its right is reset to
 * its left neighbour plus one.
 *
 * Returns 1 if a next combination was produced and 0 if there is none.
 */
static int
addone(int *counters, int last, int total)
{
	int			pos = last;
	int			limit = total;

	/* increment from the right until some counter stays within its limit */
	for (;;)
	{
		counters[pos]++;
		if (counters[pos] < limit)
			break;
		if (pos == 0)
			return 0;
		pos--;
		limit--;
	}

	/* re-seed the counters to the right of the one that was incremented */
	while (pos < last)
	{
		pos++;
		counters[pos] = counters[pos - 1] + 1;
	}

	return 1;
}
/*
 * Try to match the tree "ex" at node "node" and, on a match, substitute
 * a copy of "subs" for the matched part (or remove it when subs is NULL).
 *
 * Returns the possibly replaced node; the result is NULL when the whole
 * node matched and subs is NULL.  *isfind is set to true when a match
 * was found and is left untouched otherwise.
 */
static QTNode *
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
{
/* quick rejects: signature of ex not covered, or different type/val at the top */
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
return node;
/* nodes flagged QTN_NOCHANGE (set on substitutes below) are never matched again */
if (node->flags & QTN_NOCHANGE)
return node;
if (node->valnode->type == OPR)
{
if (node->nchild == ex->nchild)
{
/* same number of children: the node matches as a whole or not at all */
if (QTNEq(node, ex))
{
QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
node = NULL;
*isfind = true;
}
}
else if (node->nchild > ex->nchild)
{
/*
 * node has more children than ex: look for a subset of ex->nchild
 * children that matches ex.  counters[] holds the indexes of the
 * children currently tried; addone() steps to the next combination.
 * tnode is a scratch node that borrows the chosen children.
 */
int *counters = (int *) palloc(sizeof(int) * node->nchild);
int i;
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
memset(tnode, 0, sizeof(QTNode));
tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild);
tnode->nchild = ex->nchild;
tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem));
*(tnode->valnode) = *(ex->valnode);
/* first combination: the first ex->nchild children */
for (i = 0; i < ex->nchild; i++)
counters[i] = i;
do
{
tnode->sign = 0;
for (i = 0; i < ex->nchild; i++)
{
tnode->child[i] = node->child[counters[i]];
tnode->sign |= tnode->child[i]->sign;
}
if (QTNEq(tnode, ex))
{
int j = 0;
/* release the scratch node; tnode is reused for the substitute */
pfree(tnode->valnode);
pfree(tnode->child);
pfree(tnode);
if (subs)
{
tnode = QTNCopy(subs);
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
}
else
tnode = NULL;
/*
 * The substitute (or NULL) takes the slot of the first matched
 * child and the slots of the other matched children are
 * cleared.  NOTE(review): the matched children themselves are
 * not freed here.
 */
node->child[counters[0]] = tnode;
for (i = 1; i < ex->nchild; i++)
node->child[counters[i]] = NULL;
/* squeeze the cleared slots out of the child array */
for (i = 0; i < node->nchild; i++)
{
if (node->child[i])
{
node->child[j] = node->child[i];
j++;
}
}
node->nchild = j;
*isfind = true;
break;
}
} while (addone(counters, ex->nchild - 1, node->nchild));
/*
 * A tnode without QTN_NOCHANGE is still the scratch node, i.e. no
 * match was found: free it.  Otherwise the children of node were
 * changed, so node is sorted again.
 */
if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
{
pfree(tnode->valnode);
pfree(tnode->child);
pfree(tnode);
}
else
QTNSort(node);
pfree(counters);
}
}
else if (QTNEq(node, ex))
{
/* an operand that equals ex */
QTNFree(node);
if (subs)
{
node = QTNCopy(subs);
node->flags |= QTN_NOCHANGE;
}
else
{
node = NULL;
}
*isfind = true;
}
return node;
}
/*
 * Apply findeq() to root and then, recursively, to every child of it.
 *
 * The children are not visited when root has been removed, is flagged
 * QTN_NOCHANGE, or is not an operator.
 */
static QTNode *
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
	int			i;

	root = findeq(root, ex, subs, isfind);

	if (root == NULL)
		return root;
	if ((root->flags & QTN_NOCHANGE) != 0)
		return root;
	if (root->valnode->type != OPR)
		return root;

	for (i = 0; i < root->nchild; i++)
		root->child[i] = dofindsubquery(root->child[i], ex, subs, isfind);

	return root;
}
/*
 * Tidy up an operator node after children of it have been removed
 * (set to NULL): the NULL slots are squeezed out of the child array,
 * a '!' that is left without operand is removed, and an operator left
 * with a single child is replaced by that child.
 *
 * Only root itself is examined; the function does not descend into the
 * children.
 */
static QTNode *
dropvoidsubtree(QTNode * root)
{
if (!root)
return NULL;
if (root->valnode->type == OPR)
{
int i,
j = 0;
/* compact the child array */
for (i = 0; i < root->nchild; i++)
{
if (root->child[i])
{
root->child[j] = root->child[i];
j++;
}
}
root->nchild = j;
if (root->valnode->val == (int4) '!' && root->nchild == 0)
{
QTNFree(root);
root = NULL;
}
else if (root->nchild == 1)
{
/*
 * NOTE(review): this branch is also taken for a '!' that still has
 * its operand, which replaces "!x" by "x" -- confirm that this is
 * intended.  Also, only the node struct itself is freed here, not
 * its valnode or child array.
 */
QTNode *nroot = root->child[0];
pfree(root);
root = nroot;
}
}
return root;
}
/*
 * Substitute "subs" for every occurrence of "ex" in the tree "root".
 *
 * When subs is NULL the occurrences are removed, and the root is tidied
 * up afterwards if something was removed.  isfind may be NULL; otherwise
 * *isfind tells whether any occurrence was found.
 */
static QTNode *
findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
{
	bool		found = false;
	QTNode	   *result;

	result = dofindsubquery(root, ex, subs, &found);

	if (found && subs == NULL)
		result = dropvoidsubtree(result);

	if (isfind != NULL)
		*isfind = found;

	return result;
}
/*
 * Transition function of the rewrite aggregate.
 *
 * Argument 0 is the accumulated tsquery (may be NULL on the first call),
 * argument 1 a one-dimensional array of exactly three tsqueries:
 *	element 0 - the query to start from while the accumulator is empty,
 *	element 1 - the subquery to look for,
 *	element 2 - the substitute (an empty one means "remove").
 * The new accumulator value is allocated in the aggregate's memory
 * context.
 */
Datum
ts_rewrite_accum(PG_FUNCTION_ARGS)
{
TSQuery acc;
ArrayType *qa;
TSQuery q;
QTNode *qex = NULL,
*subs = NULL,
*acctree = NULL;
bool isfind = false;
Datum *elemsp;
int nelemsp;
MemoryContext aggcontext;
MemoryContext oldcontext;
/* NOTE(review): fcinfo->context is cast to AggState without a check -- confirm callers */
aggcontext = ((AggState *) fcinfo->context)->aggcontext;
/* no accumulator yet: start with an empty query */
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
{
acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
SET_VARSIZE(acc, HDRSIZETQ);
acc->size = 0;
}
else
acc = PG_GETARG_TSQUERY(0);
/* a NULL array leaves the accumulator as it is */
if (PG_ARGISNULL(1) || PG_GETARG_POINTER(1) == NULL)
PG_RETURN_TSQUERY(acc);
else
qa = PG_GETARG_ARRAYTYPE_P_COPY(1);
if (ARR_NDIM(qa) != 1)
elog(ERROR, "array must be one-dimensional, not %d dimensions",
ARR_NDIM(qa));
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
elog(ERROR, "array should have only three elements");
if (ARR_ELEMTYPE(qa) != TSQUERYOID)
elog(ERROR, "array should contain tsquery type");
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
/* element 0: the initial query */
q = DatumGetTSQuery(elemsp[0]);
if (q->size == 0)
{
pfree(elemsp);
PG_RETURN_POINTER(acc);
}
if (!acc->size)
{
/*
 * NOTE(review): an accumulator with size 0 but a VARSIZE above the
 * bare header is returned unchanged; what produces such a value is
 * not visible in this function.
 */
if (VARSIZE(acc) > HDRSIZETQ)
{
pfree(elemsp);
PG_RETURN_POINTER(acc);
}
else
acctree = QT2QTN(GETQUERY(q), GETOPERAND(q));
}
else
acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc));
QTNTernary(acctree);
QTNSort(acctree);
/* element 1: the subquery to look for */
q = DatumGetTSQuery(elemsp[1]);
if (q->size == 0)
{
/* nothing to look for; acctree is not freed on this path */
pfree(elemsp);
PG_RETURN_POINTER(acc);
}
qex = QT2QTN(GETQUERY(q), GETOPERAND(q));
QTNTernary(qex);
QTNSort(qex);
/* element 2: the substitute; subs stays NULL for an empty one */
q = DatumGetTSQuery(elemsp[2]);
if (q->size)
subs = QT2QTN(GETQUERY(q), GETOPERAND(q));
acctree = findsubquery(acctree, qex, subs, &isfind);
/* build a new accumulator if something changed or there was none yet */
if (isfind || !acc->size)
{
/* pfree( acc ); do not pfree(p), because nodeAgg.c will */
if (acctree)
{
QTNBinary(acctree);
/* the result has to live in the aggregate's context */
oldcontext = MemoryContextSwitchTo(aggcontext);
acc = QTN2QT(acctree);
MemoryContextSwitchTo(oldcontext);
}
else
{
/* everything was removed: the result is an empty query */
acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
SET_VARSIZE(acc, HDRSIZETQ);
acc->size = 0;
}
}
pfree(elemsp);
QTNFree(qex);
/* subs and acctree may be NULL here */
QTNFree(subs);
QTNFree(acctree);
PG_RETURN_TSQUERY(acc);
}
/*
 * Final function of the rewrite aggregate.
 *
 * Returns a copy of the accumulated query, or an empty query when there
 * is no accumulator value or it is empty.
 */
Datum
ts_rewrite_finish(PG_FUNCTION_ARGS)
{
	TSQuery		acc = NULL;
	TSQuery		rewrited;

	/*
	 * The argument must be checked for NULL before it is fetched as a
	 * tsquery: fetching detoasts the datum, which dereferences it.
	 */
	if (!PG_ARGISNULL(0) && PG_GETARG_POINTER(0) != NULL)
		acc = PG_GETARG_TSQUERY(0);

	if (acc == NULL || acc->size == 0)
	{
		rewrited = (TSQuery) palloc(HDRSIZETQ);
		SET_VARSIZE(rewrited, HDRSIZETQ);
		rewrited->size = 0;
	}
	else
	{
		rewrited = (TSQuery) palloc(VARSIZE(acc));
		memcpy(rewrited, acc, VARSIZE(acc));
		pfree(acc);
	}

	PG_RETURN_POINTER(rewrited);
}
/*
 * tsquery_rewrite
 *		Rewrite a tsquery using substitution pairs returned by an SQL query.
 *
 * Argument 0 is the tsquery to rewrite, fetched as a private copy (that copy
 * is truncated in place when the rewrite removes every node).  Argument 1 is
 * the text of an SQL command that must return exactly two tsquery columns:
 * the subquery to look for and its replacement.  Rows with a NULL in either
 * column, or with an empty first column, are skipped.
 *
 * Returns the rewritten tsquery, or an empty tsquery if nothing is left.
 */
Datum
tsquery_rewrite(PG_FUNCTION_ARGS)
{
    TSQuery     query = PG_GETARG_TSQUERY_COPY(0);
    text       *in = PG_GETARG_TEXT_P(1);
    TSQuery     rewrited = query;
    MemoryContext outercontext = CurrentMemoryContext;
    MemoryContext oldcontext;
    QTNode     *tree;
    char       *buf;
    void       *plan;
    Portal      portal;
    bool        isnull;
    int         i;

    /* an empty query cannot contain anything to substitute */
    if (query->size == 0)
    {
        PG_FREE_IF_COPY(in, 1);
        PG_RETURN_POINTER(rewrited);
    }

    /* bring the query into the flattened, sorted tree form used for matching */
    tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
    QTNTernary(tree);
    QTNSort(tree);

    buf = TextPGetCString(in);

    SPI_connect();

    if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
        elog(ERROR, "SPI_prepare(\"%s\") failed", buf);

    if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
        elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);

    SPI_cursor_fetch(portal, true, 100);

    /*
     * Validate the result shape.  SPI_tuptable is checked for NULL first so
     * that a command which produced no tuple table is reported as a user
     * error instead of dereferencing a null pointer.
     */
    if (SPI_tuptable == NULL ||
        SPI_tuptable->tupdesc->natts != 2 ||
        SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
        SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("ts_rewrite query must return two tsquery columns")));

    /* process the cursor in batches of 100 rows until it is exhausted */
    while (SPI_processed > 0 && tree)
    {
        for (i = 0; i < SPI_processed && tree; i++)
        {
            Datum       qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
            Datum       sdata;

            if (isnull)
                continue;

            sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);

            if (!isnull)
            {
                TSQuery     qtex = DatumGetTSQuery(qdata);
                TSQuery     qtsubs = DatumGetTSQuery(sdata);
                QTNode     *qex,
                           *qsubs = NULL;

                /* an empty search pattern matches nothing; skip the row */
                if (qtex->size == 0)
                {
                    if (qtex != (TSQuery) DatumGetPointer(qdata))
                        pfree(qtex);
                    if (qtsubs != (TSQuery) DatumGetPointer(sdata))
                        pfree(qtsubs);
                    continue;
                }

                qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));

                QTNTernary(qex);
                QTNSort(qex);

                /* an empty replacement means "delete the matched subquery" */
                if (qtsubs->size)
                    qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));

                /*
                 * Run the substitution in the caller's memory context,
                 * presumably so the resulting tree survives SPI_finish().
                 */
                oldcontext = MemoryContextSwitchTo(outercontext);
                tree = findsubquery(tree, qex, qsubs, NULL);
                MemoryContextSwitchTo(oldcontext);

                /* release per-row working data (detoasted copies only) */
                QTNFree(qex);
                if (qtex != (TSQuery) DatumGetPointer(qdata))
                    pfree(qtex);
                QTNFree(qsubs);
                if (qtsubs != (TSQuery) DatumGetPointer(sdata))
                    pfree(qtsubs);
            }
        }

        SPI_freetuptable(SPI_tuptable);
        SPI_cursor_fetch(portal, true, 100);
    }

    SPI_freetuptable(SPI_tuptable);
    SPI_cursor_close(portal);
    SPI_freeplan(plan);
    SPI_finish();

    if (tree)
    {
        /* convert back to the binary-operator flat representation */
        QTNBinary(tree);
        rewrited = QTN2QT(tree);
        QTNFree(tree);
        PG_FREE_IF_COPY(query, 0);
    }
    else
    {
        /* everything was removed: reuse the input copy as an empty tsquery */
        SET_VARSIZE(rewrited, HDRSIZETQ);
        rewrited->size = 0;
    }

    pfree(buf);
    PG_FREE_IF_COPY(in, 1);
    PG_RETURN_POINTER(rewrited);
}
/*
 * tsquery_rewrite_query
 *		Replace occurrences of one tsquery inside another.
 *
 * Argument 0 is the query to rewrite (taken as a private copy), argument 1
 * the subquery to look for, argument 2 its replacement (an empty replacement
 * removes the match).  If either the query or the pattern is empty the query
 * is returned unchanged; if the rewrite leaves nothing, an empty tsquery is
 * returned.
 */
Datum
tsquery_rewrite_query(PG_FUNCTION_ARGS)
{
    TSQuery     query = PG_GETARG_TSQUERY_COPY(0);
    TSQuery     ex = PG_GETARG_TSQUERY(1);
    TSQuery     subst = PG_GETARG_TSQUERY(2);
    TSQuery     rewrited = query;

    if (query->size != 0 && ex->size != 0)
    {
        QTNode     *qtree;
        QTNode     *pattern;
        QTNode     *replacement = NULL;

        /* normalize both the query and the pattern before matching */
        qtree = QT2QTN(GETQUERY(query), GETOPERAND(query));
        QTNTernary(qtree);
        QTNSort(qtree);

        pattern = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
        QTNTernary(pattern);
        QTNSort(pattern);

        if (subst->size)
            replacement = QT2QTN(GETQUERY(subst), GETOPERAND(subst));

        qtree = findsubquery(qtree, pattern, replacement, NULL);

        QTNFree(pattern);
        QTNFree(replacement);

        if (qtree)
        {
            /* build a new flat tsquery and drop the input copy */
            QTNBinary(qtree);
            rewrited = QTN2QT(qtree);
            QTNFree(qtree);
            PG_FREE_IF_COPY(query, 0);
        }
        else
        {
            /* nothing left: turn the input copy into an empty tsquery */
            SET_VARSIZE(rewrited, HDRSIZETQ);
            rewrited->size = 0;
        }
    }

    PG_FREE_IF_COPY(ex, 1);
    PG_FREE_IF_COPY(subst, 2);
    PG_RETURN_POINTER(rewrited);
}

View File

@@ -0,0 +1,317 @@
/*-------------------------------------------------------------------------
*
* tsquery_util.c
* Utilities for tsquery datatype
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
/*
 * QT2QTN
 *		Build a QTNode tree from the flat QueryItem representation.
 *
 * "in" points at the item to convert; for an operator its first operand is
 * the next item and, for binary operators, the second operand is found at
 * in + in->left.  "operand" is the base of the operand string area; when it
 * is NULL, value nodes get neither a word pointer nor a signature.
 *
 * The returned nodes reference "in" and "operand" directly (nothing is
 * copied).  Each node's sign is a 32-bit mask: one bit per value node,
 * OR-ed together for operators.
 */
QTNode *
QT2QTN(QueryItem * in, char *operand)
{
    QTNode     *node = (QTNode *) palloc0(sizeof(QTNode));

    node->valnode = in;

    if (in->type == OPR)
    {
        node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
        node->child[0] = QT2QTN(in + 1, operand);
        node->sign = node->child[0]->sign;
        if (in->val == (int4) '!')
            node->nchild = 1;
        else
        {
            node->nchild = 2;
            node->child[1] = QT2QTN(in + in->left, operand);
            node->sign |= node->child[1]->sign;
        }
    }
    else if (operand)
    {
        node->word = operand + in->distance;

        /*
         * Do the modulo and the shift in unsigned arithmetic: with a signed
         * value a negative "val" yields a negative shift count, and shifting
         * a signed 1 into bit 31 overflows -- both undefined behavior.
         */
        node->sign = ((uint32) 1) << (((unsigned int) in->val) % 32);
    }

    return node;
}
/*
 * QTNFree
 *		Recursively release a QTNode tree.
 *
 * A NULL argument is a no-op.  The word is freed only when the node is a
 * value node flagged QTN_WORDFREE, and the QueryItem only when the node is
 * flagged QTN_NEEDFREE; otherwise those point into storage the node does
 * not own.
 */
void
QTNFree(QTNode * in)
{
    if (!in)
        return;

    if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
        pfree(in->word);

    if (in->child)
    {
        /* must look at valnode before it is (possibly) freed below */
        if (in->valnode->type == OPR)
        {
            int         i;

            for (i = 0; i < in->nchild; i++)
                QTNFree(in->child[i]);
        }
        pfree(in->child);
    }

    /*
     * Free an owned QueryItem whether or not the node has children.  The
     * previous code did this only inside the "has children" branch, so
     * value nodes that own their QueryItem (as QTNCopy produces) leaked it.
     */
    if (in->flags & QTN_NEEDFREE)
        pfree(in->valnode);

    pfree(in);
}
/*
 * QTNodeCompare
 *		Total ordering on QTNode trees, used for sorting and equality tests.
 *
 * Nodes are compared by item type, then item value, then (for value nodes)
 * by length and word bytes, or (for operators) by child count and then the
 * children pairwise.  Note the ordering is descending for type, value,
 * length and child count: the larger one sorts first (-1).
 */
int
QTNodeCompare(QTNode * an, QTNode * bn)
{
    QueryItem  *aitem = an->valnode;
    QueryItem  *bitem = bn->valnode;
    int         idx;

    if (aitem->type != bitem->type)
        return (aitem->type > bitem->type) ? -1 : 1;

    if (aitem->val != bitem->val)
        return (aitem->val > bitem->val) ? -1 : 1;

    if (aitem->type == VAL)
    {
        if (aitem->length != bitem->length)
            return (aitem->length > bitem->length) ? -1 : 1;
        return strncmp(an->word, bn->word, aitem->length);
    }

    if (an->nchild != bn->nchild)
        return (an->nchild > bn->nchild) ? -1 : 1;

    /* same operator, same arity: first differing child decides */
    for (idx = 0; idx < an->nchild; idx++)
    {
        int         cmp = QTNodeCompare(an->child[idx], bn->child[idx]);

        if (cmp != 0)
            return cmp;
    }

    return 0;
}
static int
cmpQTN(const void *a, const void *b)
{
return QTNodeCompare(*(QTNode **) a, *(QTNode **) b);
}
/*
 * QTNSort
 *		Recursively sort the children of every operator node into the
 *		order defined by QTNodeCompare.  Value nodes are left alone.
 */
void
QTNSort(QTNode * in)
{
    int         k;

    if (in->valnode->type == OPR)
    {
        /* sort bottom-up so child order is final before siblings compare */
        for (k = 0; k < in->nchild; k++)
            QTNSort(in->child[k]);

        if (in->nchild > 1)
            qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
    }
}
/*
 * QTNEq
 *		Test two QTNode trees for equality.
 *
 * The signatures are compared first as a cheap filter; only when they are
 * identical is the full QTNodeCompare performed.
 */
bool
QTNEq(QTNode * a, QTNode * b)
{
    uint32      common = a->sign & b->sign;

    /* signatures differ unless each equals their intersection */
    if (common != a->sign || common != b->sign)
        return false;

    return QTNodeCompare(a, b) == 0;
}
/*
 * QTNTernary
 *		Flatten nested occurrences of the same operator into one n-ary node,
 *		e.g. (a & (b & c)) becomes &(a, b, c).
 *
 * Works bottom-up and modifies the tree in place.  Value nodes and negation
 * nodes are never flattened.
 */
void
QTNTernary(QTNode * in)
{
    int         i;

    if (in->valnode->type != OPR)
        return;

    for (i = 0; i < in->nchild; i++)
        QTNTernary(in->child[i]);

    /*
     * Negation is unary and not associative: splicing a '!' child into a
     * '!' parent would silently drop one negation (!!a would become !a),
     * changing the meaning of the query.
     */
    if (in->valnode->val == (int4) '!')
        return;

    for (i = 0; i < in->nchild; i++)
    {
        QTNode     *cc = in->child[i];

        if (in->valnode->type == cc->valnode->type &&
            in->valnode->val == cc->valnode->val)
        {
            int         oldnchild = in->nchild;

            /* cc is replaced by its cc->nchild children */
            in->nchild += cc->nchild - 1;
            in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));

            /* shift the siblings after cc to make room */
            if (i + 1 != oldnchild)
                memmove(in->child + i + cc->nchild, in->child + i + 1,
                        (oldnchild - i - 1) * sizeof(QTNode *));

            memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));

            /* the spliced-in children are already flattened; skip them */
            i += cc->nchild - 1;

            /*
             * cc's children now belong to "in"; release the emptied node
             * together with the storage it owned (previously only the node
             * itself was freed).
             */
            if (cc->flags & QTN_NEEDFREE)
                pfree(cc->valnode);
            pfree(cc->child);
            pfree(cc);
        }
    }
}
/*
 * QTNBinary
 *		Inverse of flattening: turn every operator node with more than two
 *		children back into a chain of two-child nodes.
 *
 * Each step wraps the first two children in a new node carrying the same
 * operator and moves the last child into slot 1.  The new nodes own their
 * QueryItem (QTN_NEEDFREE).
 */
void
QTNBinary(QTNode * in)
{
    int         k;

    if (in->valnode->type != OPR)
        return;

    for (k = 0; k < in->nchild; k++)
        QTNBinary(in->child[k]);

    while (in->nchild > 2)
    {
        QTNode     *pair = (QTNode *) palloc0(sizeof(QTNode));

        pair->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
        pair->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);

        /* same operator as the parent */
        pair->valnode->type = in->valnode->type;
        pair->valnode->val = in->valnode->val;
        pair->flags = QTN_NEEDFREE;

        /* adopt the parent's first two children */
        pair->nchild = 2;
        pair->child[0] = in->child[0];
        pair->child[1] = in->child[1];
        pair->sign = pair->child[0]->sign | pair->child[1]->sign;

        /* parent now has one child fewer */
        in->child[0] = pair;
        in->child[1] = in->child[in->nchild - 1];
        in->nchild--;
    }
}
/*
 * Walk a tree and accumulate the number of nodes (*nnode) and the bytes
 * needed for all operand strings including one terminator each (*sumlen).
 * Both counters are added to, not reset.
 */
static void
cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
{
    int         k;

    (*nnode)++;

    if (in->valnode->type != OPR)
    {
        *sumlen += in->valnode->length + 1;
        return;
    }

    for (k = 0; k < in->nchild; k++)
        cntsize(in->child[k], sumlen, nnode);
}
/*
 * Working state for fillQT while a QTNode tree is written out into the
 * flat tsquery layout.
 */
typedef struct
{
/* next QueryItem slot to be filled */
QueryItem *curitem;
/* start of the operand string area; distances are relative to it */
char *operand;
/* where the next operand string will be written */
char *curoperand;
} QTN2QTState;
/*
 * Emit node "in" and its subtree into the output described by "state", in
 * prefix order.  Value nodes also copy their word (NUL-terminated) into the
 * operand area and record its offset; binary operators record in "left" the
 * item distance to their second operand.  The tree must already be binary.
 */
static void
fillQT(QTN2QTState * state, QTNode * in)
{
    QueryItem  *slot = state->curitem;

    /* every node occupies exactly one item slot */
    *slot = *(in->valnode);
    state->curitem++;

    if (in->valnode->type == VAL)
    {
        memcpy(state->curoperand, in->word, in->valnode->length);
        slot->distance = state->curoperand - state->operand;
        state->curoperand[in->valnode->length] = '\0';
        state->curoperand += in->valnode->length + 1;
        return;
    }

    Assert(in->nchild <= 2);

    fillQT(state, in->child[0]);

    if (in->nchild == 2)
    {
        /* second operand starts wherever the first subtree ended */
        slot->left = state->curitem - slot;
        fillQT(state, in->child[1]);
    }
}
/*
 * QTN2QT
 *		Serialize a (binary) QTNode tree into a newly palloc'd tsquery.
 *
 * The tree is measured first, then the result is allocated at its exact
 * size and filled in one pass.
 */
TSQuery
QTN2QT(QTNode *in)
{
    QTN2QTState fill;
    TSQuery     result;
    int         nbytes;
    int         operandlen = 0,
                nitems = 0;

    cntsize(in, &operandlen, &nitems);
    nbytes = COMPUTESIZE(nitems, operandlen);

    result = (TSQuery) palloc(nbytes);
    SET_VARSIZE(result, nbytes);
    result->size = nitems;

    /* items come first, the operand strings follow them */
    fill.curitem = GETQUERY(result);
    fill.operand = GETOPERAND(result);
    fill.curoperand = fill.operand;

    fillQT(&fill, in);

    return result;
}
/*
 * QTNCopy
 *		Deep-copy a QTNode tree.
 *
 * Every copied node gets its own QueryItem (QTN_NEEDFREE); value nodes also
 * get their own NUL-terminated word (QTN_WORDFREE), and operator nodes get
 * a new child array holding copies of the children.
 */
QTNode *
QTNCopy(QTNode *in)
{
    QTNode     *dup = (QTNode *) palloc(sizeof(QTNode));
    QueryItem  *item;

    /* start from a shallow copy, then replace everything that is shared */
    *dup = *in;

    item = (QueryItem *) palloc(sizeof(QueryItem));
    *item = *(in->valnode);
    dup->valnode = item;
    dup->flags |= QTN_NEEDFREE;

    if (in->valnode->type != VAL)
    {
        int         k;

        dup->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);
        for (k = 0; k < in->nchild; k++)
            dup->child[k] = QTNCopy(in->child[k]);
    }
    else
    {
        dup->word = palloc(in->valnode->length + 1);
        memcpy(dup->word, in->word, in->valnode->length);
        dup->word[in->valnode->length] = '\0';
        dup->flags |= QTN_WORDFREE;
    }

    return dup;
}

View File

@@ -0,0 +1,804 @@
/*-------------------------------------------------------------------------
*
* tsrank.c
* rank tsvector by tsquery
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include <math.h>
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/array.h"
/*
 * Default weights, indexed by the value WEP_GETWEIGHT() returns for a
 * position.  Used whenever the caller supplies no (or a negative) weight.
 */
static float weights[] = {0.1, 0.2, 0.4, 1.0};
/*
 * Weight of one position.  Expands to a lookup in a variable named "w",
 * which must be in scope at the point of use.
 */
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
/*
 * Bit flags for the "method" (normalization) argument of the ranking
 * functions; they may be OR-ed together.
 */
/* no normalization */
#define RANK_NO_NORM 0x00
/* divide by a logarithm of (document length + 1) */
#define RANK_NORM_LOGLENGTH 0x01
/* divide by the document length as computed by cnt_length() */
#define RANK_NORM_LENGTH 0x02
/* calc_rank_cd only: take the distance between extents into account */
#define RANK_NORM_EXTDIST 0x04
/* divide by the number of entries in the tsvector */
#define RANK_NORM_UNIQ 0x08
/* divide by log2(number of entries in the tsvector + 1) */
#define RANK_NORM_LOGUNIQ 0x10
/* normalization used by the entry points that take no method argument */
#define DEF_NORM_METHOD RANK_NO_NORM
/* forward declarations: the two functions call each other / are used before definition */
static float calc_rank_or(float *w, TSVector t, TSQuery q);
static float calc_rank_and(float *w, TSVector t, TSQuery q);
/*
 * Weight of a pair of words found "w" positions apart.  The weight falls
 * off exponentially with the distance; beyond 100 positions a fixed tiny
 * value is returned.
 */
static float4
word_distance(int4 w)
{
    double      growth;

    if (w > 100)
        return 1e-30;

    growth = exp(((float4) w) / 1.5 - 2);

    return 1.0 / (1.005 + 0.05 * growth);
}
/*
 * Length of a document in positions: the sum of POSDATALEN over all
 * entries, where an entry for which POSDATALEN is 0 counts as one.
 */
static int
cnt_length(TSVector t)
{
    WordEntry  *entry;
    WordEntry  *stop = (WordEntry *) STRPTR(t);
    int         total = 0;

    for (entry = ARRPTR(t); entry < stop; entry++)
    {
        int         npos = POSDATALEN(t, entry);

        total += (npos == 0) ? 1 : npos;
    }

    return total;
}
/*
 * Compare a tsvector entry with a query operand: shorter sorts first, equal
 * lengths are decided by strncmp.  "eval" and "qval" are the string areas
 * the entry's pos and the item's distance are offsets into.
 */
static int4
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
{
    if (ptr->len != item->length)
        return (ptr->len > item->length) ? 1 : -1;

    return strncmp(eval + ptr->pos, qval + item->distance, item->length);
}
/*
 * Binary search for the tsvector entry that matches query operand "item".
 * Returns the entry, or NULL if the lexeme does not occur in "t".  The
 * search relies on the entries being ordered as WordECompareQueryItem
 * compares them.
 */
static WordEntry *
find_wordentry(TSVector t, TSQuery q, QueryItem * item)
{
    WordEntry  *lo = ARRPTR(t);
    WordEntry  *hi = (WordEntry *) STRPTR(t);

    /* the match, if any, is always within [lo, hi) */
    while (lo < hi)
    {
        WordEntry  *mid = lo + (hi - lo) / 2;
        int         cmp = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), mid, item);

        if (cmp < 0)
            lo = mid + 1;
        else if (cmp > 0)
            hi = mid;
        else
            return mid;
    }

    return NULL;
}
static int
compareQueryItem(const void *a, const void *b, void *arg)
{
char *operand = (char *) arg;
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
return strncmp(operand + (*(QueryItem **) a)->distance,
operand + (*(QueryItem **) b)->distance,
(*(QueryItem **) b)->length);
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
}
/*
 * Collect the value items of a query, sorted and with duplicates removed.
 *
 * On entry *size is the number of items starting at "item"; on return it is
 * the number of pointers in the palloc'd result array, which the caller
 * must pfree.
 */
static QueryItem **
SortAndUniqItems(char *operand, QueryItem * item, int *size)
{
    QueryItem **res;
    int         total = *size;
    int         count = 0;
    int         last;
    int         n;

    res = (QueryItem **) palloc(sizeof(QueryItem *) * total);

    /* keep only value items, dropping operators */
    for (n = 0; n < total; n++)
    {
        if (item[n].type == VAL)
            res[count++] = &item[n];
    }

    *size = count;
    if (count < 2)
        return res;

    qsort_arg(res, count, sizeof(QueryItem **), compareQueryItem, (void *) operand);

    /* compact in place: res[0..last] holds the distinct items seen so far */
    last = 0;
    for (n = 1; n < count; n++)
    {
        if (compareQueryItem((void *) &res[n], (void *) &res[last], (void *) operand) != 0)
        {
            last++;
            res[last] = res[n];
        }
    }

    *size = last + 1;
    return res;
}
/*
 * Stand-in position list for tsvector entries that carry no positions.
 * Element 0 is overwritten with the element count before use (the ranking
 * functions store lengthof(POSNULL) - 1 into it through a uint16 pointer),
 * and element 1 is the single dummy position.
 */
static WordEntryPos POSNULL[] = {
0,
0
};
/*
 * calc_rank_and
 *		Rank a document by how close together the query's lexemes occur.
 *
 * w is the weight table used by wpos(), t the document, q the query.
 * Every pair of distinct query lexemes found in t contributes, for every
 * pair of their positions, a weight based on the two position weights and
 * word_distance(); contributions are combined as 1 - (1-res)*(1-curw).
 *
 * Returns a negative value (-1.0) if no pair contributed.  With fewer than
 * two distinct lexemes in the query the result of calc_rank_or is returned
 * instead.
 */
static float
calc_rank_and(float *w, TSVector t, TSQuery q)
{
uint16 **pos;
int i,
k,
l,
p;
WordEntry *entry;
WordEntryPos *post,
*ct;
int4 dimt,
lenct,
dist;
float res = -1.0;
QueryItem **item;
int size = q->size;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
/* proximity needs at least two different lexemes */
if (size < 2)
{
pfree(item);
return calc_rank_or(w, t, q);
}
/* pos[i] stays NULL for lexemes that do not occur in the document */
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
memset(pos, 0, sizeof(uint16 *) * q->size);
/* prepare the dummy position list: one entry at position MAXENTRYPOS - 1 */
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
for (i = 0; i < size; i++)
{
entry = find_wordentry(t, q, item[i]);
if (!entry)
continue;
if (entry->haspos)
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
else
pos[i] = (uint16 *) POSNULL;
/* a position list is read as a uint16 count followed by the positions */
dimt = *(uint16 *) (pos[i]);
post = (WordEntryPos *) (pos[i] + 1);
/* pair lexeme i with every earlier lexeme k that was found */
for (k = 0; k < i; k++)
{
if (!pos[k])
continue;
lenct = *(uint16 *) (pos[k]);
ct = (WordEntryPos *) (pos[k] + 1);
for (l = 0; l < dimt; l++)
{
for (p = 0; p < lenct; p++)
{
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
/* a zero distance only counts when one side has no real positions */
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
{
float curw;
/* unknown distance: treat the words as maximally far apart */
if (!dist)
dist = MAXENTRYPOS;
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
/* first contribution is taken as is, later ones are combined */
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
}
}
}
}
}
pfree(pos);
pfree(item);
return res;
}
/*
 * calc_rank_or
 *		Rank a document by the occurrences of each query lexeme on its own.
 *
 * w is the weight table used by wpos(), t the document, q the query.
 * Each distinct query lexeme found in t contributes a value computed from
 * the weights of its positions; the sum is divided by the number of
 * distinct query lexemes.  Returns 0.0 when nothing matches.
 */
static float
calc_rank_or(float *w, TSVector t, TSQuery q)
{
WordEntry *entry;
WordEntryPos *post;
int4 dimt,
j,
i;
float res = 0.0;
QueryItem **item;
int size = q->size;
/* the dummy position list holds exactly one position */
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
for (i = 0; i < size; i++)
{
float resj,
wjm;
int4 jm;
entry = find_wordentry(t, q, item[i]);
if (!entry)
continue;
if (entry->haspos)
{
dimt = POSDATALEN(t, entry);
post = POSDATAPTR(t, entry);
}
else
{
/* no positions stored: use the dummy list */
dimt = *(uint16 *) POSNULL;
post = POSNULL + 1;
}
/*
 * resj accumulates weight / (occurrence number)^2; wjm and jm remember
 * the largest weight and the index where it was first seen.
 */
resj = 0.0;
wjm = -1.0;
jm = 0;
for (j = 0; j < dimt; j++)
{
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
if (wpos(post[j]) > wjm)
{
wjm = wpos(post[j]);
jm = j;
}
}
/*
 * limit (sum(1/i^2), i->inf) = pi^2/6 (the constant 1.64493406685 below)
 * resj = sum(wi/i^2), i = 1 .. number of occurrences,
 * wi should be sorted in descending order; we do not sort for now and
 * just move the maximum weight to the front (it is counted with divisor 1
 * and its original term is subtracted).  This should be corrected.
 * Oleg Bartunov
 */
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
}
/* average over the distinct query lexemes, found or not */
if (size > 0)
res = res / size;
pfree(item);
return res;
}
/*
 * calc_rank
 *		Compute the rank of document "t" for query "q".
 *
 * The AND-style ranking is used when the query's first item is an '&'
 * operator, the OR-style ranking otherwise.  "method" is a bit mask of
 * RANK_NORM_* flags selecting the normalizations applied to the raw rank.
 * An empty document or query ranks 0.
 */
static float
calc_rank(float *w, TSVector t, TSQuery q, int4 method)
{
    QueryItem  *root = GETQUERY(q);
    float       score;
    int         doclen;

    if (!t->size || !q->size)
        return 0.0;

    if (root->type != VAL && root->val == (int4) '&')
        score = calc_rank_and(w, t, q);
    else
        score = calc_rank_or(w, t, q);

    /* calc_rank_and reports "no pairs found" as a negative value */
    if (score < 0)
        score = 1e-20;

    if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
        score /= log((double) (cnt_length(t) + 1)) / log(2.0);

    if (method & RANK_NORM_LENGTH)
    {
        doclen = cnt_length(t);
        if (doclen > 0)
            score /= (float) doclen;
    }

    if ((method & RANK_NORM_UNIQ) && t->size > 0)
        score /= (float) (t->size);

    if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
        score /= log((double) (t->size + 1)) / log(2.0);

    return score;
}
/*
 * getWeights
 *		Turn a user-supplied weight array into the table used for ranking.
 *
 * With a NULL array the built-in defaults are returned.  Otherwise the
 * array must be one-dimensional, hold at least as many elements as there
 * are default weights, and contain no nulls; negative elements select the
 * corresponding default and values above 1.0 are rejected.
 *
 * The result points to static storage that is overwritten by the next call.
 */
static float *
getWeights(ArrayType *win)
{
    static float ws[lengthof(weights)];
    float4     *given;
    int         k;

    if (win == NULL)
        return weights;

    if (ARR_NDIM(win) != 1)
        ereport(ERROR,
                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                 errmsg("array of weight must be one-dimensional")));

    if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
        ereport(ERROR,
                (errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
                 errmsg("array of weight is too short")));

    if (ARR_HASNULL(win))
        ereport(ERROR,
                (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
                 errmsg("array of weight must not contain nulls")));

    given = (float4 *) ARR_DATA_PTR(win);

    for (k = 0; k < lengthof(weights); k++)
    {
        if (given[k] >= 0)
            ws[k] = given[k];
        else
            ws[k] = weights[k];

        if (ws[k] > 1.0)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("weight out of range")));
    }

    return ws;
}
/*
 * ts_rank_wttf
 *		SQL-callable ranking: (weights array, tsvector, tsquery, method).
 */
Datum
ts_rank_wttf(PG_FUNCTION_ARGS)
{
    ArrayType  *warr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
    TSVector    vector = PG_GETARG_TSVECTOR(1);
    TSQuery     tsq = PG_GETARG_TSQUERY(2);
    int         norm = PG_GETARG_INT32(3);
    float       rank;

    rank = calc_rank(getWeights(warr), vector, tsq, norm);

    /* release detoasted copies, if any were made */
    PG_FREE_IF_COPY(warr, 0);
    PG_FREE_IF_COPY(vector, 1);
    PG_FREE_IF_COPY(tsq, 2);

    PG_RETURN_FLOAT4(rank);
}
/*
 * ts_rank_wtt
 *		SQL-callable ranking: (weights array, tsvector, tsquery), using the
 *		default normalization method.
 */
Datum
ts_rank_wtt(PG_FUNCTION_ARGS)
{
    ArrayType  *warr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
    TSVector    vector = PG_GETARG_TSVECTOR(1);
    TSQuery     tsq = PG_GETARG_TSQUERY(2);
    float       rank;

    rank = calc_rank(getWeights(warr), vector, tsq, DEF_NORM_METHOD);

    /* release detoasted copies, if any were made */
    PG_FREE_IF_COPY(warr, 0);
    PG_FREE_IF_COPY(vector, 1);
    PG_FREE_IF_COPY(tsq, 2);

    PG_RETURN_FLOAT4(rank);
}
/*
 * ts_rank_ttf
 *		SQL-callable ranking: (tsvector, tsquery, method), using the default
 *		weights.
 */
Datum
ts_rank_ttf(PG_FUNCTION_ARGS)
{
    TSVector    vector = PG_GETARG_TSVECTOR(0);
    TSQuery     tsq = PG_GETARG_TSQUERY(1);
    int         norm = PG_GETARG_INT32(2);
    float       rank;

    rank = calc_rank(getWeights(NULL), vector, tsq, norm);

    /* release detoasted copies, if any were made */
    PG_FREE_IF_COPY(vector, 0);
    PG_FREE_IF_COPY(tsq, 1);

    PG_RETURN_FLOAT4(rank);
}
/*
 * ts_rank_tt
 *		SQL-callable ranking: (tsvector, tsquery), using the default weights
 *		and the default normalization method.
 */
Datum
ts_rank_tt(PG_FUNCTION_ARGS)
{
    TSVector    vector = PG_GETARG_TSVECTOR(0);
    TSQuery     tsq = PG_GETARG_TSQUERY(1);
    float       rank;

    rank = calc_rank(getWeights(NULL), vector, tsq, DEF_NORM_METHOD);

    /* release detoasted copies, if any were made */
    PG_FREE_IF_COPY(vector, 0);
    PG_FREE_IF_COPY(tsq, 1);

    PG_RETURN_FLOAT4(rank);
}
/*
 * One occurrence of a query lexeme in the document, as used by the
 * cover-density ranking.  get_docrep builds an array of these, sorted by
 * pos.
 */
typedef struct
{
/* the query items that this occurrence satisfies */
QueryItem **item;
/* number of pointers in item[] */
int16 nitem;
/* whether calc_rank_cd should pfree item[] through this element */
bool needfree;
/* weight class of the occurrence (WEP_GETWEIGHT) */
uint8 wclass;
/* position of the occurrence in the document (WEP_GETPOS) */
int32 pos;
} DocRepresentation;
static int
compareDocR(const void *a, const void *b)
{
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
return 0;
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
}
/*
 * TS_execute callback: an item is satisfied exactly when its istrue flag is
 * set.  The checkval argument is not used.
 */
static bool
checkcondition_QueryItem(void *checkval, QueryItem * val)
{
    bool        satisfied = (bool) (val->istrue);

    return satisfied;
}
/*
 * Clear the istrue flag of every value item in the query.  Operator items
 * are not touched.
 */
static void
reset_istrue_flag(TSQuery query)
{
    QueryItem  *items = GETQUERY(query);
    int         k;

    for (k = 0; k < query->size; k++)
    {
        if (items[k].type == VAL)
            items[k].istrue = 0;
    }
}
/*
 * Search state and result of Cover(): describes one cover (a stretch of
 * the document that satisfies the query) and where to resume searching.
 */
typedef struct
{
/* index into the doc array at which the next search starts */
int pos;
/* document position where the cover begins */
int p;
/* document position where the cover ends */
int q;
/* first doc element of the cover */
DocRepresentation *begin;
/* last doc element of the cover */
DocRepresentation *end;
} Extention;
/*
 * Cover
 *		Find the next cover, i.e. the next stretch of the document that
 *		satisfies the query, starting the search at doc[ext->pos].
 *
 * doc/len is the position-sorted occurrence array from get_docrep.  On
 * success the bounds are stored in ext (p, q, begin, end), ext->pos is
 * advanced so the following call continues after the start of this cover,
 * and true is returned.  Returns false when no further cover exists.
 *
 * The istrue flags of the query items are used as scratch space.
 */
static bool
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
{
DocRepresentation *ptr;
int lastpos = ext->pos;
int i;
bool found = false;
reset_istrue_flag(query);
/* start with an empty range: p at maximum, q at minimum */
ext->p = 0x7fffffff;
ext->q = 0;
ptr = doc + ext->pos;
/* find the upper bound of the cover: scan forward from the current position */
while (ptr - doc < len)
{
/* mark every query item this occurrence satisfies */
for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1;
/* NOTE(review): third TS_execute argument is false here but true in the backward scan below; its meaning is not visible in this file */
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
{
if (ptr->pos > ext->q)
{
ext->q = ptr->pos;
ext->end = ptr;
lastpos = ptr - doc;
found = true;
}
break;
}
ptr++;
}
if (!found)
return false;
reset_istrue_flag(query);
ptr = doc + lastpos;
/* find the lower bound of the cover: scan backward from the upper bound just found */
while (ptr >= doc + ext->pos)
{
for (i = 0; i < ptr->nitem; i++)
ptr->item[i]->istrue = 1;
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
{
if (ptr->pos < ext->p)
{
ext->begin = ptr;
ext->p = ptr->pos;
}
break;
}
ptr--;
}
if (ext->p <= ext->q)
{
/*
 * set the position for the next try to the lexeme following the
 * beginning of the cover just found
 */
ext->pos = (ptr - doc) + 1;
return true;
}
/* no valid lower bound: retry one element further on */
ext->pos++;
return Cover(doc, len, query, ext);
}
/*
 * get_docrep
 *		Build the position-sorted list of query-lexeme occurrences in a
 *		document, as needed by the cover-density ranking.
 *
 * For every distinct query lexeme found in txt, one DocRepresentation is
 * produced per stored position (or a single one from the dummy position
 * list when the entry has no positions).  All elements produced for one
 * lexeme share a single item[] array listing every query item equal to
 * that lexeme; only the first of them is flagged needfree so the array is
 * released exactly once.
 *
 * The istrue flags of the query items are used as scratch space to skip
 * lexemes that were already handled.
 *
 * Returns a palloc'd array with *doclen elements, or NULL (with *doclen set
 * to 0) if no query lexeme occurs in the document.
 */
static DocRepresentation *
get_docrep(TSVector txt, TSQuery query, int *doclen)
{
    QueryItem  *item = GETQUERY(query);
    WordEntry  *entry;
    WordEntryPos *post;
    int4        dimt,
                j,
                i;
    int         len = query->size * 4,
                cur = 0;
    DocRepresentation *doc;
    char       *operand;

    /* the dummy position list holds exactly one position */
    *(uint16 *) POSNULL = lengthof(POSNULL) - 1;
    doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
    operand = GETOPERAND(query);
    reset_istrue_flag(query);

    for (i = 0; i < query->size; i++)
    {
        /* skip operators and lexemes already covered by an equal item */
        if (item[i].type != VAL || item[i].istrue)
            continue;

        entry = find_wordentry(txt, query, &(item[i]));
        if (!entry)
            continue;

        if (entry->haspos)
        {
            dimt = POSDATALEN(txt, entry);
            post = POSDATAPTR(txt, entry);
        }
        else
        {
            dimt = *(uint16 *) POSNULL;
            post = POSNULL + 1;
        }

        /* make room for dimt more elements */
        while (cur + dimt >= len)
        {
            len *= 2;
            doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
        }

        for (j = 0; j < dimt; j++)
        {
            if (j == 0)
            {
                QueryItem  *kptr,
                           *iptr = item + i;
                int         k;

                /*
                 * This element owns the item array.  The flag was formerly
                 * always false, which made the cleanup loop in calc_rank_cd
                 * a no-op and leaked every array allocated here.
                 */
                doc[cur].needfree = true;
                doc[cur].nitem = 0;
                doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);

                /* gather every query item that carries the same lexeme */
                for (k = 0; k < query->size; k++)
                {
                    kptr = item + k;
                    if (k == i ||
                        (item[k].type == VAL &&
                         compareQueryItem(&kptr, &iptr, operand) == 0))
                    {
                        doc[cur].item[doc[cur].nitem] = item + k;
                        doc[cur].nitem++;
                        kptr->istrue = 1;
                    }
                }
            }
            else
            {
                /* further positions of the same lexeme borrow the array */
                doc[cur].needfree = false;
                doc[cur].nitem = doc[cur - 1].nitem;
                doc[cur].item = doc[cur - 1].item;
            }
            doc[cur].pos = WEP_GETPOS(post[j]);
            doc[cur].wclass = WEP_GETWEIGHT(post[j]);
            cur++;
        }
    }

    *doclen = cur;

    if (cur > 0)
    {
        if (cur > 1)
            qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
        return doc;
    }

    pfree(doc);
    return NULL;
}
/*
 * calc_rank_cd
 *		Cover-density ranking of document txt for the given query.
 *
 * arrdata holds one weight per weight class (negative entries select the
 * built-in default; values above 1.0 are rejected).  method is a bit mask
 * of RANK_NORM_* flags.  Every cover reported by Cover() adds to the rank
 * in proportion to the weights of its lexemes and in inverse proportion to
 * the number of non-query ("noise") words it spans.
 *
 * Returns 0.0 if no query lexeme occurs in the document.  The istrue flags
 * of the query items are modified along the way.
 */
static float4
calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
{
DocRepresentation *doc;
int len,
i,
doclen = 0;
Extention ext;
double Wdoc = 0.0;
double invws[lengthof(weights)];
double SumDist = 0.0,
PrevExtPos = 0.0,
CurExtPos = 0.0;
int NExtent = 0;
/* precompute the reciprocal of every class weight */
for (i = 0; i < lengthof(weights); i++)
{
invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
if (invws[i] > 1.0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("weight out of range")));
invws[i] = 1.0 / invws[i];
}
doc = get_docrep(txt, query, &doclen);
if (!doc)
return 0.0;
/* start the cover search at the beginning of the document */
MemSet(&ext, 0, sizeof(Extention));
while (Cover(doc, doclen, query, &ext))
{
double Cpos = 0.0;
double InvSum = 0.0;
int nNoise;
DocRepresentation *ptr = ext.begin;
/* sum the reciprocal weights of all occurrences inside the cover */
while (ptr <= ext.end)
{
InvSum += invws[ptr->wclass];
ptr++;
}
/* number of occurrences divided by the sum of reciprocal weights */
Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
/*
 * If the document is big enough, ext.q may be equal to ext.p due to the
 * limit on positional information.  In that case we approximate the
 * number of noise words as half the cover's length.
 */
nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
if (nNoise < 0)
nNoise = (ext.end - ext.begin) / 2;
Wdoc += Cpos / ((double) (1 + nNoise));
/* midpoint of the cover, used for the extent-distance normalization */
CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent division by
* zero in the case of
multiple lexize */ )
SumDist += 1.0 / (CurExtPos - PrevExtPos);
PrevExtPos = CurExtPos;
NExtent++;
}
/* NOTE(review): unlike calc_rank, this divides by the natural log, without the "/ log(2.0)" factor -- confirm that is intended */
if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
Wdoc /= log((double) (cnt_length(txt) + 1));
if (method & RANK_NORM_LENGTH)
{
len = cnt_length(txt);
if (len > 0)
Wdoc /= (double) len;
}
if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
Wdoc /= ((double) NExtent) / SumDist;
if ((method & RANK_NORM_UNIQ) && txt->size > 0)
Wdoc /= (double) (txt->size);
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
/* release the item arrays of the elements flagged as owning them */
for (i = 0; i < doclen; i++)
if (doc[i].needfree)
pfree(doc[i].item);
pfree(doc);
return (float4) Wdoc;
}
/*
 * ts_rankcd_wttf
 *		SQL-callable cover-density ranking:
 *		(weights array, tsvector, tsquery, method).
 *
 * The query is fetched as a private copy; calc_rank_cd modifies the istrue
 * flags of its items.
 */
Datum
ts_rankcd_wttf(PG_FUNCTION_ARGS)
{
    ArrayType  *warr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
    TSVector    vector = PG_GETARG_TSVECTOR(1);
    TSQuery     tsq = PG_GETARG_TSQUERY_COPY(2);
    int         norm = PG_GETARG_INT32(3);
    float       rank;

    rank = calc_rank_cd(getWeights(warr), vector, tsq, norm);

    /* release detoasted copies and the query copy */
    PG_FREE_IF_COPY(warr, 0);
    PG_FREE_IF_COPY(vector, 1);
    PG_FREE_IF_COPY(tsq, 2);

    PG_RETURN_FLOAT4(rank);
}
/*
 * ts_rankcd_wtt
 *		SQL-callable cover-density ranking: (weights array, tsvector,
 *		tsquery), using the default normalization method.
 *
 * The query is fetched as a private copy; calc_rank_cd modifies the istrue
 * flags of its items.
 */
Datum
ts_rankcd_wtt(PG_FUNCTION_ARGS)
{
    ArrayType  *warr = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
    TSVector    vector = PG_GETARG_TSVECTOR(1);
    TSQuery     tsq = PG_GETARG_TSQUERY_COPY(2);
    float       rank;

    rank = calc_rank_cd(getWeights(warr), vector, tsq, DEF_NORM_METHOD);

    /* release detoasted copies and the query copy */
    PG_FREE_IF_COPY(warr, 0);
    PG_FREE_IF_COPY(vector, 1);
    PG_FREE_IF_COPY(tsq, 2);

    PG_RETURN_FLOAT4(rank);
}
/*
 * ts_rankcd_ttf
 *		SQL-callable cover-density ranking: (tsvector, tsquery, method),
 *		using the default weights.
 *
 * The query is fetched as a private copy; calc_rank_cd modifies the istrue
 * flags of its items.
 */
Datum
ts_rankcd_ttf(PG_FUNCTION_ARGS)
{
    TSVector    vector = PG_GETARG_TSVECTOR(0);
    TSQuery     tsq = PG_GETARG_TSQUERY_COPY(1);
    int         norm = PG_GETARG_INT32(2);
    float       rank;

    rank = calc_rank_cd(getWeights(NULL), vector, tsq, norm);

    /* release the detoasted copy and the query copy */
    PG_FREE_IF_COPY(vector, 0);
    PG_FREE_IF_COPY(tsq, 1);

    PG_RETURN_FLOAT4(rank);
}
/*
 * ts_rankcd_tt
 *		SQL-callable cover-density ranking: (tsvector, tsquery), using the
 *		default weights and the default normalization method.
 *
 * The query is fetched as a private copy; calc_rank_cd modifies the istrue
 * flags of its items.
 */
Datum
ts_rankcd_tt(PG_FUNCTION_ARGS)
{
    TSVector    vector = PG_GETARG_TSVECTOR(0);
    TSQuery     tsq = PG_GETARG_TSQUERY_COPY(1);
    float       rank;

    rank = calc_rank_cd(getWeights(NULL), vector, tsq, DEF_NORM_METHOD);

    /* release the detoasted copy and the query copy */
    PG_FREE_IF_COPY(vector, 0);
    PG_FREE_IF_COPY(tsq, 1);

    PG_RETURN_FLOAT4(rank);
}

View File

@@ -0,0 +1,683 @@
/*-------------------------------------------------------------------------
*
* tsvector.c
* I/O functions for tsvector
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "libpq/pqformat.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
/*
 * qsort comparator ordering WordEntryPos values by ascending position; the
 * weight part is ignored.
 */
static int
comparePos(const void *a, const void *b)
{
    int         apos = WEP_GETPOS(*(WordEntryPos *) a);
    int         bpos = WEP_GETPOS(*(WordEntryPos *) b);

    if (apos == bpos)
        return 0;

    return (apos > bpos) ? 1 : -1;
}
/*
 * Sort an array of l positions and remove duplicates in place, keeping the
 * highest weight seen for each position.  Processing stops once MAXNUMPOS
 * positions have been kept or position MAXENTRYPOS - 1 has been stored.
 *
 * Returns the number of positions kept.
 */
static int
uniquePos(WordEntryPos * a, int4 l)
{
    WordEntryPos *scan;
    WordEntryPos *last = a;

    if (l == 1)
        return l;

    qsort((void *) a, l, sizeof(WordEntryPos), comparePos);

    /* a[0 .. last] holds the distinct positions accepted so far */
    for (scan = a + 1; scan - a < l; scan++)
    {
        if (WEP_GETPOS(*scan) == WEP_GETPOS(*last))
        {
            /* same position again: remember the larger weight */
            if (WEP_GETWEIGHT(*scan) > WEP_GETWEIGHT(*last))
                WEP_SETWEIGHT(*last, WEP_GETWEIGHT(*scan));
            continue;
        }

        last++;
        *last = *scan;

        if (last - a >= MAXNUMPOS - 1 || WEP_GETPOS(*last) == MAXENTRYPOS - 1)
            break;
    }

    return last + 1 - a;
}
static int
compareentry(const void *a, const void *b, void *arg)
{
char *BufferStr = (char *) arg;
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
{
return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
&BufferStr[((WordEntryIN *) b)->entry.pos],
((WordEntryIN *) a)->entry.len);
}
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
}
/*
 * uniqueentry
 *		Sort the parsed entries and merge duplicates of the same lexeme.
 *
 * a/l is the array of parsed entries (l must be at least 1) and buf the
 * buffer their pos offsets refer to.  Entries with the same lexeme are
 * merged into one, with their position lists concatenated and then
 * deduplicated by uniquePos.
 *
 * Returns the number of entries left.  *outbuflen is set (any incoming
 * value is ignored) to the number of data bytes the result needs: for each
 * entry the short-aligned lexeme length plus, if it has positions, room
 * for the count header and the positions.  This is the amount the caller's
 * copy loop consumes per entry.
 *
 * The previous code added to the incoming *outbuflen instead of starting
 * from zero, left it untouched for a single entry without positions, and
 * in the multi-entry case did not count the position-count header.
 */
static int
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
{
    WordEntryIN *ptr,
               *res;
    int4        needed = 0;

    res = a;

    if (l > 1)
        qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);

    ptr = a + 1;
    while (ptr - a < l)
    {
        if (!(ptr->entry.len == res->entry.len &&
              strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
        {
            /* a new lexeme starts: *res is complete, account for it */
            needed += SHORTALIGN(res->entry.len);
            if (res->entry.haspos)
            {
                *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
                needed += (*(uint16 *) (res->pos) + 1) * sizeof(WordEntryPos);
            }

            res++;
            if (res != ptr)
                memcpy(res, ptr, sizeof(WordEntryIN));
        }
        else if (ptr->entry.haspos)
        {
            /* duplicate lexeme with positions: merge them into *res */
            if (res->entry.haspos)
            {
                int4        len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);

                res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
                memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
                       &(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
                *(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
                pfree(ptr->pos);
            }
            else
            {
                /* *res had none so far: simply take over the list */
                res->entry.haspos = 1;
                res->pos = ptr->pos;
            }
        }
        ptr++;
    }

    /* account for the last entry */
    needed += SHORTALIGN(res->entry.len);
    if (res->entry.haspos)
    {
        *(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
        needed += (*(uint16 *) (res->pos) + 1) * sizeof(WordEntryPos);
    }

    *outbuflen = needed;

    return res + 1 - a;
}
/*
 * Compare two entries whose lexemes live in buf; simply forwards to
 * compareentry.
 */
static int
WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
{
    int         cmp = compareentry(a, b, buf);

    return cmp;
}
/* States of the tsvector token parser (gettoken_tsvector) */
/* skipping whitespace, waiting for a lexeme to start */
#define WAITWORD 1
/* inside an unquoted lexeme */
#define WAITENDWORD 2
/* a backslash was seen; the next character is taken literally */
#define WAITNEXTCHAR 3
/* inside a quoted lexeme, waiting for the closing quote */
#define WAITENDCMPLX 4
/* a quoted lexeme just ended; a ':' would start position info */
#define WAITPOSINFO 5
/* expecting a position number */
#define INPOSINFO 6
/* after a position number: expecting ',', a weight letter or the end */
#define WAITPOSDELIM 7
/* a quote was seen inside a quoted lexeme: doubled quote or the end? */
#define WAITCHARCMPLX 8
/*
 * Double the word buffer when fewer than one maximum-length character's
 * worth of bytes remains free, keeping curpos at the same offset.  Expects
 * a variable named "state" in scope.
 */
#define RESIZEPRSBUF \
do { \
if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
{ \
int4 clen = state->curpos - state->word; \
state->len *= 2; \
state->word = (char*)repalloc( (void*)state->word, state->len ); \
state->curpos = state->word + clen; \
} \
} while (0)
/*
 * gettoken_tsvector
 *		Read the next lexeme, and its position list if any, from the input.
 *
 * state->prsbuf is the read pointer into the input and is advanced as
 * characters are consumed.  On success the NUL-terminated lexeme is in
 * state->word (state->curpos points at its terminator) and, if position
 * info was present, state->pos holds it (element 0 used as a uint16 count)
 * with state->alen nonzero; state->alen is 0 when there was none.
 *
 * When state->oprisdelim is set, operator characters end a lexeme, may not
 * start one, and position info is not parsed.
 *
 * Returns true if a token was read, false at the end of the input.
 * Malformed input raises an error.
 */
bool
gettoken_tsvector(TSVectorParseState *state)
{
/* state to go back to after a backslash-escaped character */
int4 oldstate = 0;
state->curpos = state->word;
state->state = WAITWORD;
state->alen = 0;
while (1)
{
if (state->state == WAITWORD)
{
/* between tokens: whitespace is skipped */
if (*(state->prsbuf) == '\0')
return false;
else if (t_iseq(state->prsbuf, '\''))
state->state = WAITENDCMPLX;
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else if (!t_isspace(state->prsbuf))
{
/* first character of an unquoted lexeme */
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDWORD;
}
}
else if (state->state == WAITNEXTCHAR)
{
/* character following a backslash: copied verbatim */
if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("there is no escaped character")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = oldstate;
}
}
else if (state->state == WAITENDWORD)
{
/* inside an unquoted lexeme */
if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
{
/* lexeme ends here, without position info */
RESIZEPRSBUF;
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
return true;
}
else if (t_iseq(state->prsbuf, ':'))
{
/* lexeme ends here; position info follows unless oprisdelim */
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
*(state->curpos) = '\0';
if (state->oprisdelim)
return true;
else
state->state = INPOSINFO;
}
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITENDCMPLX)
{
/* inside a quoted lexeme */
if (t_iseq(state->prsbuf, '\''))
{
state->state = WAITCHARCMPLX;
}
else if (t_iseq(state->prsbuf, '\\'))
{
state->state = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
}
else if (*(state->prsbuf) == '\0')
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
else
{
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
}
}
else if (state->state == WAITCHARCMPLX)
{
/* a quote inside a quoted lexeme */
if (t_iseq(state->prsbuf, '\''))
{
/* doubled quote: stands for one literal quote */
RESIZEPRSBUF;
COPYCHAR(state->curpos, state->prsbuf);
state->curpos += pg_mblen(state->prsbuf);
state->state = WAITENDCMPLX;
}
else
{
/* the previous quote closed the lexeme */
RESIZEPRSBUF;
*(state->curpos) = '\0';
if (state->curpos == state->word)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
if (state->oprisdelim)
{
/* state->prsbuf+=pg_mblen(state->prsbuf); */
return true;
}
else
state->state = WAITPOSINFO;
continue; /* recheck current character */
}
}
else if (state->state == WAITPOSINFO)
{
/* after a quoted lexeme: ':' introduces positions, anything else ends the token */
if (t_iseq(state->prsbuf, ':'))
state->state = INPOSINFO;
else
return true;
}
else if (state->state == INPOSINFO)
{
/* expecting the first digit of a position number */
if (t_isdigit(state->prsbuf))
{
if (state->alen == 0)
{
/* first position: allocate the list, element 0 is the count */
state->alen = 4;
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
*(uint16 *) (state->pos) = 0;
}
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
{
/* list is full: double it */
state->alen *= 2;
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
}
(*(uint16 *) (state->pos))++;
/* the whole number is parsed now; its remaining digits are skipped in WAITPOSDELIM */
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("wrong position info in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
state->state = WAITPOSDELIM;
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else if (state->state == WAITPOSDELIM)
{
/*
 * After a position number: ',' starts the next position, a weight
 * letter (A or '*' = 3, B = 2, C = 1, D = 0) sets the weight of the
 * current position, and whitespace or end of input ends the token.
 * A weight letter is rejected if the weight is already nonzero.
 */
if (t_iseq(state->prsbuf, ','))
state->state = INPOSINFO;
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
}
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
}
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
}
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
{
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
}
else if (t_isspace(state->prsbuf) ||
*(state->prsbuf) == '\0')
return true;
/* remaining digits of the number already parsed are simply skipped */
else if (!t_isdigit(state->prsbuf))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("syntax error in tsvector")));
}
else /* internal error */
elog(ERROR, "internal error in gettoken_tsvector");
/* get next char */
state->prsbuf += pg_mblen(state->prsbuf);
}
/* not reached: the loop above only exits through return or an error */
return false;
}
Datum
tsvectorin(PG_FUNCTION_ARGS)
{
char *buf = PG_GETARG_CSTRING(0);
TSVectorParseState state;
WordEntryIN *arr;
WordEntry *inarr;
int4 len = 0,
totallen = 64;
TSVector in;
char *tmpbuf,
*cur;
int4 i,
buflen = 256;
pg_verifymbstr(buf, strlen(buf), false);
state.prsbuf = buf;
state.len = 32;
state.word = (char *) palloc(state.len);
state.oprisdelim = false;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
cur = tmpbuf = (char *) palloc(buflen);
while (gettoken_tsvector(&state))
{
/*
* Realloc buffers if it's needed
*/
if (len >= totallen)
{
totallen *= 2;
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
}
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
{
int4 dist = cur - tmpbuf;
buflen *= 2;
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
cur = tmpbuf + dist;
}
if (state.curpos - state.word >= MAXSTRLEN)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("word is too long (%d bytes, max %d bytes)",
state.curpos - state.word, MAXSTRLEN)));
arr[len].entry.len = state.curpos - state.word;
if (cur - tmpbuf > MAXSTRPOS)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("position value too large")));
arr[len].entry.pos = cur - tmpbuf;
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
cur += arr[len].entry.len;
if (state.alen)
{
arr[len].entry.haspos = 1;
arr[len].pos = state.pos;
}
else
arr[len].entry.haspos = 0;
len++;
}
pfree(state.word);
if (len > 0)
len = uniqueentry(arr, len, tmpbuf, &buflen);
else
buflen = 0;
totallen = CALCDATASIZE(len, buflen);
in = (TSVector) palloc0(totallen);
SET_VARSIZE(in, totallen);
in->size = len;
cur = STRPTR(in);
inarr = ARRPTR(in);
for (i = 0; i < len; i++)
{
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
arr[i].entry.pos = cur - STRPTR(in);
cur += SHORTALIGN(arr[i].entry.len);
if (arr[i].entry.haspos)
{
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
pfree(arr[i].pos);
}
inarr[i] = arr[i].entry;
}
PG_RETURN_TSVECTOR(in);
}
/*
 * tsvectorout - text output function for the tsvector type.
 *
 * Produces a space-separated list of single-quoted lexemes.  A lexeme that
 * carries positions is followed by ':' and a comma-separated list of
 * positions, each optionally suffixed with a weight letter (3 => 'A',
 * 2 => 'B', 1 => 'C'; weight 0 prints no letter).
 *
 * Inside a lexeme both the quote character and the backslash are written
 * doubled.  The tsvector text syntax treats backslash as an escape
 * character, so emitting it bare would produce text that does not read back
 * as the same value.
 */
Datum
tsvectorout(PG_FUNCTION_ARGS)
{
	TSVector	out = PG_GETARG_TSVECTOR(0);
	char	   *outbuf;
	int4		i,
				lenbuf = 0,
				pp;
	WordEntry  *ptr = ARRPTR(out);
	char	   *curbegin,
			   *curin,
			   *curout;

	/*
	 * Upper bound on the output size.  Every lexeme is budgeted at twice its
	 * byte length (times the maximum encoding length), which leaves room for
	 * doubling every character that needs escaping.
	 */
	lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
	for (i = 0; i < out->size; i++)
	{
		lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
		if (ptr[i].haspos)
			lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
	}

	curout = outbuf = (char *) palloc(lenbuf);
	for (i = 0; i < out->size; i++)
	{
		curbegin = curin = STRPTR(out) + ptr->pos;
		if (i != 0)
			*curout++ = ' ';
		*curout++ = '\'';
		while (curin - curbegin < ptr->len)
		{
			int			len = pg_mblen(curin);

			/* escape by doubling: emit one extra copy before the character */
			if (t_iseq(curin, '\''))
				*curout++ = '\'';
			else if (t_iseq(curin, '\\'))
				*curout++ = '\\';

			/* copy the (possibly multibyte) character itself */
			while (len--)
				*curout++ = *curin++;
		}

		*curout++ = '\'';
		if ((pp = POSDATALEN(out, ptr)) != 0)
		{
			WordEntryPos *wptr;

			*curout++ = ':';
			wptr = POSDATAPTR(out, ptr);
			while (pp)
			{
				curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
				switch (WEP_GETWEIGHT(*wptr))
				{
					case 3:
						*curout++ = 'A';
						break;
					case 2:
						*curout++ = 'B';
						break;
					case 1:
						*curout++ = 'C';
						break;
					case 0:
					default:
						break;
				}

				/* separator between positions, but not after the last one */
				if (pp > 1)
					*curout++ = ',';
				pp--;
				wptr++;
			}
		}
		ptr++;
	}

	*curout = '\0';
	PG_FREE_IF_COPY(out, 0);
	PG_RETURN_CSTRING(outbuf);
}
/*
 * tsvectorsend - binary output function for the tsvector type.
 *
 * Message layout, as written below:
 *	 int32	number of entries
 *	 then, for every entry:
 *	   the WordEntry itself, reinterpreted as one int32
 *	   the lexeme bytes (exactly entry->len of them)
 *	   if the entry has positions: the position count, followed by each
 *	   WordEntryPos, every one sent in sizeof(WordEntryPos) bytes
 */
Datum
tsvectorsend(PG_FUNCTION_ARGS)
{
	TSVector	vec = PG_GETARG_TSVECTOR(0);
	WordEntry  *entries = ARRPTR(vec);
	StringInfoData buf;
	int			i;

	pq_begintypsend(&buf);
	pq_sendint(&buf, vec->size, sizeof(int32));

	for (i = 0; i < vec->size; i++)
	{
		WordEntry  *entry = &entries[i];

		/*
		 * We are sure that sizeof(WordEntry) == sizeof(int32)
		 */
		pq_sendint(&buf, *(int32 *) entry, sizeof(int32));
		pq_sendbytes(&buf, STRPTR(vec) + entry->pos, entry->len);

		if (entry->haspos)
		{
			WordEntryPos *positions = POSDATAPTR(vec, entry);
			int			npos = POSDATALEN(vec, entry);
			int			k;

			pq_sendint(&buf, npos, sizeof(WordEntryPos));
			for (k = 0; k < npos; k++)
				pq_sendint(&buf, positions[k], sizeof(WordEntryPos));
		}
	}

	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
}
Datum
tsvectorrecv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
TSVector vec;
int i,
size,
len = DATAHDRSIZE;
WordEntry *weptr;
int datalen = 0;
size = pq_getmsgint(buf, sizeof(uint32));
if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
elog(ERROR, "invalid size of tsvector");
len += sizeof(WordEntry) * size;
len *= 2;
vec = (TSVector) palloc0(len);
vec->size = size;
weptr = ARRPTR(vec);
for (i = 0; i < size; i++)
{
int tmp;
weptr = ARRPTR(vec) + i;
/*
* We are sure that sizeof(WordEntry) == sizeof(int32)
*/
tmp = pq_getmsgint(buf, sizeof(int32));
*weptr = *(WordEntry *) & tmp;
while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len)
{
len *= 2;
vec = (TSVector) repalloc(vec, len);
weptr = ARRPTR(vec) + i;
}
memcpy(STRPTR(vec) + weptr->pos,
pq_getmsgbytes(buf, weptr->len),
weptr->len);
datalen += SHORTALIGN(weptr->len);
if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0)
elog(ERROR, "lexemes are unordered");
if (weptr->haspos)
{
uint16 j,
npos;
WordEntryPos *wepptr;
npos = (uint16) pq_getmsgint(buf, sizeof(int16));
if (npos > MAXNUMPOS)
elog(ERROR, "unexpected number of positions");
while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len)
{
len *= 2;
vec = (TSVector) repalloc(vec, len);
weptr = ARRPTR(vec) + i;
}
memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16));
wepptr = POSDATAPTR(vec, weptr);
for (j = 0; j < npos; j++)
{
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16));
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
elog(ERROR, "position information is unordered");
}
datalen += (npos + 1) * sizeof(WordEntry);
}
}
SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen));
PG_RETURN_TSVECTOR(vec);
}

File diff suppressed because it is too large Load Diff