mirror of
https://github.com/postgres/postgres.git
synced 2025-11-29 23:43:17 +03:00
Tsearch2 functionality migrates to core. The bulk of this work is by
Oleg Bartunov and Teodor Sigaev, but I did a lot of editorializing, so anything that's broken is probably my fault. Documentation is nonexistent as yet, but let's land the patch so we can get some portability testing done.
This commit is contained in:
@@ -1,7 +1,7 @@
|
||||
#
|
||||
# Makefile for utils/adt
|
||||
#
|
||||
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.64 2007/04/02 03:49:39 tgl Exp $
|
||||
# $PostgreSQL: pgsql/src/backend/utils/adt/Makefile,v 1.65 2007/08/21 01:11:18 tgl Exp $
|
||||
#
|
||||
|
||||
subdir = src/backend/utils/adt
|
||||
@@ -25,8 +25,11 @@ OBJS = acl.o arrayfuncs.o array_userfuncs.o arrayutils.o bool.o \
|
||||
tid.o timestamp.o varbit.o varchar.o varlena.o version.o xid.o \
|
||||
network.o mac.o inet_net_ntop.o inet_net_pton.o \
|
||||
ri_triggers.o pg_lzcompress.o pg_locale.o formatting.o \
|
||||
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o xml.o \
|
||||
uuid.o
|
||||
ascii.o quote.o pgstatfuncs.o encode.o dbsize.o genfile.o \
|
||||
tsginidx.o tsgistidx.o tsquery.o tsquery_cleanup.o tsquery_gist.o \
|
||||
tsquery_op.o tsquery_rewrite.o tsquery_util.o tsrank.o \
|
||||
tsvector.o tsvector_op.o \
|
||||
uuid.o xml.o
|
||||
|
||||
like.o: like.c like_match.c
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.102 2007/06/26 16:48:09 alvherre Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/regproc.c,v 1.103 2007/08/21 01:11:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -27,6 +27,8 @@
|
||||
#include "catalog/namespace.h"
|
||||
#include "catalog/pg_operator.h"
|
||||
#include "catalog/pg_proc.h"
|
||||
#include "catalog/pg_ts_config.h"
|
||||
#include "catalog/pg_ts_dict.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "miscadmin.h"
|
||||
#include "parser/parse_type.h"
|
||||
@@ -1065,6 +1067,231 @@ regtypesend(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* regconfigin - converts "tsconfigname" to tsconfig OID
|
||||
*
|
||||
* We also accept a numeric OID, for symmetry with the output routine.
|
||||
*
|
||||
* '-' signifies unknown (OID 0). In all other cases, the input must
|
||||
* match an existing pg_ts_config entry.
|
||||
*
|
||||
* This function is not needed in bootstrap mode, so we don't worry about
|
||||
* making it work then.
|
||||
*/
|
||||
Datum
|
||||
regconfigin(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *cfg_name_or_oid = PG_GETARG_CSTRING(0);
|
||||
Oid result;
|
||||
List *names;
|
||||
|
||||
/* '-' ? */
|
||||
if (strcmp(cfg_name_or_oid, "-") == 0)
|
||||
PG_RETURN_OID(InvalidOid);
|
||||
|
||||
/* Numeric OID? */
|
||||
if (cfg_name_or_oid[0] >= '0' &&
|
||||
cfg_name_or_oid[0] <= '9' &&
|
||||
strspn(cfg_name_or_oid, "0123456789") == strlen(cfg_name_or_oid))
|
||||
{
|
||||
result = DatumGetObjectId(DirectFunctionCall1(oidin,
|
||||
CStringGetDatum(cfg_name_or_oid)));
|
||||
PG_RETURN_OID(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Normal case: parse the name into components and see if it matches any
|
||||
* pg_ts_config entries in the current search path.
|
||||
*/
|
||||
names = stringToQualifiedNameList(cfg_name_or_oid);
|
||||
|
||||
result = TSConfigGetCfgid(names, false);
|
||||
|
||||
PG_RETURN_OID(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* regconfigout - converts tsconfig OID to "tsconfigname"
|
||||
*/
|
||||
Datum
|
||||
regconfigout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid cfgid = PG_GETARG_OID(0);
|
||||
char *result;
|
||||
HeapTuple cfgtup;
|
||||
|
||||
if (cfgid == InvalidOid)
|
||||
{
|
||||
result = pstrdup("-");
|
||||
PG_RETURN_CSTRING(result);
|
||||
}
|
||||
|
||||
cfgtup = SearchSysCache(TSCONFIGOID,
|
||||
ObjectIdGetDatum(cfgid),
|
||||
0, 0, 0);
|
||||
|
||||
if (HeapTupleIsValid(cfgtup))
|
||||
{
|
||||
Form_pg_ts_config cfgform = (Form_pg_ts_config) GETSTRUCT(cfgtup);
|
||||
char *cfgname = NameStr(cfgform->cfgname);
|
||||
char *nspname;
|
||||
|
||||
/*
|
||||
* Would this config be found by regconfigin? If not, qualify it.
|
||||
*/
|
||||
if (TSConfigIsVisible(cfgid))
|
||||
nspname = NULL;
|
||||
else
|
||||
nspname = get_namespace_name(cfgform->cfgnamespace);
|
||||
|
||||
result = quote_qualified_identifier(nspname, cfgname);
|
||||
|
||||
ReleaseSysCache(cfgtup);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If OID doesn't match any pg_ts_config row, return it numerically */
|
||||
result = (char *) palloc(NAMEDATALEN);
|
||||
snprintf(result, NAMEDATALEN, "%u", cfgid);
|
||||
}
|
||||
|
||||
PG_RETURN_CSTRING(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* regconfigrecv - converts external binary format to regconfig
|
||||
*/
|
||||
Datum
|
||||
regconfigrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* Exactly the same as oidrecv, so share code */
|
||||
return oidrecv(fcinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* regconfigsend - converts regconfig to binary format
|
||||
*/
|
||||
Datum
|
||||
regconfigsend(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* Exactly the same as oidsend, so share code */
|
||||
return oidsend(fcinfo);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* regdictionaryin - converts "tsdictionaryname" to tsdictionary OID
|
||||
*
|
||||
* We also accept a numeric OID, for symmetry with the output routine.
|
||||
*
|
||||
* '-' signifies unknown (OID 0). In all other cases, the input must
|
||||
* match an existing pg_ts_dict entry.
|
||||
*
|
||||
* This function is not needed in bootstrap mode, so we don't worry about
|
||||
* making it work then.
|
||||
*/
|
||||
Datum
|
||||
regdictionaryin(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *dict_name_or_oid = PG_GETARG_CSTRING(0);
|
||||
Oid result;
|
||||
List *names;
|
||||
|
||||
/* '-' ? */
|
||||
if (strcmp(dict_name_or_oid, "-") == 0)
|
||||
PG_RETURN_OID(InvalidOid);
|
||||
|
||||
/* Numeric OID? */
|
||||
if (dict_name_or_oid[0] >= '0' &&
|
||||
dict_name_or_oid[0] <= '9' &&
|
||||
strspn(dict_name_or_oid, "0123456789") == strlen(dict_name_or_oid))
|
||||
{
|
||||
result = DatumGetObjectId(DirectFunctionCall1(oidin,
|
||||
CStringGetDatum(dict_name_or_oid)));
|
||||
PG_RETURN_OID(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Normal case: parse the name into components and see if it matches any
|
||||
* pg_ts_dict entries in the current search path.
|
||||
*/
|
||||
names = stringToQualifiedNameList(dict_name_or_oid);
|
||||
|
||||
result = TSDictionaryGetDictid(names, false);
|
||||
|
||||
PG_RETURN_OID(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* regdictionaryout - converts tsdictionary OID to "tsdictionaryname"
|
||||
*/
|
||||
Datum
|
||||
regdictionaryout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Oid dictid = PG_GETARG_OID(0);
|
||||
char *result;
|
||||
HeapTuple dicttup;
|
||||
|
||||
if (dictid == InvalidOid)
|
||||
{
|
||||
result = pstrdup("-");
|
||||
PG_RETURN_CSTRING(result);
|
||||
}
|
||||
|
||||
dicttup = SearchSysCache(TSDICTOID,
|
||||
ObjectIdGetDatum(dictid),
|
||||
0, 0, 0);
|
||||
|
||||
if (HeapTupleIsValid(dicttup))
|
||||
{
|
||||
Form_pg_ts_dict dictform = (Form_pg_ts_dict) GETSTRUCT(dicttup);
|
||||
char *dictname = NameStr(dictform->dictname);
|
||||
char *nspname;
|
||||
|
||||
/*
|
||||
* Would this dictionary be found by regdictionaryin?
|
||||
* If not, qualify it.
|
||||
*/
|
||||
if (TSDictionaryIsVisible(dictid))
|
||||
nspname = NULL;
|
||||
else
|
||||
nspname = get_namespace_name(dictform->dictnamespace);
|
||||
|
||||
result = quote_qualified_identifier(nspname, dictname);
|
||||
|
||||
ReleaseSysCache(dicttup);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If OID doesn't match any pg_ts_dict row, return it numerically */
|
||||
result = (char *) palloc(NAMEDATALEN);
|
||||
snprintf(result, NAMEDATALEN, "%u", dictid);
|
||||
}
|
||||
|
||||
PG_RETURN_CSTRING(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* regdictionaryrecv - converts external binary format to regdictionary
|
||||
*/
|
||||
Datum
|
||||
regdictionaryrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* Exactly the same as oidrecv, so share code */
|
||||
return oidrecv(fcinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* regdictionarysend - converts regdictionary to binary format
|
||||
*/
|
||||
Datum
|
||||
regdictionarysend(PG_FUNCTION_ARGS)
|
||||
{
|
||||
/* Exactly the same as oidsend, so share code */
|
||||
return oidsend(fcinfo);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* text_regclass: convert text to regclass
|
||||
*
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.234 2007/05/05 17:05:48 mha Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.235 2007/08/21 01:11:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -2822,6 +2822,8 @@ convert_to_scalar(Datum value, Oid valuetypid, double *scaledvalue,
|
||||
case REGOPERATOROID:
|
||||
case REGCLASSOID:
|
||||
case REGTYPEOID:
|
||||
case REGCONFIGOID:
|
||||
case REGDICTIONARYOID:
|
||||
*scaledvalue = convert_numeric_to_scalar(value, valuetypid);
|
||||
*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid);
|
||||
*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid);
|
||||
@@ -2925,6 +2927,8 @@ convert_numeric_to_scalar(Datum value, Oid typid)
|
||||
case REGOPERATOROID:
|
||||
case REGCLASSOID:
|
||||
case REGTYPEOID:
|
||||
case REGCONFIGOID:
|
||||
case REGDICTIONARYOID:
|
||||
/* we can treat OIDs as integers... */
|
||||
return (double) DatumGetObjectId(value);
|
||||
}
|
||||
|
||||
157
src/backend/utils/adt/tsginidx.c
Normal file
157
src/backend/utils/adt/tsginidx.c
Normal file
@@ -0,0 +1,157 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsginidx.c
|
||||
* GIN support functions for tsvector_ops
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/skey.h"
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
|
||||
Datum
|
||||
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector vector = PG_GETARG_TSVECTOR(0);
|
||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
if (vector->size > 0)
|
||||
{
|
||||
int i;
|
||||
WordEntry *we = ARRPTR(vector);
|
||||
|
||||
*nentries = (uint32) vector->size;
|
||||
entries = (Datum *) palloc(sizeof(Datum) * vector->size);
|
||||
|
||||
for (i = 0; i < vector->size; i++)
|
||||
{
|
||||
text *txt = (text *) palloc(VARHDRSZ + we->len);
|
||||
|
||||
SET_VARSIZE(txt, VARHDRSZ + we->len);
|
||||
memcpy(VARDATA(txt), STRPTR(vector) + we->pos, we->len);
|
||||
|
||||
entries[i] = PointerGetDatum(txt);
|
||||
|
||||
we++;
|
||||
}
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(vector, 0);
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_extract_query(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
uint32 *nentries = (uint32 *) PG_GETARG_POINTER(1);
|
||||
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
||||
Datum *entries = NULL;
|
||||
|
||||
*nentries = 0;
|
||||
|
||||
if (query->size > 0)
|
||||
{
|
||||
int4 i,
|
||||
j = 0,
|
||||
len;
|
||||
QueryItem *item;
|
||||
|
||||
item = clean_NOT(GETQUERY(query), &len);
|
||||
if (!item)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("query requires full scan, which is not supported by GIN indexes")));
|
||||
|
||||
item = GETQUERY(query);
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
(*nentries)++;
|
||||
|
||||
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
{
|
||||
text *txt;
|
||||
|
||||
txt = (text *) palloc(VARHDRSZ + item[i].length);
|
||||
|
||||
SET_VARSIZE(txt, VARHDRSZ + item[i].length);
|
||||
memcpy(VARDATA(txt), GETOPERAND(query) + item[i].distance, item[i].length);
|
||||
|
||||
entries[j++] = PointerGetDatum(txt);
|
||||
|
||||
if (strategy != TSearchWithClassStrategyNumber && item[i].weight != 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("@@ operator does not support lexeme class restrictions"),
|
||||
errhint("Use the @@@ operator instead.")));
|
||||
}
|
||||
}
|
||||
else
|
||||
*nentries = -1; /* nothing can be found */
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
|
||||
PG_RETURN_POINTER(entries);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
QueryItem *frst;
|
||||
bool *mapped_check;
|
||||
} GinChkVal;
|
||||
|
||||
static bool
|
||||
checkcondition_gin(void *checkval, QueryItem * val)
|
||||
{
|
||||
GinChkVal *gcv = (GinChkVal *) checkval;
|
||||
|
||||
return gcv->mapped_check[val - gcv->frst];
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_ts_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
bool *check = (bool *) PG_GETARG_POINTER(0);
|
||||
/* StrategyNumber strategy = PG_GETARG_UINT16(1); */
|
||||
TSQuery query = PG_GETARG_TSQUERY(2);
|
||||
bool res = FALSE;
|
||||
|
||||
if (query->size > 0)
|
||||
{
|
||||
int4 i,
|
||||
j = 0;
|
||||
QueryItem *item;
|
||||
GinChkVal gcv;
|
||||
|
||||
gcv.frst = item = GETQUERY(query);
|
||||
gcv.mapped_check = (bool *) palloc(sizeof(bool) * query->size);
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == VAL)
|
||||
gcv.mapped_check[i] = check[j++];
|
||||
|
||||
res = TS_execute(
|
||||
GETQUERY(query),
|
||||
&gcv,
|
||||
true,
|
||||
checkcondition_gin
|
||||
);
|
||||
}
|
||||
|
||||
PG_RETURN_BOOL(res);
|
||||
}
|
||||
784
src/backend/utils/adt/tsgistidx.c
Normal file
784
src/backend/utils/adt/tsgistidx.c
Normal file
@@ -0,0 +1,784 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsgistidx.c
|
||||
* GiST support functions for tsvector_ops
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gist.h"
|
||||
#include "access/tuptoaster.h"
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/pg_crc.h"
|
||||
|
||||
|
||||
#define SIGLENINT 31 /* >121 => key will toast, so it will not work
|
||||
* !!! */
|
||||
|
||||
#define SIGLEN ( sizeof(int4) * SIGLENINT )
|
||||
#define SIGLENBIT (SIGLEN * BITS_PER_BYTE)
|
||||
|
||||
typedef char BITVEC[SIGLEN];
|
||||
typedef char *BITVECP;
|
||||
|
||||
#define LOOPBYTE(a) \
|
||||
for(i=0;i<SIGLEN;i++) {\
|
||||
a;\
|
||||
}
|
||||
|
||||
#define GETBYTE(x,i) ( *( (BITVECP)(x) + (int)( (i) / BITS_PER_BYTE ) ) )
|
||||
#define GETBITBYTE(x,i) ( ((char)(x)) >> (i) & 0x01 )
|
||||
#define CLRBIT(x,i) GETBYTE(x,i) &= ~( 0x01 << ( (i) % BITS_PER_BYTE ) )
|
||||
#define SETBIT(x,i) GETBYTE(x,i) |= ( 0x01 << ( (i) % BITS_PER_BYTE ) )
|
||||
#define GETBIT(x,i) ( (GETBYTE(x,i) >> ( (i) % BITS_PER_BYTE )) & 0x01 )
|
||||
|
||||
#define HASHVAL(val) (((unsigned int)(val)) % SIGLENBIT)
|
||||
#define HASH(sign, val) SETBIT((sign), HASHVAL(val))
|
||||
|
||||
#define GETENTRY(vec,pos) ((SignTSVector *) DatumGetPointer((vec)->vector[(pos)].key))
|
||||
|
||||
/*
 * type of GiST index key
 *
 * "flag" holds a combination of the ARRKEY, SIGNKEY and ALLISTRUE bits.
 * The payload that follows the two header words is reached with GETARR()
 * (array of int4) or GETSIGN() (signature bytes), so "data" only marks
 * where it begins.
 */
typedef struct
{
	int32		vl_len_;		/* varlena header (do not touch directly!) */
	int4		flag;
	char		data[1];
} SignTSVector;
|
||||
|
||||
/* Bits stored in SignTSVector.flag */
#define ARRKEY		0x01
#define SIGNKEY		0x02
#define ALLISTRUE	0x04

/* Tests on the flag word of the key that "x" points to */
#define ISARRKEY(x) ( ((SignTSVector*)(x))->flag & ARRKEY )
#define ISSIGNKEY(x)	( ((SignTSVector*)(x))->flag & SIGNKEY )
#define ISALLTRUE(x)	( ((SignTSVector*)(x))->flag & ALLISTRUE )

/* Size of the fixed part of a key: varlena header plus the flag word */
#define GTHDRSIZE	( VARHDRSZ + sizeof(int4) )

/*
 * Total key size for the given flags: "len" int4 elements for an ARRKEY
 * key, nothing beyond the header for an ALLISTRUE key, and a full signature
 * otherwise.
 */
#define CALCGTSIZE(flag, len) ( GTHDRSIZE + ( ( (flag) & ARRKEY ) ? ((len)*sizeof(int4)) : (((flag) & ALLISTRUE) ? 0 : SIGLEN) ) )

/* Payload accessors; both point just past the fixed part of the key */
#define GETSIGN(x)	( (BITVECP)( (char*)(x)+GTHDRSIZE ) )
#define GETARR(x)	( (int4*)( (char*)(x)+GTHDRSIZE ) )

/* Number of int4 elements in the payload, derived from the varlena size */
#define ARRNELEM(x) ( ( VARSIZE(x) - GTHDRSIZE )/sizeof(int4) )
|
||||
|
||||
/*
 * Number of one-bits in an unsigned byte.
 *
 * Each row covers sixteen consecutive byte values; the value of the first
 * entry in the row is noted on the right.
 */
static const uint8 number_of_ones[256] = {
	0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,		/* 0x00 */
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,		/* 0x10 */
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,		/* 0x20 */
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,		/* 0x30 */
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,		/* 0x40 */
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,		/* 0x50 */
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,		/* 0x60 */
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,		/* 0x70 */
	1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,		/* 0x80 */
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,		/* 0x90 */
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,		/* 0xA0 */
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,		/* 0xB0 */
	2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,		/* 0xC0 */
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,		/* 0xD0 */
	3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,		/* 0xE0 */
	4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8		/* 0xF0 */
};
|
||||
|
||||
static int4 sizebitvec(BITVECP sign);
|
||||
|
||||
Datum
|
||||
gtsvectorin(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("gtsvector_in not implemented")));
|
||||
PG_RETURN_DATUM(0);
|
||||
}
|
||||
|
||||
#define SINGOUTSTR "%d true bits, %d false bits"
|
||||
#define ARROUTSTR "%d unique words"
|
||||
#define EXTRALEN ( 2*13 )
|
||||
|
||||
static int outbuf_maxlen = 0;
|
||||
|
||||
Datum
|
||||
gtsvectorout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(PG_GETARG_POINTER(0)));
|
||||
char *outbuf;
|
||||
|
||||
if (outbuf_maxlen == 0)
|
||||
outbuf_maxlen = 2 * EXTRALEN + Max(strlen(SINGOUTSTR), strlen(ARROUTSTR)) + 1;
|
||||
outbuf = palloc(outbuf_maxlen);
|
||||
|
||||
if (ISARRKEY(key))
|
||||
sprintf(outbuf, ARROUTSTR, (int) ARRNELEM(key));
|
||||
else
|
||||
{
|
||||
int cnttrue = (ISALLTRUE(key)) ? SIGLENBIT : sizebitvec(GETSIGN(key));
|
||||
|
||||
sprintf(outbuf, SINGOUTSTR, cnttrue, (int) SIGLENBIT - cnttrue);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(key, 0);
|
||||
PG_RETURN_POINTER(outbuf);
|
||||
}
|
||||
|
||||
/*
 * qsort comparator for int4 values, ascending order.
 */
static int
compareint(const void *a, const void *b)
{
	const int4	lhs = *((const int4 *) a);
	const int4	rhs = *((const int4 *) b);

	if (lhs == rhs)
		return 0;
	return (lhs > rhs) ? 1 : -1;
}

/*
 * Sorts the array "a" of "l" elements in place and removes duplicates.
 *
 * Returns the number of distinct values, which end up sorted at the front
 * of the array.
 *
 * Arrays with fewer than two elements are returned untouched.  This must
 * include the empty array: the compaction loop below reports at least one
 * element, so letting l == 0 fall through would make the caller believe
 * the array holds one (uninitialized) value.
 */
static int
uniqueint(int4 *a, int4 l)
{
	int4	   *ptr,
			   *res;

	if (l <= 1)
		return l;

	ptr = res = a;

	qsort((void *) a, l, sizeof(int4), compareint);

	/* "res" is the last value kept so far, "ptr" the next one to examine */
	while (ptr - a < l)
	{
		if (*ptr != *res)
			*(++res) = *ptr++;
		else
			ptr++;
	}

	return res + 1 - a;
}
|
||||
|
||||
static void
|
||||
makesign(BITVECP sign, SignTSVector * a)
|
||||
{
|
||||
int4 k,
|
||||
len = ARRNELEM(a);
|
||||
int4 *ptr = GETARR(a);
|
||||
|
||||
MemSet((void *) sign, 0, sizeof(BITVEC));
|
||||
for (k = 0; k < len; k++)
|
||||
HASH(sign, ptr[k]);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_compress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
GISTENTRY *retval = entry;
|
||||
|
||||
if (entry->leafkey)
|
||||
{ /* tsvector */
|
||||
SignTSVector *res;
|
||||
TSVector val = DatumGetTSVector(entry->key);
|
||||
int4 len;
|
||||
int4 *arr;
|
||||
WordEntry *ptr = ARRPTR(val);
|
||||
char *words = STRPTR(val);
|
||||
|
||||
len = CALCGTSIZE(ARRKEY, val->size);
|
||||
res = (SignTSVector *) palloc(len);
|
||||
SET_VARSIZE(res, len);
|
||||
res->flag = ARRKEY;
|
||||
arr = GETARR(res);
|
||||
len = val->size;
|
||||
while (len--)
|
||||
{
|
||||
pg_crc32 c;
|
||||
|
||||
INIT_CRC32(c);
|
||||
COMP_CRC32(c, words + ptr->pos, ptr->len);
|
||||
FIN_CRC32(c);
|
||||
|
||||
*arr = *(int4 *) &c;
|
||||
arr++;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
len = uniqueint(GETARR(res), val->size);
|
||||
if (len != val->size)
|
||||
{
|
||||
/*
|
||||
* there is a collision of hash-function; len is always less than
|
||||
* val->size
|
||||
*/
|
||||
len = CALCGTSIZE(ARRKEY, len);
|
||||
res = (SignTSVector *) repalloc((void *) res, len);
|
||||
SET_VARSIZE(res, len);
|
||||
}
|
||||
|
||||
/* make signature, if array is too long */
|
||||
if (VARSIZE(res) > TOAST_INDEX_TARGET)
|
||||
{
|
||||
SignTSVector *ressign;
|
||||
|
||||
len = CALCGTSIZE(SIGNKEY, 0);
|
||||
ressign = (SignTSVector *) palloc(len);
|
||||
SET_VARSIZE(ressign, len);
|
||||
ressign->flag = SIGNKEY;
|
||||
makesign(GETSIGN(ressign), res);
|
||||
res = ressign;
|
||||
}
|
||||
|
||||
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
gistentryinit(*retval, PointerGetDatum(res),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
}
|
||||
else if (ISSIGNKEY(DatumGetPointer(entry->key)) &&
|
||||
!ISALLTRUE(DatumGetPointer(entry->key)))
|
||||
{
|
||||
int4 i,
|
||||
len;
|
||||
SignTSVector *res;
|
||||
BITVECP sign = GETSIGN(DatumGetPointer(entry->key));
|
||||
|
||||
LOOPBYTE(
|
||||
if ((sign[i] & 0xff) != 0xff)
|
||||
PG_RETURN_POINTER(retval);
|
||||
);
|
||||
|
||||
len = CALCGTSIZE(SIGNKEY | ALLISTRUE, 0);
|
||||
res = (SignTSVector *) palloc(len);
|
||||
SET_VARSIZE(res, len);
|
||||
res->flag = SIGNKEY | ALLISTRUE;
|
||||
|
||||
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
gistentryinit(*retval, PointerGetDatum(res),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
}
|
||||
PG_RETURN_POINTER(retval);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_decompress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
SignTSVector *key = (SignTSVector *) DatumGetPointer(PG_DETOAST_DATUM(entry->key));
|
||||
|
||||
if (key != (SignTSVector *) DatumGetPointer(entry->key))
|
||||
{
|
||||
GISTENTRY *retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
|
||||
gistentryinit(*retval, PointerGetDatum(key),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
|
||||
PG_RETURN_POINTER(retval);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(entry);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int4 *arrb;
|
||||
int4 *arre;
|
||||
} CHKVAL;
|
||||
|
||||
/*
|
||||
* is there value 'val' in array or not ?
|
||||
*/
|
||||
static bool
|
||||
checkcondition_arr(void *checkval, QueryItem * val)
|
||||
{
|
||||
int4 *StopLow = ((CHKVAL *) checkval)->arrb;
|
||||
int4 *StopHigh = ((CHKVAL *) checkval)->arre;
|
||||
int4 *StopMiddle;
|
||||
|
||||
/* Loop invariant: StopLow <= val < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
if (*StopMiddle == val->val)
|
||||
return (true);
|
||||
else if (*StopMiddle < val->val)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return (false);
|
||||
}
|
||||
|
||||
static bool
|
||||
checkcondition_bit(void *checkval, QueryItem * val)
|
||||
{
|
||||
return GETBIT(checkval, HASHVAL(val->val));
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(1);
|
||||
SignTSVector *key = (SignTSVector *) DatumGetPointer(
|
||||
((GISTENTRY *) PG_GETARG_POINTER(0))->key
|
||||
);
|
||||
|
||||
if (!query->size)
|
||||
PG_RETURN_BOOL(false);
|
||||
|
||||
if (ISSIGNKEY(key))
|
||||
{
|
||||
if (ISALLTRUE(key))
|
||||
PG_RETURN_BOOL(true);
|
||||
|
||||
PG_RETURN_BOOL(TS_execute(
|
||||
GETQUERY(query),
|
||||
(void *) GETSIGN(key), false,
|
||||
checkcondition_bit
|
||||
));
|
||||
}
|
||||
else
|
||||
{ /* only leaf pages */
|
||||
CHKVAL chkval;
|
||||
|
||||
chkval.arrb = GETARR(key);
|
||||
chkval.arre = chkval.arrb + ARRNELEM(key);
|
||||
PG_RETURN_BOOL(TS_execute(
|
||||
GETQUERY(query),
|
||||
(void *) &chkval, true,
|
||||
checkcondition_arr
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
static int4
|
||||
unionkey(BITVECP sbase, SignTSVector * add)
|
||||
{
|
||||
int4 i;
|
||||
|
||||
if (ISSIGNKEY(add))
|
||||
{
|
||||
BITVECP sadd = GETSIGN(add);
|
||||
|
||||
if (ISALLTRUE(add))
|
||||
return 1;
|
||||
|
||||
LOOPBYTE(
|
||||
sbase[i] |= sadd[i];
|
||||
);
|
||||
}
|
||||
else
|
||||
{
|
||||
int4 *ptr = GETARR(add);
|
||||
|
||||
for (i = 0; i < ARRNELEM(add); i++)
|
||||
HASH(sbase, ptr[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
gtsvector_union(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
int *size = (int *) PG_GETARG_POINTER(1);
|
||||
BITVEC base;
|
||||
int4 i,
|
||||
len;
|
||||
int4 flag = 0;
|
||||
SignTSVector *result;
|
||||
|
||||
MemSet((void *) base, 0, sizeof(BITVEC));
|
||||
for (i = 0; i < entryvec->n; i++)
|
||||
{
|
||||
if (unionkey(base, GETENTRY(entryvec, i)))
|
||||
{
|
||||
flag = ALLISTRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
flag |= SIGNKEY;
|
||||
len = CALCGTSIZE(flag, 0);
|
||||
result = (SignTSVector *) palloc(len);
|
||||
*size = len;
|
||||
SET_VARSIZE(result, len);
|
||||
result->flag = flag;
|
||||
if (!ISALLTRUE(result))
|
||||
memcpy((void *) GETSIGN(result), (void *) base, sizeof(BITVEC));
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_same(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SignTSVector *a = (SignTSVector *) PG_GETARG_POINTER(0);
|
||||
SignTSVector *b = (SignTSVector *) PG_GETARG_POINTER(1);
|
||||
bool *result = (bool *) PG_GETARG_POINTER(2);
|
||||
|
||||
if (ISSIGNKEY(a))
|
||||
{ /* then b also ISSIGNKEY */
|
||||
if (ISALLTRUE(a) && ISALLTRUE(b))
|
||||
*result = true;
|
||||
else if (ISALLTRUE(a))
|
||||
*result = false;
|
||||
else if (ISALLTRUE(b))
|
||||
*result = false;
|
||||
else
|
||||
{
|
||||
int4 i;
|
||||
BITVECP sa = GETSIGN(a),
|
||||
sb = GETSIGN(b);
|
||||
|
||||
*result = true;
|
||||
LOOPBYTE(
|
||||
if (sa[i] != sb[i])
|
||||
{
|
||||
*result = false;
|
||||
break;
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* a and b ISARRKEY */
|
||||
int4 lena = ARRNELEM(a),
|
||||
lenb = ARRNELEM(b);
|
||||
|
||||
if (lena != lenb)
|
||||
*result = false;
|
||||
else
|
||||
{
|
||||
int4 *ptra = GETARR(a),
|
||||
*ptrb = GETARR(b);
|
||||
int4 i;
|
||||
|
||||
*result = true;
|
||||
for (i = 0; i < lena; i++)
|
||||
if (ptra[i] != ptrb[i])
|
||||
{
|
||||
*result = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
static int4
|
||||
sizebitvec(BITVECP sign)
|
||||
{
|
||||
int4 size = 0,
|
||||
i;
|
||||
|
||||
LOOPBYTE(
|
||||
size += number_of_ones[(unsigned char) sign[i]];
|
||||
);
|
||||
return size;
|
||||
}
|
||||
|
||||
static int
|
||||
hemdistsign(BITVECP a, BITVECP b)
|
||||
{
|
||||
int i,
|
||||
diff,
|
||||
dist = 0;
|
||||
|
||||
LOOPBYTE(
|
||||
diff = (unsigned char) (a[i] ^ b[i]);
|
||||
dist += number_of_ones[diff];
|
||||
);
|
||||
return dist;
|
||||
}
|
||||
|
||||
static int
|
||||
hemdist(SignTSVector * a, SignTSVector * b)
|
||||
{
|
||||
if (ISALLTRUE(a))
|
||||
{
|
||||
if (ISALLTRUE(b))
|
||||
return 0;
|
||||
else
|
||||
return SIGLENBIT - sizebitvec(GETSIGN(b));
|
||||
}
|
||||
else if (ISALLTRUE(b))
|
||||
return SIGLENBIT - sizebitvec(GETSIGN(a));
|
||||
|
||||
return hemdistsign(GETSIGN(a), GETSIGN(b));
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_penalty(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *origentry = (GISTENTRY *) PG_GETARG_POINTER(0); /* always ISSIGNKEY */
|
||||
GISTENTRY *newentry = (GISTENTRY *) PG_GETARG_POINTER(1);
|
||||
float *penalty = (float *) PG_GETARG_POINTER(2);
|
||||
SignTSVector *origval = (SignTSVector *) DatumGetPointer(origentry->key);
|
||||
SignTSVector *newval = (SignTSVector *) DatumGetPointer(newentry->key);
|
||||
BITVECP orig = GETSIGN(origval);
|
||||
|
||||
*penalty = 0.0;
|
||||
|
||||
if (ISARRKEY(newval))
|
||||
{
|
||||
BITVEC sign;
|
||||
|
||||
makesign(sign, newval);
|
||||
|
||||
if (ISALLTRUE(origval))
|
||||
*penalty = ((float) (SIGLENBIT - sizebitvec(sign))) / (float) (SIGLENBIT + 1);
|
||||
else
|
||||
*penalty = hemdistsign(sign, orig);
|
||||
}
|
||||
else
|
||||
*penalty = hemdist(origval, newval);
|
||||
PG_RETURN_POINTER(penalty);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool allistrue;
|
||||
BITVEC sign;
|
||||
} CACHESIGN;
|
||||
|
||||
static void
|
||||
fillcache(CACHESIGN * item, SignTSVector * key)
|
||||
{
|
||||
item->allistrue = false;
|
||||
if (ISARRKEY(key))
|
||||
makesign(item->sign, key);
|
||||
else if (ISALLTRUE(key))
|
||||
item->allistrue = true;
|
||||
else
|
||||
memcpy((void *) item->sign, (void *) GETSIGN(key), sizeof(BITVEC));
|
||||
}
|
||||
|
||||
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
|
||||
typedef struct
|
||||
{
|
||||
OffsetNumber pos;
|
||||
int4 cost;
|
||||
} SPLITCOST;
|
||||
|
||||
static int
|
||||
comparecost(const void *a, const void *b)
|
||||
{
|
||||
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
|
||||
return 0;
|
||||
else
|
||||
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
hemdistcache(CACHESIGN * a, CACHESIGN * b)
|
||||
{
|
||||
if (a->allistrue)
|
||||
{
|
||||
if (b->allistrue)
|
||||
return 0;
|
||||
else
|
||||
return SIGLENBIT - sizebitvec(b->sign);
|
||||
}
|
||||
else if (b->allistrue)
|
||||
return SIGLENBIT - sizebitvec(a->sign);
|
||||
|
||||
return hemdistsign(a->sign, b->sign);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsvector_picksplit(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
|
||||
OffsetNumber k,
|
||||
j;
|
||||
SignTSVector *datum_l,
|
||||
*datum_r;
|
||||
BITVECP union_l,
|
||||
union_r;
|
||||
int4 size_alpha,
|
||||
size_beta;
|
||||
int4 size_waste,
|
||||
waste = -1;
|
||||
int4 nbytes;
|
||||
OffsetNumber seed_1 = 0,
|
||||
seed_2 = 0;
|
||||
OffsetNumber *left,
|
||||
*right;
|
||||
OffsetNumber maxoff;
|
||||
BITVECP ptr;
|
||||
int i;
|
||||
CACHESIGN *cache;
|
||||
SPLITCOST *costvector;
|
||||
|
||||
maxoff = entryvec->n - 2;
|
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
||||
v->spl_left = (OffsetNumber *) palloc(nbytes);
|
||||
v->spl_right = (OffsetNumber *) palloc(nbytes);
|
||||
|
||||
cache = (CACHESIGN *) palloc(sizeof(CACHESIGN) * (maxoff + 2));
|
||||
fillcache(&cache[FirstOffsetNumber], GETENTRY(entryvec, FirstOffsetNumber));
|
||||
|
||||
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
|
||||
{
|
||||
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
if (k == FirstOffsetNumber)
|
||||
fillcache(&cache[j], GETENTRY(entryvec, j));
|
||||
|
||||
size_waste = hemdistcache(&(cache[j]), &(cache[k]));
|
||||
if (size_waste > waste)
|
||||
{
|
||||
waste = size_waste;
|
||||
seed_1 = k;
|
||||
seed_2 = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
left = v->spl_left;
|
||||
v->spl_nleft = 0;
|
||||
right = v->spl_right;
|
||||
v->spl_nright = 0;
|
||||
|
||||
if (seed_1 == 0 || seed_2 == 0)
|
||||
{
|
||||
seed_1 = 1;
|
||||
seed_2 = 2;
|
||||
}
|
||||
|
||||
/* form initial .. */
|
||||
if (cache[seed_1].allistrue)
|
||||
{
|
||||
datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
datum_l->flag = SIGNKEY | ALLISTRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
datum_l = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
|
||||
SET_VARSIZE(datum_l, CALCGTSIZE(SIGNKEY, 0));
|
||||
datum_l->flag = SIGNKEY;
|
||||
memcpy((void *) GETSIGN(datum_l), (void *) cache[seed_1].sign, sizeof(BITVEC));
|
||||
}
|
||||
if (cache[seed_2].allistrue)
|
||||
{
|
||||
datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY | ALLISTRUE, 0));
|
||||
datum_r->flag = SIGNKEY | ALLISTRUE;
|
||||
}
|
||||
else
|
||||
{
|
||||
datum_r = (SignTSVector *) palloc(CALCGTSIZE(SIGNKEY, 0));
|
||||
SET_VARSIZE(datum_r, CALCGTSIZE(SIGNKEY, 0));
|
||||
datum_r->flag = SIGNKEY;
|
||||
memcpy((void *) GETSIGN(datum_r), (void *) cache[seed_2].sign, sizeof(BITVEC));
|
||||
}
|
||||
|
||||
union_l = GETSIGN(datum_l);
|
||||
union_r = GETSIGN(datum_r);
|
||||
maxoff = OffsetNumberNext(maxoff);
|
||||
fillcache(&cache[maxoff], GETENTRY(entryvec, maxoff));
|
||||
/* sort before ... */
|
||||
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
|
||||
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
costvector[j - 1].pos = j;
|
||||
size_alpha = hemdistcache(&(cache[seed_1]), &(cache[j]));
|
||||
size_beta = hemdistcache(&(cache[seed_2]), &(cache[j]));
|
||||
costvector[j - 1].cost = Abs(size_alpha - size_beta);
|
||||
}
|
||||
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
|
||||
|
||||
for (k = 0; k < maxoff; k++)
|
||||
{
|
||||
j = costvector[k].pos;
|
||||
if (j == seed_1)
|
||||
{
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
continue;
|
||||
}
|
||||
else if (j == seed_2)
|
||||
{
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (ISALLTRUE(datum_l) || cache[j].allistrue)
|
||||
{
|
||||
if (ISALLTRUE(datum_l) && cache[j].allistrue)
|
||||
size_alpha = 0;
|
||||
else
|
||||
size_alpha = SIGLENBIT - sizebitvec(
|
||||
(cache[j].allistrue) ? GETSIGN(datum_l) : GETSIGN(cache[j].sign)
|
||||
);
|
||||
}
|
||||
else
|
||||
size_alpha = hemdistsign(cache[j].sign, GETSIGN(datum_l));
|
||||
|
||||
if (ISALLTRUE(datum_r) || cache[j].allistrue)
|
||||
{
|
||||
if (ISALLTRUE(datum_r) && cache[j].allistrue)
|
||||
size_beta = 0;
|
||||
else
|
||||
size_beta = SIGLENBIT - sizebitvec(
|
||||
(cache[j].allistrue) ? GETSIGN(datum_r) : GETSIGN(cache[j].sign)
|
||||
);
|
||||
}
|
||||
else
|
||||
size_beta = hemdistsign(cache[j].sign, GETSIGN(datum_r));
|
||||
|
||||
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.1))
|
||||
{
|
||||
if (ISALLTRUE(datum_l) || cache[j].allistrue)
|
||||
{
|
||||
if (!ISALLTRUE(datum_l))
|
||||
MemSet((void *) GETSIGN(datum_l), 0xff, sizeof(BITVEC));
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = cache[j].sign;
|
||||
LOOPBYTE(
|
||||
union_l[i] |= ptr[i];
|
||||
);
|
||||
}
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (ISALLTRUE(datum_r) || cache[j].allistrue)
|
||||
{
|
||||
if (!ISALLTRUE(datum_r))
|
||||
MemSet((void *) GETSIGN(datum_r), 0xff, sizeof(BITVEC));
|
||||
}
|
||||
else
|
||||
{
|
||||
ptr = cache[j].sign;
|
||||
LOOPBYTE(
|
||||
union_r[i] |= ptr[i];
|
||||
);
|
||||
}
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
}
|
||||
}
|
||||
|
||||
*right = *left = FirstOffsetNumber;
|
||||
v->spl_ldatum = PointerGetDatum(datum_l);
|
||||
v->spl_rdatum = PointerGetDatum(datum_r);
|
||||
|
||||
PG_RETURN_POINTER(v);
|
||||
}
|
||||
767
src/backend/utils/adt/tsquery.c
Normal file
767
src/backend/utils/adt/tsquery.c
Normal file
@@ -0,0 +1,767 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsquery.c
|
||||
* I/O functions for tsquery
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"

#include "libpq/pqformat.h"
#include "miscadmin.h"
#include "tsearch/ts_locale.h"
#include "tsearch/ts_type.h"
#include "tsearch/ts_utils.h"
#include "utils/memutils.h"
#include "utils/pg_crc.h"
|
||||
|
||||
/* parser's states */
|
||||
#define WAITOPERAND 1
|
||||
#define WAITOPERATOR 2
|
||||
#define WAITFIRSTOPERAND 3
|
||||
#define WAITSINGLEOPERAND 4
|
||||
|
||||
/*
|
||||
* node of query tree, also used
|
||||
* for storing polish notation in parser
|
||||
*/
|
||||
typedef struct ParseQueryNode
|
||||
{
|
||||
int2 weight;
|
||||
int2 type;
|
||||
int4 val;
|
||||
int2 distance;
|
||||
int2 length;
|
||||
struct ParseQueryNode *next;
|
||||
} ParseQueryNode;
|
||||
|
||||
static char *
|
||||
get_weight(char *buf, int2 *weight)
|
||||
{
|
||||
*weight = 0;
|
||||
|
||||
if (!t_iseq(buf, ':'))
|
||||
return buf;
|
||||
|
||||
buf++;
|
||||
while (*buf && pg_mblen(buf) == 1)
|
||||
{
|
||||
switch (*buf)
|
||||
{
|
||||
case 'a':
|
||||
case 'A':
|
||||
*weight |= 1 << 3;
|
||||
break;
|
||||
case 'b':
|
||||
case 'B':
|
||||
*weight |= 1 << 2;
|
||||
break;
|
||||
case 'c':
|
||||
case 'C':
|
||||
*weight |= 1 << 1;
|
||||
break;
|
||||
case 'd':
|
||||
case 'D':
|
||||
*weight |= 1;
|
||||
break;
|
||||
default:
|
||||
return buf;
|
||||
}
|
||||
buf++;
|
||||
}
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* get token from query string
|
||||
*/
|
||||
static int4
|
||||
gettoken_query(TSQueryParserState * state, int4 *val, int4 *lenval, char **strval, int2 *weight)
|
||||
{
|
||||
while (1)
|
||||
{
|
||||
switch (state->state)
|
||||
{
|
||||
case WAITFIRSTOPERAND:
|
||||
case WAITOPERAND:
|
||||
if (t_iseq(state->buf, '!'))
|
||||
{
|
||||
(state->buf)++; /* can safely ++, t_iseq guarantee
|
||||
* that pg_mblen()==1 */
|
||||
*val = (int4) '!';
|
||||
state->state = WAITOPERAND;
|
||||
return OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, '('))
|
||||
{
|
||||
state->count++;
|
||||
(state->buf)++;
|
||||
state->state = WAITOPERAND;
|
||||
return OPEN;
|
||||
}
|
||||
else if (t_iseq(state->buf, ':'))
|
||||
{
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error at start of operand in tsearch query: \"%s\"",
|
||||
state->buffer)));
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
{
|
||||
state->valstate.prsbuf = state->buf;
|
||||
if (gettoken_tsvector(&(state->valstate)))
|
||||
{
|
||||
*strval = state->valstate.word;
|
||||
*lenval = state->valstate.curpos - state->valstate.word;
|
||||
state->buf = get_weight(state->valstate.prsbuf, weight);
|
||||
state->state = WAITOPERATOR;
|
||||
return VAL;
|
||||
}
|
||||
else if (state->state == WAITFIRSTOPERAND)
|
||||
return END;
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("no operand in tsearch query: \"%s\"",
|
||||
state->buffer)));
|
||||
}
|
||||
break;
|
||||
case WAITOPERATOR:
|
||||
if (t_iseq(state->buf, '&') || t_iseq(state->buf, '|'))
|
||||
{
|
||||
state->state = WAITOPERAND;
|
||||
*val = (int4) *(state->buf);
|
||||
(state->buf)++;
|
||||
return OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, ')'))
|
||||
{
|
||||
(state->buf)++;
|
||||
state->count--;
|
||||
return (state->count < 0) ? ERR : CLOSE;
|
||||
}
|
||||
else if (*(state->buf) == '\0')
|
||||
return (state->count) ? ERR : END;
|
||||
else if (!t_isspace(state->buf))
|
||||
return ERR;
|
||||
break;
|
||||
case WAITSINGLEOPERAND:
|
||||
if (*(state->buf) == '\0')
|
||||
return END;
|
||||
*strval = state->buf;
|
||||
*lenval = strlen(state->buf);
|
||||
state->buf += strlen(state->buf);
|
||||
state->count++;
|
||||
return VAL;
|
||||
default:
|
||||
return ERR;
|
||||
break;
|
||||
}
|
||||
state->buf += pg_mblen(state->buf);
|
||||
}
|
||||
return END;
|
||||
}
|
||||
|
||||
/*
|
||||
* push new one in polish notation reverse view
|
||||
*/
|
||||
void
|
||||
pushquery(TSQueryParserState * state, int4 type, int4 val, int4 distance, int4 lenval, int2 weight)
|
||||
{
|
||||
ParseQueryNode *tmp = (ParseQueryNode *) palloc(sizeof(ParseQueryNode));
|
||||
|
||||
tmp->weight = weight;
|
||||
tmp->type = type;
|
||||
tmp->val = val;
|
||||
if (distance >= MAXSTRPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("value is too big in tsearch query: \"%s\"",
|
||||
state->buffer)));
|
||||
if (lenval >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("operand is too long in tsearch query: \"%s\"",
|
||||
state->buffer)));
|
||||
tmp->distance = distance;
|
||||
tmp->length = lenval;
|
||||
tmp->next = state->str;
|
||||
state->str = tmp;
|
||||
state->num++;
|
||||
}
|
||||
|
||||
/*
|
||||
* This function is used for tsquery parsing
|
||||
*/
|
||||
void
|
||||
pushval_asis(TSQueryParserState * state, int type, char *strval, int lenval, int2 weight)
|
||||
{
|
||||
pg_crc32 c;
|
||||
|
||||
if (lenval >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long in tsearch query: \"%s\"",
|
||||
state->buffer)));
|
||||
|
||||
INIT_CRC32(c);
|
||||
COMP_CRC32(c, strval, lenval);
|
||||
FIN_CRC32(c);
|
||||
pushquery(state, type, *(int4 *) &c,
|
||||
state->curop - state->op, lenval, weight);
|
||||
|
||||
while (state->curop - state->op + lenval + 1 >= state->lenop)
|
||||
{
|
||||
int4 tmp = state->curop - state->op;
|
||||
|
||||
state->lenop *= 2;
|
||||
state->op = (char *) repalloc((void *) state->op, state->lenop);
|
||||
state->curop = state->op + tmp;
|
||||
}
|
||||
memcpy((void *) state->curop, (void *) strval, lenval);
|
||||
state->curop += lenval;
|
||||
*(state->curop) = '\0';
|
||||
state->curop++;
|
||||
state->sumlen += lenval + 1 /* \0 */ ;
|
||||
return;
|
||||
}
|
||||
|
||||
#define STACKDEPTH 32
|
||||
/*
|
||||
* make polish notation of query
|
||||
*/
|
||||
static int4
|
||||
makepol(TSQueryParserState * state, void (*pushval) (TSQueryParserState *, int, char *, int, int2))
|
||||
{
|
||||
int4 val = 0,
|
||||
type;
|
||||
int4 lenval = 0;
|
||||
char *strval = NULL;
|
||||
int4 stack[STACKDEPTH];
|
||||
int4 lenstack = 0;
|
||||
int2 weight = 0;
|
||||
|
||||
while ((type = gettoken_query(state, &val, &lenval, &strval, &weight)) != END)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case VAL:
|
||||
pushval(state, VAL, strval, lenval, weight);
|
||||
while (lenstack && (stack[lenstack - 1] == (int4) '&' ||
|
||||
stack[lenstack - 1] == (int4) '!'))
|
||||
{
|
||||
lenstack--;
|
||||
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
||||
}
|
||||
break;
|
||||
case OPR:
|
||||
if (lenstack && val == (int4) '|')
|
||||
pushquery(state, OPR, val, 0, 0, 0);
|
||||
else
|
||||
{
|
||||
if (lenstack == STACKDEPTH) /* internal error */
|
||||
elog(ERROR, "tsquery stack too small");
|
||||
stack[lenstack] = val;
|
||||
lenstack++;
|
||||
}
|
||||
break;
|
||||
case OPEN:
|
||||
if (makepol(state, pushval) == ERR)
|
||||
return ERR;
|
||||
if (lenstack && (stack[lenstack - 1] == (int4) '&' ||
|
||||
stack[lenstack - 1] == (int4) '!'))
|
||||
{
|
||||
lenstack--;
|
||||
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
||||
}
|
||||
break;
|
||||
case CLOSE:
|
||||
while (lenstack)
|
||||
{
|
||||
lenstack--;
|
||||
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
||||
};
|
||||
return END;
|
||||
break;
|
||||
case ERR:
|
||||
default:
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsearch query: \"%s\"",
|
||||
state->buffer)));
|
||||
return ERR;
|
||||
|
||||
}
|
||||
}
|
||||
while (lenstack)
|
||||
{
|
||||
lenstack--;
|
||||
pushquery(state, OPR, stack[lenstack], 0, 0, 0);
|
||||
};
|
||||
return END;
|
||||
}
|
||||
|
||||
static void
|
||||
findoprnd(QueryItem * ptr, int4 *pos)
|
||||
{
|
||||
if (ptr[*pos].type == VAL || ptr[*pos].type == VALSTOP)
|
||||
{
|
||||
ptr[*pos].left = 0;
|
||||
(*pos)++;
|
||||
}
|
||||
else if (ptr[*pos].val == (int4) '!')
|
||||
{
|
||||
ptr[*pos].left = 1;
|
||||
(*pos)++;
|
||||
findoprnd(ptr, pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
QueryItem *curitem = &ptr[*pos];
|
||||
int4 tmp = *pos;
|
||||
|
||||
(*pos)++;
|
||||
findoprnd(ptr, pos);
|
||||
curitem->left = *pos - tmp;
|
||||
findoprnd(ptr, pos);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* input
|
||||
*/
|
||||
TSQuery
|
||||
parse_tsquery(char *buf, void (*pushval) (TSQueryParserState *, int, char *, int, int2), Oid cfg_id, bool isplain)
|
||||
{
|
||||
TSQueryParserState state;
|
||||
int4 i;
|
||||
TSQuery query;
|
||||
int4 commonlen;
|
||||
QueryItem *ptr;
|
||||
ParseQueryNode *tmp;
|
||||
int4 pos = 0;
|
||||
|
||||
/* init state */
|
||||
state.buffer = buf;
|
||||
state.buf = buf;
|
||||
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
|
||||
state.count = 0;
|
||||
state.num = 0;
|
||||
state.str = NULL;
|
||||
state.cfg_id = cfg_id;
|
||||
|
||||
/* init value parser's state */
|
||||
state.valstate.oprisdelim = true;
|
||||
state.valstate.len = 32;
|
||||
state.valstate.word = (char *) palloc(state.valstate.len);
|
||||
|
||||
/* init list of operand */
|
||||
state.sumlen = 0;
|
||||
state.lenop = 64;
|
||||
state.curop = state.op = (char *) palloc(state.lenop);
|
||||
*(state.curop) = '\0';
|
||||
|
||||
/* parse query & make polish notation (postfix, but in reverse order) */
|
||||
makepol(&state, pushval);
|
||||
pfree(state.valstate.word);
|
||||
if (!state.num)
|
||||
{
|
||||
ereport(NOTICE,
|
||||
(errmsg("tsearch query doesn't contain lexeme(s): \"%s\"",
|
||||
state.buffer)));
|
||||
query = (TSQuery) palloc(HDRSIZETQ);
|
||||
SET_VARSIZE(query, HDRSIZETQ);
|
||||
query->size = 0;
|
||||
return query;
|
||||
}
|
||||
|
||||
/* make finish struct */
|
||||
commonlen = COMPUTESIZE(state.num, state.sumlen);
|
||||
query = (TSQuery) palloc(commonlen);
|
||||
SET_VARSIZE(query, commonlen);
|
||||
query->size = state.num;
|
||||
ptr = GETQUERY(query);
|
||||
|
||||
/* set item in polish notation */
|
||||
for (i = 0; i < state.num; i++)
|
||||
{
|
||||
ptr[i].weight = state.str->weight;
|
||||
ptr[i].type = state.str->type;
|
||||
ptr[i].val = state.str->val;
|
||||
ptr[i].distance = state.str->distance;
|
||||
ptr[i].length = state.str->length;
|
||||
tmp = state.str->next;
|
||||
pfree(state.str);
|
||||
state.str = tmp;
|
||||
}
|
||||
|
||||
/* set user friendly-operand view */
|
||||
memcpy((void *) GETOPERAND(query), (void *) state.op, state.sumlen);
|
||||
pfree(state.op);
|
||||
|
||||
/* set left operand's position for every operator */
|
||||
pos = 0;
|
||||
findoprnd(ptr, &pos);
|
||||
|
||||
return query;
|
||||
}
|
||||
|
||||
/*
|
||||
* in without morphology
|
||||
*/
|
||||
Datum
|
||||
tsqueryin(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *in = PG_GETARG_CSTRING(0);
|
||||
|
||||
pg_verifymbstr(in, strlen(in), false);
|
||||
|
||||
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, InvalidOid, false));
|
||||
}
|
||||
|
||||
/*
|
||||
* out function
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
QueryItem *curpol;
|
||||
char *buf;
|
||||
char *cur;
|
||||
char *op;
|
||||
int4 buflen;
|
||||
} INFIX;
|
||||
|
||||
#define RESIZEBUF(inf,addsize) \
|
||||
while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
|
||||
{ \
|
||||
int4 len = (inf)->cur - (inf)->buf; \
|
||||
(inf)->buflen *= 2; \
|
||||
(inf)->buf = (char*) repalloc( (void*)(inf)->buf, (inf)->buflen ); \
|
||||
(inf)->cur = (inf)->buf + len; \
|
||||
}
|
||||
|
||||
/*
|
||||
* recursive walk on tree and print it in
|
||||
* infix (human-readable) view
|
||||
*/
|
||||
static void
|
||||
infix(INFIX * in, bool first)
|
||||
{
|
||||
if (in->curpol->type == VAL)
|
||||
{
|
||||
char *op = in->op + in->curpol->distance;
|
||||
int clen;
|
||||
|
||||
RESIZEBUF(in, in->curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
while (*op)
|
||||
{
|
||||
if (t_iseq(op, '\''))
|
||||
{
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
}
|
||||
COPYCHAR(in->cur, op);
|
||||
|
||||
clen = pg_mblen(op);
|
||||
op += clen;
|
||||
in->cur += clen;
|
||||
}
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
if (in->curpol->weight)
|
||||
{
|
||||
*(in->cur) = ':';
|
||||
in->cur++;
|
||||
if (in->curpol->weight & (1 << 3))
|
||||
{
|
||||
*(in->cur) = 'A';
|
||||
in->cur++;
|
||||
}
|
||||
if (in->curpol->weight & (1 << 2))
|
||||
{
|
||||
*(in->cur) = 'B';
|
||||
in->cur++;
|
||||
}
|
||||
if (in->curpol->weight & (1 << 1))
|
||||
{
|
||||
*(in->cur) = 'C';
|
||||
in->cur++;
|
||||
}
|
||||
if (in->curpol->weight & 1)
|
||||
{
|
||||
*(in->cur) = 'D';
|
||||
in->cur++;
|
||||
}
|
||||
}
|
||||
*(in->cur) = '\0';
|
||||
in->curpol++;
|
||||
}
|
||||
else if (in->curpol->val == (int4) '!')
|
||||
{
|
||||
bool isopr = false;
|
||||
|
||||
RESIZEBUF(in, 1);
|
||||
*(in->cur) = '!';
|
||||
in->cur++;
|
||||
*(in->cur) = '\0';
|
||||
in->curpol++;
|
||||
if (in->curpol->type == OPR)
|
||||
{
|
||||
isopr = true;
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, "( ");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
}
|
||||
infix(in, isopr);
|
||||
if (isopr)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, " )");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
int4 op = in->curpol->val;
|
||||
INFIX nrm;
|
||||
|
||||
in->curpol++;
|
||||
if (op == (int4) '|' && !first)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, "( ");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
}
|
||||
|
||||
nrm.curpol = in->curpol;
|
||||
nrm.op = in->op;
|
||||
nrm.buflen = 16;
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
|
||||
/* get right operand */
|
||||
infix(&nrm, false);
|
||||
|
||||
/* get & print left operand */
|
||||
in->curpol = nrm.curpol;
|
||||
infix(in, false);
|
||||
|
||||
/* print operator & right operand */
|
||||
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
|
||||
sprintf(in->cur, " %c %s", op, nrm.buf);
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
pfree(nrm.buf);
|
||||
|
||||
if (op == (int4) '|' && !first)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, " )");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
tsqueryout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
INFIX nrm;
|
||||
|
||||
if (query->size == 0)
|
||||
{
|
||||
char *b = palloc(1);
|
||||
|
||||
*b = '\0';
|
||||
PG_RETURN_POINTER(b);
|
||||
}
|
||||
nrm.curpol = GETQUERY(query);
|
||||
nrm.buflen = 32;
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
*(nrm.cur) = '\0';
|
||||
nrm.op = GETOPERAND(query);
|
||||
infix(&nrm, true);
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_CSTRING(nrm.buf);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquerysend(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
StringInfoData buf;
|
||||
int i;
|
||||
QueryItem *item = GETQUERY(query);
|
||||
|
||||
pq_begintypsend(&buf);
|
||||
|
||||
pq_sendint(&buf, query->size, sizeof(int32));
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
int tmp;
|
||||
|
||||
pq_sendint(&buf, item->type, sizeof(item->type));
|
||||
pq_sendint(&buf, item->weight, sizeof(item->weight));
|
||||
pq_sendint(&buf, item->left, sizeof(item->left));
|
||||
pq_sendint(&buf, item->val, sizeof(item->val));
|
||||
|
||||
/*
|
||||
* We are sure that sizeof(WordEntry) == sizeof(int32), and about
|
||||
* layout of QueryItem
|
||||
*/
|
||||
tmp = *(int32 *) (((char *) item) + HDRSIZEQI);
|
||||
pq_sendint(&buf, tmp, sizeof(tmp));
|
||||
|
||||
item++;
|
||||
}
|
||||
|
||||
item = GETQUERY(query);
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type == VAL)
|
||||
pq_sendbytes(&buf, GETOPERAND(query) + item->distance, item->length);
|
||||
item++;
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
|
||||
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
||||
}
|
||||
|
||||
Datum
|
||||
tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||
TSQuery query;
|
||||
int i,
|
||||
size,
|
||||
tmp,
|
||||
len = HDRSIZETQ;
|
||||
QueryItem *item;
|
||||
int datalen = 0;
|
||||
char *ptr;
|
||||
|
||||
size = pq_getmsgint(buf, sizeof(uint32));
|
||||
if (size < 0 || size > (MaxAllocSize / sizeof(QueryItem)))
|
||||
elog(ERROR, "invalid size of tsquery");
|
||||
len += sizeof(QueryItem) * size;
|
||||
|
||||
query = (TSQuery) palloc(len);
|
||||
query->size = size;
|
||||
item = GETQUERY(query);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
item->type = (int8) pq_getmsgint(buf, sizeof(int8));
|
||||
item->weight = (int8) pq_getmsgint(buf, sizeof(int8));
|
||||
item->left = (int16) pq_getmsgint(buf, sizeof(int16));
|
||||
item->val = (int32) pq_getmsgint(buf, sizeof(int32));
|
||||
tmp = pq_getmsgint(buf, sizeof(int32));
|
||||
memcpy((((char *) item) + HDRSIZEQI), &tmp, sizeof(int32));
|
||||
|
||||
/*
|
||||
* Sanity checks
|
||||
*/
|
||||
if (item->type == VAL)
|
||||
{
|
||||
datalen += item->length + 1; /* \0 */
|
||||
}
|
||||
else if (item->type == OPR)
|
||||
{
|
||||
if (item->val == '|' || item->val == '&')
|
||||
{
|
||||
if (item->left <= 0 || i + item->left >= size)
|
||||
elog(ERROR, "invalid pointer to left operand");
|
||||
}
|
||||
|
||||
if (i == size - 1)
|
||||
elog(ERROR, "invalid pointer to right operand");
|
||||
}
|
||||
else
|
||||
elog(ERROR, "unknown tsquery node type");
|
||||
|
||||
item++;
|
||||
}
|
||||
|
||||
query = (TSQuery) repalloc(query, len + datalen);
|
||||
|
||||
item = GETQUERY(query);
|
||||
ptr = GETOPERAND(query);
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
if (item->type == VAL)
|
||||
{
|
||||
item->distance = ptr - GETOPERAND(query);
|
||||
memcpy(ptr,
|
||||
pq_getmsgbytes(buf, item->length),
|
||||
item->length);
|
||||
ptr += item->length;
|
||||
*ptr++ = '\0';
|
||||
}
|
||||
item++;
|
||||
}
|
||||
|
||||
Assert(ptr - GETOPERAND(query) == datalen);
|
||||
|
||||
SET_VARSIZE(query, len + datalen);
|
||||
|
||||
PG_RETURN_TSVECTOR(query);
|
||||
}
|
||||
|
||||
/*
|
||||
* debug function, used only for view query
|
||||
* which will be executed in non-leaf pages in index
|
||||
*/
|
||||
Datum
|
||||
tsquerytree(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
INFIX nrm;
|
||||
text *res;
|
||||
QueryItem *q;
|
||||
int4 len;
|
||||
|
||||
if (query->size == 0)
|
||||
{
|
||||
res = (text *) palloc(VARHDRSZ);
|
||||
SET_VARSIZE(res, VARHDRSZ);
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
|
||||
q = clean_NOT(GETQUERY(query), &len);
|
||||
|
||||
if (!q)
|
||||
{
|
||||
res = (text *) palloc(1 + VARHDRSZ);
|
||||
SET_VARSIZE(res, 1 + VARHDRSZ);
|
||||
*((char *) VARDATA(res)) = 'T';
|
||||
}
|
||||
else
|
||||
{
|
||||
nrm.curpol = q;
|
||||
nrm.buflen = 32;
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
*(nrm.cur) = '\0';
|
||||
nrm.op = GETOPERAND(query);
|
||||
infix(&nrm, true);
|
||||
|
||||
res = (text *) palloc(nrm.cur - nrm.buf + VARHDRSZ);
|
||||
SET_VARSIZE(res, nrm.cur - nrm.buf + VARHDRSZ);
|
||||
strncpy(VARDATA(res), nrm.buf, nrm.cur - nrm.buf);
|
||||
pfree(q);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
|
||||
PG_RETURN_POINTER(res);
|
||||
}
|
||||
261
src/backend/utils/adt/tsquery_cleanup.c
Normal file
261
src/backend/utils/adt/tsquery_cleanup.c
Normal file
@@ -0,0 +1,261 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsquery_cleanup.c
|
||||
* Cleanup query from NOT values and/or stopword
|
||||
* Utility functions to correct work.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_cleanup.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
typedef struct NODE
|
||||
{
|
||||
struct NODE *left;
|
||||
struct NODE *right;
|
||||
QueryItem *valnode;
|
||||
} NODE;
|
||||
|
||||
/*
|
||||
* make query tree from plain view of query
|
||||
*/
|
||||
static NODE *
|
||||
maketree(QueryItem * in)
|
||||
{
|
||||
NODE *node = (NODE *) palloc(sizeof(NODE));
|
||||
|
||||
node->valnode = in;
|
||||
node->right = node->left = NULL;
|
||||
if (in->type == OPR)
|
||||
{
|
||||
node->right = maketree(in + 1);
|
||||
if (in->val != (int4) '!')
|
||||
node->left = maketree(in + in->left);
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
QueryItem *ptr;
|
||||
int4 len;
|
||||
int4 cur;
|
||||
} PLAINTREE;
|
||||
|
||||
static void
|
||||
plainnode(PLAINTREE * state, NODE * node)
|
||||
{
|
||||
if (state->cur == state->len)
|
||||
{
|
||||
state->len *= 2;
|
||||
state->ptr = (QueryItem *) repalloc((void *) state->ptr, state->len * sizeof(QueryItem));
|
||||
}
|
||||
memcpy((void *) &(state->ptr[state->cur]), (void *) node->valnode, sizeof(QueryItem));
|
||||
if (node->valnode->type == VAL)
|
||||
state->cur++;
|
||||
else if (node->valnode->val == (int4) '!')
|
||||
{
|
||||
state->ptr[state->cur].left = 1;
|
||||
state->cur++;
|
||||
plainnode(state, node->right);
|
||||
}
|
||||
else
|
||||
{
|
||||
int4 cur = state->cur;
|
||||
|
||||
state->cur++;
|
||||
plainnode(state, node->right);
|
||||
state->ptr[cur].left = state->cur - cur;
|
||||
plainnode(state, node->left);
|
||||
}
|
||||
pfree(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* make plain view of tree from 'normal' view of tree
|
||||
*/
|
||||
static QueryItem *
|
||||
plaintree(NODE * root, int4 *len)
|
||||
{
|
||||
PLAINTREE pl;
|
||||
|
||||
pl.cur = 0;
|
||||
pl.len = 16;
|
||||
if (root && (root->valnode->type == VAL || root->valnode->type == OPR))
|
||||
{
|
||||
pl.ptr = (QueryItem *) palloc(pl.len * sizeof(QueryItem));
|
||||
plainnode(&pl, root);
|
||||
}
|
||||
else
|
||||
pl.ptr = NULL;
|
||||
*len = pl.cur;
|
||||
return pl.ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
freetree(NODE * node)
|
||||
{
|
||||
if (!node)
|
||||
return;
|
||||
if (node->left)
|
||||
freetree(node->left);
|
||||
if (node->right)
|
||||
freetree(node->right);
|
||||
pfree(node);
|
||||
}
|
||||
|
||||
/*
|
||||
* clean tree for ! operator.
|
||||
* It's usefull for debug, but in
|
||||
* other case, such view is used with search in index.
|
||||
* Operator ! always return TRUE
|
||||
*/
|
||||
static NODE *
|
||||
clean_NOT_intree(NODE * node)
|
||||
{
|
||||
if (node->valnode->type == VAL)
|
||||
return node;
|
||||
|
||||
if (node->valnode->val == (int4) '!')
|
||||
{
|
||||
freetree(node);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* operator & or | */
|
||||
if (node->valnode->val == (int4) '|')
|
||||
{
|
||||
if ((node->left = clean_NOT_intree(node->left)) == NULL ||
|
||||
(node->right = clean_NOT_intree(node->right)) == NULL)
|
||||
{
|
||||
freetree(node);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
NODE *res = node;
|
||||
|
||||
node->left = clean_NOT_intree(node->left);
|
||||
node->right = clean_NOT_intree(node->right);
|
||||
if (node->left == NULL && node->right == NULL)
|
||||
{
|
||||
pfree(node);
|
||||
res = NULL;
|
||||
}
|
||||
else if (node->left == NULL)
|
||||
{
|
||||
res = node->right;
|
||||
pfree(node);
|
||||
}
|
||||
else if (node->right == NULL)
|
||||
{
|
||||
res = node->left;
|
||||
pfree(node);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
QueryItem *
|
||||
clean_NOT(QueryItem * ptr, int4 *len)
|
||||
{
|
||||
NODE *root = maketree(ptr);
|
||||
|
||||
return plaintree(clean_NOT_intree(root), len);
|
||||
}
|
||||
|
||||
|
||||
#ifdef V_UNKNOWN /* exists in Windows headers */
|
||||
#undef V_UNKNOWN
|
||||
#endif
|
||||
|
||||
#define V_UNKNOWN 0
|
||||
#define V_TRUE 1
|
||||
#define V_FALSE 2
|
||||
#define V_STOP 3
|
||||
|
||||
/*
|
||||
* Clean query tree from values which is always in
|
||||
* text (stopword)
|
||||
*/
|
||||
static NODE *
|
||||
clean_fakeval_intree(NODE * node, char *result)
|
||||
{
|
||||
char lresult = V_UNKNOWN,
|
||||
rresult = V_UNKNOWN;
|
||||
|
||||
if (node->valnode->type == VAL)
|
||||
return node;
|
||||
else if (node->valnode->type == VALSTOP)
|
||||
{
|
||||
pfree(node);
|
||||
*result = V_STOP;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
if (node->valnode->val == (int4) '!')
|
||||
{
|
||||
node->right = clean_fakeval_intree(node->right, &rresult);
|
||||
if (!node->right)
|
||||
{
|
||||
*result = V_STOP;
|
||||
freetree(node);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
NODE *res = node;
|
||||
|
||||
node->left = clean_fakeval_intree(node->left, &lresult);
|
||||
node->right = clean_fakeval_intree(node->right, &rresult);
|
||||
if (lresult == V_STOP && rresult == V_STOP)
|
||||
{
|
||||
freetree(node);
|
||||
*result = V_STOP;
|
||||
return NULL;
|
||||
}
|
||||
else if (lresult == V_STOP)
|
||||
{
|
||||
res = node->right;
|
||||
pfree(node);
|
||||
}
|
||||
else if (rresult == V_STOP)
|
||||
{
|
||||
res = node->left;
|
||||
pfree(node);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
QueryItem *
|
||||
clean_fakeval(QueryItem * ptr, int4 *len)
|
||||
{
|
||||
NODE *root = maketree(ptr);
|
||||
char result = V_UNKNOWN;
|
||||
NODE *resroot;
|
||||
|
||||
resroot = clean_fakeval_intree(root, &result);
|
||||
if (result != V_UNKNOWN)
|
||||
{
|
||||
elog(NOTICE, "query contains only stopword(s) or doesn't contain lexeme(s), ignored");
|
||||
*len = 0;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return plaintree(resroot, len);
|
||||
}
|
||||
259
src/backend/utils/adt/tsquery_gist.c
Normal file
259
src/backend/utils/adt/tsquery_gist.c
Normal file
@@ -0,0 +1,259 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsquery_gist.c
|
||||
* GiST index support for tsquery
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_gist.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/skey.h"
|
||||
#include "access/gist.h"
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
/*
 * Fetch the key stored at position pos of a GistEntryVector, viewed as a
 * pointer to a TSQuerySign.
 */
#define GETENTRY(vec,pos) ((TSQuerySign *) DatumGetPointer((vec)->vector[(pos)].key))
|
||||
|
||||
Datum
|
||||
gtsquery_compress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
GISTENTRY *retval = entry;
|
||||
|
||||
if (entry->leafkey)
|
||||
{
|
||||
TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
|
||||
|
||||
retval = (GISTENTRY *) palloc(sizeof(GISTENTRY));
|
||||
*sign = makeTSQuerySign(DatumGetTSQuery(entry->key));
|
||||
|
||||
gistentryinit(*retval, PointerGetDatum(sign),
|
||||
entry->rel, entry->page,
|
||||
entry->offset, FALSE);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(retval);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsquery_decompress(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(PG_GETARG_DATUM(0));
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsquery_consistent(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GISTENTRY *entry = (GISTENTRY *) PG_GETARG_POINTER(0);
|
||||
TSQuerySign *key = (TSQuerySign *) DatumGetPointer(entry->key);
|
||||
TSQuery query = PG_GETARG_TSQUERY(1);
|
||||
StrategyNumber strategy = (StrategyNumber) PG_GETARG_UINT16(2);
|
||||
TSQuerySign sq = makeTSQuerySign(query);
|
||||
bool retval;
|
||||
|
||||
switch (strategy)
|
||||
{
|
||||
case RTContainsStrategyNumber:
|
||||
if (GIST_LEAF(entry))
|
||||
retval = (*key & sq) == sq;
|
||||
else
|
||||
retval = (*key & sq) != 0;
|
||||
break;
|
||||
case RTContainedByStrategyNumber:
|
||||
if (GIST_LEAF(entry))
|
||||
retval = (*key & sq) == *key;
|
||||
else
|
||||
retval = (*key & sq) != 0;
|
||||
break;
|
||||
default:
|
||||
retval = FALSE;
|
||||
}
|
||||
PG_RETURN_BOOL(retval);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsquery_union(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
int *size = (int *) PG_GETARG_POINTER(1);
|
||||
TSQuerySign *sign = (TSQuerySign *) palloc(sizeof(TSQuerySign));
|
||||
int i;
|
||||
|
||||
memset(sign, 0, sizeof(TSQuerySign));
|
||||
|
||||
for (i = 0; i < entryvec->n; i++)
|
||||
*sign |= *GETENTRY(entryvec, i);
|
||||
|
||||
*size = sizeof(TSQuerySign);
|
||||
|
||||
PG_RETURN_POINTER(sign);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsquery_same(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuerySign *a = (TSQuerySign *) PG_GETARG_POINTER(0);
|
||||
TSQuerySign *b = (TSQuerySign *) PG_GETARG_POINTER(1);
|
||||
|
||||
PG_RETURN_POINTER(*a == *b);
|
||||
}
|
||||
|
||||
static int
|
||||
sizebitvec(TSQuerySign sign)
|
||||
{
|
||||
int size = 0,
|
||||
i;
|
||||
|
||||
for (i = 0; i < TSQS_SIGLEN; i++)
|
||||
size += 0x01 & (sign >> i);
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
static int
|
||||
hemdist(TSQuerySign a, TSQuerySign b)
|
||||
{
|
||||
TSQuerySign res = a ^ b;
|
||||
|
||||
return sizebitvec(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
gtsquery_penalty(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuerySign *origval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(0))->key);
|
||||
TSQuerySign *newval = (TSQuerySign *) DatumGetPointer(((GISTENTRY *) PG_GETARG_POINTER(1))->key);
|
||||
float *penalty = (float *) PG_GETARG_POINTER(2);
|
||||
|
||||
*penalty = hemdist(*origval, *newval);
|
||||
|
||||
PG_RETURN_POINTER(penalty);
|
||||
}
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
OffsetNumber pos;
|
||||
int4 cost;
|
||||
} SPLITCOST;
|
||||
|
||||
static int
|
||||
comparecost(const void *a, const void *b)
|
||||
{
|
||||
if (((SPLITCOST *) a)->cost == ((SPLITCOST *) b)->cost)
|
||||
return 0;
|
||||
else
|
||||
return (((SPLITCOST *) a)->cost > ((SPLITCOST *) b)->cost) ? 1 : -1;
|
||||
}
|
||||
|
||||
/*
 * Balancing term used by gtsquery_picksplit: the negated cube of (a - b),
 * scaled by c.  With a and b being the current sizes of the two sides, the
 * term is negative when side a is larger and positive when it is smaller.
 */
#define WISH_F(a,b,c) (double)( -(double)(((a)-(b))*((a)-(b))*((a)-(b)))*(c) )
|
||||
|
||||
Datum
|
||||
gtsquery_picksplit(PG_FUNCTION_ARGS)
|
||||
{
|
||||
GistEntryVector *entryvec = (GistEntryVector *) PG_GETARG_POINTER(0);
|
||||
GIST_SPLITVEC *v = (GIST_SPLITVEC *) PG_GETARG_POINTER(1);
|
||||
OffsetNumber maxoff = entryvec->n - 2;
|
||||
OffsetNumber k,
|
||||
j;
|
||||
|
||||
TSQuerySign *datum_l,
|
||||
*datum_r;
|
||||
int4 size_alpha,
|
||||
size_beta;
|
||||
int4 size_waste,
|
||||
waste = -1;
|
||||
int4 nbytes;
|
||||
OffsetNumber seed_1 = 0,
|
||||
seed_2 = 0;
|
||||
OffsetNumber *left,
|
||||
*right;
|
||||
|
||||
SPLITCOST *costvector;
|
||||
|
||||
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
|
||||
left = v->spl_left = (OffsetNumber *) palloc(nbytes);
|
||||
right = v->spl_right = (OffsetNumber *) palloc(nbytes);
|
||||
v->spl_nleft = v->spl_nright = 0;
|
||||
|
||||
for (k = FirstOffsetNumber; k < maxoff; k = OffsetNumberNext(k))
|
||||
for (j = OffsetNumberNext(k); j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
size_waste = hemdist(*GETENTRY(entryvec, j), *GETENTRY(entryvec, k));
|
||||
if (size_waste > waste)
|
||||
{
|
||||
waste = size_waste;
|
||||
seed_1 = k;
|
||||
seed_2 = j;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (seed_1 == 0 || seed_2 == 0)
|
||||
{
|
||||
seed_1 = 1;
|
||||
seed_2 = 2;
|
||||
}
|
||||
|
||||
datum_l = (TSQuerySign *) palloc(sizeof(TSQuerySign));
|
||||
*datum_l = *GETENTRY(entryvec, seed_1);
|
||||
datum_r = (TSQuerySign *) palloc(sizeof(TSQuerySign));
|
||||
*datum_r = *GETENTRY(entryvec, seed_2);
|
||||
|
||||
|
||||
maxoff = OffsetNumberNext(maxoff);
|
||||
costvector = (SPLITCOST *) palloc(sizeof(SPLITCOST) * maxoff);
|
||||
for (j = FirstOffsetNumber; j <= maxoff; j = OffsetNumberNext(j))
|
||||
{
|
||||
costvector[j - 1].pos = j;
|
||||
size_alpha = hemdist(*GETENTRY(entryvec, seed_1), *GETENTRY(entryvec, j));
|
||||
size_beta = hemdist(*GETENTRY(entryvec, seed_2), *GETENTRY(entryvec, j));
|
||||
costvector[j - 1].cost = abs(size_alpha - size_beta);
|
||||
}
|
||||
qsort((void *) costvector, maxoff, sizeof(SPLITCOST), comparecost);
|
||||
|
||||
for (k = 0; k < maxoff; k++)
|
||||
{
|
||||
j = costvector[k].pos;
|
||||
if (j == seed_1)
|
||||
{
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
continue;
|
||||
}
|
||||
else if (j == seed_2)
|
||||
{
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
continue;
|
||||
}
|
||||
size_alpha = hemdist(*datum_l, *GETENTRY(entryvec, j));
|
||||
size_beta = hemdist(*datum_r, *GETENTRY(entryvec, j));
|
||||
|
||||
if (size_alpha < size_beta + WISH_F(v->spl_nleft, v->spl_nright, 0.05))
|
||||
{
|
||||
*datum_l |= *GETENTRY(entryvec, j);
|
||||
*left++ = j;
|
||||
v->spl_nleft++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*datum_r |= *GETENTRY(entryvec, j);
|
||||
*right++ = j;
|
||||
v->spl_nright++;
|
||||
}
|
||||
}
|
||||
|
||||
*right = *left = FirstOffsetNumber;
|
||||
v->spl_ldatum = PointerGetDatum(datum_l);
|
||||
v->spl_rdatum = PointerGetDatum(datum_r);
|
||||
|
||||
PG_RETURN_POINTER(v);
|
||||
}
|
||||
289
src/backend/utils/adt/tsquery_op.c
Normal file
289
src/backend/utils/adt/tsquery_op.c
Normal file
@@ -0,0 +1,289 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsquery_op.c
|
||||
* Various operations with tsquery
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_op.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/pg_crc.h"
|
||||
|
||||
Datum
|
||||
tsquery_numnode(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
int nnode = query->size;
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_INT32(nnode);
|
||||
}
|
||||
|
||||
static QTNode *
|
||||
join_tsqueries(TSQuery a, TSQuery b)
|
||||
{
|
||||
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
res->flags |= QTN_NEEDFREE;
|
||||
|
||||
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
|
||||
res->valnode->type = OPR;
|
||||
|
||||
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
|
||||
res->child[1] = QT2QTN(GETQUERY(a), GETOPERAND(a));
|
||||
res->nchild = 2;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_and(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery a = PG_GETARG_TSQUERY_COPY(0);
|
||||
TSQuery b = PG_GETARG_TSQUERY_COPY(1);
|
||||
QTNode *res;
|
||||
TSQuery query;
|
||||
|
||||
if (a->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
PG_RETURN_POINTER(b);
|
||||
}
|
||||
else if (b->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b);
|
||||
|
||||
res->valnode->val = '&';
|
||||
|
||||
query = QTN2QT(res);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_TSQUERY(query);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_or(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery a = PG_GETARG_TSQUERY_COPY(0);
|
||||
TSQuery b = PG_GETARG_TSQUERY_COPY(1);
|
||||
QTNode *res;
|
||||
TSQuery query;
|
||||
|
||||
if (a->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
PG_RETURN_POINTER(b);
|
||||
}
|
||||
else if (b->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b);
|
||||
|
||||
res->valnode->val = '|';
|
||||
|
||||
query = QTN2QT(res);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_not(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery a = PG_GETARG_TSQUERY_COPY(0);
|
||||
QTNode *res;
|
||||
TSQuery query;
|
||||
|
||||
if (a->size == 0)
|
||||
PG_RETURN_POINTER(a);
|
||||
|
||||
res = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
res->flags |= QTN_NEEDFREE;
|
||||
|
||||
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
|
||||
res->valnode->type = OPR;
|
||||
res->valnode->val = '!';
|
||||
|
||||
res->child = (QTNode **) palloc0(sizeof(QTNode *));
|
||||
res->child[0] = QT2QTN(GETQUERY(a), GETOPERAND(a));
|
||||
res->nchild = 1;
|
||||
|
||||
query = QTN2QT(res);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
|
||||
static int
|
||||
CompareTSQ(TSQuery a, TSQuery b)
|
||||
{
|
||||
if (a->size != b->size)
|
||||
{
|
||||
return (a->size < b->size) ? -1 : 1;
|
||||
}
|
||||
else if (VARSIZE(a) != VARSIZE(b))
|
||||
{
|
||||
return (VARSIZE(a) < VARSIZE(b)) ? -1 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
QTNode *an = QT2QTN(GETQUERY(a), GETOPERAND(a));
|
||||
QTNode *bn = QT2QTN(GETQUERY(b), GETOPERAND(b));
|
||||
int res = QTNodeCompare(an, bn);
|
||||
|
||||
QTNFree(an);
|
||||
QTNFree(bn);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_cmp(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery a = PG_GETARG_TSQUERY_COPY(0);
|
||||
TSQuery b = PG_GETARG_TSQUERY_COPY(1);
|
||||
int res = CompareTSQ(a, b);
|
||||
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_INT32(res);
|
||||
}
|
||||
|
||||
#define CMPFUNC( NAME, CONDITION ) \
|
||||
Datum \
|
||||
NAME(PG_FUNCTION_ARGS) { \
|
||||
TSQuery a = PG_GETARG_TSQUERY_COPY(0); \
|
||||
TSQuery b = PG_GETARG_TSQUERY_COPY(1); \
|
||||
int res = CompareTSQ(a,b); \
|
||||
\
|
||||
PG_FREE_IF_COPY(a,0); \
|
||||
PG_FREE_IF_COPY(b,1); \
|
||||
\
|
||||
PG_RETURN_BOOL( CONDITION ); \
|
||||
}
|
||||
|
||||
CMPFUNC(tsquery_lt, res < 0);
|
||||
CMPFUNC(tsquery_le, res <= 0);
|
||||
CMPFUNC(tsquery_eq, res == 0);
|
||||
CMPFUNC(tsquery_ge, res >= 0);
|
||||
CMPFUNC(tsquery_gt, res > 0);
|
||||
CMPFUNC(tsquery_ne, res != 0);
|
||||
|
||||
TSQuerySign
|
||||
makeTSQuerySign(TSQuery a)
|
||||
{
|
||||
int i;
|
||||
QueryItem *ptr = GETQUERY(a);
|
||||
TSQuerySign sign = 0;
|
||||
|
||||
for (i = 0; i < a->size; i++)
|
||||
{
|
||||
if (ptr->type == VAL)
|
||||
sign |= ((TSQuerySign) 1) << (ptr->val % TSQS_SIGLEN);
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return sign;
|
||||
}
|
||||
|
||||
Datum
|
||||
tsq_mcontains(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
TSQuery ex = PG_GETARG_TSQUERY(1);
|
||||
TSQuerySign sq,
|
||||
se;
|
||||
int i,
|
||||
j;
|
||||
QueryItem *iq,
|
||||
*ie;
|
||||
|
||||
if (query->size < ex->size)
|
||||
{
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
|
||||
sq = makeTSQuerySign(query);
|
||||
se = makeTSQuerySign(ex);
|
||||
|
||||
if ((sq & se) != se)
|
||||
{
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
|
||||
ie = GETQUERY(ex);
|
||||
|
||||
for (i = 0; i < ex->size; i++)
|
||||
{
|
||||
iq = GETQUERY(query);
|
||||
if (ie[i].type != VAL)
|
||||
continue;
|
||||
for (j = 0; j < query->size; j++)
|
||||
if (iq[j].type == VAL && ie[i].val == iq[j].val)
|
||||
{
|
||||
j = query->size + 1;
|
||||
break;
|
||||
}
|
||||
if (j == query->size)
|
||||
{
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(false);
|
||||
}
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
|
||||
PG_RETURN_BOOL(true);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsq_mcontained(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(
|
||||
DirectFunctionCall2(
|
||||
tsq_mcontains,
|
||||
PG_GETARG_DATUM(1),
|
||||
PG_GETARG_DATUM(0)
|
||||
)
|
||||
);
|
||||
}
|
||||
524
src/backend/utils/adt/tsquery_rewrite.c
Normal file
524
src/backend/utils/adt/tsquery_rewrite.c
Normal file
@@ -0,0 +1,524 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsquery_rewrite.c
|
||||
* Utilities for reconstructing tsquery
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_rewrite.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "executor/spi.h"
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
|
||||
/*
 * Advance counters[0..last] to the next combination of strictly increasing
 * indexes below "total".
 *
 * The last counter is bumped; when it reaches total, the preceding counters
 * are advanced recursively (with a limit one smaller) and the last counter
 * restarts just above its predecessor.  Returns 1 if a new combination was
 * produced and 0 when the sequence is exhausted.
 */
static int
addone(int *counters, int last, int total)
{
	counters[last] += 1;

	if (counters[last] < total)
		return 1;

	/* overflow of the leftmost counter: nothing left to try */
	if (last == 0)
		return 0;

	if (!addone(counters, last - 1, total - 1))
		return 0;

	counters[last] = counters[last - 1] + 1;
	return 1;
}
|
||||
|
||||
static QTNode *
|
||||
findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind)
|
||||
{
|
||||
|
||||
if ((node->sign & ex->sign) != ex->sign || node->valnode->type != ex->valnode->type || node->valnode->val != ex->valnode->val)
|
||||
return node;
|
||||
|
||||
if (node->flags & QTN_NOCHANGE)
|
||||
return node;
|
||||
|
||||
if (node->valnode->type == OPR)
|
||||
{
|
||||
if (node->nchild == ex->nchild)
|
||||
{
|
||||
if (QTNEq(node, ex))
|
||||
{
|
||||
QTNFree(node);
|
||||
if (subs)
|
||||
{
|
||||
node = QTNCopy(subs);
|
||||
node->flags |= QTN_NOCHANGE;
|
||||
}
|
||||
else
|
||||
node = NULL;
|
||||
*isfind = true;
|
||||
}
|
||||
}
|
||||
else if (node->nchild > ex->nchild)
|
||||
{
|
||||
int *counters = (int *) palloc(sizeof(int) * node->nchild);
|
||||
int i;
|
||||
QTNode *tnode = (QTNode *) palloc(sizeof(QTNode));
|
||||
|
||||
memset(tnode, 0, sizeof(QTNode));
|
||||
tnode->child = (QTNode **) palloc(sizeof(QTNode *) * ex->nchild);
|
||||
tnode->nchild = ex->nchild;
|
||||
tnode->valnode = (QueryItem *) palloc(sizeof(QueryItem));
|
||||
*(tnode->valnode) = *(ex->valnode);
|
||||
|
||||
for (i = 0; i < ex->nchild; i++)
|
||||
counters[i] = i;
|
||||
|
||||
do
|
||||
{
|
||||
tnode->sign = 0;
|
||||
for (i = 0; i < ex->nchild; i++)
|
||||
{
|
||||
tnode->child[i] = node->child[counters[i]];
|
||||
tnode->sign |= tnode->child[i]->sign;
|
||||
}
|
||||
|
||||
if (QTNEq(tnode, ex))
|
||||
{
|
||||
int j = 0;
|
||||
|
||||
pfree(tnode->valnode);
|
||||
pfree(tnode->child);
|
||||
pfree(tnode);
|
||||
if (subs)
|
||||
{
|
||||
tnode = QTNCopy(subs);
|
||||
tnode->flags = QTN_NOCHANGE | QTN_NEEDFREE;
|
||||
}
|
||||
else
|
||||
tnode = NULL;
|
||||
|
||||
node->child[counters[0]] = tnode;
|
||||
|
||||
for (i = 1; i < ex->nchild; i++)
|
||||
node->child[counters[i]] = NULL;
|
||||
for (i = 0; i < node->nchild; i++)
|
||||
{
|
||||
if (node->child[i])
|
||||
{
|
||||
node->child[j] = node->child[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
node->nchild = j;
|
||||
|
||||
*isfind = true;
|
||||
|
||||
break;
|
||||
}
|
||||
} while (addone(counters, ex->nchild - 1, node->nchild));
|
||||
if (tnode && (tnode->flags & QTN_NOCHANGE) == 0)
|
||||
{
|
||||
pfree(tnode->valnode);
|
||||
pfree(tnode->child);
|
||||
pfree(tnode);
|
||||
}
|
||||
else
|
||||
QTNSort(node);
|
||||
pfree(counters);
|
||||
}
|
||||
}
|
||||
else if (QTNEq(node, ex))
|
||||
{
|
||||
QTNFree(node);
|
||||
if (subs)
|
||||
{
|
||||
node = QTNCopy(subs);
|
||||
node->flags |= QTN_NOCHANGE;
|
||||
}
|
||||
else
|
||||
{
|
||||
node = NULL;
|
||||
}
|
||||
*isfind = true;
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
static QTNode *
|
||||
dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
||||
{
|
||||
root = findeq(root, ex, subs, isfind);
|
||||
|
||||
if (root && (root->flags & QTN_NOCHANGE) == 0 && root->valnode->type == OPR)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < root->nchild; i++)
|
||||
root->child[i] = dofindsubquery(root->child[i], ex, subs, isfind);
|
||||
}
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
/*
 * Tidy up an operator node after some of its children were removed.
 *
 * NULL child slots are squeezed out.  Then:
 *	- an operator left without any children is freed and NULL is returned
 *	  (previously only '!' was handled, so a childless '&' or '|' node
 *	  stayed in the tree);
 *	- an '&' or '|' node left with a single child is replaced by that
 *	  child.  A '!' node always has exactly one child and must be kept,
 *	  otherwise the negation would be silently dropped.
 *
 * Non-operator nodes and NULL are returned unchanged.
 */
static QTNode *
dropvoidsubtree(QTNode * root)
{
	int			i,
				j = 0;

	if (!root)
		return NULL;

	if (root->valnode->type != OPR)
		return root;

	for (i = 0; i < root->nchild; i++)
	{
		if (root->child[i])
		{
			root->child[j] = root->child[i];
			j++;
		}
	}

	root->nchild = j;

	if (root->nchild == 0)
	{
		QTNFree(root);
		root = NULL;
	}
	else if (root->nchild == 1 && root->valnode->val != (int4) '!')
	{
		QTNode	   *nroot = root->child[0];

		pfree(root);
		root = nroot;
	}

	return root;
}
|
||||
|
||||
static QTNode *
|
||||
findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind)
|
||||
{
|
||||
bool DidFind = false;
|
||||
|
||||
root = dofindsubquery(root, ex, subs, &DidFind);
|
||||
|
||||
if (!subs && DidFind)
|
||||
root = dropvoidsubtree(root);
|
||||
|
||||
if (isfind)
|
||||
*isfind = DidFind;
|
||||
|
||||
return root;
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rewrite_accum(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery acc;
|
||||
ArrayType *qa;
|
||||
TSQuery q;
|
||||
QTNode *qex = NULL,
|
||||
*subs = NULL,
|
||||
*acctree = NULL;
|
||||
bool isfind = false;
|
||||
Datum *elemsp;
|
||||
int nelemsp;
|
||||
MemoryContext aggcontext;
|
||||
MemoryContext oldcontext;
|
||||
|
||||
aggcontext = ((AggState *) fcinfo->context)->aggcontext;
|
||||
|
||||
if (PG_ARGISNULL(0) || PG_GETARG_POINTER(0) == NULL)
|
||||
{
|
||||
acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
|
||||
SET_VARSIZE(acc, HDRSIZETQ);
|
||||
acc->size = 0;
|
||||
}
|
||||
else
|
||||
acc = PG_GETARG_TSQUERY(0);
|
||||
|
||||
if (PG_ARGISNULL(1) || PG_GETARG_POINTER(1) == NULL)
|
||||
PG_RETURN_TSQUERY(acc);
|
||||
else
|
||||
qa = PG_GETARG_ARRAYTYPE_P_COPY(1);
|
||||
|
||||
if (ARR_NDIM(qa) != 1)
|
||||
elog(ERROR, "array must be one-dimensional, not %d dimensions",
|
||||
ARR_NDIM(qa));
|
||||
if (ArrayGetNItems(ARR_NDIM(qa), ARR_DIMS(qa)) != 3)
|
||||
elog(ERROR, "array should have only three elements");
|
||||
if (ARR_ELEMTYPE(qa) != TSQUERYOID)
|
||||
elog(ERROR, "array should contain tsquery type");
|
||||
|
||||
deconstruct_array(qa, TSQUERYOID, -1, false, 'i', &elemsp, NULL, &nelemsp);
|
||||
|
||||
q = DatumGetTSQuery(elemsp[0]);
|
||||
if (q->size == 0)
|
||||
{
|
||||
pfree(elemsp);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
|
||||
if (!acc->size)
|
||||
{
|
||||
if (VARSIZE(acc) > HDRSIZETQ)
|
||||
{
|
||||
pfree(elemsp);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
else
|
||||
acctree = QT2QTN(GETQUERY(q), GETOPERAND(q));
|
||||
}
|
||||
else
|
||||
acctree = QT2QTN(GETQUERY(acc), GETOPERAND(acc));
|
||||
|
||||
QTNTernary(acctree);
|
||||
QTNSort(acctree);
|
||||
|
||||
q = DatumGetTSQuery(elemsp[1]);
|
||||
if (q->size == 0)
|
||||
{
|
||||
pfree(elemsp);
|
||||
PG_RETURN_POINTER(acc);
|
||||
}
|
||||
qex = QT2QTN(GETQUERY(q), GETOPERAND(q));
|
||||
QTNTernary(qex);
|
||||
QTNSort(qex);
|
||||
|
||||
q = DatumGetTSQuery(elemsp[2]);
|
||||
if (q->size)
|
||||
subs = QT2QTN(GETQUERY(q), GETOPERAND(q));
|
||||
|
||||
acctree = findsubquery(acctree, qex, subs, &isfind);
|
||||
|
||||
if (isfind || !acc->size)
|
||||
{
|
||||
/* pfree( acc ); do not pfree(p), because nodeAgg.c will */
|
||||
if (acctree)
|
||||
{
|
||||
QTNBinary(acctree);
|
||||
oldcontext = MemoryContextSwitchTo(aggcontext);
|
||||
acc = QTN2QT(acctree);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
}
|
||||
else
|
||||
{
|
||||
acc = (TSQuery) MemoryContextAlloc(aggcontext, HDRSIZETQ);
|
||||
SET_VARSIZE(acc, HDRSIZETQ);
|
||||
acc->size = 0;
|
||||
}
|
||||
}
|
||||
|
||||
pfree(elemsp);
|
||||
QTNFree(qex);
|
||||
QTNFree(subs);
|
||||
QTNFree(acctree);
|
||||
|
||||
PG_RETURN_TSQUERY(acc);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rewrite_finish(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery acc = PG_GETARG_TSQUERY(0);
|
||||
TSQuery rewrited;
|
||||
|
||||
if (acc == NULL || PG_ARGISNULL(0) || acc->size == 0)
|
||||
{
|
||||
rewrited = (TSQuery) palloc(HDRSIZETQ);
|
||||
SET_VARSIZE(rewrited, HDRSIZETQ);
|
||||
rewrited->size = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
rewrited = (TSQuery) palloc(VARSIZE(acc));
|
||||
memcpy(rewrited, acc, VARSIZE(acc));
|
||||
pfree(acc);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_rewrite(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY_COPY(0);
|
||||
text *in = PG_GETARG_TEXT_P(1);
|
||||
TSQuery rewrited = query;
|
||||
MemoryContext outercontext = CurrentMemoryContext;
|
||||
MemoryContext oldcontext;
|
||||
QTNode *tree;
|
||||
char *buf;
|
||||
void *plan;
|
||||
Portal portal;
|
||||
bool isnull;
|
||||
int i;
|
||||
|
||||
if (query->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
|
||||
QTNTernary(tree);
|
||||
QTNSort(tree);
|
||||
|
||||
buf = TextPGetCString(in);
|
||||
|
||||
SPI_connect();
|
||||
|
||||
if ((plan = SPI_prepare(buf, 0, NULL)) == NULL)
|
||||
elog(ERROR, "SPI_prepare(\"%s\") failed", buf);
|
||||
|
||||
if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, false)) == NULL)
|
||||
elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf);
|
||||
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
|
||||
if (SPI_tuptable->tupdesc->natts != 2 ||
|
||||
SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID ||
|
||||
SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("ts_rewrite query must return two tsquery columns")));
|
||||
|
||||
while (SPI_processed > 0 && tree)
|
||||
{
|
||||
for (i = 0; i < SPI_processed && tree; i++)
|
||||
{
|
||||
Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull);
|
||||
Datum sdata;
|
||||
|
||||
if (isnull)
|
||||
continue;
|
||||
|
||||
sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull);
|
||||
|
||||
if (!isnull)
|
||||
{
|
||||
TSQuery qtex = DatumGetTSQuery(qdata);
|
||||
TSQuery qtsubs = DatumGetTSQuery(sdata);
|
||||
QTNode *qex,
|
||||
*qsubs = NULL;
|
||||
|
||||
if (qtex->size == 0)
|
||||
{
|
||||
if (qtex != (TSQuery) DatumGetPointer(qdata))
|
||||
pfree(qtex);
|
||||
if (qtsubs != (TSQuery) DatumGetPointer(sdata))
|
||||
pfree(qtsubs);
|
||||
continue;
|
||||
}
|
||||
|
||||
qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex));
|
||||
|
||||
QTNTernary(qex);
|
||||
QTNSort(qex);
|
||||
|
||||
if (qtsubs->size)
|
||||
qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs));
|
||||
|
||||
oldcontext = MemoryContextSwitchTo(outercontext);
|
||||
tree = findsubquery(tree, qex, qsubs, NULL);
|
||||
MemoryContextSwitchTo(oldcontext);
|
||||
|
||||
QTNFree(qex);
|
||||
if (qtex != (TSQuery) DatumGetPointer(qdata))
|
||||
pfree(qtex);
|
||||
QTNFree(qsubs);
|
||||
if (qtsubs != (TSQuery) DatumGetPointer(sdata))
|
||||
pfree(qtsubs);
|
||||
}
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_fetch(portal, true, 100);
|
||||
}
|
||||
|
||||
SPI_freetuptable(SPI_tuptable);
|
||||
SPI_cursor_close(portal);
|
||||
SPI_freeplan(plan);
|
||||
SPI_finish();
|
||||
|
||||
if (tree)
|
||||
{
|
||||
QTNBinary(tree);
|
||||
rewrited = QTN2QT(tree);
|
||||
QTNFree(tree);
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
SET_VARSIZE(rewrited, HDRSIZETQ);
|
||||
rewrited->size = 0;
|
||||
}
|
||||
|
||||
pfree(buf);
|
||||
PG_FREE_IF_COPY(in, 1);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_rewrite_query(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery query = PG_GETARG_TSQUERY_COPY(0);
|
||||
TSQuery ex = PG_GETARG_TSQUERY(1);
|
||||
TSQuery subst = PG_GETARG_TSQUERY(2);
|
||||
TSQuery rewrited = query;
|
||||
QTNode *tree,
|
||||
*qex,
|
||||
*subs = NULL;
|
||||
|
||||
if (query->size == 0 || ex->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
PG_FREE_IF_COPY(subst, 2);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
|
||||
tree = QT2QTN(GETQUERY(query), GETOPERAND(query));
|
||||
QTNTernary(tree);
|
||||
QTNSort(tree);
|
||||
|
||||
qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex));
|
||||
QTNTernary(qex);
|
||||
QTNSort(qex);
|
||||
|
||||
if (subst->size)
|
||||
subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst));
|
||||
|
||||
tree = findsubquery(tree, qex, subs, NULL);
|
||||
QTNFree(qex);
|
||||
QTNFree(subs);
|
||||
|
||||
if (!tree)
|
||||
{
|
||||
SET_VARSIZE(rewrited, HDRSIZETQ);
|
||||
rewrited->size = 0;
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
PG_FREE_IF_COPY(subst, 2);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
else
|
||||
{
|
||||
QTNBinary(tree);
|
||||
rewrited = QTN2QT(tree);
|
||||
QTNFree(tree);
|
||||
}
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_FREE_IF_COPY(ex, 1);
|
||||
PG_FREE_IF_COPY(subst, 2);
|
||||
PG_RETURN_POINTER(rewrited);
|
||||
}
|
||||
317
src/backend/utils/adt/tsquery_util.c
Normal file
317
src/backend/utils/adt/tsquery_util.c
Normal file
@@ -0,0 +1,317 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsquery_util.c
|
||||
* Utilities for tsquery datatype
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
|
||||
|
||||
QTNode *
|
||||
QT2QTN(QueryItem * in, char *operand)
|
||||
{
|
||||
QTNode *node = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
node->valnode = in;
|
||||
|
||||
if (in->type == OPR)
|
||||
{
|
||||
node->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
node->child[0] = QT2QTN(in + 1, operand);
|
||||
node->sign = node->child[0]->sign;
|
||||
if (in->val == (int4) '!')
|
||||
node->nchild = 1;
|
||||
else
|
||||
{
|
||||
node->nchild = 2;
|
||||
node->child[1] = QT2QTN(in + in->left, operand);
|
||||
node->sign |= node->child[1]->sign;
|
||||
}
|
||||
}
|
||||
else if (operand)
|
||||
{
|
||||
node->word = operand + in->distance;
|
||||
node->sign = 1 << (in->val % 32);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
void
|
||||
QTNFree(QTNode * in)
|
||||
{
|
||||
if (!in)
|
||||
return;
|
||||
|
||||
if (in->valnode->type == VAL && in->word && (in->flags & QTN_WORDFREE) != 0)
|
||||
pfree(in->word);
|
||||
|
||||
if (in->child)
|
||||
{
|
||||
if (in->valnode)
|
||||
{
|
||||
if (in->valnode->type == OPR && in->nchild > 0)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNFree(in->child[i]);
|
||||
}
|
||||
if (in->flags & QTN_NEEDFREE)
|
||||
pfree(in->valnode);
|
||||
}
|
||||
pfree(in->child);
|
||||
}
|
||||
|
||||
pfree(in);
|
||||
}
|
||||
|
||||
int
|
||||
QTNodeCompare(QTNode * an, QTNode * bn)
|
||||
{
|
||||
if (an->valnode->type != bn->valnode->type)
|
||||
return (an->valnode->type > bn->valnode->type) ? -1 : 1;
|
||||
else if (an->valnode->val != bn->valnode->val)
|
||||
return (an->valnode->val > bn->valnode->val) ? -1 : 1;
|
||||
else if (an->valnode->type == VAL)
|
||||
{
|
||||
if (an->valnode->length == bn->valnode->length)
|
||||
return strncmp(an->word, bn->word, an->valnode->length);
|
||||
else
|
||||
return (an->valnode->length > bn->valnode->length) ? -1 : 1;
|
||||
}
|
||||
else if (an->nchild != bn->nchild)
|
||||
{
|
||||
return (an->nchild > bn->nchild) ? -1 : 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
int i,
|
||||
res;
|
||||
|
||||
for (i = 0; i < an->nchild; i++)
|
||||
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
|
||||
return res;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
cmpQTN(const void *a, const void *b)
|
||||
{
|
||||
return QTNodeCompare(*(QTNode **) a, *(QTNode **) b);
|
||||
}
|
||||
|
||||
void
|
||||
QTNSort(QTNode * in)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (in->valnode->type != OPR)
|
||||
return;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNSort(in->child[i]);
|
||||
if (in->nchild > 1)
|
||||
qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
|
||||
}
|
||||
|
||||
bool
|
||||
QTNEq(QTNode * a, QTNode * b)
|
||||
{
|
||||
uint32 sign = a->sign & b->sign;
|
||||
|
||||
if (!(sign == a->sign && sign == b->sign))
|
||||
return 0;
|
||||
|
||||
return (QTNodeCompare(a, b) == 0) ? true : false;
|
||||
}
|
||||
|
||||
void
|
||||
QTNTernary(QTNode * in)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (in->valnode->type != OPR)
|
||||
return;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNTernary(in->child[i]);
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
{
|
||||
if (in->valnode->type == in->child[i]->valnode->type && in->valnode->val == in->child[i]->valnode->val)
|
||||
{
|
||||
QTNode *cc = in->child[i];
|
||||
int oldnchild = in->nchild;
|
||||
|
||||
in->nchild += cc->nchild - 1;
|
||||
in->child = (QTNode **) repalloc(in->child, in->nchild * sizeof(QTNode *));
|
||||
|
||||
if (i + 1 != oldnchild)
|
||||
memmove(in->child + i + cc->nchild, in->child + i + 1,
|
||||
(oldnchild - i - 1) * sizeof(QTNode *));
|
||||
|
||||
memcpy(in->child + i, cc->child, cc->nchild * sizeof(QTNode *));
|
||||
i += cc->nchild - 1;
|
||||
|
||||
pfree(cc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QTNBinary(QTNode * in)
|
||||
{
|
||||
int i;
|
||||
|
||||
if (in->valnode->type != OPR)
|
||||
return;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNBinary(in->child[i]);
|
||||
|
||||
if (in->nchild <= 2)
|
||||
return;
|
||||
|
||||
while (in->nchild > 2)
|
||||
{
|
||||
QTNode *nn = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
nn->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
|
||||
nn->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
|
||||
nn->nchild = 2;
|
||||
nn->flags = QTN_NEEDFREE;
|
||||
|
||||
nn->child[0] = in->child[0];
|
||||
nn->child[1] = in->child[1];
|
||||
nn->sign = nn->child[0]->sign | nn->child[1]->sign;
|
||||
|
||||
nn->valnode->type = in->valnode->type;
|
||||
nn->valnode->val = in->valnode->val;
|
||||
|
||||
in->child[0] = nn;
|
||||
in->child[1] = in->child[in->nchild - 1];
|
||||
in->nchild--;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
cntsize(QTNode * in, int4 *sumlen, int4 *nnode)
|
||||
{
|
||||
*nnode += 1;
|
||||
if (in->valnode->type == OPR)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
cntsize(in->child[i], sumlen, nnode);
|
||||
}
|
||||
else
|
||||
{
|
||||
*sumlen += in->valnode->length + 1;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
QueryItem *curitem;
|
||||
char *operand;
|
||||
char *curoperand;
|
||||
} QTN2QTState;
|
||||
|
||||
static void
|
||||
fillQT(QTN2QTState * state, QTNode * in)
|
||||
{
|
||||
*(state->curitem) = *(in->valnode);
|
||||
|
||||
if (in->valnode->type == VAL)
|
||||
{
|
||||
memcpy(state->curoperand, in->word, in->valnode->length);
|
||||
state->curitem->distance = state->curoperand - state->operand;
|
||||
state->curoperand[in->valnode->length] = '\0';
|
||||
state->curoperand += in->valnode->length + 1;
|
||||
state->curitem++;
|
||||
}
|
||||
else
|
||||
{
|
||||
QueryItem *curitem = state->curitem;
|
||||
|
||||
Assert(in->nchild <= 2);
|
||||
state->curitem++;
|
||||
|
||||
fillQT(state, in->child[0]);
|
||||
|
||||
if (in->nchild == 2)
|
||||
{
|
||||
curitem->left = state->curitem - curitem;
|
||||
fillQT(state, in->child[1]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TSQuery
|
||||
QTN2QT(QTNode *in)
|
||||
{
|
||||
TSQuery out;
|
||||
int len;
|
||||
int sumlen = 0,
|
||||
nnode = 0;
|
||||
QTN2QTState state;
|
||||
|
||||
cntsize(in, &sumlen, &nnode);
|
||||
len = COMPUTESIZE(nnode, sumlen);
|
||||
|
||||
out = (TSQuery) palloc(len);
|
||||
SET_VARSIZE(out, len);
|
||||
out->size = nnode;
|
||||
|
||||
state.curitem = GETQUERY(out);
|
||||
state.operand = state.curoperand = GETOPERAND(out);
|
||||
|
||||
fillQT(&state, in);
|
||||
return out;
|
||||
}
|
||||
|
||||
/*
 * QTNCopy
 *		Return a deep copy of the tree rooted at "in".
 *
 * Every copied node gets its own valnode (flagged QTN_NEEDFREE); operand
 * nodes also get their own NUL-terminated copy of the word (flagged
 * QTN_WORDFREE).
 */
QTNode *
QTNCopy(QTNode *in)
{
	QTNode	   *dup = (QTNode *) palloc(sizeof(QTNode));
	int			k;

	*dup = *in;
	dup->valnode = (QueryItem *) palloc(sizeof(QueryItem));
	*(dup->valnode) = *(in->valnode);
	dup->flags |= QTN_NEEDFREE;

	if (in->valnode->type != VAL)
	{
		/* operator: copy every subtree */
		dup->child = (QTNode **) palloc(sizeof(QTNode *) * in->nchild);

		for (k = 0; k < in->nchild; k++)
			dup->child[k] = QTNCopy(in->child[k]);

		return dup;
	}

	/* operand: private copy of the word */
	dup->word = palloc(in->valnode->length + 1);
	memcpy(dup->word, in->word, in->valnode->length);
	dup->word[in->valnode->length] = '\0';
	dup->flags |= QTN_WORDFREE;

	return dup;
}
|
||||
804
src/backend/utils/adt/tsrank.c
Normal file
804
src/backend/utils/adt/tsrank.c
Normal file
@@ -0,0 +1,804 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsrank.c
|
||||
* rank tsvector by tsquery
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/array.h"
|
||||
|
||||
|
||||
/*
 * Default per-label weights, indexed by the value WEP_GETWEIGHT() yields for
 * a position.  The tsvector input parser in this commit stores 0 for label D
 * up to 3 for label A, so the order here is D, C, B, A.
 */
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
||||
|
||||
/* Weight of one position; expects a local "float *w" weight table in scope */
#define wpos(wep) ( w[ WEP_GETWEIGHT(wep) ] )
|
||||
|
||||
/*
 * Bit flags for the "method" argument of the rank functions.  Each selected
 * flag divides the rank by: LOGLENGTH - a logarithm of (cnt_length + 1);
 * LENGTH - cnt_length; EXTDIST - NExtent / SumDist (calc_rank_cd only);
 * UNIQ - the tsvector's "size" field; LOGUNIQ - log2 of (size + 1).
 */
#define RANK_NO_NORM 0x00
|
||||
#define RANK_NORM_LOGLENGTH 0x01
|
||||
#define RANK_NORM_LENGTH 0x02
|
||||
#define RANK_NORM_EXTDIST 0x04
|
||||
#define RANK_NORM_UNIQ 0x08
|
||||
#define RANK_NORM_LOGUNIQ 0x10
|
||||
#define DEF_NORM_METHOD RANK_NO_NORM
|
||||
|
||||
static float calc_rank_or(float *w, TSVector t, TSQuery q);
|
||||
static float calc_rank_and(float *w, TSVector t, TSQuery q);
|
||||
|
||||
/*
|
||||
* Returns a weight of a word collocation
|
||||
*/
|
||||
static float4
|
||||
word_distance(int4 w)
|
||||
{
|
||||
if (w > 100)
|
||||
return 1e-30;
|
||||
|
||||
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
|
||||
}
|
||||
|
||||
static int
|
||||
cnt_length(TSVector t)
|
||||
{
|
||||
WordEntry *ptr = ARRPTR(t),
|
||||
*end = (WordEntry *) STRPTR(t);
|
||||
int len = 0,
|
||||
clen;
|
||||
|
||||
while (ptr < end)
|
||||
{
|
||||
if ((clen = POSDATALEN(t, ptr)) == 0)
|
||||
len += 1;
|
||||
else
|
||||
len += clen;
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static int4
|
||||
WordECompareQueryItem(char *eval, char *qval, WordEntry * ptr, QueryItem * item)
|
||||
{
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
}
|
||||
|
||||
static WordEntry *
|
||||
find_wordentry(TSVector t, TSQuery q, QueryItem * item)
|
||||
{
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
compareQueryItem(const void *a, const void *b, void *arg)
|
||||
{
|
||||
char *operand = (char *) arg;
|
||||
|
||||
if ((*(QueryItem **) a)->length == (*(QueryItem **) b)->length)
|
||||
return strncmp(operand + (*(QueryItem **) a)->distance,
|
||||
operand + (*(QueryItem **) b)->distance,
|
||||
(*(QueryItem **) b)->length);
|
||||
|
||||
return ((*(QueryItem **) a)->length > (*(QueryItem **) b)->length) ? 1 : -1;
|
||||
}
|
||||
|
||||
/*
 * Collect pointers to the VAL items among the *size items starting at
 * "item", sort them and drop duplicates.  Returns a palloc'd array (the
 * caller frees it) and stores the number of distinct operands in *size.
 */
static QueryItem **
SortAndUniqItems(char *operand, QueryItem * item, int *size)
{
	int			nitems = *size;
	int			nvals = 0;
	int			last;
	int			i;
	QueryItem **res;

	res = (QueryItem **) palloc(sizeof(QueryItem *) * nitems);

	/* keep operands only */
	for (i = 0; i < nitems; i++)
	{
		if (item[i].type == VAL)
			res[nvals++] = &item[i];
	}

	*size = nvals;
	if (nvals < 2)
		return res;

	qsort_arg(res, nvals, sizeof(QueryItem **), compareQueryItem, (void *) operand);

	/* compact in place: res[0..last] holds the distinct operands so far */
	last = 0;
	for (i = 1; i < nvals; i++)
	{
		if (compareQueryItem((void *) &res[i], (void *) &res[last], (void *) operand) != 0)
		{
			last++;
			res[last] = res[i];
		}
	}

	*size = last + 1;
	return res;
}
|
||||
|
||||
static WordEntryPos POSNULL[] = {
|
||||
0,
|
||||
0
|
||||
};
|
||||
|
||||
static float
|
||||
calc_rank_and(float *w, TSVector t, TSQuery q)
|
||||
{
|
||||
uint16 **pos;
|
||||
int i,
|
||||
k,
|
||||
l,
|
||||
p;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post,
|
||||
*ct;
|
||||
int4 dimt,
|
||||
lenct,
|
||||
dist;
|
||||
float res = -1.0;
|
||||
QueryItem **item;
|
||||
int size = q->size;
|
||||
|
||||
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
||||
if (size < 2)
|
||||
{
|
||||
pfree(item);
|
||||
return calc_rank_or(w, t, q);
|
||||
}
|
||||
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
||||
memset(pos, 0, sizeof(uint16 *) * q->size);
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
WEP_SETPOS(POSNULL[1], MAXENTRYPOS - 1);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
entry = find_wordentry(t, q, item[i]);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
|
||||
else
|
||||
pos[i] = (uint16 *) POSNULL;
|
||||
|
||||
|
||||
dimt = *(uint16 *) (pos[i]);
|
||||
post = (WordEntryPos *) (pos[i] + 1);
|
||||
for (k = 0; k < i; k++)
|
||||
{
|
||||
if (!pos[k])
|
||||
continue;
|
||||
lenct = *(uint16 *) (pos[k]);
|
||||
ct = (WordEntryPos *) (pos[k] + 1);
|
||||
for (l = 0; l < dimt; l++)
|
||||
{
|
||||
for (p = 0; p < lenct; p++)
|
||||
{
|
||||
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
|
||||
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
|
||||
{
|
||||
float curw;
|
||||
|
||||
if (!dist)
|
||||
dist = MAXENTRYPOS;
|
||||
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
|
||||
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pfree(pos);
|
||||
pfree(item);
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank_or(float *w, TSVector t, TSQuery q)
|
||||
{
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
float res = 0.0;
|
||||
QueryItem **item;
|
||||
int size = q->size;
|
||||
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
item = SortAndUniqItems(GETOPERAND(q), GETQUERY(q), &size);
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
float resj,
|
||||
wjm;
|
||||
int4 jm;
|
||||
|
||||
entry = find_wordentry(t, q, item[i]);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(t, entry);
|
||||
post = POSDATAPTR(t, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
resj = 0.0;
|
||||
wjm = -1.0;
|
||||
jm = 0;
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
|
||||
if (wpos(post[j]) > wjm)
|
||||
{
|
||||
wjm = wpos(post[j]);
|
||||
jm = j;
|
||||
}
|
||||
}
|
||||
/*
|
||||
limit (sum(i/i^2),i->inf) = pi^2/6
|
||||
resj = sum(wi/i^2),i=1,noccurence,
|
||||
wi - should be sorted desc,
|
||||
don't sort for now, just choose maximum weight. This should be corrected
|
||||
Oleg Bartunov
|
||||
*/
|
||||
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
||||
}
|
||||
if (size > 0)
|
||||
res = res / size;
|
||||
pfree(item);
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank(float *w, TSVector t, TSQuery q, int4 method)
|
||||
{
|
||||
QueryItem *item = GETQUERY(q);
|
||||
float res = 0.0;
|
||||
int len;
|
||||
|
||||
if (!t->size || !q->size)
|
||||
return 0.0;
|
||||
|
||||
res = (item->type != VAL && item->val == (int4) '&') ?
|
||||
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
||||
|
||||
if (res < 0)
|
||||
res = 1e-20;
|
||||
|
||||
if ((method & RANK_NORM_LOGLENGTH) && t->size > 0)
|
||||
res /= log((double) (cnt_length(t) + 1)) / log(2.0);
|
||||
|
||||
if (method & RANK_NORM_LENGTH)
|
||||
{
|
||||
len = cnt_length(t);
|
||||
if (len > 0)
|
||||
res /= (float) len;
|
||||
}
|
||||
|
||||
if ((method & RANK_NORM_UNIQ) && t->size > 0)
|
||||
res /= (float) (t->size);
|
||||
|
||||
if ((method & RANK_NORM_LOGUNIQ) && t->size > 0)
|
||||
res /= log((double) (t->size + 1)) / log(2.0);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static float *
|
||||
getWeights(ArrayType *win)
|
||||
{
|
||||
static float ws[lengthof(weights)];
|
||||
int i;
|
||||
float4 *arrdata;
|
||||
|
||||
if (win == 0)
|
||||
return weights;
|
||||
|
||||
if (ARR_NDIM(win) != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight must be one-dimensional")));
|
||||
|
||||
if (ArrayGetNItems(ARR_NDIM(win), ARR_DIMS(win)) < lengthof(weights))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight is too short")));
|
||||
|
||||
if (ARR_HASNULL(win))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
|
||||
errmsg("array of weight must not contain nulls")));
|
||||
|
||||
arrdata = (float4 *) ARR_DATA_PTR(win);
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
ws[i] = (arrdata[i] >= 0) ? arrdata[i] : weights[i];
|
||||
if (ws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
}
|
||||
|
||||
return ws;
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rank_wttf(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
TSVector txt = PG_GETARG_TSVECTOR(1);
|
||||
TSQuery query = PG_GETARG_TSQUERY(2);
|
||||
int method = PG_GETARG_INT32(3);
|
||||
float res;
|
||||
|
||||
res = calc_rank(getWeights(win), txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rank_wtt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
TSVector txt = PG_GETARG_TSVECTOR(1);
|
||||
TSQuery query = PG_GETARG_TSQUERY(2);
|
||||
float res;
|
||||
|
||||
res = calc_rank(getWeights(win), txt, query, DEF_NORM_METHOD);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rank_ttf(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector txt = PG_GETARG_TSVECTOR(0);
|
||||
TSQuery query = PG_GETARG_TSQUERY(1);
|
||||
int method = PG_GETARG_INT32(2);
|
||||
float res;
|
||||
|
||||
res = calc_rank(getWeights(NULL), txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rank_tt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector txt = PG_GETARG_TSVECTOR(0);
|
||||
TSQuery query = PG_GETARG_TSQUERY(1);
|
||||
float res;
|
||||
|
||||
res = calc_rank(getWeights(NULL), txt, query, DEF_NORM_METHOD);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
QueryItem **item;
|
||||
int16 nitem;
|
||||
bool needfree;
|
||||
uint8 wclass;
|
||||
int32 pos;
|
||||
} DocRepresentation;
|
||||
|
||||
static int
|
||||
compareDocR(const void *a, const void *b)
|
||||
{
|
||||
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
|
||||
return 0;
|
||||
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
static bool
|
||||
checkcondition_QueryItem(void *checkval, QueryItem * val)
|
||||
{
|
||||
return (bool) (val->istrue);
|
||||
}
|
||||
|
||||
static void
|
||||
reset_istrue_flag(TSQuery query)
|
||||
{
|
||||
QueryItem *item = GETQUERY(query);
|
||||
int i;
|
||||
|
||||
/* reset istrue flag */
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type == VAL)
|
||||
item->istrue = 0;
|
||||
item++;
|
||||
}
|
||||
}
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int pos;
|
||||
int p;
|
||||
int q;
|
||||
DocRepresentation *begin;
|
||||
DocRepresentation *end;
|
||||
} Extention;
|
||||
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation * doc, int len, TSQuery query, Extention * ext)
|
||||
{
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
int i;
|
||||
bool found = false;
|
||||
|
||||
reset_istrue_flag(query);
|
||||
|
||||
ext->p = 0x7fffffff;
|
||||
ext->q = 0;
|
||||
ptr = doc + ext->pos;
|
||||
|
||||
/* find upper bound of cover from current position, move up */
|
||||
while (ptr - doc < len)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
ptr->item[i]->istrue = 1;
|
||||
if (TS_execute(GETQUERY(query), NULL, false, checkcondition_QueryItem))
|
||||
{
|
||||
if (ptr->pos > ext->q)
|
||||
{
|
||||
ext->q = ptr->pos;
|
||||
ext->end = ptr;
|
||||
lastpos = ptr - doc;
|
||||
found = true;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
if (!found)
|
||||
return false;
|
||||
|
||||
reset_istrue_flag(query);
|
||||
|
||||
ptr = doc + lastpos;
|
||||
|
||||
/* find lower bound of cover from founded upper bound, move down */
|
||||
while (ptr >= doc + ext->pos)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
ptr->item[i]->istrue = 1;
|
||||
if (TS_execute(GETQUERY(query), NULL, true, checkcondition_QueryItem))
|
||||
{
|
||||
if (ptr->pos < ext->p)
|
||||
{
|
||||
ext->begin = ptr;
|
||||
ext->p = ptr->pos;
|
||||
}
|
||||
break;
|
||||
}
|
||||
ptr--;
|
||||
}
|
||||
|
||||
if (ext->p <= ext->q)
|
||||
{
|
||||
/*
|
||||
* set position for next try to next lexeme after begining of founded
|
||||
* cover
|
||||
*/
|
||||
ext->pos = (ptr - doc) + 1;
|
||||
return true;
|
||||
}
|
||||
|
||||
ext->pos++;
|
||||
return Cover(doc, len, query, ext);
|
||||
}
|
||||
|
||||
static DocRepresentation *
|
||||
get_docrep(TSVector txt, TSQuery query, int *doclen)
|
||||
{
|
||||
QueryItem *item = GETQUERY(query);
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
int len = query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
char *operand;
|
||||
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||
operand = GETOPERAND(query);
|
||||
reset_istrue_flag(query);
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item[i].type != VAL || item[i].istrue)
|
||||
continue;
|
||||
|
||||
entry = find_wordentry(txt, query, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(txt, entry);
|
||||
post = POSDATAPTR(txt, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
while (cur + dimt >= len)
|
||||
{
|
||||
len *= 2;
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
QueryItem *kptr,
|
||||
*iptr = item + i;
|
||||
int k;
|
||||
|
||||
doc[cur].needfree = false;
|
||||
doc[cur].nitem = 0;
|
||||
doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * query->size);
|
||||
|
||||
for (k = 0; k < query->size; k++)
|
||||
{
|
||||
kptr = item + k;
|
||||
if (k == i ||
|
||||
(item[k].type == VAL &&
|
||||
compareQueryItem(&kptr, &iptr, operand) == 0))
|
||||
{
|
||||
doc[cur].item[doc[cur].nitem] = item + k;
|
||||
doc[cur].nitem++;
|
||||
kptr->istrue = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
doc[cur].needfree = false;
|
||||
doc[cur].nitem = doc[cur - 1].nitem;
|
||||
doc[cur].item = doc[cur - 1].item;
|
||||
}
|
||||
doc[cur].pos = WEP_GETPOS(post[j]);
|
||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
*doclen = cur;
|
||||
|
||||
if (cur > 0)
|
||||
{
|
||||
if (cur > 1)
|
||||
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
||||
return doc;
|
||||
}
|
||||
|
||||
pfree(doc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static float4
|
||||
calc_rank_cd(float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
{
|
||||
DocRepresentation *doc;
|
||||
int len,
|
||||
i,
|
||||
doclen = 0;
|
||||
Extention ext;
|
||||
double Wdoc = 0.0;
|
||||
double invws[lengthof(weights)];
|
||||
double SumDist = 0.0,
|
||||
PrevExtPos = 0.0,
|
||||
CurExtPos = 0.0;
|
||||
int NExtent = 0;
|
||||
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
invws[i] = ((double) ((arrdata[i] >= 0) ? arrdata[i] : weights[i]));
|
||||
if (invws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
invws[i] = 1.0 / invws[i];
|
||||
}
|
||||
|
||||
doc = get_docrep(txt, query, &doclen);
|
||||
if (!doc)
|
||||
return 0.0;
|
||||
|
||||
MemSet(&ext, 0, sizeof(Extention));
|
||||
while (Cover(doc, doclen, query, &ext))
|
||||
{
|
||||
double Cpos = 0.0;
|
||||
double InvSum = 0.0;
|
||||
int nNoise;
|
||||
DocRepresentation *ptr = ext.begin;
|
||||
|
||||
while (ptr <= ext.end)
|
||||
{
|
||||
InvSum += invws[ptr->wclass];
|
||||
ptr++;
|
||||
}
|
||||
|
||||
Cpos = ((double) (ext.end - ext.begin + 1)) / InvSum;
|
||||
|
||||
/*
|
||||
* if doc are big enough then ext.q may be equal to ext.p due to limit
|
||||
* of posional information. In this case we approximate number of
|
||||
* noise word as half cover's length
|
||||
*/
|
||||
nNoise = (ext.q - ext.p) - (ext.end - ext.begin);
|
||||
if (nNoise < 0)
|
||||
nNoise = (ext.end - ext.begin) / 2;
|
||||
Wdoc += Cpos / ((double) (1 + nNoise));
|
||||
|
||||
CurExtPos = ((double) (ext.q + ext.p)) / 2.0;
|
||||
if (NExtent > 0 && CurExtPos > PrevExtPos /* prevent devision by
|
||||
* zero in a case of
|
||||
multiple lexize */ )
|
||||
SumDist += 1.0 / (CurExtPos - PrevExtPos);
|
||||
|
||||
PrevExtPos = CurExtPos;
|
||||
NExtent++;
|
||||
}
|
||||
|
||||
if ((method & RANK_NORM_LOGLENGTH) && txt->size > 0)
|
||||
Wdoc /= log((double) (cnt_length(txt) + 1));
|
||||
|
||||
if (method & RANK_NORM_LENGTH)
|
||||
{
|
||||
len = cnt_length(txt);
|
||||
if (len > 0)
|
||||
Wdoc /= (double) len;
|
||||
}
|
||||
|
||||
if ((method & RANK_NORM_EXTDIST) && SumDist > 0)
|
||||
Wdoc /= ((double) NExtent) / SumDist;
|
||||
|
||||
if ((method & RANK_NORM_UNIQ) && txt->size > 0)
|
||||
Wdoc /= (double) (txt->size);
|
||||
|
||||
if ((method & RANK_NORM_LOGUNIQ) && txt->size > 0)
|
||||
Wdoc /= log((double) (txt->size + 1)) / log(2.0);
|
||||
|
||||
for (i = 0; i < doclen; i++)
|
||||
if (doc[i].needfree)
|
||||
pfree(doc[i].item);
|
||||
pfree(doc);
|
||||
|
||||
return (float4) Wdoc;
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rankcd_wttf(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
TSVector txt = PG_GETARG_TSVECTOR(1);
|
||||
TSQuery query = PG_GETARG_TSQUERY_COPY(2);
|
||||
int method = PG_GETARG_INT32(3);
|
||||
float res;
|
||||
|
||||
res = calc_rank_cd(getWeights(win), txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rankcd_wtt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
TSVector txt = PG_GETARG_TSVECTOR(1);
|
||||
TSQuery query = PG_GETARG_TSQUERY_COPY(2);
|
||||
float res;
|
||||
|
||||
res = calc_rank_cd(getWeights(win), txt, query, DEF_NORM_METHOD);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rankcd_ttf(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector txt = PG_GETARG_TSVECTOR(0);
|
||||
TSQuery query = PG_GETARG_TSQUERY_COPY(1);
|
||||
int method = PG_GETARG_INT32(2);
|
||||
float res;
|
||||
|
||||
res = calc_rank_cd(getWeights(NULL), txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
Datum
|
||||
ts_rankcd_tt(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector txt = PG_GETARG_TSVECTOR(0);
|
||||
TSQuery query = PG_GETARG_TSQUERY_COPY(1);
|
||||
float res;
|
||||
|
||||
res = calc_rank_cd(getWeights(NULL), txt, query, DEF_NORM_METHOD);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
683
src/backend/utils/adt/tsvector.c
Normal file
683
src/backend/utils/adt/tsvector.c
Normal file
@@ -0,0 +1,683 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* tsvector.c
|
||||
* I/O functions for tsvector
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.1 2007/08/21 01:11:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include "libpq/pqformat.h"
|
||||
#include "tsearch/ts_type.h"
|
||||
#include "tsearch/ts_locale.h"
|
||||
#include "tsearch/ts_utils.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
static int
|
||||
comparePos(const void *a, const void *b)
|
||||
{
|
||||
if (WEP_GETPOS(*(WordEntryPos *) a) == WEP_GETPOS(*(WordEntryPos *) b))
|
||||
return 0;
|
||||
return (WEP_GETPOS(*(WordEntryPos *) a) > WEP_GETPOS(*(WordEntryPos *) b)) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int
|
||||
uniquePos(WordEntryPos * a, int4 l)
|
||||
{
|
||||
WordEntryPos *ptr,
|
||||
*res;
|
||||
|
||||
if (l == 1)
|
||||
return l;
|
||||
|
||||
res = a;
|
||||
qsort((void *) a, l, sizeof(WordEntryPos), comparePos);
|
||||
|
||||
ptr = a + 1;
|
||||
while (ptr - a < l)
|
||||
{
|
||||
if (WEP_GETPOS(*ptr) != WEP_GETPOS(*res))
|
||||
{
|
||||
res++;
|
||||
*res = *ptr;
|
||||
if (res - a >= MAXNUMPOS - 1 || WEP_GETPOS(*res) == MAXENTRYPOS - 1)
|
||||
break;
|
||||
}
|
||||
else if (WEP_GETWEIGHT(*ptr) > WEP_GETWEIGHT(*res))
|
||||
WEP_SETWEIGHT(*res, WEP_GETWEIGHT(*ptr));
|
||||
ptr++;
|
||||
}
|
||||
|
||||
return res + 1 - a;
|
||||
}
|
||||
|
||||
static int
|
||||
compareentry(const void *a, const void *b, void *arg)
|
||||
{
|
||||
char *BufferStr = (char *) arg;
|
||||
|
||||
if (((WordEntryIN *) a)->entry.len == ((WordEntryIN *) b)->entry.len)
|
||||
{
|
||||
return strncmp(&BufferStr[((WordEntryIN *) a)->entry.pos],
|
||||
&BufferStr[((WordEntryIN *) b)->entry.pos],
|
||||
((WordEntryIN *) a)->entry.len);
|
||||
}
|
||||
|
||||
return (((WordEntryIN *) a)->entry.len > ((WordEntryIN *) b)->entry.len) ? 1 : -1;
|
||||
}
|
||||
|
||||
static int
|
||||
uniqueentry(WordEntryIN * a, int4 l, char *buf, int4 *outbuflen)
|
||||
{
|
||||
WordEntryIN *ptr,
|
||||
*res;
|
||||
|
||||
res = a;
|
||||
if (l == 1)
|
||||
{
|
||||
if (a->entry.haspos)
|
||||
{
|
||||
*(uint16 *) (a->pos) = uniquePos(&(a->pos[1]), *(uint16 *) (a->pos));
|
||||
*outbuflen = SHORTALIGN(res->entry.len) + (*(uint16 *) (a->pos) + 1) * sizeof(WordEntryPos);
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
ptr = a + 1;
|
||||
qsort_arg((void *) a, l, sizeof(WordEntryIN), compareentry, (void *) buf);
|
||||
|
||||
while (ptr - a < l)
|
||||
{
|
||||
if (!(ptr->entry.len == res->entry.len &&
|
||||
strncmp(&buf[ptr->entry.pos], &buf[res->entry.pos], res->entry.len) == 0))
|
||||
{
|
||||
if (res->entry.haspos)
|
||||
{
|
||||
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
|
||||
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
|
||||
}
|
||||
*outbuflen += SHORTALIGN(res->entry.len);
|
||||
res++;
|
||||
memcpy(res, ptr, sizeof(WordEntryIN));
|
||||
}
|
||||
else if (ptr->entry.haspos)
|
||||
{
|
||||
if (res->entry.haspos)
|
||||
{
|
||||
int4 len = *(uint16 *) (ptr->pos) + 1 + *(uint16 *) (res->pos);
|
||||
|
||||
res->pos = (WordEntryPos *) repalloc(res->pos, len * sizeof(WordEntryPos));
|
||||
memcpy(&(res->pos[*(uint16 *) (res->pos) + 1]),
|
||||
&(ptr->pos[1]), *(uint16 *) (ptr->pos) * sizeof(WordEntryPos));
|
||||
*(uint16 *) (res->pos) += *(uint16 *) (ptr->pos);
|
||||
pfree(ptr->pos);
|
||||
}
|
||||
else
|
||||
{
|
||||
res->entry.haspos = 1;
|
||||
res->pos = ptr->pos;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
if (res->entry.haspos)
|
||||
{
|
||||
*(uint16 *) (res->pos) = uniquePos(&(res->pos[1]), *(uint16 *) (res->pos));
|
||||
*outbuflen += *(uint16 *) (res->pos) * sizeof(WordEntryPos);
|
||||
}
|
||||
*outbuflen += SHORTALIGN(res->entry.len);
|
||||
|
||||
return res + 1 - a;
|
||||
}
|
||||
|
||||
static int
|
||||
WordEntryCMP(WordEntry * a, WordEntry * b, char *buf)
|
||||
{
|
||||
return compareentry(a, b, buf);
|
||||
}
|
||||
|
||||
#define WAITWORD 1
|
||||
#define WAITENDWORD 2
|
||||
#define WAITNEXTCHAR 3
|
||||
#define WAITENDCMPLX 4
|
||||
#define WAITPOSINFO 5
|
||||
#define INPOSINFO 6
|
||||
#define WAITPOSDELIM 7
|
||||
#define WAITCHARCMPLX 8
|
||||
|
||||
#define RESIZEPRSBUF \
|
||||
do { \
|
||||
if ( state->curpos - state->word + pg_database_encoding_max_length() >= state->len ) \
|
||||
{ \
|
||||
int4 clen = state->curpos - state->word; \
|
||||
state->len *= 2; \
|
||||
state->word = (char*)repalloc( (void*)state->word, state->len ); \
|
||||
state->curpos = state->word + clen; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
bool
|
||||
gettoken_tsvector(TSVectorParseState *state)
|
||||
{
|
||||
int4 oldstate = 0;
|
||||
|
||||
state->curpos = state->word;
|
||||
state->state = WAITWORD;
|
||||
state->alen = 0;
|
||||
|
||||
while (1)
|
||||
{
|
||||
if (state->state == WAITWORD)
|
||||
{
|
||||
if (*(state->prsbuf) == '\0')
|
||||
return false;
|
||||
else if (t_iseq(state->prsbuf, '\''))
|
||||
state->state = WAITENDCMPLX;
|
||||
else if (t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
state->state = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
else if (!t_isspace(state->prsbuf))
|
||||
{
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
state->state = WAITENDWORD;
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITNEXTCHAR)
|
||||
{
|
||||
if (*(state->prsbuf) == '\0')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("there is no escaped character")));
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
state->state = oldstate;
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITENDWORD)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
state->state = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
||||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
if (state->curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
*(state->curpos) = '\0';
|
||||
return true;
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, ':'))
|
||||
{
|
||||
if (state->curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
*(state->curpos) = '\0';
|
||||
if (state->oprisdelim)
|
||||
return true;
|
||||
else
|
||||
state->state = INPOSINFO;
|
||||
}
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITENDCMPLX)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
state->state = WAITCHARCMPLX;
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
state->state = WAITNEXTCHAR;
|
||||
oldstate = WAITENDCMPLX;
|
||||
}
|
||||
else if (*(state->prsbuf) == '\0')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITCHARCMPLX)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(state->curpos, state->prsbuf);
|
||||
state->curpos += pg_mblen(state->prsbuf);
|
||||
state->state = WAITENDCMPLX;
|
||||
}
|
||||
else
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
*(state->curpos) = '\0';
|
||||
if (state->curpos == state->word)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
if (state->oprisdelim)
|
||||
{
|
||||
/* state->prsbuf+=pg_mblen(state->prsbuf); */
|
||||
return true;
|
||||
}
|
||||
else
|
||||
state->state = WAITPOSINFO;
|
||||
continue; /* recheck current character */
|
||||
}
|
||||
}
|
||||
else if (state->state == WAITPOSINFO)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, ':'))
|
||||
state->state = INPOSINFO;
|
||||
else
|
||||
return true;
|
||||
}
|
||||
else if (state->state == INPOSINFO)
|
||||
{
|
||||
if (t_isdigit(state->prsbuf))
|
||||
{
|
||||
if (state->alen == 0)
|
||||
{
|
||||
state->alen = 4;
|
||||
state->pos = (WordEntryPos *) palloc(sizeof(WordEntryPos) * state->alen);
|
||||
*(uint16 *) (state->pos) = 0;
|
||||
}
|
||||
else if (*(uint16 *) (state->pos) + 1 >= state->alen)
|
||||
{
|
||||
state->alen *= 2;
|
||||
state->pos = (WordEntryPos *) repalloc(state->pos, sizeof(WordEntryPos) * state->alen);
|
||||
}
|
||||
(*(uint16 *) (state->pos))++;
|
||||
WEP_SETPOS(state->pos[*(uint16 *) (state->pos)], LIMITPOS(atoi(state->prsbuf)));
|
||||
if (WEP_GETPOS(state->pos[*(uint16 *) (state->pos)]) == 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("wrong position info in tsvector")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
|
||||
state->state = WAITPOSDELIM;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
}
|
||||
else if (state->state == WAITPOSDELIM)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, ','))
|
||||
state->state = INPOSINFO;
|
||||
else if (t_iseq(state->prsbuf, 'a') || t_iseq(state->prsbuf, 'A') || t_iseq(state->prsbuf, '*'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 3);
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, 'b') || t_iseq(state->prsbuf, 'B'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 2);
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, 'c') || t_iseq(state->prsbuf, 'C'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 1);
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, 'd') || t_iseq(state->prsbuf, 'D'))
|
||||
{
|
||||
if (WEP_GETWEIGHT(state->pos[*(uint16 *) (state->pos)]))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
WEP_SETWEIGHT(state->pos[*(uint16 *) (state->pos)], 0);
|
||||
}
|
||||
else if (t_isspace(state->prsbuf) ||
|
||||
*(state->prsbuf) == '\0')
|
||||
return true;
|
||||
else if (!t_isdigit(state->prsbuf))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("syntax error in tsvector")));
|
||||
}
|
||||
else /* internal error */
|
||||
elog(ERROR, "internal error in gettoken_tsvector");
|
||||
|
||||
/* get next char */
|
||||
state->prsbuf += pg_mblen(state->prsbuf);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
Datum
|
||||
tsvectorin(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *buf = PG_GETARG_CSTRING(0);
|
||||
TSVectorParseState state;
|
||||
WordEntryIN *arr;
|
||||
WordEntry *inarr;
|
||||
int4 len = 0,
|
||||
totallen = 64;
|
||||
TSVector in;
|
||||
char *tmpbuf,
|
||||
*cur;
|
||||
int4 i,
|
||||
buflen = 256;
|
||||
|
||||
pg_verifymbstr(buf, strlen(buf), false);
|
||||
state.prsbuf = buf;
|
||||
state.len = 32;
|
||||
state.word = (char *) palloc(state.len);
|
||||
state.oprisdelim = false;
|
||||
|
||||
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * totallen);
|
||||
cur = tmpbuf = (char *) palloc(buflen);
|
||||
|
||||
while (gettoken_tsvector(&state))
|
||||
{
|
||||
/*
|
||||
* Realloc buffers if it's needed
|
||||
*/
|
||||
if (len >= totallen)
|
||||
{
|
||||
totallen *= 2;
|
||||
arr = (WordEntryIN *) repalloc((void *) arr, sizeof(WordEntryIN) * totallen);
|
||||
}
|
||||
|
||||
while ((cur - tmpbuf) + (state.curpos - state.word) >= buflen)
|
||||
{
|
||||
int4 dist = cur - tmpbuf;
|
||||
|
||||
buflen *= 2;
|
||||
tmpbuf = (char *) repalloc((void *) tmpbuf, buflen);
|
||||
cur = tmpbuf + dist;
|
||||
}
|
||||
|
||||
if (state.curpos - state.word >= MAXSTRLEN)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("word is too long (%d bytes, max %d bytes)",
|
||||
state.curpos - state.word, MAXSTRLEN)));
|
||||
|
||||
arr[len].entry.len = state.curpos - state.word;
|
||||
if (cur - tmpbuf > MAXSTRPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("position value too large")));
|
||||
arr[len].entry.pos = cur - tmpbuf;
|
||||
memcpy((void *) cur, (void *) state.word, arr[len].entry.len);
|
||||
cur += arr[len].entry.len;
|
||||
|
||||
if (state.alen)
|
||||
{
|
||||
arr[len].entry.haspos = 1;
|
||||
arr[len].pos = state.pos;
|
||||
}
|
||||
else
|
||||
arr[len].entry.haspos = 0;
|
||||
len++;
|
||||
}
|
||||
pfree(state.word);
|
||||
|
||||
if (len > 0)
|
||||
len = uniqueentry(arr, len, tmpbuf, &buflen);
|
||||
else
|
||||
buflen = 0;
|
||||
totallen = CALCDATASIZE(len, buflen);
|
||||
in = (TSVector) palloc0(totallen);
|
||||
|
||||
SET_VARSIZE(in, totallen);
|
||||
in->size = len;
|
||||
cur = STRPTR(in);
|
||||
inarr = ARRPTR(in);
|
||||
for (i = 0; i < len; i++)
|
||||
{
|
||||
memcpy((void *) cur, (void *) &tmpbuf[arr[i].entry.pos], arr[i].entry.len);
|
||||
arr[i].entry.pos = cur - STRPTR(in);
|
||||
cur += SHORTALIGN(arr[i].entry.len);
|
||||
if (arr[i].entry.haspos)
|
||||
{
|
||||
memcpy(cur, arr[i].pos, (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos));
|
||||
cur += (*(uint16 *) arr[i].pos + 1) * sizeof(WordEntryPos);
|
||||
pfree(arr[i].pos);
|
||||
}
|
||||
inarr[i] = arr[i].entry;
|
||||
}
|
||||
|
||||
PG_RETURN_TSVECTOR(in);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsvectorout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector out = PG_GETARG_TSVECTOR(0);
|
||||
char *outbuf;
|
||||
int4 i,
|
||||
lenbuf = 0,
|
||||
pp;
|
||||
WordEntry *ptr = ARRPTR(out);
|
||||
char *curbegin,
|
||||
*curin,
|
||||
*curout;
|
||||
|
||||
lenbuf = out->size * 2 /* '' */ + out->size - 1 /* space */ + 2 /* \0 */ ;
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
lenbuf += ptr[i].len * 2 * pg_database_encoding_max_length() /* for escape */ ;
|
||||
if (ptr[i].haspos)
|
||||
lenbuf += 1 /* : */ + 7 /* int2 + , + weight */ * POSDATALEN(out, &(ptr[i]));
|
||||
}
|
||||
|
||||
curout = outbuf = (char *) palloc(lenbuf);
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
curbegin = curin = STRPTR(out) + ptr->pos;
|
||||
if (i != 0)
|
||||
*curout++ = ' ';
|
||||
*curout++ = '\'';
|
||||
while (curin - curbegin < ptr->len)
|
||||
{
|
||||
int len = pg_mblen(curin);
|
||||
|
||||
if (t_iseq(curin, '\''))
|
||||
*curout++ = '\'';
|
||||
|
||||
while (len--)
|
||||
*curout++ = *curin++;
|
||||
}
|
||||
|
||||
*curout++ = '\'';
|
||||
if ((pp = POSDATALEN(out, ptr)) != 0)
|
||||
{
|
||||
WordEntryPos *wptr;
|
||||
|
||||
*curout++ = ':';
|
||||
wptr = POSDATAPTR(out, ptr);
|
||||
while (pp)
|
||||
{
|
||||
curout += sprintf(curout, "%d", WEP_GETPOS(*wptr));
|
||||
switch (WEP_GETWEIGHT(*wptr))
|
||||
{
|
||||
case 3:
|
||||
*curout++ = 'A';
|
||||
break;
|
||||
case 2:
|
||||
*curout++ = 'B';
|
||||
break;
|
||||
case 1:
|
||||
*curout++ = 'C';
|
||||
break;
|
||||
case 0:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (pp > 1)
|
||||
*curout++ = ',';
|
||||
pp--;
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
*curout = '\0';
|
||||
PG_FREE_IF_COPY(out, 0);
|
||||
PG_RETURN_CSTRING(outbuf);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsvectorsend(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSVector vec = PG_GETARG_TSVECTOR(0);
|
||||
StringInfoData buf;
|
||||
int i,
|
||||
j;
|
||||
WordEntry *weptr = ARRPTR(vec);
|
||||
|
||||
pq_begintypsend(&buf);
|
||||
|
||||
pq_sendint(&buf, vec->size, sizeof(int32));
|
||||
for (i = 0; i < vec->size; i++)
|
||||
{
|
||||
/*
|
||||
* We are sure that sizeof(WordEntry) == sizeof(int32)
|
||||
*/
|
||||
pq_sendint(&buf, *(int32 *) weptr, sizeof(int32));
|
||||
|
||||
pq_sendbytes(&buf, STRPTR(vec) + weptr->pos, weptr->len);
|
||||
if (weptr->haspos)
|
||||
{
|
||||
WordEntryPos *wepptr = POSDATAPTR(vec, weptr);
|
||||
|
||||
pq_sendint(&buf, POSDATALEN(vec, weptr), sizeof(WordEntryPos));
|
||||
for (j = 0; j < POSDATALEN(vec, weptr); j++)
|
||||
pq_sendint(&buf, wepptr[j], sizeof(WordEntryPos));
|
||||
}
|
||||
weptr++;
|
||||
}
|
||||
|
||||
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
||||
}
|
||||
|
||||
Datum
|
||||
tsvectorrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||
TSVector vec;
|
||||
int i,
|
||||
size,
|
||||
len = DATAHDRSIZE;
|
||||
WordEntry *weptr;
|
||||
int datalen = 0;
|
||||
|
||||
size = pq_getmsgint(buf, sizeof(uint32));
|
||||
if (size < 0 || size > (MaxAllocSize / sizeof(WordEntry)))
|
||||
elog(ERROR, "invalid size of tsvector");
|
||||
|
||||
len += sizeof(WordEntry) * size;
|
||||
|
||||
len *= 2;
|
||||
vec = (TSVector) palloc0(len);
|
||||
vec->size = size;
|
||||
|
||||
weptr = ARRPTR(vec);
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
int tmp;
|
||||
|
||||
weptr = ARRPTR(vec) + i;
|
||||
|
||||
/*
|
||||
* We are sure that sizeof(WordEntry) == sizeof(int32)
|
||||
*/
|
||||
tmp = pq_getmsgint(buf, sizeof(int32));
|
||||
*weptr = *(WordEntry *) & tmp;
|
||||
|
||||
while (CALCDATASIZE(size, datalen + SHORTALIGN(weptr->len)) >= len)
|
||||
{
|
||||
len *= 2;
|
||||
vec = (TSVector) repalloc(vec, len);
|
||||
weptr = ARRPTR(vec) + i;
|
||||
}
|
||||
|
||||
memcpy(STRPTR(vec) + weptr->pos,
|
||||
pq_getmsgbytes(buf, weptr->len),
|
||||
weptr->len);
|
||||
datalen += SHORTALIGN(weptr->len);
|
||||
|
||||
if (i > 0 && WordEntryCMP(weptr, weptr - 1, STRPTR(vec)) <= 0)
|
||||
elog(ERROR, "lexemes are unordered");
|
||||
|
||||
if (weptr->haspos)
|
||||
{
|
||||
uint16 j,
|
||||
npos;
|
||||
WordEntryPos *wepptr;
|
||||
|
||||
npos = (uint16) pq_getmsgint(buf, sizeof(int16));
|
||||
if (npos > MAXNUMPOS)
|
||||
elog(ERROR, "unexpected number of positions");
|
||||
|
||||
while (CALCDATASIZE(size, datalen + (npos + 1) * sizeof(WordEntryPos)) >= len)
|
||||
{
|
||||
len *= 2;
|
||||
vec = (TSVector) repalloc(vec, len);
|
||||
weptr = ARRPTR(vec) + i;
|
||||
}
|
||||
|
||||
memcpy(_POSDATAPTR(vec, weptr), &npos, sizeof(int16));
|
||||
wepptr = POSDATAPTR(vec, weptr);
|
||||
for (j = 0; j < npos; j++)
|
||||
{
|
||||
wepptr[j] = (WordEntryPos) pq_getmsgint(buf, sizeof(int16));
|
||||
if (j > 0 && WEP_GETPOS(wepptr[j]) <= WEP_GETPOS(wepptr[j - 1]))
|
||||
elog(ERROR, "position information is unordered");
|
||||
}
|
||||
|
||||
datalen += (npos + 1) * sizeof(WordEntry);
|
||||
}
|
||||
}
|
||||
|
||||
SET_VARSIZE(vec, CALCDATASIZE(vec->size, datalen));
|
||||
|
||||
PG_RETURN_TSVECTOR(vec);
|
||||
}
|
||||
1334
src/backend/utils/adt/tsvector_op.c
Normal file
1334
src/backend/utils/adt/tsvector_op.c
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user