mirror of
https://github.com/postgres/postgres.git
synced 2025-11-29 23:43:17 +03:00
Extend GIN to support partial-match searches, and extend tsquery to support
prefix matching using this facility. Teodor Sigaev and Oleg Bartunov
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.11 2008/04/14 17:05:33 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsginidx.c,v 1.12 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -19,6 +19,46 @@
|
||||
#include "utils/builtins.h"
|
||||
|
||||
|
||||
Datum
|
||||
gin_cmp_tslexeme(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *a = PG_GETARG_TEXT_P(0);
|
||||
text *b = PG_GETARG_TEXT_P(1);
|
||||
int cmp;
|
||||
|
||||
cmp = tsCompareString(
|
||||
VARDATA(a), VARSIZE(a) - VARHDRSZ,
|
||||
VARDATA(b), VARSIZE(b) - VARHDRSZ,
|
||||
false );
|
||||
|
||||
PG_FREE_IF_COPY(a,0);
|
||||
PG_FREE_IF_COPY(b,1);
|
||||
PG_RETURN_INT32( cmp );
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_cmp_prefix(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *a = PG_GETARG_TEXT_P(0);
|
||||
text *b = PG_GETARG_TEXT_P(1);
|
||||
#ifdef NOT_USED
|
||||
StrategyNumber strategy = PG_GETARG_UINT16(2);
|
||||
#endif
|
||||
int cmp;
|
||||
|
||||
cmp = tsCompareString(
|
||||
VARDATA(a), VARSIZE(a) - VARHDRSZ,
|
||||
VARDATA(b), VARSIZE(b) - VARHDRSZ,
|
||||
true );
|
||||
|
||||
if ( cmp < 0 )
|
||||
cmp = 1; /* prevent continue scan */
|
||||
|
||||
PG_FREE_IF_COPY(a,0);
|
||||
PG_FREE_IF_COPY(b,1);
|
||||
PG_RETURN_INT32( cmp );
|
||||
}
|
||||
|
||||
Datum
|
||||
gin_extract_tsvector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
@@ -55,7 +95,9 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
||||
TSQuery query = PG_GETARG_TSQUERY(0);
|
||||
int32 *nentries = (int32 *) PG_GETARG_POINTER(1);
|
||||
/* StrategyNumber strategy = PG_GETARG_UINT16(2); */
|
||||
bool **ptr_partialmatch = (bool**) PG_GETARG_POINTER(3);
|
||||
Datum *entries = NULL;
|
||||
bool *partialmatch;
|
||||
|
||||
*nentries = 0;
|
||||
|
||||
@@ -65,12 +107,14 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
||||
j = 0,
|
||||
len;
|
||||
QueryItem *item;
|
||||
bool use_fullscan=false;
|
||||
|
||||
item = clean_NOT(GETQUERY(query), &len);
|
||||
if (!item)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||
errmsg("query requires full scan, which is not supported by GIN indexes")));
|
||||
{
|
||||
use_fullscan = true;
|
||||
*nentries = 1;
|
||||
}
|
||||
|
||||
item = GETQUERY(query);
|
||||
|
||||
@@ -79,6 +123,7 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
||||
(*nentries)++;
|
||||
|
||||
entries = (Datum *) palloc(sizeof(Datum) * (*nentries));
|
||||
partialmatch = *ptr_partialmatch = (bool*) palloc(sizeof(bool) * (*nentries));
|
||||
|
||||
for (i = 0; i < query->size; i++)
|
||||
if (item[i].type == QI_VAL)
|
||||
@@ -88,8 +133,12 @@ gin_extract_tsquery(PG_FUNCTION_ARGS)
|
||||
|
||||
txt = cstring_to_text_with_len(GETOPERAND(query) + val->distance,
|
||||
val->length);
|
||||
partialmatch[j] = val->prefix;
|
||||
entries[j++] = PointerGetDatum(txt);
|
||||
}
|
||||
|
||||
if ( use_fullscan )
|
||||
entries[j++] = PointerGetDatum(cstring_to_text_with_len("", 0));
|
||||
}
|
||||
else
|
||||
*nentries = -1; /* nothing can be found */
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.8 2008/04/14 17:05:33 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsgistidx.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -307,6 +307,12 @@ checkcondition_arr(void *checkval, QueryOperand *val)
|
||||
|
||||
/* Loop invariant: StopLow <= val < StopHigh */
|
||||
|
||||
/*
|
||||
* we are not able to find a prefix by hash value
|
||||
*/
|
||||
if ( val->prefix )
|
||||
return true;
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
@@ -324,6 +330,11 @@ checkcondition_arr(void *checkval, QueryOperand *val)
|
||||
static bool
|
||||
checkcondition_bit(void *checkval, QueryOperand *val)
|
||||
{
|
||||
/*
|
||||
* we are not able to find a a prefix in signature tree
|
||||
*/
|
||||
if ( val->prefix )
|
||||
return true;
|
||||
return GETBIT(checkval, HASHVAL(val->valcrc));
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -56,12 +56,14 @@ struct TSQueryParserStateData
|
||||
#define WAITSINGLEOPERAND 4
|
||||
|
||||
/*
|
||||
* subroutine to parse the weight part, like ':1AB' of a query.
|
||||
* subroutine to parse the modifiers (weight and prefix flag currently)
|
||||
* part, like ':1AB' of a query.
|
||||
*/
|
||||
static char *
|
||||
get_weight(char *buf, int16 *weight)
|
||||
get_modifiers(char *buf, int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
if (!t_iseq(buf, ':'))
|
||||
return buf;
|
||||
@@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight)
|
||||
case 'D':
|
||||
*weight |= 1;
|
||||
break;
|
||||
case '*':
|
||||
*prefix = true;
|
||||
break;
|
||||
default:
|
||||
return buf;
|
||||
}
|
||||
@@ -118,8 +123,11 @@ typedef enum
|
||||
static ts_tokentype
|
||||
gettoken_query(TSQueryParserState state,
|
||||
int8 *operator,
|
||||
int *lenval, char **strval, int16 *weight)
|
||||
int *lenval, char **strval, int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
while (1)
|
||||
{
|
||||
switch (state->state)
|
||||
@@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state,
|
||||
reset_tsvector_parser(state->valstate, state->buf);
|
||||
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
|
||||
{
|
||||
state->buf = get_weight(state->buf, weight);
|
||||
state->buf = get_modifiers(state->buf, weight, prefix);
|
||||
state->state = WAITOPERATOR;
|
||||
return PT_VAL;
|
||||
}
|
||||
@@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper)
|
||||
}
|
||||
|
||||
static void
|
||||
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
|
||||
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
|
||||
{
|
||||
QueryOperand *tmp;
|
||||
|
||||
@@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
||||
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
|
||||
tmp->type = QI_VAL;
|
||||
tmp->weight = weight;
|
||||
tmp->prefix = prefix;
|
||||
tmp->valcrc = (int32) valcrc;
|
||||
tmp->length = lenval;
|
||||
tmp->distance = distance;
|
||||
@@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
||||
* of the string.
|
||||
*/
|
||||
void
|
||||
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
||||
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
|
||||
{
|
||||
pg_crc32 valcrc;
|
||||
|
||||
@@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
||||
INIT_CRC32(valcrc);
|
||||
COMP_CRC32(valcrc, strval, lenval);
|
||||
FIN_CRC32(valcrc);
|
||||
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
|
||||
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
|
||||
|
||||
/* append the value string to state.op, enlarging buffer if needed first */
|
||||
while (state->curop - state->op + lenval + 1 >= state->lenop)
|
||||
@@ -330,16 +339,17 @@ makepol(TSQueryParserState state,
|
||||
int8 opstack[STACKDEPTH];
|
||||
int lenstack = 0;
|
||||
int16 weight = 0;
|
||||
bool prefix;
|
||||
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
|
||||
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
|
||||
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case PT_VAL:
|
||||
pushval(opaque, state, strval, lenval, weight);
|
||||
pushval(opaque, state, strval, lenval, weight, prefix);
|
||||
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
|
||||
opstack[lenstack - 1] == OP_NOT))
|
||||
{
|
||||
@@ -549,9 +559,9 @@ parse_tsquery(char *buf,
|
||||
|
||||
static void
|
||||
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
|
||||
int16 weight)
|
||||
int16 weight, bool prefix)
|
||||
{
|
||||
pushValue(state, strval, lenval, weight);
|
||||
pushValue(state, strval, lenval, weight, prefix);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -605,7 +615,7 @@ infix(INFIX *in, bool first)
|
||||
char *op = in->op + curpol->distance;
|
||||
int clen;
|
||||
|
||||
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
|
||||
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
while (*op)
|
||||
@@ -628,10 +638,15 @@ infix(INFIX *in, bool first)
|
||||
}
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
if (curpol->weight)
|
||||
if (curpol->weight || curpol->prefix)
|
||||
{
|
||||
*(in->cur) = ':';
|
||||
in->cur++;
|
||||
if ( curpol->prefix )
|
||||
{
|
||||
*(in->cur) = '*';
|
||||
in->cur++;
|
||||
}
|
||||
if (curpol->weight & (1 << 3))
|
||||
{
|
||||
*(in->cur) = 'A';
|
||||
@@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS)
|
||||
* uint8 type, QI_VAL
|
||||
* uint8 weight
|
||||
* operand text in client encoding, null-terminated
|
||||
* uint8 prefix
|
||||
*
|
||||
* For each operator:
|
||||
* uint8 type, QI_OPR
|
||||
@@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS)
|
||||
{
|
||||
case QI_VAL:
|
||||
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
|
||||
pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
|
||||
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
|
||||
break;
|
||||
case QI_OPR:
|
||||
@@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
size_t val_len; /* length after recoding to server encoding */
|
||||
uint8 weight;
|
||||
uint8 prefix;
|
||||
const char *val;
|
||||
pg_crc32 valcrc;
|
||||
|
||||
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||
prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||
val = pq_getmsgstring(buf);
|
||||
val_len = strlen(val);
|
||||
|
||||
@@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
FIN_CRC32(valcrc);
|
||||
|
||||
item->operand.weight = weight;
|
||||
item->operand.prefix = (prefix) ? true : false;
|
||||
item->operand.valcrc = (int32) valcrc;
|
||||
item->operand.length = val_len;
|
||||
item->operand.distance = datalen;
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.8 2008/01/01 19:45:53 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery_util.c,v 1.9 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -125,10 +125,7 @@ QTNodeCompare(QTNode *an, QTNode *bn)
|
||||
return (ao->valcrc > bo->valcrc) ? -1 : 1;
|
||||
}
|
||||
|
||||
if (ao->length == bo->length)
|
||||
return strncmp(an->word, bn->word, ao->length);
|
||||
else
|
||||
return (ao->length > bo->length) ? -1 : 1;
|
||||
return tsCompareString( an->word, ao->length, bn->word, bo->length, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.12 2008/01/01 19:45:53 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsrank.c,v 1.13 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -71,45 +71,60 @@ cnt_length(TSVector t)
|
||||
return len;
|
||||
}
|
||||
|
||||
static int
|
||||
WordECompareQueryItem(char *eval, char *qval, WordEntry *ptr, QueryOperand *item)
|
||||
{
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
}
|
||||
#define WordECompareQueryItem(e,q,p,i,m) \
|
||||
tsCompareString((q) + (i)->distance, (i)->length, \
|
||||
(e) + (p)->pos, (p)->len, (m))
|
||||
|
||||
|
||||
/*
|
||||
* Returns a pointer to a WordEntry corresponding 'item' from tsvector 't'. 'q'
|
||||
* is the TSQuery containing 'item'. Returns NULL if not found.
|
||||
* Returns a pointer to a WordEntry's array corresponding to 'item' from
|
||||
* tsvector 't'. 'q' is the TSQuery containing 'item'.
|
||||
* Returns NULL if not found.
|
||||
*/
|
||||
static WordEntry *
|
||||
find_wordentry(TSVector t, TSQuery q, QueryOperand *item)
|
||||
find_wordentry(TSVector t, TSQuery q, QueryOperand *item, int32 *nitem)
|
||||
{
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
WordEntry *StopMiddle = StopHigh;
|
||||
int difference;
|
||||
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
*nitem=0;
|
||||
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
difference = WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, false);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
{
|
||||
StopHigh = StopMiddle;
|
||||
*nitem=1;
|
||||
break;
|
||||
}
|
||||
else if (difference > 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
if ( item->prefix == true )
|
||||
{
|
||||
if ( StopLow >= StopHigh )
|
||||
StopMiddle = StopHigh;
|
||||
|
||||
*nitem=0;
|
||||
|
||||
while( StopMiddle < (WordEntry *) STRPTR(t) &&
|
||||
WordECompareQueryItem(STRPTR(t), GETOPERAND(q), StopMiddle, item, true) == 0 )
|
||||
{
|
||||
(*nitem)++;
|
||||
StopMiddle++;
|
||||
}
|
||||
}
|
||||
|
||||
return ( *nitem > 0 ) ? StopHigh : NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -123,12 +138,9 @@ compareQueryOperand(const void *a, const void *b, void *arg)
|
||||
QueryOperand *qa = (*(QueryOperand **) a);
|
||||
QueryOperand *qb = (*(QueryOperand **) b);
|
||||
|
||||
if (qa->length == qb->length)
|
||||
return strncmp(operand + qa->distance,
|
||||
operand + qb->distance,
|
||||
qb->length);
|
||||
|
||||
return (qa->length > qb->length) ? 1 : -1;
|
||||
return tsCompareString(operand + qa->distance, qa->length,
|
||||
operand + qb->distance, qb->length,
|
||||
false);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -198,12 +210,14 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
||||
k,
|
||||
l,
|
||||
p;
|
||||
WordEntry *entry;
|
||||
WordEntry *entry,
|
||||
*firstentry;
|
||||
WordEntryPos *post,
|
||||
*ct;
|
||||
int4 dimt,
|
||||
lenct,
|
||||
dist;
|
||||
dist,
|
||||
nitem;
|
||||
float res = -1.0;
|
||||
QueryOperand **item;
|
||||
int size = q->size;
|
||||
@@ -219,40 +233,44 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
||||
|
||||
for (i = 0; i < size; i++)
|
||||
{
|
||||
entry = find_wordentry(t, q, item[i]);
|
||||
firstentry = entry = find_wordentry(t, q, item[i], &nitem);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
pos[i] = _POSVECPTR(t, entry);
|
||||
else
|
||||
pos[i] = &POSNULL;
|
||||
|
||||
|
||||
dimt = pos[i]->npos;
|
||||
post = pos[i]->pos;
|
||||
for (k = 0; k < i; k++)
|
||||
while( entry - firstentry < nitem )
|
||||
{
|
||||
if (!pos[k])
|
||||
continue;
|
||||
lenct = pos[k]->npos;
|
||||
ct = pos[k]->pos;
|
||||
for (l = 0; l < dimt; l++)
|
||||
{
|
||||
for (p = 0; p < lenct; p++)
|
||||
{
|
||||
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
|
||||
if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
|
||||
{
|
||||
float curw;
|
||||
if (entry->haspos)
|
||||
pos[i] = _POSVECPTR(t, entry);
|
||||
else
|
||||
pos[i] = &POSNULL;
|
||||
|
||||
if (!dist)
|
||||
dist = MAXENTRYPOS;
|
||||
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
|
||||
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
||||
dimt = pos[i]->npos;
|
||||
post = pos[i]->pos;
|
||||
for (k = 0; k < i; k++)
|
||||
{
|
||||
if (!pos[k])
|
||||
continue;
|
||||
lenct = pos[k]->npos;
|
||||
ct = pos[k]->pos;
|
||||
for (l = 0; l < dimt; l++)
|
||||
{
|
||||
for (p = 0; p < lenct; p++)
|
||||
{
|
||||
dist = Abs((int) WEP_GETPOS(post[l]) - (int) WEP_GETPOS(ct[p]));
|
||||
if (dist || (dist == 0 && (pos[i] == &POSNULL || pos[k] == &POSNULL)))
|
||||
{
|
||||
float curw;
|
||||
|
||||
if (!dist)
|
||||
dist = MAXENTRYPOS;
|
||||
curw = sqrt(wpos(post[l]) * wpos(ct[p]) * word_distance(dist));
|
||||
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
pfree(pos);
|
||||
@@ -263,11 +281,13 @@ calc_rank_and(float *w, TSVector t, TSQuery q)
|
||||
static float
|
||||
calc_rank_or(float *w, TSVector t, TSQuery q)
|
||||
{
|
||||
WordEntry *entry;
|
||||
WordEntry *entry,
|
||||
*firstentry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
i,
|
||||
nitem;
|
||||
float res = 0.0;
|
||||
QueryOperand **item;
|
||||
int size = q->size;
|
||||
@@ -280,41 +300,46 @@ calc_rank_or(float *w, TSVector t, TSQuery q)
|
||||
wjm;
|
||||
int4 jm;
|
||||
|
||||
entry = find_wordentry(t, q, item[i]);
|
||||
firstentry = entry = find_wordentry(t, q, item[i], &nitem);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
while( entry - firstentry < nitem )
|
||||
{
|
||||
dimt = POSDATALEN(t, entry);
|
||||
post = POSDATAPTR(t, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = POSNULL.npos;
|
||||
post = POSNULL.pos;
|
||||
}
|
||||
|
||||
resj = 0.0;
|
||||
wjm = -1.0;
|
||||
jm = 0;
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
|
||||
if (wpos(post[j]) > wjm)
|
||||
if (entry->haspos)
|
||||
{
|
||||
wjm = wpos(post[j]);
|
||||
jm = j;
|
||||
dimt = POSDATALEN(t, entry);
|
||||
post = POSDATAPTR(t, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = POSNULL.npos;
|
||||
post = POSNULL.pos;
|
||||
}
|
||||
|
||||
resj = 0.0;
|
||||
wjm = -1.0;
|
||||
jm = 0;
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
resj = resj + wpos(post[j]) / ((j + 1) * (j + 1));
|
||||
if (wpos(post[j]) > wjm)
|
||||
{
|
||||
wjm = wpos(post[j]);
|
||||
jm = j;
|
||||
}
|
||||
}
|
||||
}
|
||||
/*
|
||||
limit (sum(1/i^2),i->inf) = pi^2/6
|
||||
resj = sum(wi/i^2),i=1,noccurence,
|
||||
wi - should be sorted desc,
|
||||
don't sort for now, just choose maximum weight. This should be corrected
|
||||
Oleg Bartunov
|
||||
limit (sum(1/i^2),i->inf) = pi^2/6
|
||||
resj = sum(wi/i^2),i=1,noccurence,
|
||||
wi - should be sorted desc,
|
||||
don't sort for now, just choose maximum weight. This should be corrected
|
||||
Oleg Bartunov
|
||||
*/
|
||||
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
||||
res = res + (wjm + resj - wjm / ((jm + 1) * (jm + 1))) / 1.64493406685;
|
||||
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
if (size > 0)
|
||||
res = res / size;
|
||||
@@ -594,11 +619,13 @@ static DocRepresentation *
|
||||
get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
{
|
||||
QueryItem *item = GETQUERY(qr->query);
|
||||
WordEntry *entry;
|
||||
WordEntry *entry,
|
||||
*firstentry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
i,
|
||||
nitem;
|
||||
int len = qr->query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
@@ -619,63 +646,68 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
|
||||
continue;
|
||||
|
||||
entry = find_wordentry(txt, qr->query, curoperand);
|
||||
firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if (entry->haspos)
|
||||
while( entry - firstentry < nitem )
|
||||
{
|
||||
dimt = POSDATALEN(txt, entry);
|
||||
post = POSDATAPTR(txt, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = POSNULL.npos;
|
||||
post = POSNULL.pos;
|
||||
}
|
||||
|
||||
while (cur + dimt >= len)
|
||||
{
|
||||
len *= 2;
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (j == 0)
|
||||
if (entry->haspos)
|
||||
{
|
||||
int k;
|
||||
|
||||
doc[cur].nitem = 0;
|
||||
doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
|
||||
for (k = 0; k < qr->query->size; k++)
|
||||
{
|
||||
QueryOperand *kptr = &item[k].operand;
|
||||
QueryOperand *iptr = &item[i].operand;
|
||||
|
||||
if (k == i ||
|
||||
(item[k].type == QI_VAL &&
|
||||
compareQueryOperand(&kptr, &iptr, operand) == 0))
|
||||
{
|
||||
/*
|
||||
* if k == i, we've already checked above that it's
|
||||
* type == Q_VAL
|
||||
*/
|
||||
doc[cur].item[doc[cur].nitem] = item + k;
|
||||
doc[cur].nitem++;
|
||||
QR_SET_OPERAND_EXISTS(qr, item + k);
|
||||
}
|
||||
}
|
||||
dimt = POSDATALEN(txt, entry);
|
||||
post = POSDATAPTR(txt, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
doc[cur].nitem = doc[cur - 1].nitem;
|
||||
doc[cur].item = doc[cur - 1].item;
|
||||
dimt = POSNULL.npos;
|
||||
post = POSNULL.pos;
|
||||
}
|
||||
doc[cur].pos = WEP_GETPOS(post[j]);
|
||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||
cur++;
|
||||
|
||||
while (cur + dimt >= len)
|
||||
{
|
||||
len *= 2;
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
int k;
|
||||
|
||||
doc[cur].nitem = 0;
|
||||
doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
|
||||
for (k = 0; k < qr->query->size; k++)
|
||||
{
|
||||
QueryOperand *kptr = &item[k].operand;
|
||||
QueryOperand *iptr = &item[i].operand;
|
||||
|
||||
if (k == i ||
|
||||
(item[k].type == QI_VAL &&
|
||||
compareQueryOperand(&kptr, &iptr, operand) == 0))
|
||||
{
|
||||
/*
|
||||
* if k == i, we've already checked above that it's
|
||||
* type == Q_VAL
|
||||
*/
|
||||
doc[cur].item[doc[cur].nitem] = item + k;
|
||||
doc[cur].nitem++;
|
||||
QR_SET_OPERAND_EXISTS(qr, item + k);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
doc[cur].nitem = doc[cur - 1].nitem;
|
||||
doc[cur].item = doc[cur - 1].item;
|
||||
}
|
||||
doc[cur].pos = WEP_GETPOS(post[j]);
|
||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||
cur++;
|
||||
}
|
||||
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.13 2008/03/10 12:57:05 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector.c,v 1.14 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -85,14 +85,9 @@ compareentry(const void *va, const void *vb, void *arg)
|
||||
const WordEntryIN *b = (const WordEntryIN *) vb;
|
||||
char *BufferStr = (char *) arg;
|
||||
|
||||
if (a->entry.len == b->entry.len)
|
||||
{
|
||||
return strncmp(&BufferStr[a->entry.pos],
|
||||
&BufferStr[b->entry.pos],
|
||||
a->entry.len);
|
||||
}
|
||||
|
||||
return (a->entry.len > b->entry.len) ? 1 : -1;
|
||||
return tsCompareString( &BufferStr[a->entry.pos], a->entry.len,
|
||||
&BufferStr[b->entry.pos], b->entry.len,
|
||||
false );
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.15 2008/04/08 18:20:29 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsvector_op.c,v 1.16 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -127,11 +127,7 @@ silly_cmp_tsvector(const TSVector a, const TSVector b)
|
||||
{
|
||||
return (aptr->haspos > bptr->haspos) ? -1 : 1;
|
||||
}
|
||||
else if (aptr->len != bptr->len)
|
||||
{
|
||||
return (aptr->len > bptr->len) ? -1 : 1;
|
||||
}
|
||||
else if ((res = strncmp(STRPTR(a) + aptr->pos, STRPTR(b) + bptr->pos, bptr->len)) != 0)
|
||||
else if ( (res=tsCompareString( STRPTR(a) + aptr->pos, aptr->len, STRPTR(b) + bptr->pos, bptr->len, false)) !=0 )
|
||||
{
|
||||
return res;
|
||||
}
|
||||
@@ -286,18 +282,10 @@ tsvector_setweight(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
static int
|
||||
compareEntry(char *ptra, WordEntry *a, char *ptrb, WordEntry *b)
|
||||
{
|
||||
if (a->len == b->len)
|
||||
{
|
||||
return strncmp(
|
||||
ptra + a->pos,
|
||||
ptrb + b->pos,
|
||||
a->len);
|
||||
}
|
||||
return (a->len > b->len) ? 1 : -1;
|
||||
}
|
||||
#define compareEntry(pa, a, pb, b) \
|
||||
tsCompareString((pa) + (a)->pos, (a)->len, \
|
||||
(pb) + (b)->pos, (b)->len, \
|
||||
false)
|
||||
|
||||
/*
|
||||
* Add positions from src to dest after offsetting them by maxpos.
|
||||
@@ -534,18 +522,46 @@ tsvector_concat(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
/*
|
||||
* compare 2 string values
|
||||
* Compare two strings by tsvector rules.
|
||||
* if prefix = true then it returns zero value iff b has prefix a
|
||||
*/
|
||||
static int4
|
||||
ValCompare(CHKVAL *chkval, WordEntry *ptr, QueryOperand *item)
|
||||
int4
|
||||
tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
|
||||
{
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
&(chkval->values[ptr->pos]),
|
||||
&(chkval->operand[item->distance]),
|
||||
item->length);
|
||||
int cmp;
|
||||
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
if ( lena == 0 )
|
||||
{
|
||||
if ( prefix )
|
||||
cmp = 0; /* empty string is equal to any string in a prefix match */
|
||||
else
|
||||
cmp = (lenb>0) ? -1 : 0;
|
||||
}
|
||||
else if ( lenb == 0 )
|
||||
{
|
||||
cmp = (lena>0) ? 1 : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
cmp = memcmp(a, b, Min(lena, lenb));
|
||||
|
||||
if ( prefix )
|
||||
{
|
||||
if ( cmp == 0 && lena > lenb )
|
||||
{
|
||||
/*
|
||||
* b argument is not beginning with argument a
|
||||
*/
|
||||
cmp=1;
|
||||
}
|
||||
}
|
||||
else if ( (cmp == 0) && (lena != lenb) )
|
||||
{
|
||||
cmp = (lena < lenb) ? -1 : 1;
|
||||
}
|
||||
}
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -582,25 +598,52 @@ checkcondition_str(void *checkval, QueryOperand *val)
|
||||
CHKVAL *chkval = (CHKVAL *) checkval;
|
||||
WordEntry *StopLow = chkval->arrb;
|
||||
WordEntry *StopHigh = chkval->arre;
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
WordEntry *StopMiddle = StopHigh;
|
||||
int difference = -1;
|
||||
bool res=false;
|
||||
|
||||
/* Loop invariant: StopLow <= val < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = ValCompare(chkval, StopMiddle, val);
|
||||
difference = tsCompareString( chkval->operand + val->distance, val->length,
|
||||
chkval->values + StopMiddle->pos, StopMiddle->len,
|
||||
false);
|
||||
|
||||
if (difference == 0)
|
||||
return (val->weight && StopMiddle->haspos) ?
|
||||
{
|
||||
res = (val->weight && StopMiddle->haspos) ?
|
||||
checkclass_str(chkval, StopMiddle, val) : true;
|
||||
else if (difference < 0)
|
||||
break;
|
||||
}
|
||||
else if (difference > 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return (false);
|
||||
if ( res == false && val->prefix == true )
|
||||
{
|
||||
/*
|
||||
* there was a failed exact search, so we should scan further to find
|
||||
* a prefix match.
|
||||
*/
|
||||
if ( StopLow >= StopHigh )
|
||||
StopMiddle = StopHigh;
|
||||
|
||||
while( res == false && StopMiddle < chkval->arre &&
|
||||
tsCompareString( chkval->operand + val->distance, val->length,
|
||||
chkval->values + StopMiddle->pos, StopMiddle->len,
|
||||
true) == 0 )
|
||||
{
|
||||
res = (val->weight && StopMiddle->haspos) ?
|
||||
checkclass_str(chkval, StopMiddle, val) : true;
|
||||
|
||||
StopMiddle++;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -758,50 +801,38 @@ check_weight(TSVector txt, WordEntry *wptr, int8 weight)
|
||||
return num;
|
||||
}
|
||||
|
||||
static WordEntry **
|
||||
SEI_realloc(WordEntry **in, uint32 *len)
|
||||
{
|
||||
if (*len == 0 || in == NULL)
|
||||
{
|
||||
*len = 8;
|
||||
in = palloc(sizeof(WordEntry *) * (*len));
|
||||
}
|
||||
else
|
||||
{
|
||||
*len *= 2;
|
||||
in = repalloc(in, sizeof(WordEntry *) * (*len));
|
||||
}
|
||||
return in;
|
||||
}
|
||||
#define compareStatWord(a,e,s,t) \
|
||||
tsCompareString(STATSTRPTR(s) + (a)->pos, (a)->len, \
|
||||
STRPTR(t) + (e)->pos, (e)->len, \
|
||||
false)
|
||||
|
||||
static int
|
||||
compareStatWord(StatEntry *a, WordEntry *b, tsstat *stat, TSVector txt)
|
||||
typedef struct WordEntryMark
|
||||
{
|
||||
if (a->len == b->len)
|
||||
return strncmp(
|
||||
STATSTRPTR(stat) + a->pos,
|
||||
STRPTR(txt) + b->pos,
|
||||
a->len
|
||||
);
|
||||
return (a->len > b->len) ? 1 : -1;
|
||||
}
|
||||
WordEntry *newentry;
|
||||
StatEntry *pos;
|
||||
} WordEntryMark;
|
||||
|
||||
static tsstat *
|
||||
formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
|
||||
formstat(tsstat *stat, TSVector txt, List *entries)
|
||||
{
|
||||
tsstat *newstat;
|
||||
uint32 totallen,
|
||||
nentry;
|
||||
uint32 slen = 0;
|
||||
WordEntry **ptr = entry;
|
||||
char *curptr;
|
||||
StatEntry *sptr,
|
||||
*nptr;
|
||||
tsstat *newstat;
|
||||
uint32 totallen,
|
||||
nentry,
|
||||
len = list_length(entries);
|
||||
uint32 slen = 0;
|
||||
WordEntry *ptr;
|
||||
char *curptr;
|
||||
StatEntry *sptr,
|
||||
*nptr;
|
||||
ListCell *entry;
|
||||
StatEntry *PosSE = STATPTR(stat),
|
||||
*prevPosSE;
|
||||
WordEntryMark *mark;
|
||||
|
||||
while (ptr - entry < len)
|
||||
foreach( entry, entries )
|
||||
{
|
||||
slen += (*ptr)->len;
|
||||
ptr++;
|
||||
mark = (WordEntryMark*)lfirst(entry);
|
||||
slen += mark->newentry->len;
|
||||
}
|
||||
|
||||
nentry = stat->size + len;
|
||||
@@ -815,78 +846,46 @@ formstat(tsstat *stat, TSVector txt, WordEntry **entry, uint32 len)
|
||||
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
||||
curptr = STATSTRPTR(newstat) + STATSTRSIZE(stat);
|
||||
|
||||
ptr = entry;
|
||||
sptr = STATPTR(stat);
|
||||
nptr = STATPTR(newstat);
|
||||
|
||||
if (len == 1)
|
||||
foreach(entry, entries)
|
||||
{
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
prevPosSE = PosSE;
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
mark = (WordEntryMark*)lfirst(entry);
|
||||
ptr = mark->newentry;
|
||||
PosSE = mark->pos;
|
||||
|
||||
/*
|
||||
* Copy missed entries
|
||||
*/
|
||||
if ( PosSE > prevPosSE )
|
||||
{
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
StopHigh = sptr;
|
||||
memcpy( nptr, prevPosSE, sizeof(StatEntry) * (PosSE-prevPosSE) );
|
||||
nptr += PosSE-prevPosSE;
|
||||
}
|
||||
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
|
||||
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
|
||||
if ((*ptr)->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
|
||||
/*
|
||||
* Copy new entry
|
||||
*/
|
||||
if (ptr->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, ptr, stat->weight) : POSDATALEN(txt, ptr);
|
||||
else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->len = ptr->len;
|
||||
memcpy(curptr, STRPTR(txt) + ptr->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
memcpy(nptr + 1, StopLow, sizeof(StatEntry) * (((StatEntry *) STATSTRPTR(stat)) - StopLow));
|
||||
}
|
||||
else
|
||||
{
|
||||
while (sptr - STATPTR(stat) < stat->size && ptr - entry < len)
|
||||
{
|
||||
if (compareStatWord(sptr, *ptr, stat, txt) < 0)
|
||||
{
|
||||
memcpy(nptr, sptr, sizeof(StatEntry));
|
||||
sptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((*ptr)->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
curptr += nptr->len;
|
||||
ptr++;
|
||||
}
|
||||
nptr++;
|
||||
}
|
||||
curptr += nptr->len;
|
||||
nptr++;
|
||||
|
||||
memcpy(nptr, sptr, sizeof(StatEntry) * (stat->size - (sptr - STATPTR(stat))));
|
||||
|
||||
while (ptr - entry < len)
|
||||
{
|
||||
if ((*ptr)->haspos)
|
||||
nptr->nentry = (stat->weight) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
memcpy(curptr, STRPTR(txt) + (*ptr)->pos, nptr->len);
|
||||
nptr->pos = curptr - STATSTRPTR(newstat);
|
||||
curptr += nptr->len;
|
||||
ptr++;
|
||||
nptr++;
|
||||
}
|
||||
pfree(mark);
|
||||
}
|
||||
|
||||
if ( PosSE < (StatEntry *) STATSTRPTR(stat) )
|
||||
memcpy(nptr, PosSE, sizeof(StatEntry) * (stat->size - (PosSE - STATPTR(stat))));
|
||||
|
||||
return newstat;
|
||||
}
|
||||
|
||||
@@ -907,12 +906,11 @@ ts_accum(tsstat *stat, Datum data)
|
||||
{
|
||||
tsstat *newstat;
|
||||
TSVector txt = DatumGetTSVector(data);
|
||||
WordEntry **newentry = NULL;
|
||||
uint32 len = 0,
|
||||
cur = 0;
|
||||
StatEntry *sptr;
|
||||
WordEntry *wptr;
|
||||
int n = 0;
|
||||
List *newentries=NIL;
|
||||
StatEntry *StopLow;
|
||||
|
||||
if (stat == NULL)
|
||||
{ /* Init in first */
|
||||
@@ -932,16 +930,23 @@ ts_accum(tsstat *stat, Datum data)
|
||||
|
||||
sptr = STATPTR(stat);
|
||||
wptr = ARRPTR(txt);
|
||||
StopLow = STATPTR(stat);
|
||||
|
||||
if (stat->size < 100 * txt->size)
|
||||
{ /* merge */
|
||||
while (sptr - STATPTR(stat) < stat->size && wptr - ARRPTR(txt) < txt->size)
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
int cmp;
|
||||
|
||||
/*
|
||||
* We do not reset StopLow to the beginning of the array because the tsvector is ordered
|
||||
* by the same rule, so we can continue searching from the last stopped position
|
||||
*/
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
int cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
|
||||
if (cmp < 0)
|
||||
sptr++;
|
||||
else if (cmp == 0)
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
if (cmp == 0)
|
||||
{
|
||||
if (stat->weight == 0)
|
||||
{
|
||||
@@ -953,90 +958,38 @@ ts_accum(tsstat *stat, Datum data)
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
}
|
||||
sptr++;
|
||||
wptr++;
|
||||
break;
|
||||
}
|
||||
else if (cmp < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
{
|
||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
StopHigh = sptr;
|
||||
}
|
||||
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
if (StopLow >= StopHigh)
|
||||
{ /* not found */
|
||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{ /* search */
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
StatEntry *StopLow = STATPTR(stat);
|
||||
StatEntry *StopHigh = (StatEntry *) STATSTRPTR(stat);
|
||||
int cmp;
|
||||
WordEntryMark *mark = (WordEntryMark*)palloc(sizeof(WordEntryMark));
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
sptr = StopLow + (StopHigh - StopLow) / 2;
|
||||
cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
if (cmp == 0)
|
||||
{
|
||||
if (stat->weight == 0)
|
||||
{
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
||||
}
|
||||
else if (wptr->haspos && (n = check_weight(txt, wptr, stat->weight)) != 0)
|
||||
{
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (cmp < 0)
|
||||
StopLow = sptr + 1;
|
||||
else
|
||||
StopHigh = sptr;
|
||||
}
|
||||
mark->newentry = wptr;
|
||||
mark->pos = StopLow;
|
||||
newentries = lappend( newentries, mark );
|
||||
|
||||
if (StopLow >= StopHigh)
|
||||
{ /* not found */
|
||||
if (stat->weight == 0 || check_weight(txt, wptr, stat->weight) != 0)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
|
||||
|
||||
if (cur == 0)
|
||||
if (list_length(newentries) == 0)
|
||||
{ /* no new words */
|
||||
if (txt != (TSVector) DatumGetPointer(data))
|
||||
pfree(txt);
|
||||
return stat;
|
||||
}
|
||||
|
||||
newstat = formstat(stat, txt, newentry, cur);
|
||||
pfree(newentry);
|
||||
newstat = formstat(stat, txt, newentries);
|
||||
list_free(newentries);
|
||||
|
||||
if (txt != (TSVector) DatumGetPointer(data))
|
||||
pfree(txt);
|
||||
|
||||
Reference in New Issue
Block a user