1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-30 11:03:19 +03:00

Stat function now can show statistics per weight of lexemes

This commit is contained in:
Teodor Sigaev
2004-05-28 15:36:49 +00:00
parent 1b9ef0025d
commit a6ea6457fa
6 changed files with 160 additions and 37 deletions

View File

@ -15,9 +15,10 @@ Datum
tsstat_in(PG_FUNCTION_ARGS)
{
tsstat *stat = palloc(STATHDRSIZE);
stat->len = STATHDRSIZE;
stat->size = 0;
stat->weight = 0;
PG_RETURN_POINTER(stat);
}
@ -32,6 +33,20 @@ tsstat_out(PG_FUNCTION_ARGS)
PG_RETURN_NULL();
}
static int
check_weight(tsvector *txt, WordEntry *wptr, int8 weight) {
int len = POSDATALEN(txt, wptr);
int num=0;
WordEntryPos *ptr = POSDATAPTR(txt, wptr);
while (len--) {
if (weight & (1 << ptr->weight))
num++;
ptr++;
}
return num;
}
static WordEntry **
SEI_realloc(WordEntry ** in, uint32 *len)
{
@ -83,6 +98,7 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
totallen = CALCSTATSIZE(nentry, slen);
newstat = palloc(totallen);
newstat->len = totallen;
newstat->weight = stat->weight;
newstat->size = nentry;
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
@ -107,8 +123,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
}
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
nptr->nentry = POSDATALEN(txt, *ptr);
if (nptr->nentry == 0)
if ( (*ptr)->haspos ) {
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
} else
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
@ -127,8 +144,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
}
else
{
nptr->nentry = POSDATALEN(txt, *ptr);
if (nptr->nentry == 0)
if ( (*ptr)->haspos ) {
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
} else
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
@ -144,8 +162,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
while (ptr - entry < len)
{
nptr->nentry = POSDATALEN(txt, *ptr);
if (nptr->nentry == 0)
if ( (*ptr)->haspos ) {
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
} else
nptr->nentry = 1;
nptr->ndoc = 1;
nptr->len = (*ptr)->len;
@ -173,12 +192,14 @@ ts_accum(PG_FUNCTION_ARGS)
cur = 0;
StatEntry *sptr;
WordEntry *wptr;
int n=0;
if (stat == NULL || PG_ARGISNULL(0))
{ /* Init in first */
stat = palloc(STATHDRSIZE);
stat->len = STATHDRSIZE;
stat->size = 0;
stat->weight = 0;
}
/* simple check of correctness */
@ -201,32 +222,37 @@ ts_accum(PG_FUNCTION_ARGS)
sptr++;
else if (cmp == 0)
{
int n = POSDATALEN(txt, wptr);
if (n == 0)
n = 1;
sptr->ndoc++;
sptr->nentry += n;
if ( stat->weight == 0 ) {
sptr->ndoc++;
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
} else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) {
sptr->ndoc++;
sptr->nentry += n;
}
sptr++;
wptr++;
}
else
{
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
cur++;
}
wptr++;
cur++;
}
}
while (wptr - ARRPTR(txt) < txt->size)
{
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
cur++;
}
wptr++;
cur++;
}
}
else
@ -243,12 +269,13 @@ ts_accum(PG_FUNCTION_ARGS)
cmp = compareStatWord(sptr, wptr, stat, txt);
if (cmp == 0)
{
int n = POSDATALEN(txt, wptr);
if (n == 0)
n = 1;
sptr->ndoc++;
sptr->nentry += n;
if ( stat->weight == 0 ) {
sptr->ndoc++;
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
} else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) {
sptr->ndoc++;
sptr->nentry += n;
}
break;
}
else if (cmp < 0)
@ -259,10 +286,12 @@ ts_accum(PG_FUNCTION_ARGS)
if (StopLow >= StopHigh)
{ /* not found */
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
cur++;
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
if (cur == len)
newentry = SEI_realloc(newentry, &len);
newentry[cur] = wptr;
cur++;
}
}
wptr++;
}
@ -389,7 +418,7 @@ get_ti_Oid(void)
}
static tsstat *
ts_stat_sql(text *txt)
ts_stat_sql(text *txt, text *ws)
{
char *query = text2char(txt);
int i;
@ -423,6 +452,31 @@ ts_stat_sql(text *txt)
stat = palloc(STATHDRSIZE);
stat->len = STATHDRSIZE;
stat->size = 0;
stat->weight = 0;
if ( ws ) {
char *buf;
buf = VARDATA(ws);
while( buf - VARDATA(ws) < VARSIZE(buf) - VARHDRSZ ) {
switch (tolower(*buf)) {
case 'a':
stat->weight |= 1 << 3;
break;
case 'b':
stat->weight |= 1 << 2;
break;
case 'c':
stat->weight |= 1 << 1;
break;
case 'd':
stat->weight |= 1;
break;
default:
stat->weight |= 0;
}
buf++;
}
}
while (SPI_processed > 0)
{
@ -467,11 +521,13 @@ ts_stat(PG_FUNCTION_ARGS)
{
tsstat *stat;
text *txt = PG_GETARG_TEXT_P(0);
text *ws = (PG_NARGS() > 1) ? PG_GETARG_TEXT_P(1) : NULL;
funcctx = SRF_FIRSTCALL_INIT();
SPI_connect();
stat = ts_stat_sql(txt);
stat = ts_stat_sql(txt,ws);
PG_FREE_IF_COPY(txt, 0);
if (PG_NARGS() > 1 ) PG_FREE_IF_COPY(ws, 1);
ts_setup_firstcall(funcctx, stat);
SPI_finish();
}