mirror of
https://github.com/postgres/postgres.git
synced 2025-07-30 11:03:19 +03:00
Stat function now can show statistics per weight of lexemes
This commit is contained in:
@ -15,9 +15,10 @@ Datum
|
||||
tsstat_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsstat *stat = palloc(STATHDRSIZE);
|
||||
|
||||
|
||||
stat->len = STATHDRSIZE;
|
||||
stat->size = 0;
|
||||
stat->weight = 0;
|
||||
PG_RETURN_POINTER(stat);
|
||||
}
|
||||
|
||||
@ -32,6 +33,20 @@ tsstat_out(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
static int
|
||||
check_weight(tsvector *txt, WordEntry *wptr, int8 weight) {
|
||||
int len = POSDATALEN(txt, wptr);
|
||||
int num=0;
|
||||
WordEntryPos *ptr = POSDATAPTR(txt, wptr);
|
||||
|
||||
while (len--) {
|
||||
if (weight & (1 << ptr->weight))
|
||||
num++;
|
||||
ptr++;
|
||||
}
|
||||
return num;
|
||||
}
|
||||
|
||||
static WordEntry **
|
||||
SEI_realloc(WordEntry ** in, uint32 *len)
|
||||
{
|
||||
@ -83,6 +98,7 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
|
||||
totallen = CALCSTATSIZE(nentry, slen);
|
||||
newstat = palloc(totallen);
|
||||
newstat->len = totallen;
|
||||
newstat->weight = stat->weight;
|
||||
newstat->size = nentry;
|
||||
|
||||
memcpy(STATSTRPTR(newstat), STATSTRPTR(stat), STATSTRSIZE(stat));
|
||||
@ -107,8 +123,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
|
||||
}
|
||||
nptr = STATPTR(newstat) + (StopLow - STATPTR(stat));
|
||||
memcpy(STATPTR(newstat), STATPTR(stat), sizeof(StatEntry) * (StopLow - STATPTR(stat)));
|
||||
nptr->nentry = POSDATALEN(txt, *ptr);
|
||||
if (nptr->nentry == 0)
|
||||
if ( (*ptr)->haspos ) {
|
||||
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
} else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
@ -127,8 +144,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
|
||||
}
|
||||
else
|
||||
{
|
||||
nptr->nentry = POSDATALEN(txt, *ptr);
|
||||
if (nptr->nentry == 0)
|
||||
if ( (*ptr)->haspos ) {
|
||||
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
} else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
@ -144,8 +162,9 @@ formstat(tsstat * stat, tsvector * txt, WordEntry ** entry, uint32 len)
|
||||
|
||||
while (ptr - entry < len)
|
||||
{
|
||||
nptr->nentry = POSDATALEN(txt, *ptr);
|
||||
if (nptr->nentry == 0)
|
||||
if ( (*ptr)->haspos ) {
|
||||
nptr->nentry = ( stat->weight ) ? check_weight(txt, *ptr, stat->weight) : POSDATALEN(txt, *ptr);
|
||||
} else
|
||||
nptr->nentry = 1;
|
||||
nptr->ndoc = 1;
|
||||
nptr->len = (*ptr)->len;
|
||||
@ -173,12 +192,14 @@ ts_accum(PG_FUNCTION_ARGS)
|
||||
cur = 0;
|
||||
StatEntry *sptr;
|
||||
WordEntry *wptr;
|
||||
int n=0;
|
||||
|
||||
if (stat == NULL || PG_ARGISNULL(0))
|
||||
{ /* Init in first */
|
||||
stat = palloc(STATHDRSIZE);
|
||||
stat->len = STATHDRSIZE;
|
||||
stat->size = 0;
|
||||
stat->weight = 0;
|
||||
}
|
||||
|
||||
/* simple check of correctness */
|
||||
@ -201,32 +222,37 @@ ts_accum(PG_FUNCTION_ARGS)
|
||||
sptr++;
|
||||
else if (cmp == 0)
|
||||
{
|
||||
int n = POSDATALEN(txt, wptr);
|
||||
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
if ( stat->weight == 0 ) {
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
||||
} else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) {
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
}
|
||||
sptr++;
|
||||
wptr++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
while (wptr - ARRPTR(txt) < txt->size)
|
||||
{
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
wptr++;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -243,12 +269,13 @@ ts_accum(PG_FUNCTION_ARGS)
|
||||
cmp = compareStatWord(sptr, wptr, stat, txt);
|
||||
if (cmp == 0)
|
||||
{
|
||||
int n = POSDATALEN(txt, wptr);
|
||||
|
||||
if (n == 0)
|
||||
n = 1;
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
if ( stat->weight == 0 ) {
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += (wptr->haspos) ? POSDATALEN(txt, wptr) : 1;
|
||||
} else if ( wptr->haspos && (n=check_weight(txt, wptr, stat->weight))!=0 ) {
|
||||
sptr->ndoc++;
|
||||
sptr->nentry += n;
|
||||
}
|
||||
break;
|
||||
}
|
||||
else if (cmp < 0)
|
||||
@ -259,10 +286,12 @@ ts_accum(PG_FUNCTION_ARGS)
|
||||
|
||||
if (StopLow >= StopHigh)
|
||||
{ /* not found */
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
if ( stat->weight == 0 || check_weight(txt, wptr, stat->weight)!=0 ) {
|
||||
if (cur == len)
|
||||
newentry = SEI_realloc(newentry, &len);
|
||||
newentry[cur] = wptr;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
wptr++;
|
||||
}
|
||||
@ -389,7 +418,7 @@ get_ti_Oid(void)
|
||||
}
|
||||
|
||||
static tsstat *
|
||||
ts_stat_sql(text *txt)
|
||||
ts_stat_sql(text *txt, text *ws)
|
||||
{
|
||||
char *query = text2char(txt);
|
||||
int i;
|
||||
@ -423,6 +452,31 @@ ts_stat_sql(text *txt)
|
||||
stat = palloc(STATHDRSIZE);
|
||||
stat->len = STATHDRSIZE;
|
||||
stat->size = 0;
|
||||
stat->weight = 0;
|
||||
|
||||
if ( ws ) {
|
||||
char *buf;
|
||||
buf = VARDATA(ws);
|
||||
while( buf - VARDATA(ws) < VARSIZE(buf) - VARHDRSZ ) {
|
||||
switch (tolower(*buf)) {
|
||||
case 'a':
|
||||
stat->weight |= 1 << 3;
|
||||
break;
|
||||
case 'b':
|
||||
stat->weight |= 1 << 2;
|
||||
break;
|
||||
case 'c':
|
||||
stat->weight |= 1 << 1;
|
||||
break;
|
||||
case 'd':
|
||||
stat->weight |= 1;
|
||||
break;
|
||||
default:
|
||||
stat->weight |= 0;
|
||||
}
|
||||
buf++;
|
||||
}
|
||||
}
|
||||
|
||||
while (SPI_processed > 0)
|
||||
{
|
||||
@ -467,11 +521,13 @@ ts_stat(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsstat *stat;
|
||||
text *txt = PG_GETARG_TEXT_P(0);
|
||||
text *ws = (PG_NARGS() > 1) ? PG_GETARG_TEXT_P(1) : NULL;
|
||||
|
||||
funcctx = SRF_FIRSTCALL_INIT();
|
||||
SPI_connect();
|
||||
stat = ts_stat_sql(txt);
|
||||
stat = ts_stat_sql(txt,ws);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
if (PG_NARGS() > 1 ) PG_FREE_IF_COPY(ws, 1);
|
||||
ts_setup_firstcall(funcctx, stat);
|
||||
SPI_finish();
|
||||
}
|
||||
|
Reference in New Issue
Block a user