mirror of
https://github.com/postgres/postgres.git
synced 2025-07-31 22:04:40 +03:00
pgindent run.
This commit is contained in:
@ -37,29 +37,35 @@ Datum rank_cd_def(PG_FUNCTION_ARGS);
|
||||
PG_FUNCTION_INFO_V1(get_covers);
|
||||
Datum get_covers(PG_FUNCTION_ARGS);
|
||||
|
||||
static float weights[]={0.1, 0.2, 0.4, 1.0};
|
||||
static float weights[] = {0.1, 0.2, 0.4, 1.0};
|
||||
|
||||
#define wpos(wep) ( w[ ((WordEntryPos*)(wep))->weight ] )
|
||||
|
||||
#define DEF_NORM_METHOD 0
|
||||
#define DEF_NORM_METHOD 0
|
||||
|
||||
/*
|
||||
* Returns a weight of a word collocation
|
||||
*/
|
||||
static float4 word_distance ( int4 w ) {
|
||||
if ( w>100 )
|
||||
return 1e-30;
|
||||
static float4
|
||||
word_distance(int4 w)
|
||||
{
|
||||
if (w > 100)
|
||||
return 1e-30;
|
||||
|
||||
return 1.0/(1.005+0.05*exp( ((float4)w)/1.5-2) );
|
||||
return 1.0 / (1.005 + 0.05 * exp(((float4) w) / 1.5 - 2));
|
||||
}
|
||||
|
||||
static int
|
||||
cnt_length( tsvector *t ) {
|
||||
WordEntry *ptr=ARRPTR(t), *end=(WordEntry*)STRPTR(t);
|
||||
int len = 0, clen;
|
||||
cnt_length(tsvector * t)
|
||||
{
|
||||
WordEntry *ptr = ARRPTR(t),
|
||||
*end = (WordEntry *) STRPTR(t);
|
||||
int len = 0,
|
||||
clen;
|
||||
|
||||
while(ptr < end) {
|
||||
if ( (clen=POSDATALEN(t, ptr)) == 0 )
|
||||
while (ptr < end)
|
||||
{
|
||||
if ((clen = POSDATALEN(t, ptr)) == 0)
|
||||
len += 1;
|
||||
else
|
||||
len += clen;
|
||||
@ -70,191 +76,225 @@ cnt_length( tsvector *t ) {
|
||||
}
|
||||
|
||||
static int4
|
||||
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item) {
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
WordECompareITEM(char *eval, char *qval, WordEntry * ptr, ITEM * item)
|
||||
{
|
||||
if (ptr->len == item->length)
|
||||
return strncmp(
|
||||
eval + ptr->pos,
|
||||
qval + item->distance,
|
||||
item->length);
|
||||
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
return (ptr->len > item->length) ? 1 : -1;
|
||||
}
|
||||
|
||||
static WordEntry*
|
||||
find_wordentry(tsvector *t, QUERYTYPE *q, ITEM *item) {
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry*)STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
static WordEntry *
|
||||
find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
|
||||
{
|
||||
WordEntry *StopLow = ARRPTR(t);
|
||||
WordEntry *StopHigh = (WordEntry *) STRPTR(t);
|
||||
WordEntry *StopMiddle;
|
||||
int difference;
|
||||
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
/* Loop invariant: StopLow <= item < StopHigh */
|
||||
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = WordECompareITEM(STRPTR(t), GETOPERAND(q), StopMiddle, item);
|
||||
if (difference == 0)
|
||||
return StopMiddle;
|
||||
else if (difference < 0)
|
||||
StopLow = StopMiddle + 1;
|
||||
else
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static WordEntryPos POSNULL[]={
|
||||
{0,0},
|
||||
{0,MAXENTRYPOS-1}
|
||||
static WordEntryPos POSNULL[] = {
|
||||
{0, 0},
|
||||
{0, MAXENTRYPOS - 1}
|
||||
};
|
||||
|
||||
static float
|
||||
calc_rank_and(float *w, tsvector *t, QUERYTYPE *q) {
|
||||
uint16 **pos=(uint16**)palloc(sizeof(uint16*) * q->size);
|
||||
int i,k,l,p;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post,*ct;
|
||||
int4 dimt,lenct,dist;
|
||||
float res=-1.0;
|
||||
ITEM *item=GETQUERY(q);
|
||||
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
|
||||
{
|
||||
uint16 **pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
|
||||
int i,
|
||||
k,
|
||||
l,
|
||||
p;
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post,
|
||||
*ct;
|
||||
int4 dimt,
|
||||
lenct,
|
||||
dist;
|
||||
float res = -1.0;
|
||||
ITEM *item = GETQUERY(q);
|
||||
|
||||
memset(pos,0,sizeof(uint16**) * q->size);
|
||||
*(uint16*)POSNULL = lengthof(POSNULL)-1;
|
||||
memset(pos, 0, sizeof(uint16 **) * q->size);
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
|
||||
for(i=0; i<q->size; i++) {
|
||||
|
||||
if ( item[i].type != VAL )
|
||||
for (i = 0; i < q->size; i++)
|
||||
{
|
||||
|
||||
if (item[i].type != VAL)
|
||||
continue;
|
||||
|
||||
entry=find_wordentry(t,q,&(item[i]));
|
||||
if ( !entry )
|
||||
entry = find_wordentry(t, q, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if ( entry->haspos )
|
||||
pos[i] = (uint16*)_POSDATAPTR(t,entry);
|
||||
if (entry->haspos)
|
||||
pos[i] = (uint16 *) _POSDATAPTR(t, entry);
|
||||
else
|
||||
pos[i] = (uint16*)POSNULL;
|
||||
pos[i] = (uint16 *) POSNULL;
|
||||
|
||||
|
||||
dimt = *(uint16*)(pos[i]);
|
||||
post = (WordEntryPos*)(pos[i]+1);
|
||||
for( k=0; k<i; k++ ) {
|
||||
if ( !pos[k] ) continue;
|
||||
lenct = *(uint16*)(pos[k]);
|
||||
ct = (WordEntryPos*)(pos[k]+1);
|
||||
for(l=0; l<dimt; l++) {
|
||||
for(p=0; p<lenct; p++) {
|
||||
dist = abs( post[l].pos - ct[p].pos );
|
||||
if ( dist || (dist==0 && (pos[i]==(uint16*)POSNULL || pos[k]==(uint16*)POSNULL) ) ) {
|
||||
float curw;
|
||||
if ( !dist ) dist=MAXENTRYPOS;
|
||||
curw= sqrt( wpos(&(post[l])) * wpos( &(ct[p]) ) * word_distance(dist) );
|
||||
res = ( res < 0 ) ? curw : 1.0 - ( 1.0 - res ) * ( 1.0 - curw );
|
||||
dimt = *(uint16 *) (pos[i]);
|
||||
post = (WordEntryPos *) (pos[i] + 1);
|
||||
for (k = 0; k < i; k++)
|
||||
{
|
||||
if (!pos[k])
|
||||
continue;
|
||||
lenct = *(uint16 *) (pos[k]);
|
||||
ct = (WordEntryPos *) (pos[k] + 1);
|
||||
for (l = 0; l < dimt; l++)
|
||||
{
|
||||
for (p = 0; p < lenct; p++)
|
||||
{
|
||||
dist = abs(post[l].pos - ct[p].pos);
|
||||
if (dist || (dist == 0 && (pos[i] == (uint16 *) POSNULL || pos[k] == (uint16 *) POSNULL)))
|
||||
{
|
||||
float curw;
|
||||
|
||||
if (!dist)
|
||||
dist = MAXENTRYPOS;
|
||||
curw = sqrt(wpos(&(post[l])) * wpos(&(ct[p])) * word_distance(dist));
|
||||
res = (res < 0) ? curw : 1.0 - (1.0 - res) * (1.0 - curw);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
pfree(pos);
|
||||
return res;
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank_or(float *w, tsvector *t, QUERYTYPE *q) {
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,j,i;
|
||||
float res=-1.0;
|
||||
ITEM *item=GETQUERY(q);
|
||||
calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
|
||||
{
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
float res = -1.0;
|
||||
ITEM *item = GETQUERY(q);
|
||||
|
||||
*(uint16*)POSNULL = lengthof(POSNULL)-1;
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
|
||||
for(i=0; i<q->size; i++) {
|
||||
if ( item[i].type != VAL )
|
||||
for (i = 0; i < q->size; i++)
|
||||
{
|
||||
if (item[i].type != VAL)
|
||||
continue;
|
||||
|
||||
entry=find_wordentry(t,q,&(item[i]));
|
||||
if ( !entry )
|
||||
entry = find_wordentry(t, q, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if ( entry->haspos ) {
|
||||
dimt = POSDATALEN(t,entry);
|
||||
post = POSDATAPTR(t,entry);
|
||||
} else {
|
||||
dimt = *(uint16*)POSNULL;
|
||||
post = POSNULL+1;
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(t, entry);
|
||||
post = POSDATAPTR(t, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
for(j=0;j<dimt;j++) {
|
||||
if ( res < 0 )
|
||||
res = wpos( &(post[j]) );
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (res < 0)
|
||||
res = wpos(&(post[j]));
|
||||
else
|
||||
res = 1.0 - ( 1.0-res ) * ( 1.0-wpos( &(post[j]) ) );
|
||||
res = 1.0 - (1.0 - res) * (1.0 - wpos(&(post[j])));
|
||||
}
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
static float
|
||||
calc_rank(float *w, tsvector *t, QUERYTYPE *q, int4 method) {
|
||||
ITEM *item = GETQUERY(q);
|
||||
float res=0.0;
|
||||
calc_rank(float *w, tsvector * t, QUERYTYPE * q, int4 method)
|
||||
{
|
||||
ITEM *item = GETQUERY(q);
|
||||
float res = 0.0;
|
||||
|
||||
if (!t->size || !q->size)
|
||||
return 0.0;
|
||||
|
||||
res = ( item->type != VAL && item->val == (int4) '&' ) ?
|
||||
calc_rank_and(w,t,q) : calc_rank_or(w,t,q);
|
||||
res = (item->type != VAL && item->val == (int4) '&') ?
|
||||
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
||||
|
||||
if ( res < 0 )
|
||||
if (res < 0)
|
||||
res = 1e-20;
|
||||
|
||||
switch(method) {
|
||||
case 0: break;
|
||||
case 1: res /= log((float)cnt_length(t)); break;
|
||||
case 2: res /= (float)cnt_length(t); break;
|
||||
switch (method)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
res /= log((float) cnt_length(t));
|
||||
break;
|
||||
case 2:
|
||||
res /= (float) cnt_length(t);
|
||||
break;
|
||||
default:
|
||||
/* internal error */
|
||||
elog(ERROR,"unrecognized normalization method: %d", method);
|
||||
}
|
||||
/* internal error */
|
||||
elog(ERROR, "unrecognized normalization method: %d", method);
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
Datum
|
||||
rank(PG_FUNCTION_ARGS) {
|
||||
rank(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *win = (ArrayType *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
int method=DEF_NORM_METHOD;
|
||||
float res=0.0;
|
||||
float ws[ lengthof(weights) ];
|
||||
int i;
|
||||
int method = DEF_NORM_METHOD;
|
||||
float res = 0.0;
|
||||
float ws[lengthof(weights)];
|
||||
int i;
|
||||
|
||||
if ( ARR_NDIM(win) != 1 )
|
||||
if (ARR_NDIM(win) != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight must be one-dimensional")));
|
||||
|
||||
if ( ARRNELEMS(win) < lengthof(weights) )
|
||||
if (ARRNELEMS(win) < lengthof(weights))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_ARRAY_SUBSCRIPT_ERROR),
|
||||
errmsg("array of weight is too short")));
|
||||
|
||||
for(i=0;i<lengthof(weights);i++) {
|
||||
ws[ i ] = ( ((float4*)ARR_DATA_PTR(win))[i] >= 0 ) ? ((float4*)ARR_DATA_PTR(win))[i] : weights[i];
|
||||
if ( ws[ i ] > 1.0 )
|
||||
for (i = 0; i < lengthof(weights); i++)
|
||||
{
|
||||
ws[i] = (((float4 *) ARR_DATA_PTR(win))[i] >= 0) ? ((float4 *) ARR_DATA_PTR(win))[i] : weights[i];
|
||||
if (ws[i] > 1.0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("weight out of range")));
|
||||
}
|
||||
}
|
||||
|
||||
if ( PG_NARGS() == 4 )
|
||||
method=PG_GETARG_INT32(3);
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
res = calc_rank(ws, txt, query, method);
|
||||
|
||||
res=calc_rank(ws, txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(win, 0);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
@ -262,108 +302,127 @@ rank(PG_FUNCTION_ARGS) {
|
||||
}
|
||||
|
||||
Datum
|
||||
rank_def(PG_FUNCTION_ARGS) {
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
rank_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
float res=0.0;
|
||||
int method=DEF_NORM_METHOD;
|
||||
float res = 0.0;
|
||||
int method = DEF_NORM_METHOD;
|
||||
|
||||
if ( PG_NARGS() == 3 )
|
||||
method=PG_GETARG_INT32(2);
|
||||
if (PG_NARGS() == 3)
|
||||
method = PG_GETARG_INT32(2);
|
||||
|
||||
res = calc_rank(weights, txt, query, method);
|
||||
|
||||
res=calc_rank(weights, txt, query, method);
|
||||
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_FLOAT4(res);
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
ITEM *item;
|
||||
int32 pos;
|
||||
} DocRepresentation;
|
||||
typedef struct
|
||||
{
|
||||
ITEM *item;
|
||||
int32 pos;
|
||||
} DocRepresentation;
|
||||
|
||||
static int
|
||||
compareDocR(const void *a, const void *b) {
|
||||
if ( ((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos )
|
||||
compareDocR(const void *a, const void *b)
|
||||
{
|
||||
if (((DocRepresentation *) a)->pos == ((DocRepresentation *) b)->pos)
|
||||
return 1;
|
||||
return ( ((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos ) ? 1 : -1;
|
||||
return (((DocRepresentation *) a)->pos > ((DocRepresentation *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
typedef struct
|
||||
{
|
||||
DocRepresentation *doc;
|
||||
int len;
|
||||
int len;
|
||||
} ChkDocR;
|
||||
|
||||
static bool
|
||||
checkcondition_DR(void *checkval, ITEM *val) {
|
||||
DocRepresentation *ptr = ((ChkDocR*)checkval)->doc;
|
||||
checkcondition_DR(void *checkval, ITEM * val)
|
||||
{
|
||||
DocRepresentation *ptr = ((ChkDocR *) checkval)->doc;
|
||||
|
||||
while( ptr - ((ChkDocR*)checkval)->doc < ((ChkDocR*)checkval)->len ) {
|
||||
if ( val == ptr->item )
|
||||
while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
|
||||
{
|
||||
if (val == ptr->item)
|
||||
return true;
|
||||
ptr++;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *q) {
|
||||
int i;
|
||||
DocRepresentation *ptr,*f=(DocRepresentation*)0xffffffff;
|
||||
ITEM *item=GETQUERY(query);
|
||||
int lastpos=*pos;
|
||||
int oldq=*q;
|
||||
Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int *q)
|
||||
{
|
||||
int i;
|
||||
DocRepresentation *ptr,
|
||||
*f = (DocRepresentation *) 0xffffffff;
|
||||
ITEM *item = GETQUERY(query);
|
||||
int lastpos = *pos;
|
||||
int oldq = *q;
|
||||
|
||||
*p=0x7fffffff;
|
||||
*q=0;
|
||||
*p = 0x7fffffff;
|
||||
*q = 0;
|
||||
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item->type != VAL ) {
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
ptr = doc + *pos;
|
||||
|
||||
while(ptr-doc<len) {
|
||||
if ( ptr->item == item ) {
|
||||
if ( ptr->pos > *q ) {
|
||||
while (ptr - doc < len)
|
||||
{
|
||||
if (ptr->item == item)
|
||||
{
|
||||
if (ptr->pos > *q)
|
||||
{
|
||||
*q = ptr->pos;
|
||||
lastpos= ptr - doc;
|
||||
}
|
||||
lastpos = ptr - doc;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
ptr++;
|
||||
}
|
||||
|
||||
item++;
|
||||
}
|
||||
|
||||
if (*q==0 )
|
||||
if (*q == 0)
|
||||
return false;
|
||||
|
||||
if (*q==oldq) { /* already check this pos */
|
||||
if (*q == oldq)
|
||||
{ /* already check this pos */
|
||||
(*pos)++;
|
||||
return Cover(doc, len, query, pos,p,q);
|
||||
}
|
||||
return Cover(doc, len, query, pos, p, q);
|
||||
}
|
||||
|
||||
item=GETQUERY(query);
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item->type != VAL ) {
|
||||
item = GETQUERY(query);
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item->type != VAL)
|
||||
{
|
||||
item++;
|
||||
continue;
|
||||
}
|
||||
ptr = doc + lastpos;
|
||||
|
||||
while(ptr>=doc+*pos) {
|
||||
if ( ptr->item == item ) {
|
||||
if ( ptr->pos < *p ) {
|
||||
while (ptr >= doc + *pos)
|
||||
{
|
||||
if (ptr->item == item)
|
||||
{
|
||||
if (ptr->pos < *p)
|
||||
{
|
||||
*p = ptr->pos;
|
||||
f=ptr;
|
||||
f = ptr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -371,106 +430,135 @@ Cover(DocRepresentation *doc, int len, QUERYTYPE *query, int *pos, int *p, int *
|
||||
}
|
||||
item++;
|
||||
}
|
||||
|
||||
if ( *p<=*q ) {
|
||||
ChkDocR ch = { f, (doc + lastpos)-f+1 };
|
||||
*pos = f-doc+1;
|
||||
if ( TS_execute(GETQUERY(query), &ch, false, checkcondition_DR) ) {
|
||||
/*elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p, *q);*/
|
||||
|
||||
if (*p <= *q)
|
||||
{
|
||||
ChkDocR ch = {f, (doc + lastpos) - f + 1};
|
||||
|
||||
*pos = f - doc + 1;
|
||||
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
|
||||
{
|
||||
/*
|
||||
* elog(NOTICE,"OP:%d NP:%d P:%d Q:%d", *pos, lastpos, *p,
|
||||
* *q);
|
||||
*/
|
||||
return true;
|
||||
} else
|
||||
return Cover(doc, len, query, pos,p,q);
|
||||
}
|
||||
else
|
||||
return Cover(doc, len, query, pos, p, q);
|
||||
}
|
||||
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static DocRepresentation*
|
||||
get_docrep(tsvector *txt, QUERYTYPE *query, int *doclen) {
|
||||
ITEM *item=GETQUERY(query);
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,j,i;
|
||||
int len=query->size*4,cur=0;
|
||||
static DocRepresentation *
|
||||
get_docrep(tsvector * txt, QUERYTYPE * query, int *doclen)
|
||||
{
|
||||
ITEM *item = GETQUERY(query);
|
||||
WordEntry *entry;
|
||||
WordEntryPos *post;
|
||||
int4 dimt,
|
||||
j,
|
||||
i;
|
||||
int len = query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
|
||||
*(uint16*)POSNULL = lengthof(POSNULL)-1;
|
||||
doc = (DocRepresentation*)palloc(sizeof(DocRepresentation)*len);
|
||||
for(i=0; i<query->size; i++) {
|
||||
if ( item[i].type != VAL )
|
||||
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
|
||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||
for (i = 0; i < query->size; i++)
|
||||
{
|
||||
if (item[i].type != VAL)
|
||||
continue;
|
||||
|
||||
entry=find_wordentry(txt,query,&(item[i]));
|
||||
if ( !entry )
|
||||
entry = find_wordentry(txt, query, &(item[i]));
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
if ( entry->haspos ) {
|
||||
dimt = POSDATALEN(txt,entry);
|
||||
post = POSDATAPTR(txt,entry);
|
||||
} else {
|
||||
dimt = *(uint16*)POSNULL;
|
||||
post = POSNULL+1;
|
||||
if (entry->haspos)
|
||||
{
|
||||
dimt = POSDATALEN(txt, entry);
|
||||
post = POSDATAPTR(txt, entry);
|
||||
}
|
||||
else
|
||||
{
|
||||
dimt = *(uint16 *) POSNULL;
|
||||
post = POSNULL + 1;
|
||||
}
|
||||
|
||||
while( cur+dimt >= len ) {
|
||||
len*=2;
|
||||
doc = (DocRepresentation*)repalloc(doc,sizeof(DocRepresentation)*len);
|
||||
while (cur + dimt >= len)
|
||||
{
|
||||
len *= 2;
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
for(j=0;j<dimt;j++) {
|
||||
doc[cur].item=&(item[i]);
|
||||
doc[cur].pos=post[j].pos;
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
doc[cur].item = &(item[i]);
|
||||
doc[cur].pos = post[j].pos;
|
||||
cur++;
|
||||
}
|
||||
}
|
||||
|
||||
*doclen=cur;
|
||||
|
||||
if ( cur>0 ) {
|
||||
if ( cur>1 )
|
||||
*doclen = cur;
|
||||
|
||||
if (cur > 0)
|
||||
{
|
||||
if (cur > 1)
|
||||
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
||||
return doc;
|
||||
}
|
||||
|
||||
|
||||
pfree(doc);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
rank_cd(PG_FUNCTION_ARGS) {
|
||||
int K = PG_GETARG_INT32(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
rank_cd(PG_FUNCTION_ARGS)
|
||||
{
|
||||
int K = PG_GETARG_INT32(0);
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(2));
|
||||
int method=DEF_NORM_METHOD;
|
||||
DocRepresentation *doc;
|
||||
float res=0.0;
|
||||
int p=0,q=0,len,cur;
|
||||
int method = DEF_NORM_METHOD;
|
||||
DocRepresentation *doc;
|
||||
float res = 0.0;
|
||||
int p = 0,
|
||||
q = 0,
|
||||
len,
|
||||
cur;
|
||||
|
||||
doc = get_docrep(txt, query, &len);
|
||||
if ( !doc ) {
|
||||
if (!doc)
|
||||
{
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
PG_FREE_IF_COPY(query, 2);
|
||||
PG_RETURN_FLOAT4(0.0);
|
||||
}
|
||||
|
||||
cur=0;
|
||||
if (K<=0)
|
||||
K=4;
|
||||
while( Cover(doc, len, query, &cur, &p, &q) )
|
||||
res += ( q-p+1 > K ) ? ((float)K)/((float)(q-p+1)) : 1.0;
|
||||
cur = 0;
|
||||
if (K <= 0)
|
||||
K = 4;
|
||||
while (Cover(doc, len, query, &cur, &p, &q))
|
||||
res += (q - p + 1 > K) ? ((float) K) / ((float) (q - p + 1)) : 1.0;
|
||||
|
||||
if ( PG_NARGS() == 4 )
|
||||
method=PG_GETARG_INT32(3);
|
||||
if (PG_NARGS() == 4)
|
||||
method = PG_GETARG_INT32(3);
|
||||
|
||||
switch(method) {
|
||||
case 0: break;
|
||||
case 1: res /= log((float)cnt_length(txt)); break;
|
||||
case 2: res /= (float)cnt_length(txt); break;
|
||||
switch (method)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
res /= log((float) cnt_length(txt));
|
||||
break;
|
||||
case 2:
|
||||
res /= (float) cnt_length(txt);
|
||||
break;
|
||||
default:
|
||||
/* internal error */
|
||||
elog(ERROR,"unrecognized normalization method: %d", method);
|
||||
}
|
||||
/* internal error */
|
||||
elog(ERROR, "unrecognized normalization method: %d", method);
|
||||
}
|
||||
|
||||
pfree(doc);
|
||||
PG_FREE_IF_COPY(txt, 1);
|
||||
@ -481,120 +569,141 @@ rank_cd(PG_FUNCTION_ARGS) {
|
||||
|
||||
|
||||
Datum
|
||||
rank_cd_def(PG_FUNCTION_ARGS) {
|
||||
PG_RETURN_DATUM( DirectFunctionCall4(
|
||||
rank_cd,
|
||||
Int32GetDatum(-1),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
( PG_NARGS() == 3 ) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
|
||||
));
|
||||
rank_cd_def(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_DATUM(DirectFunctionCall4(
|
||||
rank_cd,
|
||||
Int32GetDatum(-1),
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
(PG_NARGS() == 3) ? PG_GETARG_DATUM(2) : Int32GetDatum(DEF_NORM_METHOD)
|
||||
));
|
||||
}
|
||||
|
||||
/**************debug*************/
|
||||
|
||||
typedef struct {
|
||||
char *w;
|
||||
int2 len;
|
||||
int2 pos;
|
||||
int2 start;
|
||||
int2 finish;
|
||||
} DocWord;
|
||||
typedef struct
|
||||
{
|
||||
char *w;
|
||||
int2 len;
|
||||
int2 pos;
|
||||
int2 start;
|
||||
int2 finish;
|
||||
} DocWord;
|
||||
|
||||
static int
|
||||
compareDocWord(const void *a, const void *b) {
|
||||
if ( ((DocWord *) a)->pos == ((DocWord *) b)->pos )
|
||||
compareDocWord(const void *a, const void *b)
|
||||
{
|
||||
if (((DocWord *) a)->pos == ((DocWord *) b)->pos)
|
||||
return 1;
|
||||
return ( ((DocWord *) a)->pos > ((DocWord *) b)->pos ) ? 1 : -1;
|
||||
return (((DocWord *) a)->pos > ((DocWord *) b)->pos) ? 1 : -1;
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
get_covers(PG_FUNCTION_ARGS) {
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
Datum
|
||||
get_covers(PG_FUNCTION_ARGS)
|
||||
{
|
||||
tsvector *txt = (tsvector *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
|
||||
QUERYTYPE *query = (QUERYTYPE *) PG_DETOAST_DATUM(PG_GETARG_DATUM(1));
|
||||
WordEntry *pptr=ARRPTR(txt);
|
||||
int i,dlen=0,j,cur=0,len=0,rlen;
|
||||
DocWord *dw,*dwptr;
|
||||
text *out;
|
||||
char *cptr;
|
||||
WordEntry *pptr = ARRPTR(txt);
|
||||
int i,
|
||||
dlen = 0,
|
||||
j,
|
||||
cur = 0,
|
||||
len = 0,
|
||||
rlen;
|
||||
DocWord *dw,
|
||||
*dwptr;
|
||||
text *out;
|
||||
char *cptr;
|
||||
DocRepresentation *doc;
|
||||
int pos=0,p,q,olddwpos=0;
|
||||
int ncover=1;
|
||||
int pos = 0,
|
||||
p,
|
||||
q,
|
||||
olddwpos = 0;
|
||||
int ncover = 1;
|
||||
|
||||
doc = get_docrep(txt, query, &rlen);
|
||||
|
||||
if ( !doc ) {
|
||||
out=palloc(VARHDRSZ);
|
||||
if (!doc)
|
||||
{
|
||||
out = palloc(VARHDRSZ);
|
||||
VARATT_SIZEP(out) = VARHDRSZ;
|
||||
PG_FREE_IF_COPY(txt,0);
|
||||
PG_FREE_IF_COPY(query,1);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
for(i=0;i<txt->size;i++) {
|
||||
for (i = 0; i < txt->size; i++)
|
||||
{
|
||||
if (!pptr[i].haspos)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
errmsg("no pos info")));
|
||||
dlen += POSDATALEN(txt,&(pptr[i]));
|
||||
dlen += POSDATALEN(txt, &(pptr[i]));
|
||||
}
|
||||
|
||||
dwptr=dw=palloc(sizeof(DocWord)*dlen);
|
||||
memset(dw,0,sizeof(DocWord)*dlen);
|
||||
dwptr = dw = palloc(sizeof(DocWord) * dlen);
|
||||
memset(dw, 0, sizeof(DocWord) * dlen);
|
||||
|
||||
for(i=0;i<txt->size;i++) {
|
||||
WordEntryPos *posdata = POSDATAPTR(txt,&(pptr[i]));
|
||||
for(j=0;j<POSDATALEN(txt,&(pptr[i]));j++) {
|
||||
dw[cur].w=STRPTR(txt)+pptr[i].pos;
|
||||
dw[cur].len=pptr[i].len;
|
||||
dw[cur].pos=posdata[j].pos;
|
||||
for (i = 0; i < txt->size; i++)
|
||||
{
|
||||
WordEntryPos *posdata = POSDATAPTR(txt, &(pptr[i]));
|
||||
|
||||
for (j = 0; j < POSDATALEN(txt, &(pptr[i])); j++)
|
||||
{
|
||||
dw[cur].w = STRPTR(txt) + pptr[i].pos;
|
||||
dw[cur].len = pptr[i].len;
|
||||
dw[cur].pos = posdata[j].pos;
|
||||
cur++;
|
||||
}
|
||||
len+=(pptr[i].len + 1) * (int)POSDATALEN(txt,&(pptr[i]));
|
||||
len += (pptr[i].len + 1) * (int) POSDATALEN(txt, &(pptr[i]));
|
||||
}
|
||||
qsort((void *) dw, dlen, sizeof(DocWord), compareDocWord);
|
||||
|
||||
while( Cover(doc, rlen, query, &pos, &p, &q) ) {
|
||||
dwptr=dw+olddwpos;
|
||||
while(dwptr->pos < p && dwptr-dw<dlen)
|
||||
while (Cover(doc, rlen, query, &pos, &p, &q))
|
||||
{
|
||||
dwptr = dw + olddwpos;
|
||||
while (dwptr->pos < p && dwptr - dw < dlen)
|
||||
dwptr++;
|
||||
olddwpos=dwptr-dw;
|
||||
dwptr->start=ncover;
|
||||
while(dwptr->pos < q+1 && dwptr-dw<dlen)
|
||||
olddwpos = dwptr - dw;
|
||||
dwptr->start = ncover;
|
||||
while (dwptr->pos < q + 1 && dwptr - dw < dlen)
|
||||
dwptr++;
|
||||
(dwptr-1)->finish=ncover;
|
||||
len+= 4 /* {}+two spaces */ + 2*16 /*numbers*/;
|
||||
ncover++;
|
||||
}
|
||||
|
||||
out=palloc(VARHDRSZ+len);
|
||||
cptr=((char*)out)+VARHDRSZ;
|
||||
dwptr=dw;
|
||||
(dwptr - 1)->finish = ncover;
|
||||
len += 4 /* {}+two spaces */ + 2 * 16 /* numbers */ ;
|
||||
ncover++;
|
||||
}
|
||||
|
||||
while( dwptr-dw < dlen) {
|
||||
if ( dwptr->start ) {
|
||||
sprintf(cptr,"{%d ",dwptr->start);
|
||||
cptr=strchr(cptr,'\0');
|
||||
out = palloc(VARHDRSZ + len);
|
||||
cptr = ((char *) out) + VARHDRSZ;
|
||||
dwptr = dw;
|
||||
|
||||
while (dwptr - dw < dlen)
|
||||
{
|
||||
if (dwptr->start)
|
||||
{
|
||||
sprintf(cptr, "{%d ", dwptr->start);
|
||||
cptr = strchr(cptr, '\0');
|
||||
}
|
||||
memcpy(cptr,dwptr->w,dwptr->len);
|
||||
cptr+=dwptr->len;
|
||||
*cptr=' ';
|
||||
memcpy(cptr, dwptr->w, dwptr->len);
|
||||
cptr += dwptr->len;
|
||||
*cptr = ' ';
|
||||
cptr++;
|
||||
if ( dwptr->finish ) {
|
||||
sprintf(cptr,"}%d ",dwptr->finish);
|
||||
cptr=strchr(cptr,'\0');
|
||||
if (dwptr->finish)
|
||||
{
|
||||
sprintf(cptr, "}%d ", dwptr->finish);
|
||||
cptr = strchr(cptr, '\0');
|
||||
}
|
||||
dwptr++;
|
||||
}
|
||||
}
|
||||
|
||||
VARATT_SIZEP(out) = cptr - ((char *) out);
|
||||
|
||||
VARATT_SIZEP(out) = cptr - ((char*)out);
|
||||
|
||||
pfree(dw);
|
||||
pfree(doc);
|
||||
|
||||
PG_FREE_IF_COPY(txt,0);
|
||||
PG_FREE_IF_COPY(query,1);
|
||||
PG_FREE_IF_COPY(txt, 0);
|
||||
PG_FREE_IF_COPY(query, 1);
|
||||
PG_RETURN_POINTER(out);
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user