mirror of
https://github.com/postgres/postgres.git
synced 2026-01-05 23:38:41 +03:00
Phrase full text search.
Patch introduces new text search operator (<-> or <DISTANCE>) into tsquery. On-disk and binary in/out format of tsquery are backward compatible. It has two side effect: - change order for tsquery, so, users, who has a btree index over tsquery, should reindex it - less number of parenthesis in tsquery output, and tsquery becomes more readable Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov Reviewers: Alexander Korotkov, Artur Zakirov
This commit is contained in:
@@ -364,8 +364,10 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
|
||||
return 0.0;
|
||||
|
||||
/* XXX: What about NOT? */
|
||||
res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
|
||||
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
||||
res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
|
||||
item->qoperator.oper == OP_PHRASE)) ?
|
||||
calc_rank_and(w, t, q) :
|
||||
calc_rank_or(w, t, q);
|
||||
|
||||
if (res < 0)
|
||||
res = 1e-20f;
|
||||
@@ -496,10 +498,17 @@ ts_rank_tt(PG_FUNCTION_ARGS)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
QueryItem **item;
|
||||
int16 nitem;
|
||||
uint8 wclass;
|
||||
int32 pos;
|
||||
union {
|
||||
struct { /* compiled doc representation */
|
||||
QueryItem **items;
|
||||
int16 nitem;
|
||||
} query;
|
||||
struct { /* struct is used for preparing doc representation */
|
||||
QueryItem *item;
|
||||
WordEntry *entry;
|
||||
} map;
|
||||
} data;
|
||||
WordEntryPos pos;
|
||||
} DocRepresentation;
|
||||
|
||||
static int
|
||||
@@ -508,26 +517,59 @@ compareDocR(const void *va, const void *vb)
|
||||
const DocRepresentation *a = (const DocRepresentation *) va;
|
||||
const DocRepresentation *b = (const DocRepresentation *) vb;
|
||||
|
||||
if (a->pos == b->pos)
|
||||
return 0;
|
||||
return (a->pos > b->pos) ? 1 : -1;
|
||||
if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
|
||||
{
|
||||
if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
|
||||
{
|
||||
if (a->data.map.entry == b->data.map.entry)
|
||||
return 0;
|
||||
|
||||
return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
|
||||
}
|
||||
|
||||
return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
|
||||
}
|
||||
|
||||
return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
|
||||
}
|
||||
|
||||
#define MAXQROPOS MAXENTRYPOS
|
||||
typedef struct
|
||||
{
|
||||
bool operandexists;
|
||||
bool reverseinsert; /* indicates insert order,
|
||||
true means descending order */
|
||||
uint32 npos;
|
||||
WordEntryPos pos[MAXQROPOS];
|
||||
} QueryRepresentationOperand;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TSQuery query;
|
||||
bool *operandexist;
|
||||
TSQuery query;
|
||||
QueryRepresentationOperand *operandData;
|
||||
} QueryRepresentation;
|
||||
|
||||
#define QR_GET_OPERAND_EXISTS(q, v) ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
|
||||
#define QR_SET_OPERAND_EXISTS(q, v) QR_GET_OPERAND_EXISTS(q,v) = true
|
||||
#define QR_GET_OPERAND_DATA(q, v) \
|
||||
( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
|
||||
|
||||
static bool
|
||||
checkcondition_QueryOperand(void *checkval, QueryOperand *val)
|
||||
checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
QueryRepresentation *qr = (QueryRepresentation *) checkval;
|
||||
QueryRepresentation *qr = (QueryRepresentation *) checkval;
|
||||
QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
|
||||
|
||||
return QR_GET_OPERAND_EXISTS(qr, val);
|
||||
if (!opData->operandexists)
|
||||
return false;
|
||||
|
||||
if (data)
|
||||
{
|
||||
data->npos = opData->npos;
|
||||
data->pos = opData->pos;
|
||||
if (opData->reverseinsert)
|
||||
data->pos += MAXQROPOS - opData->npos;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
@@ -539,14 +581,65 @@ typedef struct
|
||||
DocRepresentation *end;
|
||||
} CoverExt;
|
||||
|
||||
static void
|
||||
resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < qr->query->size; i++)
|
||||
{
|
||||
qr->operandData[i].operandexists = false;
|
||||
qr->operandData[i].reverseinsert = reverseinsert;
|
||||
qr->operandData[i].npos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
|
||||
{
|
||||
int i;
|
||||
int lastPos;
|
||||
QueryRepresentationOperand *opData;
|
||||
|
||||
for (i = 0; i < entry->data.query.nitem; i++)
|
||||
{
|
||||
if (entry->data.query.items[i]->type != QI_VAL)
|
||||
continue;
|
||||
|
||||
opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
|
||||
|
||||
opData->operandexists = true;
|
||||
|
||||
if (opData->npos == 0)
|
||||
{
|
||||
lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
|
||||
opData->pos[lastPos] = entry->pos;
|
||||
opData->npos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
lastPos = opData->reverseinsert ?
|
||||
(MAXQROPOS - opData->npos) :
|
||||
(opData->npos - 1);
|
||||
|
||||
if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
|
||||
{
|
||||
lastPos = opData->reverseinsert ?
|
||||
(MAXQROPOS - 1 - opData->npos) :
|
||||
(opData->npos);
|
||||
|
||||
opData->pos[lastPos] = entry->pos;
|
||||
opData->npos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
{
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
int i;
|
||||
bool found = false;
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
bool found = false;
|
||||
|
||||
/*
|
||||
* since this function recurses, it could be driven to stack overflow.
|
||||
@@ -554,7 +647,7 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
*/
|
||||
check_stack_depth();
|
||||
|
||||
memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
|
||||
resetQueryRepresentation(qr, false);
|
||||
|
||||
ext->p = INT_MAX;
|
||||
ext->q = 0;
|
||||
@@ -563,16 +656,13 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
/* find upper bound of cover from current position, move up */
|
||||
while (ptr - doc < len)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
{
|
||||
if (ptr->item[i]->type == QI_VAL)
|
||||
QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
|
||||
}
|
||||
fillQueryRepresentationData(qr, ptr);
|
||||
|
||||
if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
|
||||
{
|
||||
if (ptr->pos > ext->q)
|
||||
if (WEP_GETPOS(ptr->pos) > ext->q)
|
||||
{
|
||||
ext->q = ptr->pos;
|
||||
ext->q = WEP_GETPOS(ptr->pos);
|
||||
ext->end = ptr;
|
||||
lastpos = ptr - doc;
|
||||
found = true;
|
||||
@@ -585,22 +675,24 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
if (!found)
|
||||
return false;
|
||||
|
||||
memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
|
||||
resetQueryRepresentation(qr, true);
|
||||
|
||||
ptr = doc + lastpos;
|
||||
|
||||
/* find lower bound of cover from found upper bound, move down */
|
||||
while (ptr >= doc + ext->pos)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
if (ptr->item[i]->type == QI_VAL)
|
||||
QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
|
||||
/*
|
||||
* we scan doc from right to left, so pos info in reverse order!
|
||||
*/
|
||||
fillQueryRepresentationData(qr, ptr);
|
||||
|
||||
if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
|
||||
{
|
||||
if (ptr->pos < ext->p)
|
||||
if (WEP_GETPOS(ptr->pos) < ext->p)
|
||||
{
|
||||
ext->begin = ptr;
|
||||
ext->p = ptr->pos;
|
||||
ext->p = WEP_GETPOS(ptr->pos);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -628,18 +720,20 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
WordEntry *entry,
|
||||
*firstentry;
|
||||
WordEntryPos *post;
|
||||
int32 dimt,
|
||||
int32 dimt, /* number of 'post' items */
|
||||
j,
|
||||
i,
|
||||
nitem;
|
||||
int len = qr->query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
char *operand;
|
||||
|
||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||
operand = GETOPERAND(qr->query);
|
||||
|
||||
/*
|
||||
* Iterate through query to make DocRepresentaion for words and it's entries
|
||||
* satisfied by query
|
||||
*/
|
||||
for (i = 0; i < qr->query->size; i++)
|
||||
{
|
||||
QueryOperand *curoperand;
|
||||
@@ -649,13 +743,11 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
|
||||
curoperand = &item[i].qoperand;
|
||||
|
||||
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
|
||||
continue;
|
||||
|
||||
firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
/* iterations over entries in tsvector */
|
||||
while (entry - firstentry < nitem)
|
||||
{
|
||||
if (entry->haspos)
|
||||
@@ -676,53 +768,67 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
/* iterations over entry's positions */
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (j == 0)
|
||||
if (curoperand->weight == 0 ||
|
||||
curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
|
||||
{
|
||||
int k;
|
||||
|
||||
doc[cur].nitem = 0;
|
||||
doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
|
||||
for (k = 0; k < qr->query->size; k++)
|
||||
{
|
||||
QueryOperand *kptr = &item[k].qoperand;
|
||||
QueryOperand *iptr = &item[i].qoperand;
|
||||
|
||||
if (k == i ||
|
||||
(item[k].type == QI_VAL &&
|
||||
compareQueryOperand(&kptr, &iptr, operand) == 0))
|
||||
{
|
||||
/*
|
||||
* if k == i, we've already checked above that
|
||||
* it's type == Q_VAL
|
||||
*/
|
||||
doc[cur].item[doc[cur].nitem] = item + k;
|
||||
doc[cur].nitem++;
|
||||
QR_SET_OPERAND_EXISTS(qr, item + k);
|
||||
}
|
||||
}
|
||||
doc[cur].pos = post[j];
|
||||
doc[cur].data.map.entry = entry;
|
||||
doc[cur].data.map.item = (QueryItem *) curoperand;
|
||||
cur++;
|
||||
}
|
||||
else
|
||||
{
|
||||
doc[cur].nitem = doc[cur - 1].nitem;
|
||||
doc[cur].item = doc[cur - 1].item;
|
||||
}
|
||||
doc[cur].pos = WEP_GETPOS(post[j]);
|
||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||
cur++;
|
||||
}
|
||||
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
|
||||
*doclen = cur;
|
||||
|
||||
if (cur > 0)
|
||||
{
|
||||
DocRepresentation *rptr = doc + 1,
|
||||
*wptr = doc,
|
||||
storage;
|
||||
|
||||
/*
|
||||
* Sort representation in ascending order by pos and entry
|
||||
*/
|
||||
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
||||
|
||||
/*
|
||||
* Join QueryItem per WordEntry and it's position
|
||||
*/
|
||||
storage.pos = doc->pos;
|
||||
storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
storage.data.query.items[0] = doc->data.map.item;
|
||||
storage.data.query.nitem = 1;
|
||||
|
||||
while (rptr - doc < cur)
|
||||
{
|
||||
if (rptr->pos == (rptr-1)->pos &&
|
||||
rptr->data.map.entry == (rptr-1)->data.map.entry)
|
||||
{
|
||||
storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
|
||||
storage.data.query.nitem++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wptr = storage;
|
||||
wptr++;
|
||||
storage.pos = rptr->pos;
|
||||
storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
storage.data.query.items[0] = rptr->data.map.item;
|
||||
storage.data.query.nitem = 1;
|
||||
}
|
||||
|
||||
rptr++;
|
||||
}
|
||||
|
||||
*wptr = storage;
|
||||
wptr++;
|
||||
|
||||
*doclen = wptr - doc;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@@ -758,12 +864,13 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
}
|
||||
|
||||
qr.query = query;
|
||||
qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
|
||||
qr.operandData = (QueryRepresentationOperand *)
|
||||
palloc0(sizeof(QueryRepresentationOperand) * query->size);
|
||||
|
||||
doc = get_docrep(txt, &qr, &doclen);
|
||||
if (!doc)
|
||||
{
|
||||
pfree(qr.operandexist);
|
||||
pfree(qr.operandData);
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@@ -777,7 +884,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
|
||||
while (ptr <= ext.end)
|
||||
{
|
||||
InvSum += invws[ptr->wclass];
|
||||
InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
|
||||
ptr++;
|
||||
}
|
||||
|
||||
@@ -827,7 +934,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
|
||||
pfree(doc);
|
||||
|
||||
pfree(qr.operandexist);
|
||||
pfree(qr.operandData);
|
||||
|
||||
return (float4) Wdoc;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user