mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Phrase full text search.
Patch introduces a new text search operator (<-> or <DISTANCE>) into tsquery. The on-disk and binary in/out formats of tsquery are backward compatible. It has two side effects: - the sort order of tsquery changes, so users who have a btree index over tsquery should reindex it - tsquery output uses fewer parentheses, so it becomes more readable. Authors: Teodor Sigaev, Oleg Bartunov, Dmitry Ivanov Reviewers: Alexander Korotkov, Artur Zakirov
This commit is contained in:
@ -179,14 +179,16 @@ typedef struct
|
||||
} GinChkVal;
|
||||
|
||||
static GinTernaryValue
|
||||
checkcondition_gin(void *checkval, QueryOperand *val)
|
||||
checkcondition_gin_internal(GinChkVal *gcv, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
GinChkVal *gcv = (GinChkVal *) checkval;
|
||||
int j;
|
||||
|
||||
/* if any val requiring a weight is used, set recheck flag */
|
||||
if (val->weight != 0)
|
||||
*(gcv->need_recheck) = true;
|
||||
/*
|
||||
* if any val requiring a weight is used or caller
|
||||
* needs position information then set recheck flag
|
||||
*/
|
||||
if (val->weight != 0 || data != NULL)
|
||||
*gcv->need_recheck = true;
|
||||
|
||||
/* convert item's number to corresponding entry's (operand's) number */
|
||||
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
|
||||
@ -195,16 +197,22 @@ checkcondition_gin(void *checkval, QueryOperand *val)
|
||||
return gcv->check[j];
|
||||
}
|
||||
|
||||
/*
|
||||
* Wrapper of check condition function for TS_execute.
|
||||
*/
|
||||
static bool
|
||||
checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
return checkcondition_gin_internal((GinChkVal *) checkval,
|
||||
val,
|
||||
data) != GIN_FALSE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Evaluate tsquery boolean expression using ternary logic.
|
||||
*
|
||||
* chkcond is a callback function used to evaluate each VAL node in the query.
|
||||
* checkval can be used to pass information to the callback. TS_execute doesn't
|
||||
* do anything with it.
|
||||
*/
|
||||
static GinTernaryValue
|
||||
TS_execute_ternary(QueryItem *curitem, void *checkval,
|
||||
GinTernaryValue (*chkcond) (void *checkval, QueryOperand *val))
|
||||
TS_execute_ternary(GinChkVal *gcv, QueryItem *curitem)
|
||||
{
|
||||
GinTernaryValue val1,
|
||||
val2,
|
||||
@ -214,22 +222,30 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
|
||||
check_stack_depth();
|
||||
|
||||
if (curitem->type == QI_VAL)
|
||||
return chkcond(checkval, (QueryOperand *) curitem);
|
||||
return checkcondition_gin_internal(gcv,
|
||||
(QueryOperand *) curitem,
|
||||
NULL /* don't have any position info */);
|
||||
|
||||
switch (curitem->qoperator.oper)
|
||||
{
|
||||
case OP_NOT:
|
||||
result = TS_execute_ternary(curitem + 1, checkval, chkcond);
|
||||
result = TS_execute_ternary(gcv, curitem + 1);
|
||||
if (result == GIN_MAYBE)
|
||||
return result;
|
||||
return !result;
|
||||
|
||||
case OP_PHRASE:
|
||||
/*
|
||||
* GIN doesn't contain any information about positions,
|
||||
* treat OP_PHRASE as OP_AND with recheck requirement
|
||||
*/
|
||||
*gcv->need_recheck = true;
|
||||
|
||||
case OP_AND:
|
||||
val1 = TS_execute_ternary(curitem + curitem->qoperator.left,
|
||||
checkval, chkcond);
|
||||
val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
|
||||
if (val1 == GIN_FALSE)
|
||||
return GIN_FALSE;
|
||||
val2 = TS_execute_ternary(curitem + 1, checkval, chkcond);
|
||||
val2 = TS_execute_ternary(gcv, curitem + 1);
|
||||
if (val2 == GIN_FALSE)
|
||||
return GIN_FALSE;
|
||||
if (val1 == GIN_TRUE && val2 == GIN_TRUE)
|
||||
@ -238,11 +254,10 @@ TS_execute_ternary(QueryItem *curitem, void *checkval,
|
||||
return GIN_MAYBE;
|
||||
|
||||
case OP_OR:
|
||||
val1 = TS_execute_ternary(curitem + curitem->qoperator.left,
|
||||
checkval, chkcond);
|
||||
val1 = TS_execute_ternary(gcv, curitem + curitem->qoperator.left);
|
||||
if (val1 == GIN_TRUE)
|
||||
return GIN_TRUE;
|
||||
val2 = TS_execute_ternary(curitem + 1, checkval, chkcond);
|
||||
val2 = TS_execute_ternary(gcv, curitem + 1);
|
||||
if (val2 == GIN_TRUE)
|
||||
return GIN_TRUE;
|
||||
if (val1 == GIN_FALSE && val2 == GIN_FALSE)
|
||||
@ -327,9 +342,7 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
|
||||
gcv.map_item_operand = (int *) (extra_data[0]);
|
||||
gcv.need_recheck = &recheck;
|
||||
|
||||
res = TS_execute_ternary(GETQUERY(query),
|
||||
&gcv,
|
||||
checkcondition_gin);
|
||||
res = TS_execute_ternary(&gcv, GETQUERY(query));
|
||||
|
||||
if (res == GIN_TRUE && recheck)
|
||||
res = GIN_MAYBE;
|
||||
|
@ -298,7 +298,7 @@ typedef struct
|
||||
* is there value 'val' in array or not ?
|
||||
*/
|
||||
static bool
|
||||
checkcondition_arr(void *checkval, QueryOperand *val)
|
||||
checkcondition_arr(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
int32 *StopLow = ((CHKVAL *) checkval)->arrb;
|
||||
int32 *StopHigh = ((CHKVAL *) checkval)->arre;
|
||||
@ -327,7 +327,7 @@ checkcondition_arr(void *checkval, QueryOperand *val)
|
||||
}
|
||||
|
||||
static bool
|
||||
checkcondition_bit(void *checkval, QueryOperand *val)
|
||||
checkcondition_bit(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
/*
|
||||
* we are not able to find a prefix in signature tree
|
||||
|
@ -56,7 +56,7 @@ struct TSQueryParserStateData
|
||||
|
||||
/*
|
||||
* subroutine to parse the modifiers (weight and prefix flag currently)
|
||||
* part, like ':1AB' of a query.
|
||||
* part, like ':AB*' of a query.
|
||||
*/
|
||||
static char *
|
||||
get_modifiers(char *buf, int16 *weight, bool *prefix)
|
||||
@ -100,6 +100,94 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse phrase operator. The operator
|
||||
* may take the following forms:
|
||||
*
|
||||
* a <X> b (distance is no greater than X)
|
||||
* a <-> b (default distance = 1)
|
||||
*
|
||||
* The buffer should begin with '<' char
|
||||
*/
|
||||
static char *
|
||||
parse_phrase_operator(char *buf, int16 *distance)
|
||||
{
|
||||
enum
|
||||
{
|
||||
PHRASE_OPEN = 0,
|
||||
PHRASE_DIST,
|
||||
PHRASE_CLOSE,
|
||||
PHRASE_ERR,
|
||||
PHRASE_FINISH
|
||||
} state = PHRASE_OPEN;
|
||||
|
||||
char *ptr = buf;
|
||||
char *endptr;
|
||||
long l = 1;
|
||||
|
||||
while (*ptr)
|
||||
{
|
||||
switch(state)
|
||||
{
|
||||
case PHRASE_OPEN:
|
||||
Assert(t_iseq(ptr, '<'));
|
||||
state = PHRASE_DIST;
|
||||
ptr++;
|
||||
break;
|
||||
|
||||
case PHRASE_DIST:
|
||||
if (t_iseq(ptr, '-'))
|
||||
{
|
||||
state = PHRASE_CLOSE;
|
||||
ptr++;
|
||||
break;
|
||||
}
|
||||
else if (!t_isdigit(ptr))
|
||||
{
|
||||
state = PHRASE_ERR;
|
||||
break;
|
||||
}
|
||||
|
||||
l = strtol(ptr, &endptr, 10);
|
||||
if (ptr == endptr)
|
||||
state = PHRASE_ERR;
|
||||
else if (errno == ERANGE || l > MAXENTRYPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("distance in phrase operator should not be greater than %d",
|
||||
MAXENTRYPOS)));
|
||||
else
|
||||
{
|
||||
state = PHRASE_CLOSE;
|
||||
ptr = endptr;
|
||||
}
|
||||
break;
|
||||
|
||||
case PHRASE_CLOSE:
|
||||
if (t_iseq(ptr, '>'))
|
||||
{
|
||||
state = PHRASE_FINISH;
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
state = PHRASE_ERR;
|
||||
break;
|
||||
|
||||
case PHRASE_FINISH:
|
||||
*distance = (int16) l;
|
||||
return ptr;
|
||||
|
||||
case PHRASE_ERR:
|
||||
default:
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
err:
|
||||
*distance = -1;
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* token types for parsing
|
||||
*/
|
||||
@ -116,8 +204,10 @@ typedef enum
|
||||
/*
|
||||
* get token from query string
|
||||
*
|
||||
* *operator is filled in with OP_* when return values is PT_OPR
|
||||
* *operator is filled in with OP_* when return value is PT_OPR,
|
||||
* but *weight could contain a distance value in case of phrase operator.
|
||||
* *strval, *lenval and *weight are filled in when return value is PT_VAL
|
||||
*
|
||||
*/
|
||||
static ts_tokentype
|
||||
gettoken_query(TSQueryParserState state,
|
||||
@ -185,13 +275,23 @@ gettoken_query(TSQueryParserState state,
|
||||
(state->buf)++;
|
||||
return PT_OPR;
|
||||
}
|
||||
if (t_iseq(state->buf, '|'))
|
||||
else if (t_iseq(state->buf, '|'))
|
||||
{
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_OR;
|
||||
(state->buf)++;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, '<'))
|
||||
{
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_PHRASE;
|
||||
/* weight var is used as storage for distance */
|
||||
state->buf = parse_phrase_operator(state->buf, weight);
|
||||
if (*weight < 0)
|
||||
return PT_ERR;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, ')'))
|
||||
{
|
||||
(state->buf)++;
|
||||
@ -223,15 +323,16 @@ gettoken_query(TSQueryParserState state,
|
||||
* Push an operator to state->polstr
|
||||
*/
|
||||
void
|
||||
pushOperator(TSQueryParserState state, int8 oper)
|
||||
pushOperator(TSQueryParserState state, int8 oper, int16 distance)
|
||||
{
|
||||
QueryOperator *tmp;
|
||||
|
||||
Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR);
|
||||
Assert(oper == OP_NOT || oper == OP_AND || oper == OP_OR || oper == OP_PHRASE);
|
||||
|
||||
tmp = (QueryOperator *) palloc0(sizeof(QueryOperator));
|
||||
tmp->type = QI_OPR;
|
||||
tmp->oper = oper;
|
||||
tmp->distance = (oper == OP_PHRASE) ? distance : 0;
|
||||
/* left is filled in later with findoprnd */
|
||||
|
||||
state->polstr = lcons(tmp, state->polstr);
|
||||
@ -330,14 +431,18 @@ makepol(TSQueryParserState state,
|
||||
PushFunction pushval,
|
||||
Datum opaque)
|
||||
{
|
||||
int8 operator = 0;
|
||||
ts_tokentype type;
|
||||
int lenval = 0;
|
||||
char *strval = NULL;
|
||||
int8 opstack[STACKDEPTH];
|
||||
int lenstack = 0;
|
||||
int16 weight = 0;
|
||||
bool prefix;
|
||||
int8 operator = 0;
|
||||
ts_tokentype type;
|
||||
int lenval = 0;
|
||||
char *strval = NULL;
|
||||
struct
|
||||
{
|
||||
int8 op;
|
||||
int16 distance;
|
||||
} opstack[STACKDEPTH];
|
||||
int lenstack = 0;
|
||||
int16 weight = 0;
|
||||
bool prefix;
|
||||
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
@ -348,39 +453,48 @@ makepol(TSQueryParserState state,
|
||||
{
|
||||
case PT_VAL:
|
||||
pushval(opaque, state, strval, lenval, weight, prefix);
|
||||
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
|
||||
opstack[lenstack - 1] == OP_NOT))
|
||||
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
|
||||
opstack[lenstack - 1].op == OP_PHRASE ||
|
||||
opstack[lenstack - 1].op == OP_NOT))
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state, opstack[lenstack]);
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
}
|
||||
break;
|
||||
case PT_OPR:
|
||||
if (lenstack && operator == OP_OR)
|
||||
pushOperator(state, OP_OR);
|
||||
pushOperator(state, OP_OR, 0);
|
||||
else
|
||||
{
|
||||
if (lenstack == STACKDEPTH) /* internal error */
|
||||
elog(ERROR, "tsquery stack too small");
|
||||
opstack[lenstack] = operator;
|
||||
opstack[lenstack].op = operator;
|
||||
opstack[lenstack].distance = weight;
|
||||
lenstack++;
|
||||
}
|
||||
break;
|
||||
case PT_OPEN:
|
||||
makepol(state, pushval, opaque);
|
||||
|
||||
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
|
||||
opstack[lenstack - 1] == OP_NOT))
|
||||
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
|
||||
opstack[lenstack - 1].op == OP_PHRASE ||
|
||||
opstack[lenstack - 1].op == OP_NOT))
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state, opstack[lenstack]);
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
}
|
||||
break;
|
||||
case PT_CLOSE:
|
||||
while (lenstack)
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state, opstack[lenstack]);
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
};
|
||||
return;
|
||||
case PT_ERR:
|
||||
@ -394,12 +508,14 @@ makepol(TSQueryParserState state,
|
||||
while (lenstack)
|
||||
{
|
||||
lenstack--;
|
||||
pushOperator(state, opstack[lenstack]);
|
||||
pushOperator(state,
|
||||
opstack[lenstack].op,
|
||||
opstack[lenstack].distance);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
|
||||
findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes, bool *needcleanup)
|
||||
{
|
||||
/* since this function recurses, it could be driven to stack overflow. */
|
||||
check_stack_depth();
|
||||
@ -407,33 +523,47 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
|
||||
if (*pos >= nnodes)
|
||||
elog(ERROR, "malformed tsquery: operand not found");
|
||||
|
||||
if (ptr[*pos].type == QI_VAL ||
|
||||
ptr[*pos].type == QI_VALSTOP) /* need to handle VALSTOP here, they
|
||||
* haven't been cleaned away yet. */
|
||||
if (ptr[*pos].type == QI_VAL)
|
||||
{
|
||||
(*pos)++;
|
||||
}
|
||||
else if (ptr[*pos].type == QI_VALSTOP)
|
||||
{
|
||||
*needcleanup = true; /* we'll have to remove stop words */
|
||||
(*pos)++;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(ptr[*pos].type == QI_OPR);
|
||||
|
||||
if (ptr[*pos].qoperator.oper == OP_NOT)
|
||||
{
|
||||
ptr[*pos].qoperator.left = 1;
|
||||
ptr[*pos].qoperator.left = 1; /* fixed offset */
|
||||
(*pos)++;
|
||||
findoprnd_recurse(ptr, pos, nnodes);
|
||||
|
||||
/* process the only argument */
|
||||
findoprnd_recurse(ptr, pos, nnodes, needcleanup);
|
||||
}
|
||||
else
|
||||
{
|
||||
QueryOperator *curitem = &ptr[*pos].qoperator;
|
||||
int tmp = *pos;
|
||||
QueryOperator *curitem = &ptr[*pos].qoperator;
|
||||
int tmp = *pos; /* save current position */
|
||||
|
||||
Assert(curitem->oper == OP_AND || curitem->oper == OP_OR);
|
||||
Assert(curitem->oper == OP_AND ||
|
||||
curitem->oper == OP_OR ||
|
||||
curitem->oper == OP_PHRASE);
|
||||
|
||||
if (curitem->oper == OP_PHRASE)
|
||||
*needcleanup = true; /* push OP_PHRASE down later */
|
||||
|
||||
(*pos)++;
|
||||
findoprnd_recurse(ptr, pos, nnodes);
|
||||
curitem->left = *pos - tmp;
|
||||
findoprnd_recurse(ptr, pos, nnodes);
|
||||
|
||||
/* process RIGHT argument */
|
||||
findoprnd_recurse(ptr, pos, nnodes, needcleanup);
|
||||
curitem->left = *pos - tmp; /* set LEFT arg's offset */
|
||||
|
||||
/* process LEFT argument */
|
||||
findoprnd_recurse(ptr, pos, nnodes, needcleanup);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -444,12 +574,13 @@ findoprnd_recurse(QueryItem *ptr, uint32 *pos, int nnodes)
|
||||
* QueryItems must be in polish (prefix) notation.
|
||||
*/
|
||||
static void
|
||||
findoprnd(QueryItem *ptr, int size)
|
||||
findoprnd(QueryItem *ptr, int size, bool *needcleanup)
|
||||
{
|
||||
uint32 pos;
|
||||
|
||||
*needcleanup = false;
|
||||
pos = 0;
|
||||
findoprnd_recurse(ptr, &pos, size);
|
||||
findoprnd_recurse(ptr, &pos, size, needcleanup);
|
||||
|
||||
if (pos != size)
|
||||
elog(ERROR, "malformed tsquery: extra nodes");
|
||||
@ -466,9 +597,6 @@ findoprnd(QueryItem *ptr, int size)
|
||||
*
|
||||
* opaque is passed on to pushval as is, pushval can use it to store its
|
||||
* private state.
|
||||
*
|
||||
* The returned query might contain QI_STOPVAL nodes. The caller is responsible
|
||||
* for cleaning them up (with clean_fakeval)
|
||||
*/
|
||||
TSQuery
|
||||
parse_tsquery(char *buf,
|
||||
@ -482,6 +610,7 @@ parse_tsquery(char *buf,
|
||||
int commonlen;
|
||||
QueryItem *ptr;
|
||||
ListCell *cell;
|
||||
bool needcleanup;
|
||||
|
||||
/* init state */
|
||||
state.buffer = buf;
|
||||
@ -531,7 +660,7 @@ parse_tsquery(char *buf,
|
||||
i = 0;
|
||||
foreach(cell, state.polstr)
|
||||
{
|
||||
QueryItem *item = (QueryItem *) lfirst(cell);
|
||||
QueryItem *item = (QueryItem *) lfirst(cell);
|
||||
|
||||
switch (item->type)
|
||||
{
|
||||
@ -555,7 +684,14 @@ parse_tsquery(char *buf,
|
||||
pfree(state.op);
|
||||
|
||||
/* Set left operand pointers for every operator. */
|
||||
findoprnd(ptr, query->size);
|
||||
findoprnd(ptr, query->size, &needcleanup);
|
||||
|
||||
/*
|
||||
* QI_VALSTOP nodes should be cleaned
|
||||
* and OP_PHRASE should be pushed down
|
||||
*/
|
||||
if (needcleanup)
|
||||
return cleanup_fakeval_and_phrase(query);
|
||||
|
||||
return query;
|
||||
}
|
||||
@ -600,12 +736,15 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
|
||||
(inf)->cur = (inf)->buf + len; \
|
||||
}
|
||||
|
||||
#define PRINT_PRIORITY(x) \
|
||||
( (QO_PRIORITY(x) == OP_NOT) ? OP_NOT_PHRASE : QO_PRIORITY(x) )
|
||||
|
||||
/*
|
||||
* recursive walk on tree and print it in
|
||||
* infix (human-readable) view
|
||||
* recursively traverse the tree and
|
||||
* print it in infix (human-readable) form
|
||||
*/
|
||||
static void
|
||||
infix(INFIX *in, bool first)
|
||||
infix(INFIX *in, int parentPriority)
|
||||
{
|
||||
/* since this function recurses, it could be driven to stack overflow. */
|
||||
check_stack_depth();
|
||||
@ -674,24 +813,22 @@ infix(INFIX *in, bool first)
|
||||
}
|
||||
else if (in->curpol->qoperator.oper == OP_NOT)
|
||||
{
|
||||
bool isopr = false;
|
||||
int priority = PRINT_PRIORITY(in->curpol);
|
||||
|
||||
if (priority < parentPriority)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, "( ");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
}
|
||||
RESIZEBUF(in, 1);
|
||||
*(in->cur) = '!';
|
||||
in->cur++;
|
||||
*(in->cur) = '\0';
|
||||
in->curpol++;
|
||||
|
||||
if (in->curpol->type == QI_OPR)
|
||||
{
|
||||
isopr = true;
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, "( ");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
}
|
||||
|
||||
infix(in, isopr);
|
||||
if (isopr)
|
||||
infix(in, priority);
|
||||
if (priority < parentPriority)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, " )");
|
||||
@ -701,11 +838,18 @@ infix(INFIX *in, bool first)
|
||||
else
|
||||
{
|
||||
int8 op = in->curpol->qoperator.oper;
|
||||
int priority = PRINT_PRIORITY(in->curpol);
|
||||
int16 distance = in->curpol->qoperator.distance;
|
||||
INFIX nrm;
|
||||
bool needParenthesis = false;
|
||||
|
||||
in->curpol++;
|
||||
if (op == OP_OR && !first)
|
||||
if (priority < parentPriority ||
|
||||
(op == OP_PHRASE &&
|
||||
(priority == parentPriority || /* phrases are not commutative! */
|
||||
parentPriority == OP_PRIORITY(OP_AND))))
|
||||
{
|
||||
needParenthesis = true;
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, "( ");
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
@ -717,14 +861,14 @@ infix(INFIX *in, bool first)
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
|
||||
/* get right operand */
|
||||
infix(&nrm, false);
|
||||
infix(&nrm, priority);
|
||||
|
||||
/* get & print left operand */
|
||||
in->curpol = nrm.curpol;
|
||||
infix(in, false);
|
||||
infix(in, priority);
|
||||
|
||||
/* print operator & right operand */
|
||||
RESIZEBUF(in, 3 + (nrm.cur - nrm.buf));
|
||||
RESIZEBUF(in, 3 + (2 + 10 /* distance */) + (nrm.cur - nrm.buf));
|
||||
switch (op)
|
||||
{
|
||||
case OP_OR:
|
||||
@ -733,6 +877,12 @@ infix(INFIX *in, bool first)
|
||||
case OP_AND:
|
||||
sprintf(in->cur, " & %s", nrm.buf);
|
||||
break;
|
||||
case OP_PHRASE:
|
||||
if (distance != 1)
|
||||
sprintf(in->cur, " <%d> %s", distance, nrm.buf);
|
||||
else
|
||||
sprintf(in->cur, " <-> %s", nrm.buf);
|
||||
break;
|
||||
default:
|
||||
/* OP_NOT is handled in above if-branch */
|
||||
elog(ERROR, "unrecognized operator type: %d", op);
|
||||
@ -740,7 +890,7 @@ infix(INFIX *in, bool first)
|
||||
in->cur = strchr(in->cur, '\0');
|
||||
pfree(nrm.buf);
|
||||
|
||||
if (op == OP_OR && !first)
|
||||
if (needParenthesis)
|
||||
{
|
||||
RESIZEBUF(in, 2);
|
||||
sprintf(in->cur, " )");
|
||||
@ -749,7 +899,6 @@ infix(INFIX *in, bool first)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Datum
|
||||
tsqueryout(PG_FUNCTION_ARGS)
|
||||
{
|
||||
@ -768,7 +917,7 @@ tsqueryout(PG_FUNCTION_ARGS)
|
||||
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
|
||||
*(nrm.cur) = '\0';
|
||||
nrm.op = GETOPERAND(query);
|
||||
infix(&nrm, true);
|
||||
infix(&nrm, -1 /* lowest priority */);
|
||||
|
||||
PG_FREE_IF_COPY(query, 0);
|
||||
PG_RETURN_CSTRING(nrm.buf);
|
||||
@ -789,7 +938,8 @@ tsqueryout(PG_FUNCTION_ARGS)
|
||||
*
|
||||
* For each operator:
|
||||
* uint8 type, QI_OPR
|
||||
* uint8 operator, one of OP_AND, OP_OR, OP_NOT.
|
||||
* uint8 operator, one of OP_AND, OP_PHRASE, OP_OR, OP_NOT.
|
||||
* uint16 distance (only for OP_PHRASE)
|
||||
*/
|
||||
Datum
|
||||
tsquerysend(PG_FUNCTION_ARGS)
|
||||
@ -815,6 +965,9 @@ tsquerysend(PG_FUNCTION_ARGS)
|
||||
break;
|
||||
case QI_OPR:
|
||||
pq_sendint(&buf, item->qoperator.oper, sizeof(item->qoperator.oper));
|
||||
if (item->qoperator.oper == OP_PHRASE)
|
||||
pq_sendint(&buf, item->qoperator.distance,
|
||||
sizeof(item->qoperator.distance));
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "unrecognized tsquery node type: %d", item->type);
|
||||
@ -830,15 +983,16 @@ tsquerysend(PG_FUNCTION_ARGS)
|
||||
Datum
|
||||
tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||
TSQuery query;
|
||||
int i,
|
||||
len;
|
||||
QueryItem *item;
|
||||
int datalen;
|
||||
char *ptr;
|
||||
uint32 size;
|
||||
const char **operands;
|
||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||
TSQuery query;
|
||||
int i,
|
||||
len;
|
||||
QueryItem *item;
|
||||
int datalen;
|
||||
char *ptr;
|
||||
uint32 size;
|
||||
const char **operands;
|
||||
bool needcleanup;
|
||||
|
||||
size = pq_getmsgint(buf, sizeof(uint32));
|
||||
if (size > (MaxAllocSize / sizeof(QueryItem)))
|
||||
@ -907,13 +1061,15 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
int8 oper;
|
||||
|
||||
oper = (int8) pq_getmsgint(buf, sizeof(int8));
|
||||
if (oper != OP_NOT && oper != OP_OR && oper != OP_AND)
|
||||
if (oper != OP_NOT && oper != OP_OR && oper != OP_AND && oper != OP_PHRASE)
|
||||
elog(ERROR, "invalid tsquery: unrecognized operator type %d",
|
||||
(int) oper);
|
||||
if (i == size - 1)
|
||||
elog(ERROR, "invalid pointer to right operand");
|
||||
|
||||
item->qoperator.oper = oper;
|
||||
if (oper == OP_PHRASE)
|
||||
item->qoperator.distance = (int16) pq_getmsgint(buf, sizeof(int16));
|
||||
}
|
||||
else
|
||||
elog(ERROR, "unrecognized tsquery node type: %d", item->type);
|
||||
@ -930,7 +1086,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
* Fill in the left-pointers. Checks that the tree is well-formed as a
|
||||
* side-effect.
|
||||
*/
|
||||
findoprnd(item, size);
|
||||
findoprnd(item, size, &needcleanup);
|
||||
|
||||
/* Copy operands to output struct */
|
||||
for (i = 0; i < size; i++)
|
||||
@ -949,7 +1105,10 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
|
||||
SET_VARSIZE(query, len + datalen);
|
||||
|
||||
PG_RETURN_TSVECTOR(query);
|
||||
if (needcleanup)
|
||||
PG_RETURN_TSQUERY(cleanup_fakeval_and_phrase(query));
|
||||
|
||||
PG_RETURN_TSQUERY(query);
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -25,6 +25,12 @@ typedef struct NODE
|
||||
QueryItem *valnode;
|
||||
} NODE;
|
||||
|
||||
/* Non-operator nodes have fake (but highest) priority */
|
||||
#define NODE_PRIORITY(x) \
|
||||
( ((x)->valnode->qoperator.type == QI_OPR) ? \
|
||||
QO_PRIORITY((x)->valnode) : \
|
||||
TOP_PRIORITY )
|
||||
|
||||
/*
|
||||
* make query tree from plain view of query
|
||||
*/
|
||||
@ -160,7 +166,8 @@ clean_NOT_intree(NODE *node)
|
||||
{
|
||||
NODE *res = node;
|
||||
|
||||
Assert(node->valnode->qoperator.oper == OP_AND);
|
||||
Assert(node->valnode->qoperator.oper == OP_AND ||
|
||||
node->valnode->qoperator.oper == OP_PHRASE);
|
||||
|
||||
node->left = clean_NOT_intree(node->left);
|
||||
node->right = clean_NOT_intree(node->right);
|
||||
@ -212,18 +219,20 @@ clean_NOT(QueryItem *ptr, int *len)
|
||||
#define V_STOP 3 /* the expression is a stop word */
|
||||
|
||||
/*
|
||||
* Clean query tree from values which is always in
|
||||
* text (stopword)
|
||||
* Remove QI_VALSTOP (stopword nodes) from query tree.
|
||||
*/
|
||||
static NODE *
|
||||
clean_fakeval_intree(NODE *node, char *result)
|
||||
clean_fakeval_intree(NODE *node, char *result, int *adddistance)
|
||||
{
|
||||
char lresult = V_UNKNOWN,
|
||||
rresult = V_UNKNOWN;
|
||||
char lresult = V_UNKNOWN,
|
||||
rresult = V_UNKNOWN;
|
||||
|
||||
/* since this function recurses, it could be driven to stack overflow. */
|
||||
check_stack_depth();
|
||||
|
||||
if (adddistance)
|
||||
*adddistance = 0;
|
||||
|
||||
if (node->valnode->type == QI_VAL)
|
||||
return node;
|
||||
else if (node->valnode->type == QI_VALSTOP)
|
||||
@ -237,7 +246,7 @@ clean_fakeval_intree(NODE *node, char *result)
|
||||
|
||||
if (node->valnode->qoperator.oper == OP_NOT)
|
||||
{
|
||||
node->right = clean_fakeval_intree(node->right, &rresult);
|
||||
node->right = clean_fakeval_intree(node->right, &rresult, NULL);
|
||||
if (!node->right)
|
||||
{
|
||||
*result = V_STOP;
|
||||
@ -247,13 +256,30 @@ clean_fakeval_intree(NODE *node, char *result)
|
||||
}
|
||||
else
|
||||
{
|
||||
NODE *res = node;
|
||||
NODE *res = node;
|
||||
int ndistance, ldistance = 0, rdistance = 0;
|
||||
|
||||
node->left = clean_fakeval_intree(node->left, &lresult);
|
||||
node->right = clean_fakeval_intree(node->right, &rresult);
|
||||
ndistance = (node->valnode->qoperator.oper == OP_PHRASE) ?
|
||||
node->valnode->qoperator.distance :
|
||||
0;
|
||||
|
||||
node->left = clean_fakeval_intree(node->left,
|
||||
&lresult,
|
||||
ndistance ? &ldistance : NULL);
|
||||
|
||||
node->right = clean_fakeval_intree(node->right,
|
||||
&rresult,
|
||||
ndistance ? &rdistance : NULL);
|
||||
|
||||
/*
|
||||
* ndistance, ldistance and rdistance are greater than zero
|
||||
* if their corresponding nodes are OP_PHRASE
|
||||
*/
|
||||
|
||||
if (lresult == V_STOP && rresult == V_STOP)
|
||||
{
|
||||
if (adddistance && ndistance)
|
||||
*adddistance = ldistance + ndistance + rdistance;
|
||||
freetree(node);
|
||||
*result = V_STOP;
|
||||
return NULL;
|
||||
@ -261,33 +287,333 @@ clean_fakeval_intree(NODE *node, char *result)
|
||||
else if (lresult == V_STOP)
|
||||
{
|
||||
res = node->right;
|
||||
/*
|
||||
* propagate distance from current node to the
|
||||
* right upper subtree.
|
||||
*/
|
||||
if (adddistance && ndistance)
|
||||
*adddistance = rdistance;
|
||||
pfree(node);
|
||||
}
|
||||
else if (rresult == V_STOP)
|
||||
{
|
||||
res = node->left;
|
||||
/*
|
||||
* propagate distance from current node to the upper tree.
|
||||
*/
|
||||
if (adddistance && ndistance)
|
||||
*adddistance = ndistance + ldistance;
|
||||
pfree(node);
|
||||
}
|
||||
else if (ndistance)
|
||||
{
|
||||
node->valnode->qoperator.distance += ldistance;
|
||||
if (adddistance)
|
||||
*adddistance = 0;
|
||||
}
|
||||
else if (adddistance)
|
||||
{
|
||||
*adddistance = 0;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
QueryItem *
|
||||
clean_fakeval(QueryItem *ptr, int *len)
|
||||
static NODE *
|
||||
copyNODE(NODE *node)
|
||||
{
|
||||
NODE *root = maketree(ptr);
|
||||
char result = V_UNKNOWN;
|
||||
NODE *resroot;
|
||||
NODE *cnode = palloc(sizeof(NODE));
|
||||
|
||||
resroot = clean_fakeval_intree(root, &result);
|
||||
/* since this function recurses, it could be driven to stack overflow. */
|
||||
check_stack_depth();
|
||||
|
||||
cnode->valnode = palloc(sizeof(QueryItem));
|
||||
*(cnode->valnode) = *(node->valnode);
|
||||
|
||||
if (node->valnode->type == QI_OPR)
|
||||
{
|
||||
cnode->right = copyNODE(node->right);
|
||||
if (node->valnode->qoperator.oper != OP_NOT)
|
||||
cnode->left = copyNODE(node->left);
|
||||
}
|
||||
|
||||
return cnode;
|
||||
}
|
||||
|
||||
static NODE *
|
||||
makeNODE(int8 op, NODE *left, NODE *right)
|
||||
{
|
||||
NODE *node = palloc(sizeof(NODE));
|
||||
|
||||
node->valnode = palloc(sizeof(QueryItem));
|
||||
|
||||
node->valnode->qoperator.type = QI_OPR;
|
||||
node->valnode->qoperator.oper = op;
|
||||
|
||||
node->left = left;
|
||||
node->right = right;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/*
|
||||
* Move operation with high priority to the leaves. This guarantees
|
||||
* that the phrase operator will be near the bottom of the tree.
|
||||
* An idea behind is do not store position of lexemes during execution
|
||||
* of ordinary operations (AND, OR, NOT) because it could be expensive.
|
||||
* Actual transformation will be performed only on subtrees under the
|
||||
* <-> (<n>) operation since it's needed solely for the phrase operator.
|
||||
*
|
||||
* Rules:
|
||||
* a <-> (b | c) => (a <-> b) | (a <-> c)
|
||||
* (a | b) <-> c => (a <-> c) | (b <-> c)
|
||||
* a <-> !b => a & !(a <-> b)
|
||||
* !a <-> b => b & !(a <-> b)
|
||||
*
|
||||
* Warnings for readers:
|
||||
* a <-> b != b <-> a
|
||||
*
|
||||
* a <n> (b <n> c) != (a <n> b) <n> c since the phrase lengths are:
|
||||
* n 2n-1
|
||||
*/
|
||||
static NODE *
|
||||
normalize_phrase_tree(NODE *node)
|
||||
{
|
||||
/* there should be no stop words at this point */
|
||||
Assert(node->valnode->type != QI_VALSTOP);
|
||||
|
||||
if (node->valnode->type == QI_VAL)
|
||||
return node;
|
||||
|
||||
/* since this function recurses, it could be driven to stack overflow. */
|
||||
check_stack_depth();
|
||||
|
||||
Assert(node->valnode->type == QI_OPR);
|
||||
|
||||
if (node->valnode->qoperator.oper == OP_NOT)
|
||||
{
|
||||
/* eliminate NOT sequence */
|
||||
while (node->valnode->type == QI_OPR &&
|
||||
node->valnode->qoperator.oper == node->right->valnode->qoperator.oper)
|
||||
{
|
||||
node = node->right->right;
|
||||
}
|
||||
|
||||
node->right = normalize_phrase_tree(node->right);
|
||||
}
|
||||
else if (node->valnode->qoperator.oper == OP_PHRASE)
|
||||
{
|
||||
int16 distance;
|
||||
NODE *X;
|
||||
|
||||
node->left = normalize_phrase_tree(node->left);
|
||||
node->right = normalize_phrase_tree(node->right);
|
||||
|
||||
if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
|
||||
NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
|
||||
return node;
|
||||
|
||||
/*
|
||||
* We can't swap left-right and works only with left child
|
||||
* because of a <-> b != b <-> a
|
||||
*/
|
||||
|
||||
distance = node->valnode->qoperator.distance;
|
||||
|
||||
if (node->right->valnode->type == QI_OPR)
|
||||
{
|
||||
switch (node->right->valnode->qoperator.oper)
|
||||
{
|
||||
case OP_AND:
|
||||
/* a <-> (b & c) => (a <-> b) & (a <-> c) */
|
||||
node = makeNODE(OP_AND,
|
||||
makeNODE(OP_PHRASE,
|
||||
node->left,
|
||||
node->right->left),
|
||||
makeNODE(OP_PHRASE,
|
||||
copyNODE(node->left),
|
||||
node->right->right));
|
||||
node->left->valnode->qoperator.distance =
|
||||
node->right->valnode->qoperator.distance = distance;
|
||||
break;
|
||||
case OP_OR:
|
||||
/* a <-> (b | c) => (a <-> b) | (a <-> c) */
|
||||
node = makeNODE(OP_OR,
|
||||
makeNODE(OP_PHRASE,
|
||||
node->left,
|
||||
node->right->left),
|
||||
makeNODE(OP_PHRASE,
|
||||
copyNODE(node->left),
|
||||
node->right->right));
|
||||
node->left->valnode->qoperator.distance =
|
||||
node->right->valnode->qoperator.distance = distance;
|
||||
break;
|
||||
case OP_NOT:
|
||||
/* a <-> !b => a & !(a <-> b) */
|
||||
X = node->right;
|
||||
node->right = node->right->right;
|
||||
X->right = node;
|
||||
node = makeNODE(OP_AND,
|
||||
copyNODE(node->left),
|
||||
X);
|
||||
break;
|
||||
case OP_PHRASE:
|
||||
/* no-op */
|
||||
break;
|
||||
default:
|
||||
elog(ERROR,"Wrong type of tsquery node: %d",
|
||||
node->right->valnode->qoperator.oper);
|
||||
}
|
||||
}
|
||||
|
||||
if (node->left->valnode->type == QI_OPR &&
|
||||
node->valnode->qoperator.oper == OP_PHRASE)
|
||||
{
|
||||
/*
|
||||
* if the node is still OP_PHRASE, check the left subtree,
|
||||
* otherwise the whole node will be transformed later.
|
||||
*/
|
||||
switch(node->left->valnode->qoperator.oper)
|
||||
{
|
||||
case OP_AND:
|
||||
/* (a & b) <-> c => (a <-> c) & (b <-> c) */
|
||||
node = makeNODE(OP_AND,
|
||||
makeNODE(OP_PHRASE,
|
||||
node->left->left,
|
||||
node->right),
|
||||
makeNODE(OP_PHRASE,
|
||||
node->left->right,
|
||||
copyNODE(node->right)));
|
||||
node->left->valnode->qoperator.distance =
|
||||
node->right->valnode->qoperator.distance = distance;
|
||||
break;
|
||||
case OP_OR:
|
||||
/* (a | b) <-> c => (a <-> c) | (b <-> c) */
|
||||
node = makeNODE(OP_OR,
|
||||
makeNODE(OP_PHRASE,
|
||||
node->left->left,
|
||||
node->right),
|
||||
makeNODE(OP_PHRASE,
|
||||
node->left->right,
|
||||
copyNODE(node->right)));
|
||||
node->left->valnode->qoperator.distance =
|
||||
node->right->valnode->qoperator.distance = distance;
|
||||
break;
|
||||
case OP_NOT:
|
||||
/* !a <-> b => b & !(a <-> b) */
|
||||
X = node->left;
|
||||
node->left = node->left->right;
|
||||
X->right = node;
|
||||
node = makeNODE(OP_AND,
|
||||
X,
|
||||
copyNODE(node->right));
|
||||
break;
|
||||
case OP_PHRASE:
|
||||
/* no-op */
|
||||
break;
|
||||
default:
|
||||
elog(ERROR,"Wrong type of tsquery node: %d",
|
||||
node->left->valnode->qoperator.oper);
|
||||
}
|
||||
}
|
||||
|
||||
/* continue transformation */
|
||||
node = normalize_phrase_tree(node);
|
||||
}
|
||||
else /* AND or OR */
|
||||
{
|
||||
node->left = normalize_phrase_tree(node->left);
|
||||
node->right = normalize_phrase_tree(node->right);
|
||||
}
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/*
|
||||
* Number of elements in query tree
|
||||
*/
|
||||
static int32
|
||||
calcstrlen(NODE *node)
|
||||
{
|
||||
int32 size = 0;
|
||||
|
||||
if (node->valnode->type == QI_VAL)
|
||||
{
|
||||
size = node->valnode->qoperand.length + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(node->valnode->type == QI_OPR);
|
||||
|
||||
size = calcstrlen(node->right);
|
||||
if (node->valnode->qoperator.oper != OP_NOT)
|
||||
size += calcstrlen(node->left);
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
TSQuery
|
||||
cleanup_fakeval_and_phrase(TSQuery in)
|
||||
{
|
||||
int32 len,
|
||||
lenstr,
|
||||
commonlen,
|
||||
i;
|
||||
NODE *root;
|
||||
char result = V_UNKNOWN;
|
||||
TSQuery out;
|
||||
QueryItem *items;
|
||||
char *operands;
|
||||
|
||||
if (in->size == 0)
|
||||
return in;
|
||||
|
||||
/* eliminate stop words */
|
||||
root = clean_fakeval_intree(maketree(GETQUERY(in)), &result, NULL);
|
||||
if (result != V_UNKNOWN)
|
||||
{
|
||||
ereport(NOTICE,
|
||||
(errmsg("text-search query contains only stop words or doesn't contain lexemes, ignored")));
|
||||
*len = 0;
|
||||
return NULL;
|
||||
out = palloc(HDRSIZETQ);
|
||||
out->size = 0;
|
||||
SET_VARSIZE(out, HDRSIZETQ);
|
||||
return out;
|
||||
}
|
||||
|
||||
return plaintree(resroot, len);
|
||||
/* push OP_PHRASE nodes down */
|
||||
root = normalize_phrase_tree(root);
|
||||
|
||||
/*
|
||||
* Build TSQuery from plain view
|
||||
*/
|
||||
|
||||
lenstr = calcstrlen(root);
|
||||
items = plaintree(root, &len);
|
||||
commonlen = COMPUTESIZE(len, lenstr);
|
||||
|
||||
out = palloc(commonlen);
|
||||
SET_VARSIZE(out, commonlen);
|
||||
out->size = len;
|
||||
|
||||
memcpy(GETQUERY(out), items, len * sizeof(QueryItem));
|
||||
|
||||
items = GETQUERY(out);
|
||||
operands = GETOPERAND(out);
|
||||
for (i = 0; i < out->size; i++)
|
||||
{
|
||||
QueryOperand *op = (QueryOperand *) &items[i];
|
||||
|
||||
if (op->type != QI_VAL)
|
||||
continue;
|
||||
|
||||
memcpy(operands, GETOPERAND(in) + op->distance, op->length);
|
||||
operands[op->length] = '\0';
|
||||
op->distance = operands - GETOPERAND(out);
|
||||
operands += op->length + 1;
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -27,7 +27,7 @@ tsquery_numnode(PG_FUNCTION_ARGS)
|
||||
}
|
||||
|
||||
static QTNode *
|
||||
join_tsqueries(TSQuery a, TSQuery b, int8 operator)
|
||||
join_tsqueries(TSQuery a, TSQuery b, int8 operator, uint16 distance)
|
||||
{
|
||||
QTNode *res = (QTNode *) palloc0(sizeof(QTNode));
|
||||
|
||||
@ -36,6 +36,8 @@ join_tsqueries(TSQuery a, TSQuery b, int8 operator)
|
||||
res->valnode = (QueryItem *) palloc0(sizeof(QueryItem));
|
||||
res->valnode->type = QI_OPR;
|
||||
res->valnode->qoperator.oper = operator;
|
||||
if (operator == OP_PHRASE)
|
||||
res->valnode->qoperator.distance = distance;
|
||||
|
||||
res->child = (QTNode **) palloc0(sizeof(QTNode *) * 2);
|
||||
res->child[0] = QT2QTN(GETQUERY(b), GETOPERAND(b));
|
||||
@ -64,7 +66,7 @@ tsquery_and(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b, OP_AND);
|
||||
res = join_tsqueries(a, b, OP_AND, 0);
|
||||
|
||||
query = QTN2QT(res);
|
||||
|
||||
@ -94,7 +96,7 @@ tsquery_or(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b, OP_OR);
|
||||
res = join_tsqueries(a, b, OP_OR, 0);
|
||||
|
||||
query = QTN2QT(res);
|
||||
|
||||
@ -105,6 +107,52 @@ tsquery_or(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_phrase_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
TSQuery a = PG_GETARG_TSQUERY_COPY(0);
|
||||
TSQuery b = PG_GETARG_TSQUERY_COPY(1);
|
||||
QTNode *res;
|
||||
TSQuery query;
|
||||
int32 distance = PG_GETARG_INT32(2);
|
||||
|
||||
if (distance < 0 || distance > MAXENTRYPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("distance in phrase operator should be non-negative and less than %d",
|
||||
MAXENTRYPOS)));
|
||||
if (a->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(a, 1);
|
||||
PG_RETURN_POINTER(b);
|
||||
}
|
||||
else if (b->size == 0)
|
||||
{
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
PG_RETURN_POINTER(a);
|
||||
}
|
||||
|
||||
res = join_tsqueries(a, b, OP_PHRASE, (uint16) distance);
|
||||
|
||||
query = QTN2QT(res);
|
||||
|
||||
QTNFree(res);
|
||||
PG_FREE_IF_COPY(a, 0);
|
||||
PG_FREE_IF_COPY(b, 1);
|
||||
|
||||
PG_RETURN_POINTER(cleanup_fakeval_and_phrase(query));
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_phrase(PG_FUNCTION_ARGS)
|
||||
{
|
||||
PG_RETURN_POINTER(DirectFunctionCall3(
|
||||
tsquery_phrase_distance,
|
||||
PG_GETARG_DATUM(0),
|
||||
PG_GETARG_DATUM(1),
|
||||
Int32GetDatum(1)));
|
||||
}
|
||||
|
||||
Datum
|
||||
tsquery_not(PG_FUNCTION_ARGS)
|
||||
{
|
||||
|
@ -110,6 +110,10 @@ QTNodeCompare(QTNode *an, QTNode *bn)
|
||||
if ((res = QTNodeCompare(an->child[i], bn->child[i])) != 0)
|
||||
return res;
|
||||
}
|
||||
|
||||
if (ao->oper == OP_PHRASE && ao->distance != bo->distance)
|
||||
return (ao->distance > bo->distance) ? -1 : 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
else if (an->valnode->type == QI_VAL)
|
||||
@ -150,7 +154,7 @@ QTNSort(QTNode *in)
|
||||
|
||||
for (i = 0; i < in->nchild; i++)
|
||||
QTNSort(in->child[i]);
|
||||
if (in->nchild > 1)
|
||||
if (in->nchild > 1 && in->valnode->qoperator.oper != OP_PHRASE)
|
||||
qsort((void *) in->child, in->nchild, sizeof(QTNode *), cmpQTN);
|
||||
}
|
||||
|
||||
@ -190,7 +194,10 @@ QTNTernary(QTNode *in)
|
||||
{
|
||||
QTNode *cc = in->child[i];
|
||||
|
||||
if (cc->valnode->type == QI_OPR && in->valnode->qoperator.oper == cc->valnode->qoperator.oper)
|
||||
/* OP_PHRASE isn't associative */
|
||||
if (cc->valnode->type == QI_OPR &&
|
||||
in->valnode->qoperator.oper == cc->valnode->qoperator.oper &&
|
||||
in->valnode->qoperator.oper != OP_PHRASE)
|
||||
{
|
||||
int oldnchild = in->nchild;
|
||||
|
||||
|
@ -364,8 +364,10 @@ calc_rank(const float *w, TSVector t, TSQuery q, int32 method)
|
||||
return 0.0;
|
||||
|
||||
/* XXX: What about NOT? */
|
||||
res = (item->type == QI_OPR && item->qoperator.oper == OP_AND) ?
|
||||
calc_rank_and(w, t, q) : calc_rank_or(w, t, q);
|
||||
res = (item->type == QI_OPR && (item->qoperator.oper == OP_AND ||
|
||||
item->qoperator.oper == OP_PHRASE)) ?
|
||||
calc_rank_and(w, t, q) :
|
||||
calc_rank_or(w, t, q);
|
||||
|
||||
if (res < 0)
|
||||
res = 1e-20f;
|
||||
@ -496,10 +498,17 @@ ts_rank_tt(PG_FUNCTION_ARGS)
|
||||
|
||||
typedef struct
|
||||
{
|
||||
QueryItem **item;
|
||||
int16 nitem;
|
||||
uint8 wclass;
|
||||
int32 pos;
|
||||
union {
|
||||
struct { /* compiled doc representation */
|
||||
QueryItem **items;
|
||||
int16 nitem;
|
||||
} query;
|
||||
struct { /* struct is used for preparing doc representation */
|
||||
QueryItem *item;
|
||||
WordEntry *entry;
|
||||
} map;
|
||||
} data;
|
||||
WordEntryPos pos;
|
||||
} DocRepresentation;
|
||||
|
||||
static int
|
||||
@ -508,26 +517,59 @@ compareDocR(const void *va, const void *vb)
|
||||
const DocRepresentation *a = (const DocRepresentation *) va;
|
||||
const DocRepresentation *b = (const DocRepresentation *) vb;
|
||||
|
||||
if (a->pos == b->pos)
|
||||
return 0;
|
||||
return (a->pos > b->pos) ? 1 : -1;
|
||||
if (WEP_GETPOS(a->pos) == WEP_GETPOS(b->pos))
|
||||
{
|
||||
if (WEP_GETWEIGHT(a->pos) == WEP_GETWEIGHT(b->pos))
|
||||
{
|
||||
if (a->data.map.entry == b->data.map.entry)
|
||||
return 0;
|
||||
|
||||
return (a->data.map.entry > b->data.map.entry) ? 1 : -1;
|
||||
}
|
||||
|
||||
return (WEP_GETWEIGHT(a->pos) > WEP_GETWEIGHT(b->pos)) ? 1 : -1;
|
||||
}
|
||||
|
||||
return (WEP_GETPOS(a->pos) > WEP_GETPOS(b->pos)) ? 1 : -1;
|
||||
}
|
||||
|
||||
/* Capacity of the per-operand position array below */
#define MAXQROPOS MAXENTRYPOS
|
||||
/*
 * Per-operand state collected while scanning the document representation:
 * whether the operand has been seen, and at which positions.
 */
typedef struct
|
||||
{
|
||||
/* set by fillQueryRepresentationData() once a doc entry refers to this operand */
bool operandexists;
|
||||
bool reverseinsert; /* indicates insert order,
|
||||
true means descending order */
|
||||
/* number of entries currently stored in pos[] */
uint32 npos;
|
||||
/*
 * Stored positions; with reverseinsert set the array is filled from its
 * last slot towards the front, otherwise from slot 0 upwards.
 */
WordEntryPos pos[MAXQROPOS];
|
||||
} QueryRepresentationOperand;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TSQuery query;
|
||||
bool *operandexist;
|
||||
TSQuery query;
|
||||
QueryRepresentationOperand *operandData;
|
||||
} QueryRepresentation;
|
||||
|
||||
#define QR_GET_OPERAND_EXISTS(q, v) ( (q)->operandexist[ ((QueryItem*)(v)) - GETQUERY((q)->query) ] )
|
||||
#define QR_SET_OPERAND_EXISTS(q, v) QR_GET_OPERAND_EXISTS(q,v) = true
|
||||
#define QR_GET_OPERAND_DATA(q, v) \
|
||||
( (q)->operandData + (((QueryItem*)(v)) - GETQUERY((q)->query)) )
|
||||
|
||||
static bool
|
||||
checkcondition_QueryOperand(void *checkval, QueryOperand *val)
|
||||
checkcondition_QueryOperand(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
QueryRepresentation *qr = (QueryRepresentation *) checkval;
|
||||
QueryRepresentation *qr = (QueryRepresentation *) checkval;
|
||||
QueryRepresentationOperand *opData = QR_GET_OPERAND_DATA(qr, val);
|
||||
|
||||
return QR_GET_OPERAND_EXISTS(qr, val);
|
||||
if (!opData->operandexists)
|
||||
return false;
|
||||
|
||||
if (data)
|
||||
{
|
||||
data->npos = opData->npos;
|
||||
data->pos = opData->pos;
|
||||
if (opData->reverseinsert)
|
||||
data->pos += MAXQROPOS - opData->npos;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
typedef struct
|
||||
@ -539,14 +581,65 @@ typedef struct
|
||||
DocRepresentation *end;
|
||||
} CoverExt;
|
||||
|
||||
static void
|
||||
resetQueryRepresentation(QueryRepresentation *qr, bool reverseinsert)
|
||||
{
|
||||
int i;
|
||||
|
||||
for(i = 0; i < qr->query->size; i++)
|
||||
{
|
||||
qr->operandData[i].operandexists = false;
|
||||
qr->operandData[i].reverseinsert = reverseinsert;
|
||||
qr->operandData[i].npos = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
fillQueryRepresentationData(QueryRepresentation *qr, DocRepresentation *entry)
|
||||
{
|
||||
int i;
|
||||
int lastPos;
|
||||
QueryRepresentationOperand *opData;
|
||||
|
||||
for (i = 0; i < entry->data.query.nitem; i++)
|
||||
{
|
||||
if (entry->data.query.items[i]->type != QI_VAL)
|
||||
continue;
|
||||
|
||||
opData = QR_GET_OPERAND_DATA(qr, entry->data.query.items[i]);
|
||||
|
||||
opData->operandexists = true;
|
||||
|
||||
if (opData->npos == 0)
|
||||
{
|
||||
lastPos = (opData->reverseinsert) ? (MAXQROPOS - 1) : 0;
|
||||
opData->pos[lastPos] = entry->pos;
|
||||
opData->npos++;
|
||||
continue;
|
||||
}
|
||||
|
||||
lastPos = opData->reverseinsert ?
|
||||
(MAXQROPOS - opData->npos) :
|
||||
(opData->npos - 1);
|
||||
|
||||
if (WEP_GETPOS(opData->pos[lastPos]) != WEP_GETPOS(entry->pos))
|
||||
{
|
||||
lastPos = opData->reverseinsert ?
|
||||
(MAXQROPOS - 1 - opData->npos) :
|
||||
(opData->npos);
|
||||
|
||||
opData->pos[lastPos] = entry->pos;
|
||||
opData->npos++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static bool
|
||||
Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
{
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
int i;
|
||||
bool found = false;
|
||||
DocRepresentation *ptr;
|
||||
int lastpos = ext->pos;
|
||||
bool found = false;
|
||||
|
||||
/*
|
||||
* since this function recurses, it could be driven to stack overflow.
|
||||
@ -554,7 +647,7 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
*/
|
||||
check_stack_depth();
|
||||
|
||||
memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
|
||||
resetQueryRepresentation(qr, false);
|
||||
|
||||
ext->p = INT_MAX;
|
||||
ext->q = 0;
|
||||
@ -563,16 +656,13 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
/* find upper bound of cover from current position, move up */
|
||||
while (ptr - doc < len)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
{
|
||||
if (ptr->item[i]->type == QI_VAL)
|
||||
QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
|
||||
}
|
||||
fillQueryRepresentationData(qr, ptr);
|
||||
|
||||
if (TS_execute(GETQUERY(qr->query), (void *) qr, false, checkcondition_QueryOperand))
|
||||
{
|
||||
if (ptr->pos > ext->q)
|
||||
if (WEP_GETPOS(ptr->pos) > ext->q)
|
||||
{
|
||||
ext->q = ptr->pos;
|
||||
ext->q = WEP_GETPOS(ptr->pos);
|
||||
ext->end = ptr;
|
||||
lastpos = ptr - doc;
|
||||
found = true;
|
||||
@ -585,22 +675,24 @@ Cover(DocRepresentation *doc, int len, QueryRepresentation *qr, CoverExt *ext)
|
||||
if (!found)
|
||||
return false;
|
||||
|
||||
memset(qr->operandexist, 0, sizeof(bool) * qr->query->size);
|
||||
resetQueryRepresentation(qr, true);
|
||||
|
||||
ptr = doc + lastpos;
|
||||
|
||||
/* find lower bound of cover from found upper bound, move down */
|
||||
while (ptr >= doc + ext->pos)
|
||||
{
|
||||
for (i = 0; i < ptr->nitem; i++)
|
||||
if (ptr->item[i]->type == QI_VAL)
|
||||
QR_SET_OPERAND_EXISTS(qr, ptr->item[i]);
|
||||
/*
|
||||
* we scan doc from right to left, so pos info in reverse order!
|
||||
*/
|
||||
fillQueryRepresentationData(qr, ptr);
|
||||
|
||||
if (TS_execute(GETQUERY(qr->query), (void *) qr, true, checkcondition_QueryOperand))
|
||||
{
|
||||
if (ptr->pos < ext->p)
|
||||
if (WEP_GETPOS(ptr->pos) < ext->p)
|
||||
{
|
||||
ext->begin = ptr;
|
||||
ext->p = ptr->pos;
|
||||
ext->p = WEP_GETPOS(ptr->pos);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -628,18 +720,20 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
WordEntry *entry,
|
||||
*firstentry;
|
||||
WordEntryPos *post;
|
||||
int32 dimt,
|
||||
int32 dimt, /* number of 'post' items */
|
||||
j,
|
||||
i,
|
||||
nitem;
|
||||
int len = qr->query->size * 4,
|
||||
cur = 0;
|
||||
DocRepresentation *doc;
|
||||
char *operand;
|
||||
|
||||
doc = (DocRepresentation *) palloc(sizeof(DocRepresentation) * len);
|
||||
operand = GETOPERAND(qr->query);
|
||||
|
||||
/*
|
||||
* Iterate through query to make DocRepresentation for words and its entries
|
||||
* satisfied by query
|
||||
*/
|
||||
for (i = 0; i < qr->query->size; i++)
|
||||
{
|
||||
QueryOperand *curoperand;
|
||||
@ -649,13 +743,11 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
|
||||
curoperand = &item[i].qoperand;
|
||||
|
||||
if (QR_GET_OPERAND_EXISTS(qr, &item[i]))
|
||||
continue;
|
||||
|
||||
firstentry = entry = find_wordentry(txt, qr->query, curoperand, &nitem);
|
||||
if (!entry)
|
||||
continue;
|
||||
|
||||
/* iterations over entries in tsvector */
|
||||
while (entry - firstentry < nitem)
|
||||
{
|
||||
if (entry->haspos)
|
||||
@ -676,53 +768,67 @@ get_docrep(TSVector txt, QueryRepresentation *qr, int *doclen)
|
||||
doc = (DocRepresentation *) repalloc(doc, sizeof(DocRepresentation) * len);
|
||||
}
|
||||
|
||||
/* iterations over entry's positions */
|
||||
for (j = 0; j < dimt; j++)
|
||||
{
|
||||
if (j == 0)
|
||||
if (curoperand->weight == 0 ||
|
||||
curoperand->weight & (1 << WEP_GETWEIGHT(post[j])))
|
||||
{
|
||||
int k;
|
||||
|
||||
doc[cur].nitem = 0;
|
||||
doc[cur].item = (QueryItem **) palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
|
||||
for (k = 0; k < qr->query->size; k++)
|
||||
{
|
||||
QueryOperand *kptr = &item[k].qoperand;
|
||||
QueryOperand *iptr = &item[i].qoperand;
|
||||
|
||||
if (k == i ||
|
||||
(item[k].type == QI_VAL &&
|
||||
compareQueryOperand(&kptr, &iptr, operand) == 0))
|
||||
{
|
||||
/*
|
||||
* if k == i, we've already checked above that
|
||||
* it's type == Q_VAL
|
||||
*/
|
||||
doc[cur].item[doc[cur].nitem] = item + k;
|
||||
doc[cur].nitem++;
|
||||
QR_SET_OPERAND_EXISTS(qr, item + k);
|
||||
}
|
||||
}
|
||||
doc[cur].pos = post[j];
|
||||
doc[cur].data.map.entry = entry;
|
||||
doc[cur].data.map.item = (QueryItem *) curoperand;
|
||||
cur++;
|
||||
}
|
||||
else
|
||||
{
|
||||
doc[cur].nitem = doc[cur - 1].nitem;
|
||||
doc[cur].item = doc[cur - 1].item;
|
||||
}
|
||||
doc[cur].pos = WEP_GETPOS(post[j]);
|
||||
doc[cur].wclass = WEP_GETWEIGHT(post[j]);
|
||||
cur++;
|
||||
}
|
||||
|
||||
entry++;
|
||||
}
|
||||
}
|
||||
|
||||
*doclen = cur;
|
||||
|
||||
if (cur > 0)
|
||||
{
|
||||
DocRepresentation *rptr = doc + 1,
|
||||
*wptr = doc,
|
||||
storage;
|
||||
|
||||
/*
|
||||
* Sort representation in ascending order by pos and entry
|
||||
*/
|
||||
qsort((void *) doc, cur, sizeof(DocRepresentation), compareDocR);
|
||||
|
||||
/*
|
||||
* Join QueryItem per WordEntry and its position
|
||||
*/
|
||||
storage.pos = doc->pos;
|
||||
storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
storage.data.query.items[0] = doc->data.map.item;
|
||||
storage.data.query.nitem = 1;
|
||||
|
||||
while (rptr - doc < cur)
|
||||
{
|
||||
if (rptr->pos == (rptr-1)->pos &&
|
||||
rptr->data.map.entry == (rptr-1)->data.map.entry)
|
||||
{
|
||||
storage.data.query.items[storage.data.query.nitem] = rptr->data.map.item;
|
||||
storage.data.query.nitem++;
|
||||
}
|
||||
else
|
||||
{
|
||||
*wptr = storage;
|
||||
wptr++;
|
||||
storage.pos = rptr->pos;
|
||||
storage.data.query.items = palloc(sizeof(QueryItem *) * qr->query->size);
|
||||
storage.data.query.items[0] = rptr->data.map.item;
|
||||
storage.data.query.nitem = 1;
|
||||
}
|
||||
|
||||
rptr++;
|
||||
}
|
||||
|
||||
*wptr = storage;
|
||||
wptr++;
|
||||
|
||||
*doclen = wptr - doc;
|
||||
return doc;
|
||||
}
|
||||
|
||||
@ -758,12 +864,13 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
}
|
||||
|
||||
qr.query = query;
|
||||
qr.operandexist = (bool *) palloc0(sizeof(bool) * query->size);
|
||||
qr.operandData = (QueryRepresentationOperand *)
|
||||
palloc0(sizeof(QueryRepresentationOperand) * query->size);
|
||||
|
||||
doc = get_docrep(txt, &qr, &doclen);
|
||||
if (!doc)
|
||||
{
|
||||
pfree(qr.operandexist);
|
||||
pfree(qr.operandData);
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
@ -777,7 +884,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
|
||||
while (ptr <= ext.end)
|
||||
{
|
||||
InvSum += invws[ptr->wclass];
|
||||
InvSum += invws[WEP_GETWEIGHT(ptr->pos)];
|
||||
ptr++;
|
||||
}
|
||||
|
||||
@ -827,7 +934,7 @@ calc_rank_cd(const float4 *arrdata, TSVector txt, TSQuery query, int method)
|
||||
|
||||
pfree(doc);
|
||||
|
||||
pfree(qr.operandexist);
|
||||
pfree(qr.operandData);
|
||||
|
||||
return (float4) Wdoc;
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ typedef struct
|
||||
|
||||
|
||||
/* Compare two WordEntryPos values for qsort */
|
||||
static int
|
||||
int
|
||||
comparePos(const void *a, const void *b)
|
||||
{
|
||||
int apos = WEP_GETPOS(*(const WordEntryPos *) a);
|
||||
|
@ -1121,35 +1121,124 @@ tsCompareString(char *a, int lena, char *b, int lenb, bool prefix)
|
||||
}
|
||||
|
||||
/*
|
||||
* check weight info
|
||||
* Check weight info or/and fill 'data' with the required positions
|
||||
*/
|
||||
static bool
|
||||
checkclass_str(CHKVAL *chkval, WordEntry *val, QueryOperand *item)
|
||||
checkclass_str(CHKVAL *chkval, WordEntry *entry, QueryOperand *val,
|
||||
ExecPhraseData *data)
|
||||
{
|
||||
WordEntryPosVector *posvec;
|
||||
WordEntryPos *ptr;
|
||||
uint16 len;
|
||||
bool result = false;
|
||||
|
||||
posvec = (WordEntryPosVector *)
|
||||
(chkval->values + SHORTALIGN(val->pos + val->len));
|
||||
|
||||
len = posvec->npos;
|
||||
ptr = posvec->pos;
|
||||
|
||||
while (len--)
|
||||
if (entry->haspos && (val->weight || data))
|
||||
{
|
||||
if (item->weight & (1 << WEP_GETWEIGHT(*ptr)))
|
||||
return true;
|
||||
ptr++;
|
||||
WordEntryPosVector *posvec;
|
||||
|
||||
/*
|
||||
* We can't use the _POSVECPTR macro here because the pointer to the
|
||||
* tsvector's lexeme storage is already contained in chkval->values.
|
||||
*/
|
||||
posvec = (WordEntryPosVector *)
|
||||
(chkval->values + SHORTALIGN(entry->pos + entry->len));
|
||||
|
||||
if (val->weight && data)
|
||||
{
|
||||
WordEntryPos *posvec_iter = posvec->pos;
|
||||
WordEntryPos *dptr;
|
||||
|
||||
/*
|
||||
* Filter position information by weights
|
||||
*/
|
||||
dptr = data->pos = palloc(sizeof(WordEntryPos) * posvec->npos);
|
||||
data->allocated = true;
|
||||
|
||||
/* Is there a position with a matching weight? */
|
||||
while (posvec_iter < posvec->pos + posvec->npos)
|
||||
{
|
||||
/* If true, append this position to the data->pos */
|
||||
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
|
||||
{
|
||||
*dptr = WEP_GETPOS(*posvec_iter);
|
||||
dptr++;
|
||||
}
|
||||
|
||||
posvec_iter++;
|
||||
}
|
||||
|
||||
data->npos = dptr - data->pos;
|
||||
|
||||
if (data->npos > 0)
|
||||
result = true;
|
||||
}
|
||||
else if (val->weight)
|
||||
{
|
||||
WordEntryPos *posvec_iter = posvec->pos;
|
||||
|
||||
/* Is there a position with a matching weight? */
|
||||
while (posvec_iter < posvec->pos + posvec->npos)
|
||||
{
|
||||
if (val->weight & (1 << WEP_GETWEIGHT(*posvec_iter)))
|
||||
{
|
||||
result = true;
|
||||
break; /* no need to go further */
|
||||
}
|
||||
|
||||
posvec_iter++;
|
||||
}
|
||||
}
|
||||
else /* data != NULL */
|
||||
{
|
||||
data->npos = posvec->npos;
|
||||
data->pos = posvec->pos;
|
||||
data->allocated = false;
|
||||
result = true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
else
|
||||
{
|
||||
result = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Removes duplicate pos entries. We can't use uniquePos() from
|
||||
* tsvector.c because array might be longer than MAXENTRYPOS
|
||||
*
|
||||
* Returns new length.
|
||||
*/
|
||||
static int
|
||||
uniqueLongPos(WordEntryPos *pos, int npos)
|
||||
{
|
||||
WordEntryPos *pos_iter,
|
||||
*result;
|
||||
|
||||
if (npos <= 1)
|
||||
return npos;
|
||||
|
||||
qsort((void *) pos, npos, sizeof(WordEntryPos), comparePos);
|
||||
|
||||
result = pos;
|
||||
pos_iter = pos + 1;
|
||||
while (pos_iter < pos + npos)
|
||||
{
|
||||
if (WEP_GETPOS(*pos_iter) != WEP_GETPOS(*result))
|
||||
{
|
||||
result++;
|
||||
*result = WEP_GETPOS(*pos_iter);
|
||||
}
|
||||
|
||||
pos_iter++;
|
||||
}
|
||||
|
||||
return result + 1 - pos;
|
||||
}
|
||||
|
||||
/*
|
||||
* is there value 'val' in array or not ?
|
||||
*/
|
||||
static bool
|
||||
checkcondition_str(void *checkval, QueryOperand *val)
|
||||
checkcondition_str(void *checkval, QueryOperand *val, ExecPhraseData *data)
|
||||
{
|
||||
CHKVAL *chkval = (CHKVAL *) checkval;
|
||||
WordEntry *StopLow = chkval->arrb;
|
||||
@ -1162,14 +1251,16 @@ checkcondition_str(void *checkval, QueryOperand *val)
|
||||
while (StopLow < StopHigh)
|
||||
{
|
||||
StopMiddle = StopLow + (StopHigh - StopLow) / 2;
|
||||
difference = tsCompareString(chkval->operand + val->distance, val->length,
|
||||
chkval->values + StopMiddle->pos, StopMiddle->len,
|
||||
difference = tsCompareString(chkval->operand + val->distance,
|
||||
val->length,
|
||||
chkval->values + StopMiddle->pos,
|
||||
StopMiddle->len,
|
||||
false);
|
||||
|
||||
if (difference == 0)
|
||||
{
|
||||
res = (val->weight && StopMiddle->haspos) ?
|
||||
checkclass_str(chkval, StopMiddle, val) : true;
|
||||
/* Check weight info & fill 'data' with positions */
|
||||
res = checkclass_str(chkval, StopMiddle, val, data);
|
||||
break;
|
||||
}
|
||||
else if (difference > 0)
|
||||
@ -1178,30 +1269,199 @@ checkcondition_str(void *checkval, QueryOperand *val)
|
||||
StopHigh = StopMiddle;
|
||||
}
|
||||
|
||||
if (!res && val->prefix)
|
||||
if ((!res || data) && val->prefix)
|
||||
{
|
||||
WordEntryPos *allpos = NULL;
|
||||
int npos = 0,
|
||||
totalpos = 0;
|
||||
/*
|
||||
* there was a failed exact search, so we should scan further to find
|
||||
* a prefix match.
|
||||
* a prefix match. We also need to do so if caller needs position info
|
||||
*/
|
||||
if (StopLow >= StopHigh)
|
||||
StopMiddle = StopHigh;
|
||||
|
||||
while (res == false && StopMiddle < chkval->arre &&
|
||||
tsCompareString(chkval->operand + val->distance, val->length,
|
||||
chkval->values + StopMiddle->pos, StopMiddle->len,
|
||||
while ((!res || data) && StopMiddle < chkval->arre &&
|
||||
tsCompareString(chkval->operand + val->distance,
|
||||
val->length,
|
||||
chkval->values + StopMiddle->pos,
|
||||
StopMiddle->len,
|
||||
true) == 0)
|
||||
{
|
||||
res = (val->weight && StopMiddle->haspos) ?
|
||||
checkclass_str(chkval, StopMiddle, val) : true;
|
||||
if (data)
|
||||
{
|
||||
/*
|
||||
* We need to join position information
|
||||
*/
|
||||
res = checkclass_str(chkval, StopMiddle, val, data);
|
||||
|
||||
if (res)
|
||||
{
|
||||
while (npos + data->npos >= totalpos)
|
||||
{
|
||||
if (totalpos == 0)
|
||||
{
|
||||
totalpos = 256;
|
||||
allpos = palloc(sizeof(WordEntryPos) * totalpos);
|
||||
}
|
||||
else
|
||||
{
|
||||
totalpos *= 2;
|
||||
allpos = repalloc(allpos, sizeof(WordEntryPos) * totalpos);
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(allpos + npos, data->pos, sizeof(WordEntryPos) * data->npos);
|
||||
npos += data->npos;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
res = checkclass_str(chkval, StopMiddle, val, NULL);
|
||||
}
|
||||
|
||||
StopMiddle++;
|
||||
}
|
||||
|
||||
if (res && data)
|
||||
{
|
||||
/* Sort and make unique array of found positions */
|
||||
data->pos = allpos;
|
||||
data->npos = uniqueLongPos(allpos, npos);
|
||||
data->allocated = true;
|
||||
}
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for phrase condition. Fallback to the AND operation
|
||||
* if there is no positional information.
|
||||
*/
|
||||
static bool
|
||||
TS_phrase_execute(QueryItem *curitem,
|
||||
void *checkval, bool calcnot, ExecPhraseData *data,
|
||||
bool (*chkcond) (void *, QueryOperand *, ExecPhraseData *))
|
||||
{
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
|
||||
if (curitem->type == QI_VAL)
|
||||
{
|
||||
return chkcond(checkval, (QueryOperand *) curitem, data);
|
||||
}
|
||||
else
|
||||
{
|
||||
ExecPhraseData Ldata = {0, false, NULL},
|
||||
Rdata = {0, false, NULL};
|
||||
WordEntryPos *Lpos,
|
||||
*Rpos,
|
||||
*pos_iter = NULL;
|
||||
|
||||
Assert(curitem->qoperator.oper == OP_PHRASE);
|
||||
|
||||
if (!TS_phrase_execute(curitem + curitem->qoperator.left,
|
||||
checkval, calcnot, &Ldata, chkcond))
|
||||
return false;
|
||||
|
||||
if (!TS_phrase_execute(curitem + 1, checkval, calcnot, &Rdata, chkcond))
|
||||
return false;
|
||||
|
||||
/*
|
||||
* if at least one of the operands has no position
|
||||
* information, fallback to AND operation.
|
||||
*/
|
||||
if (Ldata.npos == 0 || Rdata.npos == 0)
|
||||
return true;
|
||||
|
||||
/*
|
||||
* Result of the operation is a list of the
|
||||
* corresponding positions of RIGHT operand.
|
||||
*/
|
||||
if (data)
|
||||
{
|
||||
if (!Rdata.allocated)
|
||||
/*
|
||||
* OP_PHRASE is based on the OP_AND, so the number of resulting
|
||||
* positions could not be greater than the total amount of operands.
|
||||
*/
|
||||
data->pos = palloc(sizeof(WordEntryPos) * Min(Ldata.npos, Rdata.npos));
|
||||
else
|
||||
data->pos = Rdata.pos;
|
||||
|
||||
data->allocated = true;
|
||||
data->npos = 0;
|
||||
pos_iter = data->pos;
|
||||
}
|
||||
|
||||
Lpos = Ldata.pos;
|
||||
Rpos = Rdata.pos;
|
||||
|
||||
/*
|
||||
* Find matches by distance, WEP_GETPOS() is needed because
|
||||
* ExecPhraseData->data can point to the tsvector's WordEntryPosVector
|
||||
*/
|
||||
|
||||
while (Rpos < Rdata.pos + Rdata.npos)
|
||||
{
|
||||
while (Lpos < Ldata.pos + Ldata.npos)
|
||||
{
|
||||
if (WEP_GETPOS(*Lpos) <= WEP_GETPOS(*Rpos))
|
||||
{
|
||||
/*
|
||||
* Lpos is behind the Rpos, so we have to check the
|
||||
* distance condition
|
||||
*/
|
||||
if (WEP_GETPOS(*Rpos) - WEP_GETPOS(*Lpos) <= curitem->qoperator.distance)
|
||||
{
|
||||
/* MATCH! */
|
||||
if (data)
|
||||
{
|
||||
*pos_iter = WEP_GETPOS(*Rpos);
|
||||
pos_iter++;
|
||||
|
||||
break; /* We need to build a unique result
|
||||
* array, so go to the next Rpos */
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* We are in the root of the phrase tree and hence
|
||||
* we don't have to store the resulting positions
|
||||
*/
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Go to the next Rpos, because Lpos
|
||||
* is ahead of the current Rpos
|
||||
*/
|
||||
break;
|
||||
}
|
||||
|
||||
Lpos++;
|
||||
}
|
||||
|
||||
Rpos++;
|
||||
}
|
||||
|
||||
if (data)
|
||||
{
|
||||
data->npos = pos_iter - data->pos;
|
||||
|
||||
if (data->npos > 0)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* Evaluate tsquery boolean expression.
|
||||
*
|
||||
@ -1210,16 +1470,19 @@ checkcondition_str(void *checkval, QueryOperand *val)
|
||||
* do anything with it.
|
||||
* if calcnot is false, NOT expressions are always evaluated to be true. This
|
||||
* is used in ranking.
|
||||
* It assumes that ordinary operators are always closer to the root than the phrase
|
||||
* operator, so, TS_execute() may not take care of lexeme's position at all.
|
||||
*/
|
||||
bool
|
||||
TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
|
||||
bool (*chkcond) (void *checkval, QueryOperand *val))
|
||||
bool (*chkcond) (void *checkval, QueryOperand *val, ExecPhraseData *data))
|
||||
{
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
|
||||
if (curitem->type == QI_VAL)
|
||||
return chkcond(checkval, (QueryOperand *) curitem);
|
||||
return chkcond(checkval, (QueryOperand *) curitem,
|
||||
NULL /* we don't need position info */);
|
||||
|
||||
switch (curitem->qoperator.oper)
|
||||
{
|
||||
@ -1241,6 +1504,9 @@ TS_execute(QueryItem *curitem, void *checkval, bool calcnot,
|
||||
else
|
||||
return TS_execute(curitem + 1, checkval, calcnot, chkcond);
|
||||
|
||||
case OP_PHRASE:
|
||||
return TS_phrase_execute(curitem, checkval, calcnot, NULL, chkcond);
|
||||
|
||||
default:
|
||||
elog(ERROR, "unrecognized operator: %d", curitem->qoperator.oper);
|
||||
}
|
||||
@ -1277,6 +1543,10 @@ tsquery_requires_match(QueryItem *curitem)
|
||||
*/
|
||||
return false;
|
||||
|
||||
case OP_PHRASE:
|
||||
/*
|
||||
* Treat OP_PHRASE as OP_AND here
|
||||
*/
|
||||
case OP_AND:
|
||||
/* If either side requires a match, we're good */
|
||||
if (tsquery_requires_match(curitem + curitem->qoperator.left))
|
||||
|
@ -89,7 +89,15 @@ do { \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define ISOPERATOR(x) ( pg_mblen(x)==1 && ( *(x)=='!' || *(x)=='&' || *(x)=='|' || *(x)=='(' || *(x)==')' ) )
|
||||
/* phrase operator begins with '<' */
|
||||
#define ISOPERATOR(x) \
|
||||
( pg_mblen(x) == 1 && ( *(x) == '!' || \
|
||||
*(x) == '&' || \
|
||||
*(x) == '|' || \
|
||||
*(x) == '(' || \
|
||||
*(x) == ')' || \
|
||||
*(x) == '<' \
|
||||
) )
|
||||
|
||||
/* Fills gettoken_tsvector's output parameters, and returns true */
|
||||
#define RETURN_TOKEN \
|
||||
|
Reference in New Issue
Block a user