mirror of
https://github.com/postgres/postgres.git
synced 2025-06-22 02:52:08 +03:00
Extend GIN to support partial-match searches, and extend tsquery to support
prefix matching using this facility. Teodor Sigaev and Oleg Bartunov
This commit is contained in:
@ -7,7 +7,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -56,12 +56,14 @@ struct TSQueryParserStateData
|
||||
#define WAITSINGLEOPERAND 4
|
||||
|
||||
/*
|
||||
* subroutine to parse the weight part, like ':1AB' of a query.
|
||||
* subroutine to parse the modifiers (weight and prefix flag currently)
|
||||
* part, like ':1AB' of a query.
|
||||
*/
|
||||
static char *
|
||||
get_weight(char *buf, int16 *weight)
|
||||
get_modifiers(char *buf, int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
if (!t_iseq(buf, ':'))
|
||||
return buf;
|
||||
@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight)
|
||||
case 'D':
|
||||
*weight |= 1;
|
||||
break;
|
||||
case '*':
|
||||
*prefix = true;
|
||||
break;
|
||||
default:
|
||||
return buf;
|
||||
}
|
||||
@ -118,8 +123,11 @@ typedef enum
|
||||
static ts_tokentype
|
||||
gettoken_query(TSQueryParserState state,
|
||||
int8 *operator,
|
||||
int *lenval, char **strval, int16 *weight)
|
||||
int *lenval, char **strval, int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
while (1)
|
||||
{
|
||||
switch (state->state)
|
||||
@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state,
|
||||
reset_tsvector_parser(state->valstate, state->buf);
|
||||
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
|
||||
{
|
||||
state->buf = get_weight(state->buf, weight);
|
||||
state->buf = get_modifiers(state->buf, weight, prefix);
|
||||
state->state = WAITOPERATOR;
|
||||
return PT_VAL;
|
||||
}
|
||||
@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper)
|
||||
}
|
||||
|
||||
static void
|
||||
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
|
||||
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
|
||||
{
|
||||
QueryOperand *tmp;
|
||||
|
||||
@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
||||
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
|
||||
tmp->type = QI_VAL;
|
||||
tmp->weight = weight;
|
||||
tmp->prefix = prefix;
|
||||
tmp->valcrc = (int32) valcrc;
|
||||
tmp->length = lenval;
|
||||
tmp->distance = distance;
|
||||
@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
|
||||
* of the string.
|
||||
*/
|
||||
void
|
||||
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
||||
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
|
||||
{
|
||||
pg_crc32 valcrc;
|
||||
|
||||
@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
|
||||
INIT_CRC32(valcrc);
|
||||
COMP_CRC32(valcrc, strval, lenval);
|
||||
FIN_CRC32(valcrc);
|
||||
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
|
||||
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
|
||||
|
||||
/* append the value string to state.op, enlarging buffer if needed first */
|
||||
while (state->curop - state->op + lenval + 1 >= state->lenop)
|
||||
@ -330,16 +339,17 @@ makepol(TSQueryParserState state,
|
||||
int8 opstack[STACKDEPTH];
|
||||
int lenstack = 0;
|
||||
int16 weight = 0;
|
||||
bool prefix;
|
||||
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
|
||||
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
|
||||
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case PT_VAL:
|
||||
pushval(opaque, state, strval, lenval, weight);
|
||||
pushval(opaque, state, strval, lenval, weight, prefix);
|
||||
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
|
||||
opstack[lenstack - 1] == OP_NOT))
|
||||
{
|
||||
@ -549,9 +559,9 @@ parse_tsquery(char *buf,
|
||||
|
||||
static void
|
||||
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
|
||||
int16 weight)
|
||||
int16 weight, bool prefix)
|
||||
{
|
||||
pushValue(state, strval, lenval, weight);
|
||||
pushValue(state, strval, lenval, weight, prefix);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -605,7 +615,7 @@ infix(INFIX *in, bool first)
|
||||
char *op = in->op + curpol->distance;
|
||||
int clen;
|
||||
|
||||
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
|
||||
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
while (*op)
|
||||
@ -628,10 +638,15 @@ infix(INFIX *in, bool first)
|
||||
}
|
||||
*(in->cur) = '\'';
|
||||
in->cur++;
|
||||
if (curpol->weight)
|
||||
if (curpol->weight || curpol->prefix)
|
||||
{
|
||||
*(in->cur) = ':';
|
||||
in->cur++;
|
||||
if ( curpol->prefix )
|
||||
{
|
||||
*(in->cur) = '*';
|
||||
in->cur++;
|
||||
}
|
||||
if (curpol->weight & (1 << 3))
|
||||
{
|
||||
*(in->cur) = 'A';
|
||||
@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS)
|
||||
* uint8 type, QI_VAL
|
||||
* uint8 weight
|
||||
* operand text in client encoding, null-terminated
|
||||
* uint8 prefix
|
||||
*
|
||||
* For each operator:
|
||||
* uint8 type, QI_OPR
|
||||
@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS)
|
||||
{
|
||||
case QI_VAL:
|
||||
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
|
||||
pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
|
||||
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
|
||||
break;
|
||||
case QI_OPR:
|
||||
@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
size_t val_len; /* length after recoding to server encoding */
|
||||
uint8 weight;
|
||||
uint8 prefix;
|
||||
const char *val;
|
||||
pg_crc32 valcrc;
|
||||
|
||||
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||
prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
|
||||
val = pq_getmsgstring(buf);
|
||||
val_len = strlen(val);
|
||||
|
||||
@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
|
||||
FIN_CRC32(valcrc);
|
||||
|
||||
item->operand.weight = weight;
|
||||
item->operand.prefix = (prefix) ? true : false;
|
||||
item->operand.valcrc = (int32) valcrc;
|
||||
item->operand.length = val_len;
|
||||
item->operand.distance = datalen;
|
||||
|
Reference in New Issue
Block a user