1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-22 02:52:08 +03:00

Extend GIN to support partial-match searches, and extend tsquery to support
prefix matching using this facility.

Teodor Sigaev and Oleg Bartunov
This commit is contained in:
Tom Lane
2008-05-16 16:31:02 +00:00
parent e1bdd07c3c
commit e6dbcb72fa
32 changed files with 1284 additions and 508 deletions

View File

@ -7,7 +7,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.17 2008/04/11 22:52:05 tgl Exp $
* $PostgreSQL: pgsql/src/backend/utils/adt/tsquery.c,v 1.18 2008/05/16 16:31:01 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -56,12 +56,14 @@ struct TSQueryParserStateData
#define WAITSINGLEOPERAND 4
/*
* subroutine to parse the weight part, like ':1AB' of a query.
* subroutine to parse the modifiers part of a query, like ':1AB'
* (currently the weight and the prefix flag).
*/
static char *
get_weight(char *buf, int16 *weight)
get_modifiers(char *buf, int16 *weight, bool *prefix)
{
*weight = 0;
*prefix = false;
if (!t_iseq(buf, ':'))
return buf;
@ -87,6 +89,9 @@ get_weight(char *buf, int16 *weight)
case 'D':
*weight |= 1;
break;
case '*':
*prefix = true;
break;
default:
return buf;
}
@ -118,8 +123,11 @@ typedef enum
static ts_tokentype
gettoken_query(TSQueryParserState state,
int8 *operator,
int *lenval, char **strval, int16 *weight)
int *lenval, char **strval, int16 *weight, bool *prefix)
{
*weight = 0;
*prefix = false;
while (1)
{
switch (state->state)
@ -157,7 +165,7 @@ gettoken_query(TSQueryParserState state,
reset_tsvector_parser(state->valstate, state->buf);
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
{
state->buf = get_weight(state->buf, weight);
state->buf = get_modifiers(state->buf, weight, prefix);
state->state = WAITOPERATOR;
return PT_VAL;
}
@ -232,7 +240,7 @@ pushOperator(TSQueryParserState state, int8 oper)
}
static void
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight)
pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int lenval, int weight, bool prefix)
{
QueryOperand *tmp;
@ -250,6 +258,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
tmp = (QueryOperand *) palloc0(sizeof(QueryOperand));
tmp->type = QI_VAL;
tmp->weight = weight;
tmp->prefix = prefix;
tmp->valcrc = (int32) valcrc;
tmp->length = lenval;
tmp->distance = distance;
@ -264,7 +273,7 @@ pushValue_internal(TSQueryParserState state, pg_crc32 valcrc, int distance, int
* of the string.
*/
void
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight, bool prefix)
{
pg_crc32 valcrc;
@ -277,7 +286,7 @@ pushValue(TSQueryParserState state, char *strval, int lenval, int2 weight)
INIT_CRC32(valcrc);
COMP_CRC32(valcrc, strval, lenval);
FIN_CRC32(valcrc);
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight);
pushValue_internal(state, valcrc, state->curop - state->op, lenval, weight, prefix);
/* append the value string to state.op, enlarging buffer if needed first */
while (state->curop - state->op + lenval + 1 >= state->lenop)
@ -330,16 +339,17 @@ makepol(TSQueryParserState state,
int8 opstack[STACKDEPTH];
int lenstack = 0;
int16 weight = 0;
bool prefix;
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight)) != PT_END)
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
{
switch (type)
{
case PT_VAL:
pushval(opaque, state, strval, lenval, weight);
pushval(opaque, state, strval, lenval, weight, prefix);
while (lenstack && (opstack[lenstack - 1] == OP_AND ||
opstack[lenstack - 1] == OP_NOT))
{
@ -549,9 +559,9 @@ parse_tsquery(char *buf,
static void
pushval_asis(Datum opaque, TSQueryParserState state, char *strval, int lenval,
int16 weight)
int16 weight, bool prefix)
{
pushValue(state, strval, lenval, weight);
pushValue(state, strval, lenval, weight, prefix);
}
/*
@ -605,7 +615,7 @@ infix(INFIX *in, bool first)
char *op = in->op + curpol->distance;
int clen;
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 5);
RESIZEBUF(in, curpol->length * (pg_database_encoding_max_length() + 1) + 2 + 6);
*(in->cur) = '\'';
in->cur++;
while (*op)
@ -628,10 +638,15 @@ infix(INFIX *in, bool first)
}
*(in->cur) = '\'';
in->cur++;
if (curpol->weight)
if (curpol->weight || curpol->prefix)
{
*(in->cur) = ':';
in->cur++;
if ( curpol->prefix )
{
*(in->cur) = '*';
in->cur++;
}
if (curpol->weight & (1 << 3))
{
*(in->cur) = 'A';
@ -769,6 +784,7 @@ tsqueryout(PG_FUNCTION_ARGS)
* uint8 type, QI_VAL
* uint8 weight
* uint8 prefix
* operand text in client encoding, null-terminated
*
* For each operator:
* uint8 type, QI_OPR
@ -793,6 +809,7 @@ tsquerysend(PG_FUNCTION_ARGS)
{
case QI_VAL:
pq_sendint(&buf, item->operand.weight, sizeof(uint8));
pq_sendint(&buf, item->operand.prefix, sizeof(uint8));
pq_sendstring(&buf, GETOPERAND(query) + item->operand.distance);
break;
case QI_OPR:
@ -844,10 +861,12 @@ tsqueryrecv(PG_FUNCTION_ARGS)
{
size_t val_len; /* length after recoding to server encoding */
uint8 weight;
uint8 prefix;
const char *val;
pg_crc32 valcrc;
weight = (uint8) pq_getmsgint(buf, sizeof(uint8));
prefix = (uint8) pq_getmsgint(buf, sizeof(uint8));
val = pq_getmsgstring(buf);
val_len = strlen(val);
@ -869,6 +888,7 @@ tsqueryrecv(PG_FUNCTION_ARGS)
FIN_CRC32(valcrc);
item->operand.weight = weight;
item->operand.prefix = (prefix) ? true : false;
item->operand.valcrc = (int32) valcrc;
item->operand.length = val_len;
item->operand.distance = datalen;