mirror of
https://github.com/postgres/postgres.git
synced 2025-11-07 19:06:32 +03:00
Add websearch_to_tsquery
An error-tolerant conversion function with a web-like syntax for search queries; it makes it simpler to build a search engine with an interface close to what users are accustomed to. Bump catalog version. Authors: Victor Drobny, Dmitry Ivanov, with editing by me. Reviewed by: Aleksander Alekseev, Tomas Vondra, Thomas Munro, Aleksandr Parfenov. Discussion: https://www.postgresql.org/message-id/flat/fe931111ff7e9ad79196486ada79e268@postgrespro.ru
This commit is contained in:
@@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
pushval_morph,
|
||||
PointerGetDatum(&data),
|
||||
false);
|
||||
0);
|
||||
|
||||
PG_RETURN_TSQUERY(query);
|
||||
}
|
||||
@@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
pushval_morph,
|
||||
PointerGetDatum(&data),
|
||||
true);
|
||||
P_TSQ_PLAIN);
|
||||
|
||||
PG_RETURN_POINTER(query);
|
||||
}
|
||||
@@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
pushval_morph,
|
||||
PointerGetDatum(&data),
|
||||
true);
|
||||
P_TSQ_PLAIN);
|
||||
|
||||
PG_RETURN_TSQUERY(query);
|
||||
}
|
||||
@@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
|
||||
ObjectIdGetDatum(cfgId),
|
||||
PointerGetDatum(in)));
|
||||
}
|
||||
|
||||
/*
 * websearch_to_tsquery_byid
 *
 * SQL-callable function: builds a tsquery from web-search-style query text
 * using an explicitly supplied text search configuration.
 *
 * Argument 0 is the configuration OID, argument 1 is the query text.
 * The text is converted to a C string and handed to parse_tsquery() with
 * the P_TSQ_WEB flag, using pushval_morph as the operand callback and
 * &data as its opaque argument.
 *
 * NOTE(review): data.qoperator is set to OP_AND; presumably this is the
 * operator pushval_morph uses to join lexemes produced from one operand --
 * confirm against pushval_morph.
 */
Datum
|
||||
websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_PP(1);
|
||||
MorphOpaque data;
|
||||
TSQuery query = NULL;
|
||||
|
||||
data.cfg_id = PG_GETARG_OID(0);
|
||||
|
||||
data.qoperator = OP_AND;
|
||||
|
||||
query = parse_tsquery(text_to_cstring(in),
|
||||
pushval_morph,
|
||||
PointerGetDatum(&data),
|
||||
P_TSQ_WEB);
|
||||
|
||||
PG_RETURN_TSQUERY(query);
|
||||
}
|
||||
|
||||
/*
 * websearch_to_tsquery
 *
 * SQL-callable function: one-argument variant of
 * websearch_to_tsquery_byid().  Argument 0 is the query text.  The text
 * search configuration OID is obtained from getTSCurrentConfig(true) and
 * the call is forwarded, together with the text, through
 * DirectFunctionCall2().
 */
Datum
|
||||
websearch_to_tsquery(PG_FUNCTION_ARGS)
|
||||
{
|
||||
text *in = PG_GETARG_TEXT_PP(0);
|
||||
Oid cfgId;
|
||||
|
||||
cfgId = getTSCurrentConfig(true);
|
||||
PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
|
||||
ObjectIdGetDatum(cfgId),
|
||||
PointerGetDatum(in)));
|
||||
|
||||
}
|
||||
|
||||
@@ -32,14 +32,53 @@ const int tsearch_op_priority[OP_COUNT] =
|
||||
3 /* OP_PHRASE */
|
||||
};
|
||||
|
||||
/*
|
||||
* parser's states (value of the "state" field of TSQueryParserStateData,
* consulted and updated by the tokenizer functions)
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
WAITOPERAND = 1, /* an operand (or a prefix operator / opening bracket or quote) is expected */
|
||||
WAITOPERATOR = 2, /* an operand was just read; expect an operator, a closing bracket or quote, or end of input */
|
||||
WAITFIRSTOPERAND = 3 /* initial state set by parse_tsquery(); handled like WAITOPERAND, but if no operand can be read the tokenizer just returns PT_END */
|
||||
} ts_parserstate;
|
||||
|
||||
/*
|
||||
* token types for parsing (return values of the tokenizer functions)
|
||||
*/
|
||||
typedef enum
|
||||
{
|
||||
PT_END = 0, /* no more tokens; the makepol() loop stops on this */
|
||||
PT_ERR = 1, /* tokenizer detected malformed input (e.g. unbalanced brackets) */
|
||||
PT_VAL = 2, /* an operand; *strval and *lenval describe it */
|
||||
PT_OPR = 3, /* an operator; *operator holds the OP_* code */
|
||||
PT_OPEN = 4, /* opening bracket (or opening quote in websearch mode) */
|
||||
PT_CLOSE = 5 /* closing bracket (or closing quote in websearch mode) */
|
||||
} ts_tokentype;
|
||||
|
||||
/*
|
||||
* get token from query string
|
||||
*
|
||||
* *operator is filled in with OP_* when the return value is PT_OPR;
|
||||
* for the phrase operator, *weight additionally carries the distance.
|
||||
* *strval, *lenval and *weight are filled in when the return value is PT_VAL.
|
||||
*
* parse_tsquery() stores one of gettoken_query_standard,
* gettoken_query_websearch or gettoken_query_plain in a pointer of this
* type, depending on its flags argument.
|
||||
*/
|
||||
typedef ts_tokentype (*ts_tokenizer)(TSQueryParserState state, int8 *operator,
|
||||
int *lenval, char **strval,
|
||||
int16 *weight, bool *prefix);
|
||||
|
||||
struct TSQueryParserStateData
|
||||
{
|
||||
/* State for gettoken_query */
|
||||
/* Tokenizer used for parsing tsquery */
|
||||
ts_tokenizer gettoken;
|
||||
|
||||
/* State of tokenizer function */
|
||||
char *buffer; /* entire string we are scanning */
|
||||
char *buf; /* current scan point */
|
||||
int state;
|
||||
int count; /* nesting count, incremented by (,
|
||||
* decremented by ) */
|
||||
bool in_quotes; /* phrase in quotes "" */
|
||||
ts_parserstate state;
|
||||
|
||||
/* polish (prefix) notation in list, filled in by push* functions */
|
||||
List *polstr;
|
||||
@@ -57,12 +96,6 @@ struct TSQueryParserStateData
|
||||
TSVectorParseState valstate;
|
||||
};
|
||||
|
||||
/* parser's states */
|
||||
#define WAITOPERAND 1
|
||||
#define WAITOPERATOR 2
|
||||
#define WAITFIRSTOPERAND 3
|
||||
#define WAITSINGLEOPERAND 4
|
||||
|
||||
/*
|
||||
* subroutine to parse the modifiers (weight and prefix flag currently)
|
||||
* part, like ':AB*' of a query.
|
||||
@@ -118,18 +151,17 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
|
||||
*
|
||||
* The buffer should begin with '<' char
|
||||
*/
|
||||
static char *
|
||||
parse_phrase_operator(char *buf, int16 *distance)
|
||||
static bool
|
||||
parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
|
||||
{
|
||||
enum
|
||||
{
|
||||
PHRASE_OPEN = 0,
|
||||
PHRASE_DIST,
|
||||
PHRASE_CLOSE,
|
||||
PHRASE_ERR,
|
||||
PHRASE_FINISH
|
||||
} state = PHRASE_OPEN;
|
||||
char *ptr = buf;
|
||||
char *ptr = pstate->buf;
|
||||
char *endptr;
|
||||
long l = 1; /* default distance */
|
||||
|
||||
@@ -138,9 +170,13 @@ parse_phrase_operator(char *buf, int16 *distance)
|
||||
switch (state)
|
||||
{
|
||||
case PHRASE_OPEN:
|
||||
Assert(t_iseq(ptr, '<'));
|
||||
state = PHRASE_DIST;
|
||||
ptr++;
|
||||
if (t_iseq(ptr, '<'))
|
||||
{
|
||||
state = PHRASE_DIST;
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
break;
|
||||
|
||||
case PHRASE_DIST:
|
||||
@@ -148,18 +184,16 @@ parse_phrase_operator(char *buf, int16 *distance)
|
||||
{
|
||||
state = PHRASE_CLOSE;
|
||||
ptr++;
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!t_isdigit(ptr))
|
||||
{
|
||||
state = PHRASE_ERR;
|
||||
break;
|
||||
}
|
||||
return false;
|
||||
|
||||
errno = 0;
|
||||
l = strtol(ptr, &endptr, 10);
|
||||
if (ptr == endptr)
|
||||
state = PHRASE_ERR;
|
||||
return false;
|
||||
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
@@ -179,54 +213,77 @@ parse_phrase_operator(char *buf, int16 *distance)
|
||||
ptr++;
|
||||
}
|
||||
else
|
||||
state = PHRASE_ERR;
|
||||
return false;
|
||||
break;
|
||||
|
||||
case PHRASE_FINISH:
|
||||
*distance = (int16) l;
|
||||
return ptr;
|
||||
|
||||
case PHRASE_ERR:
|
||||
default:
|
||||
goto err;
|
||||
pstate->buf = ptr;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
err:
|
||||
*distance = -1;
|
||||
return buf;
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* token types for parsing
|
||||
* Parse OR operator used in websearch_to_tsquery(), returns true if we
|
||||
* believe that "OR" literal could be an operator OR
|
||||
*/
|
||||
typedef enum
|
||||
static bool
|
||||
parse_or_operator(TSQueryParserState pstate)
|
||||
{
|
||||
PT_END = 0,
|
||||
PT_ERR = 1,
|
||||
PT_VAL = 2,
|
||||
PT_OPR = 3,
|
||||
PT_OPEN = 4,
|
||||
PT_CLOSE = 5
|
||||
} ts_tokentype;
|
||||
char *ptr = pstate->buf;
|
||||
|
||||
if (pstate->in_quotes)
|
||||
return false;
|
||||
|
||||
/* it should begin with "OR" literal */
|
||||
if (pg_strncasecmp(ptr, "or", 2) != 0)
|
||||
return false;
|
||||
|
||||
ptr += 2;
|
||||
|
||||
/*
|
||||
* it shouldn't be a part of any word but somewhere later it should be some
|
||||
* operand
|
||||
*/
|
||||
if (*ptr == '\0') /* no operand */
|
||||
return false;
|
||||
|
||||
/* it shouldn't be a part of any word */
|
||||
if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
|
||||
return false;
|
||||
|
||||
for(;;)
|
||||
{
|
||||
ptr += pg_mblen(ptr);
|
||||
|
||||
if (*ptr == '\0') /* got end of string without operand */
|
||||
return false;
|
||||
|
||||
/*
|
||||
* Suppose, we found an operand, but could be a not correct operand. So
|
||||
* we still treat OR literal as operation with possibly incorrect
|
||||
* operand and will not search it as lexeme
|
||||
*/
|
||||
if (!t_isspace(ptr))
|
||||
break;
|
||||
}
|
||||
|
||||
pstate->buf += 2;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* get token from query string
|
||||
*
|
||||
* *operator is filled in with OP_* when the return value is PT_OPR,
|
||||
* but *weight could contain a distance value in case of phrase operator.
|
||||
* *strval, *lenval and *weight are filled in when return value is PT_VAL
|
||||
*
|
||||
*/
|
||||
static ts_tokentype
|
||||
gettoken_query(TSQueryParserState state,
|
||||
int8 *operator,
|
||||
int *lenval, char **strval, int16 *weight, bool *prefix)
|
||||
gettoken_query_standard(TSQueryParserState state, int8 *operator,
|
||||
int *lenval, char **strval,
|
||||
int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
while (1)
|
||||
while (true)
|
||||
{
|
||||
switch (state->state)
|
||||
{
|
||||
@@ -234,17 +291,16 @@ gettoken_query(TSQueryParserState state,
|
||||
case WAITOPERAND:
|
||||
if (t_iseq(state->buf, '!'))
|
||||
{
|
||||
(state->buf)++; /* can safely ++, t_iseq guarantee that
|
||||
* pg_mblen()==1 */
|
||||
*operator = OP_NOT;
|
||||
state->buf++;
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_NOT;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, '('))
|
||||
{
|
||||
state->count++;
|
||||
(state->buf)++;
|
||||
state->buf++;
|
||||
state->state = WAITOPERAND;
|
||||
state->count++;
|
||||
return PT_OPEN;
|
||||
}
|
||||
else if (t_iseq(state->buf, ':'))
|
||||
@@ -256,19 +312,19 @@ gettoken_query(TSQueryParserState state,
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
{
|
||||
/*
|
||||
* We rely on the tsvector parser to parse the value for
|
||||
* us
|
||||
*/
|
||||
/* We rely on the tsvector parser to parse the value for us */
|
||||
reset_tsvector_parser(state->valstate, state->buf);
|
||||
if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
|
||||
if (gettoken_tsvector(state->valstate, strval, lenval,
|
||||
NULL, NULL, &state->buf))
|
||||
{
|
||||
state->buf = get_modifiers(state->buf, weight, prefix);
|
||||
state->state = WAITOPERATOR;
|
||||
return PT_VAL;
|
||||
}
|
||||
else if (state->state == WAITFIRSTOPERAND)
|
||||
{
|
||||
return PT_END;
|
||||
}
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_SYNTAX_ERROR),
|
||||
@@ -276,58 +332,206 @@ gettoken_query(TSQueryParserState state,
|
||||
state->buffer)));
|
||||
}
|
||||
break;
|
||||
|
||||
case WAITOPERATOR:
|
||||
if (t_iseq(state->buf, '&'))
|
||||
{
|
||||
state->buf++;
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_AND;
|
||||
(state->buf)++;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, '|'))
|
||||
{
|
||||
state->buf++;
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_OR;
|
||||
(state->buf)++;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, '<'))
|
||||
else if (parse_phrase_operator(state, weight))
|
||||
{
|
||||
/* weight var is used as storage for distance */
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_PHRASE;
|
||||
/* weight var is used as storage for distance */
|
||||
state->buf = parse_phrase_operator(state->buf, weight);
|
||||
if (*weight < 0)
|
||||
return PT_ERR;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, ')'))
|
||||
{
|
||||
(state->buf)++;
|
||||
state->buf++;
|
||||
state->count--;
|
||||
return (state->count < 0) ? PT_ERR : PT_CLOSE;
|
||||
}
|
||||
else if (*(state->buf) == '\0')
|
||||
else if (*state->buf == '\0')
|
||||
{
|
||||
return (state->count) ? PT_ERR : PT_END;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
{
|
||||
return PT_ERR;
|
||||
break;
|
||||
case WAITSINGLEOPERAND:
|
||||
if (*(state->buf) == '\0')
|
||||
return PT_END;
|
||||
*strval = state->buf;
|
||||
*lenval = strlen(state->buf);
|
||||
state->buf += strlen(state->buf);
|
||||
state->count++;
|
||||
return PT_VAL;
|
||||
default:
|
||||
return PT_ERR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
state->buf += pg_mblen(state->buf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Tokenizer used for websearch_to_tsquery() input (selected by
 * parse_tsquery() when P_TSQ_WEB is set).
 *
 * On entry *weight is reset to 0 and *prefix to false.  The function then
 * scans forward from state->buf, driven by state->state, and returns the
 * next token.  It never returns PT_ERR; constructs it cannot use are
 * skipped.
 *
 * While an operand is expected (WAITFIRSTOPERAND / WAITOPERAND):
 *	'-'		returns OP_NOT, unless inside quotes, where it is just skipped
 *	'"'		outside quotes: returns PT_OPEN and sets in_quotes if another
 *			'"' exists later in the string, otherwise the quote is skipped;
 *			inside quotes: calls pushStop(), leaves quote mode and returns
 *			PT_CLOSE
 *	ISOPERATOR characters are skipped
 *	other non-space text is passed to gettoken_tsvector(); on success
 *			PT_VAL is returned, on failure PT_END (after pushStop() unless
 *			still in WAITFIRSTOPERAND)
 *
 * While an operator is expected (WAITOPERATOR):
 *	'"'		outside quotes: returns an implicit OP_AND without consuming the
 *			quote; inside quotes: consumes it and returns PT_CLOSE
 *	text accepted by parse_or_operator() returns OP_OR
 *	end of string returns PT_END
 *	any other non-space character is left unconsumed and an implicit
 *			operator is returned: OP_PHRASE with *weight = 1 inside quotes,
 *			OP_AND otherwise
 *
 * Characters matched by no branch (whitespace) are stepped over with
 * pg_mblen() at the bottom of the loop.
 */
static ts_tokentype
|
||||
gettoken_query_websearch(TSQueryParserState state, int8 *operator,
|
||||
int *lenval, char **strval,
|
||||
int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
while (true)
|
||||
{
|
||||
switch (state->state)
|
||||
{
|
||||
case WAITFIRSTOPERAND:
|
||||
case WAITOPERAND:
|
||||
if (t_iseq(state->buf, '-'))
|
||||
{
|
||||
state->buf++;
|
||||
state->state = WAITOPERAND;
|
||||
|
||||
/* inside a quoted phrase '-' is not a negation; just drop it */
if (state->in_quotes)
|
||||
continue;
|
||||
|
||||
*operator = OP_NOT;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (t_iseq(state->buf, '"'))
|
||||
{
|
||||
state->buf++;
|
||||
|
||||
if (!state->in_quotes)
|
||||
{
|
||||
state->state = WAITOPERAND;
|
||||
|
||||
/* only enter quote mode if a closing quote follows somewhere */
if (strchr(state->buf, '"'))
|
||||
{
|
||||
/* quoted text should be ordered <-> */
|
||||
state->in_quotes = true;
|
||||
return PT_OPEN;
|
||||
}
|
||||
|
||||
/* web search tolerates missing quotes */
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* we have to provide an operand */
|
||||
state->in_quotes = false;
|
||||
state->state = WAITOPERATOR;
|
||||
pushStop(state);
|
||||
return PT_CLOSE;
|
||||
}
|
||||
}
|
||||
else if (ISOPERATOR(state->buf))
|
||||
{
|
||||
/* or else gettoken_tsvector() will raise an error */
|
||||
state->buf++;
|
||||
state->state = WAITOPERAND;
|
||||
continue;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
{
|
||||
/* We rely on the tsvector parser to parse the value for us */
|
||||
reset_tsvector_parser(state->valstate, state->buf);
|
||||
if (gettoken_tsvector(state->valstate, strval, lenval,
|
||||
NULL, NULL, &state->buf))
|
||||
{
|
||||
state->state = WAITOPERATOR;
|
||||
return PT_VAL;
|
||||
}
|
||||
else if (state->state == WAITFIRSTOPERAND)
|
||||
{
|
||||
return PT_END;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* finally, we have to provide an operand */
|
||||
pushStop(state);
|
||||
return PT_END;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case WAITOPERATOR:
|
||||
if (t_iseq(state->buf, '"'))
|
||||
{
|
||||
if (!state->in_quotes)
|
||||
{
|
||||
/*
|
||||
* put implicit AND after an operand
|
||||
* and handle this quote in WAITOPERAND
|
||||
*/
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_AND;
|
||||
return PT_OPR;
|
||||
}
|
||||
else
|
||||
{
|
||||
state->buf++;
|
||||
|
||||
/* just close quotes */
|
||||
state->in_quotes = false;
|
||||
return PT_CLOSE;
|
||||
}
|
||||
}
|
||||
else if (parse_or_operator(state))
|
||||
{
|
||||
state->state = WAITOPERAND;
|
||||
*operator = OP_OR;
|
||||
return PT_OPR;
|
||||
}
|
||||
else if (*state->buf == '\0')
|
||||
{
|
||||
return PT_END;
|
||||
}
|
||||
else if (!t_isspace(state->buf))
|
||||
{
|
||||
/* the character itself is not consumed; it is re-read as an operand */
if (state->in_quotes)
|
||||
{
|
||||
/* put implicit <-> after an operand */
|
||||
*operator = OP_PHRASE;
|
||||
*weight = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* put implicit AND after an operand */
|
||||
*operator = OP_AND;
|
||||
}
|
||||
|
||||
state->state = WAITOPERAND;
|
||||
return PT_OPR;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
/* no branch produced a token: step over one (possibly multibyte) character */
state->buf += pg_mblen(state->buf);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Tokenizer used when parse_tsquery() is called with P_TSQ_PLAIN.
 *
 * No query syntax is recognized: *weight is reset to 0 and *prefix to
 * false, and the whole remaining input is returned as a single PT_VAL
 * token (*strval points at it, *lenval is its strlen()).  state->buf is
 * advanced to the terminating NUL, so the next call returns PT_END; empty
 * input returns PT_END immediately.
 *
 * NOTE(review): state->count is incremented for the returned operand; the
 * reason is not evident from this function -- confirm against the caller.
 */
static ts_tokentype
|
||||
gettoken_query_plain(TSQueryParserState state, int8 *operator,
|
||||
int *lenval, char **strval,
|
||||
int16 *weight, bool *prefix)
|
||||
{
|
||||
*weight = 0;
|
||||
*prefix = false;
|
||||
|
||||
if (*state->buf == '\0')
|
||||
return PT_END;
|
||||
|
||||
*strval = state->buf;
|
||||
*lenval = strlen(state->buf);
|
||||
state->buf += *lenval;
|
||||
state->count++;
|
||||
return PT_VAL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Push an operator to state->polstr
|
||||
*/
|
||||
@@ -489,7 +693,9 @@ makepol(TSQueryParserState state,
|
||||
/* since this function recurses, it could be driven to stack overflow */
|
||||
check_stack_depth();
|
||||
|
||||
while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
|
||||
while ((type = state->gettoken(state, &operator,
|
||||
&lenval, &strval,
|
||||
&weight, &prefix)) != PT_END)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
@@ -605,7 +811,7 @@ TSQuery
|
||||
parse_tsquery(char *buf,
|
||||
PushFunction pushval,
|
||||
Datum opaque,
|
||||
bool isplain)
|
||||
int flags)
|
||||
{
|
||||
struct TSQueryParserStateData state;
|
||||
int i;
|
||||
@@ -614,16 +820,32 @@ parse_tsquery(char *buf,
|
||||
QueryItem *ptr;
|
||||
ListCell *cell;
|
||||
bool needcleanup;
|
||||
int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
|
||||
|
||||
/* plain should not be used with web */
|
||||
Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
|
||||
|
||||
/* select suitable tokenizer */
|
||||
if (flags & P_TSQ_PLAIN)
|
||||
state.gettoken = gettoken_query_plain;
|
||||
else if (flags & P_TSQ_WEB)
|
||||
{
|
||||
state.gettoken = gettoken_query_websearch;
|
||||
tsv_flags |= P_TSV_IS_WEB;
|
||||
}
|
||||
else
|
||||
state.gettoken = gettoken_query_standard;
|
||||
|
||||
/* init state */
|
||||
state.buffer = buf;
|
||||
state.buf = buf;
|
||||
state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
|
||||
state.count = 0;
|
||||
state.in_quotes = false;
|
||||
state.state = WAITFIRSTOPERAND;
|
||||
state.polstr = NIL;
|
||||
|
||||
/* init value parser's state */
|
||||
state.valstate = init_tsvector_parser(state.buffer, true, true);
|
||||
state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
|
||||
|
||||
/* init list of operand */
|
||||
state.sumlen = 0;
|
||||
@@ -716,7 +938,7 @@ tsqueryin(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *in = PG_GETARG_CSTRING(0);
|
||||
|
||||
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
|
||||
PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS)
|
||||
char *cur;
|
||||
int buflen = 256; /* allocated size of tmpbuf */
|
||||
|
||||
state = init_tsvector_parser(buf, false, false);
|
||||
state = init_tsvector_parser(buf, 0);
|
||||
|
||||
arrlen = 64;
|
||||
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
|
||||
|
||||
@@ -33,6 +33,7 @@ struct TSVectorParseStateData
|
||||
int eml; /* max bytes per character */
|
||||
bool oprisdelim; /* treat ! | * ( ) as delimiters? */
|
||||
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
|
||||
bool is_web; /* we're in websearch_to_tsquery() */
|
||||
};
|
||||
|
||||
|
||||
@@ -42,7 +43,7 @@ struct TSVectorParseStateData
|
||||
* ! | & ( )
|
||||
*/
|
||||
TSVectorParseState
|
||||
init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
|
||||
init_tsvector_parser(char *input, int flags)
|
||||
{
|
||||
TSVectorParseState state;
|
||||
|
||||
@@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
|
||||
state->len = 32;
|
||||
state->word = (char *) palloc(state->len);
|
||||
state->eml = pg_database_encoding_max_length();
|
||||
state->oprisdelim = oprisdelim;
|
||||
state->is_tsquery = is_tsquery;
|
||||
state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
|
||||
state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
|
||||
state->is_web = (flags & P_TSV_IS_WEB) != 0;
|
||||
|
||||
return state;
|
||||
}
|
||||
@@ -89,16 +91,6 @@ do { \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* phrase operator begins with '<' */
|
||||
#define ISOPERATOR(x) \
|
||||
( pg_mblen(x) == 1 && ( *(x) == '!' || \
|
||||
*(x) == '&' || \
|
||||
*(x) == '|' || \
|
||||
*(x) == '(' || \
|
||||
*(x) == ')' || \
|
||||
*(x) == '<' \
|
||||
) )
|
||||
|
||||
/* Fills gettoken_tsvector's output parameters, and returns true */
|
||||
#define RETURN_TOKEN \
|
||||
do { \
|
||||
@@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
{
|
||||
if (*(state->prsbuf) == '\0')
|
||||
return false;
|
||||
else if (t_iseq(state->prsbuf, '\''))
|
||||
else if (!state->is_web && t_iseq(state->prsbuf, '\''))
|
||||
statecode = WAITENDCMPLX;
|
||||
else if (t_iseq(state->prsbuf, '\\'))
|
||||
else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
statecode = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
|
||||
else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
|
||||
(state->is_web && t_iseq(state->prsbuf, '"')))
|
||||
PRSSYNTAXERROR;
|
||||
else if (!t_isspace(state->prsbuf))
|
||||
{
|
||||
@@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
}
|
||||
else if (statecode == WAITENDWORD)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, '\\'))
|
||||
if (!state->is_web && t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
statecode = WAITNEXTCHAR;
|
||||
oldstate = WAITENDWORD;
|
||||
}
|
||||
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
|
||||
(state->oprisdelim && ISOPERATOR(state->prsbuf)))
|
||||
(state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
|
||||
(state->is_web && t_iseq(state->prsbuf, '"')))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
if (curpos == state->word)
|
||||
@@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
}
|
||||
else if (statecode == WAITENDCMPLX)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, '\''))
|
||||
if (!state->is_web && t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
statecode = WAITCHARCMPLX;
|
||||
}
|
||||
else if (t_iseq(state->prsbuf, '\\'))
|
||||
else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
|
||||
{
|
||||
statecode = WAITNEXTCHAR;
|
||||
oldstate = WAITENDCMPLX;
|
||||
@@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state,
|
||||
}
|
||||
else if (statecode == WAITCHARCMPLX)
|
||||
{
|
||||
if (t_iseq(state->prsbuf, '\''))
|
||||
if (!state->is_web && t_iseq(state->prsbuf, '\''))
|
||||
{
|
||||
RESIZEPRSBUF;
|
||||
COPYCHAR(curpos, state->prsbuf);
|
||||
|
||||
Reference in New Issue
Block a user