diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9a1efc14cf7..122f034f177 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -9630,6 +9630,18 @@ CREATE TYPE rainbow AS ENUM ('red', 'orange', 'yellow', 'green', 'blue', 'purple
phraseto_tsquery('english', 'The Fat Rats')
'fat' <-> 'rat'
+
+
+
+ websearch_to_tsquery
+
+ websearch_to_tsquery( config regconfig , query text)
+
+ tsquery
+ produce tsquery from a web search style query
+ websearch_to_tsquery('english', '"fat rat" or rat')
+ 'fat' <-> 'rat' | 'rat'
+
diff --git a/doc/src/sgml/textsearch.sgml b/doc/src/sgml/textsearch.sgml
index 610b7bf0337..19f58511c82 100644
--- a/doc/src/sgml/textsearch.sgml
+++ b/doc/src/sgml/textsearch.sgml
@@ -797,13 +797,16 @@ UPDATE tt SET ti =
PostgreSQL provides the
functions to_tsquery,
- plainto_tsquery, and
- phraseto_tsquery
+ plainto_tsquery,
+ phraseto_tsquery and
+ websearch_to_tsquery
for converting a query to the tsquery data type.
to_tsquery offers access to more features
than either plainto_tsquery or
- phraseto_tsquery, but it is less forgiving
- about its input.
+ phraseto_tsquery, but it is less forgiving about its
+ input. websearch_to_tsquery is a simplified version
+ of to_tsquery with an alternative syntax, similar
+ to the one used by web search engines.
@@ -962,6 +965,87 @@ SELECT phraseto_tsquery('english', 'The Fat & Rats:C');
+
+websearch_to_tsquery( config regconfig, querytext text) returns tsquery
+
+
+
+ websearch_to_tsquery creates a tsquery
+ value from querytext using an alternative
+ syntax in which simple unformatted text is a valid query.
+ Unlike plainto_tsquery
+ and phraseto_tsquery, it also recognizes certain
+ operators. Moreover, this function should never raise syntax errors,
+ which makes it possible to use raw user-supplied input for search.
+ The following syntax is supported:
+
+
+
+ unquoted text: text not inside quote marks will be
+ converted to terms separated by & operators, as
+ if processed by
+ plainto_tsquery.
+
+
+
+
+ "quoted text": text inside quote marks will be
+ converted to terms separated by <->
+ operators, as if processed by phraseto_tsquery.
+
+
+
+
+ OR: logical or will be converted to
+ the | operator.
+
+
+
+
+ -: the logical not operator, converted to
+ the ! operator.
+
+
+
+
+
+ Examples:
+
+ select websearch_to_tsquery('english', 'The fat rats');
+ websearch_to_tsquery
+ ----------------------
+ 'fat' & 'rat'
+ (1 row)
+
+
+ select websearch_to_tsquery('english', '"supernovae stars" -crab');
+ websearch_to_tsquery
+ ----------------------------------
+ 'supernova' <-> 'star' & !'crab'
+ (1 row)
+
+
+ select websearch_to_tsquery('english', '"sad cat" or "fat rat"');
+ websearch_to_tsquery
+ -----------------------------------
+ 'sad' <-> 'cat' | 'fat' <-> 'rat'
+ (1 row)
+
+
+ select websearch_to_tsquery('english', 'signal -"segmentation fault"');
+ websearch_to_tsquery
+ ---------------------------------------
+ 'signal' & !( 'segment' <-> 'fault' )
+ (1 row)
+
+
+ select websearch_to_tsquery('english', '""" )( dummy \\ query <->');
+ websearch_to_tsquery
+ ----------------------
+ 'dummi' & 'queri'
+ (1 row)
+
+
diff --git a/src/backend/tsearch/to_tsany.c b/src/backend/tsearch/to_tsany.c
index ea5947a3a82..6055fb6b4e5 100644
--- a/src/backend/tsearch/to_tsany.c
+++ b/src/backend/tsearch/to_tsany.c
@@ -490,7 +490,7 @@ to_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- false);
+ 0);
PG_RETURN_TSQUERY(query);
}
@@ -520,7 +520,7 @@ plainto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- true);
+ P_TSQ_PLAIN);
PG_RETURN_POINTER(query);
}
@@ -551,7 +551,7 @@ phraseto_tsquery_byid(PG_FUNCTION_ARGS)
query = parse_tsquery(text_to_cstring(in),
pushval_morph,
PointerGetDatum(&data),
- true);
+ P_TSQ_PLAIN);
PG_RETURN_TSQUERY(query);
}
@@ -567,3 +567,35 @@ phraseto_tsquery(PG_FUNCTION_ARGS)
ObjectIdGetDatum(cfgId),
PointerGetDatum(in)));
}
+
+Datum
+websearch_to_tsquery_byid(PG_FUNCTION_ARGS)
+{
+ text *in = PG_GETARG_TEXT_PP(1);
+ MorphOpaque data;
+ TSQuery query = NULL;
+
+ data.cfg_id = PG_GETARG_OID(0);
+
+ data.qoperator = OP_AND;
+
+ query = parse_tsquery(text_to_cstring(in),
+ pushval_morph,
+ PointerGetDatum(&data),
+ P_TSQ_WEB);
+
+ PG_RETURN_TSQUERY(query);
+}
+
+Datum
+websearch_to_tsquery(PG_FUNCTION_ARGS)
+{
+ text *in = PG_GETARG_TEXT_PP(0);
+ Oid cfgId;
+
+ cfgId = getTSCurrentConfig(true);
+ PG_RETURN_DATUM(DirectFunctionCall2(websearch_to_tsquery_byid,
+ ObjectIdGetDatum(cfgId),
+ PointerGetDatum(in)));
+
+}
diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c
index 1ccbf790306..793c0e5dd1c 100644
--- a/src/backend/utils/adt/tsquery.c
+++ b/src/backend/utils/adt/tsquery.c
@@ -32,14 +32,53 @@ const int tsearch_op_priority[OP_COUNT] =
3 /* OP_PHRASE */
};
+/*
+ * parser's states
+ */
+typedef enum
+{
+ WAITOPERAND = 1,
+ WAITOPERATOR = 2,
+ WAITFIRSTOPERAND = 3
+} ts_parserstate;
+
+/*
+ * token types for parsing
+ */
+typedef enum
+{
+ PT_END = 0,
+ PT_ERR = 1,
+ PT_VAL = 2,
+ PT_OPR = 3,
+ PT_OPEN = 4,
+ PT_CLOSE = 5
+} ts_tokentype;
+
+/*
+ * get token from query string
+ *
+ * *operator is filled in with OP_* when return value is PT_OPR,
+ * but *weight could contain a distance value in case of phrase operator.
+ * *strval, *lenval and *weight are filled in when return value is PT_VAL
+ *
+ */
+typedef ts_tokentype (*ts_tokenizer)(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix);
+
struct TSQueryParserStateData
{
- /* State for gettoken_query */
+ /* Tokenizer used for parsing tsquery */
+ ts_tokenizer gettoken;
+
+ /* State of tokenizer function */
char *buffer; /* entire string we are scanning */
char *buf; /* current scan point */
- int state;
int count; /* nesting count, incremented by (,
* decremented by ) */
+ bool in_quotes; /* phrase in quotes "" */
+ ts_parserstate state;
/* polish (prefix) notation in list, filled in by push* functions */
List *polstr;
@@ -57,12 +96,6 @@ struct TSQueryParserStateData
TSVectorParseState valstate;
};
-/* parser's states */
-#define WAITOPERAND 1
-#define WAITOPERATOR 2
-#define WAITFIRSTOPERAND 3
-#define WAITSINGLEOPERAND 4
-
/*
* subroutine to parse the modifiers (weight and prefix flag currently)
* part, like ':AB*' of a query.
@@ -118,18 +151,17 @@ get_modifiers(char *buf, int16 *weight, bool *prefix)
*
* The buffer should begin with '<' char
*/
-static char *
-parse_phrase_operator(char *buf, int16 *distance)
+static bool
+parse_phrase_operator(TSQueryParserState pstate, int16 *distance)
{
enum
{
PHRASE_OPEN = 0,
PHRASE_DIST,
PHRASE_CLOSE,
- PHRASE_ERR,
PHRASE_FINISH
} state = PHRASE_OPEN;
- char *ptr = buf;
+ char *ptr = pstate->buf;
char *endptr;
long l = 1; /* default distance */
@@ -138,9 +170,13 @@ parse_phrase_operator(char *buf, int16 *distance)
switch (state)
{
case PHRASE_OPEN:
- Assert(t_iseq(ptr, '<'));
- state = PHRASE_DIST;
- ptr++;
+ if (t_iseq(ptr, '<'))
+ {
+ state = PHRASE_DIST;
+ ptr++;
+ }
+ else
+ return false;
break;
case PHRASE_DIST:
@@ -148,18 +184,16 @@ parse_phrase_operator(char *buf, int16 *distance)
{
state = PHRASE_CLOSE;
ptr++;
- break;
+ continue;
}
+
if (!t_isdigit(ptr))
- {
- state = PHRASE_ERR;
- break;
- }
+ return false;
errno = 0;
l = strtol(ptr, &endptr, 10);
if (ptr == endptr)
- state = PHRASE_ERR;
+ return false;
else if (errno == ERANGE || l < 0 || l > MAXENTRYPOS)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
@@ -179,54 +213,77 @@ parse_phrase_operator(char *buf, int16 *distance)
ptr++;
}
else
- state = PHRASE_ERR;
+ return false;
break;
case PHRASE_FINISH:
*distance = (int16) l;
- return ptr;
-
- case PHRASE_ERR:
- default:
- goto err;
+ pstate->buf = ptr;
+ return true;
}
}
-err:
- *distance = -1;
- return buf;
+ return false;
}
/*
- * token types for parsing
+ * Parse the OR operator used in websearch_to_tsquery(); returns true if we
+ * believe that the "OR" literal is meant as the OR operator
*/
-typedef enum
+static bool
+parse_or_operator(TSQueryParserState pstate)
{
- PT_END = 0,
- PT_ERR = 1,
- PT_VAL = 2,
- PT_OPR = 3,
- PT_OPEN = 4,
- PT_CLOSE = 5
-} ts_tokentype;
+ char *ptr = pstate->buf;
+
+ if (pstate->in_quotes)
+ return false;
+
+ /* it should begin with "OR" literal */
+ if (pg_strncasecmp(ptr, "or", 2) != 0)
+ return false;
+
+ ptr += 2;
+
+ /*
+ * "OR" must not be part of a longer word, and it must be followed later
+ * by some operand
+ */
+ if (*ptr == '\0') /* no operand */
+ return false;
+
+ /* it shouldn't be a part of any word */
+ if (t_iseq(ptr, '-') || t_iseq(ptr, '_') || t_isalpha(ptr) || t_isdigit(ptr))
+ return false;
+
+ for(;;)
+ {
+ ptr += pg_mblen(ptr);
+
+ if (*ptr == '\0') /* got end of string without operand */
+ return false;
+
+ /*
+ * We found something that looks like an operand, though it may turn out
+ * to be invalid. Even so, treat the OR literal as an operator (with a
+ * possibly incorrect operand) rather than searching for it as a lexeme
+ */
+ if (!t_isspace(ptr))
+ break;
+ }
+
+ pstate->buf += 2;
+ return true;
+}
-/*
- * get token from query string
- *
- * *operator is filled in with OP_* when return values is PT_OPR,
- * but *weight could contain a distance value in case of phrase operator.
- * *strval, *lenval and *weight are filled in when return value is PT_VAL
- *
- */
static ts_tokentype
-gettoken_query(TSQueryParserState state,
- int8 *operator,
- int *lenval, char **strval, int16 *weight, bool *prefix)
+gettoken_query_standard(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
{
*weight = 0;
*prefix = false;
- while (1)
+ while (true)
{
switch (state->state)
{
@@ -234,17 +291,16 @@ gettoken_query(TSQueryParserState state,
case WAITOPERAND:
if (t_iseq(state->buf, '!'))
{
- (state->buf)++; /* can safely ++, t_iseq guarantee that
- * pg_mblen()==1 */
- *operator = OP_NOT;
+ state->buf++;
state->state = WAITOPERAND;
+ *operator = OP_NOT;
return PT_OPR;
}
else if (t_iseq(state->buf, '('))
{
- state->count++;
- (state->buf)++;
+ state->buf++;
state->state = WAITOPERAND;
+ state->count++;
return PT_OPEN;
}
else if (t_iseq(state->buf, ':'))
@@ -256,19 +312,19 @@ gettoken_query(TSQueryParserState state,
}
else if (!t_isspace(state->buf))
{
- /*
- * We rely on the tsvector parser to parse the value for
- * us
- */
+ /* We rely on the tsvector parser to parse the value for us */
reset_tsvector_parser(state->valstate, state->buf);
- if (gettoken_tsvector(state->valstate, strval, lenval, NULL, NULL, &state->buf))
+ if (gettoken_tsvector(state->valstate, strval, lenval,
+ NULL, NULL, &state->buf))
{
state->buf = get_modifiers(state->buf, weight, prefix);
state->state = WAITOPERATOR;
return PT_VAL;
}
else if (state->state == WAITFIRSTOPERAND)
+ {
return PT_END;
+ }
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -276,58 +332,206 @@ gettoken_query(TSQueryParserState state,
state->buffer)));
}
break;
+
case WAITOPERATOR:
if (t_iseq(state->buf, '&'))
{
+ state->buf++;
state->state = WAITOPERAND;
*operator = OP_AND;
- (state->buf)++;
return PT_OPR;
}
else if (t_iseq(state->buf, '|'))
{
+ state->buf++;
state->state = WAITOPERAND;
*operator = OP_OR;
- (state->buf)++;
return PT_OPR;
}
- else if (t_iseq(state->buf, '<'))
+ else if (parse_phrase_operator(state, weight))
{
+ /* weight var is used as storage for distance */
state->state = WAITOPERAND;
*operator = OP_PHRASE;
- /* weight var is used as storage for distance */
- state->buf = parse_phrase_operator(state->buf, weight);
- if (*weight < 0)
- return PT_ERR;
return PT_OPR;
}
else if (t_iseq(state->buf, ')'))
{
- (state->buf)++;
+ state->buf++;
state->count--;
return (state->count < 0) ? PT_ERR : PT_CLOSE;
}
- else if (*(state->buf) == '\0')
+ else if (*state->buf == '\0')
+ {
return (state->count) ? PT_ERR : PT_END;
+ }
else if (!t_isspace(state->buf))
+ {
return PT_ERR;
- break;
- case WAITSINGLEOPERAND:
- if (*(state->buf) == '\0')
- return PT_END;
- *strval = state->buf;
- *lenval = strlen(state->buf);
- state->buf += strlen(state->buf);
- state->count++;
- return PT_VAL;
- default:
- return PT_ERR;
+ }
break;
}
+
state->buf += pg_mblen(state->buf);
}
}
+static ts_tokentype
+gettoken_query_websearch(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ while (true)
+ {
+ switch (state->state)
+ {
+ case WAITFIRSTOPERAND:
+ case WAITOPERAND:
+ if (t_iseq(state->buf, '-'))
+ {
+ state->buf++;
+ state->state = WAITOPERAND;
+
+ if (state->in_quotes)
+ continue;
+
+ *operator = OP_NOT;
+ return PT_OPR;
+ }
+ else if (t_iseq(state->buf, '"'))
+ {
+ state->buf++;
+
+ if (!state->in_quotes)
+ {
+ state->state = WAITOPERAND;
+
+ if (strchr(state->buf, '"'))
+ {
+ /* quoted text should be ordered <-> */
+ state->in_quotes = true;
+ return PT_OPEN;
+ }
+
+ /* web search tolerates missing quotes */
+ continue;
+ }
+ else
+ {
+ /* we have to provide an operand */
+ state->in_quotes = false;
+ state->state = WAITOPERATOR;
+ pushStop(state);
+ return PT_CLOSE;
+ }
+ }
+ else if (ISOPERATOR(state->buf))
+ {
+ /* or else gettoken_tsvector() will raise an error */
+ state->buf++;
+ state->state = WAITOPERAND;
+ continue;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ /* We rely on the tsvector parser to parse the value for us */
+ reset_tsvector_parser(state->valstate, state->buf);
+ if (gettoken_tsvector(state->valstate, strval, lenval,
+ NULL, NULL, &state->buf))
+ {
+ state->state = WAITOPERATOR;
+ return PT_VAL;
+ }
+ else if (state->state == WAITFIRSTOPERAND)
+ {
+ return PT_END;
+ }
+ else
+ {
+ /* finally, we have to provide an operand */
+ pushStop(state);
+ return PT_END;
+ }
+ }
+ break;
+
+ case WAITOPERATOR:
+ if (t_iseq(state->buf, '"'))
+ {
+ if (!state->in_quotes)
+ {
+ /*
+ * put implicit AND after an operand
+ * and handle this quote in WAITOPERAND
+ */
+ state->state = WAITOPERAND;
+ *operator = OP_AND;
+ return PT_OPR;
+ }
+ else
+ {
+ state->buf++;
+
+ /* just close quotes */
+ state->in_quotes = false;
+ return PT_CLOSE;
+ }
+ }
+ else if (parse_or_operator(state))
+ {
+ state->state = WAITOPERAND;
+ *operator = OP_OR;
+ return PT_OPR;
+ }
+ else if (*state->buf == '\0')
+ {
+ return PT_END;
+ }
+ else if (!t_isspace(state->buf))
+ {
+ if (state->in_quotes)
+ {
+ /* put implicit <-> after an operand */
+ *operator = OP_PHRASE;
+ *weight = 1;
+ }
+ else
+ {
+ /* put implicit AND after an operand */
+ *operator = OP_AND;
+ }
+
+ state->state = WAITOPERAND;
+ return PT_OPR;
+ }
+ break;
+ }
+
+ state->buf += pg_mblen(state->buf);
+ }
+}
+
+static ts_tokentype
+gettoken_query_plain(TSQueryParserState state, int8 *operator,
+ int *lenval, char **strval,
+ int16 *weight, bool *prefix)
+{
+ *weight = 0;
+ *prefix = false;
+
+ if (*state->buf == '\0')
+ return PT_END;
+
+ *strval = state->buf;
+ *lenval = strlen(state->buf);
+ state->buf += *lenval;
+ state->count++;
+ return PT_VAL;
+}
+
/*
* Push an operator to state->polstr
*/
@@ -489,7 +693,9 @@ makepol(TSQueryParserState state,
/* since this function recurses, it could be driven to stack overflow */
check_stack_depth();
- while ((type = gettoken_query(state, &operator, &lenval, &strval, &weight, &prefix)) != PT_END)
+ while ((type = state->gettoken(state, &operator,
+ &lenval, &strval,
+ &weight, &prefix)) != PT_END)
{
switch (type)
{
@@ -605,7 +811,7 @@ TSQuery
parse_tsquery(char *buf,
PushFunction pushval,
Datum opaque,
- bool isplain)
+ int flags)
{
struct TSQueryParserStateData state;
int i;
@@ -614,16 +820,32 @@ parse_tsquery(char *buf,
QueryItem *ptr;
ListCell *cell;
bool needcleanup;
+ int tsv_flags = P_TSV_OPR_IS_DELIM | P_TSV_IS_TSQUERY;
+
+ /* plain should not be used with web */
+ Assert((flags & (P_TSQ_PLAIN | P_TSQ_WEB)) != (P_TSQ_PLAIN | P_TSQ_WEB));
+
+ /* select suitable tokenizer */
+ if (flags & P_TSQ_PLAIN)
+ state.gettoken = gettoken_query_plain;
+ else if (flags & P_TSQ_WEB)
+ {
+ state.gettoken = gettoken_query_websearch;
+ tsv_flags |= P_TSV_IS_WEB;
+ }
+ else
+ state.gettoken = gettoken_query_standard;
/* init state */
state.buffer = buf;
state.buf = buf;
- state.state = (isplain) ? WAITSINGLEOPERAND : WAITFIRSTOPERAND;
state.count = 0;
+ state.in_quotes = false;
+ state.state = WAITFIRSTOPERAND;
state.polstr = NIL;
/* init value parser's state */
- state.valstate = init_tsvector_parser(state.buffer, true, true);
+ state.valstate = init_tsvector_parser(state.buffer, tsv_flags);
/* init list of operand */
state.sumlen = 0;
@@ -716,7 +938,7 @@ tsqueryin(PG_FUNCTION_ARGS)
{
char *in = PG_GETARG_CSTRING(0);
- PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), false));
+ PG_RETURN_TSQUERY(parse_tsquery(in, pushval_asis, PointerGetDatum(NULL), 0));
}
/*
diff --git a/src/backend/utils/adt/tsvector.c b/src/backend/utils/adt/tsvector.c
index 64e02ef4343..7a27bd12a31 100644
--- a/src/backend/utils/adt/tsvector.c
+++ b/src/backend/utils/adt/tsvector.c
@@ -200,7 +200,7 @@ tsvectorin(PG_FUNCTION_ARGS)
char *cur;
int buflen = 256; /* allocated size of tmpbuf */
- state = init_tsvector_parser(buf, false, false);
+ state = init_tsvector_parser(buf, 0);
arrlen = 64;
arr = (WordEntryIN *) palloc(sizeof(WordEntryIN) * arrlen);
diff --git a/src/backend/utils/adt/tsvector_parser.c b/src/backend/utils/adt/tsvector_parser.c
index 7367ba6a40f..fed411a842e 100644
--- a/src/backend/utils/adt/tsvector_parser.c
+++ b/src/backend/utils/adt/tsvector_parser.c
@@ -33,6 +33,7 @@ struct TSVectorParseStateData
int eml; /* max bytes per character */
bool oprisdelim; /* treat ! | * ( ) as delimiters? */
bool is_tsquery; /* say "tsquery" not "tsvector" in errors? */
+ bool is_web; /* we're in websearch_to_tsquery() */
};
@@ -42,7 +43,7 @@ struct TSVectorParseStateData
* ! | & ( )
*/
TSVectorParseState
-init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
+init_tsvector_parser(char *input, int flags)
{
TSVectorParseState state;
@@ -52,8 +53,9 @@ init_tsvector_parser(char *input, bool oprisdelim, bool is_tsquery)
state->len = 32;
state->word = (char *) palloc(state->len);
state->eml = pg_database_encoding_max_length();
- state->oprisdelim = oprisdelim;
- state->is_tsquery = is_tsquery;
+ state->oprisdelim = (flags & P_TSV_OPR_IS_DELIM) != 0;
+ state->is_tsquery = (flags & P_TSV_IS_TSQUERY) != 0;
+ state->is_web = (flags & P_TSV_IS_WEB) != 0;
return state;
}
@@ -89,16 +91,6 @@ do { \
} \
} while (0)
-/* phrase operator begins with '<' */
-#define ISOPERATOR(x) \
- ( pg_mblen(x) == 1 && ( *(x) == '!' || \
- *(x) == '&' || \
- *(x) == '|' || \
- *(x) == '(' || \
- *(x) == ')' || \
- *(x) == '<' \
- ) )
-
/* Fills gettoken_tsvector's output parameters, and returns true */
#define RETURN_TOKEN \
do { \
@@ -183,14 +175,15 @@ gettoken_tsvector(TSVectorParseState state,
{
if (*(state->prsbuf) == '\0')
return false;
- else if (t_iseq(state->prsbuf, '\''))
+ else if (!state->is_web && t_iseq(state->prsbuf, '\''))
statecode = WAITENDCMPLX;
- else if (t_iseq(state->prsbuf, '\\'))
+ else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
- else if (state->oprisdelim && ISOPERATOR(state->prsbuf))
+ else if ((state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ (state->is_web && t_iseq(state->prsbuf, '"')))
PRSSYNTAXERROR;
else if (!t_isspace(state->prsbuf))
{
@@ -217,13 +210,14 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == WAITENDWORD)
{
- if (t_iseq(state->prsbuf, '\\'))
+ if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDWORD;
}
else if (t_isspace(state->prsbuf) || *(state->prsbuf) == '\0' ||
- (state->oprisdelim && ISOPERATOR(state->prsbuf)))
+ (state->oprisdelim && ISOPERATOR(state->prsbuf)) ||
+ (state->is_web && t_iseq(state->prsbuf, '"')))
{
RESIZEPRSBUF;
if (curpos == state->word)
@@ -250,11 +244,11 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == WAITENDCMPLX)
{
- if (t_iseq(state->prsbuf, '\''))
+ if (!state->is_web && t_iseq(state->prsbuf, '\''))
{
statecode = WAITCHARCMPLX;
}
- else if (t_iseq(state->prsbuf, '\\'))
+ else if (!state->is_web && t_iseq(state->prsbuf, '\\'))
{
statecode = WAITNEXTCHAR;
oldstate = WAITENDCMPLX;
@@ -270,7 +264,7 @@ gettoken_tsvector(TSVectorParseState state,
}
else if (statecode == WAITCHARCMPLX)
{
- if (t_iseq(state->prsbuf, '\''))
+ if (!state->is_web && t_iseq(state->prsbuf, '\''))
{
RESIZEPRSBUF;
COPYCHAR(curpos, state->prsbuf);
diff --git a/src/include/catalog/catversion.h b/src/include/catalog/catversion.h
index 5d55890b9dd..5f63efc3552 100644
--- a/src/include/catalog/catversion.h
+++ b/src/include/catalog/catversion.h
@@ -53,6 +53,6 @@
*/
/* yyyymmddN */
-#define CATALOG_VERSION_NO 201804031
+#define CATALOG_VERSION_NO 201804051
#endif
diff --git a/src/include/catalog/pg_proc.h b/src/include/catalog/pg_proc.h
index 9bf20c059bc..edf212fcf0f 100644
--- a/src/include/catalog/pg_proc.h
+++ b/src/include/catalog/pg_proc.h
@@ -4971,6 +4971,8 @@ DATA(insert OID = 3747 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s
DESCR("transform to tsquery");
DATA(insert OID = 5006 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery_byid _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 8889 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f i s 2 0 3615 "3734 25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery_byid _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 3749 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "25" _null_ _null_ _null_ _null_ _null_ to_tsvector _null_ _null_ _null_ ));
DESCR("transform to tsvector");
DATA(insert OID = 3750 ( to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ to_tsquery _null_ _null_ _null_ ));
@@ -4979,6 +4981,8 @@ DATA(insert OID = 3751 ( plainto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s
DESCR("transform to tsquery");
DATA(insert OID = 5001 ( phraseto_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ phraseto_tsquery _null_ _null_ _null_ ));
DESCR("transform to tsquery");
+DATA(insert OID = 8890 ( websearch_to_tsquery PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3615 "25" _null_ _null_ _null_ _null_ _null_ websearch_to_tsquery _null_ _null_ _null_ ));
+DESCR("transform to tsquery");
DATA(insert OID = 4209 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "3802" _null_ _null_ _null_ _null_ _null_ jsonb_to_tsvector _null_ _null_ _null_ ));
DESCR("transform jsonb to tsvector");
DATA(insert OID = 4210 ( to_tsvector PGNSP PGUID 12 100 0 0 0 f f f t f s s 1 0 3614 "114" _null_ _null_ _null_ _null_ _null_ json_to_tsvector _null_ _null_ _null_ ));
diff --git a/src/include/tsearch/ts_utils.h b/src/include/tsearch/ts_utils.h
index f8ddce5ecbd..73e969fe9ce 100644
--- a/src/include/tsearch/ts_utils.h
+++ b/src/include/tsearch/ts_utils.h
@@ -25,9 +25,11 @@
struct TSVectorParseStateData; /* opaque struct in tsvector_parser.c */
typedef struct TSVectorParseStateData *TSVectorParseState;
-extern TSVectorParseState init_tsvector_parser(char *input,
- bool oprisdelim,
- bool is_tsquery);
+#define P_TSV_OPR_IS_DELIM (1 << 0)
+#define P_TSV_IS_TSQUERY (1 << 1)
+#define P_TSV_IS_WEB (1 << 2)
+
+extern TSVectorParseState init_tsvector_parser(char *input, int flags);
extern void reset_tsvector_parser(TSVectorParseState state, char *input);
extern bool gettoken_tsvector(TSVectorParseState state,
char **token, int *len,
@@ -35,6 +37,16 @@ extern bool gettoken_tsvector(TSVectorParseState state,
char **endptr);
extern void close_tsvector_parser(TSVectorParseState state);
+/* phrase operator begins with '<' */
+#define ISOPERATOR(x) \
+ ( pg_mblen(x) == 1 && ( *(x) == '!' || \
+ *(x) == '&' || \
+ *(x) == '|' || \
+ *(x) == '(' || \
+ *(x) == ')' || \
+ *(x) == '<' \
+ ) )
+
/* parse_tsquery */
struct TSQueryParserStateData; /* private in backend/utils/adt/tsquery.c */
@@ -46,9 +58,13 @@ typedef void (*PushFunction) (Datum opaque, TSQueryParserState state,
* QueryOperand struct */
bool prefix);
+#define P_TSQ_PLAIN (1 << 0)
+#define P_TSQ_WEB (1 << 1)
+
extern TSQuery parse_tsquery(char *buf,
- PushFunction pushval,
- Datum opaque, bool isplain);
+ PushFunction pushval,
+ Datum opaque,
+ int flags);
/* Functions for use by PushFunction implementations */
extern void pushValue(TSQueryParserState state,
diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out
index d63fb12f1de..c38237c8a4d 100644
--- a/src/test/regress/expected/tsearch.out
+++ b/src/test/regress/expected/tsearch.out
@@ -1672,3 +1672,426 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
(1 row)
set enable_seqscan = on;
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
+ websearch_to_tsquery
+---------------------------------------------
+ 'i' & 'have' & 'a' & 'fat' & 'abcd' & 'cat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
+ websearch_to_tsquery
+-----------------------
+ 'orange' & 'aabbccdd'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
+ websearch_to_tsquery
+-----------------------------------------
+ 'fat' & 'a' & 'cat' & 'b' & 'rat' & 'c'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat:A : cat:B');
+ websearch_to_tsquery
+---------------------------
+ 'fat' & 'a' & 'cat' & 'b'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat*rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat-rat');
+ websearch_to_tsquery
+---------------------------
+ 'fat-rat' & 'fat' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat_rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' & 'rat'
+(1 row)
+
+-- weights are completely ignored
+select websearch_to_tsquery('simple', 'abc : def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc:def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'a:::b');
+ websearch_to_tsquery
+----------------------
+ 'a' & 'b'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc:d');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'd'
+(1 row)
+
+select websearch_to_tsquery('simple', ':');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+-- these operators are ignored
+select websearch_to_tsquery('simple', 'abc & def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc | def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc <-> def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc (pg or class)');
+ websearch_to_tsquery
+------------------------
+ 'abc' & 'pg' | 'class'
+(1 row)
+
+-- NOT is ignored in quotes
+select websearch_to_tsquery('english', 'My brand new smartphone');
+ websearch_to_tsquery
+-------------------------------
+ 'brand' & 'new' & 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('english', 'My brand "new smartphone"');
+ websearch_to_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+select websearch_to_tsquery('english', 'My brand "new -smartphone"');
+ websearch_to_tsquery
+---------------------------------
+ 'brand' & 'new' <-> 'smartphon'
+(1 row)
+
+-- test OR operator
+select websearch_to_tsquery('simple', 'cat or rat');
+ websearch_to_tsquery
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR rat');
+ websearch_to_tsquery
+----------------------
+ 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+ websearch_to_tsquery
+----------------------
+ 'cat' & 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'cat OR');
+ websearch_to_tsquery
+----------------------
+ 'cat' & 'or'
+(1 row)
+
+select websearch_to_tsquery('simple', 'OR rat');
+ websearch_to_tsquery
+----------------------
+ 'or' & 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+ websearch_to_tsquery
+------------------------------------
+ 'fat' <-> 'cat' <-> 'or' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+ websearch_to_tsquery
+-----------------------
+ 'fat' & 'cat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'or OR or');
+ websearch_to_tsquery
+----------------------
+ 'or' | 'or'
+(1 row)
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
+ websearch_to_tsquery
+-----------------------------------
+ 'fat' <-> 'cat' | 'fat' <-> 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or(rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or)rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or&rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or|rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or!rat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat orrat');
+ websearch_to_tsquery
+----------------------
+ 'fat' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('simple', 'fat or ');
+ websearch_to_tsquery
+----------------------
+ 'fat' & 'or'
+(1 row)
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'orange'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc orтест');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'orтест'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR1234');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'or1234'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc or-abc');
+ websearch_to_tsquery
+---------------------------------
+ 'abc' & 'or-abc' & 'or' & 'abc'
+(1 row)
+
+select websearch_to_tsquery('simple', 'abc OR_abc');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'or' & 'abc'
+(1 row)
+
+-- test quotes
+select websearch_to_tsquery('english', '"pg_class pg');
+ websearch_to_tsquery
+-----------------------
+ 'pg' & 'class' & 'pg'
+(1 row)
+
+select websearch_to_tsquery('english', 'pg_class pg"');
+ websearch_to_tsquery
+-----------------------
+ 'pg' & 'class' & 'pg'
+(1 row)
+
+select websearch_to_tsquery('english', '"pg_class pg"');
+ websearch_to_tsquery
+-----------------------------
+ ( 'pg' & 'class' ) <-> 'pg'
+(1 row)
+
+select websearch_to_tsquery('english', 'abc "pg_class pg"');
+ websearch_to_tsquery
+-------------------------------------
+ 'abc' & ( 'pg' & 'class' ) <-> 'pg'
+(1 row)
+
+select websearch_to_tsquery('english', '"pg_class pg" def');
+ websearch_to_tsquery
+-------------------------------------
+ ( 'pg' & 'class' ) <-> 'pg' & 'def'
+(1 row)
+
+select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
+ websearch_to_tsquery
+------------------------------------------------------
+ 'abc' & 'pg' <-> ( 'pg' & 'class' ) <-> 'pg' & 'def'
+(1 row)
+
+select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
+ websearch_to_tsquery
+--------------------------------------
+ 'pg' <-> ( 'pg' & 'class' ) <-> 'pg'
+(1 row)
+
+select websearch_to_tsquery('english', '""pg pg_class pg""');
+ websearch_to_tsquery
+------------------------------
+ 'pg' & 'pg' & 'class' & 'pg'
+(1 row)
+
+select websearch_to_tsquery('english', 'abc """"" def');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('english', 'cat -"fat rat"');
+ websearch_to_tsquery
+------------------------------
+ 'cat' & !( 'fat' <-> 'rat' )
+(1 row)
+
+select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
+ websearch_to_tsquery
+----------------------------------------
+ 'cat' & !( 'fat' <-> 'rat' ) & 'chees'
+(1 row)
+
+select websearch_to_tsquery('english', 'abc "def -"');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('english', 'abc "def :"');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+ websearch_to_tsquery
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' & !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+ websearch_to_tsquery
+-----------------------------------
+ 'fat' <-> 'cat' & 'eaten' | 'rat'
+(1 row)
+
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+ websearch_to_tsquery
+------------------------------------
+ 'fat' <-> 'cat' & 'eaten' | !'rat'
+(1 row)
+
+select websearch_to_tsquery('english', 'this is ----fine');
+ websearch_to_tsquery
+----------------------
+ !!!!'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
+ websearch_to_tsquery
+----------------------------------------
+ !'fine' & 'dear' <-> 'friend' | 'good'
+(1 row)
+
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+ websearch_to_tsquery
+------------------------
+ 'old' & 'cat' & 'fine'
+(1 row)
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('english', '"a fat cat" ate a rat');
+ websearch_to_tsquery
+---------------------------------
+ 'fat' <-> 'cat' & 'ate' & 'rat'
+(1 row)
+
+select to_tsvector('english', 'A fat cat ate a rat') @@
+ websearch_to_tsquery('english', '"a fat cat" ate a rat');
+ ?column?
+----------
+ t
+(1 row)
+
+select to_tsvector('english', 'A fat grey cat ate a rat') @@
+ websearch_to_tsquery('english', '"a fat cat" ate a rat');
+ ?column?
+----------
+ f
+(1 row)
+
+-- cases handled by gettoken_tsvector()
+select websearch_to_tsquery('''');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
+select websearch_to_tsquery('''abc''''def''');
+ websearch_to_tsquery
+----------------------
+ 'abc' & 'def'
+(1 row)
+
+select websearch_to_tsquery('\abc');
+ websearch_to_tsquery
+----------------------
+ 'abc'
+(1 row)
+
+select websearch_to_tsquery('\');
+NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
+ websearch_to_tsquery
+----------------------
+
+(1 row)
+
diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql
index 1c8520b3e91..1768541f21b 100644
--- a/src/test/regress/sql/tsearch.sql
+++ b/src/test/regress/sql/tsearch.sql
@@ -539,3 +539,97 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
set enable_seqscan = on;
+
+-- test websearch_to_tsquery function
+select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
+select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
+select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
+select websearch_to_tsquery('simple', 'fat:A : cat:B');
+
+select websearch_to_tsquery('simple', 'fat*rat');
+select websearch_to_tsquery('simple', 'fat-rat');
+select websearch_to_tsquery('simple', 'fat_rat');
+
+-- weights are completely ignored
+select websearch_to_tsquery('simple', 'abc : def');
+select websearch_to_tsquery('simple', 'abc:def');
+select websearch_to_tsquery('simple', 'a:::b');
+select websearch_to_tsquery('simple', 'abc:d');
+select websearch_to_tsquery('simple', ':');
+
+-- these operators are ignored
+select websearch_to_tsquery('simple', 'abc & def');
+select websearch_to_tsquery('simple', 'abc | def');
+select websearch_to_tsquery('simple', 'abc <-> def');
+select websearch_to_tsquery('simple', 'abc (pg or class)');
+
+-- NOT is ignored in quotes
+select websearch_to_tsquery('english', 'My brand new smartphone');
+select websearch_to_tsquery('english', 'My brand "new smartphone"');
+select websearch_to_tsquery('english', 'My brand "new -smartphone"');
+
+-- test OR operator
+select websearch_to_tsquery('simple', 'cat or rat');
+select websearch_to_tsquery('simple', 'cat OR rat');
+select websearch_to_tsquery('simple', 'cat "OR" rat');
+select websearch_to_tsquery('simple', 'cat OR');
+select websearch_to_tsquery('simple', 'OR rat');
+select websearch_to_tsquery('simple', '"fat cat OR rat"');
+select websearch_to_tsquery('simple', 'fat (cat OR rat');
+select websearch_to_tsquery('simple', 'or OR or');
+
+-- OR is an operator here ...
+select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
+select websearch_to_tsquery('simple', 'fat or(rat');
+select websearch_to_tsquery('simple', 'fat or)rat');
+select websearch_to_tsquery('simple', 'fat or&rat');
+select websearch_to_tsquery('simple', 'fat or|rat');
+select websearch_to_tsquery('simple', 'fat or!rat');
+select websearch_to_tsquery('simple', 'fat orrat');
+select websearch_to_tsquery('simple', 'fat or ');
+
+-- ... but not here
+select websearch_to_tsquery('simple', 'abc orange');
+select websearch_to_tsquery('simple', 'abc orтест');
+select websearch_to_tsquery('simple', 'abc OR1234');
+select websearch_to_tsquery('simple', 'abc or-abc');
+select websearch_to_tsquery('simple', 'abc OR_abc');
+
+-- test quotes
+select websearch_to_tsquery('english', '"pg_class pg');
+select websearch_to_tsquery('english', 'pg_class pg"');
+select websearch_to_tsquery('english', '"pg_class pg"');
+select websearch_to_tsquery('english', 'abc "pg_class pg"');
+select websearch_to_tsquery('english', '"pg_class pg" def');
+select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
+select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
+select websearch_to_tsquery('english', '""pg pg_class pg""');
+select websearch_to_tsquery('english', 'abc """"" def');
+select websearch_to_tsquery('english', 'cat -"fat rat"');
+select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
+select websearch_to_tsquery('english', 'abc "def -"');
+select websearch_to_tsquery('english', 'abc "def :"');
+
+select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
+select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
+
+select websearch_to_tsquery('english', 'this is ----fine');
+select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
+select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
+
+select websearch_to_tsquery('english', '"A the" OR just on');
+select websearch_to_tsquery('english', '"a fat cat" ate a rat');
+
+select to_tsvector('english', 'A fat cat ate a rat') @@
+ websearch_to_tsquery('english', '"a fat cat" ate a rat');
+
+select to_tsvector('english', 'A fat grey cat ate a rat') @@
+ websearch_to_tsquery('english', '"a fat cat" ate a rat');
+
+-- cases handled by gettoken_tsvector()
+select websearch_to_tsquery('''');
+select websearch_to_tsquery('''abc''''def''');
+select websearch_to_tsquery('\abc');
+select websearch_to_tsquery('\');