From c7de5a654567ffbd0eef1d6d2fb8d3fa2b5c5dee Mon Sep 17 00:00:00 2001
From: Tom Lane
Date: Thu, 13 Jun 2024 20:34:43 -0400
Subject: [PATCH] Fix parsing of ignored operators in websearch_to_tsquery().

The manual says clearly that punctuation in the input of
websearch_to_tsquery() is ignored, except for the special cases
of dashes and quotes. However, this failed for cases like
"(foo bar) or something", or in general an ISOPERATOR character
in front of the "or". We'd switch back to WAITOPERAND state,
then ignore the operator character while remaining in that state,
and then reach the "or" in WAITOPERAND state which (intentionally)
makes us treat it as data.

The fix is simple enough: if we see an ISOPERATOR character while in
WAITOPERATOR state, we have to skip it while staying in that state.
(We don't need to worry about other punctuation characters: those will
be consumed as though they were words, but then rejected by lexizing.)

In v14 and up (since commit eb086056f) we can simplify the code a bit
more too, because there is no longer a reason for the WAITOPERAND
state to distinguish between quoted and unquoted operands.

Per bug #18479 from Manos Emmanouilidis. Back-patch to all supported
branches.

Discussion: https://postgr.es/m/18479-d9b46e2fc242c33e@postgresql.org --- src/backend/utils/adt/tsquery.c | 8 +++++++- src/test/regress/expected/tsearch.out | 7 +++++++ src/test/regress/sql/tsearch.sql | 3 +++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/backend/utils/adt/tsquery.c b/src/backend/utils/adt/tsquery.c index ee98c3f02fc..bf375116d48 100644 --- a/src/backend/utils/adt/tsquery.c +++ b/src/backend/utils/adt/tsquery.c @@ -433,7 +433,7 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, } else if (ISOPERATOR(state->buf)) { - /* or else gettoken_tsvector() will raise an error */ + /* ignore, else gettoken_tsvector() will raise an error */ state->buf++; state->state = WAITOPERAND; continue; @@ -492,6 +492,12 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator, *operator = OP_OR; return PT_OPR; } + else if (ISOPERATOR(state->buf)) + { + /* ignore other operators in this state too */ + state->buf++; + continue; + } else if (*state->buf == '\0') { return PT_END; diff --git a/src/test/regress/expected/tsearch.out b/src/test/regress/expected/tsearch.out index 827321ac917..73f2d13297c 100644 --- a/src/test/regress/expected/tsearch.out +++ b/src/test/regress/expected/tsearch.out @@ -2539,12 +2539,19 @@ select websearch_to_tsquery('simple', 'abc <-> def'); 'abc' & 'def' (1 row) +-- parens are ignored, too select websearch_to_tsquery('simple', 'abc (pg or class)'); websearch_to_tsquery ------------------------ 'abc' & 'pg' | 'class' (1 row) +select websearch_to_tsquery('simple', '(foo bar) or (ding dong)'); + websearch_to_tsquery +--------------------------------- + 'foo' & 'bar' | 'ding' & 'dong' +(1 row) + -- NOT is ignored in quotes select websearch_to_tsquery('english', 'My brand new smartphone'); websearch_to_tsquery diff --git a/src/test/regress/sql/tsearch.sql b/src/test/regress/sql/tsearch.sql index c958e4df0db..a6088288bc7 100644 --- a/src/test/regress/sql/tsearch.sql +++ 
b/src/test/regress/sql/tsearch.sql @@ -727,7 +727,10 @@ select websearch_to_tsquery('simple', ':'); select websearch_to_tsquery('simple', 'abc & def'); select websearch_to_tsquery('simple', 'abc | def'); select websearch_to_tsquery('simple', 'abc <-> def'); + +-- parens are ignored, too select websearch_to_tsquery('simple', 'abc (pg or class)'); +select websearch_to_tsquery('simple', '(foo bar) or (ding dong)'); -- NOT is ignored in quotes select websearch_to_tsquery('english', 'My brand new smartphone');