1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-20 05:03:10 +03:00

Add websearch_to_tsquery

Add an error-tolerant conversion function that accepts a web-search-like
syntax for search queries. It simplifies constructing a search engine whose
interface is close to what users are already accustomed to.

Bump catalog version

Authors: Victor Drobny, Dmitry Ivanov, with edits by me
Reviewed by: Aleksander Alekseev, Tomas Vondra, Thomas Munro, Aleksandr Parfenov
Discussion: https://www.postgresql.org/message-id/flat/fe931111ff7e9ad79196486ada79e268@postgrespro.ru
This commit is contained in:
Teodor Sigaev
2018-04-05 19:55:11 +03:00
parent fbc27330b8
commit 1664ae1978
11 changed files with 1002 additions and 121 deletions

View File

@ -1672,3 +1672,426 @@ select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat ca
(1 row)
set enable_seqscan = on;
-- test websearch_to_tsquery function
select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
websearch_to_tsquery
---------------------------------------------
'i' & 'have' & 'a' & 'fat' & 'abcd' & 'cat'
(1 row)
select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
websearch_to_tsquery
-----------------------
'orange' & 'aabbccdd'
(1 row)
select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
websearch_to_tsquery
-----------------------------------------
'fat' & 'a' & 'cat' & 'b' & 'rat' & 'c'
(1 row)
select websearch_to_tsquery('simple', 'fat:A : cat:B');
websearch_to_tsquery
---------------------------
'fat' & 'a' & 'cat' & 'b'
(1 row)
select websearch_to_tsquery('simple', 'fat*rat');
websearch_to_tsquery
----------------------
'fat' & 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat-rat');
websearch_to_tsquery
---------------------------
'fat-rat' & 'fat' & 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat_rat');
websearch_to_tsquery
----------------------
'fat' & 'rat'
(1 row)
-- weights are completely ignored
select websearch_to_tsquery('simple', 'abc : def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc:def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'a:::b');
websearch_to_tsquery
----------------------
'a' & 'b'
(1 row)
select websearch_to_tsquery('simple', 'abc:d');
websearch_to_tsquery
----------------------
'abc' & 'd'
(1 row)
select websearch_to_tsquery('simple', ':');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
-- these operators are ignored
select websearch_to_tsquery('simple', 'abc & def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc | def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc <-> def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('simple', 'abc (pg or class)');
websearch_to_tsquery
------------------------
'abc' & 'pg' | 'class'
(1 row)
-- NOT is ignored in quotes
select websearch_to_tsquery('english', 'My brand new smartphone');
websearch_to_tsquery
-------------------------------
'brand' & 'new' & 'smartphon'
(1 row)
select websearch_to_tsquery('english', 'My brand "new smartphone"');
websearch_to_tsquery
---------------------------------
'brand' & 'new' <-> 'smartphon'
(1 row)
select websearch_to_tsquery('english', 'My brand "new -smartphone"');
websearch_to_tsquery
---------------------------------
'brand' & 'new' <-> 'smartphon'
(1 row)
-- test OR operator
select websearch_to_tsquery('simple', 'cat or rat');
websearch_to_tsquery
----------------------
'cat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'cat OR rat');
websearch_to_tsquery
----------------------
'cat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'cat "OR" rat');
websearch_to_tsquery
----------------------
'cat' & 'or' & 'rat'
(1 row)
select websearch_to_tsquery('simple', 'cat OR');
websearch_to_tsquery
----------------------
'cat' & 'or'
(1 row)
select websearch_to_tsquery('simple', 'OR rat');
websearch_to_tsquery
----------------------
'or' & 'rat'
(1 row)
select websearch_to_tsquery('simple', '"fat cat OR rat"');
websearch_to_tsquery
------------------------------------
'fat' <-> 'cat' <-> 'or' <-> 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat (cat OR rat');
websearch_to_tsquery
-----------------------
'fat' & 'cat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'or OR or');
websearch_to_tsquery
----------------------
'or' | 'or'
(1 row)
-- OR is an operator here ...
select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
websearch_to_tsquery
-----------------------------------
'fat' <-> 'cat' | 'fat' <-> 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or(rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or)rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or&rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or|rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or!rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or<rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or>rat');
websearch_to_tsquery
----------------------
'fat' | 'rat'
(1 row)
select websearch_to_tsquery('simple', 'fat or ');
websearch_to_tsquery
----------------------
'fat' & 'or'
(1 row)
-- ... but not here
select websearch_to_tsquery('simple', 'abc orange');
websearch_to_tsquery
----------------------
'abc' & 'orange'
(1 row)
select websearch_to_tsquery('simple', 'abc orтест');
websearch_to_tsquery
----------------------
'abc' & 'orтест'
(1 row)
select websearch_to_tsquery('simple', 'abc OR1234');
websearch_to_tsquery
----------------------
'abc' & 'or1234'
(1 row)
select websearch_to_tsquery('simple', 'abc or-abc');
websearch_to_tsquery
---------------------------------
'abc' & 'or-abc' & 'or' & 'abc'
(1 row)
select websearch_to_tsquery('simple', 'abc OR_abc');
websearch_to_tsquery
----------------------
'abc' & 'or' & 'abc'
(1 row)
-- test quotes
select websearch_to_tsquery('english', '"pg_class pg');
websearch_to_tsquery
-----------------------
'pg' & 'class' & 'pg'
(1 row)
select websearch_to_tsquery('english', 'pg_class pg"');
websearch_to_tsquery
-----------------------
'pg' & 'class' & 'pg'
(1 row)
select websearch_to_tsquery('english', '"pg_class pg"');
websearch_to_tsquery
-----------------------------
( 'pg' & 'class' ) <-> 'pg'
(1 row)
select websearch_to_tsquery('english', 'abc "pg_class pg"');
websearch_to_tsquery
-------------------------------------
'abc' & ( 'pg' & 'class' ) <-> 'pg'
(1 row)
select websearch_to_tsquery('english', '"pg_class pg" def');
websearch_to_tsquery
-------------------------------------
( 'pg' & 'class' ) <-> 'pg' & 'def'
(1 row)
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
websearch_to_tsquery
------------------------------------------------------
'abc' & 'pg' <-> ( 'pg' & 'class' ) <-> 'pg' & 'def'
(1 row)
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
websearch_to_tsquery
--------------------------------------
'pg' <-> ( 'pg' & 'class' ) <-> 'pg'
(1 row)
select websearch_to_tsquery('english', '""pg pg_class pg""');
websearch_to_tsquery
------------------------------
'pg' & 'pg' & 'class' & 'pg'
(1 row)
select websearch_to_tsquery('english', 'abc """"" def');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('english', 'cat -"fat rat"');
websearch_to_tsquery
------------------------------
'cat' & !( 'fat' <-> 'rat' )
(1 row)
select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
websearch_to_tsquery
----------------------------------------
'cat' & !( 'fat' <-> 'rat' ) & 'chees'
(1 row)
select websearch_to_tsquery('english', 'abc "def -"');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('english', 'abc "def :"');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
websearch_to_tsquery
------------------------------------
'fat' <-> 'cat' & 'eaten' & !'rat'
(1 row)
select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
websearch_to_tsquery
-----------------------------------
'fat' <-> 'cat' & 'eaten' | 'rat'
(1 row)
select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
websearch_to_tsquery
------------------------------------
'fat' <-> 'cat' & 'eaten' | !'rat'
(1 row)
select websearch_to_tsquery('english', 'this is ----fine');
websearch_to_tsquery
----------------------
!!!!'fine'
(1 row)
select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
websearch_to_tsquery
----------------------------------------
!'fine' & 'dear' <-> 'friend' | 'good'
(1 row)
select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
websearch_to_tsquery
------------------------
'old' & 'cat' & 'fine'
(1 row)
select websearch_to_tsquery('english', '"A the" OR just on');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
select websearch_to_tsquery('english', '"a fat cat" ate a rat');
websearch_to_tsquery
---------------------------------
'fat' <-> 'cat' & 'ate' & 'rat'
(1 row)
select to_tsvector('english', 'A fat cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
?column?
----------
t
(1 row)
select to_tsvector('english', 'A fat grey cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
?column?
----------
f
(1 row)
-- cases handled by gettoken_tsvector()
select websearch_to_tsquery('''');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)
select websearch_to_tsquery('''abc''''def''');
websearch_to_tsquery
----------------------
'abc' & 'def'
(1 row)
select websearch_to_tsquery('\abc');
websearch_to_tsquery
----------------------
'abc'
(1 row)
select websearch_to_tsquery('\');
NOTICE: text-search query contains only stop words or doesn't contain lexemes, ignored
websearch_to_tsquery
----------------------
(1 row)

View File

@ -539,3 +539,97 @@ create index phrase_index_test_idx on phrase_index_test using gin(fts);
set enable_seqscan = off;
select * from phrase_index_test where fts @@ phraseto_tsquery('english', 'fat cat');
set enable_seqscan = on;
-- test websearch_to_tsquery function
--
-- websearch_to_tsquery() takes free-form, web-search-style text.  Every
-- query below is expected to yield a tsquery (possibly an empty one,
-- reported with a NOTICE) and never a syntax error.

-- to_tsquery-style weight and prefix markers (:A, :*) and stray punctuation
-- have no special meaning; text after ':' comes out as an ordinary lexeme
select websearch_to_tsquery('simple', 'I have a fat:*ABCD cat');
select websearch_to_tsquery('simple', 'orange:**AABBCCDD');
select websearch_to_tsquery('simple', 'fat:A!cat:B|rat:C<');
select websearch_to_tsquery('simple', 'fat:A : cat:B');
select websearch_to_tsquery('simple', 'fat*rat');
-- a hyphenated word yields the compound plus its parts; '_' just splits
select websearch_to_tsquery('simple', 'fat-rat');
select websearch_to_tsquery('simple', 'fat_rat');
-- weights are completely ignored; a lone ':' leaves an empty query (NOTICE)
select websearch_to_tsquery('simple', 'abc : def');
select websearch_to_tsquery('simple', 'abc:def');
select websearch_to_tsquery('simple', 'a:::b');
select websearch_to_tsquery('simple', 'abc:d');
select websearch_to_tsquery('simple', ':');
-- the to_tsquery operators &, | and <-> are ignored, as are parentheses;
-- terms are ANDed together and only the word "or" produces an OR
select websearch_to_tsquery('simple', 'abc & def');
select websearch_to_tsquery('simple', 'abc | def');
select websearch_to_tsquery('simple', 'abc <-> def');
select websearch_to_tsquery('simple', 'abc (pg or class)');
-- NOT (a leading '-') is ignored inside a quoted phrase; the first two
-- queries are the unquoted and quoted baselines for comparison
select websearch_to_tsquery('english', 'My brand new smartphone');
select websearch_to_tsquery('english', 'My brand "new smartphone"');
select websearch_to_tsquery('english', 'My brand "new -smartphone"');
-- test OR operator: case-insensitive, needs an operand on both sides, and
-- is an ordinary word when it appears inside quotes
select websearch_to_tsquery('simple', 'cat or rat');
select websearch_to_tsquery('simple', 'cat OR rat');
select websearch_to_tsquery('simple', 'cat "OR" rat');
select websearch_to_tsquery('simple', 'cat OR');
select websearch_to_tsquery('simple', 'OR rat');
select websearch_to_tsquery('simple', '"fat cat OR rat"');
select websearch_to_tsquery('simple', 'fat (cat OR rat');
select websearch_to_tsquery('simple', 'or OR or');
-- OR is an operator here: "or" directly followed by a quote or punctuation
select websearch_to_tsquery('simple', '"fat cat"or"fat rat"');
select websearch_to_tsquery('simple', 'fat or(rat');
select websearch_to_tsquery('simple', 'fat or)rat');
select websearch_to_tsquery('simple', 'fat or&rat');
select websearch_to_tsquery('simple', 'fat or|rat');
select websearch_to_tsquery('simple', 'fat or!rat');
select websearch_to_tsquery('simple', 'fat or<rat');
select websearch_to_tsquery('simple', 'fat or>rat');
-- a trailing "or" with nothing after it stays a plain word
select websearch_to_tsquery('simple', 'fat or ');
-- ... but not here: "or" directly followed by letters (ASCII or not),
-- digits, '-' or '_' is part of a longer word
select websearch_to_tsquery('simple', 'abc orange');
select websearch_to_tsquery('simple', 'abc orтест');
select websearch_to_tsquery('simple', 'abc OR1234');
select websearch_to_tsquery('simple', 'abc or-abc');
select websearch_to_tsquery('simple', 'abc OR_abc');
-- test quotes: an unbalanced quote gives plain ANDed terms, a balanced pair
-- turns its contents into a phrase (<->)
select websearch_to_tsquery('english', '"pg_class pg');
select websearch_to_tsquery('english', 'pg_class pg"');
select websearch_to_tsquery('english', '"pg_class pg"');
select websearch_to_tsquery('english', 'abc "pg_class pg"');
select websearch_to_tsquery('english', '"pg_class pg" def');
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
-- leading and trailing "or" around a phrase are dropped under 'english'
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
-- doubled and empty quote pairs contribute nothing
select websearch_to_tsquery('english', '""pg pg_class pg""');
select websearch_to_tsquery('english', 'abc """"" def');
-- '-' in front of a quoted phrase negates the whole phrase
select websearch_to_tsquery('english', 'cat -"fat rat"');
select websearch_to_tsquery('english', 'cat -"fat rat" cheese');
-- punctuation left alone inside quotes is discarded
select websearch_to_tsquery('english', 'abc "def -"');
select websearch_to_tsquery('english', 'abc "def :"');
-- mixed input: stop words vanish, '-' negates a term, '!' and '+' do not
select websearch_to_tsquery('english', '"A fat cat" has just eaten a -rat.');
select websearch_to_tsquery('english', '"A fat cat" has just eaten OR !rat.');
select websearch_to_tsquery('english', '"A fat cat" has just (+eaten OR -rat)');
-- repeated '-' signs stack as repeated negations
select websearch_to_tsquery('english', 'this is ----fine');
-- heavily malformed input must still produce a sensible query
select websearch_to_tsquery('english', '(()) )))) this ||| is && -fine, "dear friend" OR good');
select websearch_to_tsquery('english', 'an old <-> cat " is fine &&& too');
-- nothing but stop words: empty query plus a NOTICE
select websearch_to_tsquery('english', '"A the" OR just on');
-- the resulting query used for matching: the phrase requires "fat" to be
-- immediately followed by "cat", so only the first document matches
select websearch_to_tsquery('english', '"a fat cat" ate a rat');
select to_tsvector('english', 'A fat cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
select to_tsvector('english', 'A fat grey cat ate a rat') @@
websearch_to_tsquery('english', '"a fat cat" ate a rat');
-- cases handled by gettoken_tsvector(): single-quote and backslash
-- characters in the input, using the one-argument form of the function
-- (no explicit configuration argument)
select websearch_to_tsquery('''');
select websearch_to_tsquery('''abc''''def''');
select websearch_to_tsquery('\abc');
select websearch_to_tsquery('\');