mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Handle equality operator in contrib/pg_trgm
Obviously, in order to equality operator be satisfiable, target string must contain all the trigrams of the search string. On this base, we implement equality operator in GiST/GIN indexes with recheck. Discussion: https://postgr.es/m/CAOBaU_YWwtT7tdggtROacjdOdeYHCz-tmSwuC-j-TOG-g97J0w%40mail.gmail.com Author: Julien Rouhaud Reviewed-by: Tom Lane, Alexander Korotkov, Georgios Kokolatos, Erik Rijkers
This commit is contained in:
@ -9,7 +9,7 @@ OBJS = \
|
|||||||
trgm_regexp.o
|
trgm_regexp.o
|
||||||
|
|
||||||
EXTENSION = pg_trgm
|
EXTENSION = pg_trgm
|
||||||
DATA = pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
|
DATA = pg_trgm--1.5--1.6.sql pg_trgm--1.4--1.5.sql pg_trgm--1.3--1.4.sql \
|
||||||
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
|
pg_trgm--1.3.sql pg_trgm--1.2--1.3.sql pg_trgm--1.1--1.2.sql \
|
||||||
pg_trgm--1.0--1.1.sql
|
pg_trgm--1.0--1.1.sql
|
||||||
PGFILEDESC = "pg_trgm - trigram matching"
|
PGFILEDESC = "pg_trgm - trigram matching"
|
||||||
|
@ -4761,6 +4761,12 @@ insert into test2 values ('abcdef');
|
|||||||
insert into test2 values ('quark');
|
insert into test2 values ('quark');
|
||||||
insert into test2 values (' z foo bar');
|
insert into test2 values (' z foo bar');
|
||||||
insert into test2 values ('/123/-45/');
|
insert into test2 values ('/123/-45/');
|
||||||
|
insert into test2 values ('line 1');
|
||||||
|
insert into test2 values ('%line 2');
|
||||||
|
insert into test2 values ('line 3%');
|
||||||
|
insert into test2 values ('%line 4%');
|
||||||
|
insert into test2 values ('%li%ne 5%');
|
||||||
|
insert into test2 values ('li_e 6');
|
||||||
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
||||||
set enable_seqscan=off;
|
set enable_seqscan=off;
|
||||||
explain (costs off)
|
explain (costs off)
|
||||||
@ -4863,7 +4869,13 @@ select * from test2 where t ~ '(abc)*$';
|
|||||||
quark
|
quark
|
||||||
z foo bar
|
z foo bar
|
||||||
/123/-45/
|
/123/-45/
|
||||||
(4 rows)
|
line 1
|
||||||
|
%line 2
|
||||||
|
line 3%
|
||||||
|
%line 4%
|
||||||
|
%li%ne 5%
|
||||||
|
li_e 6
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
select * from test2 where t ~* 'DEF';
|
select * from test2 where t ~* 'DEF';
|
||||||
t
|
t
|
||||||
@ -4918,7 +4930,11 @@ select * from test2 where t ~ '[a-z]{3}';
|
|||||||
abcdef
|
abcdef
|
||||||
quark
|
quark
|
||||||
z foo bar
|
z foo bar
|
||||||
(3 rows)
|
line 1
|
||||||
|
%line 2
|
||||||
|
line 3%
|
||||||
|
%line 4%
|
||||||
|
(7 rows)
|
||||||
|
|
||||||
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
||||||
t
|
t
|
||||||
@ -4961,6 +4977,93 @@ select * from test2 where t ~ '/\d+/-\d';
|
|||||||
/123/-45/
|
/123/-45/
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- test = operator
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Bitmap Heap Scan on test2
|
||||||
|
Recheck Cond: (t = 'abcdef'::text)
|
||||||
|
-> Bitmap Index Scan on test2_idx_gin
|
||||||
|
Index Cond: (t = 'abcdef'::text)
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
abcdef
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Bitmap Heap Scan on test2
|
||||||
|
Recheck Cond: (t = '%line%'::text)
|
||||||
|
-> Bitmap Index Scan on test2_idx_gin
|
||||||
|
Index Cond: (t = '%line%'::text)
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = 'li_e 1';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 2';
|
||||||
|
t
|
||||||
|
---------
|
||||||
|
%line 2
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = 'line 3%';
|
||||||
|
t
|
||||||
|
---------
|
||||||
|
line 3%
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 3%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 4%';
|
||||||
|
t
|
||||||
|
----------
|
||||||
|
%line 4%
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 5%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%li_ne 5%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%li%ne 5%';
|
||||||
|
t
|
||||||
|
-----------
|
||||||
|
%li%ne 5%
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = 'line 6';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = 'li_e 6';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
li_e 6
|
||||||
|
(1 row)
|
||||||
|
|
||||||
drop index test2_idx_gin;
|
drop index test2_idx_gin;
|
||||||
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
||||||
set enable_seqscan=off;
|
set enable_seqscan=off;
|
||||||
@ -5056,7 +5159,13 @@ select * from test2 where t ~ '(abc)*$';
|
|||||||
quark
|
quark
|
||||||
z foo bar
|
z foo bar
|
||||||
/123/-45/
|
/123/-45/
|
||||||
(4 rows)
|
line 1
|
||||||
|
%line 2
|
||||||
|
line 3%
|
||||||
|
%line 4%
|
||||||
|
%li%ne 5%
|
||||||
|
li_e 6
|
||||||
|
(10 rows)
|
||||||
|
|
||||||
select * from test2 where t ~* 'DEF';
|
select * from test2 where t ~* 'DEF';
|
||||||
t
|
t
|
||||||
@ -5111,7 +5220,11 @@ select * from test2 where t ~ '[a-z]{3}';
|
|||||||
abcdef
|
abcdef
|
||||||
quark
|
quark
|
||||||
z foo bar
|
z foo bar
|
||||||
(3 rows)
|
line 1
|
||||||
|
%line 2
|
||||||
|
line 3%
|
||||||
|
%line 4%
|
||||||
|
(7 rows)
|
||||||
|
|
||||||
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
select * from test2 where t ~* '(a{10}|b{10}|c{10}){10}';
|
||||||
t
|
t
|
||||||
@ -5154,6 +5267,89 @@ select * from test2 where t ~ '/\d+/-\d';
|
|||||||
/123/-45/
|
/123/-45/
|
||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
|
-- test = operator
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Index Scan using test2_idx_gist on test2
|
||||||
|
Index Cond: (t = 'abcdef'::text)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
abcdef
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
QUERY PLAN
|
||||||
|
------------------------------------------
|
||||||
|
Index Scan using test2_idx_gist on test2
|
||||||
|
Index Cond: (t = '%line%'::text)
|
||||||
|
(2 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = 'li_e 1';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 2';
|
||||||
|
t
|
||||||
|
---------
|
||||||
|
%line 2
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = 'line 3%';
|
||||||
|
t
|
||||||
|
---------
|
||||||
|
line 3%
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 3%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 4%';
|
||||||
|
t
|
||||||
|
----------
|
||||||
|
%line 4%
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = '%line 5%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%li_ne 5%';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = '%li%ne 5%';
|
||||||
|
t
|
||||||
|
-----------
|
||||||
|
%li%ne 5%
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
select * from test2 where t = 'line 6';
|
||||||
|
t
|
||||||
|
---
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
select * from test2 where t = 'li_e 6';
|
||||||
|
t
|
||||||
|
--------
|
||||||
|
li_e 6
|
||||||
|
(1 row)
|
||||||
|
|
||||||
-- Check similarity threshold (bug #14202)
|
-- Check similarity threshold (bug #14202)
|
||||||
CREATE TEMP TABLE restaurants (city text);
|
CREATE TEMP TABLE restaurants (city text);
|
||||||
INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
|
INSERT INTO restaurants SELECT 'Warsaw' FROM generate_series(1, 10000);
|
||||||
|
10
contrib/pg_trgm/pg_trgm--1.5--1.6.sql
Normal file
10
contrib/pg_trgm/pg_trgm--1.5--1.6.sql
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
/* contrib/pg_trgm/pg_trgm--1.5--1.6.sql */
|
||||||
|
|
||||||
|
-- complain if script is sourced in psql, rather than via ALTER EXTENSION
|
||||||
|
\echo Use "ALTER EXTENSION pg_trgm UPDATE TO '1.6'" to load this file. \quit
|
||||||
|
|
||||||
|
ALTER OPERATOR FAMILY gin_trgm_ops USING gin ADD
|
||||||
|
OPERATOR 11 pg_catalog.= (text, text);
|
||||||
|
|
||||||
|
ALTER OPERATOR FAMILY gist_trgm_ops USING gist ADD
|
||||||
|
OPERATOR 11 pg_catalog.= (text, text);
|
@ -1,6 +1,6 @@
|
|||||||
# pg_trgm extension
|
# pg_trgm extension
|
||||||
comment = 'text similarity measurement and index searching based on trigrams'
|
comment = 'text similarity measurement and index searching based on trigrams'
|
||||||
default_version = '1.5'
|
default_version = '1.6'
|
||||||
module_pathname = '$libdir/pg_trgm'
|
module_pathname = '$libdir/pg_trgm'
|
||||||
relocatable = true
|
relocatable = true
|
||||||
trusted = true
|
trusted = true
|
||||||
|
@ -101,6 +101,12 @@ insert into test2 values ('abcdef');
|
|||||||
insert into test2 values ('quark');
|
insert into test2 values ('quark');
|
||||||
insert into test2 values (' z foo bar');
|
insert into test2 values (' z foo bar');
|
||||||
insert into test2 values ('/123/-45/');
|
insert into test2 values ('/123/-45/');
|
||||||
|
insert into test2 values ('line 1');
|
||||||
|
insert into test2 values ('%line 2');
|
||||||
|
insert into test2 values ('line 3%');
|
||||||
|
insert into test2 values ('%line 4%');
|
||||||
|
insert into test2 values ('%li%ne 5%');
|
||||||
|
insert into test2 values ('li_e 6');
|
||||||
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
create index test2_idx_gin on test2 using gin (t gin_trgm_ops);
|
||||||
set enable_seqscan=off;
|
set enable_seqscan=off;
|
||||||
explain (costs off)
|
explain (costs off)
|
||||||
@ -137,6 +143,23 @@ select * from test2 where t ~ ' z foo bar';
|
|||||||
select * from test2 where t ~ ' z foo';
|
select * from test2 where t ~ ' z foo';
|
||||||
select * from test2 where t ~ 'qua(?!foo)';
|
select * from test2 where t ~ 'qua(?!foo)';
|
||||||
select * from test2 where t ~ '/\d+/-\d';
|
select * from test2 where t ~ '/\d+/-\d';
|
||||||
|
-- test = operator
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
select * from test2 where t = 'li_e 1';
|
||||||
|
select * from test2 where t = '%line 2';
|
||||||
|
select * from test2 where t = 'line 3%';
|
||||||
|
select * from test2 where t = '%line 3%';
|
||||||
|
select * from test2 where t = '%line 4%';
|
||||||
|
select * from test2 where t = '%line 5%';
|
||||||
|
select * from test2 where t = '%li_ne 5%';
|
||||||
|
select * from test2 where t = '%li%ne 5%';
|
||||||
|
select * from test2 where t = 'line 6';
|
||||||
|
select * from test2 where t = 'li_e 6';
|
||||||
drop index test2_idx_gin;
|
drop index test2_idx_gin;
|
||||||
|
|
||||||
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
create index test2_idx_gist on test2 using gist (t gist_trgm_ops);
|
||||||
@ -175,6 +198,23 @@ select * from test2 where t ~ ' z foo bar';
|
|||||||
select * from test2 where t ~ ' z foo';
|
select * from test2 where t ~ ' z foo';
|
||||||
select * from test2 where t ~ 'qua(?!foo)';
|
select * from test2 where t ~ 'qua(?!foo)';
|
||||||
select * from test2 where t ~ '/\d+/-\d';
|
select * from test2 where t ~ '/\d+/-\d';
|
||||||
|
-- test = operator
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
select * from test2 where t = 'abcdef';
|
||||||
|
explain (costs off)
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
select * from test2 where t = '%line%';
|
||||||
|
select * from test2 where t = 'li_e 1';
|
||||||
|
select * from test2 where t = '%line 2';
|
||||||
|
select * from test2 where t = 'line 3%';
|
||||||
|
select * from test2 where t = '%line 3%';
|
||||||
|
select * from test2 where t = '%line 4%';
|
||||||
|
select * from test2 where t = '%line 5%';
|
||||||
|
select * from test2 where t = '%li_ne 5%';
|
||||||
|
select * from test2 where t = '%li%ne 5%';
|
||||||
|
select * from test2 where t = 'line 6';
|
||||||
|
select * from test2 where t = 'li_e 6';
|
||||||
|
|
||||||
-- Check similarity threshold (bug #14202)
|
-- Check similarity threshold (bug #14202)
|
||||||
|
|
||||||
|
@ -37,6 +37,7 @@
|
|||||||
#define WordDistanceStrategyNumber 8
|
#define WordDistanceStrategyNumber 8
|
||||||
#define StrictWordSimilarityStrategyNumber 9
|
#define StrictWordSimilarityStrategyNumber 9
|
||||||
#define StrictWordDistanceStrategyNumber 10
|
#define StrictWordDistanceStrategyNumber 10
|
||||||
|
#define EqualStrategyNumber 11
|
||||||
|
|
||||||
typedef char trgm[3];
|
typedef char trgm[3];
|
||||||
|
|
||||||
|
@ -89,6 +89,7 @@ gin_extract_query_trgm(PG_FUNCTION_ARGS)
|
|||||||
case SimilarityStrategyNumber:
|
case SimilarityStrategyNumber:
|
||||||
case WordSimilarityStrategyNumber:
|
case WordSimilarityStrategyNumber:
|
||||||
case StrictWordSimilarityStrategyNumber:
|
case StrictWordSimilarityStrategyNumber:
|
||||||
|
case EqualStrategyNumber:
|
||||||
trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
|
trg = generate_trgm(VARDATA_ANY(val), VARSIZE_ANY_EXHDR(val));
|
||||||
break;
|
break;
|
||||||
case ILikeStrategyNumber:
|
case ILikeStrategyNumber:
|
||||||
@ -221,6 +222,7 @@ gin_trgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
#endif
|
#endif
|
||||||
/* FALL THRU */
|
/* FALL THRU */
|
||||||
case LikeStrategyNumber:
|
case LikeStrategyNumber:
|
||||||
|
case EqualStrategyNumber:
|
||||||
/* Check if all extracted trigrams are presented. */
|
/* Check if all extracted trigrams are presented. */
|
||||||
res = true;
|
res = true;
|
||||||
for (i = 0; i < nkeys; i++)
|
for (i = 0; i < nkeys; i++)
|
||||||
@ -306,6 +308,7 @@ gin_trgm_triconsistent(PG_FUNCTION_ARGS)
|
|||||||
#endif
|
#endif
|
||||||
/* FALL THRU */
|
/* FALL THRU */
|
||||||
case LikeStrategyNumber:
|
case LikeStrategyNumber:
|
||||||
|
case EqualStrategyNumber:
|
||||||
/* Check if all extracted trigrams are presented. */
|
/* Check if all extracted trigrams are presented. */
|
||||||
res = GIN_MAYBE;
|
res = GIN_MAYBE;
|
||||||
for (i = 0; i < nkeys; i++)
|
for (i = 0; i < nkeys; i++)
|
||||||
|
@ -232,6 +232,7 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
case SimilarityStrategyNumber:
|
case SimilarityStrategyNumber:
|
||||||
case WordSimilarityStrategyNumber:
|
case WordSimilarityStrategyNumber:
|
||||||
case StrictWordSimilarityStrategyNumber:
|
case StrictWordSimilarityStrategyNumber:
|
||||||
|
case EqualStrategyNumber:
|
||||||
qtrg = generate_trgm(VARDATA(query),
|
qtrg = generate_trgm(VARDATA(query),
|
||||||
querysize - VARHDRSZ);
|
querysize - VARHDRSZ);
|
||||||
break;
|
break;
|
||||||
@ -338,7 +339,8 @@ gtrgm_consistent(PG_FUNCTION_ARGS)
|
|||||||
#endif
|
#endif
|
||||||
/* FALL THRU */
|
/* FALL THRU */
|
||||||
case LikeStrategyNumber:
|
case LikeStrategyNumber:
|
||||||
/* Wildcard search is inexact */
|
case EqualStrategyNumber:
|
||||||
|
/* Wildcard and equal search are inexact */
|
||||||
*recheck = true;
|
*recheck = true;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -419,9 +419,11 @@
|
|||||||
the purpose of very fast similarity searches. These index types support
|
the purpose of very fast similarity searches. These index types support
|
||||||
the above-described similarity operators, and additionally support
|
the above-described similarity operators, and additionally support
|
||||||
trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
|
trigram-based index searches for <literal>LIKE</literal>, <literal>ILIKE</literal>,
|
||||||
<literal>~</literal> and <literal>~*</literal> queries. (These indexes do not
|
<literal>~</literal> and <literal>~*</literal> queries. Beginning in
|
||||||
support equality nor simple comparison operators, so you may need a
|
<productname>PostgreSQL</productname> 14, these indexes also support
|
||||||
regular B-tree index too.)
|
equality operator (simple comparison operators are not supported).
|
||||||
|
Note that those indexes may not be as efficient as regular B-tree indexes
|
||||||
|
for equality operator.
|
||||||
</para>
|
</para>
|
||||||
|
|
||||||
<para>
|
<para>
|
||||||
|
Reference in New Issue
Block a user