1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Avoid full scan of GIN indexes when possible

The strategy of a GIN index scan is driven by the opclass-specific extract_query
method.  This method may report that the needed search mode is
GIN_SEARCH_MODE_ALL.  This mode means that a matching tuple may contain none of
the extracted entries.  A simple example is the '!term' tsquery, which doesn't
need any term to exist in the matching tsvector.

In order to handle such a scan key, GIN calculates a virtual entry, which
contains all TIDs of all entries of the attribute.  In effect, this is a full
scan of the index attribute.  This is typically very slow, but it allows GIN to
handle some queries correctly.  However, the current algorithm calculates such
a virtual entry for each GIN_SEARCH_MODE_ALL scan key, even when there are
several of them for the same attribute.  This is clearly not optimal.

This commit improves the situation by introducing "exclude only" scan keys.
Such scan keys cannot return a set of matching TIDs by themselves.  Instead,
they can only filter the TIDs produced by normal scan keys.  Therefore, each
attribute should have at least one normal scan key, while the rest of its scan
keys may be "exclude only" if their search mode is GIN_SEARCH_MODE_ALL.

The same optimization might be applied to the whole scan rather than
per attribute, but that leads to a problem with eliminating NULL values.  There
is a trade-off between the multiple possible ways to do this.  We probably want
to do this later using some cost-based decision algorithm.

Discussion: https://postgr.es/m/CAOBaU_YGP5-BEt5Cc0%3DzMve92vocPzD%2BXiZgiZs1kjY0cj%3DXBg%40mail.gmail.com
Author: Nikita Glukhov, Alexander Korotkov, Tom Lane, Julien Rouhaud
Reviewed-by: Julien Rouhaud, Tomas Vondra, Tom Lane
This commit is contained in:
Alexander Korotkov
2020-01-18 01:11:39 +03:00
parent 41c6f9db25
commit 4b754d6c16
10 changed files with 579 additions and 77 deletions

View File

@ -3498,6 +3498,107 @@ select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
1000
(1 row)
-- check handling of indexquals that generate no searchable conditions
explain (costs off)
select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
QUERY PLAN
-----------------------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on test_trgm
Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text))
-> Bitmap Index Scan on trgm_idx
Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text))
(5 rows)
select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
count
-------
19
(1 row)
explain (costs off)
select count(*) from test_trgm where t like '%99%' and t like '%qw%';
QUERY PLAN
-------------------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on test_trgm
Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
-> Bitmap Index Scan on trgm_idx
Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
(5 rows)
select count(*) from test_trgm where t like '%99%' and t like '%qw%';
count
-------
19
(1 row)
-- ensure that pending-list items are handled correctly, too
create temp table t_test_trgm(t text COLLATE "C");
create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops);
insert into t_test_trgm values ('qwerty99'), ('qwerty01');
explain (costs off)
select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
QUERY PLAN
-----------------------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on t_test_trgm
Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text))
-> Bitmap Index Scan on t_trgm_idx
Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qwerty%'::text))
(5 rows)
select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
count
-------
1
(1 row)
explain (costs off)
select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
QUERY PLAN
-------------------------------------------------------------------------
Aggregate
-> Bitmap Heap Scan on t_test_trgm
Recheck Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
-> Bitmap Index Scan on t_trgm_idx
Index Cond: ((t ~~ '%99%'::text) AND (t ~~ '%qw%'::text))
(5 rows)
select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
count
-------
1
(1 row)
-- run the same queries with sequential scan to check the results
set enable_bitmapscan=off;
set enable_seqscan=on;
select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
count
-------
19
(1 row)
select count(*) from test_trgm where t like '%99%' and t like '%qw%';
count
-------
19
(1 row)
select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
count
-------
1
(1 row)
select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
count
-------
1
(1 row)
reset enable_bitmapscan;
create table test2(t text COLLATE "C");
insert into test2 values ('abcdef');
insert into test2 values ('quark');

View File

@ -55,6 +55,33 @@ select t,similarity(t,'gwertyu0988') as sml from test_trgm where t % 'gwertyu098
select t,similarity(t,'gwertyu1988') as sml from test_trgm where t % 'gwertyu1988' order by sml desc, t;
select count(*) from test_trgm where t ~ '[qwerty]{2}-?[qwerty]{2}';
-- check handling of indexquals that generate no searchable conditions
-- Each query ANDs two LIKE patterns on the same column t of test_trgm.  The
-- EXPLAIN (costs off) statement pins the plan shape; the following count(*)
-- pins the result.
-- NOTE(review): presumably the short patterns ('%99%', '%qw%') are the quals
-- that yield nothing searchable in the index -- confirm against pg_trgm's
-- trigram extraction rules for LIKE patterns.
explain (costs off)
select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
explain (costs off)
select count(*) from test_trgm where t like '%99%' and t like '%qw%';
select count(*) from test_trgm where t like '%99%' and t like '%qw%';
-- ensure that pending-list items are handled correctly, too
-- The GIN index is created on the empty temp table first and the two rows
-- are inserted afterwards.
-- NOTE(review): presumably this ordering is what leaves the new entries in
-- the GIN pending list rather than the main index structure -- confirm that
-- fastupdate is in effect for t_trgm_idx.
create temp table t_test_trgm(t text COLLATE "C");
create index t_trgm_idx on t_test_trgm using gin (t gin_trgm_ops);
insert into t_test_trgm values ('qwerty99'), ('qwerty01');
-- Only 'qwerty99' satisfies both LIKE patterns in each query below.
explain (costs off)
select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
explain (costs off)
select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
-- run the same queries with sequential scan to check the results
-- Bitmap scans are disabled and sequential scans enabled, then the four
-- count(*) queries from above are repeated unchanged so that their results
-- can be compared with the index-driven runs.
set enable_bitmapscan=off;
set enable_seqscan=on;
select count(*) from test_trgm where t like '%99%' and t like '%qwerty%';
select count(*) from test_trgm where t like '%99%' and t like '%qw%';
select count(*) from t_test_trgm where t like '%99%' and t like '%qwerty%';
select count(*) from t_test_trgm where t like '%99%' and t like '%qw%';
-- Only enable_bitmapscan is restored here; enable_seqscan stays on.
-- NOTE(review): confirm that later tests set enable_seqscan themselves if
-- they rely on it being off.
reset enable_bitmapscan;
create table test2(t text COLLATE "C");
insert into test2 values ('abcdef');
insert into test2 values ('quark');