mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	doc: make blooms docs match reality
Parallel execution changed the way bloom queries are executed, so update the EXPLAIN output, and restructure the docs to be clearer and more accurate. Reported-by: Daniel Westermann Discussion: https://postgr.es/m/ZR0P278MB0122119FAE78721A694C30C8D2340@ZR0P278MB0122.CHEP278.PROD.OUTLOOK.COM Author: Daniel Westermann and me Backpatch-through: 9.6
This commit is contained in:
		@@ -110,75 +110,71 @@ CREATE INDEX bloomidx ON tbloom USING bloom (i1,i2,i3)
 | 
				
			|||||||
   FROM
 | 
					   FROM
 | 
				
			||||||
  generate_series(1,10000000);
 | 
					  generate_series(1,10000000);
 | 
				
			||||||
SELECT 10000000
 | 
					SELECT 10000000
 | 
				
			||||||
=# CREATE INDEX bloomidx ON tbloom USING bloom (i1, i2, i3, i4, i5, i6);
 | 
					 | 
				
			||||||
CREATE INDEX
 | 
					 | 
				
			||||||
=# SELECT pg_size_pretty(pg_relation_size('bloomidx'));
 | 
					 | 
				
			||||||
 pg_size_pretty
 | 
					 | 
				
			||||||
----------------
 | 
					 | 
				
			||||||
 153 MB
 | 
					 | 
				
			||||||
(1 row)
 | 
					 | 
				
			||||||
=# CREATE index btreeidx ON tbloom (i1, i2, i3, i4, i5, i6);
 | 
					 | 
				
			||||||
CREATE INDEX
 | 
					 | 
				
			||||||
=# SELECT pg_size_pretty(pg_relation_size('btreeidx'));
 | 
					 | 
				
			||||||
 pg_size_pretty
 | 
					 | 
				
			||||||
----------------
 | 
					 | 
				
			||||||
 387 MB
 | 
					 | 
				
			||||||
(1 row)
 | 
					 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <para>
 | 
					  <para>
 | 
				
			||||||
   A sequential scan over this large table takes a long time:
 | 
					   A sequential scan over this large table takes a long time:
 | 
				
			||||||
<programlisting>
 | 
					<programlisting>
 | 
				
			||||||
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
					=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
				
			||||||
                                                 QUERY PLAN
 | 
					EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
				
			||||||
------------------------------------------------------------------------------------------------------------
 | 
					                                              QUERY PLAN                                              
 | 
				
			||||||
 Seq Scan on tbloom  (cost=0.00..213694.08 rows=1 width=24) (actual time=1445.438..1445.438 rows=0 loops=1)
 | 
					------------------------------------------------------------------------------------------------------
 | 
				
			||||||
 | 
					 Seq Scan on tbloom  (cost=0.00..2137.14 rows=3 width=24) (actual time=14.894..14.895 rows=0 loops=1)
 | 
				
			||||||
   Filter: ((i2 = 898732) AND (i5 = 123451))
 | 
					   Filter: ((i2 = 898732) AND (i5 = 123451))
 | 
				
			||||||
   Rows Removed by Filter: 10000000
 | 
					   Rows Removed by Filter: 100000
 | 
				
			||||||
 Planning time: 0.177 ms
 | 
					 Planning Time: 0.350 ms
 | 
				
			||||||
 Execution time: 1445.473 ms
 | 
					 Execution Time: 14.916 ms
 | 
				
			||||||
(5 rows)
 | 
					(5 rows)
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
  </para>
 | 
					  </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <para>
 | 
					  <para>
 | 
				
			||||||
   So the planner will usually select an index scan if possible.
 | 
					   Even with the btree index defined the result will still be a
 | 
				
			||||||
   With a btree index, we get results like this:
 | 
					   sequential scan:
 | 
				
			||||||
<programlisting>
 | 
					<programlisting>
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx ON tbloom (i1, i2, i3, i4, i5, i6);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# SELECT pg_size_pretty(pg_relation_size('btreeidx'));
 | 
				
			||||||
 | 
					 pg_size_pretty
 | 
				
			||||||
 | 
					----------------
 | 
				
			||||||
 | 
					 3992 kB
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
					=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
				
			||||||
                                                           QUERY PLAN
 | 
					                                              QUERY PLAN                                              
 | 
				
			||||||
--------------------------------------------------------------------------------------------------------------------------------
 | 
					------------------------------------------------------------------------------------------------------
 | 
				
			||||||
 Index Only Scan using btreeidx on tbloom  (cost=0.56..298311.96 rows=1 width=24) (actual time=445.709..445.709 rows=0 loops=1)
 | 
					 Seq Scan on tbloom  (cost=0.00..2137.00 rows=2 width=24) (actual time=12.004..12.004 rows=0 loops=1)
 | 
				
			||||||
   Index Cond: ((i2 = 898732) AND (i5 = 123451))
 | 
					   Filter: ((i2 = 898732) AND (i5 = 123451))
 | 
				
			||||||
   Heap Fetches: 0
 | 
					   Rows Removed by Filter: 100000
 | 
				
			||||||
 Planning time: 0.193 ms
 | 
					 Planning Time: 0.157 ms
 | 
				
			||||||
 Execution time: 445.770 ms
 | 
					 Execution Time: 12.019 ms
 | 
				
			||||||
(5 rows)
 | 
					(5 rows)
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
  </para>
 | 
					  </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <para>
 | 
					  <para>
 | 
				
			||||||
   Bloom is better than btree in handling this type of search:
 | 
					   Having the bloom index defined on the table is better than btree in
 | 
				
			||||||
 | 
					   handling this type of search:
 | 
				
			||||||
<programlisting>
 | 
					<programlisting>
 | 
				
			||||||
 | 
					=# CREATE INDEX bloomidx ON tbloom USING bloom (i1, i2, i3, i4, i5, i6);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# SELECT pg_size_pretty(pg_relation_size('bloomidx'));
 | 
				
			||||||
 | 
					 pg_size_pretty
 | 
				
			||||||
 | 
					----------------
 | 
				
			||||||
 | 
					 1584 kB
 | 
				
			||||||
 | 
					(1 row)
 | 
				
			||||||
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
					=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
				
			||||||
                                                        QUERY PLAN
 | 
					                                                     QUERY PLAN                                                      
 | 
				
			||||||
---------------------------------------------------------------------------------------------------------------------------
 | 
					---------------------------------------------------------------------------------------------------------------------
 | 
				
			||||||
 Bitmap Heap Scan on tbloom  (cost=178435.39..178439.41 rows=1 width=24) (actual time=76.698..76.698 rows=0 loops=1)
 | 
					 Bitmap Heap Scan on tbloom  (cost=1792.00..1799.69 rows=2 width=24) (actual time=0.390..0.391 rows=0 loops=1)
 | 
				
			||||||
   Recheck Cond: ((i2 = 898732) AND (i5 = 123451))
 | 
					   Recheck Cond: ((i2 = 898732) AND (i5 = 123451))
 | 
				
			||||||
   Rows Removed by Index Recheck: 2439
 | 
					   Rows Removed by Index Recheck: 19
 | 
				
			||||||
   Heap Blocks: exact=2408
 | 
					   Heap Blocks: exact=19
 | 
				
			||||||
   ->  Bitmap Index Scan on bloomidx  (cost=0.00..178435.39 rows=1 width=0) (actual time=72.455..72.455 rows=2439 loops=1)
 | 
					   ->  Bitmap Index Scan on bloomidx  (cost=0.00..1792.00 rows=2 width=0) (actual time=0.361..0.361 rows=19 loops=1)
 | 
				
			||||||
         Index Cond: ((i2 = 898732) AND (i5 = 123451))
 | 
					         Index Cond: ((i2 = 898732) AND (i5 = 123451))
 | 
				
			||||||
 Planning time: 0.475 ms
 | 
					 Planning Time: 0.128 ms
 | 
				
			||||||
 Execution time: 76.778 ms
 | 
					 Execution Time: 0.415 ms
 | 
				
			||||||
(8 rows)
 | 
					(8 rows)
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
   Note the relatively large number of false positives: 2439 rows were
 | 
					 | 
				
			||||||
   selected to be visited in the heap, but none actually matched the
 | 
					 | 
				
			||||||
   query.  We could reduce that by specifying a larger signature length.
 | 
					 | 
				
			||||||
   In this example, creating the index with <literal>length=200</>
 | 
					 | 
				
			||||||
   reduced the number of false positives to 55; but it doubled the index size
 | 
					 | 
				
			||||||
   (to 306 MB) and ended up being slower for this query (125 ms overall).
 | 
					 | 
				
			||||||
  </para>
 | 
					  </para>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  <para>
 | 
					  <para>
 | 
				
			||||||
@@ -187,24 +183,36 @@ CREATE INDEX
 | 
				
			|||||||
   A better strategy for btree is to create a separate index on each column.
 | 
					   A better strategy for btree is to create a separate index on each column.
 | 
				
			||||||
   Then the planner will choose something like this:
 | 
					   Then the planner will choose something like this:
 | 
				
			||||||
<programlisting>
 | 
					<programlisting>
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx1 ON tbloom (i1);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx2 ON tbloom (i2);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx3 ON tbloom (i3);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx4 ON tbloom (i4);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx5 ON tbloom (i5);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
 | 
					=# CREATE INDEX btreeidx6 ON tbloom (i6);
 | 
				
			||||||
 | 
					CREATE INDEX
 | 
				
			||||||
=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
					=# EXPLAIN ANALYZE SELECT * FROM tbloom WHERE i2 = 898732 AND i5 = 123451;
 | 
				
			||||||
                                                          QUERY PLAN
 | 
					                                                        QUERY PLAN                                                         
 | 
				
			||||||
------------------------------------------------------------------------------------------------------------------------------
 | 
					---------------------------------------------------------------------------------------------------------------------------
 | 
				
			||||||
 Bitmap Heap Scan on tbloom  (cost=9.29..13.30 rows=1 width=24) (actual time=0.148..0.148 rows=0 loops=1)
 | 
					 Bitmap Heap Scan on tbloom  (cost=24.34..32.03 rows=2 width=24) (actual time=0.031..0.032 rows=0 loops=1)
 | 
				
			||||||
   Recheck Cond: ((i5 = 123451) AND (i2 = 898732))
 | 
					   Recheck Cond: ((i5 = 123451) AND (i2 = 898732))
 | 
				
			||||||
   ->  BitmapAnd  (cost=9.29..9.29 rows=1 width=0) (actual time=0.145..0.145 rows=0 loops=1)
 | 
					   ->  BitmapAnd  (cost=24.34..24.34 rows=2 width=0) (actual time=0.028..0.029 rows=0 loops=1)
 | 
				
			||||||
         ->  Bitmap Index Scan on tbloom_i5_idx  (cost=0.00..4.52 rows=11 width=0) (actual time=0.089..0.089 rows=10 loops=1)
 | 
					         ->  Bitmap Index Scan on btreeidx5  (cost=0.00..12.04 rows=500 width=0) (actual time=0.028..0.028 rows=0 loops=1)
 | 
				
			||||||
               Index Cond: (i5 = 123451)
 | 
					               Index Cond: (i5 = 123451)
 | 
				
			||||||
         ->  Bitmap Index Scan on tbloom_i2_idx  (cost=0.00..4.52 rows=11 width=0) (actual time=0.048..0.048 rows=8 loops=1)
 | 
					         ->  Bitmap Index Scan on btreeidx2  (cost=0.00..12.04 rows=500 width=0) (never executed)
 | 
				
			||||||
               Index Cond: (i2 = 898732)
 | 
					               Index Cond: (i2 = 898732)
 | 
				
			||||||
 Planning time: 2.049 ms
 | 
					 Planning Time: 0.474 ms
 | 
				
			||||||
 Execution time: 0.280 ms
 | 
					 Execution Time: 0.064 ms
 | 
				
			||||||
(9 rows)
 | 
					(9 rows)
 | 
				
			||||||
</programlisting>
 | 
					</programlisting>
 | 
				
			||||||
   Although this query runs much faster than with either of the single
 | 
					   Although this query runs much faster than with either of the single
 | 
				
			||||||
   indexes, we pay a large penalty in index size.  Each of the single-column
 | 
					   indexes, we pay a penalty in index size.  Each of the single-column
 | 
				
			||||||
   btree indexes occupies 214 MB, so the total space needed is over 1.2GB,
 | 
					   btree indexes occupies 2 MB, so the total space needed is 12 MB,
 | 
				
			||||||
   more than 8 times the space used by the bloom index.
 | 
					   eight times the space used by the bloom index.
 | 
				
			||||||
  </para>
 | 
					  </para>
 | 
				
			||||||
 </sect2>
 | 
					 </sect2>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user