mirror of
https://github.com/postgres/postgres.git
synced 2025-12-19 17:02:53 +03:00
Properly prepare varinfos in estimate_multivariate_bucketsize()
To estimate with extended statistics, we need to clear the varnullingrels
field in the expression, and duplicates are not allowed in the GroupVarInfo
list. We could reuse add_unique_group_var(), but we don't do so for two
reasons.
1) We must keep the origin_rinfos list ordered exactly the same way as
varinfos.
2) add_unique_group_var() is designed for estimate_num_groups(), where a
larger number of groups is worse. When estimating the number of hash
buckets, we have the opposite: a smaller number of groups is worse.
Therefore, we don't have to remove "known equal" vars: a var that would
be removed may usefully contribute to the multivariate statistics and
increase the number of groups.
This commit adds custom code to estimate_multivariate_bucketsize() to
initialize varinfos properly.
Reported-by: Robins Tharakan <tharakan@gmail.com>
Discussion: https://postgr.es/m/18885-da51324078588253%40postgresql.org
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
This commit is contained in:
@@ -3427,4 +3427,32 @@ SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
|
||||
-> Seq Scan on sb_2 b
|
||||
(5 rows)
|
||||
|
||||
-- Check that the Hash Join bucket size estimator detects equal clauses correctly.
|
||||
SET enable_nestloop = 'off';
|
||||
SET enable_mergejoin = 'off';
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2 ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x);
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Hash Left Join
|
||||
Hash Cond: ((sb_1.x = sb_2.x) AND (sb_1.x = sb_2.x))
|
||||
-> Seq Scan on sb_1
|
||||
-> Hash
|
||||
-> Seq Scan on sb_2
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2
|
||||
ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x) AND (sb_1.y=sb_2.y);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
Hash Left Join
|
||||
Hash Cond: ((sb_1.x = sb_2.x) AND (sb_1.y = sb_2.y) AND (sb_1.x = sb_2.x))
|
||||
-> Seq Scan on sb_1
|
||||
-> Hash
|
||||
-> Seq Scan on sb_2
|
||||
(5 rows)
|
||||
|
||||
RESET enable_nestloop;
|
||||
RESET enable_mergejoin;
|
||||
DROP TABLE sb_1, sb_2 CASCADE;
|
||||
|
||||
@@ -1747,4 +1747,15 @@ ANALYZE sb_2;
|
||||
EXPLAIN (COSTS OFF) -- Choose hash join
|
||||
SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
|
||||
|
||||
-- Check that the Hash Join bucket size estimator detects equal clauses correctly.
|
||||
SET enable_nestloop = 'off';
|
||||
SET enable_mergejoin = 'off';
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2 ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x);
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2
|
||||
ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x) AND (sb_1.y=sb_2.y);
|
||||
RESET enable_nestloop;
|
||||
RESET enable_mergejoin;
|
||||
|
||||
DROP TABLE sb_1, sb_2 CASCADE;
|
||||
|
||||
Reference in New Issue
Block a user