mirror of
https://github.com/postgres/postgres.git
synced 2025-06-03 01:21:48 +03:00
Properly prepare varinfos in estimate_multivariate_bucketsize()
To estimate with extended statistics, we need to clear the varnullingrels field in the expression, and duplicates are not allowed in the GroupVarInfo list. We might re-use add_unique_group_var(), but we don't do so for two reasons.

1) We must keep the origin_rinfos list ordered exactly the same way as varinfos.

2) add_unique_group_var() is designed for estimate_num_groups(), where a larger number of groups is worse. While estimating the number of hash buckets, we have the opposite: a lesser number of groups is worse. Therefore, we don't have to remove "known equal" vars: the removed var may valuably contribute to the multivariate statistics to grow the number of groups.

This commit adds custom code to estimate_multivariate_bucketsize() to initialize varinfos properly.

Reported-by: Robins Tharakan <tharakan@gmail.com>
Discussion: https://postgr.es/m/18885-da51324078588253%40postgresql.org
Author: Andrei Lepikhov <lepihov@gmail.com>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Tomas Vondra <tomas@vondra.me>
Reviewed-by: Alexander Korotkov <aekorotkov@gmail.com>
This commit is contained in:
parent
3db61db48e
commit
9f404d7922
@ -3850,6 +3850,8 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
|
||||
if (bms_get_singleton_member(relids, &relid) &&
|
||||
root->simple_rel_array[relid]->statlist != NIL)
|
||||
{
|
||||
bool is_duplicate = false;
|
||||
|
||||
/*
|
||||
* This inner-side expression references only one relation.
|
||||
* Extended statistics on this clause can exist.
|
||||
@ -3880,11 +3882,61 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
|
||||
*/
|
||||
continue;
|
||||
|
||||
varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
|
||||
/*
|
||||
* We're going to add the new clause to the varinfos list. We
|
||||
* might re-use add_unique_group_var(), but we don't do so for
|
||||
* two reasons.
|
||||
*
|
||||
* 1) We must keep the origin_rinfos list ordered exactly the
|
||||
* same way as varinfos.
|
||||
*
|
||||
* 2) add_unique_group_var() is designed for
|
||||
* estimate_num_groups(), where a larger number of groups is
|
||||
* worse. While estimating the number of hash buckets, we
|
||||
* have the opposite: a lesser number of groups is worse.
|
||||
* Therefore, we don't have to remove "known equal" vars: the
|
||||
* removed var may valuably contribute to the multivariate
|
||||
* statistics to grow the number of groups.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Clear nullingrels to correctly match hash keys. See
|
||||
* add_unique_group_var()'s comment for details.
|
||||
*/
|
||||
expr = remove_nulling_relids(expr, root->outer_join_rels, NULL);
|
||||
|
||||
/*
|
||||
* Detect and exclude exact duplicates from the list of hash
|
||||
* keys (like add_unique_group_var does).
|
||||
*/
|
||||
foreach(lc1, varinfos)
|
||||
{
|
||||
varinfo = (GroupVarInfo *) lfirst(lc1);
|
||||
|
||||
if (!equal(expr, varinfo->var))
|
||||
continue;
|
||||
|
||||
is_duplicate = true;
|
||||
break;
|
||||
}
|
||||
|
||||
if (is_duplicate)
|
||||
{
|
||||
/*
|
||||
* Skip exact duplicates. Adding them to the otherclauses
|
||||
* list also doesn't make sense.
|
||||
*/
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize GroupVarInfo. We only use it to call
|
||||
* estimate_multivariate_ndistinct(), which doesn't care about
|
||||
* ndistinct and isdefault fields. Thus, skip these fields.
|
||||
*/
|
||||
varinfo = (GroupVarInfo *) palloc0(sizeof(GroupVarInfo));
|
||||
varinfo->var = expr;
|
||||
varinfo->rel = root->simple_rel_array[relid];
|
||||
varinfo->ndistinct = 0.0;
|
||||
varinfo->isdefault = false;
|
||||
varinfos = lappend(varinfos, varinfo);
|
||||
|
||||
/*
|
||||
@ -3894,8 +3946,10 @@ estimate_multivariate_bucketsize(PlannerInfo *root, RelOptInfo *inner,
|
||||
origin_rinfos = lappend(origin_rinfos, rinfo);
|
||||
}
|
||||
else
|
||||
{
|
||||
/* This clause can't be estimated with extended statistics */
|
||||
otherclauses = lappend(otherclauses, rinfo);
|
||||
}
|
||||
|
||||
clauses = foreach_delete_current(clauses, lc);
|
||||
}
|
||||
|
@ -3427,4 +3427,32 @@ SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
|
||||
-> Seq Scan on sb_2 b
|
||||
(5 rows)
|
||||
|
||||
-- Check that the Hash Join bucket size estimator detects equal clauses correctly.
|
||||
SET enable_nestloop = 'off';
|
||||
SET enable_mergejoin = 'off';
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2 ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x);
|
||||
QUERY PLAN
|
||||
--------------------------------------------------------
|
||||
Hash Left Join
|
||||
Hash Cond: ((sb_1.x = sb_2.x) AND (sb_1.x = sb_2.x))
|
||||
-> Seq Scan on sb_1
|
||||
-> Hash
|
||||
-> Seq Scan on sb_2
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2
|
||||
ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x) AND (sb_1.y=sb_2.y);
|
||||
QUERY PLAN
|
||||
------------------------------------------------------------------------------
|
||||
Hash Left Join
|
||||
Hash Cond: ((sb_1.x = sb_2.x) AND (sb_1.y = sb_2.y) AND (sb_1.x = sb_2.x))
|
||||
-> Seq Scan on sb_1
|
||||
-> Hash
|
||||
-> Seq Scan on sb_2
|
||||
(5 rows)
|
||||
|
||||
RESET enable_nestloop;
|
||||
RESET enable_mergejoin;
|
||||
DROP TABLE sb_1, sb_2 CASCADE;
|
||||
|
@ -1747,4 +1747,15 @@ ANALYZE sb_2;
|
||||
EXPLAIN (COSTS OFF) -- Choose hash join
|
||||
SELECT * FROM sb_1 a, sb_2 b WHERE a.x = b.x AND a.y = b.y AND a.z = b.z;
|
||||
|
||||
-- Check that the Hash Join bucket size estimator detects equal clauses correctly.
|
||||
SET enable_nestloop = 'off';
|
||||
SET enable_mergejoin = 'off';
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2 ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x);
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT FROM sb_1 LEFT JOIN sb_2
|
||||
ON (sb_2.x=sb_1.x) AND (sb_1.x=sb_2.x) AND (sb_1.y=sb_2.y);
|
||||
RESET enable_nestloop;
|
||||
RESET enable_mergejoin;
|
||||
|
||||
DROP TABLE sb_1, sb_2 CASCADE;
|
||||
|
Loading…
x
Reference in New Issue
Block a user