mirror of
https://github.com/postgres/postgres.git
synced 2025-07-18 17:42:25 +03:00
Fix cost_mergejoin's failure to adjust for rescanning of non-unique merge join
keys when considering a semi or anti join. This requires estimating the selectivity of the merge qual as though it were a regular inner join condition. To allow caching both that and the real outer-join-aware selectivity, split RestrictInfo.this_selec into two fields. This fixes one of the problems reported by Kevin Grittner.
This commit is contained in:
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.96 2009/01/01 17:23:43 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.97 2009/02/06 23:43:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -516,21 +516,34 @@ clause_selectivity(PlannerInfo *root,
|
||||
/*
|
||||
* If the clause is marked redundant, always return 1.0.
|
||||
*/
|
||||
if (rinfo->this_selec > 1)
|
||||
if (rinfo->norm_selec > 1)
|
||||
return (Selectivity) 1.0;
|
||||
|
||||
/*
|
||||
* If possible, cache the result of the selectivity calculation for
|
||||
* the clause. We can cache if varRelid is zero or the clause
|
||||
* contains only vars of that relid --- otherwise varRelid will affect
|
||||
* the result, so mustn't cache.
|
||||
* the result, so mustn't cache. Outer join quals might be examined
|
||||
* with either their join's actual jointype or JOIN_INNER, so we need
|
||||
* two cache variables to remember both cases. Note: we assume the
|
||||
* result won't change if we are switching the input relations or
|
||||
* considering a unique-ified case, so we only need one cache variable
|
||||
* for all non-JOIN_INNER cases.
|
||||
*/
|
||||
if (varRelid == 0 ||
|
||||
bms_is_subset_singleton(rinfo->clause_relids, varRelid))
|
||||
{
|
||||
/* Cacheable --- do we already have the result? */
|
||||
if (rinfo->this_selec >= 0)
|
||||
return rinfo->this_selec;
|
||||
if (jointype == JOIN_INNER)
|
||||
{
|
||||
if (rinfo->norm_selec >= 0)
|
||||
return rinfo->norm_selec;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rinfo->outer_selec >= 0)
|
||||
return rinfo->outer_selec;
|
||||
}
|
||||
cacheable = true;
|
||||
}
|
||||
|
||||
@ -753,7 +766,12 @@ clause_selectivity(PlannerInfo *root,
|
||||
|
||||
/* Cache the result if possible */
|
||||
if (cacheable)
|
||||
rinfo->this_selec = s1;
|
||||
{
|
||||
if (jointype == JOIN_INNER)
|
||||
rinfo->norm_selec = s1;
|
||||
else
|
||||
rinfo->outer_selec = s1;
|
||||
}
|
||||
|
||||
#ifdef SELECTIVITY_DEBUG
|
||||
elog(DEBUG4, "clause_selectivity: s1 %f", s1);
|
||||
|
@ -54,7 +54,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.203 2009/01/01 17:23:43 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.204 2009/02/06 23:43:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -120,7 +120,7 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
|
||||
PathKey *pathkey);
|
||||
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
|
||||
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
|
||||
List *quals, SpecialJoinInfo *sjinfo);
|
||||
List *quals);
|
||||
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
|
||||
static double relation_byte_size(double tuples, int width);
|
||||
static double page_size(double tuples, int width);
|
||||
@ -1507,11 +1507,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
||||
|
||||
/*
|
||||
* Get approx # tuples passing the mergequals. We use approx_tuple_count
|
||||
* here for speed --- in most cases, any errors won't affect the result
|
||||
* much.
|
||||
* here because we need an estimate done with JOIN_INNER semantics.
|
||||
*/
|
||||
mergejointuples = approx_tuple_count(root, &path->jpath,
|
||||
mergeclauses, sjinfo);
|
||||
mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
|
||||
|
||||
/*
|
||||
* When there are equal merge keys in the outer relation, the mergejoin
|
||||
@ -1539,16 +1537,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
||||
* when we should not. Can we do better without expensive selectivity
|
||||
* computations?
|
||||
*
|
||||
* For SEMI and ANTI joins, only one inner tuple need be rescanned for
|
||||
* each group of same-keyed outer tuples (assuming that all joinquals
|
||||
* are merge quals). This makes the effect small enough to ignore,
|
||||
* so we just set rescannedtuples = 0. Likewise, the whole issue is
|
||||
* moot if we are working from a unique-ified outer input.
|
||||
* The whole issue is moot if we are working from a unique-ified outer
|
||||
* input.
|
||||
*/
|
||||
if (sjinfo->jointype == JOIN_SEMI ||
|
||||
sjinfo->jointype == JOIN_ANTI)
|
||||
rescannedtuples = 0;
|
||||
else if (IsA(outer_path, UniquePath))
|
||||
if (IsA(outer_path, UniquePath))
|
||||
rescannedtuples = 0;
|
||||
else
|
||||
{
|
||||
@ -1847,11 +1839,9 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
|
||||
|
||||
/*
|
||||
* Get approx # tuples passing the hashquals. We use approx_tuple_count
|
||||
* here for speed --- in most cases, any errors won't affect the result
|
||||
* much.
|
||||
* here because we need an estimate done with JOIN_INNER semantics.
|
||||
*/
|
||||
hashjointuples = approx_tuple_count(root, &path->jpath,
|
||||
hashclauses, sjinfo);
|
||||
hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
|
||||
|
||||
/* cost of source data */
|
||||
startup_cost += outer_path->startup_cost;
|
||||
@ -2324,6 +2314,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
|
||||
* The quals can be either an implicitly-ANDed list of boolean expressions,
|
||||
* or a list of RestrictInfo nodes (typically the latter).
|
||||
*
|
||||
* We intentionally compute the selectivity under JOIN_INNER rules, even
|
||||
* if it's some type of outer join. This is appropriate because we are
|
||||
* trying to figure out how many tuples pass the initial merge or hash
|
||||
* join step.
|
||||
*
|
||||
* This is quick-and-dirty because we bypass clauselist_selectivity, and
|
||||
* simply multiply the independent clause selectivities together. Now
|
||||
* clauselist_selectivity often can't do any better than that anyhow, but
|
||||
@ -2336,31 +2331,40 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
|
||||
* seems OK to live with the approximation.
|
||||
*/
|
||||
static double
|
||||
approx_tuple_count(PlannerInfo *root, JoinPath *path,
|
||||
List *quals, SpecialJoinInfo *sjinfo)
|
||||
approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
|
||||
{
|
||||
double tuples;
|
||||
double outer_tuples = path->outerjoinpath->parent->rows;
|
||||
double inner_tuples = path->innerjoinpath->parent->rows;
|
||||
SpecialJoinInfo sjinfo;
|
||||
Selectivity selec = 1.0;
|
||||
ListCell *l;
|
||||
|
||||
/*
|
||||
* Make up a SpecialJoinInfo for JOIN_INNER semantics.
|
||||
*/
|
||||
sjinfo.type = T_SpecialJoinInfo;
|
||||
sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
|
||||
sjinfo.min_righthand = path->innerjoinpath->parent->relids;
|
||||
sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
|
||||
sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
|
||||
sjinfo.jointype = JOIN_INNER;
|
||||
/* we don't bother trying to make the remaining fields valid */
|
||||
sjinfo.lhs_strict = false;
|
||||
sjinfo.delay_upper_joins = false;
|
||||
sjinfo.join_quals = NIL;
|
||||
|
||||
/* Get the approximate selectivity */
|
||||
foreach(l, quals)
|
||||
{
|
||||
Node *qual = (Node *) lfirst(l);
|
||||
|
||||
/* Note that clause_selectivity will be able to cache its result */
|
||||
selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
|
||||
selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
|
||||
}
|
||||
|
||||
/* Apply it correctly using the input relation sizes */
|
||||
if (sjinfo->jointype == JOIN_SEMI)
|
||||
tuples = selec * outer_tuples;
|
||||
else if (sjinfo->jointype == JOIN_ANTI)
|
||||
tuples = (1.0 - selec) * outer_tuples;
|
||||
else
|
||||
tuples = selec * outer_tuples * inner_tuples;
|
||||
/* Apply it to the input relation sizes */
|
||||
tuples = selec * outer_tuples * inner_tuples;
|
||||
|
||||
return clamp_row_est(tuples);
|
||||
}
|
||||
|
@ -10,7 +10,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.16 2009/01/01 17:23:43 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.17 2009/02/06 23:43:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -1200,7 +1200,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
|
||||
list_delete_cell(root->left_join_clauses, cell, prev);
|
||||
/* we throw it back anyway (see notes above) */
|
||||
/* but the thrown-back clause has no extra selectivity */
|
||||
rinfo->this_selec = 2.0;
|
||||
rinfo->norm_selec = 2.0;
|
||||
rinfo->outer_selec = 1.0;
|
||||
distribute_restrictinfo_to_rels(root, rinfo);
|
||||
}
|
||||
else
|
||||
@ -1222,7 +1223,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
|
||||
list_delete_cell(root->right_join_clauses, cell, prev);
|
||||
/* we throw it back anyway (see notes above) */
|
||||
/* but the thrown-back clause has no extra selectivity */
|
||||
rinfo->this_selec = 2.0;
|
||||
rinfo->norm_selec = 2.0;
|
||||
rinfo->outer_selec = 1.0;
|
||||
distribute_restrictinfo_to_rels(root, rinfo);
|
||||
}
|
||||
else
|
||||
@ -1244,7 +1246,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
|
||||
list_delete_cell(root->full_join_clauses, cell, prev);
|
||||
/* we throw it back anyway (see notes above) */
|
||||
/* but the thrown-back clause has no extra selectivity */
|
||||
rinfo->this_selec = 2.0;
|
||||
rinfo->norm_selec = 2.0;
|
||||
rinfo->outer_selec = 1.0;
|
||||
distribute_restrictinfo_to_rels(root, rinfo);
|
||||
}
|
||||
else
|
||||
|
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.86 2009/01/01 17:23:44 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.87 2009/02/06 23:43:23 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -174,10 +174,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
|
||||
{
|
||||
orig_selec = clause_selectivity(root, (Node *) bestrinfo,
|
||||
0, JOIN_INNER, NULL);
|
||||
bestrinfo->this_selec = orig_selec / or_selec;
|
||||
bestrinfo->norm_selec = orig_selec / or_selec;
|
||||
/* clamp result to sane range */
|
||||
if (bestrinfo->this_selec > 1)
|
||||
bestrinfo->this_selec = 1;
|
||||
if (bestrinfo->norm_selec > 1)
|
||||
bestrinfo->norm_selec = 1;
|
||||
/* It isn't an outer join clause, so no need to adjust outer_selec */
|
||||
}
|
||||
|
||||
/* Tell caller to recompute rel's rows estimate */
|
||||
|
Reference in New Issue
Block a user