mirror of
https://github.com/postgres/postgres.git
synced 2025-09-11 00:12:06 +03:00
Extend the join-selectivity API (the oprjoin interface) so that the join type is passed to join selectivity estimators. Make use of this in eqjoinsel to derive a non-bogus selectivity for IN clauses. Further tweaking of cost estimation for IN. An initdb is forced because of the pg_proc.h changes.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
|
||||
Selectivity
|
||||
restrictlist_selectivity(Query *root,
|
||||
List *restrictinfo_list,
|
||||
int varRelid)
|
||||
int varRelid,
|
||||
JoinType jointype)
|
||||
{
|
||||
List *clauselist = get_actual_clauses(restrictinfo_list);
|
||||
Selectivity result;
|
||||
|
||||
result = clauselist_selectivity(root, clauselist, varRelid);
|
||||
result = clauselist_selectivity(root, clauselist, varRelid, jointype);
|
||||
freeList(clauselist);
|
||||
return result;
|
||||
}
|
||||
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
|
||||
* expression clauses. The list can be empty, in which case 1.0
|
||||
* must be returned.
|
||||
*
|
||||
* See clause_selectivity() for the meaning of the varRelid parameter.
|
||||
* See clause_selectivity() for the meaning of the additional parameters.
|
||||
*
|
||||
* Our basic approach is to take the product of the selectivities of the
|
||||
* subclauses. However, that's only right if the subclauses have independent
|
||||
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
|
||||
Selectivity
|
||||
clauselist_selectivity(Query *root,
|
||||
List *clauses,
|
||||
int varRelid)
|
||||
int varRelid,
|
||||
JoinType jointype)
|
||||
{
|
||||
Selectivity s1 = 1.0;
|
||||
RangeQueryClause *rqlist = NULL;
|
||||
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
|
||||
}
|
||||
}
|
||||
/* Not the right form, so treat it generically. */
|
||||
s2 = clause_selectivity(root, clause, varRelid);
|
||||
s2 = clause_selectivity(root, clause, varRelid, jointype);
|
||||
s1 = s1 * s2;
|
||||
}
|
||||
|
||||
@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
|
||||
*
|
||||
* When varRelid is 0, all variables are treated as variables. This
|
||||
* is appropriate for ordinary join clauses and restriction clauses.
|
||||
*
|
||||
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
|
||||
* if the clause isn't a join clause or the context is uncertain.
|
||||
*/
|
||||
Selectivity
|
||||
clause_selectivity(Query *root,
|
||||
Node *clause,
|
||||
int varRelid)
|
||||
int varRelid,
|
||||
JoinType jointype)
|
||||
{
|
||||
Selectivity s1 = 1.0; /* default for any unhandled clause type */
|
||||
|
||||
@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
|
||||
/* inverse of the selectivity of the underlying clause */
|
||||
s1 = 1.0 - clause_selectivity(root,
|
||||
(Node *) get_notclausearg((Expr *) clause),
|
||||
varRelid);
|
||||
varRelid,
|
||||
jointype);
|
||||
}
|
||||
else if (and_clause(clause))
|
||||
{
|
||||
/* share code with clauselist_selectivity() */
|
||||
s1 = clauselist_selectivity(root,
|
||||
((BoolExpr *) clause)->args,
|
||||
varRelid);
|
||||
varRelid,
|
||||
jointype);
|
||||
}
|
||||
else if (or_clause(clause))
|
||||
{
|
||||
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
|
||||
{
|
||||
Selectivity s2 = clause_selectivity(root,
|
||||
(Node *) lfirst(arg),
|
||||
varRelid);
|
||||
varRelid,
|
||||
jointype);
|
||||
|
||||
s1 = s1 + s2 - s1 * s2;
|
||||
}
|
||||
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
|
||||
{
|
||||
/* Estimate selectivity for a join clause. */
|
||||
s1 = join_selectivity(root, opno,
|
||||
((OpExpr *) clause)->args);
|
||||
((OpExpr *) clause)->args,
|
||||
jointype);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
|
||||
s1 = booltestsel(root,
|
||||
((BooleanTest *) clause)->booltesttype,
|
||||
(Node *) ((BooleanTest *) clause)->arg,
|
||||
varRelid);
|
||||
varRelid,
|
||||
jointype);
|
||||
}
|
||||
else if (IsA(clause, RelabelType))
|
||||
{
|
||||
/* Not sure this case is needed, but it can't hurt */
|
||||
s1 = clause_selectivity(root,
|
||||
(Node *) ((RelabelType *) clause)->arg,
|
||||
varRelid);
|
||||
varRelid,
|
||||
jointype);
|
||||
}
|
||||
|
||||
#ifdef SELECTIVITY_DEBUG
|
||||
|
@@ -49,7 +49,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -104,7 +104,8 @@ bool enable_hashjoin = true;
|
||||
static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
|
||||
int nbuckets);
|
||||
static bool cost_qual_eval_walker(Node *node, QualCost *total);
|
||||
static Selectivity approx_selectivity(Query *root, List *quals);
|
||||
static Selectivity approx_selectivity(Query *root, List *quals,
|
||||
JoinType jointype);
|
||||
static void set_rel_width(Query *root, RelOptInfo *rel);
|
||||
static double relation_byte_size(double tuples, int width);
|
||||
static double page_size(double tuples, int width);
|
||||
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
|
||||
*/
|
||||
if (path->jointype == JOIN_IN)
|
||||
{
|
||||
Selectivity qual_selec = approx_selectivity(root, restrictlist);
|
||||
Selectivity qual_selec = approx_selectivity(root, restrictlist,
|
||||
path->jointype);
|
||||
double qptuples;
|
||||
|
||||
qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
|
||||
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
|
||||
* Note: it's probably bogus to use the normal selectivity calculation
|
||||
* here when either the outer or inner path is a UniquePath.
|
||||
*/
|
||||
merge_selec = approx_selectivity(root, mergeclauses);
|
||||
merge_selec = approx_selectivity(root, mergeclauses,
|
||||
path->jpath.jointype);
|
||||
cost_qual_eval(&merge_qual_cost, mergeclauses);
|
||||
qpquals = set_ptrDifference(restrictlist, mergeclauses);
|
||||
qp_selec = approx_selectivity(root, qpquals);
|
||||
qp_selec = approx_selectivity(root, qpquals,
|
||||
path->jpath.jointype);
|
||||
cost_qual_eval(&qp_qual_cost, qpquals);
|
||||
freeList(qpquals);
|
||||
|
||||
@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
|
||||
* Note: it's probably bogus to use the normal selectivity calculation
|
||||
* here when either the outer or inner path is a UniquePath.
|
||||
*/
|
||||
hash_selec = approx_selectivity(root, hashclauses);
|
||||
hash_selec = approx_selectivity(root, hashclauses,
|
||||
path->jpath.jointype);
|
||||
cost_qual_eval(&hash_qual_cost, hashclauses);
|
||||
qpquals = set_ptrDifference(restrictlist, hashclauses);
|
||||
qp_selec = approx_selectivity(root, qpquals);
|
||||
qp_selec = approx_selectivity(root, qpquals,
|
||||
path->jpath.jointype);
|
||||
cost_qual_eval(&qp_qual_cost, qpquals);
|
||||
freeList(qpquals);
|
||||
|
||||
@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
|
||||
* Determine bucketsize fraction for inner relation. We use the
|
||||
* smallest bucketsize estimated for any individual hashclause;
|
||||
* this is undoubtedly conservative.
|
||||
*
|
||||
* BUT: if inner relation has been unique-ified, we can assume it's
|
||||
* good for hashing. This is important both because it's the right
|
||||
* answer, and because we avoid contaminating the cache with a value
|
||||
* that's wrong for non-unique-ified paths.
|
||||
*/
|
||||
innerbucketsize = 1.0;
|
||||
foreach(hcl, hashclauses)
|
||||
if (IsA(inner_path, UniquePath))
|
||||
innerbucketsize = 1.0 / virtualbuckets;
|
||||
else
|
||||
{
|
||||
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
|
||||
Selectivity thisbucketsize;
|
||||
|
||||
Assert(IsA(restrictinfo, RestrictInfo));
|
||||
|
||||
/*
|
||||
* First we have to figure out which side of the hashjoin clause is the
|
||||
* inner side.
|
||||
*
|
||||
* Since we tend to visit the same clauses over and over when planning
|
||||
* a large query, we cache the bucketsize estimate in the RestrictInfo
|
||||
* node to avoid repeated lookups of statistics.
|
||||
*/
|
||||
if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
|
||||
innerbucketsize = 1.0;
|
||||
foreach(hcl, hashclauses)
|
||||
{
|
||||
/* righthand side is inner */
|
||||
thisbucketsize = restrictinfo->right_bucketsize;
|
||||
if (thisbucketsize < 0)
|
||||
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
|
||||
Selectivity thisbucketsize;
|
||||
|
||||
Assert(IsA(restrictinfo, RestrictInfo));
|
||||
|
||||
/*
|
||||
* First we have to figure out which side of the hashjoin clause
|
||||
* is the inner side.
|
||||
*
|
||||
* Since we tend to visit the same clauses over and over when
|
||||
* planning a large query, we cache the bucketsize estimate in the
|
||||
* RestrictInfo node to avoid repeated lookups of statistics.
|
||||
*/
|
||||
if (is_subseti(restrictinfo->right_relids,
|
||||
inner_path->parent->relids))
|
||||
{
|
||||
/* not cached yet */
|
||||
thisbucketsize = estimate_hash_bucketsize(root,
|
||||
/* righthand side is inner */
|
||||
thisbucketsize = restrictinfo->right_bucketsize;
|
||||
if (thisbucketsize < 0)
|
||||
{
|
||||
/* not cached yet */
|
||||
thisbucketsize =
|
||||
estimate_hash_bucketsize(root,
|
||||
(Var *) get_rightop(restrictinfo->clause),
|
||||
virtualbuckets);
|
||||
restrictinfo->right_bucketsize = thisbucketsize;
|
||||
virtualbuckets);
|
||||
restrictinfo->right_bucketsize = thisbucketsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(is_subseti(restrictinfo->left_relids,
|
||||
inner_path->parent->relids));
|
||||
/* lefthand side is inner */
|
||||
thisbucketsize = restrictinfo->left_bucketsize;
|
||||
if (thisbucketsize < 0)
|
||||
else
|
||||
{
|
||||
/* not cached yet */
|
||||
thisbucketsize = estimate_hash_bucketsize(root,
|
||||
Assert(is_subseti(restrictinfo->left_relids,
|
||||
inner_path->parent->relids));
|
||||
/* lefthand side is inner */
|
||||
thisbucketsize = restrictinfo->left_bucketsize;
|
||||
if (thisbucketsize < 0)
|
||||
{
|
||||
/* not cached yet */
|
||||
thisbucketsize =
|
||||
estimate_hash_bucketsize(root,
|
||||
(Var *) get_leftop(restrictinfo->clause),
|
||||
virtualbuckets);
|
||||
restrictinfo->left_bucketsize = thisbucketsize;
|
||||
virtualbuckets);
|
||||
restrictinfo->left_bucketsize = thisbucketsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (innerbucketsize > thisbucketsize)
|
||||
innerbucketsize = thisbucketsize;
|
||||
if (innerbucketsize > thisbucketsize)
|
||||
innerbucketsize = thisbucketsize;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
|
||||
* seems OK to live with the approximation.
|
||||
*/
|
||||
static Selectivity
|
||||
approx_selectivity(Query *root, List *quals)
|
||||
approx_selectivity(Query *root, List *quals, JoinType jointype)
|
||||
{
|
||||
Selectivity total = 1.0;
|
||||
List *l;
|
||||
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
|
||||
restrictinfo->this_selec =
|
||||
clause_selectivity(root,
|
||||
(Node *) restrictinfo->clause,
|
||||
0);
|
||||
0,
|
||||
jointype);
|
||||
selec = restrictinfo->this_selec;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* If it's a bare expression, must always do it the hard way */
|
||||
selec = clause_selectivity(root, qual, 0);
|
||||
selec = clause_selectivity(root, qual, 0, jointype);
|
||||
}
|
||||
total *= selec;
|
||||
}
|
||||
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
|
||||
temp = rel->tuples *
|
||||
restrictlist_selectivity(root,
|
||||
rel->baserestrictinfo,
|
||||
lfirsti(rel->relids));
|
||||
lfirsti(rel->relids),
|
||||
JOIN_INNER);
|
||||
|
||||
/*
|
||||
* Force estimate to be at least one row, to make explain output look
|
||||
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
|
||||
*/
|
||||
selec = restrictlist_selectivity(root,
|
||||
restrictlist,
|
||||
0);
|
||||
0,
|
||||
jointype);
|
||||
|
||||
/*
|
||||
* Basically, we multiply size of Cartesian product by selectivity.
|
||||
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
|
||||
* For JOIN_IN and variants, the Cartesian product is figured with
|
||||
* respect to a unique-ified input, and then we can clamp to the size
|
||||
* of the other input.
|
||||
* XXX it's not at all clear that the ordinary selectivity calculation
|
||||
* is appropriate in this case.
|
||||
*/
|
||||
switch (jointype)
|
||||
{
|
||||
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
|
||||
temp = rel->tuples *
|
||||
restrictlist_selectivity(root,
|
||||
rel->baserestrictinfo,
|
||||
lfirsti(rel->relids));
|
||||
lfirsti(rel->relids),
|
||||
JOIN_INNER);
|
||||
|
||||
/*
|
||||
* Force estimate to be at least one row, to make explain output look
|
||||
|
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.133 2003/01/24 03:58:34 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.134 2003/01/28 22:13:33 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1599,12 +1599,16 @@ make_innerjoin_index_path(Query *root,
|
||||
* selectivity. However, since RestrictInfo nodes aren't copied when
|
||||
* linking them into different lists, it should be sufficient to use
|
||||
* pointer comparison to remove duplicates.)
|
||||
*
|
||||
* Always assume the join type is JOIN_INNER; even if some of the
|
||||
* join clauses come from other contexts, that's not our problem.
|
||||
*/
|
||||
pathnode->rows = rel->tuples *
|
||||
restrictlist_selectivity(root,
|
||||
set_ptrUnion(rel->baserestrictinfo,
|
||||
clausegroup),
|
||||
lfirsti(rel->relids));
|
||||
lfirsti(rel->relids),
|
||||
JOIN_INNER);
|
||||
/* Like costsize.c, force estimate to be at least one row */
|
||||
if (pathnode->rows < 1.0)
|
||||
pathnode->rows = 1.0;
|
||||
|
@@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.68 2003/01/20 18:54:53 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.69 2003/01/28 22:13:35 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -351,7 +351,7 @@ make_subplan(SubLink *slink, List *lefthand, bool isTopQual)
|
||||
|
||||
qualsel = clauselist_selectivity(subquery,
|
||||
plan->qual,
|
||||
0);
|
||||
0, JOIN_INNER);
|
||||
/* Is 10% selectivity a good threshold?? */
|
||||
use_material = qualsel < 0.10;
|
||||
}
|
||||
|
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.75 2002/11/24 21:52:14 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/plancat.c,v 1.76 2003/01/28 22:13:35 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -196,8 +196,7 @@ find_secondary_indexes(Oid relationObjectId)
|
||||
* This code executes registered procedures stored in the
|
||||
* operator relation, by calling the function manager.
|
||||
*
|
||||
* varRelid is either 0 or a rangetable index. See clause_selectivity()
|
||||
* for details about its meaning.
|
||||
* See clause_selectivity() for the meaning of the additional parameters.
|
||||
*/
|
||||
Selectivity
|
||||
restriction_selectivity(Query *root,
|
||||
@@ -237,7 +236,8 @@ restriction_selectivity(Query *root,
|
||||
Selectivity
|
||||
join_selectivity(Query *root,
|
||||
Oid operator,
|
||||
List *args)
|
||||
List *args,
|
||||
JoinType jointype)
|
||||
{
|
||||
RegProcedure oprjoin = get_oprjoin(operator);
|
||||
float8 result;
|
||||
@@ -249,10 +249,11 @@ join_selectivity(Query *root,
|
||||
if (!oprjoin)
|
||||
return (Selectivity) 0.5;
|
||||
|
||||
result = DatumGetFloat8(OidFunctionCall3(oprjoin,
|
||||
result = DatumGetFloat8(OidFunctionCall4(oprjoin,
|
||||
PointerGetDatum(root),
|
||||
ObjectIdGetDatum(operator),
|
||||
PointerGetDatum(args)));
|
||||
PointerGetDatum(args),
|
||||
Int16GetDatum(jointype)));
|
||||
|
||||
if (result < 0.0 || result > 1.0)
|
||||
elog(ERROR, "join_selectivity: bad value %f", result);
|
||||
|
Reference in New Issue
Block a user