mirror of
https://github.com/postgres/postgres.git
synced 2025-09-11 00:12:06 +03:00
Upgrade planner and executor to allow multiple hash keys for a hash join,
instead of only one. This should speed up planning (only one hash path to consider for a given pair of relations) and allow more effective hashing when there are multiple hashable join clauses.
This commit is contained in:
@@ -42,7 +42,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -819,7 +819,7 @@ cost_mergejoin(Path *path, Query *root,
|
||||
* 'outer_path' is the path for the outer relation
|
||||
* 'inner_path' is the path for the inner relation
|
||||
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
|
||||
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
|
||||
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
|
||||
* (this should be a subset of the restrictlist)
|
||||
*/
|
||||
void
|
||||
@@ -838,10 +838,8 @@ cost_hashjoin(Path *path, Query *root,
|
||||
double innerbytes = relation_byte_size(inner_path->parent->rows,
|
||||
inner_path->parent->width);
|
||||
long hashtablebytes = SortMem * 1024L;
|
||||
RestrictInfo *restrictinfo;
|
||||
Var *left,
|
||||
*right;
|
||||
Selectivity innerbucketsize;
|
||||
List *hcl;
|
||||
|
||||
if (!enable_hashjoin)
|
||||
startup_cost += disable_cost;
|
||||
@@ -856,43 +854,57 @@ cost_hashjoin(Path *path, Query *root,
|
||||
run_cost += cpu_operator_cost * outer_path->parent->rows;
|
||||
|
||||
/*
|
||||
* Determine bucketsize fraction for inner relation. First we have to
|
||||
* figure out which side of the hashjoin clause is the inner side.
|
||||
* Determine bucketsize fraction for inner relation. We use the
|
||||
* smallest bucketsize estimated for any individual hashclause;
|
||||
* this is undoubtedly conservative.
|
||||
*/
|
||||
Assert(length(hashclauses) == 1);
|
||||
Assert(IsA(lfirst(hashclauses), RestrictInfo));
|
||||
restrictinfo = (RestrictInfo *) lfirst(hashclauses);
|
||||
/* these must be OK, since check_hashjoinable accepted the clause */
|
||||
left = get_leftop(restrictinfo->clause);
|
||||
right = get_rightop(restrictinfo->clause);
|
||||
innerbucketsize = 1.0;
|
||||
foreach(hcl, hashclauses)
|
||||
{
|
||||
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
|
||||
Var *left,
|
||||
*right;
|
||||
Selectivity thisbucketsize;
|
||||
|
||||
/*
|
||||
* Since we tend to visit the same clauses over and over when planning
|
||||
* a large query, we cache the bucketsize estimate in the RestrictInfo
|
||||
* node to avoid repeated lookups of statistics.
|
||||
*/
|
||||
if (VARISRELMEMBER(right->varno, inner_path->parent))
|
||||
{
|
||||
/* righthand side is inner */
|
||||
innerbucketsize = restrictinfo->right_bucketsize;
|
||||
if (innerbucketsize < 0)
|
||||
Assert(IsA(restrictinfo, RestrictInfo));
|
||||
/* these must be OK, since check_hashjoinable accepted the clause */
|
||||
left = get_leftop(restrictinfo->clause);
|
||||
right = get_rightop(restrictinfo->clause);
|
||||
|
||||
/*
|
||||
* First we have to figure out which side of the hashjoin clause is the
|
||||
* inner side.
|
||||
*
|
||||
* Since we tend to visit the same clauses over and over when planning
|
||||
* a large query, we cache the bucketsize estimate in the RestrictInfo
|
||||
* node to avoid repeated lookups of statistics.
|
||||
*/
|
||||
if (VARISRELMEMBER(right->varno, inner_path->parent))
|
||||
{
|
||||
/* not cached yet */
|
||||
innerbucketsize = estimate_hash_bucketsize(root, right);
|
||||
restrictinfo->right_bucketsize = innerbucketsize;
|
||||
/* righthand side is inner */
|
||||
thisbucketsize = restrictinfo->right_bucketsize;
|
||||
if (thisbucketsize < 0)
|
||||
{
|
||||
/* not cached yet */
|
||||
thisbucketsize = estimate_hash_bucketsize(root, right);
|
||||
restrictinfo->right_bucketsize = thisbucketsize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(VARISRELMEMBER(left->varno, inner_path->parent));
|
||||
/* lefthand side is inner */
|
||||
innerbucketsize = restrictinfo->left_bucketsize;
|
||||
if (innerbucketsize < 0)
|
||||
else
|
||||
{
|
||||
/* not cached yet */
|
||||
innerbucketsize = estimate_hash_bucketsize(root, left);
|
||||
restrictinfo->left_bucketsize = innerbucketsize;
|
||||
Assert(VARISRELMEMBER(left->varno, inner_path->parent));
|
||||
/* lefthand side is inner */
|
||||
thisbucketsize = restrictinfo->left_bucketsize;
|
||||
if (thisbucketsize < 0)
|
||||
{
|
||||
/* not cached yet */
|
||||
thisbucketsize = estimate_hash_bucketsize(root, left);
|
||||
restrictinfo->left_bucketsize = thisbucketsize;
|
||||
}
|
||||
}
|
||||
|
||||
if (innerbucketsize > thisbucketsize)
|
||||
innerbucketsize = thisbucketsize;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.72 2002/11/24 21:52:14 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -701,7 +701,7 @@ match_unsorted_inner(Query *root,
|
||||
/*
|
||||
* hash_inner_and_outer
|
||||
* Create hashjoin join paths by explicitly hashing both the outer and
|
||||
* inner join relations of each available hash clause.
|
||||
* inner keys of each available hash clause.
|
||||
*
|
||||
* 'joinrel' is the join relation
|
||||
* 'outerrel' is the outer join relation
|
||||
@@ -719,6 +719,7 @@ hash_inner_and_outer(Query *root,
|
||||
JoinType jointype)
|
||||
{
|
||||
bool isouterjoin;
|
||||
List *hashclauses;
|
||||
List *i;
|
||||
|
||||
/*
|
||||
@@ -737,20 +738,18 @@ hash_inner_and_outer(Query *root,
|
||||
}
|
||||
|
||||
/*
|
||||
* We need to build only one hashpath for any given pair of outer and
|
||||
* inner relations; all of the hashable clauses will be used as keys.
|
||||
*
|
||||
* Scan the join's restrictinfo list to find hashjoinable clauses that
|
||||
* are usable with this pair of sub-relations. Since we currently
|
||||
* accept only var-op-var clauses as hashjoinable, we need only check
|
||||
* the membership of the vars to determine whether a particular clause
|
||||
* can be used with this pair of sub-relations. This code would need
|
||||
* to be upgraded if we wanted to allow more-complex expressions in
|
||||
* hash joins.
|
||||
* are usable with this pair of sub-relations.
|
||||
*/
|
||||
hashclauses = NIL;
|
||||
foreach(i, restrictlist)
|
||||
{
|
||||
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
|
||||
Var *left,
|
||||
*right;
|
||||
List *hashclauses;
|
||||
|
||||
if (restrictinfo->hashjoinoperator == InvalidOid)
|
||||
continue; /* not hashjoinable */
|
||||
@@ -768,6 +767,12 @@ hash_inner_and_outer(Query *root,
|
||||
|
||||
/*
|
||||
* Check if clause is usable with these input rels.
|
||||
*
|
||||
* Since we currently accept only var-op-var clauses as hashjoinable,
|
||||
* we need only check the membership of the vars to determine whether
|
||||
* a particular clause can be used with this pair of sub-relations.
|
||||
* This code would need to be upgraded if we wanted to allow
|
||||
* more-complex expressions in hash joins.
|
||||
*/
|
||||
if (VARISRELMEMBER(left->varno, outerrel) &&
|
||||
VARISRELMEMBER(right->varno, innerrel))
|
||||
@@ -782,9 +787,12 @@ hash_inner_and_outer(Query *root,
|
||||
else
|
||||
continue; /* no good for these input relations */
|
||||
|
||||
/* always a one-element list of hash clauses */
|
||||
hashclauses = makeList1(restrictinfo);
|
||||
hashclauses = lappend(hashclauses, restrictinfo);
|
||||
}
|
||||
|
||||
/* If we found any usable hashclauses, make a path */
|
||||
if (hashclauses)
|
||||
{
|
||||
/*
|
||||
* We consider both the cheapest-total-cost and
|
||||
* cheapest-startup-cost outer paths. There's no need to consider
|
||||
|
@@ -10,7 +10,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.125 2002/11/30 00:08:17 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -91,7 +91,7 @@ static HashJoin *make_hashjoin(List *tlist,
|
||||
List *hashclauses,
|
||||
Plan *lefttree, Plan *righttree,
|
||||
JoinType jointype);
|
||||
static Hash *make_hash(List *tlist, Node *hashkey, Plan *lefttree);
|
||||
static Hash *make_hash(List *tlist, List *hashkeys, Plan *lefttree);
|
||||
static MergeJoin *make_mergejoin(List *tlist,
|
||||
List *joinclauses, List *otherclauses,
|
||||
List *mergeclauses,
|
||||
@@ -910,14 +910,9 @@ create_hashjoin_plan(Query *root,
|
||||
List *hashclauses;
|
||||
HashJoin *join_plan;
|
||||
Hash *hash_plan;
|
||||
Node *innerhashkey;
|
||||
List *innerhashkeys;
|
||||
List *hcl;
|
||||
|
||||
/*
|
||||
* NOTE: there will always be exactly one hashclause in the list
|
||||
* best_path->path_hashclauses (cf. hash_inner_and_outer()). We
|
||||
* represent it as a list anyway, for convenience with routines that
|
||||
* want to work on lists of clauses.
|
||||
*/
|
||||
hashclauses = get_actual_clauses(best_path->path_hashclauses);
|
||||
|
||||
/*
|
||||
@@ -950,13 +945,20 @@ create_hashjoin_plan(Query *root,
|
||||
inner_tlist,
|
||||
(Index) 0));
|
||||
|
||||
/* Now the righthand op of the sole hashclause is the inner hash key. */
|
||||
innerhashkey = (Node *) get_rightop(lfirst(hashclauses));
|
||||
/*
|
||||
* Extract the inner hash keys (right-hand operands of the hashclauses)
|
||||
* to put in the Hash node.
|
||||
*/
|
||||
innerhashkeys = NIL;
|
||||
foreach(hcl, hashclauses)
|
||||
{
|
||||
innerhashkeys = lappend(innerhashkeys, get_rightop(lfirst(hcl)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Build the hash node and hash join node.
|
||||
*/
|
||||
hash_plan = make_hash(inner_tlist, innerhashkey, inner_plan);
|
||||
hash_plan = make_hash(inner_tlist, innerhashkeys, inner_plan);
|
||||
join_plan = make_hashjoin(tlist,
|
||||
joinclauses,
|
||||
otherclauses,
|
||||
@@ -1511,7 +1513,7 @@ make_hashjoin(List *tlist,
|
||||
}
|
||||
|
||||
static Hash *
|
||||
make_hash(List *tlist, Node *hashkey, Plan *lefttree)
|
||||
make_hash(List *tlist, List *hashkeys, Plan *lefttree)
|
||||
{
|
||||
Hash *node = makeNode(Hash);
|
||||
Plan *plan = &node->plan;
|
||||
@@ -1528,7 +1530,7 @@ make_hash(List *tlist, Node *hashkey, Plan *lefttree)
|
||||
plan->qual = NULL;
|
||||
plan->lefttree = lefttree;
|
||||
plan->righttree = NULL;
|
||||
node->hashkey = hashkey;
|
||||
node->hashkeys = hashkeys;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
@@ -7,7 +7,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.56 2002/11/26 03:01:58 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.57 2002/11/30 00:08:18 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -677,7 +677,7 @@ SS_finalize_plan(Plan *plan, List *rtable)
|
||||
break;
|
||||
|
||||
case T_Hash:
|
||||
finalize_primnode(((Hash *) plan)->hashkey,
|
||||
finalize_primnode((Node *) ((Hash *) plan)->hashkeys,
|
||||
&results);
|
||||
break;
|
||||
|
||||
|
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.80 2002/11/24 21:52:14 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.81 2002/11/30 00:08:20 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -616,7 +616,7 @@ create_mergejoin_path(Query *root,
|
||||
* 'outer_path' is the cheapest outer path
|
||||
* 'inner_path' is the cheapest inner path
|
||||
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
|
||||
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
|
||||
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
|
||||
* (this should be a subset of the restrict_clauses list)
|
||||
*/
|
||||
HashPath *
|
||||
|
Reference in New Issue
Block a user