1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-11 00:12:06 +03:00

Upgrade planner and executor to allow multiple hash keys for a hash join,
instead of only one.  This should speed up planning (only one hash path
to consider for a given pair of relations) as well as allow more effective
hashing, when there are multiple hashable joinclauses.
This commit is contained in:
Tom Lane
2002-11-30 00:08:22 +00:00
parent f68f11928d
commit ddb2d78de0
14 changed files with 182 additions and 133 deletions

View File

@@ -42,7 +42,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -819,7 +819,7 @@ cost_mergejoin(Path *path, Query *root,
* 'outer_path' is the path for the outer relation
* 'inner_path' is the path for the inner relation
* 'restrictlist' are the RestrictInfo nodes to be applied at the join
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrictlist)
*/
void
@@ -838,10 +838,8 @@ cost_hashjoin(Path *path, Query *root,
double innerbytes = relation_byte_size(inner_path->parent->rows,
inner_path->parent->width);
long hashtablebytes = SortMem * 1024L;
RestrictInfo *restrictinfo;
Var *left,
*right;
Selectivity innerbucketsize;
List *hcl;
if (!enable_hashjoin)
startup_cost += disable_cost;
@@ -856,43 +854,57 @@ cost_hashjoin(Path *path, Query *root,
run_cost += cpu_operator_cost * outer_path->parent->rows;
/*
* Determine bucketsize fraction for inner relation. First we have to
* figure out which side of the hashjoin clause is the inner side.
* Determine bucketsize fraction for inner relation. We use the
* smallest bucketsize estimated for any individual hashclause;
* this is undoubtedly conservative.
*/
Assert(length(hashclauses) == 1);
Assert(IsA(lfirst(hashclauses), RestrictInfo));
restrictinfo = (RestrictInfo *) lfirst(hashclauses);
/* these must be OK, since check_hashjoinable accepted the clause */
left = get_leftop(restrictinfo->clause);
right = get_rightop(restrictinfo->clause);
innerbucketsize = 1.0;
foreach(hcl, hashclauses)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
Var *left,
*right;
Selectivity thisbucketsize;
/*
* Since we tend to visit the same clauses over and over when planning
* a large query, we cache the bucketsize estimate in the RestrictInfo
* node to avoid repeated lookups of statistics.
*/
if (VARISRELMEMBER(right->varno, inner_path->parent))
{
/* righthand side is inner */
innerbucketsize = restrictinfo->right_bucketsize;
if (innerbucketsize < 0)
Assert(IsA(restrictinfo, RestrictInfo));
/* these must be OK, since check_hashjoinable accepted the clause */
left = get_leftop(restrictinfo->clause);
right = get_rightop(restrictinfo->clause);
/*
* First we have to figure out which side of the hashjoin clause is the
* inner side.
*
* Since we tend to visit the same clauses over and over when planning
* a large query, we cache the bucketsize estimate in the RestrictInfo
* node to avoid repeated lookups of statistics.
*/
if (VARISRELMEMBER(right->varno, inner_path->parent))
{
/* not cached yet */
innerbucketsize = estimate_hash_bucketsize(root, right);
restrictinfo->right_bucketsize = innerbucketsize;
/* righthand side is inner */
thisbucketsize = restrictinfo->right_bucketsize;
if (thisbucketsize < 0)
{
/* not cached yet */
thisbucketsize = estimate_hash_bucketsize(root, right);
restrictinfo->right_bucketsize = thisbucketsize;
}
}
}
else
{
Assert(VARISRELMEMBER(left->varno, inner_path->parent));
/* lefthand side is inner */
innerbucketsize = restrictinfo->left_bucketsize;
if (innerbucketsize < 0)
else
{
/* not cached yet */
innerbucketsize = estimate_hash_bucketsize(root, left);
restrictinfo->left_bucketsize = innerbucketsize;
Assert(VARISRELMEMBER(left->varno, inner_path->parent));
/* lefthand side is inner */
thisbucketsize = restrictinfo->left_bucketsize;
if (thisbucketsize < 0)
{
/* not cached yet */
thisbucketsize = estimate_hash_bucketsize(root, left);
restrictinfo->left_bucketsize = thisbucketsize;
}
}
if (innerbucketsize > thisbucketsize)
innerbucketsize = thisbucketsize;
}
/*

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.72 2002/11/24 21:52:14 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -701,7 +701,7 @@ match_unsorted_inner(Query *root,
/*
* hash_inner_and_outer
* Create hashjoin join paths by explicitly hashing both the outer and
* inner join relations of each available hash clause.
* inner keys of each available hash clause.
*
* 'joinrel' is the join relation
* 'outerrel' is the outer join relation
@@ -719,6 +719,7 @@ hash_inner_and_outer(Query *root,
JoinType jointype)
{
bool isouterjoin;
List *hashclauses;
List *i;
/*
@@ -737,20 +738,18 @@ hash_inner_and_outer(Query *root,
}
/*
* We need to build only one hashpath for any given pair of outer and
* inner relations; all of the hashable clauses will be used as keys.
*
* Scan the join's restrictinfo list to find hashjoinable clauses that
* are usable with this pair of sub-relations. Since we currently
* accept only var-op-var clauses as hashjoinable, we need only check
* the membership of the vars to determine whether a particular clause
* can be used with this pair of sub-relations. This code would need
* to be upgraded if we wanted to allow more-complex expressions in
* hash joins.
* are usable with this pair of sub-relations.
*/
hashclauses = NIL;
foreach(i, restrictlist)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Var *left,
*right;
List *hashclauses;
if (restrictinfo->hashjoinoperator == InvalidOid)
continue; /* not hashjoinable */
@@ -768,6 +767,12 @@ hash_inner_and_outer(Query *root,
/*
* Check if clause is usable with these input rels.
*
* Since we currently accept only var-op-var clauses as hashjoinable,
* we need only check the membership of the vars to determine whether
* a particular clause can be used with this pair of sub-relations.
* This code would need to be upgraded if we wanted to allow
* more-complex expressions in hash joins.
*/
if (VARISRELMEMBER(left->varno, outerrel) &&
VARISRELMEMBER(right->varno, innerrel))
@@ -782,9 +787,12 @@ hash_inner_and_outer(Query *root,
else
continue; /* no good for these input relations */
/* always a one-element list of hash clauses */
hashclauses = makeList1(restrictinfo);
hashclauses = lappend(hashclauses, restrictinfo);
}
/* If we found any usable hashclauses, make a path */
if (hashclauses)
{
/*
* We consider both the cheapest-total-cost and
* cheapest-startup-cost outer paths. There's no need to consider

View File

@@ -10,7 +10,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.125 2002/11/30 00:08:17 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -91,7 +91,7 @@ static HashJoin *make_hashjoin(List *tlist,
List *hashclauses,
Plan *lefttree, Plan *righttree,
JoinType jointype);
static Hash *make_hash(List *tlist, Node *hashkey, Plan *lefttree);
static Hash *make_hash(List *tlist, List *hashkeys, Plan *lefttree);
static MergeJoin *make_mergejoin(List *tlist,
List *joinclauses, List *otherclauses,
List *mergeclauses,
@@ -910,14 +910,9 @@ create_hashjoin_plan(Query *root,
List *hashclauses;
HashJoin *join_plan;
Hash *hash_plan;
Node *innerhashkey;
List *innerhashkeys;
List *hcl;
/*
* NOTE: there will always be exactly one hashclause in the list
* best_path->path_hashclauses (cf. hash_inner_and_outer()). We
* represent it as a list anyway, for convenience with routines that
* want to work on lists of clauses.
*/
hashclauses = get_actual_clauses(best_path->path_hashclauses);
/*
@@ -950,13 +945,20 @@ create_hashjoin_plan(Query *root,
inner_tlist,
(Index) 0));
/* Now the righthand op of the sole hashclause is the inner hash key. */
innerhashkey = (Node *) get_rightop(lfirst(hashclauses));
/*
* Extract the inner hash keys (right-hand operands of the hashclauses)
* to put in the Hash node.
*/
innerhashkeys = NIL;
foreach(hcl, hashclauses)
{
innerhashkeys = lappend(innerhashkeys, get_rightop(lfirst(hcl)));
}
/*
* Build the hash node and hash join node.
*/
hash_plan = make_hash(inner_tlist, innerhashkey, inner_plan);
hash_plan = make_hash(inner_tlist, innerhashkeys, inner_plan);
join_plan = make_hashjoin(tlist,
joinclauses,
otherclauses,
@@ -1511,7 +1513,7 @@ make_hashjoin(List *tlist,
}
static Hash *
make_hash(List *tlist, Node *hashkey, Plan *lefttree)
make_hash(List *tlist, List *hashkeys, Plan *lefttree)
{
Hash *node = makeNode(Hash);
Plan *plan = &node->plan;
@@ -1528,7 +1530,7 @@ make_hash(List *tlist, Node *hashkey, Plan *lefttree)
plan->qual = NULL;
plan->lefttree = lefttree;
plan->righttree = NULL;
node->hashkey = hashkey;
node->hashkeys = hashkeys;
return node;
}

View File

@@ -7,7 +7,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.56 2002/11/26 03:01:58 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/subselect.c,v 1.57 2002/11/30 00:08:18 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -677,7 +677,7 @@ SS_finalize_plan(Plan *plan, List *rtable)
break;
case T_Hash:
finalize_primnode(((Hash *) plan)->hashkey,
finalize_primnode((Node *) ((Hash *) plan)->hashkeys,
&results);
break;

View File

@@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.80 2002/11/24 21:52:14 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.81 2002/11/30 00:08:20 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -616,7 +616,7 @@ create_mergejoin_path(Query *root,
* 'outer_path' is the cheapest outer path
* 'inner_path' is the cheapest inner path
* 'restrict_clauses' are the RestrictInfo nodes to apply at the join
* 'hashclauses' is a list of the hash join clause (always a 1-element list)
* 'hashclauses' are the RestrictInfo nodes to use as hash clauses
* (this should be a subset of the restrict_clauses list)
*/
HashPath *