1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

IN clauses appearing at top level of WHERE can now be handled as joins.

There are two implementation techniques: the executor understands a new
JOIN_IN jointype, which emits at most one matching row per left-hand row,
or the result of the IN's sub-select can be fed through a DISTINCT filter
and then joined as an ordinary relation.
Along the way, some minor code cleanup in the optimizer; notably, break
out most of the jointree-rearrangement preprocessing in planner.c and
put it in a new file prep/prepjointree.c.
This commit is contained in:
Tom Lane
2003-01-20 18:55:07 +00:00
parent be2b660ecd
commit bdfbfde1b1
47 changed files with 2075 additions and 875 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.75 2003/01/15 19:35:40 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.76 2003/01/20 18:54:50 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -32,13 +32,6 @@ static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
JoinType jointype);
#ifdef NOT_USED
static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, List *mergeclause_list,
JoinType jointype);
#endif
static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
RelOptInfo *outerrel, RelOptInfo *innerrel,
List *restrictlist, JoinType jointype);
@ -149,6 +142,8 @@ sort_inner_and_outer(Query *root,
JoinType jointype)
{
bool useallclauses;
Path *outer_path;
Path *inner_path;
List *all_pathkeys;
List *i;
@ -160,6 +155,9 @@ sort_inner_and_outer(Query *root,
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
useallclauses = false;
break;
case JOIN_RIGHT:
@ -173,6 +171,28 @@ sort_inner_and_outer(Query *root,
break;
}
/*
* We only consider the cheapest-total-cost input paths, since we are
* assuming here that a sort is required. We will consider
* cheapest-startup-cost input paths later, and only if they don't
* need a sort.
*
* If unique-ification is requested, do it and then handle as a plain
* inner join.
*/
outer_path = outerrel->cheapest_total_path;
inner_path = innerrel->cheapest_total_path;
if (jointype == JOIN_UNIQUE_OUTER)
{
outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
jointype = JOIN_INNER;
}
else if (jointype == JOIN_UNIQUE_INNER)
{
inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
jointype = JOIN_INNER;
}
/*
* Each possible ordering of the available mergejoin clauses will
* generate a differently-sorted result path at essentially the same
@ -254,17 +274,14 @@ sort_inner_and_outer(Query *root,
merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);
/*
* And now we can make the path. We only consider the cheapest-
* total-cost input paths, since we are assuming here that a sort
* is required. We will consider cheapest-startup-cost input
* paths later, and only if they don't need a sort.
* And now we can make the path.
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
outerrel->cheapest_total_path,
innerrel->cheapest_total_path,
outer_path,
inner_path,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
@ -314,15 +331,18 @@ match_unsorted_outer(Query *root,
List *mergeclause_list,
JoinType jointype)
{
JoinType save_jointype = jointype;
bool nestjoinOK;
bool useallclauses;
Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL;
Path *bestinnerjoin = NULL;
List *i;
/*
* Nestloop only supports inner and left joins. Also, if we are doing
* a right or full join, we must use *all* the mergeclauses as join
* Nestloop only supports inner, left, and IN joins. Also, if we are
* doing a right or full join, we must use *all* the mergeclauses as join
* clauses, else we will not have a valid plan. (Although these two
* flags are currently inverses, keep them separate for clarity and
* possible future changes.)
@ -331,6 +351,9 @@ match_unsorted_outer(Query *root,
{
case JOIN_INNER:
case JOIN_LEFT:
case JOIN_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
nestjoinOK = true;
useallclauses = false;
break;
@ -347,18 +370,28 @@ match_unsorted_outer(Query *root,
break;
}
if (nestjoinOK)
/*
* If we need to unique-ify the inner path, we will consider only
* the cheapest inner.
*/
if (jointype == JOIN_UNIQUE_INNER)
{
inner_cheapest_total = (Path *)
create_unique_path(root, innerrel, inner_cheapest_total);
inner_cheapest_startup = inner_cheapest_total;
jointype = JOIN_INNER;
}
else if (nestjoinOK)
{
/*
* If the cheapest inner path is a join or seqscan, we should consider
* materializing it. (This is a heuristic: we could consider it
* always, but for inner indexscans it's probably a waste of time.)
*/
if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
IsA(innerrel->cheapest_total_path, TidPath)))
if (!(IsA(inner_cheapest_total, IndexPath) ||
IsA(inner_cheapest_total, TidPath)))
matpath = (Path *)
create_material_path(innerrel,
innerrel->cheapest_total_path);
create_material_path(innerrel, inner_cheapest_total);
/*
* Get the best innerjoin indexpath (if any) for this outer rel. It's
@ -380,6 +413,18 @@ match_unsorted_outer(Query *root,
int num_sortkeys;
int sortkeycnt;
/*
* If we need to unique-ify the outer path, it's pointless to consider
* any but the cheapest outer.
*/
if (save_jointype == JOIN_UNIQUE_OUTER)
{
if (outerpath != outerrel->cheapest_total_path)
continue;
outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
jointype = JOIN_INNER;
}
/*
* The result will have this sort order (even if it is implemented
* as a nestloop, and even if some of the mergeclauses are
@ -402,7 +447,7 @@ match_unsorted_outer(Query *root,
joinrel,
jointype,
outerpath,
innerrel->cheapest_total_path,
inner_cheapest_total,
restrictlist,
merge_pathkeys));
if (matpath != NULL)
@ -414,14 +459,13 @@ match_unsorted_outer(Query *root,
matpath,
restrictlist,
merge_pathkeys));
if (innerrel->cheapest_startup_path !=
innerrel->cheapest_total_path)
if (inner_cheapest_startup != inner_cheapest_total)
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
outerpath,
innerrel->cheapest_startup_path,
inner_cheapest_startup,
restrictlist,
merge_pathkeys));
if (bestinnerjoin != NULL)
@ -435,6 +479,10 @@ match_unsorted_outer(Query *root,
merge_pathkeys));
}
/* Can't do anything else if outer path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_OUTER)
continue;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
@ -455,27 +503,30 @@ match_unsorted_outer(Query *root,
* Generate a mergejoin on the basis of sorting the cheapest
* inner. Since a sort will be needed, only cheapest total cost
* matters. (But create_mergejoin_path will do the right thing if
* innerrel->cheapest_total_path is already correctly sorted.)
* inner_cheapest_total is already correctly sorted.)
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
outerpath,
innerrel->cheapest_total_path,
inner_cheapest_total,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
innersortkeys));
/* Can't do anything else if inner path needs to be unique'd */
if (save_jointype == JOIN_UNIQUE_INNER)
continue;
/*
* Look for presorted inner paths that satisfy the innersortkey
* list --- or any truncation thereof, if we are allowed to build
* a mergejoin using a subset of the merge clauses. Here, we
* consider both cheap startup cost and cheap total cost. Ignore
* innerrel->cheapest_total_path, since we already made a path
* with it.
* inner_cheapest_total, since we already made a path with it.
*/
num_sortkeys = length(innersortkeys);
if (num_sortkeys > 1 && !useallclauses)
@ -500,7 +551,7 @@ match_unsorted_outer(Query *root,
trialsortkeys,
TOTAL_COST);
if (innerpath != NULL &&
innerpath != innerrel->cheapest_total_path &&
innerpath != inner_cheapest_total &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
@ -535,7 +586,7 @@ match_unsorted_outer(Query *root,
trialsortkeys,
STARTUP_COST);
if (innerpath != NULL &&
innerpath != innerrel->cheapest_total_path &&
innerpath != inner_cheapest_total &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
@ -584,146 +635,6 @@ match_unsorted_outer(Query *root,
}
}
#ifdef NOT_USED
/*
* match_unsorted_inner
* Generate mergejoin paths that use an explicit sort of the outer path
* with an already-ordered inner path.
*
* Every path in innerrel->pathlist is examined in turn. For each one
* whose ordering matches at least one available mergeclause, up to three
* mergejoin paths are built and handed to add_path() for 'joinrel':
* one that explicitly sorts the cheapest-total-cost outer path, one that
* uses the cheapest-total-cost outer path already ordered as required,
* and one that uses the cheapest-startup-cost already-ordered outer path
* (when it differs from the previous one). Nothing is returned.
*
* NOTE: this function is enclosed in #ifdef NOT_USED in this file, so it
* is compiled only when that symbol is defined.
*
* 'joinrel' is the join result relation
* 'outerrel' is the outer join relation
* 'innerrel' is the inner join relation
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
* clauses that apply to this join
* 'mergeclause_list' is a list of RestrictInfo nodes for available
* mergejoin clauses in this join
* 'jointype' is the type of join to do; only JOIN_INNER, JOIN_LEFT,
* JOIN_RIGHT and JOIN_FULL are accepted, anything else is reported
* through elog(ERROR)
*/
static void
match_unsorted_inner(Query *root,
RelOptInfo *joinrel,
RelOptInfo *outerrel,
RelOptInfo *innerrel,
List *restrictlist,
List *mergeclause_list,
JoinType jointype)
{
bool useallclauses;
List *i;
/*
* Decide whether a mergejoin may use only a subset of the available
* mergeclauses. For right and full joins all of them are required
* (match_unsorted_outer documents the same rule: otherwise the plan
* would not be valid).
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_LEFT:
useallclauses = false;
break;
case JOIN_RIGHT:
case JOIN_FULL:
useallclauses = true;
break;
default:
elog(ERROR, "match_unsorted_inner: unexpected join type %d",
(int) jointype);
useallclauses = false; /* keep compiler quiet */
break;
}
/* Consider each available inner path as the presorted side of a merge */
foreach(i, innerrel->pathlist)
{
Path *innerpath = (Path *) lfirst(i);
List *mergeclauses;
List *outersortkeys;
List *merge_pathkeys;
Path *totalouterpath;
Path *startupouterpath;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
innerpath->pathkeys,
mergeclause_list);
/* Done with this inner path if no chance for a mergejoin */
if (mergeclauses == NIL)
continue;
/*
* When all mergeclauses are required, an inner path whose ordering
* matched only some of them is of no use.
*/
if (useallclauses && length(mergeclauses) != length(mergeclause_list))
continue;
/* Compute the required ordering of the outer path */
outersortkeys = make_pathkeys_for_mergeclauses(root,
mergeclauses,
outerrel);
/*
* Generate a mergejoin on the basis of sorting the cheapest
* outer. Since a sort will be needed, only cheapest total cost
* matters.
*
* The last two arguments are the outer and inner sort keys; NIL for
* the inner presumably means no sort is needed on that side, since
* innerpath is used in its existing order -- TODO confirm against
* create_mergejoin_path.
*/
merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
outerrel->cheapest_total_path,
innerpath,
restrictlist,
merge_pathkeys,
mergeclauses,
outersortkeys,
NIL));
/*
* Now generate mergejoins based on already-sufficiently-ordered
* outer paths. There's likely to be some redundancy here with
* paths already generated by match_unsorted_outer ... but since
* match_unsorted_outer doesn't consider all permutations of the
* mergeclause list, it may fail to notice that this particular
* innerpath could have been used with this outerpath.
*/
totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
outersortkeys,
TOTAL_COST);
if (totalouterpath == NULL)
continue; /* there won't be a startup-cost path
* either */
/* Neither input is sorted explicitly here, hence NIL for both key lists */
merge_pathkeys = build_join_pathkeys(root, joinrel,
totalouterpath->pathkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
totalouterpath,
innerpath,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
NIL));
/*
* Also try the cheapest-startup-cost presorted outer path, unless it
* is the very path just used above.
*/
startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
outersortkeys,
STARTUP_COST);
if (startupouterpath != NULL && startupouterpath != totalouterpath)
{
merge_pathkeys = build_join_pathkeys(root, joinrel,
startupouterpath->pathkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
jointype,
startupouterpath,
innerpath,
restrictlist,
merge_pathkeys,
mergeclauses,
NIL,
NIL));
}
}
}
#endif
/*
* hash_inner_and_outer
* Create hashjoin join paths by explicitly hashing both the outer and
@ -749,11 +660,14 @@ hash_inner_and_outer(Query *root,
List *i;
/*
* Hashjoin only supports inner and left joins.
* Hashjoin only supports inner, left, and IN joins.
*/
switch (jointype)
{
case JOIN_INNER:
case JOIN_IN:
case JOIN_UNIQUE_OUTER:
case JOIN_UNIQUE_INNER:
isouterjoin = false;
break;
case JOIN_LEFT:
@ -813,21 +727,40 @@ hash_inner_and_outer(Query *root,
* cheapest-startup-cost outer paths. There's no need to consider
* any but the cheapest-total-cost inner path, however.
*/
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
Path *cheapest_total_outer = outerrel->cheapest_total_path;
Path *cheapest_total_inner = innerrel->cheapest_total_path;
/* Unique-ify if need be */
if (jointype == JOIN_UNIQUE_OUTER)
{
cheapest_total_outer = (Path *)
create_unique_path(root, outerrel, cheapest_total_outer);
cheapest_startup_outer = cheapest_total_outer;
jointype = JOIN_INNER;
}
else if (jointype == JOIN_UNIQUE_INNER)
{
cheapest_total_inner = (Path *)
create_unique_path(root, innerrel, cheapest_total_inner);
jointype = JOIN_INNER;
}
add_path(joinrel, (Path *)
create_hashjoin_path(root,
joinrel,
jointype,
outerrel->cheapest_total_path,
innerrel->cheapest_total_path,
cheapest_total_outer,
cheapest_total_inner,
restrictlist,
hashclauses));
if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
if (cheapest_startup_outer != cheapest_total_outer)
add_path(joinrel, (Path *)
create_hashjoin_path(root,
joinrel,
jointype,
outerrel->cheapest_startup_path,
innerrel->cheapest_total_path,
cheapest_startup_outer,
cheapest_total_inner,
restrictlist,
hashclauses));
}