mirror of
https://github.com/postgres/postgres.git
synced 2025-11-13 16:22:44 +03:00
IN clauses appearing at top level of WHERE can now be handled as joins.
There are two implementation techniques: either the executor handles a new JOIN_IN jointype, which emits at most one matching row per left-hand row, or the result of the IN's sub-select is fed through a DISTINCT filter and then joined as an ordinary relation. Along the way, this commit does some minor code cleanup in the optimizer; notably, it breaks out most of the jointree-rearrangement preprocessing from planner.c into a new file, prep/prepjointree.c.
This commit is contained in:
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.94 2003/01/20 18:54:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -750,6 +750,10 @@ print_path(Query *root, Path *path, int indent)
|
||||
ptype = "Material";
|
||||
subpath = ((MaterialPath *) path)->subpath;
|
||||
break;
|
||||
case T_UniquePath:
|
||||
ptype = "Unique";
|
||||
subpath = ((UniquePath *) path)->subpath;
|
||||
break;
|
||||
case T_NestPath:
|
||||
ptype = "NestLoop";
|
||||
join = true;
|
||||
|
||||
@@ -42,7 +42,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.100 2003/01/15 19:35:39 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.101 2003/01/20 18:54:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1024,12 +1024,17 @@ cost_hashjoin(Path *path, Query *root,
|
||||
* Bias against putting larger relation on inside. We don't want an
|
||||
* absolute prohibition, though, since larger relation might have
|
||||
* better bucketsize --- and we can't trust the size estimates
|
||||
* unreservedly, anyway. Instead, inflate the startup cost by the
|
||||
* unreservedly, anyway. Instead, inflate the run cost by the
|
||||
* square root of the size ratio. (Why square root? No real good
|
||||
* reason, but it seems reasonable...)
|
||||
*
|
||||
* Note: before 7.4 we implemented this by inflating startup cost;
|
||||
* but if there's a disable_cost component in the input paths'
|
||||
* startup cost, that unfairly penalizes the hash. Probably it'd
|
||||
* be better to keep track of disable penalty separately from cost.
|
||||
*/
|
||||
if (innerbytes > outerbytes && outerbytes > 0)
|
||||
startup_cost *= sqrt(innerbytes / outerbytes);
|
||||
run_cost *= sqrt(innerbytes / outerbytes);
|
||||
|
||||
path->startup_cost = startup_cost;
|
||||
path->total_cost = startup_cost + run_cost;
|
||||
@@ -1492,22 +1497,26 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
|
||||
JoinType jointype,
|
||||
List *restrictlist)
|
||||
{
|
||||
Selectivity selec;
|
||||
double temp;
|
||||
|
||||
/* Start with the Cartesian product */
|
||||
temp = outer_rel->rows * inner_rel->rows;
|
||||
UniquePath *upath;
|
||||
|
||||
/*
|
||||
* Apply join restrictivity. Note that we are only considering
|
||||
* Compute joinclause selectivity. Note that we are only considering
|
||||
* clauses that become restriction clauses at this join level; we are
|
||||
* not double-counting them because they were not considered in
|
||||
* estimating the sizes of the component rels.
|
||||
*/
|
||||
temp *= restrictlist_selectivity(root,
|
||||
selec = restrictlist_selectivity(root,
|
||||
restrictlist,
|
||||
0);
|
||||
|
||||
/*
|
||||
* Normally, we multiply size of Cartesian product by selectivity.
|
||||
* But for JOIN_IN, we just multiply the lefthand size by the selectivity
|
||||
* (is that really right?). For UNIQUE_OUTER or UNIQUE_INNER, use
|
||||
* the estimated number of distinct rows (again, is that right?)
|
||||
*
|
||||
* If we are doing an outer join, take that into account: the output
|
||||
* must be at least as large as the non-nullable input. (Is there any
|
||||
* chance of being even smarter?)
|
||||
@@ -1515,24 +1524,45 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
|
||||
switch (jointype)
|
||||
{
|
||||
case JOIN_INNER:
|
||||
temp = outer_rel->rows * inner_rel->rows * selec;
|
||||
break;
|
||||
case JOIN_LEFT:
|
||||
temp = outer_rel->rows * inner_rel->rows * selec;
|
||||
if (temp < outer_rel->rows)
|
||||
temp = outer_rel->rows;
|
||||
break;
|
||||
case JOIN_RIGHT:
|
||||
temp = outer_rel->rows * inner_rel->rows * selec;
|
||||
if (temp < inner_rel->rows)
|
||||
temp = inner_rel->rows;
|
||||
break;
|
||||
case JOIN_FULL:
|
||||
temp = outer_rel->rows * inner_rel->rows * selec;
|
||||
if (temp < outer_rel->rows)
|
||||
temp = outer_rel->rows;
|
||||
if (temp < inner_rel->rows)
|
||||
temp = inner_rel->rows;
|
||||
break;
|
||||
case JOIN_IN:
|
||||
temp = outer_rel->rows * selec;
|
||||
break;
|
||||
case JOIN_REVERSE_IN:
|
||||
temp = inner_rel->rows * selec;
|
||||
break;
|
||||
case JOIN_UNIQUE_OUTER:
|
||||
upath = create_unique_path(root, outer_rel,
|
||||
outer_rel->cheapest_total_path);
|
||||
temp = upath->rows * inner_rel->rows * selec;
|
||||
break;
|
||||
case JOIN_UNIQUE_INNER:
|
||||
upath = create_unique_path(root, inner_rel,
|
||||
inner_rel->cheapest_total_path);
|
||||
temp = outer_rel->rows * upath->rows * selec;
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "set_joinrel_size_estimates: unsupported join type %d",
|
||||
(int) jointype);
|
||||
temp = 0; /* keep compiler quiet */
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.131 2003/01/15 19:35:39 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.132 2003/01/20 18:54:49 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1401,11 +1401,13 @@ best_inner_indexscan(Query *root, RelOptInfo *rel,
|
||||
MemoryContext oldcontext;
|
||||
|
||||
/*
|
||||
* Nestloop only supports inner and left joins.
|
||||
* Nestloop only supports inner, left, and IN joins.
|
||||
*/
|
||||
switch (jointype)
|
||||
{
|
||||
case JOIN_INNER:
|
||||
case JOIN_IN:
|
||||
case JOIN_UNIQUE_OUTER:
|
||||
isouterjoin = false;
|
||||
break;
|
||||
case JOIN_LEFT:
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.75 2003/01/15 19:35:40 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.76 2003/01/20 18:54:50 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -32,13 +32,6 @@ static void match_unsorted_outer(Query *root, RelOptInfo *joinrel,
|
||||
RelOptInfo *outerrel, RelOptInfo *innerrel,
|
||||
List *restrictlist, List *mergeclause_list,
|
||||
JoinType jointype);
|
||||
|
||||
#ifdef NOT_USED
|
||||
static void match_unsorted_inner(Query *root, RelOptInfo *joinrel,
|
||||
RelOptInfo *outerrel, RelOptInfo *innerrel,
|
||||
List *restrictlist, List *mergeclause_list,
|
||||
JoinType jointype);
|
||||
#endif
|
||||
static void hash_inner_and_outer(Query *root, RelOptInfo *joinrel,
|
||||
RelOptInfo *outerrel, RelOptInfo *innerrel,
|
||||
List *restrictlist, JoinType jointype);
|
||||
@@ -149,6 +142,8 @@ sort_inner_and_outer(Query *root,
|
||||
JoinType jointype)
|
||||
{
|
||||
bool useallclauses;
|
||||
Path *outer_path;
|
||||
Path *inner_path;
|
||||
List *all_pathkeys;
|
||||
List *i;
|
||||
|
||||
@@ -160,6 +155,9 @@ sort_inner_and_outer(Query *root,
|
||||
{
|
||||
case JOIN_INNER:
|
||||
case JOIN_LEFT:
|
||||
case JOIN_IN:
|
||||
case JOIN_UNIQUE_OUTER:
|
||||
case JOIN_UNIQUE_INNER:
|
||||
useallclauses = false;
|
||||
break;
|
||||
case JOIN_RIGHT:
|
||||
@@ -173,6 +171,28 @@ sort_inner_and_outer(Query *root,
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* We only consider the cheapest-total-cost input paths, since we are
|
||||
* assuming here that a sort is required. We will consider
|
||||
* cheapest-startup-cost input paths later, and only if they don't
|
||||
* need a sort.
|
||||
*
|
||||
* If unique-ification is requested, do it and then handle as a plain
|
||||
* inner join.
|
||||
*/
|
||||
outer_path = outerrel->cheapest_total_path;
|
||||
inner_path = innerrel->cheapest_total_path;
|
||||
if (jointype == JOIN_UNIQUE_OUTER)
|
||||
{
|
||||
outer_path = (Path *) create_unique_path(root, outerrel, outer_path);
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
else if (jointype == JOIN_UNIQUE_INNER)
|
||||
{
|
||||
inner_path = (Path *) create_unique_path(root, innerrel, inner_path);
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
|
||||
/*
|
||||
* Each possible ordering of the available mergejoin clauses will
|
||||
* generate a differently-sorted result path at essentially the same
|
||||
@@ -254,17 +274,14 @@ sort_inner_and_outer(Query *root,
|
||||
merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);
|
||||
|
||||
/*
|
||||
* And now we can make the path. We only consider the cheapest-
|
||||
* total-cost input paths, since we are assuming here that a sort
|
||||
* is required. We will consider cheapest-startup-cost input
|
||||
* paths later, and only if they don't need a sort.
|
||||
* And now we can make the path.
|
||||
*/
|
||||
add_path(joinrel, (Path *)
|
||||
create_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerrel->cheapest_total_path,
|
||||
innerrel->cheapest_total_path,
|
||||
outer_path,
|
||||
inner_path,
|
||||
restrictlist,
|
||||
merge_pathkeys,
|
||||
cur_mergeclauses,
|
||||
@@ -314,15 +331,18 @@ match_unsorted_outer(Query *root,
|
||||
List *mergeclause_list,
|
||||
JoinType jointype)
|
||||
{
|
||||
JoinType save_jointype = jointype;
|
||||
bool nestjoinOK;
|
||||
bool useallclauses;
|
||||
Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
|
||||
Path *inner_cheapest_total = innerrel->cheapest_total_path;
|
||||
Path *matpath = NULL;
|
||||
Path *bestinnerjoin = NULL;
|
||||
List *i;
|
||||
|
||||
/*
|
||||
* Nestloop only supports inner and left joins. Also, if we are doing
|
||||
* a right or full join, we must use *all* the mergeclauses as join
|
||||
* Nestloop only supports inner, left, and IN joins. Also, if we are
|
||||
* doing a right or full join, we must use *all* the mergeclauses as join
|
||||
* clauses, else we will not have a valid plan. (Although these two
|
||||
* flags are currently inverses, keep them separate for clarity and
|
||||
* possible future changes.)
|
||||
@@ -331,6 +351,9 @@ match_unsorted_outer(Query *root,
|
||||
{
|
||||
case JOIN_INNER:
|
||||
case JOIN_LEFT:
|
||||
case JOIN_IN:
|
||||
case JOIN_UNIQUE_OUTER:
|
||||
case JOIN_UNIQUE_INNER:
|
||||
nestjoinOK = true;
|
||||
useallclauses = false;
|
||||
break;
|
||||
@@ -347,18 +370,28 @@ match_unsorted_outer(Query *root,
|
||||
break;
|
||||
}
|
||||
|
||||
if (nestjoinOK)
|
||||
/*
|
||||
* If we need to unique-ify the inner path, we will consider only
|
||||
* the cheapest inner.
|
||||
*/
|
||||
if (jointype == JOIN_UNIQUE_INNER)
|
||||
{
|
||||
inner_cheapest_total = (Path *)
|
||||
create_unique_path(root, innerrel, inner_cheapest_total);
|
||||
inner_cheapest_startup = inner_cheapest_total;
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
else if (nestjoinOK)
|
||||
{
|
||||
/*
|
||||
* If the cheapest inner path is a join or seqscan, we should consider
|
||||
* materializing it. (This is a heuristic: we could consider it
|
||||
* always, but for inner indexscans it's probably a waste of time.)
|
||||
*/
|
||||
if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
|
||||
IsA(innerrel->cheapest_total_path, TidPath)))
|
||||
if (!(IsA(inner_cheapest_total, IndexPath) ||
|
||||
IsA(inner_cheapest_total, TidPath)))
|
||||
matpath = (Path *)
|
||||
create_material_path(innerrel,
|
||||
innerrel->cheapest_total_path);
|
||||
create_material_path(innerrel, inner_cheapest_total);
|
||||
|
||||
/*
|
||||
* Get the best innerjoin indexpath (if any) for this outer rel. It's
|
||||
@@ -380,6 +413,18 @@ match_unsorted_outer(Query *root,
|
||||
int num_sortkeys;
|
||||
int sortkeycnt;
|
||||
|
||||
/*
|
||||
* If we need to unique-ify the outer path, it's pointless to consider
|
||||
* any but the cheapest outer.
|
||||
*/
|
||||
if (save_jointype == JOIN_UNIQUE_OUTER)
|
||||
{
|
||||
if (outerpath != outerrel->cheapest_total_path)
|
||||
continue;
|
||||
outerpath = (Path *) create_unique_path(root, outerrel, outerpath);
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
|
||||
/*
|
||||
* The result will have this sort order (even if it is implemented
|
||||
* as a nestloop, and even if some of the mergeclauses are
|
||||
@@ -402,7 +447,7 @@ match_unsorted_outer(Query *root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerpath,
|
||||
innerrel->cheapest_total_path,
|
||||
inner_cheapest_total,
|
||||
restrictlist,
|
||||
merge_pathkeys));
|
||||
if (matpath != NULL)
|
||||
@@ -414,14 +459,13 @@ match_unsorted_outer(Query *root,
|
||||
matpath,
|
||||
restrictlist,
|
||||
merge_pathkeys));
|
||||
if (innerrel->cheapest_startup_path !=
|
||||
innerrel->cheapest_total_path)
|
||||
if (inner_cheapest_startup != inner_cheapest_total)
|
||||
add_path(joinrel, (Path *)
|
||||
create_nestloop_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerpath,
|
||||
innerrel->cheapest_startup_path,
|
||||
inner_cheapest_startup,
|
||||
restrictlist,
|
||||
merge_pathkeys));
|
||||
if (bestinnerjoin != NULL)
|
||||
@@ -435,6 +479,10 @@ match_unsorted_outer(Query *root,
|
||||
merge_pathkeys));
|
||||
}
|
||||
|
||||
/* Can't do anything else if outer path needs to be unique'd */
|
||||
if (save_jointype == JOIN_UNIQUE_OUTER)
|
||||
continue;
|
||||
|
||||
/* Look for useful mergeclauses (if any) */
|
||||
mergeclauses = find_mergeclauses_for_pathkeys(root,
|
||||
outerpath->pathkeys,
|
||||
@@ -455,27 +503,30 @@ match_unsorted_outer(Query *root,
|
||||
* Generate a mergejoin on the basis of sorting the cheapest
|
||||
* inner. Since a sort will be needed, only cheapest total cost
|
||||
* matters. (But create_mergejoin_path will do the right thing if
|
||||
* innerrel->cheapest_total_path is already correctly sorted.)
|
||||
* inner_cheapest_total is already correctly sorted.)
|
||||
*/
|
||||
add_path(joinrel, (Path *)
|
||||
create_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerpath,
|
||||
innerrel->cheapest_total_path,
|
||||
inner_cheapest_total,
|
||||
restrictlist,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
NIL,
|
||||
innersortkeys));
|
||||
|
||||
/* Can't do anything else if inner path needs to be unique'd */
|
||||
if (save_jointype == JOIN_UNIQUE_INNER)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Look for presorted inner paths that satisfy the innersortkey
|
||||
* list --- or any truncation thereof, if we are allowed to build
|
||||
* a mergejoin using a subset of the merge clauses. Here, we
|
||||
* consider both cheap startup cost and cheap total cost. Ignore
|
||||
* innerrel->cheapest_total_path, since we already made a path
|
||||
* with it.
|
||||
* inner_cheapest_total, since we already made a path with it.
|
||||
*/
|
||||
num_sortkeys = length(innersortkeys);
|
||||
if (num_sortkeys > 1 && !useallclauses)
|
||||
@@ -500,7 +551,7 @@ match_unsorted_outer(Query *root,
|
||||
trialsortkeys,
|
||||
TOTAL_COST);
|
||||
if (innerpath != NULL &&
|
||||
innerpath != innerrel->cheapest_total_path &&
|
||||
innerpath != inner_cheapest_total &&
|
||||
(cheapest_total_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_total_inner,
|
||||
TOTAL_COST) < 0))
|
||||
@@ -535,7 +586,7 @@ match_unsorted_outer(Query *root,
|
||||
trialsortkeys,
|
||||
STARTUP_COST);
|
||||
if (innerpath != NULL &&
|
||||
innerpath != innerrel->cheapest_total_path &&
|
||||
innerpath != inner_cheapest_total &&
|
||||
(cheapest_startup_inner == NULL ||
|
||||
compare_path_costs(innerpath, cheapest_startup_inner,
|
||||
STARTUP_COST) < 0))
|
||||
@@ -584,146 +635,6 @@ match_unsorted_outer(Query *root,
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef NOT_USED
|
||||
|
||||
/*
|
||||
* match_unsorted_inner
|
||||
* Generate mergejoin paths that use an explicit sort of the outer path
|
||||
* with an already-ordered inner path.
|
||||
*
|
||||
* 'joinrel' is the join result relation
|
||||
* 'outerrel' is the outer join relation
|
||||
* 'innerrel' is the inner join relation
|
||||
* 'restrictlist' contains all of the RestrictInfo nodes for restriction
|
||||
* clauses that apply to this join
|
||||
* 'mergeclause_list' is a list of RestrictInfo nodes for available
|
||||
* mergejoin clauses in this join
|
||||
* 'jointype' is the type of join to do
|
||||
*/
|
||||
static void
|
||||
match_unsorted_inner(Query *root,
|
||||
RelOptInfo *joinrel,
|
||||
RelOptInfo *outerrel,
|
||||
RelOptInfo *innerrel,
|
||||
List *restrictlist,
|
||||
List *mergeclause_list,
|
||||
JoinType jointype)
|
||||
{
|
||||
bool useallclauses;
|
||||
List *i;
|
||||
|
||||
switch (jointype)
|
||||
{
|
||||
case JOIN_INNER:
|
||||
case JOIN_LEFT:
|
||||
useallclauses = false;
|
||||
break;
|
||||
case JOIN_RIGHT:
|
||||
case JOIN_FULL:
|
||||
useallclauses = true;
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "match_unsorted_inner: unexpected join type %d",
|
||||
(int) jointype);
|
||||
useallclauses = false; /* keep compiler quiet */
|
||||
break;
|
||||
}
|
||||
|
||||
foreach(i, innerrel->pathlist)
|
||||
{
|
||||
Path *innerpath = (Path *) lfirst(i);
|
||||
List *mergeclauses;
|
||||
List *outersortkeys;
|
||||
List *merge_pathkeys;
|
||||
Path *totalouterpath;
|
||||
Path *startupouterpath;
|
||||
|
||||
/* Look for useful mergeclauses (if any) */
|
||||
mergeclauses = find_mergeclauses_for_pathkeys(root,
|
||||
innerpath->pathkeys,
|
||||
mergeclause_list);
|
||||
|
||||
/* Done with this inner path if no chance for a mergejoin */
|
||||
if (mergeclauses == NIL)
|
||||
continue;
|
||||
if (useallclauses && length(mergeclauses) != length(mergeclause_list))
|
||||
continue;
|
||||
|
||||
/* Compute the required ordering of the outer path */
|
||||
outersortkeys = make_pathkeys_for_mergeclauses(root,
|
||||
mergeclauses,
|
||||
outerrel);
|
||||
|
||||
/*
|
||||
* Generate a mergejoin on the basis of sorting the cheapest
|
||||
* outer. Since a sort will be needed, only cheapest total cost
|
||||
* matters.
|
||||
*/
|
||||
merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
|
||||
add_path(joinrel, (Path *)
|
||||
create_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerrel->cheapest_total_path,
|
||||
innerpath,
|
||||
restrictlist,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
outersortkeys,
|
||||
NIL));
|
||||
|
||||
/*
|
||||
* Now generate mergejoins based on already-sufficiently-ordered
|
||||
* outer paths. There's likely to be some redundancy here with
|
||||
* paths already generated by match_unsorted_outer ... but since
|
||||
* match_unsorted_outer doesn't consider all permutations of the
|
||||
* mergeclause list, it may fail to notice that this particular
|
||||
* innerpath could have been used with this outerpath.
|
||||
*/
|
||||
totalouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
|
||||
outersortkeys,
|
||||
TOTAL_COST);
|
||||
if (totalouterpath == NULL)
|
||||
continue; /* there won't be a startup-cost path
|
||||
* either */
|
||||
|
||||
merge_pathkeys = build_join_pathkeys(root, joinrel,
|
||||
totalouterpath->pathkeys);
|
||||
add_path(joinrel, (Path *)
|
||||
create_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
totalouterpath,
|
||||
innerpath,
|
||||
restrictlist,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
NIL,
|
||||
NIL));
|
||||
|
||||
startupouterpath = get_cheapest_path_for_pathkeys(outerrel->pathlist,
|
||||
outersortkeys,
|
||||
STARTUP_COST);
|
||||
if (startupouterpath != NULL && startupouterpath != totalouterpath)
|
||||
{
|
||||
merge_pathkeys = build_join_pathkeys(root, joinrel,
|
||||
startupouterpath->pathkeys);
|
||||
add_path(joinrel, (Path *)
|
||||
create_mergejoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
startupouterpath,
|
||||
innerpath,
|
||||
restrictlist,
|
||||
merge_pathkeys,
|
||||
mergeclauses,
|
||||
NIL,
|
||||
NIL));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* hash_inner_and_outer
|
||||
* Create hashjoin join paths by explicitly hashing both the outer and
|
||||
@@ -749,11 +660,14 @@ hash_inner_and_outer(Query *root,
|
||||
List *i;
|
||||
|
||||
/*
|
||||
* Hashjoin only supports inner and left joins.
|
||||
* Hashjoin only supports inner, left, and IN joins.
|
||||
*/
|
||||
switch (jointype)
|
||||
{
|
||||
case JOIN_INNER:
|
||||
case JOIN_IN:
|
||||
case JOIN_UNIQUE_OUTER:
|
||||
case JOIN_UNIQUE_INNER:
|
||||
isouterjoin = false;
|
||||
break;
|
||||
case JOIN_LEFT:
|
||||
@@ -813,21 +727,40 @@ hash_inner_and_outer(Query *root,
|
||||
* cheapest-startup-cost outer paths. There's no need to consider
|
||||
* any but the cheapest-total-cost inner path, however.
|
||||
*/
|
||||
Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
|
||||
Path *cheapest_total_outer = outerrel->cheapest_total_path;
|
||||
Path *cheapest_total_inner = innerrel->cheapest_total_path;
|
||||
|
||||
/* Unique-ify if need be */
|
||||
if (jointype == JOIN_UNIQUE_OUTER)
|
||||
{
|
||||
cheapest_total_outer = (Path *)
|
||||
create_unique_path(root, outerrel, cheapest_total_outer);
|
||||
cheapest_startup_outer = cheapest_total_outer;
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
else if (jointype == JOIN_UNIQUE_INNER)
|
||||
{
|
||||
cheapest_total_inner = (Path *)
|
||||
create_unique_path(root, innerrel, cheapest_total_inner);
|
||||
jointype = JOIN_INNER;
|
||||
}
|
||||
|
||||
add_path(joinrel, (Path *)
|
||||
create_hashjoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerrel->cheapest_total_path,
|
||||
innerrel->cheapest_total_path,
|
||||
cheapest_total_outer,
|
||||
cheapest_total_inner,
|
||||
restrictlist,
|
||||
hashclauses));
|
||||
if (outerrel->cheapest_startup_path != outerrel->cheapest_total_path)
|
||||
if (cheapest_startup_outer != cheapest_total_outer)
|
||||
add_path(joinrel, (Path *)
|
||||
create_hashjoin_path(root,
|
||||
joinrel,
|
||||
jointype,
|
||||
outerrel->cheapest_startup_path,
|
||||
innerrel->cheapest_total_path,
|
||||
cheapest_startup_outer,
|
||||
cheapest_total_inner,
|
||||
restrictlist,
|
||||
hashclauses));
|
||||
}
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.58 2002/12/16 21:30:30 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinrels.c,v 1.59 2003/01/20 18:54:51 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -172,7 +172,7 @@ make_rels_by_joins(Query *root, int level, List **joinrels)
|
||||
jrel = make_join_rel(root, old_rel, new_rel,
|
||||
JOIN_INNER);
|
||||
/* Avoid making duplicate entries ... */
|
||||
if (!ptrMember(jrel, result_rels))
|
||||
if (jrel && !ptrMember(jrel, result_rels))
|
||||
result_rels = lcons(jrel, result_rels);
|
||||
break; /* need not consider more
|
||||
* joininfos */
|
||||
@@ -276,10 +276,9 @@ make_rels_by_clause_joins(Query *root,
|
||||
|
||||
/*
|
||||
* Avoid entering same joinrel into our output list more
|
||||
* than once. (make_rels_by_joins doesn't really care,
|
||||
* but GEQO does.)
|
||||
* than once.
|
||||
*/
|
||||
if (!ptrMember(jrel, result))
|
||||
if (jrel && !ptrMember(jrel, result))
|
||||
result = lcons(jrel, result);
|
||||
}
|
||||
}
|
||||
@@ -323,7 +322,8 @@ make_rels_by_clauseless_joins(Query *root,
|
||||
* As long as given other_rels are distinct, don't need to
|
||||
* test to see if jrel is already part of output list.
|
||||
*/
|
||||
result = lcons(jrel, result);
|
||||
if (jrel)
|
||||
result = lcons(jrel, result);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -367,6 +367,9 @@ make_jointree_rel(Query *root, Node *jtnode)
|
||||
/* Make this join rel */
|
||||
rel = make_join_rel(root, lrel, rrel, j->jointype);
|
||||
|
||||
if (rel == NULL)
|
||||
elog(ERROR, "make_jointree_rel: invalid join order!?");
|
||||
|
||||
/*
|
||||
* Since we are only going to consider this one way to do it,
|
||||
* we're done generating Paths for this joinrel and can now select
|
||||
@@ -395,19 +398,121 @@ make_jointree_rel(Query *root, Node *jtnode)
|
||||
* created with the two rels as outer and inner rel.
|
||||
* (The join rel may already contain paths generated from other
|
||||
* pairs of rels that add up to the same set of base rels.)
|
||||
*
|
||||
* NB: will return NULL if attempted join is not valid. This can only
|
||||
* happen when working with IN clauses that have been turned into joins.
|
||||
*/
|
||||
RelOptInfo *
|
||||
make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
|
||||
JoinType jointype)
|
||||
{
|
||||
List *joinrelids;
|
||||
RelOptInfo *joinrel;
|
||||
List *restrictlist;
|
||||
|
||||
/* We should never try to join two overlapping sets of rels. */
|
||||
Assert(nonoverlap_setsi(rel1->relids, rel2->relids));
|
||||
|
||||
/* Construct Relids set that identifies the joinrel. */
|
||||
joinrelids = nconc(listCopy(rel1->relids), listCopy(rel2->relids));
|
||||
|
||||
/*
|
||||
* If we are implementing IN clauses as joins, there are some joins
|
||||
* that are illegal. Check to see if the proposed join is trouble.
|
||||
* We can skip the work if looking at an outer join, however, because
|
||||
* only top-level joins might be affected.
|
||||
*/
|
||||
if (jointype == JOIN_INNER)
|
||||
{
|
||||
List *l;
|
||||
|
||||
foreach(l, root->in_info_list)
|
||||
{
|
||||
InClauseInfo *ininfo = (InClauseInfo *) lfirst(l);
|
||||
|
||||
/*
|
||||
* Cannot join if proposed join contains part, but only
|
||||
* part, of the RHS, *and* it contains rels not in the RHS.
|
||||
*
|
||||
* Singleton RHS cannot be a problem, so skip expensive tests.
|
||||
*/
|
||||
if (length(ininfo->righthand) > 1 &&
|
||||
overlap_setsi(ininfo->righthand, joinrelids) &&
|
||||
!is_subseti(ininfo->righthand, joinrelids) &&
|
||||
!is_subseti(joinrelids, ininfo->righthand))
|
||||
{
|
||||
freeList(joinrelids);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* No issue unless we are looking at a join of the IN's RHS
|
||||
* to other stuff.
|
||||
*/
|
||||
if (! (length(ininfo->righthand) < length(joinrelids) &&
|
||||
is_subseti(ininfo->righthand, joinrelids)))
|
||||
continue;
|
||||
/*
|
||||
* If we already joined IN's RHS to any part of its LHS in either
|
||||
* input path, then this join is not constrained (the necessary
|
||||
* work was done at a lower level).
|
||||
*/
|
||||
if (overlap_setsi(ininfo->lefthand, rel1->relids) &&
|
||||
is_subseti(ininfo->righthand, rel1->relids))
|
||||
continue;
|
||||
if (overlap_setsi(ininfo->lefthand, rel2->relids) &&
|
||||
is_subseti(ininfo->righthand, rel2->relids))
|
||||
continue;
|
||||
/*
|
||||
* JOIN_IN technique will work if outerrel includes LHS and
|
||||
* innerrel is exactly RHS; conversely JOIN_REVERSE_IN handles
|
||||
* RHS/LHS.
|
||||
*
|
||||
* JOIN_UNIQUE_OUTER will work if outerrel is exactly RHS;
|
||||
* conversely JOIN_UNIQUE_INNER will work if innerrel is
|
||||
* exactly RHS.
|
||||
*
|
||||
* But none of these will work if we already found another IN
|
||||
* that needs to trigger here.
|
||||
*/
|
||||
if (jointype != JOIN_INNER)
|
||||
{
|
||||
freeList(joinrelids);
|
||||
return NULL;
|
||||
}
|
||||
if (is_subseti(ininfo->lefthand, rel1->relids) &&
|
||||
sameseti(ininfo->righthand, rel2->relids))
|
||||
{
|
||||
jointype = JOIN_IN;
|
||||
}
|
||||
else if (is_subseti(ininfo->lefthand, rel2->relids) &&
|
||||
sameseti(ininfo->righthand, rel1->relids))
|
||||
{
|
||||
jointype = JOIN_REVERSE_IN;
|
||||
}
|
||||
else if (sameseti(ininfo->righthand, rel1->relids))
|
||||
{
|
||||
jointype = JOIN_UNIQUE_OUTER;
|
||||
}
|
||||
else if (sameseti(ininfo->righthand, rel2->relids))
|
||||
{
|
||||
jointype = JOIN_UNIQUE_INNER;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* invalid join path */
|
||||
freeList(joinrelids);
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find or build the join RelOptInfo, and compute the restrictlist
|
||||
* that goes with this particular joining.
|
||||
*/
|
||||
joinrel = build_join_rel(root, rel1, rel2, jointype, &restrictlist);
|
||||
joinrel = build_join_rel(root, joinrelids, rel1, rel2, jointype,
|
||||
&restrictlist);
|
||||
|
||||
/*
|
||||
* Consider paths using each rel as both outer and inner.
|
||||
@@ -438,11 +543,43 @@ make_join_rel(Query *root, RelOptInfo *rel1, RelOptInfo *rel2,
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_LEFT,
|
||||
restrictlist);
|
||||
break;
|
||||
case JOIN_IN:
|
||||
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_IN,
|
||||
restrictlist);
|
||||
/* REVERSE_IN isn't supported by joinpath.c */
|
||||
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
|
||||
restrictlist);
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
|
||||
restrictlist);
|
||||
break;
|
||||
case JOIN_REVERSE_IN:
|
||||
/* REVERSE_IN isn't supported by joinpath.c */
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_IN,
|
||||
restrictlist);
|
||||
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
|
||||
restrictlist);
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
|
||||
restrictlist);
|
||||
break;
|
||||
case JOIN_UNIQUE_OUTER:
|
||||
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_OUTER,
|
||||
restrictlist);
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_INNER,
|
||||
restrictlist);
|
||||
break;
|
||||
case JOIN_UNIQUE_INNER:
|
||||
add_paths_to_joinrel(root, joinrel, rel1, rel2, JOIN_UNIQUE_INNER,
|
||||
restrictlist);
|
||||
add_paths_to_joinrel(root, joinrel, rel2, rel1, JOIN_UNIQUE_OUTER,
|
||||
restrictlist);
|
||||
break;
|
||||
default:
|
||||
elog(ERROR, "make_join_rel: unsupported join type %d",
|
||||
(int) jointype);
|
||||
break;
|
||||
}
|
||||
|
||||
freeList(joinrelids);
|
||||
|
||||
return joinrel;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user