1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

Refactor planner's pathkeys data structure to create a separate, explicit

representation of equivalence classes of variables.  This is an extensive
rewrite, but it brings a number of benefits:
* planner no longer fails in the presence of "incomplete" operator families
that don't offer operators for every possible combination of datatypes.
* avoid generating and then discarding redundant equality clauses.
* remove bogus assumption that derived equalities always use operators
named "=".
* mergejoins can work with a variety of sort orders (e.g., descending) now,
instead of tying each mergejoinable operator to exactly one sort order.
* better recognition of redundant sort columns.
* can make use of equalities appearing underneath an outer join.
This commit is contained in:
Tom Lane
2007-01-20 20:45:41 +00:00
parent 2b7334d487
commit f41803bb39
35 changed files with 3882 additions and 2719 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.110 2007/01/10 18:06:03 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.111 2007/01/20 20:45:39 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -16,7 +16,6 @@
#include <math.h>
#include "access/skey.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
@ -40,10 +39,6 @@ static List *select_mergejoin_clauses(RelOptInfo *joinrel,
RelOptInfo *innerrel,
List *restrictlist,
JoinType jointype);
static void build_mergejoin_strat_arrays(List *mergeclauses,
Oid **mergefamilies,
int **mergestrategies,
bool **mergenullsfirst);
/*
@ -205,9 +200,9 @@ sort_inner_and_outer(PlannerInfo *root,
*
* Actually, it's not quite true that every mergeclause ordering will
* generate a different path order, because some of the clauses may be
* redundant. Therefore, what we do is convert the mergeclause list to a
* list of canonical pathkeys, and then consider different orderings of
* the pathkeys.
* partially redundant (refer to the same EquivalenceClasses). Therefore,
* what we do is convert the mergeclause list to a list of canonical
* pathkeys, and then consider different orderings of the pathkeys.
*
* Generating a path for *every* permutation of the pathkeys doesn't seem
* like a winning strategy; the cost in planning time is too high. For
@ -216,76 +211,59 @@ sort_inner_and_outer(PlannerInfo *root,
* mergejoin without re-sorting against any other possible mergejoin
* partner path. But if we've not guessed the right ordering of secondary
* keys, we may end up evaluating clauses as qpquals when they could have
* been done as mergeclauses. We need to figure out a better way. (Two
* possible approaches: look at all the relevant index relations to
* suggest plausible sort orders, or make just one output path and somehow
* mark it as having a sort-order that can be rearranged freely.)
* been done as mergeclauses. (In practice, it's rare that there's more
* than two or three mergeclauses, so expending a huge amount of thought
* on that is probably not worth it.)
*
* The pathkey order returned by select_outer_pathkeys_for_merge() has
* some heuristics behind it (see that function), so be sure to try it
* exactly as-is as well as making variants.
*/
all_pathkeys = make_pathkeys_for_mergeclauses(root,
mergeclause_list,
outerrel);
all_pathkeys = select_outer_pathkeys_for_merge(root,
mergeclause_list,
joinrel);
foreach(l, all_pathkeys)
{
List *front_pathkey = (List *) lfirst(l);
List *cur_pathkeys;
List *cur_mergeclauses;
Oid *mergefamilies;
int *mergestrategies;
bool *mergenullsfirst;
List *outerkeys;
List *innerkeys;
List *merge_pathkeys;
/* Make a pathkey list with this guy first. */
/* Make a pathkey list with this guy first */
if (l != list_head(all_pathkeys))
cur_pathkeys = lcons(front_pathkey,
list_delete_ptr(list_copy(all_pathkeys),
front_pathkey));
outerkeys = lcons(front_pathkey,
list_delete_ptr(list_copy(all_pathkeys),
front_pathkey));
else
cur_pathkeys = all_pathkeys; /* no work at first one... */
outerkeys = all_pathkeys; /* no work at first one... */
/*
* Select mergeclause(s) that match this sort ordering. If we had
* redundant merge clauses then we will get a subset of the original
* clause list. There had better be some match, however...
*/
/* Sort the mergeclauses into the corresponding ordering */
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
cur_pathkeys,
outerkeys,
true,
mergeclause_list);
Assert(cur_mergeclauses != NIL);
/* Forget it if can't use all the clauses in right/full join */
if (useallclauses &&
list_length(cur_mergeclauses) != list_length(mergeclause_list))
continue;
/* Should have used them all... */
Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list));
/* Build sort pathkeys for the inner side */
innerkeys = make_inner_pathkeys_for_merge(root,
cur_mergeclauses,
outerkeys);
/* Build pathkeys representing output sort order */
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
/*
* Build sort pathkeys for both sides.
* And now we can make the path.
*
* Note: it's possible that the cheapest paths will already be sorted
* properly. create_mergejoin_path will detect that case and suppress
* an explicit sort step, so we needn't do so here.
*/
outerkeys = make_pathkeys_for_mergeclauses(root,
cur_mergeclauses,
outerrel);
innerkeys = make_pathkeys_for_mergeclauses(root,
cur_mergeclauses,
innerrel);
/* Build pathkeys representing output sort order. */
merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
outerkeys);
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(cur_mergeclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
/*
* And now we can make the path.
*/
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@ -295,9 +273,6 @@ sort_inner_and_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
cur_mergeclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
outerkeys,
innerkeys));
}
@ -427,9 +402,6 @@ match_unsorted_outer(PlannerInfo *root,
Path *outerpath = (Path *) lfirst(l);
List *merge_pathkeys;
List *mergeclauses;
Oid *mergefamilies;
int *mergestrategies;
bool *mergenullsfirst;
List *innersortkeys;
List *trialsortkeys;
Path *cheapest_startup_inner;
@ -510,6 +482,7 @@ match_unsorted_outer(PlannerInfo *root,
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
true,
mergeclause_list);
/*
@ -532,15 +505,9 @@ match_unsorted_outer(PlannerInfo *root,
continue;
/* Compute the required ordering of the inner path */
innersortkeys = make_pathkeys_for_mergeclauses(root,
mergeclauses,
innerrel);
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(mergeclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
innersortkeys = make_inner_pathkeys_for_merge(root,
mergeclauses,
outerpath->pathkeys);
/*
* Generate a mergejoin on the basis of sorting the cheapest inner.
@ -557,9 +524,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
mergeclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
NIL,
innersortkeys));
@ -613,18 +577,12 @@ match_unsorted_outer(PlannerInfo *root,
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(newclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@ -634,9 +592,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
NIL,
NIL));
cheapest_total_inner = innerpath;
@ -666,19 +621,13 @@ match_unsorted_outer(PlannerInfo *root,
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
false,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
/* Build opfamily info for execution */
build_mergejoin_strat_arrays(newclauses,
&mergefamilies,
&mergestrategies,
&mergenullsfirst);
add_path(joinrel, (Path *)
create_mergejoin_path(root,
joinrel,
@ -688,9 +637,6 @@ match_unsorted_outer(PlannerInfo *root,
restrictlist,
merge_pathkeys,
newclauses,
mergefamilies,
mergestrategies,
mergenullsfirst,
NIL,
NIL));
}
@ -909,6 +855,10 @@ best_appendrel_indexscan(PlannerInfo *root, RelOptInfo *rel,
* Select mergejoin clauses that are usable for a particular join.
* Returns a list of RestrictInfo nodes for those clauses.
*
* We also mark each selected RestrictInfo to show which side is currently
* being considered as outer. These are transient markings that are only
* good for the duration of the current add_paths_to_joinrel() call!
*
* We examine each restrictinfo clause known for the join to see
* if it is mergejoinable and involves vars from the two sub-relations
* currently of interest.
@ -939,7 +889,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue;
if (!restrictinfo->can_join ||
restrictinfo->mergejoinoperator == InvalidOid)
restrictinfo->mergeopfamilies == NIL)
{
have_nonmergeable_joinclause = true;
continue; /* not mergejoinable */
@ -954,11 +904,13 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
bms_is_subset(restrictinfo->right_relids, innerrel->relids))
{
/* righthand side is inner */
restrictinfo->outer_is_left = true;
}
else if (bms_is_subset(restrictinfo->left_relids, innerrel->relids) &&
bms_is_subset(restrictinfo->right_relids, outerrel->relids))
{
/* lefthand side is inner */
restrictinfo->outer_is_left = false;
}
else
{
@ -966,7 +918,7 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
continue; /* no good for these input relations */
}
result_list = lcons(restrictinfo, result_list);
result_list = lappend(result_list, restrictinfo);
}
/*
@ -995,46 +947,3 @@ select_mergejoin_clauses(RelOptInfo *joinrel,
return result_list;
}
/*
* Temporary hack to build opfamily and strategy info needed for mergejoin
* by the executor. We need to rethink the planner's handling of merge
* planning so that it can deal with multiple possible merge orders, but
* that's not done yet.
*/
static void
build_mergejoin_strat_arrays(List *mergeclauses,
Oid **mergefamilies,
int **mergestrategies,
bool **mergenullsfirst)
{
int nClauses = list_length(mergeclauses);
int i;
ListCell *l;
*mergefamilies = (Oid *) palloc(nClauses * sizeof(Oid));
*mergestrategies = (int *) palloc(nClauses * sizeof(int));
*mergenullsfirst = (bool *) palloc(nClauses * sizeof(bool));
i = 0;
foreach(l, mergeclauses)
{
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
/*
* We do not need to worry about whether the mergeclause will be
* commuted at runtime --- it's the same opfamily either way.
*/
(*mergefamilies)[i] = restrictinfo->mergeopfamily;
/*
* For the moment, strategy must always be LessThan --- see
* hack version of get_op_mergejoin_info
*/
(*mergestrategies)[i] = BTLessStrategyNumber;
/* And we only allow NULLS LAST, too */
(*mergenullsfirst)[i] = false;
i++;
}
}