1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-27 23:21:58 +03:00

Planner speedup hacking. Avoid saving useless pathkeys, so that path
comparison does not consider paths different when they differ only in
uninteresting aspects of sort order.  (We had a special case of this
consideration for indexscans already, but generalize it to apply to
ordered join paths too.)  Be stricter about what is a canonical pathkey
to allow faster pathkey comparison.  Cache canonical pathkeys and
dispersion stats for left and right sides of a RestrictInfo's clause,
to avoid repeated computation.  Total speedup will depend on number of
tables in a query, but I see about 4x speedup of planning phase for
a sample seven-table query.
This commit is contained in:
Tom Lane
2000-12-14 22:30:45 +00:00
parent db11f4382a
commit ea166f1146
16 changed files with 622 additions and 365 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.59 2000/11/23 03:57:31 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.60 2000/12/14 22:30:43 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -152,6 +152,7 @@ sort_inner_and_outer(Query *root,
List *mergeclause_list,
JoinType jointype)
{
List *all_pathkeys;
List *i;
/*
@ -159,36 +160,57 @@ sort_inner_and_outer(Query *root,
* generate a differently-sorted result path at essentially the same
* cost. We have no basis for choosing one over another at this level
* of joining, but some sort orders may be more useful than others for
* higher-level mergejoins. Generating a path here for *every*
* permutation of mergejoin clauses doesn't seem like a winning
* strategy, however; the cost in planning time is too high.
* higher-level mergejoins, so it's worth considering multiple orderings.
*
* For now, we generate one path for each mergejoin clause, listing that
* clause first and the rest in random order. This should allow at
* Actually, it's not quite true that every mergeclause ordering will
* generate a different path order, because some of the clauses may be
* redundant. Therefore, what we do is convert the mergeclause list to
* a list of canonical pathkeys, and then consider different orderings
* of the pathkeys.
*
* Generating a path for *every* permutation of the pathkeys doesn't
* seem like a winning strategy; the cost in planning time is too high.
* For now, we generate one path for each pathkey, listing that pathkey
* first and the rest in random order. This should allow at
* least a one-clause mergejoin without re-sorting against any other
* possible mergejoin partner path. But if we've not guessed the
* right ordering of secondary clauses, we may end up evaluating
* right ordering of secondary keys, we may end up evaluating
* clauses as qpquals when they could have been done as mergeclauses.
* We need to figure out a better way. (Two possible approaches: look
* at all the relevant index relations to suggest plausible sort
* orders, or make just one output path and somehow mark it as having
* a sort-order that can be rearranged freely.)
*/
foreach(i, mergeclause_list)
all_pathkeys = make_pathkeys_for_mergeclauses(root,
mergeclause_list,
outerrel);
foreach(i, all_pathkeys)
{
RestrictInfo *restrictinfo = lfirst(i);
List *curclause_list;
List *front_pathkey = lfirst(i);
List *cur_pathkeys;
List *cur_mergeclauses;
List *outerkeys;
List *innerkeys;
List *merge_pathkeys;
/* Make a mergeclause list with this guy first. */
if (i != mergeclause_list)
curclause_list = lcons(restrictinfo,
lremove(restrictinfo,
listCopy(mergeclause_list)));
/* Make a pathkey list with this guy first. */
if (i != all_pathkeys)
cur_pathkeys = lcons(front_pathkey,
lremove(front_pathkey,
listCopy(all_pathkeys)));
else
curclause_list = mergeclause_list; /* no work at first one... */
cur_pathkeys = all_pathkeys; /* no work at first one... */
/*
* Select mergeclause(s) that match this sort ordering. If we had
* redundant merge clauses then we will get a subset of the original
* clause list. There had better be some match, however...
*/
cur_mergeclauses = find_mergeclauses_for_pathkeys(root,
cur_pathkeys,
mergeclause_list);
Assert(cur_mergeclauses != NIL);
/*
* Build sort pathkeys for both sides.
@ -198,15 +220,13 @@ sort_inner_and_outer(Query *root,
* suppress an explicit sort step, so we needn't do so here.
*/
outerkeys = make_pathkeys_for_mergeclauses(root,
curclause_list,
cur_mergeclauses,
outerrel);
innerkeys = make_pathkeys_for_mergeclauses(root,
curclause_list,
cur_mergeclauses,
innerrel);
/* Build pathkeys representing output sort order. */
merge_pathkeys = build_join_pathkeys(outerkeys,
joinrel->targetlist,
root->equi_key_list);
merge_pathkeys = build_join_pathkeys(root, joinrel, outerkeys);
/*
* And now we can make the path. We only consider the cheapest-
@ -221,7 +241,7 @@ sort_inner_and_outer(Query *root,
innerrel->cheapest_total_path,
restrictlist,
merge_pathkeys,
curclause_list,
cur_mergeclauses,
outerkeys,
innerkeys));
}
@ -301,17 +321,16 @@ match_unsorted_outer(Query *root,
List *trialsortkeys;
Path *cheapest_startup_inner;
Path *cheapest_total_inner;
int num_mergeclauses;
int clausecnt;
int num_sortkeys;
int sortkeycnt;
/*
* The result will have this sort order (even if it is implemented
* as a nestloop, and even if some of the mergeclauses are
* implemented by qpquals rather than as true mergeclauses):
*/
merge_pathkeys = build_join_pathkeys(outerpath->pathkeys,
joinrel->targetlist,
root->equi_key_list);
merge_pathkeys = build_join_pathkeys(root, joinrel,
outerpath->pathkeys);
if (nestjoinOK)
{
@ -347,7 +366,8 @@ match_unsorted_outer(Query *root,
}
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(outerpath->pathkeys,
mergeclauses = find_mergeclauses_for_pathkeys(root,
outerpath->pathkeys,
mergeclause_list);
/* Done with this outer path if no chance for a mergejoin */
@ -362,7 +382,8 @@ match_unsorted_outer(Query *root,
/*
* Generate a mergejoin on the basis of sorting the cheapest
* inner. Since a sort will be needed, only cheapest total cost
* matters.
* matters. (But create_mergejoin_path will do the right thing
* if innerrel->cheapest_total_path is already correctly sorted.)
*/
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
@ -376,38 +397,49 @@ match_unsorted_outer(Query *root,
innersortkeys));
/*
* Look for presorted inner paths that satisfy the mergeclause
* Look for presorted inner paths that satisfy the innersortkey
* list or any truncation thereof. Here, we consider both cheap
* startup cost and cheap total cost.
* startup cost and cheap total cost. Ignore
* innerrel->cheapest_total_path, since we already made a path with it.
*/
trialsortkeys = listCopy(innersortkeys); /* modifiable copy */
num_sortkeys = length(innersortkeys);
if (num_sortkeys > 1)
trialsortkeys = listCopy(innersortkeys); /* need modifiable copy */
else
trialsortkeys = innersortkeys; /* won't really truncate */
cheapest_startup_inner = NULL;
cheapest_total_inner = NULL;
num_mergeclauses = length(mergeclauses);
for (clausecnt = num_mergeclauses; clausecnt > 0; clausecnt--)
for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
{
Path *innerpath;
List *newclauses = NIL;
/*
* Look for an inner path ordered well enough to merge with
* the first 'clausecnt' mergeclauses. NB: trialsortkeys list
* Look for an inner path ordered well enough for the first
* 'sortkeycnt' innersortkeys. NB: trialsortkeys list
* is modified destructively, which is why we made a copy...
*/
trialsortkeys = ltruncate(clausecnt, trialsortkeys);
trialsortkeys = ltruncate(sortkeycnt, trialsortkeys);
innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
trialsortkeys,
TOTAL_COST);
if (innerpath != NULL &&
innerpath != innerrel->cheapest_total_path &&
(cheapest_total_inner == NULL ||
compare_path_costs(innerpath, cheapest_total_inner,
TOTAL_COST) < 0))
{
/* Found a cheap (or even-cheaper) sorted path */
if (clausecnt < num_mergeclauses)
newclauses = ltruncate(clausecnt,
listCopy(mergeclauses));
/* Select the right mergeclauses, if we didn't already */
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
add_path(joinrel, (Path *)
@ -427,6 +459,7 @@ match_unsorted_outer(Query *root,
trialsortkeys,
STARTUP_COST);
if (innerpath != NULL &&
innerpath != innerrel->cheapest_total_path &&
(cheapest_startup_inner == NULL ||
compare_path_costs(innerpath, cheapest_startup_inner,
STARTUP_COST) < 0))
@ -441,9 +474,14 @@ match_unsorted_outer(Query *root,
*/
if (newclauses == NIL)
{
if (clausecnt < num_mergeclauses)
newclauses = ltruncate(clausecnt,
listCopy(mergeclauses));
if (sortkeycnt < num_sortkeys)
{
newclauses =
find_mergeclauses_for_pathkeys(root,
trialsortkeys,
mergeclauses);
Assert(newclauses != NIL);
}
else
newclauses = mergeclauses;
}
@ -501,7 +539,8 @@ match_unsorted_inner(Query *root,
Path *startupouterpath;
/* Look for useful mergeclauses (if any) */
mergeclauses = find_mergeclauses_for_pathkeys(innerpath->pathkeys,
mergeclauses = find_mergeclauses_for_pathkeys(root,
innerpath->pathkeys,
mergeclause_list);
if (mergeclauses == NIL)
continue;
@ -516,9 +555,7 @@ match_unsorted_inner(Query *root,
* outer. Since a sort will be needed, only cheapest total cost
* matters.
*/
merge_pathkeys = build_join_pathkeys(outersortkeys,
joinrel->targetlist,
root->equi_key_list);
merge_pathkeys = build_join_pathkeys(root, joinrel, outersortkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
jointype,
@ -545,9 +582,8 @@ match_unsorted_inner(Query *root,
continue; /* there won't be a startup-cost path
* either */
merge_pathkeys = build_join_pathkeys(totalouterpath->pathkeys,
joinrel->targetlist,
root->equi_key_list);
merge_pathkeys = build_join_pathkeys(root, joinrel,
totalouterpath->pathkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
jointype,
@ -564,9 +600,8 @@ match_unsorted_inner(Query *root,
STARTUP_COST);
if (startupouterpath != NULL && startupouterpath != totalouterpath)
{
merge_pathkeys = build_join_pathkeys(startupouterpath->pathkeys,
joinrel->targetlist,
root->equi_key_list);
merge_pathkeys = build_join_pathkeys(root, joinrel,
startupouterpath->pathkeys);
add_path(joinrel, (Path *)
create_mergejoin_path(joinrel,
jointype,
@ -637,10 +672,9 @@ hash_inner_and_outer(Query *root,
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(i);
Expr *clause;
Var *left,
*right,
*inner;
List *hashclauses;
*right;
Selectivity innerdispersion;
List *hashclauses;
if (restrictinfo->hashjoinoperator == InvalidOid)
continue; /* not hashjoinable */
@ -657,26 +691,48 @@ hash_inner_and_outer(Query *root,
left = get_leftop(clause);
right = get_rightop(clause);
/* check if clause is usable with these sub-rels, find inner var */
/*
* Check if clause is usable with these sub-rels, find inner side,
* estimate dispersion of inner var for costing purposes.
*
* Since we tend to visit the same clauses over and over when
* planning a large query, we cache the dispersion estimates in the
* RestrictInfo node to avoid repeated lookups of statistics.
*/
if (intMember(left->varno, outerrelids) &&
intMember(right->varno, innerrelids))
inner = right;
{
/* righthand side is inner */
innerdispersion = restrictinfo->right_dispersion;
if (innerdispersion < 0)
{
/* not cached yet */
innerdispersion = estimate_dispersion(root, right);
restrictinfo->right_dispersion = innerdispersion;
}
}
else if (intMember(left->varno, innerrelids) &&
intMember(right->varno, outerrelids))
inner = left;
{
/* lefthand side is inner */
innerdispersion = restrictinfo->left_dispersion;
if (innerdispersion < 0)
{
/* not cached yet */
innerdispersion = estimate_dispersion(root, left);
restrictinfo->left_dispersion = innerdispersion;
}
}
else
continue; /* no good for these input relations */
/* always a one-element list of hash clauses */
hashclauses = makeList1(restrictinfo);
/* estimate dispersion of inner var for costing purposes */
innerdispersion = estimate_dispersion(root, inner);
/*
* We consider both the cheapest-total-cost and
* cheapest-startup-cost outer paths. There's no need to consider
* any but the cheapest- total-cost inner path, however.
* any but the cheapest-total-cost inner path, however.
*/
add_path(joinrel, (Path *)
create_hashjoin_path(joinrel,