1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-03 01:21:48 +03:00

Fix best_inner_indexscan to return both the cheapest-total-cost and

cheapest-startup-cost innerjoin indexscans, and make joinpath.c consider
both of these (when different) as the inside of a nestloop join.  The
original design was based on the assumption that indexscan paths always
have negligible startup cost, and so total cost is the only important
figure of merit; an assumption that's obviously broken by bitmap
indexscans.  This oversight could lead to choosing poor plans in cases
where fast-start behavior is more important than total cost, such as
LIMIT and IN queries.  8.1-vintage brain fade exposed by an example from
Chuck D.
This commit is contained in:
Tom Lane 2007-05-22 01:40:53 +00:00
parent 3c49269b90
commit 9ccf784833
5 changed files with 80 additions and 52 deletions

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.261.2.1 2005/11/14 23:54:34 tgl Exp $ * $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.261.2.2 2007/05/22 01:40:52 tgl Exp $
* *
* NOTES * NOTES
* Every node type that can appear in stored rules' parsetrees *must* * Every node type that can appear in stored rules' parsetrees *must*
@ -1262,7 +1262,8 @@ _outInnerIndexscanInfo(StringInfo str, InnerIndexscanInfo *node)
WRITE_NODE_TYPE("INNERINDEXSCANINFO"); WRITE_NODE_TYPE("INNERINDEXSCANINFO");
WRITE_BITMAPSET_FIELD(other_relids); WRITE_BITMAPSET_FIELD(other_relids);
WRITE_BOOL_FIELD(isouterjoin); WRITE_BOOL_FIELD(isouterjoin);
WRITE_NODE_FIELD(best_innerpath); WRITE_NODE_FIELD(cheapest_startup_innerpath);
WRITE_NODE_FIELD(cheapest_total_innerpath);
} }
static void static void

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.191.2.10 2007/04/17 20:03:16 tgl Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.191.2.11 2007/05/22 01:40:52 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -1313,22 +1313,25 @@ matches_any_index(RestrictInfo *rinfo, RelOptInfo *rel, Relids outer_relids)
/* /*
* best_inner_indexscan * best_inner_indexscan
* Finds the best available inner indexscan for a nestloop join * Finds the best available inner indexscans for a nestloop join
* with the given rel on the inside and the given outer_relids outside. * with the given rel on the inside and the given outer_relids outside.
* May return NULL if there are no possible inner indexscans.
* *
* We ignore ordering considerations (since a nestloop's inner scan's order * *cheapest_startup gets the path with least startup cost
* is uninteresting). Also, we consider only total cost when deciding which * *cheapest_total gets the path with least total cost (often the same path)
* of two possible paths is better --- this assumes that all indexpaths have * Both are set to NULL if there are no possible inner indexscans.
* negligible startup cost. (True today, but someday we might have to think *
* harder.) Therefore, there is only one dimension of comparison and so it's * We ignore ordering considerations, since a nestloop's inner scan's order
* sufficient to return a single "best" path. * is uninteresting. Hence startup cost and total cost are the only figures
* of merit to consider.
*
* Note: create_index_paths() must have been run previously for this rel,
* else the results will always be NULL.
*/ */
Path * void
best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype) Relids outer_relids, JoinType jointype,
Path **cheapest_startup, Path **cheapest_total)
{ {
Path *cheapest;
bool isouterjoin; bool isouterjoin;
List *clause_list; List *clause_list;
List *indexpaths; List *indexpaths;
@ -1337,6 +1340,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
InnerIndexscanInfo *info; InnerIndexscanInfo *info;
MemoryContext oldcontext; MemoryContext oldcontext;
/* Initialize results for failure returns */
*cheapest_startup = *cheapest_total = NULL;
/* /*
* Nestloop only supports inner, left, and IN joins. * Nestloop only supports inner, left, and IN joins.
*/ */
@ -1351,14 +1357,14 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
isouterjoin = true; isouterjoin = true;
break; break;
default: default:
return NULL; return;
} }
/* /*
* If there are no indexable joinclauses for this rel, exit quickly. * If there are no indexable joinclauses for this rel, exit quickly.
*/ */
if (bms_is_empty(rel->index_outer_relids)) if (bms_is_empty(rel->index_outer_relids))
return NULL; return;
/* /*
* Otherwise, we have to do path selection in the memory context of the * Otherwise, we have to do path selection in the memory context of the
@ -1378,14 +1384,14 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
{ {
bms_free(outer_relids); bms_free(outer_relids);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
return NULL; return;
} }
/* /*
* Look to see if we already computed the result for this set of relevant * Look to see if we already computed the result for this set of relevant
* outerrels. (We include the isouterjoin status in the cache lookup key * outerrels. (We include the isouterjoin status in the cache lookup key
* for safety. In practice I suspect this is not necessary because it * for safety. In practice I suspect this is not necessary because it
* should always be the same for a given innerrel.) * should always be the same for a given combination of rels.)
*/ */
foreach(l, rel->index_inner_paths) foreach(l, rel->index_inner_paths)
{ {
@ -1395,7 +1401,9 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
{ {
bms_free(outer_relids); bms_free(outer_relids);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
return info->best_innerpath; *cheapest_startup = info->cheapest_startup_innerpath;
*cheapest_total = info->cheapest_total_innerpath;
return;
} }
} }
@ -1450,28 +1458,32 @@ best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
} }
/* /*
* Now choose the cheapest member of indexpaths. * Now choose the cheapest members of indexpaths.
*/ */
cheapest = NULL; if (indexpaths != NIL)
foreach(l, indexpaths) {
*cheapest_startup = *cheapest_total = (Path *) linitial(indexpaths);
for_each_cell(l, lnext(list_head(indexpaths)))
{ {
Path *path = (Path *) lfirst(l); Path *path = (Path *) lfirst(l);
if (cheapest == NULL || if (compare_path_costs(path, *cheapest_startup, STARTUP_COST) < 0)
compare_path_costs(path, cheapest, TOTAL_COST) < 0) *cheapest_startup = path;
cheapest = path; if (compare_path_costs(path, *cheapest_total, TOTAL_COST) < 0)
*cheapest_total = path;
}
} }
/* Cache the result --- whether positive or negative */ /* Cache the results --- whether positive or negative */
info = makeNode(InnerIndexscanInfo); info = makeNode(InnerIndexscanInfo);
info->other_relids = outer_relids; info->other_relids = outer_relids;
info->isouterjoin = isouterjoin; info->isouterjoin = isouterjoin;
info->best_innerpath = cheapest; info->cheapest_startup_innerpath = *cheapest_startup;
info->cheapest_total_innerpath = *cheapest_total;
rel->index_inner_paths = lcons(info, rel->index_inner_paths); rel->index_inner_paths = lcons(info, rel->index_inner_paths);
MemoryContextSwitchTo(oldcontext); MemoryContextSwitchTo(oldcontext);
return cheapest;
} }
/* /*

View File

@ -8,7 +8,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.97.2.1 2005/11/22 18:23:10 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/joinpath.c,v 1.97.2.2 2007/05/22 01:40:53 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -295,10 +295,11 @@ sort_inner_and_outer(PlannerInfo *root,
* only outer paths that are already ordered well enough for merging). * only outer paths that are already ordered well enough for merging).
* *
* We always generate a nestloop path for each available outer path. * We always generate a nestloop path for each available outer path.
* In fact we may generate as many as four: one on the cheapest-total-cost * In fact we may generate as many as five: one on the cheapest-total-cost
* inner path, one on the same with materialization, one on the * inner path, one on the same with materialization, one on the
* cheapest-startup-cost inner path (if different), * cheapest-startup-cost inner path (if different), one on the
* and one on the best inner-indexscan path (if any). * cheapest-total inner-indexscan path (if any), and one on the
* cheapest-startup inner-indexscan path (if different).
* *
* We also consider mergejoins if mergejoin clauses are available. We have * We also consider mergejoins if mergejoin clauses are available. We have
* two ways to generate the inner path for a mergejoin: sort the cheapest * two ways to generate the inner path for a mergejoin: sort the cheapest
@ -334,7 +335,8 @@ match_unsorted_outer(PlannerInfo *root,
Path *inner_cheapest_startup = innerrel->cheapest_startup_path; Path *inner_cheapest_startup = innerrel->cheapest_startup_path;
Path *inner_cheapest_total = innerrel->cheapest_total_path; Path *inner_cheapest_total = innerrel->cheapest_total_path;
Path *matpath = NULL; Path *matpath = NULL;
Path *bestinnerjoin = NULL; Path *index_cheapest_startup = NULL;
Path *index_cheapest_total = NULL;
ListCell *l; ListCell *l;
/* /*
@ -392,11 +394,12 @@ match_unsorted_outer(PlannerInfo *root,
create_material_path(innerrel, inner_cheapest_total); create_material_path(innerrel, inner_cheapest_total);
/* /*
* Get the best innerjoin indexpath (if any) for this outer rel. It's * Get the best innerjoin indexpaths (if any) for this outer rel.
* the same for all outer paths. * They're the same for all outer paths.
*/ */
bestinnerjoin = best_inner_indexscan(root, innerrel, best_inner_indexscan(root, innerrel, outerrel->relids, jointype,
outerrel->relids, jointype); &index_cheapest_startup,
&index_cheapest_total);
} }
foreach(l, outerrel->pathlist) foreach(l, outerrel->pathlist)
@ -437,8 +440,8 @@ match_unsorted_outer(PlannerInfo *root,
* Always consider a nestloop join with this outer and * Always consider a nestloop join with this outer and
* cheapest-total-cost inner. When appropriate, also consider * cheapest-total-cost inner. When appropriate, also consider
* using the materialized form of the cheapest inner, the * using the materialized form of the cheapest inner, the
* cheapest-startup-cost inner path, and the best innerjoin * cheapest-startup-cost inner path, and the cheapest innerjoin
* indexpath. * indexpaths.
*/ */
add_path(joinrel, (Path *) add_path(joinrel, (Path *)
create_nestloop_path(root, create_nestloop_path(root,
@ -466,13 +469,23 @@ match_unsorted_outer(PlannerInfo *root,
inner_cheapest_startup, inner_cheapest_startup,
restrictlist, restrictlist,
merge_pathkeys)); merge_pathkeys));
if (bestinnerjoin != NULL) if (index_cheapest_total != NULL)
add_path(joinrel, (Path *) add_path(joinrel, (Path *)
create_nestloop_path(root, create_nestloop_path(root,
joinrel, joinrel,
jointype, jointype,
outerpath, outerpath,
bestinnerjoin, index_cheapest_total,
restrictlist,
merge_pathkeys));
if (index_cheapest_startup != NULL &&
index_cheapest_startup != index_cheapest_total)
add_path(joinrel, (Path *)
create_nestloop_path(root,
joinrel,
jointype,
outerpath,
index_cheapest_startup,
restrictlist, restrictlist,
merge_pathkeys)); merge_pathkeys));
} }

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.119.2.1 2005/11/14 23:54:36 tgl Exp $ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.119.2.2 2007/05/22 01:40:53 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -803,20 +803,20 @@ typedef struct RestrictInfo
* relation includes all other relids appearing in those joinclauses. * relation includes all other relids appearing in those joinclauses.
* The set of usable joinclauses, and thus the best inner indexscan, * The set of usable joinclauses, and thus the best inner indexscan,
* thus varies depending on which outer relation we consider; so we have * thus varies depending on which outer relation we consider; so we have
* to recompute the best such path for every join. To avoid lots of * to recompute the best such paths for every join. To avoid lots of
* redundant computation, we cache the results of such searches. For * redundant computation, we cache the results of such searches. For
* each relation we compute the set of possible otherrelids (all relids * each relation we compute the set of possible otherrelids (all relids
* appearing in joinquals that could become indexquals for this table). * appearing in joinquals that could become indexquals for this table).
* Two outer relations whose relids have the same intersection with this * Two outer relations whose relids have the same intersection with this
* set will have the same set of available joinclauses and thus the same * set will have the same set of available joinclauses and thus the same
* best inner indexscan for the inner relation. By taking the intersection * best inner indexscans for the inner relation. By taking the intersection
* before scanning the cache, we avoid recomputing when considering * before scanning the cache, we avoid recomputing when considering
* join rels that differ only by the inclusion of irrelevant other rels. * join rels that differ only by the inclusion of irrelevant other rels.
* *
* The search key also includes a bool showing whether the join being * The search key also includes a bool showing whether the join being
* considered is an outer join. Since we constrain the join order for * considered is an outer join. Since we constrain the join order for
* outer joins, I believe that this bool can only have one possible value * outer joins, I believe that this bool can only have one possible value
* for any particular base relation; but store it anyway to avoid confusion. * for any particular lookup key; but store it anyway to avoid confusion.
*/ */
typedef struct InnerIndexscanInfo typedef struct InnerIndexscanInfo
@ -825,8 +825,9 @@ typedef struct InnerIndexscanInfo
/* The lookup key: */ /* The lookup key: */
Relids other_relids; /* a set of relevant other relids */ Relids other_relids; /* a set of relevant other relids */
bool isouterjoin; /* true if join is outer */ bool isouterjoin; /* true if join is outer */
/* Best path for this lookup key: */ /* Best paths for this lookup key (NULL if no available indexscans): */
Path *best_innerpath; /* best inner indexscan, or NULL if none */ Path *cheapest_startup_innerpath; /* cheapest startup cost */
Path *cheapest_total_innerpath; /* cheapest total cost */
} InnerIndexscanInfo; } InnerIndexscanInfo;
/* /*

View File

@ -7,7 +7,7 @@
* Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group * Portions Copyright (c) 1996-2005, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California * Portions Copyright (c) 1994, Regents of the University of California
* *
* $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.88.2.2 2007/02/16 00:14:16 tgl Exp $ * $PostgreSQL: pgsql/src/include/optimizer/paths.h,v 1.88.2.3 2007/05/22 01:40:53 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -39,8 +39,9 @@ extern List *generate_bitmap_or_paths(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses, List *clauses, List *outer_clauses,
bool isjoininner, bool isjoininner,
Relids outer_relids); Relids outer_relids);
extern Path *best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel, extern void best_inner_indexscan(PlannerInfo *root, RelOptInfo *rel,
Relids outer_relids, JoinType jointype); Relids outer_relids, JoinType jointype,
Path **cheapest_startup, Path **cheapest_total);
extern List *group_clauses_by_indexkey(IndexOptInfo *index, extern List *group_clauses_by_indexkey(IndexOptInfo *index,
List *clauses, List *outer_clauses, List *clauses, List *outer_clauses,
Relids outer_relids, Relids outer_relids,