1
0
mirror of https://github.com/postgres/postgres.git synced 2025-05-03 22:24:49 +03:00

Rewrite choose_bitmap_and() to make it more robust in the presence of

competing alternatives for indexes to use in a bitmap scan.  The former
coding took estimated selectivity as an overriding factor, causing it to
sometimes choose indexes that were much slower to scan than ones with a
slightly worse selectivity.  It was also too narrow-minded about which
combinations of indexes to consider ANDing.  The rewrite makes it pay more
attention to index scan cost than selectivity; this seems sane since it's
impossible to have very bad selectivity with low cost, whereas the reverse
isn't true.  Also, we now consider each index alone, as well as adding
each index to an AND-group led by each prior index, for a total of about
O(N^2) rather than O(N) combinations considered.  This makes the results
much less dependent on the exact order in which the indexes are
considered.  It's still a lot cheaper than an O(2^N) exhaustive search.
A prefilter step eliminates all but the cheapest of those indexes using
the same set of WHERE conditions, to keep the effective value of N down in
scenarios where the DBA has created lots of partially-redundant indexes.
This commit is contained in:
Tom Lane 2007-04-17 20:03:10 +00:00
parent ff663c9606
commit 2edda80720

View File

@ -9,7 +9,7 @@
* *
* *
* IDENTIFICATION * IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.212 2006/10/04 00:29:54 momjian Exp $ * $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.212.2.1 2007/04/17 20:03:10 tgl Exp $
* *
*------------------------------------------------------------------------- *-------------------------------------------------------------------------
*/ */
@ -46,6 +46,16 @@
((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID) ((opclass) == BOOL_BTREE_OPS_OID || (opclass) == BOOL_HASH_OPS_OID)
/*
 * PathClauseUsage
 *		Working data kept for each candidate path while choose_bitmap_and()
 *		runs.
 */
typedef struct PathClauseUsage
{
	Path	   *path;			/* candidate: IndexPath, BitmapAndPath, or
								 * BitmapOrPath */
	List	   *quals;			/* WHERE clauses used by the path */
	List	   *preds;			/* predicate clauses of the partial index(es)
								 * involved */
	Bitmapset  *clauseids;		/* ids of all quals and preds, as a bitmapset */
} PathClauseUsage;
static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel, static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
List *clauses, List *outer_clauses, List *clauses, List *outer_clauses,
bool istoplevel, RelOptInfo *outer_rel, bool istoplevel, RelOptInfo *outer_rel,
@ -55,11 +65,15 @@ static List *find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
bool istoplevel, RelOptInfo *outer_rel); bool istoplevel, RelOptInfo *outer_rel);
static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel, static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
List *paths, RelOptInfo *outer_rel); List *paths, RelOptInfo *outer_rel);
static int bitmap_path_comparator(const void *a, const void *b); static int path_usage_comparator(const void *a, const void *b);
static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
Path *ipath, RelOptInfo *outer_rel);
static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel, static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
List *paths, RelOptInfo *outer_rel); List *paths, RelOptInfo *outer_rel);
static List *pull_indexpath_quals(Path *bitmapqual); static PathClauseUsage *classify_index_clause_usage(Path *path,
static bool lists_intersect_ptr(List *list1, List *list2); List **clauselist);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
static bool match_clause_to_indexcol(IndexOptInfo *index, static bool match_clause_to_indexcol(IndexOptInfo *index,
int indexcol, Oid opclass, int indexcol, Oid opclass,
RestrictInfo *rinfo, RestrictInfo *rinfo,
@ -573,11 +587,12 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
List *paths, RelOptInfo *outer_rel) List *paths, RelOptInfo *outer_rel)
{ {
int npaths = list_length(paths); int npaths = list_length(paths);
Path **patharray; PathClauseUsage **pathinfoarray;
Cost costsofar; PathClauseUsage *pathinfo;
List *qualsofar; List *clauselist;
ListCell *lastcell; List *bestpaths = NIL;
int i; Cost bestcost = 0;
int i, j;
ListCell *l; ListCell *l;
Assert(npaths > 0); /* else caller error */ Assert(npaths > 0); /* else caller error */
@ -588,120 +603,231 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
* In theory we should consider every nonempty subset of the given paths. * In theory we should consider every nonempty subset of the given paths.
* In practice that seems like overkill, given the crude nature of the * In practice that seems like overkill, given the crude nature of the
* estimates, not to mention the possible effects of higher-level AND and * estimates, not to mention the possible effects of higher-level AND and
* OR clauses. As a compromise, we sort the paths by selectivity. We * OR clauses. Moreover, it's completely impractical if there are a large
* always take the first, and sequentially add on paths that result in a * number of paths, since the work would grow as O(2^N).
* lower estimated cost.
* *
* We also make some effort to detect directly redundant input paths, as * As a heuristic, we first check for paths using exactly the same
* can happen if there are multiple possibly usable indexes. (Another way * sets of WHERE clauses + index predicate conditions, and reject all
* it can happen is that best_inner_indexscan will find the same OR join * but the cheapest-to-scan in any such group. This primarily gets rid
* clauses that create_or_index_quals has pulled OR restriction clauses * of indexes that include the interesting columns but also irrelevant
* out of, and then both versions show up as duplicate paths.) We * columns. (In situations where the DBA has gone overboard on creating
* consider an index redundant if any of its index conditions were already * variant indexes, this can make for a very large reduction in the number
* used by earlier indexes. (We could use predicate_implied_by to have a * of paths considered further.)
* more intelligent, but much more expensive, check --- but in most cases
* simple pointer equality should suffice, since after all the index
* conditions are all coming from the same RestrictInfo lists.)
* *
* You might think the condition for redundancy should be "all index * We then sort the surviving paths with the cheapest-to-scan first,
* conditions already used", not "any", but this turns out to be wrong. * and for each path, consider using that path alone as the basis for
* For example, if we use an index on A, and then come to an index with * a bitmap scan. Then we consider bitmap AND scans formed from that
* conditions on A and B, the only way that the second index can be later * path plus each subsequent (higher-cost) path, adding on a subsequent
* in the selectivity-order sort is if the condition on B is completely * path if it results in a reduction in the estimated total scan cost.
* non-selective. In any case, we'd surely be drastically misestimating * This means we consider about O(N^2) rather than O(2^N) path
* the selectivity if we count the same condition twice. * combinations, which is quite tolerable, especially given that N is
* usually reasonably small because of the prefiltering step. The
* cheapest of these is returned.
* *
* We include index predicate conditions in the redundancy test. Because * We will only consider AND combinations in which no two indexes use
* the test is just for pointer equality and not equal(), the effect is * the same WHERE clause. This is a bit of a kluge: it's needed because
* that use of the same partial index in two different AND elements is * costsize.c and clausesel.c aren't very smart about redundant clauses.
* considered redundant. (XXX is this too strong?) * They will usually double-count the redundant clauses, producing a
* too-small selectivity that makes a redundant AND step look like it
* reduces the total cost. Perhaps someday that code will be smarter and
* we can remove this limitation. (But note that this also defends
* against flat-out duplicate input paths, which can happen because
* best_inner_indexscan will find the same OR join clauses that
* create_or_index_quals has pulled OR restriction clauses out of.)
* *
* Note: outputting the selected sub-paths in selectivity order is a good * For the same reason, we reject AND combinations in which an index
* thing even if we weren't using that as part of the selection method, * predicate clause duplicates another clause. Here we find it necessary
* because it makes the short-circuit case in MultiExecBitmapAnd() more * to be even stricter: we'll reject a partial index if any of its
* likely to apply. * predicate clauses are implied by the set of WHERE clauses and predicate
* clauses used so far. This covers cases such as a condition "x = 42"
* used with a plain index, followed by a clauseless scan of a partial
* index "WHERE x >= 40 AND x < 50". The partial index has been accepted
* only because "x = 42" was present, and so allowing it would partially
* double-count selectivity. (We could use predicate_implied_by on
* regular qual clauses too, to have a more intelligent, but much more
* expensive, check for redundancy --- but in most cases simple equality
* seems to suffice.)
*/ */
/* Convert list to array so we can apply qsort */ /*
patharray = (Path **) palloc(npaths * sizeof(Path *)); * Extract clause usage info and detect any paths that use exactly
i = 0; * the same set of clauses; keep only the cheapest-to-scan of any such
* groups. The surviving paths are put into an array for qsort'ing.
*/
pathinfoarray = (PathClauseUsage **)
palloc(npaths * sizeof(PathClauseUsage *));
clauselist = NIL;
npaths = 0;
foreach(l, paths) foreach(l, paths)
{ {
patharray[i++] = (Path *) lfirst(l); Path *ipath = (Path *) lfirst(l);
}
qsort(patharray, npaths, sizeof(Path *), bitmap_path_comparator);
paths = list_make1(patharray[0]); pathinfo = classify_index_clause_usage(ipath, &clauselist);
costsofar = bitmap_and_cost_est(root, rel, paths, outer_rel); for (i = 0; i < npaths; i++)
qualsofar = pull_indexpath_quals(patharray[0]);
lastcell = list_head(paths); /* for quick deletions */
for (i = 1; i < npaths; i++)
{
Path *newpath = patharray[i];
List *newqual;
Cost newcost;
newqual = pull_indexpath_quals(newpath);
if (lists_intersect_ptr(newqual, qualsofar))
continue; /* consider it redundant */
/* tentatively add newpath to paths, so we can estimate cost */
paths = lappend(paths, newpath);
newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
if (newcost < costsofar)
{ {
/* keep newpath in paths, update subsidiary variables */ if (bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids))
costsofar = newcost; break;
qualsofar = list_concat(qualsofar, newqual); }
lastcell = lnext(lastcell); if (i < npaths)
{
/* duplicate clauseids, keep the cheaper one */
Cost ncost;
Cost ocost;
Selectivity nselec;
Selectivity oselec;
cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec);
cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec);
if (ncost < ocost)
pathinfoarray[i] = pathinfo;
} }
else else
{ {
/* reject newpath, remove it from paths list */ /* not duplicate clauseids, add to array */
paths = list_delete_cell(paths, lnext(lastcell), lastcell); pathinfoarray[npaths++] = pathinfo;
} }
Assert(lnext(lastcell) == NULL);
} }
if (list_length(paths) == 1) /* If only one surviving path, we're done */
return (Path *) linitial(paths); /* no need for AND */ if (npaths == 1)
return (Path *) create_bitmap_and_path(root, rel, paths); return pathinfoarray[0]->path;
/* Sort the surviving paths by index access cost */
qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *),
path_usage_comparator);
/*
* For each surviving index, consider it as an "AND group leader", and
* see whether adding on any of the later indexes results in an AND path
* with cheaper total cost than before. Then take the cheapest AND group.
*/
for (i = 0; i < npaths; i++)
{
Cost costsofar;
List *qualsofar;
Bitmapset *clauseidsofar;
ListCell *lastcell;
pathinfo = pathinfoarray[i];
paths = list_make1(pathinfo->path);
costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path, outer_rel);
qualsofar = list_concat(list_copy(pathinfo->quals),
list_copy(pathinfo->preds));
clauseidsofar = bms_copy(pathinfo->clauseids);
lastcell = list_head(paths); /* for quick deletions */
for (j = i+1; j < npaths; j++)
{
Cost newcost;
pathinfo = pathinfoarray[j];
/* Check for redundancy */
if (bms_overlap(pathinfo->clauseids, clauseidsofar))
continue; /* consider it redundant */
if (pathinfo->preds)
{
bool redundant = false;
/* we check each predicate clause separately */
foreach(l, pathinfo->preds)
{
Node *np = (Node *) lfirst(l);
if (predicate_implied_by(list_make1(np), qualsofar))
{
redundant = true;
break; /* out of inner foreach loop */
}
}
if (redundant)
continue;
}
/* tentatively add new path to paths, so we can estimate cost */
paths = lappend(paths, pathinfo->path);
newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
if (newcost < costsofar)
{
/* keep new path in paths, update subsidiary variables */
costsofar = newcost;
qualsofar = list_concat(qualsofar,
list_copy(pathinfo->quals));
qualsofar = list_concat(qualsofar,
list_copy(pathinfo->preds));
clauseidsofar = bms_add_members(clauseidsofar,
pathinfo->clauseids);
lastcell = lnext(lastcell);
}
else
{
/* reject new path, remove it from paths list */
paths = list_delete_cell(paths, lnext(lastcell), lastcell);
}
Assert(lnext(lastcell) == NULL);
}
/* Keep the cheapest AND-group (or singleton) */
if (i == 0 || costsofar < bestcost)
{
bestpaths = paths;
bestcost = costsofar;
}
/* some easy cleanup (we don't try real hard though) */
list_free(qualsofar);
}
if (list_length(bestpaths) == 1)
return (Path *) linitial(bestpaths); /* no need for AND */
return (Path *) create_bitmap_and_path(root, rel, bestpaths);
} }
/* qsort comparator to sort in increasing selectivity order */ /* qsort comparator to sort in increasing index access cost order */
static int static int
bitmap_path_comparator(const void *a, const void *b) path_usage_comparator(const void *a, const void *b)
{ {
Path *pa = *(Path *const *) a; PathClauseUsage *pa = *(PathClauseUsage *const *) a;
Path *pb = *(Path *const *) b; PathClauseUsage *pb = *(PathClauseUsage *const *) b;
Cost acost; Cost acost;
Cost bcost; Cost bcost;
Selectivity aselec; Selectivity aselec;
Selectivity bselec; Selectivity bselec;
cost_bitmap_tree_node(pa, &acost, &aselec); cost_bitmap_tree_node(pa->path, &acost, &aselec);
cost_bitmap_tree_node(pb, &bcost, &bselec); cost_bitmap_tree_node(pb->path, &bcost, &bselec);
/* /*
* If selectivities are the same, sort by cost. (Note: there used to be * If costs are the same, sort by selectivity.
* logic here to do "fuzzy comparison", but that's a bad idea because it
* fails to be transitive, which will confuse qsort terribly.)
*/ */
if (aselec < bselec)
return -1;
if (aselec > bselec)
return 1;
if (acost < bcost) if (acost < bcost)
return -1; return -1;
if (acost > bcost) if (acost > bcost)
return 1; return 1;
if (aselec < bselec)
return -1;
if (aselec > bselec)
return 1;
return 0; return 0;
} }
/* /*
* Estimate the cost of actually executing a BitmapAnd with the given * Estimate the cost of actually executing a bitmap scan with a single
* index path (no BitmapAnd, at least not at this level).
*/
static Cost
bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
					 Path *ipath, RelOptInfo *outer_rel)
{
	/*
	 * Scratch Path used only as an output buffer for the costing routine;
	 * nothing but its total_cost is read back, and it does not outlive this
	 * call.
	 */
	Path		scratch;

	cost_bitmap_heap_scan(&scratch, root, rel, ipath, outer_rel);

	return scratch.total_cost;
}
/*
* Estimate the cost of actually executing a BitmapAnd scan with the given
* inputs. * inputs.
*/ */
static Cost static Cost
@ -723,90 +849,134 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
return bpath.total_cost; return bpath.total_cost;
} }
/* /*
* pull_indexpath_quals * classify_index_clause_usage
* Construct a PathClauseUsage struct describing the WHERE clauses and
* index predicate clauses used by the given indexscan path.
* We consider two clauses the same if they are equal().
* *
* Given the Path structure for a plain or bitmap indexscan, extract a list * At some point we might want to migrate this info into the Path data
* of all the indexquals and index predicate conditions used in the Path. * structure proper, but for the moment it's only needed within
* choose_bitmap_and().
* *
* This is sort of a simplified version of make_restrictinfo_from_bitmapqual; * *clauselist is used and expanded as needed to identify all the distinct
* here, we are not trying to produce an accurate representation of the AND/OR * clauses seen across successive calls. Caller must initialize it to NIL
* semantics of the Path, but just find out all the base conditions used. * before first call of a set.
*
* The result list contains pointers to the expressions used in the Path,
* but all the list cells are freshly built, so it's safe to destructively
* modify the list (eg, by concat'ing it with other lists).
*/ */
static List * static PathClauseUsage *
pull_indexpath_quals(Path *bitmapqual) classify_index_clause_usage(Path *path, List **clauselist)
{ {
List *result = NIL; PathClauseUsage *result;
ListCell *l; Bitmapset *clauseids;
ListCell *lc;
if (IsA(bitmapqual, BitmapAndPath)) result = (PathClauseUsage *) palloc(sizeof(PathClauseUsage));
result->path = path;
/* Recursively find the quals and preds used by the path */
result->quals = NIL;
result->preds = NIL;
find_indexpath_quals(path, &result->quals, &result->preds);
/* Build up a bitmapset representing the quals and preds */
clauseids = NULL;
foreach(lc, result->quals)
{ {
BitmapAndPath *apath = (BitmapAndPath *) bitmapqual; Node *node = (Node *) lfirst(lc);
foreach(l, apath->bitmapquals) clauseids = bms_add_member(clauseids,
{ find_list_position(node, clauselist));
List *sublist;
sublist = pull_indexpath_quals((Path *) lfirst(l));
result = list_concat(result, sublist);
}
} }
else if (IsA(bitmapqual, BitmapOrPath)) foreach(lc, result->preds)
{ {
BitmapOrPath *opath = (BitmapOrPath *) bitmapqual; Node *node = (Node *) lfirst(lc);
foreach(l, opath->bitmapquals) clauseids = bms_add_member(clauseids,
{ find_list_position(node, clauselist));
List *sublist;
sublist = pull_indexpath_quals((Path *) lfirst(l));
result = list_concat(result, sublist);
}
} }
else if (IsA(bitmapqual, IndexPath)) result->clauseids = clauseids;
{
IndexPath *ipath = (IndexPath *) bitmapqual;
result = get_actual_clauses(ipath->indexclauses);
result = list_concat(result, list_copy(ipath->indexinfo->indpred));
}
else
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
return result; return result;
} }
/* /*
* lists_intersect_ptr * find_indexpath_quals
* Detect whether two lists have a nonempty intersection,
* using pointer equality to compare members.
* *
* This possibly should go into list.c, but it doesn't yet have any use * Given the Path structure for a plain or bitmap indexscan, extract lists
* except in choose_bitmap_and. * of all the indexquals and index predicate conditions used in the Path.
* These are appended to the initial contents of *quals and *preds (hence
* caller should initialize those to NIL).
*
* This is sort of a simplified version of make_restrictinfo_from_bitmapqual;
* here, we are not trying to produce an accurate representation of the AND/OR
* semantics of the Path, but just find out all the base conditions used.
*
* The result lists contain pointers to the expressions used in the Path,
* but all the list cells are freshly built, so it's safe to destructively
* modify the lists (eg, by concat'ing with other lists).
*/ */
static bool static void
lists_intersect_ptr(List *list1, List *list2) find_indexpath_quals(Path *bitmapqual, List **quals, List **preds)
{ {
ListCell *cell1; if (IsA(bitmapqual, BitmapAndPath))
foreach(cell1, list1)
{ {
void *datum1 = lfirst(cell1); BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
ListCell *cell2; ListCell *l;
foreach(cell2, list2) foreach(l, apath->bitmapquals)
{ {
if (lfirst(cell2) == datum1) find_indexpath_quals((Path *) lfirst(l), quals, preds);
return true;
} }
} }
else if (IsA(bitmapqual, BitmapOrPath))
{
BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
ListCell *l;
return false; foreach(l, opath->bitmapquals)
{
find_indexpath_quals((Path *) lfirst(l), quals, preds);
}
}
else if (IsA(bitmapqual, IndexPath))
{
IndexPath *ipath = (IndexPath *) bitmapqual;
*quals = list_concat(*quals, get_actual_clauses(ipath->indexclauses));
*preds = list_concat(*preds, list_copy(ipath->indexinfo->indpred));
}
else
elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
}
/*
 * find_list_position
 *		Look up "node" in *nodelist, comparing members with equal(), and
 *		return its zero-based index.  A node that matches no existing member
 *		is appended to *nodelist, and the index of that new entry is returned.
 */
static int
find_list_position(Node *node, List **nodelist)
{
	int			pos = 0;
	ListCell   *cell;

	foreach(cell, *nodelist)
	{
		if (equal(node, (Node *) lfirst(cell)))
			return pos;
		pos++;
	}

	/* No match: pos now equals the old list length, i.e. the new entry's slot */
	*nodelist = lappend(*nodelist, node);
	return pos;
}