diff --git a/src/backend/optimizer/path/indxpath.c b/src/backend/optimizer/path/indxpath.c
index 243888b58db..268aa925fc4 100644
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.220 2007/04/15 20:09:28 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/indxpath.c,v 1.221 2007/04/17 20:03:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -45,6 +45,16 @@
 	((opfamily) == BOOL_BTREE_FAM_OID || (opfamily) == BOOL_HASH_FAM_OID)
 
 
+/* Per-path data used within choose_bitmap_and() */
+typedef struct
+{
+	Path	   *path;			/* IndexPath, BitmapAndPath, or BitmapOrPath */
+	List	   *quals;			/* the WHERE clauses it uses */
+	List	   *preds;			/* predicates of its partial index(es) */
+	Bitmapset  *clauseids;		/* quals+preds represented as a bitmapset */
+} PathClauseUsage;
+
+
 static List *find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
 					List *clauses, List *outer_clauses,
 					bool istoplevel, RelOptInfo *outer_rel,
@@ -54,11 +64,15 @@ static List *find_saop_paths(PlannerInfo *root, RelOptInfo *rel,
 				bool istoplevel, RelOptInfo *outer_rel);
 static Path *choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
 				  List *paths, RelOptInfo *outer_rel);
-static int	bitmap_path_comparator(const void *a, const void *b);
+static int	path_usage_comparator(const void *a, const void *b);
+static Cost bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
+					 Path *ipath, RelOptInfo *outer_rel);
 static Cost bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
 					List *paths, RelOptInfo *outer_rel);
+static PathClauseUsage *classify_index_clause_usage(Path *path,
+							List **clauselist);
 static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
-static bool lists_intersect(List *list1, List *list2);
+static int	find_list_position(Node *node, List **nodelist);
 static bool match_clause_to_indexcol(IndexOptInfo *index,
 						 int indexcol, Oid opfamily,
 						 RestrictInfo *rinfo,
@@ -562,12 +576,12 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
 				  List *paths, RelOptInfo *outer_rel)
 {
 	int			npaths = list_length(paths);
-	Path	  **patharray;
-	Cost		costsofar;
-	List	   *qualsofar;
-	List	   *firstpred;
-	ListCell   *lastcell;
-	int			i;
+	PathClauseUsage **pathinfoarray;
+	PathClauseUsage *pathinfo;
+	List	   *clauselist;
+	List	   *bestpaths = NIL;
+	Cost		bestcost = 0;
+	int			i, j;
 	ListCell   *l;
 
 	Assert(npaths > 0);			/* else caller error */
@@ -578,156 +592,231 @@ choose_bitmap_and(PlannerInfo *root, RelOptInfo *rel,
 	 * In theory we should consider every nonempty subset of the given paths.
 	 * In practice that seems like overkill, given the crude nature of the
 	 * estimates, not to mention the possible effects of higher-level AND and
-	 * OR clauses.  As a compromise, we sort the paths by selectivity.  We
-	 * always take the first, and sequentially add on paths that result in a
-	 * lower estimated cost.
+	 * OR clauses.  Moreover, it's completely impractical if there are a large
+	 * number of paths, since the work would grow as O(2^N).
 	 *
-	 * We also make some effort to detect directly redundant input paths, as
-	 * can happen if there are multiple possibly usable indexes.  (Another way
-	 * it can happen is that best_inner_indexscan will find the same OR join
-	 * clauses that create_or_index_quals has pulled OR restriction clauses
-	 * out of, and then both versions show up as duplicate paths.)  We
-	 * consider an index redundant if any of its index conditions were already
-	 * used by earlier indexes.  (We could use predicate_implied_by to have a
-	 * more intelligent, but much more expensive, check --- but in most cases
-	 * simple equality should suffice.)
+	 * As a heuristic, we first check for paths using exactly the same
+	 * sets of WHERE clauses + index predicate conditions, and reject all
+	 * but the cheapest-to-scan in any such group.  This primarily gets rid
+	 * of indexes that include the interesting columns but also irrelevant
+	 * columns.  (In situations where the DBA has gone overboard on creating
+	 * variant indexes, this can make for a very large reduction in the number
+	 * of paths considered further.)
 	 *
-	 * You might think the condition for redundancy should be "all index
-	 * conditions already used", not "any", but this turns out to be wrong.
-	 * For example, if we use an index on A, and then come to an index with
-	 * conditions on A and B, the only way that the second index can be later
-	 * in the selectivity-order sort is if the condition on B is completely
-	 * non-selective.  In any case, we'd surely be drastically misestimating
-	 * the selectivity if we count the same condition twice.
+	 * We then sort the surviving paths with the cheapest-to-scan first,
+	 * and for each path, consider using that path alone as the basis for
+	 * a bitmap scan.  Then we consider bitmap AND scans formed from that
+	 * path plus each subsequent (higher-cost) path, adding on a subsequent
+	 * path if it results in a reduction in the estimated total scan cost.
+	 * This means we consider about O(N^2) rather than O(2^N) path
	 * combinations, which is quite tolerable, especially given than N is
+	 * usually reasonably small because of the prefiltering step.  The
+	 * cheapest of these is returned.
 	 *
-	 * We must also consider index predicate conditions in checking for
-	 * redundancy, because the estimated selectivity of a partial index
-	 * includes its predicate even if the explicit index conditions don't.
-	 * Here we have to work harder than just checking expression equality:
-	 * we check to see if any of the predicate clauses are implied by
-	 * index conditions or predicate clauses of previous paths.  This covers
-	 * cases such as a condition "x = 42" used with a plain index, followed
-	 * by a clauseless scan of a partial index "WHERE x >= 40 AND x < 50".
-	 * Also, we reject indexes that have a qual condition matching any
-	 * previously-used index's predicate (by including predicate conditions
-	 * into qualsofar).  It should be sufficient to check equality in this
-	 * case, not implication, since we've sorted the paths by selectivity
-	 * and so tighter conditions are seen first --- only for exactly equal
-	 * cases might the partial index come first.
+	 * We will only consider AND combinations in which no two indexes use
+	 * the same WHERE clause.  This is a bit of a kluge: it's needed because
+	 * costsize.c and clausesel.c aren't very smart about redundant clauses.
+	 * They will usually double-count the redundant clauses, producing a
+	 * too-small selectivity that makes a redundant AND step look like it
+	 * reduces the total cost.  Perhaps someday that code will be smarter and
+	 * we can remove this limitation.  (But note that this also defends
+	 * against flat-out duplicate input paths, which can happen because
+	 * best_inner_indexscan will find the same OR join clauses that
+	 * create_or_index_quals has pulled OR restriction clauses out of.)
 	 *
-	 * XXX the reason we need all these redundancy checks is that costsize.c
-	 * and clausesel.c aren't very smart about redundant clauses: they will
-	 * usually double-count the redundant clauses, producing a too-small
-	 * selectivity that makes a redundant AND look like it reduces the total
-	 * cost.  Perhaps someday that code will be smarter and we can remove
-	 * these heuristics.
-	 *
-	 * Note: outputting the selected sub-paths in selectivity order is a good
-	 * thing even if we weren't using that as part of the selection method,
-	 * because it makes the short-circuit case in MultiExecBitmapAnd() more
-	 * likely to apply.
+	 * For the same reason, we reject AND combinations in which an index
+	 * predicate clause duplicates another clause.  Here we find it necessary
+	 * to be even stricter: we'll reject a partial index if any of its
+	 * predicate clauses are implied by the set of WHERE clauses and predicate
+	 * clauses used so far.  This covers cases such as a condition "x = 42"
+	 * used with a plain index, followed by a clauseless scan of a partial
+	 * index "WHERE x >= 40 AND x < 50".  The partial index has been accepted
+	 * only because "x = 42" was present, and so allowing it would partially
+	 * double-count selectivity.  (We could use predicate_implied_by on
+	 * regular qual clauses too, to have a more intelligent, but much more
+	 * expensive, check for redundancy --- but in most cases simple equality
+	 * seems to suffice.)
 	 */
 
-	/* Convert list to array so we can apply qsort */
-	patharray = (Path **) palloc(npaths * sizeof(Path *));
-	i = 0;
+	/*
+	 * Extract clause usage info and detect any paths that use exactly
+	 * the same set of clauses; keep only the cheapest-to-scan of any such
+	 * groups.  The surviving paths are put into an array for qsort'ing.
+	 */
+	pathinfoarray = (PathClauseUsage **)
+		palloc(npaths * sizeof(PathClauseUsage *));
+	clauselist = NIL;
+	npaths = 0;
 	foreach(l, paths)
 	{
-		patharray[i++] = (Path *) lfirst(l);
-	}
-	qsort(patharray, npaths, sizeof(Path *), bitmap_path_comparator);
+		Path	   *ipath = (Path *) lfirst(l);
 
-	paths = list_make1(patharray[0]);
-	costsofar = bitmap_and_cost_est(root, rel, paths, outer_rel);
-	find_indexpath_quals(patharray[0], &qualsofar, &firstpred);
-	qualsofar = list_concat(qualsofar, firstpred);
-	lastcell = list_head(paths);	/* for quick deletions */
-
-	for (i = 1; i < npaths; i++)
-	{
-		Path	   *newpath = patharray[i];
-		List	   *newqual;
-		List	   *newpred;
-		Cost		newcost;
-
-		find_indexpath_quals(newpath, &newqual, &newpred);
-		if (lists_intersect(newqual, qualsofar))
-			continue;			/* consider it redundant */
-		if (newpred)
+		pathinfo = classify_index_clause_usage(ipath, &clauselist);
+		for (i = 0; i < npaths; i++)
 		{
-			bool		redundant = false;
-
-			/* we check each predicate clause separately */
-			foreach(l, newpred)
-			{
-				Node	   *np = (Node *) lfirst(l);
-
-				if (predicate_implied_by(list_make1(np), qualsofar))
-				{
-					redundant = true;
-					break;		/* out of inner loop */
-				}
-			}
-			if (redundant)
-				continue;
+			if (bms_equal(pathinfo->clauseids, pathinfoarray[i]->clauseids))
+				break;
 		}
-		/* tentatively add newpath to paths, so we can estimate cost */
-		paths = lappend(paths, newpath);
-		newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
-		if (newcost < costsofar)
+		if (i < npaths)
 		{
-			/* keep newpath in paths, update subsidiary variables */
-			costsofar = newcost;
-			qualsofar = list_concat(list_concat(qualsofar, newqual), newpred);
-			lastcell = lnext(lastcell);
+			/* duplicate clauseids, keep the cheaper one */
+			Cost		ncost;
+			Cost		ocost;
+			Selectivity nselec;
+			Selectivity oselec;
+
+			cost_bitmap_tree_node(pathinfo->path, &ncost, &nselec);
+			cost_bitmap_tree_node(pathinfoarray[i]->path, &ocost, &oselec);
+			if (ncost < ocost)
+				pathinfoarray[i] = pathinfo;
 		}
 		else
 		{
-			/* reject newpath, remove it from paths list */
-			paths = list_delete_cell(paths, lnext(lastcell), lastcell);
+			/* not duplicate clauseids, add to array */
+			pathinfoarray[npaths++] = pathinfo;
 		}
-		Assert(lnext(lastcell) == NULL);
 	}
 
-	if (list_length(paths) == 1)
-		return (Path *) linitial(paths);	/* no need for AND */
-	return (Path *) create_bitmap_and_path(root, rel, paths);
+	/* If only one surviving path, we're done */
+	if (npaths == 1)
+		return pathinfoarray[0]->path;
+
+	/* Sort the surviving paths by index access cost */
+	qsort(pathinfoarray, npaths, sizeof(PathClauseUsage *),
+		  path_usage_comparator);
+
+	/*
+	 * For each surviving index, consider it as an "AND group leader", and
+	 * see whether adding on any of the later indexes results in an AND path
+	 * with cheaper total cost than before.  Then take the cheapest AND group.
+	 */
+	for (i = 0; i < npaths; i++)
+	{
+		Cost		costsofar;
+		List	   *qualsofar;
+		Bitmapset  *clauseidsofar;
+		ListCell   *lastcell;
+
+		pathinfo = pathinfoarray[i];
+		paths = list_make1(pathinfo->path);
+		costsofar = bitmap_scan_cost_est(root, rel, pathinfo->path, outer_rel);
+		qualsofar = list_concat(list_copy(pathinfo->quals),
+								list_copy(pathinfo->preds));
+		clauseidsofar = bms_copy(pathinfo->clauseids);
+		lastcell = list_head(paths);	/* for quick deletions */
+
+		for (j = i+1; j < npaths; j++)
+		{
+			Cost		newcost;
+
+			pathinfo = pathinfoarray[j];
+			/* Check for redundancy */
+			if (bms_overlap(pathinfo->clauseids, clauseidsofar))
+				continue;		/* consider it redundant */
+			if (pathinfo->preds)
+			{
+				bool		redundant = false;
+
+				/* we check each predicate clause separately */
+				foreach(l, pathinfo->preds)
+				{
+					Node	   *np = (Node *) lfirst(l);
+
+					if (predicate_implied_by(list_make1(np), qualsofar))
+					{
+						redundant = true;
+						break;	/* out of inner foreach loop */
+					}
+				}
+				if (redundant)
+					continue;
+			}
+			/* tentatively add new path to paths, so we can estimate cost */
+			paths = lappend(paths, pathinfo->path);
+			newcost = bitmap_and_cost_est(root, rel, paths, outer_rel);
+			if (newcost < costsofar)
+			{
+				/* keep new path in paths, update subsidiary variables */
+				costsofar = newcost;
+				qualsofar = list_concat(qualsofar,
+										list_copy(pathinfo->quals));
+				qualsofar = list_concat(qualsofar,
+										list_copy(pathinfo->preds));
+				clauseidsofar = bms_add_members(clauseidsofar,
+												pathinfo->clauseids);
+				lastcell = lnext(lastcell);
+			}
+			else
+			{
+				/* reject new path, remove it from paths list */
+				paths = list_delete_cell(paths, lnext(lastcell), lastcell);
+			}
+			Assert(lnext(lastcell) == NULL);
+		}
+
+		/* Keep the cheapest AND-group (or singleton) */
+		if (i == 0 || costsofar < bestcost)
+		{
+			bestpaths = paths;
+			bestcost = costsofar;
+		}
+
+		/* some easy cleanup (we don't try real hard though) */
+		list_free(qualsofar);
+	}
+
+	if (list_length(bestpaths) == 1)
+		return (Path *) linitial(bestpaths);	/* no need for AND */
+	return (Path *) create_bitmap_and_path(root, rel, bestpaths);
 }
 
-/* qsort comparator to sort in increasing selectivity order */
+/* qsort comparator to sort in increasing index access cost order */
 static int
-bitmap_path_comparator(const void *a, const void *b)
+path_usage_comparator(const void *a, const void *b)
 {
-	Path	   *pa = *(Path *const *) a;
-	Path	   *pb = *(Path *const *) b;
+	PathClauseUsage *pa = *(PathClauseUsage *const *) a;
+	PathClauseUsage *pb = *(PathClauseUsage *const *) b;
 	Cost		acost;
 	Cost		bcost;
 	Selectivity aselec;
 	Selectivity bselec;
 
-	cost_bitmap_tree_node(pa, &acost, &aselec);
-	cost_bitmap_tree_node(pb, &bcost, &bselec);
+	cost_bitmap_tree_node(pa->path, &acost, &aselec);
+	cost_bitmap_tree_node(pb->path, &bcost, &bselec);
 
 	/*
-	 * If selectivities are the same, sort by cost.  (Note: there used to be
-	 * logic here to do "fuzzy comparison", but that's a bad idea because it
-	 * fails to be transitive, which will confuse qsort terribly.)
+	 * If costs are the same, sort by selectivity.
 	 */
-	if (aselec < bselec)
-		return -1;
-	if (aselec > bselec)
-		return 1;
-
 	if (acost < bcost)
 		return -1;
 	if (acost > bcost)
 		return 1;
 
+	if (aselec < bselec)
+		return -1;
+	if (aselec > bselec)
+		return 1;
+
 	return 0;
 }
 
 /*
- * Estimate the cost of actually executing a BitmapAnd with the given
+ * Estimate the cost of actually executing a bitmap scan with a single
+ * index path (no BitmapAnd, at least not at this level).
+ */
+static Cost
+bitmap_scan_cost_est(PlannerInfo *root, RelOptInfo *rel,
+					 Path *ipath, RelOptInfo *outer_rel)
+{
+	Path		bpath;
+
+	cost_bitmap_heap_scan(&bpath, root, rel, ipath, outer_rel);
+
+	return bpath.total_cost;
+}
+
+/*
+ * Estimate the cost of actually executing a BitmapAnd scan with the given
  * inputs.
  */
 static Cost
@@ -749,11 +838,65 @@ bitmap_and_cost_est(PlannerInfo *root, RelOptInfo *rel,
 	return bpath.total_cost;
 }
 
+
+/*
+ * classify_index_clause_usage
+ *	  Construct a PathClauseUsage struct describing the WHERE clauses and
+ *	  index predicate clauses used by the given indexscan path.
+ *	  We consider two clauses the same if they are equal().
+ *
+ * At some point we might want to migrate this info into the Path data
+ * structure proper, but for the moment it's only needed within
+ * choose_bitmap_and().
+ *
+ * *clauselist is used and expanded as needed to identify all the distinct
+ * clauses seen across successive calls.  Caller must initialize it to NIL
+ * before first call of a set.
+ */
+static PathClauseUsage *
+classify_index_clause_usage(Path *path, List **clauselist)
+{
+	PathClauseUsage *result;
+	Bitmapset  *clauseids;
+	ListCell   *lc;
+
+	result = (PathClauseUsage *) palloc(sizeof(PathClauseUsage));
+	result->path = path;
+
+	/* Recursively find the quals and preds used by the path */
+	result->quals = NIL;
+	result->preds = NIL;
+	find_indexpath_quals(path, &result->quals, &result->preds);
+
+	/* Build up a bitmapset representing the quals and preds */
+	clauseids = NULL;
+	foreach(lc, result->quals)
+	{
+		Node	   *node = (Node *) lfirst(lc);
+
+		clauseids = bms_add_member(clauseids,
+								   find_list_position(node, clauselist));
+	}
+	foreach(lc, result->preds)
+	{
+		Node	   *node = (Node *) lfirst(lc);
+
+		clauseids = bms_add_member(clauseids,
+								   find_list_position(node, clauselist));
+	}
+	result->clauseids = clauseids;
+
+	return result;
+}
+
+
 /*
  * find_indexpath_quals
  *
  * Given the Path structure for a plain or bitmap indexscan, extract lists
  * of all the indexquals and index predicate conditions used in the Path.
+ * These are appended to the initial contents of *quals and *preds (hence
+ * caller should initialize those to NIL).
  *
  * This is sort of a simplified version of make_restrictinfo_from_bitmapqual;
  * here, we are not trying to produce an accurate representation of the AND/OR
@@ -766,45 +909,32 @@ static void
 find_indexpath_quals(Path *bitmapqual, List **quals, List **preds)
 {
-	ListCell   *l;
-
-	*quals = NIL;
-	*preds = NIL;
-
 	if (IsA(bitmapqual, BitmapAndPath))
 	{
 		BitmapAndPath *apath = (BitmapAndPath *) bitmapqual;
+		ListCell   *l;
 
 		foreach(l, apath->bitmapquals)
 		{
-			List	   *subquals;
-			List	   *subpreds;
-
-			find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds);
-			*quals = list_concat(*quals, subquals);
-			*preds = list_concat(*preds, subpreds);
+			find_indexpath_quals((Path *) lfirst(l), quals, preds);
 		}
 	}
 	else if (IsA(bitmapqual, BitmapOrPath))
 	{
 		BitmapOrPath *opath = (BitmapOrPath *) bitmapqual;
+		ListCell   *l;
 
 		foreach(l, opath->bitmapquals)
 		{
-			List	   *subquals;
-			List	   *subpreds;
-
-			find_indexpath_quals((Path *) lfirst(l), &subquals, &subpreds);
-			*quals = list_concat(*quals, subquals);
-			*preds = list_concat(*preds, subpreds);
+			find_indexpath_quals((Path *) lfirst(l), quals, preds);
 		}
 	}
 	else if (IsA(bitmapqual, IndexPath))
 	{
 		IndexPath  *ipath = (IndexPath *) bitmapqual;
 
-		*quals = get_actual_clauses(ipath->indexclauses);
-		*preds = list_copy(ipath->indexinfo->indpred);
+		*quals = list_concat(*quals, get_actual_clauses(ipath->indexclauses));
+		*preds = list_concat(*preds, list_copy(ipath->indexinfo->indpred));
 	}
 	else
 		elog(ERROR, "unrecognized node type: %d", nodeTag(bitmapqual));
@@ -812,31 +942,30 @@ find_indexpath_quals(Path *bitmapqual, List **quals, List **preds)
 
 
 /*
- * lists_intersect
- *	  Detect whether two lists have a nonempty intersection,
- *	  using equal() to compare members.
- *
- * This possibly should go into list.c, but it doesn't yet have any use
- * except in choose_bitmap_and.
+ * find_list_position
+ *	  Return the given node's position (counting from 0) in the given
+ *	  list of nodes.  If it's not equal() to any existing list member,
+ *	  add it at the end, and return that position.
 */
-static bool
-lists_intersect(List *list1, List *list2)
+static int
+find_list_position(Node *node, List **nodelist)
 {
-	ListCell   *cell1;
+	int			i;
+	ListCell   *lc;
 
-	foreach(cell1, list1)
+	i = 0;
+	foreach(lc, *nodelist)
 	{
-		void	   *datum1 = lfirst(cell1);
-		ListCell   *cell2;
+		Node	   *oldnode = (Node *) lfirst(lc);
 
-		foreach(cell2, list2)
-		{
-			if (equal(lfirst(cell2), datum1))
-				return true;
-		}
+		if (equal(node, oldnode))
+			return i;
+		i++;
	}
-	return false;
+	*nodelist = lappend(*nodelist, node);
+
+	return i;
 }
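
The prefiltering step hinges on giving every path a comparable fingerprint of the clauses it uses: classify_index_clause_usage() interns each distinct clause into *clauselist via find_list_position() and records the resulting positions in a Bitmapset, so "uses exactly the same clauses" becomes bms_equal() and "shares a clause" becomes bms_overlap(). The standalone sketch below is not part of the patch; it substitutes plain C strings for PostgreSQL's Node trees and an unsigned bitmask for Bitmapset, purely to illustrate the interning idea.

#include <stdio.h>
#include <string.h>

/* Toy stand-ins for the patch's data structures (illustrative only). */
#define MAX_CLAUSES 32

static const char *clause_table[MAX_CLAUSES];	/* plays the role of *clauselist */
static int	nclauses = 0;

/* Analogous to find_list_position(): intern a clause, return its index. */
static int
find_position(const char *clause)
{
	int			i;

	for (i = 0; i < nclauses; i++)
		if (strcmp(clause_table[i], clause) == 0)
			return i;
	clause_table[nclauses] = clause;
	return nclauses++;
}

/* Analogous to classify_index_clause_usage(): clause set -> bitmask. */
static unsigned
classify(const char **clauses, int n)
{
	unsigned	mask = 0;
	int			i;

	for (i = 0; i < n; i++)
		mask |= 1u << find_position(clauses[i]);
	return mask;
}

int
main(void)
{
	const char *idx_a[] = {"x = 42"};			/* index on (x) */
	const char *idx_ab[] = {"x = 42", "y > 0"};	/* index on (x, y) */
	const char *idx_a2[] = {"x = 42"};			/* wider index, same usable quals */
	unsigned	m1 = classify(idx_a, 1);
	unsigned	m2 = classify(idx_ab, 2);
	unsigned	m3 = classify(idx_a2, 1);

	/* equal masks -> keep only the cheaper-to-scan path (prefiltering step) */
	printf("idx_a vs idx_a2 duplicate: %s\n", m1 == m3 ? "yes" : "no");
	/* overlapping masks -> never AND the two paths together (redundancy rule) */
	printf("idx_a vs idx_ab overlap:   %s\n", (m1 & m2) ? "yes" : "no");
	return 0;
}

Paths whose fingerprints compare equal are duplicates for this purpose, and only the cheaper-to-scan one survives into the qsort step.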
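
The "AND group leader" search itself can be sketched the same way. The toy cost model below (sum of per-index scan costs plus a heap-visit term proportional to the product of the selectivities) is invented for illustration and merely stands in for bitmap_scan_cost_est()/bitmap_and_cost_est(); the control flow, however, mirrors the O(N^2) strategy the patch describes: sort by scan cost, let each surviving path lead a group, greedily append later paths only while the estimated total keeps dropping, and keep the cheapest group found.

#include <stdio.h>
#include <stdlib.h>

typedef struct
{
	const char *name;
	double		scan_cost;		/* cost to build this path's bitmap */
	double		selectivity;	/* fraction of heap rows it selects */
} ToyPath;

#define HEAP_COST 1000.0		/* pretend cost of visiting the whole heap */

/* stand-in for bitmap_and_cost_est(): scan costs plus heap-visit term */
static double
group_cost(ToyPath **group, int n)
{
	double		cost = 0.0, selec = 1.0;
	int			i;

	for (i = 0; i < n; i++)
	{
		cost += group[i]->scan_cost;
		selec *= group[i]->selectivity;	/* assumes independent clauses */
	}
	return cost + selec * HEAP_COST;
}

/* sort cheapest-to-scan first, like path_usage_comparator() */
static int
by_scan_cost(const void *a, const void *b)
{
	const ToyPath *pa = *(ToyPath *const *) a;
	const ToyPath *pb = *(ToyPath *const *) b;

	return (pa->scan_cost > pb->scan_cost) - (pa->scan_cost < pb->scan_cost);
}

int
main(void)
{
	ToyPath		p[] = {
		{"idx_x", 40.0, 0.10},
		{"idx_y", 60.0, 0.20},
		{"idx_z", 90.0, 0.90},	/* barely selective: should be rejected */
	};
	ToyPath    *sorted[3] = {&p[0], &p[1], &p[2]};
	ToyPath    *best[3];
	int			bestn = 0, npaths = 3, i, j;
	double		bestcost = 0.0;

	qsort(sorted, npaths, sizeof(ToyPath *), by_scan_cost);

	for (i = 0; i < npaths; i++)	/* each path in turn leads an AND group */
	{
		ToyPath    *group[3];
		int			n = 0;
		double		costsofar;

		group[n++] = sorted[i];
		costsofar = group_cost(group, n);
		for (j = i + 1; j < npaths; j++)
		{
			double		newcost;

			group[n] = sorted[j];		/* tentatively add the later path */
			newcost = group_cost(group, n + 1);
			if (newcost < costsofar)
			{
				costsofar = newcost;	/* it pays for itself: keep it */
				n++;
			}
		}
		if (i == 0 || costsofar < bestcost)
		{
			bestcost = costsofar;
			bestn = n;
			for (j = 0; j < n; j++)
				best[j] = group[j];
		}
	}

	printf("best group (cost %.1f):", bestcost);
	for (i = 0; i < bestn; i++)
		printf(" %s", best[i]->name);
	printf("\n");
	return 0;
}

Because every surviving path is also tried as a singleton leader and later paths are only kept when they lower the estimate, the chosen group can never cost more than the best single-index bitmap scan.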