1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-12 21:01:52 +03:00

Support index-only scans using the visibility map to avoid heap fetches.

When a btree index contains all columns required by the query, and the
visibility map shows that all tuples on a target heap page are
visible-to-all, we don't need to fetch that heap page.  This patch depends
on the previous patches that made the visibility map reliable.

There's a fair amount left to do here, notably trying to figure out a less
chintzy way of estimating the cost of an index-only scan, but the core
functionality seems ready to commit.

Robert Haas and Ibrar Ahmed, with some previous work by Heikki Linnakangas.
This commit is contained in:
Tom Lane
2011-10-07 20:13:02 -04:00
parent caa1054df8
commit a2822fb933
34 changed files with 704 additions and 203 deletions

View File

@ -110,6 +110,7 @@ Cost disable_cost = 1.0e10;
bool enable_seqscan = true;
bool enable_indexscan = true;
bool enable_indexonlyscan = true;
bool enable_bitmapscan = true;
bool enable_tidscan = true;
bool enable_sort = true;
@ -119,6 +120,9 @@ bool enable_material = true;
bool enable_mergejoin = true;
bool enable_hashjoin = true;
/* Possibly this should become a GUC too */
static double visibility_fraction = 0.9;
typedef struct
{
PlannerInfo *root;
@ -210,6 +214,7 @@ cost_seqscan(Path *path, PlannerInfo *root,
* 'index' is the index to be used
* 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
* 'indexOrderBys' is the list of ORDER BY operators for amcanorderbyop indexes
* 'indexonly' is true if it's an index-only scan
* 'outer_rel' is the outer relation when we are considering using the index
* scan as the inside of a nestloop join (hence, some of the indexQuals
* are join clauses, and we should expect repeated scans of the index);
@ -232,6 +237,7 @@ cost_index(IndexPath *path, PlannerInfo *root,
IndexOptInfo *index,
List *indexQuals,
List *indexOrderBys,
bool indexonly,
RelOptInfo *outer_rel)
{
RelOptInfo *baserel = index->rel;
@ -314,6 +320,12 @@ cost_index(IndexPath *path, PlannerInfo *root,
* For partially-correlated indexes, we ought to charge somewhere between
* these two estimates. We currently interpolate linearly between the
* estimates based on the correlation squared (XXX is that appropriate?).
*
* If it's an index-only scan, then we will not need to fetch any heap
* pages for which the visibility map shows all tuples are visible.
* Unfortunately, we have no stats as to how much of the heap is
* all-visible, and that's likely to be a rather unstable number anyway.
* We use an arbitrary constant visibility_fraction to estimate this.
*----------
*/
if (outer_rel != NULL && outer_rel->rows > 1)
@ -333,6 +345,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
(double) index->pages,
root);
pages_fetched = ceil(pages_fetched * visibility_fraction);
max_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
/*
@ -352,6 +366,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
(double) index->pages,
root);
pages_fetched = ceil(pages_fetched * visibility_fraction);
min_IO_cost = (pages_fetched * spc_random_page_cost) / num_scans;
}
else
@ -365,11 +381,16 @@ cost_index(IndexPath *path, PlannerInfo *root,
(double) index->pages,
root);
pages_fetched = ceil(pages_fetched * visibility_fraction);
/* max_IO_cost is for the perfectly uncorrelated case (csquared=0) */
max_IO_cost = pages_fetched * spc_random_page_cost;
/* min_IO_cost is for the perfectly correlated case (csquared=1) */
pages_fetched = ceil(indexSelectivity * (double) baserel->pages);
pages_fetched = ceil(pages_fetched * visibility_fraction);
min_IO_cost = spc_random_page_cost;
if (pages_fetched > 1)
min_IO_cost += (pages_fetched - 1) * spc_seq_page_cost;

View File

@ -18,6 +18,7 @@
#include <math.h>
#include "access/skey.h"
#include "access/sysattr.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
@ -88,6 +89,7 @@ static PathClauseUsage *classify_index_clause_usage(Path *path,
List **clauselist);
static void find_indexpath_quals(Path *bitmapqual, List **quals, List **preds);
static int find_list_position(Node *node, List **nodelist);
static bool check_index_only(RelOptInfo *rel, IndexOptInfo *index);
static List *group_clauses_by_indexkey(IndexOptInfo *index,
List *clauses, List *outer_clauses,
Relids outer_relids,
@ -314,6 +316,8 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
bool useful_predicate;
bool found_clause;
bool index_is_ordered;
bool index_only_scan = false;
bool checked_index_only = false;
/*
* Check that index supports the desired scan type(s)
@ -438,6 +442,10 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
*/
if (found_clause || useful_pathkeys != NIL || useful_predicate)
{
/* First, detect whether index-only scan is possible */
index_only_scan = check_index_only(rel, index);
checked_index_only = true;
ipath = create_index_path(root, index,
restrictclauses,
orderbyclauses,
@ -445,6 +453,7 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
index_is_ordered ?
ForwardScanDirection :
NoMovementScanDirection,
index_only_scan,
outer_rel);
result = lappend(result, ipath);
}
@ -462,11 +471,15 @@ find_usable_indexes(PlannerInfo *root, RelOptInfo *rel,
index_pathkeys);
if (useful_pathkeys != NIL)
{
if (!checked_index_only)
index_only_scan = check_index_only(rel, index);
ipath = create_index_path(root, index,
restrictclauses,
NIL,
useful_pathkeys,
BackwardScanDirection,
index_only_scan,
outer_rel);
result = lappend(result, ipath);
}
@ -1040,6 +1053,82 @@ find_list_position(Node *node, List **nodelist)
}
/*
 * check_index_only
 *		Determine whether an index-only scan is possible for this index.
 */
static bool
check_index_only(RelOptInfo *rel, IndexOptInfo *index)
{
	bool		covered;
	Bitmapset  *needed_attrs = NULL;
	Bitmapset  *available_attrs = NULL;
	ListCell   *cell;
	int			col;

	/* The feature must be enabled, and the AM must be able to return tuples */
	if (!enable_indexonlyscan || !index->amcanreturn)
		return false;

	/*
	 * Check that every attribute of the relation we will need is obtainable
	 * from the index.
	 *
	 * XXX this is overly conservative for partial indexes, since we will
	 * consider attributes involved in the index predicate as required even
	 * though the predicate won't need to be checked at runtime.  (The same
	 * is true for attributes used only in index quals, if we are certain
	 * that the index is not lossy.)  However, it would be quite expensive
	 * to determine that accurately at this point, so for now we take the
	 * easy way out.
	 */

	/*
	 * Collect the attributes needed for joins or final output.  Note: we
	 * must look at reltargetlist, not the attr_needed data, because
	 * attr_needed isn't computed for inheritance child rels.
	 */
	pull_varattnos((Node *) rel->reltargetlist, rel->relid, &needed_attrs);

	/* ... plus the attributes referenced by restriction clauses. */
	foreach(cell, rel->baserestrictinfo)
	{
		RestrictInfo *rinfo = (RestrictInfo *) lfirst(cell);

		pull_varattnos((Node *) rinfo->clause, rel->relid, &needed_attrs);
	}

	/* Build the set of columns the index can supply. */
	for (col = 0; col < index->ncolumns; col++)
	{
		int			keyattno = index->indexkeys[col];

		/*
		 * For the moment, we just ignore index expressions.  It might be
		 * nice to do something with them, later.  We also ignore index
		 * columns that are system columns (such as OID), because the
		 * virtual-tuple coding used by IndexStoreHeapTuple() can't deal
		 * with them.
		 */
		if (keyattno > 0)
			available_attrs =
				bms_add_member(available_attrs,
							   keyattno - FirstLowInvalidHeapAttributeNumber);
	}

	/* The scan is index-only capable iff the index covers all needed attrs */
	covered = bms_is_subset(needed_attrs, available_attrs);

	bms_free(needed_attrs);
	bms_free(available_attrs);

	return covered;
}
/****************************************************************************
* ---- ROUTINES TO CHECK RESTRICTIONS ----
****************************************************************************/