1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-02 04:21:28 +03:00

Support index-only scans using the visibility map to avoid heap fetches.

When a btree index contains all columns required by the query, and the
visibility map shows that all tuples on a target heap page are
visible-to-all, we don't need to fetch that heap page.  This patch depends
on the previous patches that made the visibility map reliable.

There's a fair amount left to do here, notably trying to figure out a less
chintzy way of estimating the cost of an index-only scan, but the core
functionality seems ready to commit.

Robert Haas and Ibrar Ahmed, with some previous work by Heikki Linnakangas.
This commit is contained in:
Tom Lane
2011-10-07 20:13:02 -04:00
parent caa1054df8
commit a2822fb933
34 changed files with 704 additions and 203 deletions

View File

@@ -93,6 +93,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
else
scan->orderByData = NULL;
scan->xs_want_itup = false; /* may be set later */
/*
* During recovery we ignore killed tuples and don't bother to kill them
* either. We do this because the xmin on the primary node could easily be
@@ -109,6 +111,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
scan->opaque = NULL;
scan->xs_itup = NULL;
ItemPointerSetInvalid(&scan->xs_ctup.t_self);
scan->xs_ctup.t_data = NULL;
scan->xs_cbuf = InvalidBuffer;

View File

@@ -20,7 +20,9 @@
* index_insert - insert an index tuple into a relation
* index_markpos - mark a scan position
* index_restrpos - restore a scan position
* index_getnext - get the next tuple from a scan
* index_getnext_tid - get the next TID from a scan
* index_fetch_heap - get the scan's next heap tuple
* index_getnext - get the next heap tuple from a scan
* index_getbitmap - get all tuples from a scan
* index_bulk_delete - bulk deletion of index tuples
* index_vacuum_cleanup - post-deletion cleanup of an index
@@ -422,13 +424,143 @@ index_restrpos(IndexScanDesc scan)
FunctionCall1(procedure, PointerGetDatum(scan));
}
/* ----------------
* index_getnext_tid - get the next TID from a scan
*
* The result is the next TID satisfying the scan keys,
* or NULL if no more matching tuples exist.
* ----------------
*/
ItemPointer
index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
{
FmgrInfo *procedure;
bool found;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(amgettuple);
Assert(TransactionIdIsValid(RecentGlobalXmin));
/*
* The AM's gettuple proc finds the next index entry matching the scan
* keys, and puts the TID in xs_ctup.t_self. It should also set
* scan->xs_recheck, though we pay no attention to that here.
*/
found = DatumGetBool(FunctionCall2(procedure,
PointerGetDatum(scan),
Int32GetDatum(direction)));
/* Reset kill flag immediately for safety */
scan->kill_prior_tuple = false;
/* If we're out of index entries, we're done */
if (!found)
{
/* ... but first, release any held pin on a heap page */
if (BufferIsValid(scan->xs_cbuf))
{
ReleaseBuffer(scan->xs_cbuf);
scan->xs_cbuf = InvalidBuffer;
}
return NULL;
}
pgstat_count_index_tuples(scan->indexRelation, 1);
/* Return the TID of the tuple we found. */
return &scan->xs_ctup.t_self;
}
/* ----------------
* index_fetch_heap - get the scan's next heap tuple
*
* The result is a visible heap tuple associated with the index TID most
* recently fetched by index_getnext_tid, or NULL if no more matching tuples
* exist. (There can be more than one matching tuple because of HOT chains,
* although when using an MVCC snapshot it should be impossible for more than
* one such tuple to exist.)
*
* On success, the buffer containing the heap tup is pinned (the pin will be
* dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
* call).
*
* Note: caller must check scan->xs_recheck, and perform rechecking of the
* scan keys if required. We do not do that here because we don't have
* enough information to do it efficiently in the general case.
* ----------------
*/
HeapTuple
index_fetch_heap(IndexScanDesc scan)
{
ItemPointer tid = &scan->xs_ctup.t_self;
bool all_dead = false;
bool got_heap_tuple;
/* We can skip the buffer-switching logic if we're in mid-HOT chain. */
if (!scan->xs_continue_hot)
{
/* Switch to correct buffer if we don't have it already */
Buffer prev_buf = scan->xs_cbuf;
scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
scan->heapRelation,
ItemPointerGetBlockNumber(tid));
/*
* Prune page, but only if we weren't already on this page
*/
if (prev_buf != scan->xs_cbuf)
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
RecentGlobalXmin);
}
/* Obtain share-lock on the buffer so we can examine visibility */
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
scan->xs_cbuf,
scan->xs_snapshot,
&scan->xs_ctup,
&all_dead,
!scan->xs_continue_hot);
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
if (got_heap_tuple)
{
/*
* Only in a non-MVCC snapshot can more than one member of the
* HOT chain be visible.
*/
scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
pgstat_count_heap_fetch(scan->indexRelation);
return &scan->xs_ctup;
}
/* We've reached the end of the HOT chain. */
scan->xs_continue_hot = false;
/*
* If we scanned a whole HOT chain and found only dead tuples, tell index
* AM to kill its entry for that TID (this will take effect in the next
* amgettuple call, in index_getnext_tid). We do not do this when in
* recovery because it may violate MVCC to do so. See comments in
* RelationGetIndexScan().
*/
if (!scan->xactStartedInRecovery)
scan->kill_prior_tuple = all_dead;
return NULL;
}
/* ----------------
* index_getnext - get the next heap tuple from a scan
*
* The result is the next heap tuple satisfying the scan keys and the
* snapshot, or NULL if no more matching tuples exist. On success,
* the buffer containing the heap tuple is pinned (the pin will be dropped
* at the next index_getnext or index_endscan).
* snapshot, or NULL if no more matching tuples exist.
*
* On success, the buffer containing the heap tup is pinned (the pin will be
* dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
* call).
*
* Note: caller must check scan->xs_recheck, and perform rechecking of the
* scan keys if required. We do not do that here because we don't have
@@ -438,20 +570,11 @@ index_restrpos(IndexScanDesc scan)
HeapTuple
index_getnext(IndexScanDesc scan, ScanDirection direction)
{
HeapTuple heapTuple = &scan->xs_ctup;
ItemPointer tid = &heapTuple->t_self;
FmgrInfo *procedure;
bool all_dead = false;
SCAN_CHECKS;
GET_SCAN_PROCEDURE(amgettuple);
Assert(TransactionIdIsValid(RecentGlobalXmin));
HeapTuple heapTuple;
ItemPointer tid;
for (;;)
{
bool got_heap_tuple;
if (scan->xs_continue_hot)
{
/*
@@ -459,86 +582,27 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
* earlier member. Must still hold pin on current heap page.
*/
Assert(BufferIsValid(scan->xs_cbuf));
Assert(ItemPointerGetBlockNumber(tid) ==
Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
BufferGetBlockNumber(scan->xs_cbuf));
}
else
{
bool found;
Buffer prev_buf;
/* Time to fetch the next TID from the index */
tid = index_getnext_tid(scan, direction);
/*
* If we scanned a whole HOT chain and found only dead tuples,
* tell index AM to kill its entry for that TID. We do not do this
* when in recovery because it may violate MVCC to do so. see
* comments in RelationGetIndexScan().
*/
if (!scan->xactStartedInRecovery)
scan->kill_prior_tuple = all_dead;
/*
* The AM's gettuple proc finds the next index entry matching the
* scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
* should also set scan->xs_recheck, though we pay no attention to
* that here.
*/
found = DatumGetBool(FunctionCall2(procedure,
PointerGetDatum(scan),
Int32GetDatum(direction)));
/* Reset kill flag immediately for safety */
scan->kill_prior_tuple = false;
/* If we're out of index entries, break out of outer loop */
if (!found)
/* If we're out of index entries, we're done */
if (tid == NULL)
break;
pgstat_count_index_tuples(scan->indexRelation, 1);
/* Switch to correct buffer if we don't have it already */
prev_buf = scan->xs_cbuf;
scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
scan->heapRelation,
ItemPointerGetBlockNumber(tid));
/*
* Prune page, but only if we weren't already on this page
*/
if (prev_buf != scan->xs_cbuf)
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
RecentGlobalXmin);
}
/* Obtain share-lock on the buffer so we can examine visibility */
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
scan->xs_cbuf,
scan->xs_snapshot,
&scan->xs_ctup,
&all_dead,
!scan->xs_continue_hot);
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
if (got_heap_tuple)
{
/*
* Only in a non-MVCC snapshot can more than one member of the
* HOT chain be visible.
*/
scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
pgstat_count_heap_fetch(scan->indexRelation);
/*
* Fetch the next (or only) visible heap tuple for this index entry.
* If we don't find anything, loop around and grab the next TID from
* the index.
*/
heapTuple = index_fetch_heap(scan);
if (heapTuple != NULL)
return heapTuple;
}
/* Loop around to ask index AM for another TID */
scan->xs_continue_hot = false;
}
/* Release any held pin on a heap page */
if (BufferIsValid(scan->xs_cbuf))
{
ReleaseBuffer(scan->xs_cbuf);
scan->xs_cbuf = InvalidBuffer;
}
return NULL; /* failure exit */