mirror of
https://github.com/postgres/postgres.git
synced 2025-09-02 04:21:28 +03:00
Support index-only scans using the visibility map to avoid heap fetches.
When a btree index contains all columns required by the query, and the visibility map shows that all tuples on a target heap page are visible-to-all, we don't need to fetch that heap page. This patch depends on the previous patches that made the visibility map reliable. There's a fair amount left to do here, notably trying to figure out a less chintzy way of estimating the cost of an index-only scan, but the core functionality seems ready to commit. Robert Haas and Ibrar Ahmed, with some previous work by Heikki Linnakangas.
This commit is contained in:
@@ -93,6 +93,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
|
||||
else
|
||||
scan->orderByData = NULL;
|
||||
|
||||
scan->xs_want_itup = false; /* may be set later */
|
||||
|
||||
/*
|
||||
* During recovery we ignore killed tuples and don't bother to kill them
|
||||
* either. We do this because the xmin on the primary node could easily be
|
||||
@@ -109,6 +111,8 @@ RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys)
|
||||
|
||||
scan->opaque = NULL;
|
||||
|
||||
scan->xs_itup = NULL;
|
||||
|
||||
ItemPointerSetInvalid(&scan->xs_ctup.t_self);
|
||||
scan->xs_ctup.t_data = NULL;
|
||||
scan->xs_cbuf = InvalidBuffer;
|
||||
|
@@ -20,7 +20,9 @@
|
||||
* index_insert - insert an index tuple into a relation
|
||||
* index_markpos - mark a scan position
|
||||
* index_restrpos - restore a scan position
|
||||
* index_getnext - get the next tuple from a scan
|
||||
* index_getnext_tid - get the next TID from a scan
|
||||
* index_fetch_heap - get the scan's next heap tuple
|
||||
* index_getnext - get the next heap tuple from a scan
|
||||
* index_getbitmap - get all tuples from a scan
|
||||
* index_bulk_delete - bulk deletion of index tuples
|
||||
* index_vacuum_cleanup - post-deletion cleanup of an index
|
||||
@@ -422,13 +424,143 @@ index_restrpos(IndexScanDesc scan)
|
||||
FunctionCall1(procedure, PointerGetDatum(scan));
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* index_getnext_tid - get the next TID from a scan
|
||||
*
|
||||
* The result is the next TID satisfying the scan keys,
|
||||
* or NULL if no more matching tuples exist.
|
||||
* ----------------
|
||||
*/
|
||||
ItemPointer
|
||||
index_getnext_tid(IndexScanDesc scan, ScanDirection direction)
|
||||
{
|
||||
FmgrInfo *procedure;
|
||||
bool found;
|
||||
|
||||
SCAN_CHECKS;
|
||||
GET_SCAN_PROCEDURE(amgettuple);
|
||||
|
||||
Assert(TransactionIdIsValid(RecentGlobalXmin));
|
||||
|
||||
/*
|
||||
* The AM's gettuple proc finds the next index entry matching the scan
|
||||
* keys, and puts the TID in xs_ctup.t_self. It should also set
|
||||
* scan->xs_recheck, though we pay no attention to that here.
|
||||
*/
|
||||
found = DatumGetBool(FunctionCall2(procedure,
|
||||
PointerGetDatum(scan),
|
||||
Int32GetDatum(direction)));
|
||||
|
||||
/* Reset kill flag immediately for safety */
|
||||
scan->kill_prior_tuple = false;
|
||||
|
||||
/* If we're out of index entries, we're done */
|
||||
if (!found)
|
||||
{
|
||||
/* ... but first, release any held pin on a heap page */
|
||||
if (BufferIsValid(scan->xs_cbuf))
|
||||
{
|
||||
ReleaseBuffer(scan->xs_cbuf);
|
||||
scan->xs_cbuf = InvalidBuffer;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
pgstat_count_index_tuples(scan->indexRelation, 1);
|
||||
|
||||
/* Return the TID of the tuple we found. */
|
||||
return &scan->xs_ctup.t_self;
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* index_fetch_heap - get the scan's next heap tuple
|
||||
*
|
||||
* The result is a visible heap tuple associated with the index TID most
|
||||
* recently fetched by index_getnext_tid, or NULL if no more matching tuples
|
||||
* exist. (There can be more than one matching tuple because of HOT chains,
|
||||
* although when using an MVCC snapshot it should be impossible for more than
|
||||
* one such tuple to exist.)
|
||||
*
|
||||
* On success, the buffer containing the heap tup is pinned (the pin will be
|
||||
* dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
|
||||
* call).
|
||||
*
|
||||
* Note: caller must check scan->xs_recheck, and perform rechecking of the
|
||||
* scan keys if required. We do not do that here because we don't have
|
||||
* enough information to do it efficiently in the general case.
|
||||
* ----------------
|
||||
*/
|
||||
HeapTuple
|
||||
index_fetch_heap(IndexScanDesc scan)
|
||||
{
|
||||
ItemPointer tid = &scan->xs_ctup.t_self;
|
||||
bool all_dead = false;
|
||||
bool got_heap_tuple;
|
||||
|
||||
/* We can skip the buffer-switching logic if we're in mid-HOT chain. */
|
||||
if (!scan->xs_continue_hot)
|
||||
{
|
||||
/* Switch to correct buffer if we don't have it already */
|
||||
Buffer prev_buf = scan->xs_cbuf;
|
||||
|
||||
scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
|
||||
scan->heapRelation,
|
||||
ItemPointerGetBlockNumber(tid));
|
||||
|
||||
/*
|
||||
* Prune page, but only if we weren't already on this page
|
||||
*/
|
||||
if (prev_buf != scan->xs_cbuf)
|
||||
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
|
||||
RecentGlobalXmin);
|
||||
}
|
||||
|
||||
/* Obtain share-lock on the buffer so we can examine visibility */
|
||||
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
|
||||
got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
|
||||
scan->xs_cbuf,
|
||||
scan->xs_snapshot,
|
||||
&scan->xs_ctup,
|
||||
&all_dead,
|
||||
!scan->xs_continue_hot);
|
||||
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
if (got_heap_tuple)
|
||||
{
|
||||
/*
|
||||
* Only in a non-MVCC snapshot can more than one member of the
|
||||
* HOT chain be visible.
|
||||
*/
|
||||
scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
|
||||
pgstat_count_heap_fetch(scan->indexRelation);
|
||||
return &scan->xs_ctup;
|
||||
}
|
||||
|
||||
/* We've reached the end of the HOT chain. */
|
||||
scan->xs_continue_hot = false;
|
||||
|
||||
/*
|
||||
* If we scanned a whole HOT chain and found only dead tuples, tell index
|
||||
* AM to kill its entry for that TID (this will take effect in the next
|
||||
* amgettuple call, in index_getnext_tid). We do not do this when in
|
||||
* recovery because it may violate MVCC to do so. See comments in
|
||||
* RelationGetIndexScan().
|
||||
*/
|
||||
if (!scan->xactStartedInRecovery)
|
||||
scan->kill_prior_tuple = all_dead;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* ----------------
|
||||
* index_getnext - get the next heap tuple from a scan
|
||||
*
|
||||
* The result is the next heap tuple satisfying the scan keys and the
|
||||
* snapshot, or NULL if no more matching tuples exist. On success,
|
||||
* the buffer containing the heap tuple is pinned (the pin will be dropped
|
||||
* at the next index_getnext or index_endscan).
|
||||
* snapshot, or NULL if no more matching tuples exist.
|
||||
*
|
||||
* On success, the buffer containing the heap tup is pinned (the pin will be
|
||||
* dropped in a future index_getnext_tid, index_fetch_heap or index_endscan
|
||||
* call).
|
||||
*
|
||||
* Note: caller must check scan->xs_recheck, and perform rechecking of the
|
||||
* scan keys if required. We do not do that here because we don't have
|
||||
@@ -438,20 +570,11 @@ index_restrpos(IndexScanDesc scan)
|
||||
HeapTuple
|
||||
index_getnext(IndexScanDesc scan, ScanDirection direction)
|
||||
{
|
||||
HeapTuple heapTuple = &scan->xs_ctup;
|
||||
ItemPointer tid = &heapTuple->t_self;
|
||||
FmgrInfo *procedure;
|
||||
bool all_dead = false;
|
||||
|
||||
SCAN_CHECKS;
|
||||
GET_SCAN_PROCEDURE(amgettuple);
|
||||
|
||||
Assert(TransactionIdIsValid(RecentGlobalXmin));
|
||||
HeapTuple heapTuple;
|
||||
ItemPointer tid;
|
||||
|
||||
for (;;)
|
||||
{
|
||||
bool got_heap_tuple;
|
||||
|
||||
if (scan->xs_continue_hot)
|
||||
{
|
||||
/*
|
||||
@@ -459,86 +582,27 @@ index_getnext(IndexScanDesc scan, ScanDirection direction)
|
||||
* earlier member. Must still hold pin on current heap page.
|
||||
*/
|
||||
Assert(BufferIsValid(scan->xs_cbuf));
|
||||
Assert(ItemPointerGetBlockNumber(tid) ==
|
||||
Assert(ItemPointerGetBlockNumber(&scan->xs_ctup.t_self) ==
|
||||
BufferGetBlockNumber(scan->xs_cbuf));
|
||||
}
|
||||
else
|
||||
{
|
||||
bool found;
|
||||
Buffer prev_buf;
|
||||
/* Time to fetch the next TID from the index */
|
||||
tid = index_getnext_tid(scan, direction);
|
||||
|
||||
/*
|
||||
* If we scanned a whole HOT chain and found only dead tuples,
|
||||
* tell index AM to kill its entry for that TID. We do not do this
|
||||
* when in recovery because it may violate MVCC to do so. See
|
||||
* comments in RelationGetIndexScan().
|
||||
*/
|
||||
if (!scan->xactStartedInRecovery)
|
||||
scan->kill_prior_tuple = all_dead;
|
||||
|
||||
/*
|
||||
* The AM's gettuple proc finds the next index entry matching the
|
||||
* scan keys, and puts the TID in xs_ctup.t_self (ie, *tid). It
|
||||
* should also set scan->xs_recheck, though we pay no attention to
|
||||
* that here.
|
||||
*/
|
||||
found = DatumGetBool(FunctionCall2(procedure,
|
||||
PointerGetDatum(scan),
|
||||
Int32GetDatum(direction)));
|
||||
|
||||
/* Reset kill flag immediately for safety */
|
||||
scan->kill_prior_tuple = false;
|
||||
|
||||
/* If we're out of index entries, break out of outer loop */
|
||||
if (!found)
|
||||
/* If we're out of index entries, we're done */
|
||||
if (tid == NULL)
|
||||
break;
|
||||
|
||||
pgstat_count_index_tuples(scan->indexRelation, 1);
|
||||
|
||||
/* Switch to correct buffer if we don't have it already */
|
||||
prev_buf = scan->xs_cbuf;
|
||||
scan->xs_cbuf = ReleaseAndReadBuffer(scan->xs_cbuf,
|
||||
scan->heapRelation,
|
||||
ItemPointerGetBlockNumber(tid));
|
||||
|
||||
/*
|
||||
* Prune page, but only if we weren't already on this page
|
||||
*/
|
||||
if (prev_buf != scan->xs_cbuf)
|
||||
heap_page_prune_opt(scan->heapRelation, scan->xs_cbuf,
|
||||
RecentGlobalXmin);
|
||||
}
|
||||
|
||||
/* Obtain share-lock on the buffer so we can examine visibility */
|
||||
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_SHARE);
|
||||
got_heap_tuple = heap_hot_search_buffer(tid, scan->heapRelation,
|
||||
scan->xs_cbuf,
|
||||
scan->xs_snapshot,
|
||||
&scan->xs_ctup,
|
||||
&all_dead,
|
||||
!scan->xs_continue_hot);
|
||||
LockBuffer(scan->xs_cbuf, BUFFER_LOCK_UNLOCK);
|
||||
|
||||
if (got_heap_tuple)
|
||||
{
|
||||
/*
|
||||
* Only in a non-MVCC snapshot can more than one member of the
|
||||
* HOT chain be visible.
|
||||
*/
|
||||
scan->xs_continue_hot = !IsMVCCSnapshot(scan->xs_snapshot);
|
||||
pgstat_count_heap_fetch(scan->indexRelation);
|
||||
/*
|
||||
* Fetch the next (or only) visible heap tuple for this index entry.
|
||||
* If we don't find anything, loop around and grab the next TID from
|
||||
* the index.
|
||||
*/
|
||||
heapTuple = index_fetch_heap(scan);
|
||||
if (heapTuple != NULL)
|
||||
return heapTuple;
|
||||
}
|
||||
|
||||
/* Loop around to ask index AM for another TID */
|
||||
scan->xs_continue_hot = false;
|
||||
}
|
||||
|
||||
/* Release any held pin on a heap page */
|
||||
if (BufferIsValid(scan->xs_cbuf))
|
||||
{
|
||||
ReleaseBuffer(scan->xs_cbuf);
|
||||
scan->xs_cbuf = InvalidBuffer;
|
||||
}
|
||||
|
||||
return NULL; /* failure exit */
|
||||
|
Reference in New Issue
Block a user