mirror of
https://github.com/postgres/postgres.git
synced 2025-06-27 23:21:58 +03:00
Add TID Range Scans to support efficient scanning ranges of TIDs
This adds a new executor node named TID Range Scan. The query planner will generate paths for TID Range scans when quals are discovered on base relations which search for ranges on the table's ctid column. These ranges may be open at either end. For example, WHERE ctid >= '(10,0)'; will return all tuples on page 10 and over. To support this, two new optional callback functions have been added to table AM. scan_set_tidrange is used to set the scan range to just the given range of TIDs. scan_getnextslot_tidrange fetches the next tuple in the given range. For AMs where scanning ranges of TIDs would not make sense, these functions can be set to NULL in the TableAmRoutine. The query planner won't generate TID Range Scan Paths in that case. Author: Edmund Horner, David Rowley. Reviewed-by: David Rowley, Tomas Vondra, Tom Lane, Andres Freund, Zhihong Yu. Discussion: https://postgr.es/m/CAMyN-kB-nFTkF=VA_JPwFNo08S0d-Yk0F741S2B7LDmYAi8eyA@mail.gmail.com
This commit is contained in:
@ -1391,6 +1391,153 @@ heap_getnextslot(TableScanDesc sscan, ScanDirection direction, TupleTableSlot *s
|
||||
return true;
|
||||
}
|
||||
|
||||
void
|
||||
heap_set_tidrange(TableScanDesc sscan, ItemPointer mintid,
|
||||
ItemPointer maxtid)
|
||||
{
|
||||
HeapScanDesc scan = (HeapScanDesc) sscan;
|
||||
BlockNumber startBlk;
|
||||
BlockNumber numBlks;
|
||||
ItemPointerData highestItem;
|
||||
ItemPointerData lowestItem;
|
||||
|
||||
/*
|
||||
* For relations without any pages, we can simply leave the TID range
|
||||
* unset. There will be no tuples to scan, therefore no tuples outside
|
||||
* the given TID range.
|
||||
*/
|
||||
if (scan->rs_nblocks == 0)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Set up some ItemPointers which point to the first and last possible
|
||||
* tuples in the heap.
|
||||
*/
|
||||
ItemPointerSet(&highestItem, scan->rs_nblocks - 1, MaxOffsetNumber);
|
||||
ItemPointerSet(&lowestItem, 0, FirstOffsetNumber);
|
||||
|
||||
/*
|
||||
* If the given maximum TID is below the highest possible TID in the
|
||||
* relation, then restrict the range to that, otherwise we scan to the end
|
||||
* of the relation.
|
||||
*/
|
||||
if (ItemPointerCompare(maxtid, &highestItem) < 0)
|
||||
ItemPointerCopy(maxtid, &highestItem);
|
||||
|
||||
/*
|
||||
* If the given minimum TID is above the lowest possible TID in the
|
||||
* relation, then restrict the range to only scan for TIDs above that.
|
||||
*/
|
||||
if (ItemPointerCompare(mintid, &lowestItem) > 0)
|
||||
ItemPointerCopy(mintid, &lowestItem);
|
||||
|
||||
/*
|
||||
* Check for an empty range and protect from would be negative results
|
||||
* from the numBlks calculation below.
|
||||
*/
|
||||
if (ItemPointerCompare(&highestItem, &lowestItem) < 0)
|
||||
{
|
||||
/* Set an empty range of blocks to scan */
|
||||
heap_setscanlimits(sscan, 0, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Calculate the first block and the number of blocks we must scan. We
|
||||
* could be more aggressive here and perform some more validation to try
|
||||
* and further narrow the scope of blocks to scan by checking if the
|
||||
* lowerItem has an offset above MaxOffsetNumber. In this case, we could
|
||||
* advance startBlk by one. Likewise, if highestItem has an offset of 0
|
||||
* we could scan one fewer blocks. However, such an optimization does not
|
||||
* seem worth troubling over, currently.
|
||||
*/
|
||||
startBlk = ItemPointerGetBlockNumberNoCheck(&lowestItem);
|
||||
|
||||
numBlks = ItemPointerGetBlockNumberNoCheck(&highestItem) -
|
||||
ItemPointerGetBlockNumberNoCheck(&lowestItem) + 1;
|
||||
|
||||
/* Set the start block and number of blocks to scan */
|
||||
heap_setscanlimits(sscan, startBlk, numBlks);
|
||||
|
||||
/* Finally, set the TID range in sscan */
|
||||
ItemPointerCopy(&lowestItem, &sscan->rs_mintid);
|
||||
ItemPointerCopy(&highestItem, &sscan->rs_maxtid);
|
||||
}
|
||||
|
||||
bool
|
||||
heap_getnextslot_tidrange(TableScanDesc sscan, ScanDirection direction,
|
||||
TupleTableSlot *slot)
|
||||
{
|
||||
HeapScanDesc scan = (HeapScanDesc) sscan;
|
||||
ItemPointer mintid = &sscan->rs_mintid;
|
||||
ItemPointer maxtid = &sscan->rs_maxtid;
|
||||
|
||||
/* Note: no locking manipulations needed */
|
||||
for (;;)
|
||||
{
|
||||
if (sscan->rs_flags & SO_ALLOW_PAGEMODE)
|
||||
heapgettup_pagemode(scan, direction, sscan->rs_nkeys, sscan->rs_key);
|
||||
else
|
||||
heapgettup(scan, direction, sscan->rs_nkeys, sscan->rs_key);
|
||||
|
||||
if (scan->rs_ctup.t_data == NULL)
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* heap_set_tidrange will have used heap_setscanlimits to limit the
|
||||
* range of pages we scan to only ones that can contain the TID range
|
||||
* we're scanning for. Here we must filter out any tuples from these
|
||||
* pages that are outwith that range.
|
||||
*/
|
||||
if (ItemPointerCompare(&scan->rs_ctup.t_self, mintid) < 0)
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
|
||||
/*
|
||||
* When scanning backwards, the TIDs will be in descending order.
|
||||
* Future tuples in this direction will be lower still, so we can
|
||||
* just return false to indicate there will be no more tuples.
|
||||
*/
|
||||
if (ScanDirectionIsBackward(direction))
|
||||
return false;
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Likewise for the final page, we must filter out TIDs greater than
|
||||
* maxtid.
|
||||
*/
|
||||
if (ItemPointerCompare(&scan->rs_ctup.t_self, maxtid) > 0)
|
||||
{
|
||||
ExecClearTuple(slot);
|
||||
|
||||
/*
|
||||
* When scanning forward, the TIDs will be in ascending order.
|
||||
* Future tuples in this direction will be higher still, so we can
|
||||
* just return false to indicate there will be no more tuples.
|
||||
*/
|
||||
if (ScanDirectionIsForward(direction))
|
||||
return false;
|
||||
continue;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* if we get here it means we have a new current scan tuple, so point to
|
||||
* the proper return buffer and return the tuple.
|
||||
*/
|
||||
pgstat_count_heap_getnext(scan->rs_base.rs_rd);
|
||||
|
||||
ExecStoreBufferHeapTuple(&scan->rs_ctup, slot, scan->rs_cbuf);
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* heap_fetch - retrieve tuple with given tid
|
||||
*
|
||||
|
Reference in New Issue
Block a user