Avoid BufferGetLSNAtomic() calls during nbtree scans.
Delay calling BufferGetLSNAtomic() until we finish reading a page that
actually contains items that btgettuple will return to the executor.
This reduces the number of calls during plain index scans (we'll only
call BufferGetLSNAtomic() when _bt_readpage returns true), and totally
eliminates calls during index-only scans, bitmap index scans, and plain
index scans of an unlogged relation.
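In outline, the patch gates the one remaining BufferGetLSNAtomic() call behind a
new so->dropPin flag that btrescan computes once per scan. The snippet below is a
condensed paraphrase of the hunks further down (not a verbatim excerpt); the
annotations are inferred from the assertions added in btgettuple and btgetbitmap:

	so->dropPin = (!scan->xs_want_itup &&               /* not an index-only scan */
				   IsMVCCSnapshot(scan->xs_snapshot) &&
				   RelationNeedsWAL(scan->indexRelation) && /* not unlogged/temp */
				   scan->heapRelation != NULL);         /* not a bitmap index scan */

Only scans with dropPin set release their leaf page pin before returning to the
executor, and only those scans need the page's LSN later (in _bt_killitems) to
detect concurrent TID recycling; all other scans skip the LSN read entirely.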
Currently, when checksums (or wal_log_hints) are enabled, acquiring a
page's LSN in BufferGetLSNAtomic() involves locking the buffer header
(which involves the use of spinlocks). Testing has shown that enabling
page-level checksums causes large regressions with certain workloads,
especially on larger multi-socket systems.
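For context, the costly path is in the buffer manager rather than in nbtree
itself. Roughly, and only as a paraphrased sketch of what bufmgr.c's
BufferGetLSNAtomic() does (not code touched by this commit; details may differ
from the current sources):

	XLogRecPtr
	BufferGetLSNAtomic(Buffer buffer)
	{
		char	   *page = BufferGetPage(buffer);
		BufferDesc *bufHdr;
		XLogRecPtr	lsn;
		uint32		buf_state;

		/* Without checksums or wal_log_hints, an unlocked read suffices */
		if (!XLogHintBitIsNeeded() || BufferIsLocal(buffer))
			return PageGetLSN(page);

		/* Otherwise take the buffer header spinlock to read the LSN atomically */
		bufHdr = GetBufferDescriptor(buffer - 1);
		buf_state = LockBufHdr(bufHdr);
		lsn = PageGetLSN(page);
		UnlockBufHdr(bufHdr, buf_state);

		return lsn;
	}

With checksums enabled, every _bt_readpage call previously went through the
LockBufHdr path, which is where the spinlock use mentioned above comes from.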
The regression isn't tied to any Postgres 18 commit. However, Postgres 18
commit 04bec894 made initdb use checksums by default, so it seems prudent to
address the problem now.
Author: Peter Geoghegan <pg@bowt.ie>
Reviewed-By: Tomas Vondra <tomas@vondra.me>
Discussion: https://postgr.es/m/941f0190-e3c6-4622-9ac7-c04e936e5fdb@vondra.me
Discussion: https://postgr.es/m/CAH2-Wzk-Dg5XWs_jDuiHt4_7ryrSY+n=vxmHY51EVqPDFsKXmg@mail.gmail.com
src/backend/access/nbtree/nbtree.c:

@@ -228,6 +228,8 @@ btgettuple(IndexScanDesc scan, ScanDirection dir)
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	bool		res;
 
+	Assert(scan->heapRelation != NULL);
+
 	/* btree indexes are never lossy */
 	scan->xs_recheck = false;
 
@@ -289,6 +291,8 @@ btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
 	int64		ntids = 0;
 	ItemPointer heapTid;
 
+	Assert(scan->heapRelation == NULL);
+
 	/* Each loop iteration performs another primitive index scan */
 	do
 	{
@@ -393,6 +397,32 @@ btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
 		BTScanPosInvalidate(so->currPos);
 	}
 
+	/*
+	 * We prefer to eagerly drop leaf page pins before btgettuple returns.
+	 * This avoids making VACUUM wait to acquire a cleanup lock on the page.
+	 *
+	 * We cannot safely drop leaf page pins during index-only scans due to a
+	 * race condition involving VACUUM setting pages all-visible in the VM.
+	 * It's also unsafe for plain index scans that use a non-MVCC snapshot.
+	 *
+	 * When we drop pins eagerly, the mechanism that marks so->killedItems[]
+	 * index tuples LP_DEAD has to deal with concurrent TID recycling races.
+	 * The scheme used to detect unsafe TID recycling won't work when scanning
+	 * unlogged relations (since it involves saving an affected page's LSN).
+	 * Opt out of eager pin dropping during unlogged relation scans for now
+	 * (this is preferable to opting out of kill_prior_tuple LP_DEAD setting).
+	 *
+	 * Also opt out of dropping leaf page pins eagerly during bitmap scans.
+	 * Pins cannot be held for more than an instant during bitmap scans either
+	 * way, so we might as well avoid wasting cycles on acquiring page LSNs.
+	 *
+	 * See nbtree/README section on making concurrent TID recycling safe.
+	 */
+	so->dropPin = (!scan->xs_want_itup &&
+				   IsMVCCSnapshot(scan->xs_snapshot) &&
+				   RelationNeedsWAL(scan->indexRelation) &&
+				   scan->heapRelation != NULL);
+
 	so->markItemIndex = -1;
 	so->needPrimScan = false;
 	so->scanBehind = false;
src/backend/access/nbtree/nbtsearch.c:

@@ -25,7 +25,7 @@
 #include "utils/rel.h"
 
 
-static void _bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp);
+static inline void _bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so);
 static Buffer _bt_moveright(Relation rel, Relation heaprel, BTScanInsert key,
 							Buffer buf, bool forupdate, BTStack stack,
 							int access);
@@ -57,24 +57,29 @@ static bool _bt_endpoint(IndexScanDesc scan, ScanDirection dir);
 /*
  * _bt_drop_lock_and_maybe_pin()
  *
- * Unlock the buffer; and if it is safe to release the pin, do that, too.
- * This will prevent vacuum from stalling in a blocked state trying to read a
- * page when a cursor is sitting on it.
- *
- * See nbtree/README section on making concurrent TID recycling safe.
+ * Unlock so->currPos.buf.  If scan is so->dropPin, drop the pin, too.
+ * Dropping the pin prevents VACUUM from blocking on acquiring a cleanup lock.
  */
-static void
-_bt_drop_lock_and_maybe_pin(IndexScanDesc scan, BTScanPos sp)
+static inline void
+_bt_drop_lock_and_maybe_pin(Relation rel, BTScanOpaque so)
 {
-	_bt_unlockbuf(scan->indexRelation, sp->buf);
-
-	if (IsMVCCSnapshot(scan->xs_snapshot) &&
-		RelationNeedsWAL(scan->indexRelation) &&
-		!scan->xs_want_itup)
+	if (!so->dropPin)
 	{
-		ReleaseBuffer(sp->buf);
-		sp->buf = InvalidBuffer;
+		/* Just drop the lock (not the pin) */
+		_bt_unlockbuf(rel, so->currPos.buf);
+		return;
 	}
+
+	/*
+	 * Drop both the lock and the pin.
+	 *
+	 * Have to set so->currPos.lsn so that _bt_killitems has a way to detect
+	 * when concurrent heap TID recycling by VACUUM might have taken place.
+	 */
+	Assert(RelationNeedsWAL(rel));
+	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
+	_bt_relbuf(rel, so->currPos.buf);
+	so->currPos.buf = InvalidBuffer;
 }
 
 /*
@@ -866,8 +871,8 @@ _bt_compare(Relation rel,
  * if backwards scan, the last item) in the tree that satisfies the
  * qualifications in the scan key. On success exit, data about the
  * matching tuple(s) on the page has been loaded into so->currPos. We'll
- * drop all locks and hold onto a pin on page's buffer, except when
- * _bt_drop_lock_and_maybe_pin dropped the pin to avoid blocking VACUUM.
+ * drop all locks and hold onto a pin on page's buffer, except during
+ * so->dropPin scans, when we drop both the lock and the pin.
  * _bt_returnitem sets the next item to return to scan on success exit.
  *
  * If there are no matching items in the index, we return false, with no
@@ -1610,7 +1615,13 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 	so->currPos.currPage = BufferGetBlockNumber(so->currPos.buf);
 	so->currPos.prevPage = opaque->btpo_prev;
 	so->currPos.nextPage = opaque->btpo_next;
+	/* delay setting so->currPos.lsn until _bt_drop_lock_and_maybe_pin */
+	so->currPos.dir = dir;
+	so->currPos.nextTupleOffset = 0;
 
+	/* either moreRight or moreLeft should be set now (may be unset later) */
+	Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
+		   so->currPos.moreLeft);
 	Assert(!P_IGNORE(opaque));
 	Assert(BTScanPosIsPinned(so->currPos));
 	Assert(!so->needPrimScan);
@@ -1626,14 +1637,6 @@ _bt_readpage(IndexScanDesc scan, ScanDirection dir, OffsetNumber offnum,
 							 so->currPos.currPage);
 	}
 
-	/* initialize remaining currPos fields related to current page */
-	so->currPos.lsn = BufferGetLSNAtomic(so->currPos.buf);
-	so->currPos.dir = dir;
-	so->currPos.nextTupleOffset = 0;
-	/* either moreLeft or moreRight should be set now (may be unset later) */
-	Assert(ScanDirectionIsForward(dir) ? so->currPos.moreRight :
-		   so->currPos.moreLeft);
-
 	PredicateLockPage(rel, so->currPos.currPage, scan->xs_snapshot);
 
 	/* initialize local variables */
@@ -2107,10 +2110,9 @@ _bt_returnitem(IndexScanDesc scan, BTScanOpaque so)
  *
  * Wrapper on _bt_readnextpage that performs final steps for the current page.
  *
- * On entry, if so->currPos.buf is valid the buffer is pinned but not locked.
- * If there's no pin held, it's because _bt_drop_lock_and_maybe_pin dropped
- * the pin eagerly earlier on. The scan must have so->currPos.currPage set to
- * a valid block, in any case.
+ * On entry, so->currPos must be valid. Its buffer will be pinned, though
+ * never locked. (Actually, when so->dropPin there won't even be a pin held,
+ * though so->currPos.currPage must still be set to a valid block number.)
  */
 static bool
 _bt_steppage(IndexScanDesc scan, ScanDirection dir)
@@ -2251,12 +2253,14 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
 	 */
 	if (_bt_readpage(scan, dir, offnum, true))
 	{
+		Relation	rel = scan->indexRelation;
+
 		/*
 		 * _bt_readpage succeeded. Drop the lock (and maybe the pin) on
 		 * so->currPos.buf in preparation for btgettuple returning tuples.
 		 */
 		Assert(BTScanPosIsPinned(so->currPos));
-		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+		_bt_drop_lock_and_maybe_pin(rel, so);
 		return true;
 	}
 
@@ -2294,8 +2298,8 @@ _bt_readfirstpage(IndexScanDesc scan, OffsetNumber offnum, ScanDirection dir)
 *
 * On success exit, so->currPos is updated to contain data from the next
 * interesting page, and we return true. We hold a pin on the buffer on
- * success exit, except when _bt_drop_lock_and_maybe_pin decided it was safe
- * to eagerly drop the pin (to avoid blocking VACUUM).
+ * success exit (except during so->dropPin index scans, when we drop the pin
+ * eagerly to avoid blocking VACUUM).
 *
 * If there are no more matching records in the given direction, we drop all
 * locks and pins, invalidate so->currPos, and return false.
@@ -2413,7 +2417,7 @@ _bt_readnextpage(IndexScanDesc scan, BlockNumber blkno,
 		 */
 		Assert(so->currPos.currPage == blkno);
 		Assert(BTScanPosIsPinned(so->currPos));
-		_bt_drop_lock_and_maybe_pin(scan, &so->currPos);
+		_bt_drop_lock_and_maybe_pin(rel, so);
 
 		return true;
 	}
src/backend/access/nbtree/nbtutils.c:

@@ -3335,75 +3335,71 @@ _bt_checkkeys_look_ahead(IndexScanDesc scan, BTReadPageState *pstate,
 *
 * Note that if we hold a pin on the target page continuously from initially
 * reading the items until applying this function, VACUUM cannot have deleted
- * any items from the page, and so there is no need to search left from the
- * recorded offset. (This observation also guarantees that the item is still
- * the right one to delete, which might otherwise be questionable since heap
- * TIDs can get recycled.) This holds true even if the page has been modified
- * by inserts and page splits, so there is no need to consult the LSN.
- *
- * If the pin was released after reading the page, then we re-read it. If it
- * has been modified since we read it (as determined by the LSN), we dare not
- * flag any entries because it is possible that the old entry was vacuumed
- * away and the TID was re-used by a completely different heap tuple.
+ * any items on the page, so the page's TIDs can't have been recycled by now.
+ * There's no risk that we'll confuse a new index tuple that happens to use a
+ * recycled TID with a now-removed tuple with the same TID (that used to be on
+ * this same page). We can't rely on that during scans that drop pins eagerly
+ * (so->dropPin scans), though, so we must condition setting LP_DEAD bits on
+ * the page LSN having not changed since back when _bt_readpage saw the page.
 */
 void
 _bt_killitems(IndexScanDesc scan)
 {
+	Relation	rel = scan->indexRelation;
 	BTScanOpaque so = (BTScanOpaque) scan->opaque;
 	Page		page;
 	BTPageOpaque opaque;
 	OffsetNumber minoff;
 	OffsetNumber maxoff;
-	int			i;
 	int			numKilled = so->numKilled;
 	bool		killedsomething = false;
-	bool		droppedpin PG_USED_FOR_ASSERTS_ONLY;
 
+	Assert(numKilled > 0);
 	Assert(BTScanPosIsValid(so->currPos));
+	Assert(scan->heapRelation != NULL); /* can't be a bitmap index scan */
 
-	/*
-	 * Always reset the scan state, so we don't look for same items on other
-	 * pages.
-	 */
+	/* Always invalidate so->killedItems[] before leaving so->currPos */
 	so->numKilled = 0;
 
-	if (BTScanPosIsPinned(so->currPos))
+	if (!so->dropPin)
 	{
 		/*
 		 * We have held the pin on this page since we read the index tuples,
 		 * so all we need to do is lock it. The pin will have prevented
-		 * re-use of any TID on the page, so there is no need to check the
-		 * LSN.
+		 * concurrent VACUUMs from recycling any of the TIDs on the page.
 		 */
-		droppedpin = false;
-		_bt_lockbuf(scan->indexRelation, so->currPos.buf, BT_READ);
-
-		page = BufferGetPage(so->currPos.buf);
+		Assert(BTScanPosIsPinned(so->currPos));
+		_bt_lockbuf(rel, so->currPos.buf, BT_READ);
 	}
 	else
 	{
 		Buffer		buf;
+		XLogRecPtr	latestlsn;
 
-		droppedpin = true;
-		/* Attempt to re-read the buffer, getting pin and lock. */
-		buf = _bt_getbuf(scan->indexRelation, so->currPos.currPage, BT_READ);
+		Assert(!BTScanPosIsPinned(so->currPos));
+		Assert(RelationNeedsWAL(rel));
+		buf = _bt_getbuf(rel, so->currPos.currPage, BT_READ);
 
-		page = BufferGetPage(buf);
-		if (BufferGetLSNAtomic(buf) == so->currPos.lsn)
-			so->currPos.buf = buf;
-		else
+		latestlsn = BufferGetLSNAtomic(buf);
+		Assert(!XLogRecPtrIsInvalid(so->currPos.lsn));
+		Assert(so->currPos.lsn <= latestlsn);
+		if (so->currPos.lsn != latestlsn)
 		{
-			/* Modified while not pinned means hinting is not safe. */
-			_bt_relbuf(scan->indexRelation, buf);
+			/* Modified, give up on hinting */
+			_bt_relbuf(rel, buf);
 			return;
 		}
+
+		/* Unmodified, hinting is safe */
+		so->currPos.buf = buf;
 	}
 
+	page = BufferGetPage(so->currPos.buf);
 	opaque = BTPageGetOpaque(page);
 	minoff = P_FIRSTDATAKEY(opaque);
 	maxoff = PageGetMaxOffsetNumber(page);
 
-	for (i = 0; i < numKilled; i++)
+	for (int i = 0; i < numKilled; i++)
 	{
 		int			itemIndex = so->killedItems[i];
 		BTScanPosItem *kitem = &so->currPos.items[itemIndex];
@@ -3435,7 +3431,7 @@ _bt_killitems(IndexScanDesc scan)
 			 * correctness.
 			 *
 			 * Note that the page may have been modified in almost any way
-			 * since we first read it (in the !droppedpin case), so it's
+			 * since we first read it (in the !so->dropPin case), so it's
 			 * possible that this posting list tuple wasn't a posting list
 			 * tuple when we first encountered its heap TIDs.
 			 */
@@ -3451,7 +3447,7 @@ _bt_killitems(IndexScanDesc scan)
 				 * though only in the common case where the page can't
 				 * have been concurrently modified
 				 */
-				Assert(kitem->indexOffset == offnum || !droppedpin);
+				Assert(kitem->indexOffset == offnum || !so->dropPin);
 
 				/*
 				 * Read-ahead to later kitems here.
@@ -3518,7 +3514,7 @@ _bt_killitems(IndexScanDesc scan)
 		MarkBufferDirtyHint(so->currPos.buf, true);
 	}
 
-	_bt_unlockbuf(scan->indexRelation, so->currPos.buf);
+	_bt_unlockbuf(rel, so->currPos.buf);
 }
 
src/include/access/nbtree.h:

@@ -939,7 +939,7 @@ typedef BTVacuumPostingData *BTVacuumPosting;
 * processing. This approach minimizes lock/unlock traffic. We must always
 * drop the lock to make it okay for caller to process the returned items.
 * Whether or not we can also release the pin during this window will vary.
- * We drop the pin eagerly (when safe) to avoid blocking progress by VACUUM
+ * We drop the pin (when so->dropPin) to avoid blocking progress by VACUUM
 * (see nbtree/README section about making concurrent TID recycling safe).
 * We'll always release both the lock and the pin on the current page before
 * moving on to its sibling page.
@@ -967,7 +967,7 @@ typedef struct BTScanPosData
 	BlockNumber currPage;		/* page referenced by items array */
 	BlockNumber prevPage;		/* currPage's left link */
 	BlockNumber nextPage;		/* currPage's right link */
-	XLogRecPtr	lsn;			/* currPage's LSN */
+	XLogRecPtr	lsn;			/* currPage's LSN (when so->dropPin) */
 
 	/* scan direction for the saved position's call to _bt_readpage */
 	ScanDirection dir;
@@ -1070,6 +1070,7 @@ typedef struct BTScanOpaqueData
 	/* info about killed items if any (killedItems is NULL if never used) */
 	int		   *killedItems;	/* currPos.items indexes of killed items */
 	int			numKilled;		/* number of currently stored items */
+	bool		dropPin;		/* drop leaf pin before btgettuple returns? */
 
 	/*
 	 * If we are doing an index-only scan, these are the tuple storage