1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-28 23:42:10 +03:00

Reduce pinning and buffer content locking for btree scans.

Even though the main benefit of the Lehman and Yao algorithm for
btrees is that no locks need be held between page reads in an
index search, we were holding a buffer pin on each leaf page after
it was read until we were ready to read the next one.  The reason
was so that we could treat this as a weak lock to create an
"interlock" with vacuum's deletion of heap line pointers, even
though our README file pointed out that this was not necessary for
a scan using an MVCC snapshot.

The main goal of this patch is to reduce the blocking of vacuum
processes by in-progress btree index scans (including a cursor
which is idle), but the code rearrangement also allows for one
less buffer content lock to be taken when a forward scan steps from
one page to the next, which results in a small but consistent
performance improvement in many workloads.

This patch leaves behavior unchanged for some cases, which can be
addressed separately so that each case can be evaluated on its own
merits.  These unchanged cases are when a scan uses a non-MVCC
snapshot, an index-only scan, and a scan of a btree index for which
modifications are not WAL-logged.  If later patches allow  all of
these cases to drop the buffer pin after reading a leaf page, then
the btree vacuum process can be simplified; it will no longer need
the "super-exclusive" lock to delete tuples from a page.

Reviewed by Heikki Linnakangas and Kyotaro Horiguchi
This commit is contained in:
Kevin Grittner
2015-03-25 14:24:43 -05:00
parent 8217fb1441
commit 2ed5b87f96
8 changed files with 326 additions and 132 deletions

View File

@ -518,6 +518,8 @@ typedef struct BTScanPosData
{
Buffer buf; /* if valid, the buffer is pinned */
XLogRecPtr lsn; /* pos in the WAL stream when page was read */
BlockNumber currPage; /* page we've referencd by items array */
BlockNumber nextPage; /* page's right link when we scanned it */
/*
@ -551,7 +553,37 @@ typedef struct BTScanPosData
typedef BTScanPosData *BTScanPos;
#define BTScanPosIsValid(scanpos) BufferIsValid((scanpos).buf)
#define BTScanPosIsPinned(scanpos) \
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BufferIsValid((scanpos).buf) \
)
#define BTScanPosUnpin(scanpos) \
do { \
ReleaseBuffer((scanpos).buf); \
(scanpos).buf = InvalidBuffer; \
} while (0)
#define BTScanPosUnpinIfPinned(scanpos) \
do { \
if (BTScanPosIsPinned(scanpos)) \
BTScanPosUnpin(scanpos); \
} while (0)
#define BTScanPosIsValid(scanpos) \
( \
AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
!BufferIsValid((scanpos).buf)), \
BlockNumberIsValid((scanpos).currPage) \
)
#define BTScanPosInvalidate(scanpos) \
do { \
(scanpos).currPage = InvalidBlockNumber; \
(scanpos).nextPage = InvalidBlockNumber; \
(scanpos).buf = InvalidBuffer; \
(scanpos).lsn = InvalidXLogRecPtr; \
(scanpos).nextTupleOffset = 0; \
} while (0);
/* We need one of these for each equality-type SK_SEARCHARRAY scan key */
typedef struct BTArrayKeyInfo
@ -697,7 +729,7 @@ extern void _bt_preprocess_keys(IndexScanDesc scan);
extern IndexTuple _bt_checkkeys(IndexScanDesc scan,
Page page, OffsetNumber offnum,
ScanDirection dir, bool *continuescan);
extern void _bt_killitems(IndexScanDesc scan, bool haveLock);
extern void _bt_killitems(IndexScanDesc scan);
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
extern BTCycleId _bt_start_vacuum(Relation rel);
extern void _bt_end_vacuum(Relation rel);