mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-24 01:29:19 +03:00 
			
		
		
		
	Reduce pinning and buffer content locking for btree scans.
Even though the main benefit of the Lehman and Yao algorithm for btrees is that no locks need be held between page reads in an index search, we were holding a buffer pin on each leaf page after it was read until we were ready to read the next one. The reason was so that we could treat this as a weak lock to create an "interlock" with vacuum's deletion of heap line pointers, even though our README file pointed out that this was not necessary for a scan using an MVCC snapshot.

The main goal of this patch is to reduce the blocking of vacuum processes by in-progress btree index scans (including a cursor which is idle), but the code rearrangement also allows for one less buffer content lock to be taken when a forward scan steps from one page to the next, which results in a small but consistent performance improvement in many workloads.

This patch leaves behavior unchanged for some cases, which can be addressed separately so that each case can be evaluated on its own merits. These unchanged cases are: a scan using a non-MVCC snapshot, an index-only scan, and a scan of a btree index for which modifications are not WAL-logged. If later patches allow all of these cases to drop the buffer pin after reading a leaf page, then the btree vacuum process can be simplified; it will no longer need the "super-exclusive" lock to delete tuples from a page.

Reviewed by Heikki Linnakangas and Kyotaro Horiguchi
This commit is contained in:
		| @@ -518,6 +518,8 @@ typedef struct BTScanPosData | ||||
| { | ||||
| 	Buffer		buf;			/* if valid, the buffer is pinned */ | ||||
|  | ||||
| 	XLogRecPtr	lsn;			/* pos in the WAL stream when page was read */ | ||||
| 	BlockNumber currPage;		/* page we've referenced by items array */ | ||||
| 	BlockNumber nextPage;		/* page's right link when we scanned it */ | ||||
|  | ||||
| 	/* | ||||
| @@ -551,7 +553,37 @@ typedef struct BTScanPosData | ||||
|  | ||||
| typedef BTScanPosData *BTScanPos; | ||||
|  | ||||
| #define BTScanPosIsValid(scanpos) BufferIsValid((scanpos).buf) | ||||
| #define BTScanPosIsPinned(scanpos) \ | ||||
| ( \ | ||||
| 	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ | ||||
| 				!BufferIsValid((scanpos).buf)), \ | ||||
| 	BufferIsValid((scanpos).buf) \ | ||||
| ) | ||||
| #define BTScanPosUnpin(scanpos) \ | ||||
| 	do { \ | ||||
| 		ReleaseBuffer((scanpos).buf); \ | ||||
| 		(scanpos).buf = InvalidBuffer; \ | ||||
| 	} while (0) | ||||
| #define BTScanPosUnpinIfPinned(scanpos) \ | ||||
| 	do { \ | ||||
| 		if (BTScanPosIsPinned(scanpos)) \ | ||||
| 			BTScanPosUnpin(scanpos); \ | ||||
| 	} while (0) | ||||
|  | ||||
| #define BTScanPosIsValid(scanpos) \ | ||||
| ( \ | ||||
| 	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \ | ||||
| 				!BufferIsValid((scanpos).buf)), \ | ||||
| 	BlockNumberIsValid((scanpos).currPage) \ | ||||
| ) | ||||
| #define BTScanPosInvalidate(scanpos) \ | ||||
| 	do { \ | ||||
| 		(scanpos).currPage = InvalidBlockNumber; \ | ||||
| 		(scanpos).nextPage = InvalidBlockNumber; \ | ||||
| 		(scanpos).buf = InvalidBuffer; \ | ||||
| 		(scanpos).lsn = InvalidXLogRecPtr; \ | ||||
| 		(scanpos).nextTupleOffset = 0; \ | ||||
| 	} while (0); | ||||
|  | ||||
| /* We need one of these for each equality-type SK_SEARCHARRAY scan key */ | ||||
| typedef struct BTArrayKeyInfo | ||||
| @@ -697,7 +729,7 @@ extern void _bt_preprocess_keys(IndexScanDesc scan); | ||||
| extern IndexTuple _bt_checkkeys(IndexScanDesc scan, | ||||
| 			  Page page, OffsetNumber offnum, | ||||
| 			  ScanDirection dir, bool *continuescan); | ||||
| extern void _bt_killitems(IndexScanDesc scan, bool haveLock); | ||||
| extern void _bt_killitems(IndexScanDesc scan); | ||||
| extern BTCycleId _bt_vacuum_cycleid(Relation rel); | ||||
| extern BTCycleId _bt_start_vacuum(Relation rel); | ||||
| extern void _bt_end_vacuum(Relation rel); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user