
Change locking regimen around buffer replacement.

Previously, we used an lwlock that was held from the time we began
seeking a candidate buffer until the time when we found and pinned
one, which is disastrous for concurrency.  Instead, use a spinlock
which is held just long enough to pop the freelist or advance the
clock sweep hand, and then released.  If we need to advance the clock
sweep further, we reacquire the spinlock once per buffer.
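
In miniature, the new regimen looks like the sketch below.  This is a
self-contained toy in C, not the PostgreSQL code: pthread locks stand in
for the s_lock.h spinlock and the buffer header spinlocks, and every
name prefixed with toy_ is hypothetical.  The point is the shape of the
loop: the strategy lock is held only while the clock hand advances, and
the candidate buffer is then examined under its own header lock.

/*
 * Toy sketch of the new locking regimen -- illustration only, not the
 * PostgreSQL implementation.  Build: cc -o toy toy.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>

#define NBUFFERS 8

typedef struct
{
    pthread_mutex_t header_lock;    /* stands in for the buffer header spinlock */
    int             refcount;       /* pin count */
    int             usage_count;    /* clock sweep recency counter */
} ToyBuffer;

static ToyBuffer buffers[NBUFFERS];
static pthread_spinlock_t strategy_lock;    /* stands in for buffer_strategy_lock */
static int next_victim;                     /* clock sweep hand */

/* Return an evictable buffer with its header lock held, as StrategyGetBuffer does. */
static ToyBuffer *
toy_get_victim(void)
{
    for (;;)
    {
        ToyBuffer *buf;

        /* Hold the strategy lock only long enough to advance the hand. */
        pthread_spin_lock(&strategy_lock);
        buf = &buffers[next_victim];
        if (++next_victim >= NBUFFERS)
            next_victim = 0;
        pthread_spin_unlock(&strategy_lock);

        /* Examine the candidate without holding the strategy lock. */
        pthread_mutex_lock(&buf->header_lock);
        if (buf->refcount == 0 && buf->usage_count == 0)
            return buf;             /* header lock intentionally still held */
        if (buf->refcount == 0)
            buf->usage_count--;     /* age it and keep sweeping */
        pthread_mutex_unlock(&buf->header_lock);
    }
}

int
main(void)
{
    pthread_spin_init(&strategy_lock, PTHREAD_PROCESS_PRIVATE);
    for (int i = 0; i < NBUFFERS; i++)
    {
        pthread_mutex_init(&buffers[i].header_lock, NULL);
        buffers[i].usage_count = (i < 4) ? 1 : 0;   /* first few "recently used" */
    }

    ToyBuffer *victim = toy_get_victim();

    printf("victim buffer: %d\n", (int) (victim - buffers));
    pthread_mutex_unlock(&victim->header_lock);
    return 0;
}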

This represents a significant increase in atomic operations around
buffer eviction, but it still wins on many workloads.  On others, it
may result in no gain, or even cause a regression, unless the number
of buffer mapping locks is also increased.  However, that seems like
material for a separate commit.  We may also need to consider other
methods of mitigating contention on this spinlock, such as splitting
it into multiple locks or jumping the clock sweep hand more than one
buffer at a time, but those, too, seem like separate improvements.
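
As a strawman, jumping the clock sweep hand more than one buffer at a
time might look like the fragment below, which reuses the toy
declarations from the sketch above.  VICTIM_BATCH and toy_claim_victims
are inventions for illustration, not anything in this patch: one
spinlock acquisition hands the caller a batch of consecutive candidate
slots, amortizing the atomic traffic across the batch.

/* Hypothetical fragment only -- not part of this patch or any later one. */
#define VICTIM_BATCH 4

static int
toy_claim_victims(void)
{
    int first;

    /* One lock acquisition claims VICTIM_BATCH consecutive slots. */
    pthread_spin_lock(&strategy_lock);
    first = next_victim;
    next_victim = (next_victim + VICTIM_BATCH) % NBUFFERS;
    pthread_spin_unlock(&strategy_lock);

    /* Caller scans slots first .. first + VICTIM_BATCH - 1 (mod NBUFFERS). */
    return first;
}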

Patch by me, inspired by a much larger patch from Amit Kapila.
Reviewed by Andres Freund.
commit 5d7962c679
parent 1dcfb8da09
Author: Robert Haas
Date:   2014-09-25 10:43:24 -04:00

5 changed files with 61 additions and 60 deletions

src/backend/storage/buffer/freelist.c

@@ -24,6 +24,9 @@
  */
 typedef struct
 {
+	/* Spinlock: protects the values below */
+	slock_t		buffer_strategy_lock;
+
 	/* Clock sweep hand: index of next buffer to consider grabbing */
 	int			nextVictimBuffer;
@@ -101,15 +104,10 @@ static void AddBufferToRing(BufferAccessStrategy strategy,
  * strategy is a BufferAccessStrategy object, or NULL for default strategy.
  *
  * To ensure that no one else can pin the buffer before we do, we must
- * return the buffer with the buffer header spinlock still held.  If
- * *lock_held is set on exit, we have returned with the BufFreelistLock
- * still held, as well; the caller must release that lock once the spinlock
- * is dropped.  We do it that way because releasing the BufFreelistLock
- * might awaken other processes, and it would be bad to do the associated
- * kernel calls while holding the buffer header spinlock.
+ * return the buffer with the buffer header spinlock still held.
  */
 volatile BufferDesc *
-StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
+StrategyGetBuffer(BufferAccessStrategy strategy)
 {
 	volatile BufferDesc *buf;
 	Latch	   *bgwriterLatch;
@@ -117,21 +115,17 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 	/*
 	 * If given a strategy object, see whether it can select a buffer. We
-	 * assume strategy objects don't need the BufFreelistLock.
+	 * assume strategy objects don't need buffer_strategy_lock.
 	 */
 	if (strategy != NULL)
 	{
 		buf = GetBufferFromRing(strategy);
 		if (buf != NULL)
-		{
-			*lock_held = false;
 			return buf;
-		}
 	}
 
 	/* Nope, so lock the freelist */
-	*lock_held = true;
-	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 
 	/*
 	 * We count buffer allocation requests so that the bgwriter can estimate
@@ -142,22 +136,22 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 	/*
 	 * If bgwriterLatch is set, we need to waken the bgwriter, but we should
-	 * not do so while holding BufFreelistLock; so release and re-grab.  This
-	 * is annoyingly tedious, but it happens at most once per bgwriter cycle,
-	 * so the performance hit is minimal.
+	 * not do so while holding buffer_strategy_lock; so release and re-grab.
+	 * This is annoyingly tedious, but it happens at most once per bgwriter
+	 * cycle, so the performance hit is minimal.
 	 */
 	bgwriterLatch = StrategyControl->bgwriterLatch;
 	if (bgwriterLatch)
 	{
 		StrategyControl->bgwriterLatch = NULL;
-		LWLockRelease(BufFreelistLock);
+		SpinLockRelease(&StrategyControl->buffer_strategy_lock);
 		SetLatch(bgwriterLatch);
-		LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+		SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 	}
 
 	/*
 	 * Try to get a buffer from the freelist.  Note that the freeNext fields
-	 * are considered to be protected by the BufFreelistLock not the
+	 * are considered to be protected by the buffer_strategy_lock not the
 	 * individual buffer spinlocks, so it's OK to manipulate them without
 	 * holding the spinlock.
 	 */
@@ -170,6 +164,12 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 		StrategyControl->firstFreeBuffer = buf->freeNext;
 		buf->freeNext = FREENEXT_NOT_IN_LIST;
 
+		/*
+		 * Release the lock so someone else can access the freelist (or run
+		 * the clocksweep) while we check out this buffer.
+		 */
+		SpinLockRelease(&StrategyControl->buffer_strategy_lock);
+
 		/*
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
 		 * it; discard it and retry.  (This can only happen if VACUUM put a
@@ -185,6 +185,9 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 			return buf;
 		}
 		UnlockBufHdr(buf);
+
+		/* Reacquire the lock and go around for another pass. */
+		SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 	}
 
 	/* Nothing on the freelist, so run the "clock sweep" algorithm */
@@ -199,6 +202,9 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 			StrategyControl->completePasses++;
 		}
 
+		/* Release the lock before manipulating the candidate buffer. */
+		SpinLockRelease(&StrategyControl->buffer_strategy_lock);
+
 		/*
 		 * If the buffer is pinned or has a nonzero usage_count, we cannot use
 		 * it; decrement the usage_count (unless pinned) and keep scanning.
@@ -232,6 +238,9 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 				elog(ERROR, "no unpinned buffers available");
 		}
 		UnlockBufHdr(buf);
+
+		/* Reacquire the lock and get a new candidate buffer. */
+		SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 	}
 }
@@ -241,7 +250,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 void
 StrategyFreeBuffer(volatile BufferDesc *buf)
 {
-	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 
 	/*
 	 * It is possible that we are told to put something in the freelist that
@@ -255,7 +264,7 @@ StrategyFreeBuffer(volatile BufferDesc *buf)
 		StrategyControl->firstFreeBuffer = buf->buf_id;
 	}
 
-	LWLockRelease(BufFreelistLock);
+	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
 }
 
 /*
@@ -274,7 +283,7 @@ StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
 {
 	int			result;
 
-	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 	result = StrategyControl->nextVictimBuffer;
 	if (complete_passes)
 		*complete_passes = StrategyControl->completePasses;
@@ -283,7 +292,7 @@ StrategySyncStart(uint32 *complete_passes, uint32 *num_buf_alloc)
*num_buf_alloc = StrategyControl->numBufferAllocs;
StrategyControl->numBufferAllocs = 0;
}
LWLockRelease(BufFreelistLock);
SpinLockRelease(&StrategyControl->buffer_strategy_lock);
return result;
}
@@ -299,13 +308,13 @@ void
 StrategyNotifyBgWriter(Latch *bgwriterLatch)
 {
 	/*
-	 * We acquire the BufFreelistLock just to ensure that the store appears
+	 * We acquire buffer_strategy_lock just to ensure that the store appears
 	 * atomic to StrategyGetBuffer.  The bgwriter should call this rather
 	 * infrequently, so there's no performance penalty from being safe.
 	 */
-	LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
+	SpinLockAcquire(&StrategyControl->buffer_strategy_lock);
 	StrategyControl->bgwriterLatch = bgwriterLatch;
-	LWLockRelease(BufFreelistLock);
+	SpinLockRelease(&StrategyControl->buffer_strategy_lock);
 }
@@ -370,6 +379,8 @@ StrategyInitialize(bool init)
 		 */
 		Assert(init);
 
+		SpinLockInit(&StrategyControl->buffer_strategy_lock);
+
 		/*
 		 * Grab the whole linked list of free buffers for our strategy. We
 		 * assume it was previously set up by InitBufferPool().