1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-02 04:21:28 +03:00

Allow Pin/UnpinBuffer to operate in a lockfree manner.

Pinning/Unpinning a buffer is a very frequent operation; especially in
read-mostly cache resident workloads. Benchmarking shows that in various
scenarios the spinlock protecting a buffer header's state becomes a
significant bottleneck. The problem can be reproduced with pgbench -S on
larger machines, but can be considerably worse for queries which touch
the same buffers over and over at a high frequency (e.g. nested loops
over a small inner table).

To allow atomic operations to be used, cram BufferDesc's flags,
usage_count, buf_hdr_lock, refcount into a single 32bit atomic variable;
that allows them to be manipulated together using 32bit compare-and-swap
operations. This requires reducing MAX_BACKENDS to 2^18-1 (which could
be lifted by using a 64bit field, but it's not a realistic configuration
atm).

As not all operations can easily be implemented in a lockfree manner,
implement the previous buf_hdr_lock via a flag bit in the atomic
variable. That way we can continue to lock the header in places where
it's needed, but can get away without acquiring it in the more frequent
hot-paths.  There are some additional operations which can be done without
the lock, but aren't in this patch; the most important places are
covered, though.

As bufmgr.c now essentially re-implements spinlocks, abstract the delay
logic from s_lock.c into something more generic. It already has two
users, and more are coming up; there's a follow-up patch for lwlock.c at
least.

This patch is based on a proof-of-concept written by me, which Alexander
Korotkov made into a fully working patch; the committed version is again
revised by me.  Benchmarking and testing has, amongst others, been
provided by Dilip Kumar, Alexander Korotkov, Robert Haas.

On a large x86 system improvements for readonly pgbench, with a high
client count, of a factor of 8 have been observed.

Author: Alexander Korotkov and Andres Freund
Discussion: 2400449.GjM57CE0Yg@dinodell
This commit is contained in:
Andres Freund
2016-04-10 20:12:32 -07:00
parent cf223c3bf5
commit 48354581a4
10 changed files with 628 additions and 363 deletions

View File

@@ -98,7 +98,8 @@ typedef struct BufferAccessStrategyData
/* Prototypes for internal functions */
static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
static BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy,
uint32 *buf_state);
static void AddBufferToRing(BufferAccessStrategy strategy,
BufferDesc *buf);
@@ -180,11 +181,12 @@ ClockSweepTick(void)
* return the buffer with the buffer header spinlock still held.
*/
BufferDesc *
StrategyGetBuffer(BufferAccessStrategy strategy)
StrategyGetBuffer(BufferAccessStrategy strategy, uint32 *buf_state)
{
BufferDesc *buf;
int bgwprocno;
int trycounter;
uint32 local_buf_state; /* to avoid repeated (de-)referencing */
/*
* If given a strategy object, see whether it can select a buffer. We
@@ -192,7 +194,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
*/
if (strategy != NULL)
{
buf = GetBufferFromRing(strategy);
buf = GetBufferFromRing(strategy, buf_state);
if (buf != NULL)
return buf;
}
@@ -279,14 +281,16 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
* it before we got to it. It's probably impossible altogether as
* of 8.3, but we'd better check anyway.)
*/
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count == 0)
local_buf_state = LockBufHdr(buf);
if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
&& BUF_STATE_GET_USAGECOUNT(local_buf_state) == 0)
{
if (strategy != NULL)
AddBufferToRing(strategy, buf);
*buf_state = local_buf_state;
return buf;
}
UnlockBufHdr(buf);
UnlockBufHdr(buf, local_buf_state);
}
}
@@ -295,19 +299,20 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
trycounter = NBuffers;
for (;;)
{
buf = GetBufferDescriptor(ClockSweepTick());
/*
* If the buffer is pinned or has a nonzero usage_count, we cannot use
* it; decrement the usage_count (unless pinned) and keep scanning.
*/
LockBufHdr(buf);
if (buf->refcount == 0)
local_buf_state = LockBufHdr(buf);
if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0)
{
if (buf->usage_count > 0)
if (BUF_STATE_GET_USAGECOUNT(local_buf_state) != 0)
{
buf->usage_count--;
local_buf_state -= BUF_USAGECOUNT_ONE;
trycounter = NBuffers;
}
else
@@ -315,6 +320,7 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
/* Found a usable buffer */
if (strategy != NULL)
AddBufferToRing(strategy, buf);
*buf_state = local_buf_state;
return buf;
}
}
@@ -327,10 +333,10 @@ StrategyGetBuffer(BufferAccessStrategy strategy)
* probably better to fail than to risk getting stuck in an
* infinite loop.
*/
UnlockBufHdr(buf);
UnlockBufHdr(buf, local_buf_state);
elog(ERROR, "no unpinned buffers available");
}
UnlockBufHdr(buf);
UnlockBufHdr(buf, local_buf_state);
}
}
@@ -585,10 +591,12 @@ FreeAccessStrategy(BufferAccessStrategy strategy)
* The bufhdr spin lock is held on the returned buffer.
*/
static BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy)
GetBufferFromRing(BufferAccessStrategy strategy, uint32 *buf_state)
{
BufferDesc *buf;
Buffer bufnum;
uint32 local_buf_state; /* to avoid repeated (de-)referencing */
/* Advance to next ring slot */
if (++strategy->current >= strategy->ring_size)
@@ -616,13 +624,15 @@ GetBufferFromRing(BufferAccessStrategy strategy)
* shouldn't re-use it.
*/
buf = GetBufferDescriptor(bufnum - 1);
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count <= 1)
local_buf_state = LockBufHdr(buf);
if (BUF_STATE_GET_REFCOUNT(local_buf_state) == 0
&& BUF_STATE_GET_USAGECOUNT(local_buf_state) <= 1)
{
strategy->current_was_in_ring = true;
*buf_state = local_buf_state;
return buf;
}
UnlockBufHdr(buf);
UnlockBufHdr(buf, local_buf_state);
/*
* Tell caller to allocate a new buffer with the normal allocation