
Make large sequential scans and VACUUMs work in a limited-size "ring" of
buffers, rather than blowing out the whole shared-buffer arena.  Aside from
avoiding cache spoliation, this fixes the problem that VACUUM formerly tended
to cause a WAL flush for every page it modified, because we had it hacked to
use only a single buffer.  Those flushes will now occur only once per
ring-ful.  The exact ring size, and the threshold for seqscans to switch into
the ring usage pattern, remain under debate; but the infrastructure seems
done.  The key bit of infrastructure is a new optional BufferAccessStrategy
object that can be passed to ReadBuffer operations; this replaces the former
StrategyHintVacuum API.
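
The API change, in practice, is that callers which know they are doing a bulk
operation create a BufferAccessStrategy and hand it to their ReadBuffer calls.
A minimal sketch of such a caller follows; it is an illustration only (not
code from this commit), and it assumes the strategy-aware ReadBuffer variant
ReadBufferWithStrategy() in bufmgr:

/* Hypothetical caller, for illustration only */
static void
scan_whole_relation(Relation rel, BlockNumber nblocks)
{
	BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
	BlockNumber blkno;

	for (blkno = 0; blkno < nblocks; blkno++)
	{
		Buffer		buf = ReadBufferWithStrategy(rel, blkno, strategy);

		/* ... examine the page ... */
		ReleaseBuffer(buf);
	}

	FreeAccessStrategy(strategy);
}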

This patch also changes the buffer usage-count methodology a bit: we now
advance usage_count when first pinning a buffer, rather than when last
unpinning it.  To preserve the behavior that a buffer's lifetime starts to
decrease when it's released, the clock sweep code is modified to not decrement
usage_count of pinned buffers.
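
To make the new accounting concrete, here is a schematic, hypothetical
fragment showing both halves of the rule.  The real logic lives in bufmgr.c's
PinBuffer and in StrategyGetBuffer's clock sweep (visible in the diff below);
details such as the BM_MAX_USAGE_COUNT cap and per-backend pin bookkeeping are
simplified away here:

/* Illustration only -- not code from this commit */
static void
illustrate_usage_count_rule(volatile BufferDesc *buf, bool pinning)
{
	if (pinning)
	{
		/* advance usage_count when the buffer is pinned ... */
		if (buf->usage_count < BM_MAX_USAGE_COUNT)
			buf->usage_count++;
		buf->refcount++;
	}
	else
	{
		/* ... and have the clock sweep leave pinned buffers alone */
		if (buf->refcount == 0 && buf->usage_count > 0)
			buf->usage_count--;
	}
}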

Work not done in this commit: teach GiST and GIN indexes to use the vacuum
BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
Tom Lane
2007-05-30 20:12:03 +00:00
parent 0a6f2ee84d
commit d526575f89
24 changed files with 722 additions and 262 deletions

src/backend/storage/buffer/freelist.c

@@ -9,7 +9,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.58 2007/01/05 22:19:37 momjian Exp $
* $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.59 2007/05/30 20:11:59 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@@ -39,8 +39,42 @@ typedef struct
/* Pointers to shared state */
static BufferStrategyControl *StrategyControl = NULL;
/* Backend-local state about whether currently vacuuming */
bool strategy_hint_vacuum = false;
/*
* Private (non-shared) state for managing a ring of shared buffers to re-use.
* This is currently the only kind of BufferAccessStrategy object, but someday
* we might have more kinds.
*/
typedef struct BufferAccessStrategyData
{
/* Overall strategy type */
BufferAccessStrategyType btype;
/* Number of elements in buffers[] array */
int ring_size;
/*
* Index of the "current" slot in the ring, ie, the one most recently
* returned by GetBufferFromRing.
*/
int current;
/*
* True if the buffer just returned by StrategyGetBuffer had been in
* the ring already.
*/
bool current_was_in_ring;
/*
* Array of buffer numbers. InvalidBuffer (that is, zero) indicates
* we have not yet selected a buffer for this ring slot. For allocation
* simplicity this is palloc'd together with the fixed fields of the
* struct.
*/
Buffer buffers[1]; /* VARIABLE SIZE ARRAY */
} BufferAccessStrategyData;
/* Prototypes for internal functions */
static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
static void AddBufferToRing(BufferAccessStrategy strategy,
volatile BufferDesc *buf);
/*
@@ -50,17 +84,38 @@ bool strategy_hint_vacuum = false;
* BufferAlloc(). The only hard requirement BufferAlloc() has is that
* the selected buffer must not currently be pinned by anyone.
*
* strategy is a BufferAccessStrategy object, or NULL for default strategy.
*
* To ensure that no one else can pin the buffer before we do, we must
* return the buffer with the buffer header spinlock still held. That
* means that we return with the BufFreelistLock still held, as well;
* the caller must release that lock once the spinlock is dropped.
* return the buffer with the buffer header spinlock still held. If
* *lock_held is set on exit, we have returned with the BufFreelistLock
* still held, as well; the caller must release that lock once the spinlock
* is dropped. We do it that way because releasing the BufFreelistLock
* might awaken other processes, and it would be bad to do the associated
* kernel calls while holding the buffer header spinlock.
*/
volatile BufferDesc *
StrategyGetBuffer(void)
StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
{
volatile BufferDesc *buf;
int trycounter;
/*
* If given a strategy object, see whether it can select a buffer.
* We assume strategy objects don't need the BufFreelistLock.
*/
if (strategy != NULL)
{
buf = GetBufferFromRing(strategy);
if (buf != NULL)
{
*lock_held = false;
return buf;
}
}
/* Nope, so lock the freelist */
*lock_held = true;
LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
/*
@@ -82,11 +137,16 @@ StrategyGetBuffer(void)
* If the buffer is pinned or has a nonzero usage_count, we cannot use
* it; discard it and retry. (This can only happen if VACUUM put a
* valid buffer in the freelist and then someone else used it before
* we got to it.)
* we got to it. It's probably impossible altogether as of 8.3,
* but we'd better check anyway.)
*/
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count == 0)
{
if (strategy != NULL)
AddBufferToRing(strategy, buf);
return buf;
}
UnlockBufHdr(buf);
}
@@ -101,15 +161,23 @@ StrategyGetBuffer(void)
/*
* If the buffer is pinned or has a nonzero usage_count, we cannot use
* it; decrement the usage_count and keep scanning.
* it; decrement the usage_count (unless pinned) and keep scanning.
*/
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count == 0)
return buf;
if (buf->usage_count > 0)
if (buf->refcount == 0)
{
buf->usage_count--;
trycounter = NBuffers;
if (buf->usage_count > 0)
{
buf->usage_count--;
trycounter = NBuffers;
}
else
{
/* Found a usable buffer */
if (strategy != NULL)
AddBufferToRing(strategy, buf);
return buf;
}
}
else if (--trycounter == 0)
{
@@ -132,13 +200,9 @@ StrategyGetBuffer(void)
/*
* StrategyFreeBuffer: put a buffer on the freelist
*
* The buffer is added either at the head or the tail, according to the
* at_head parameter. This allows a small amount of control over how
* quickly the buffer is reused.
*/
void
StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head)
StrategyFreeBuffer(volatile BufferDesc *buf)
{
LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
@@ -148,22 +212,10 @@ StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head)
*/
if (buf->freeNext == FREENEXT_NOT_IN_LIST)
{
if (at_head)
{
buf->freeNext = StrategyControl->firstFreeBuffer;
if (buf->freeNext < 0)
StrategyControl->lastFreeBuffer = buf->buf_id;
StrategyControl->firstFreeBuffer = buf->buf_id;
}
else
{
buf->freeNext = FREENEXT_END_OF_LIST;
if (StrategyControl->firstFreeBuffer < 0)
StrategyControl->firstFreeBuffer = buf->buf_id;
else
BufferDescriptors[StrategyControl->lastFreeBuffer].freeNext = buf->buf_id;
buf->freeNext = StrategyControl->firstFreeBuffer;
if (buf->freeNext < 0)
StrategyControl->lastFreeBuffer = buf->buf_id;
}
StrategyControl->firstFreeBuffer = buf->buf_id;
}
LWLockRelease(BufFreelistLock);
@@ -190,15 +242,6 @@ StrategySyncStart(void)
return result;
}
/*
* StrategyHintVacuum -- tell us whether VACUUM is active
*/
void
StrategyHintVacuum(bool vacuum_active)
{
strategy_hint_vacuum = vacuum_active;
}
/*
* StrategyShmemSize
@@ -274,3 +317,172 @@ StrategyInitialize(bool init)
else
Assert(!init);
}
/* ----------------------------------------------------------------
* Backend-private buffer ring management
* ----------------------------------------------------------------
*/
/*
* GetAccessStrategy -- create a BufferAccessStrategy object
*
* The object is allocated in the current memory context.
*/
BufferAccessStrategy
GetAccessStrategy(BufferAccessStrategyType btype)
{
BufferAccessStrategy strategy;
int ring_size;
/*
* Select ring size to use. See buffer/README for rationales.
* (Currently all cases are the same size, but keep this code
* structure for flexibility.)
*/
switch (btype)
{
case BAS_NORMAL:
/* if someone asks for NORMAL, just give 'em a "default" object */
return NULL;
case BAS_BULKREAD:
ring_size = 256 * 1024 / BLCKSZ;
break;
case BAS_VACUUM:
ring_size = 256 * 1024 / BLCKSZ;
break;
default:
elog(ERROR, "unrecognized buffer access strategy: %d",
(int) btype);
return NULL; /* keep compiler quiet */
}
/* Make sure ring isn't an undue fraction of shared buffers */
ring_size = Min(NBuffers / 8, ring_size);
/* Allocate the object and initialize all elements to zeroes */
strategy = (BufferAccessStrategy)
palloc0(offsetof(BufferAccessStrategyData, buffers) +
ring_size * sizeof(Buffer));
/* Set fields that don't start out zero */
strategy->btype = btype;
strategy->ring_size = ring_size;
return strategy;
}
/*
* FreeAccessStrategy -- release a BufferAccessStrategy object
*
* A simple pfree would do at the moment, but we would prefer that callers
* don't assume that much about the representation of BufferAccessStrategy.
*/
void
FreeAccessStrategy(BufferAccessStrategy strategy)
{
/* don't crash if called on a "default" strategy */
if (strategy != NULL)
pfree(strategy);
}
/*
* GetBufferFromRing -- returns a buffer from the ring, or NULL if the
* ring is empty.
*
* The bufhdr spin lock is held on the returned buffer.
*/
static volatile BufferDesc *
GetBufferFromRing(BufferAccessStrategy strategy)
{
volatile BufferDesc *buf;
Buffer bufnum;
/* Advance to next ring slot */
if (++strategy->current >= strategy->ring_size)
strategy->current = 0;
/*
* If the slot hasn't been filled yet, tell the caller to allocate
* a new buffer with the normal allocation strategy. He will then
* fill this slot by calling AddBufferToRing with the new buffer.
*/
bufnum = strategy->buffers[strategy->current];
if (bufnum == InvalidBuffer)
{
strategy->current_was_in_ring = false;
return NULL;
}
/*
* If the buffer is pinned we cannot use it under any circumstances.
*
* If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
* since our own previous usage of the ring element would have left it
* there, but it might've been decremented by clock sweep since then).
* A higher usage_count indicates someone else has touched the buffer,
* so we shouldn't re-use it.
*/
buf = &BufferDescriptors[bufnum - 1];
LockBufHdr(buf);
if (buf->refcount == 0 && buf->usage_count <= 1)
{
strategy->current_was_in_ring = true;
return buf;
}
UnlockBufHdr(buf);
/*
* Tell caller to allocate a new buffer with the normal allocation
* strategy. He'll then replace this ring element via AddBufferToRing.
*/
strategy->current_was_in_ring = false;
return NULL;
}
/*
* AddBufferToRing -- add a buffer to the buffer ring
*
* Caller must hold the buffer header spinlock on the buffer. Since this
* is called with the spinlock held, it had better be quite cheap.
*/
static void
AddBufferToRing(BufferAccessStrategy strategy, volatile BufferDesc *buf)
{
strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
}
/*
* StrategyRejectBuffer -- consider rejecting a dirty buffer
*
* When a nondefault strategy is used, the buffer manager calls this function
* when it turns out that the buffer selected by StrategyGetBuffer needs to
* be written out and doing so would require flushing WAL too. This gives us
* a chance to choose a different victim.
*
* Returns true if buffer manager should ask for a new victim, and false
* if this buffer should be written and re-used.
*/
bool
StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
{
/* We only do this in bulkread mode */
if (strategy->btype != BAS_BULKREAD)
return false;
/* Don't muck with behavior of normal buffer-replacement strategy */
if (!strategy->current_was_in_ring ||
strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
return false;
/*
* Remove the dirty buffer from the ring; necessary to prevent infinite
* loop if all ring members are dirty.
*/
strategy->buffers[strategy->current] = InvalidBuffer;
return true;
}
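
For context, the consumer of StrategyRejectBuffer lives on the bufmgr.c side
of the interface, which is not part of this file's diff.  The following is a
rough, from-memory sketch of what BufferAlloc's victim-selection loop does
with it after this patch; the exact locking and pinning steps are elided and
may differ from the real code:

/* Hypothetical paraphrase of the caller in bufmgr.c, for illustration only */
for (;;)
{
	bool		lock_held;
	volatile BufferDesc *buf = StrategyGetBuffer(strategy, &lock_held);

	/* ... pin buf, and release BufFreelistLock if lock_held ... */

	if (buf->flags & BM_DIRTY)
	{
		/* need the content lock to inspect the page LSN safely */
		LWLockAcquire(buf->content_lock, LW_SHARED);

		/*
		 * If writing this page out would force a WAL flush first, let a
		 * nondefault strategy reject it and go pick another victim.
		 */
		if (strategy != NULL &&
			XLogNeedsFlush(BufferGetLSN(buf)) &&
			StrategyRejectBuffer(strategy, buf))
		{
			LWLockRelease(buf->content_lock);
			/* ... unpin buf ... */
			continue;
		}

		/* ... otherwise write the buffer and proceed to reuse it ... */
		LWLockRelease(buf->content_lock);
	}

	break;				/* this buffer is our victim */
}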