Make large sequential scans and VACUUMs work in a limited-size "ring" of buffers, rather than blowing out the whole shared-buffer arena.

Aside from avoiding cache spoliation, this fixes the problem that VACUUM formerly tended to cause a WAL flush for every page it modified, because we had it hacked to use only a single buffer. Those flushes will now occur only once per ring-ful. The exact ring size, and the threshold for seqscans to switch into the ring usage pattern, remain under debate; but the infrastructure seems done.

The key bit of infrastructure is a new optional BufferAccessStrategy object that can be passed to ReadBuffer operations; this replaces the former StrategyHintVacuum API.

This patch also changes the buffer usage-count methodology a bit: we now advance usage_count when first pinning a buffer, rather than when last unpinning it. To preserve the behavior that a buffer's lifetime starts to decrease when it's released, the clock sweep code is modified to not decrement usage_count of pinned buffers.

Work not done in this commit: teach GiST and GIN indexes to use the vacuum BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
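As a concrete illustration of the new infrastructure, here is a minimal sketch of a bulk-scan caller. GetAccessStrategy(BAS_VACUUM), ReadBufferWithStrategy(), and FreeAccessStrategy() are the buffer-manager entry points this commit introduces; the surrounding function and loop are hypothetical, not code from the patch.

/*
 * Hypothetical VACUUM-style caller: all reads go through a small
 * private ring of buffers instead of competing for the whole
 * shared-buffer arena.
 */
#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

static void
scan_with_ring(Relation rel)    /* illustrative helper, not in the patch */
{
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_VACUUM);
    BlockNumber nblocks = RelationGetNumberOfBlocks(rel);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        /*
         * Buffers are recycled within the ring; a WAL flush happens
         * only when a dirty ring buffer comes up for reuse.
         */
        Buffer      buf = ReadBufferWithStrategy(rel, blkno, strategy);

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... examine or modify the page here ... */
        LockBuffer(buf, BUFFER_LOCK_UNLOCK);
        ReleaseBuffer(buf);
    }

    FreeAccessStrategy(strategy);
}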
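The usage-count change is easier to see in a toy model. The following self-contained program is illustrative only, not PostgreSQL code: ToyBuffer and every function here are invented for the example, while refcount, usage_count, and the BM_MAX_USAGE_COUNT cap mirror the buffer-header fields the commit message refers to. It encodes both revised rules: usage_count advances at pin time, and the clock sweep passes over pinned buffers without aging them.

/* Toy model of the revised pin/clock-sweep rules; illustrative only. */
#include <stdio.h>

#define NBUFFERS            8
#define BM_MAX_USAGE_COUNT  5   /* same cap as the real buffer manager */

typedef struct ToyBuffer
{
    int         refcount;       /* pin count */
    int         usage_count;    /* clock-sweep popularity counter */
} ToyBuffer;

static ToyBuffer buffers[NBUFFERS];
static int  next_victim = 0;

/*
 * New rule: usage_count advances at pin time.  (Simplified: the real
 * code bumps it only on a backend's first pin of the buffer.)
 */
static void
pin_buffer(ToyBuffer *buf)
{
    buf->refcount++;
    if (buf->usage_count < BM_MAX_USAGE_COUNT)
        buf->usage_count++;
}

static void
unpin_buffer(ToyBuffer *buf)
{
    buf->refcount--;            /* no usage_count bump here any more */
}

/* New rule: the sweep never decrements usage_count of pinned buffers. */
static int
clock_sweep(void)
{
    for (;;)
    {
        int         victim = next_victim;
        ToyBuffer  *buf = &buffers[victim];

        next_victim = (next_victim + 1) % NBUFFERS;
        if (buf->refcount == 0)
        {
            if (buf->usage_count == 0)
                return victim;  /* evictable: unpinned and aged out */
            buf->usage_count--; /* unpinned: age it, keep sweeping */
        }
        /*
         * Pinned buffers are skipped untouched, so a buffer's lifetime
         * only starts to decrease once it is released.
         */
    }
}

int
main(void)
{
    pin_buffer(&buffers[3]);    /* buffer 3 is pinned and protected */
    printf("victim: %d\n", clock_sweep());  /* never picks 3 */
    unpin_buffer(&buffers[3]);
    return 0;
}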
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.94 2007/05/03 16:45:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.95 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *	  This file contains only the public interface routines.
@@ -547,8 +547,9 @@ loop_top:
 
 			vacuum_delay_point();
 
-			buf = _hash_getbuf(rel, blkno, HASH_WRITE,
-							   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+			buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+											 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+											 info->strategy);
 			page = BufferGetPage(buf);
 			opaque = (HashPageOpaque) PageGetSpecialPointer(page);
 			Assert(opaque->hasho_bucket == cur_bucket);
@@ -596,7 +597,8 @@ loop_top:
 
 		/* If we deleted anything, try to compact free space */
 		if (bucket_dirty)
-			_hash_squeezebucket(rel, cur_bucket, bucket_blkno);
+			_hash_squeezebucket(rel, cur_bucket, bucket_blkno,
+								info->strategy);
 
 		/* Release bucket lock */
 		_hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);
 
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.57 2007/05/03 16:45:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.58 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *	  Overflow pages look like ordinary relation pages.
@@ -362,6 +362,9 @@ _hash_firstfreebit(uint32 map)
  *	Remove this overflow page from its bucket's chain, and mark the page as
  *	free.  On entry, ovflbuf is write-locked; it is released before exiting.
  *
+ *	Since this function is invoked in VACUUM, we provide an access strategy
+ *	parameter that controls fetches of the bucket pages.
+ *
  *	Returns the block number of the page that followed the given page
  *	in the bucket, or InvalidBlockNumber if no following page.
  *
@@ -370,7 +373,8 @@ _hash_firstfreebit(uint32 map)
  *	on the bucket, too.
  */
 BlockNumber
-_hash_freeovflpage(Relation rel, Buffer ovflbuf)
+_hash_freeovflpage(Relation rel, Buffer ovflbuf,
+				   BufferAccessStrategy bstrategy)
 {
 	HashMetaPage metap;
 	Buffer		metabuf;
@@ -413,8 +417,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	 */
 	if (BlockNumberIsValid(prevblkno))
 	{
-		Buffer		prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE,
-										   LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+		Buffer		prevbuf = _hash_getbuf_with_strategy(rel,
+														 prevblkno,
+														 HASH_WRITE,
+														 LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+														 bstrategy);
 		Page		prevpage = BufferGetPage(prevbuf);
 		HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);
 
@@ -424,8 +431,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 	}
 	if (BlockNumberIsValid(nextblkno))
 	{
-		Buffer		nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE,
-										   LH_OVERFLOW_PAGE);
+		Buffer		nextbuf = _hash_getbuf_with_strategy(rel,
+														 nextblkno,
+														 HASH_WRITE,
+														 LH_OVERFLOW_PAGE,
+														 bstrategy);
 		Page		nextpage = BufferGetPage(nextbuf);
 		HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);
 
@@ -434,6 +444,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
 		_hash_wrtbuf(rel, nextbuf);
 	}
 
+	/* Note: bstrategy is intentionally not used for metapage and bitmap */
+
 	/* Read the metapage so we can determine which bitmap page to use */
 	metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
 	metap = (HashMetaPage) BufferGetPage(metabuf);
@@ -558,11 +570,15 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
  *
  *	Caller must hold exclusive lock on the target bucket.  This allows
  *	us to safely lock multiple pages in the bucket.
+ *
+ *	Since this function is invoked in VACUUM, we provide an access strategy
+ *	parameter that controls fetches of the bucket pages.
  */
 void
 _hash_squeezebucket(Relation rel,
 					Bucket bucket,
-					BlockNumber bucket_blkno)
+					BlockNumber bucket_blkno,
+					BufferAccessStrategy bstrategy)
 {
 	Buffer		wbuf;
 	Buffer		rbuf = 0;
@@ -581,7 +597,11 @@ _hash_squeezebucket(Relation rel,
 	 * start squeezing into the base bucket page.
 	 */
 	wblkno = bucket_blkno;
-	wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_BUCKET_PAGE);
+	wbuf = _hash_getbuf_with_strategy(rel,
+									  wblkno,
+									  HASH_WRITE,
+									  LH_BUCKET_PAGE,
+									  bstrategy);
 	wpage = BufferGetPage(wbuf);
 	wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 
@@ -595,8 +615,10 @@ _hash_squeezebucket(Relation rel,
 	}
 
 	/*
-	 * find the last page in the bucket chain by starting at the base bucket
-	 * page and working forward.
+	 * Find the last page in the bucket chain by starting at the base bucket
+	 * page and working forward.  Note: we assume that a hash bucket chain is
+	 * usually smaller than the buffer ring being used by VACUUM, else using
+	 * the access strategy here would be counterproductive.
 	 */
 	ropaque = wopaque;
 	do
@@ -604,7 +626,11 @@ _hash_squeezebucket(Relation rel,
 		rblkno = ropaque->hasho_nextblkno;
 		if (ropaque != wopaque)
 			_hash_relbuf(rel, rbuf);
-		rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+		rbuf = _hash_getbuf_with_strategy(rel,
+										  rblkno,
+										  HASH_WRITE,
+										  LH_OVERFLOW_PAGE,
+										  bstrategy);
 		rpage = BufferGetPage(rbuf);
 		ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
 		Assert(ropaque->hasho_bucket == bucket);
@@ -644,7 +670,11 @@ _hash_squeezebucket(Relation rel,
 			return;
 		}
 
-		wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+		wbuf = _hash_getbuf_with_strategy(rel,
+										  wblkno,
+										  HASH_WRITE,
+										  LH_OVERFLOW_PAGE,
+										  bstrategy);
 		wpage = BufferGetPage(wbuf);
 		wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
 		Assert(wopaque->hasho_bucket == bucket);
@@ -688,15 +718,19 @@ _hash_squeezebucket(Relation rel,
 			/* yes, so release wbuf lock first */
 			_hash_wrtbuf(rel, wbuf);
 			/* free this overflow page (releases rbuf) */
-			_hash_freeovflpage(rel, rbuf);
+			_hash_freeovflpage(rel, rbuf, bstrategy);
 			/* done */
 			return;
 		}
 
 		/* free this overflow page, then get the previous one */
-		_hash_freeovflpage(rel, rbuf);
+		_hash_freeovflpage(rel, rbuf, bstrategy);
 
-		rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+		rbuf = _hash_getbuf_with_strategy(rel,
+										  rblkno,
+										  HASH_WRITE,
+										  LH_OVERFLOW_PAGE,
+										  bstrategy);
 		rpage = BufferGetPage(rbuf);
 		ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
 		Assert(ropaque->hasho_bucket == bucket);
 
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.67 2007/05/03 16:45:58 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.68 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *	  Postgres hash pages look like ordinary relation pages.  The opaque
@@ -214,6 +214,34 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
 	return buf;
 }
 
+/*
+ *	_hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
+ *
+ *		This is identical to _hash_getbuf() but also allows a buffer access
+ *		strategy to be specified.  We use this for VACUUM operations.
+ */
+Buffer
+_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+						   int access, int flags,
+						   BufferAccessStrategy bstrategy)
+{
+	Buffer		buf;
+
+	if (blkno == P_NEW)
+		elog(ERROR, "hash AM does not use P_NEW");
+
+	buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
+
+	if (access != HASH_NOLOCK)
+		LockBuffer(buf, access);
+
+	/* ref count and lock type are correct */
+
+	_hash_checkpage(rel, buf, flags);
+
+	return buf;
+}
+
 /*
  *	_hash_relbuf() -- release a locked buffer.
  *
@@ -840,5 +868,5 @@ _hash_splitbucket(Relation rel,
 	_hash_wrtbuf(rel, obuf);
 	_hash_wrtbuf(rel, nbuf);
 
-	_hash_squeezebucket(rel, obucket, start_oblkno);
+	_hash_squeezebucket(rel, obucket, start_oblkno, NULL);
 }