Mirror of https://github.com/postgres/postgres.git (synced 2025-07-21 16:02:15 +03:00)
Make large sequential scans and VACUUMs work in a limited-size "ring" of
buffers, rather than blowing out the whole shared-buffer arena.  Aside from
avoiding cache spoliation, this fixes the problem that VACUUM formerly tended
to cause a WAL flush for every page it modified, because we had it hacked to
use only a single buffer.  Those flushes will now occur only once per ring-ful.
The exact ring size, and the threshold for seqscans to switch into the ring
usage pattern, remain under debate; but the infrastructure seems done.  The
key bit of infrastructure is a new optional BufferAccessStrategy object that
can be passed to ReadBuffer operations; this replaces the former
StrategyHintVacuum API.

This patch also changes the buffer usage-count methodology a bit: we now
advance usage_count when first pinning a buffer, rather than when last
unpinning it.  To preserve the behavior that a buffer's lifetime starts to
decrease when it's released, the clock sweep code is modified to not decrement
usage_count of pinned buffers.

Work not done in this commit: teach GiST and GIN indexes to use the vacuum
BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
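
For orientation, here is a rough sketch (not part of the commit itself) of how a
bulk-access code path uses the new API.  The relation "rel" and the block count
"nblocks" are hypothetical stand-ins; GetAccessStrategy, ReadBufferWithStrategy,
FreeAccessStrategy and the BAS_* strategy types are the pieces this patch
introduces or uses:

    /* Illustrative sketch only; assumes "rel" is an open Relation and
     * "nblocks" is its length in blocks. */
    BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_VACUUM);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        /* reads are confined to the small ring instead of the whole arena */
        Buffer      buf = ReadBufferWithStrategy(rel, blkno, bstrategy);

        LockBuffer(buf, BUFFER_LOCK_SHARE);
        /* ... examine or clean the page ... */
        UnlockReleaseBuffer(buf);
    }

    FreeAccessStrategy(bstrategy);

Sequential scans obtain a BAS_BULKREAD strategy the same way (see the initscan()
change below); passing NULL instead of a strategy object gives the old default
behavior.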
--- a/src/backend/access/hash/hash.c
+++ b/src/backend/access/hash/hash.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.94 2007/05/03 16:45:58 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hash.c,v 1.95 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *    This file contains only the public interface routines.
@@ -547,8 +547,9 @@ loop_top:

        vacuum_delay_point();

-        buf = _hash_getbuf(rel, blkno, HASH_WRITE,
-                           LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+        buf = _hash_getbuf_with_strategy(rel, blkno, HASH_WRITE,
+                                         LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+                                         info->strategy);
         page = BufferGetPage(buf);
         opaque = (HashPageOpaque) PageGetSpecialPointer(page);
         Assert(opaque->hasho_bucket == cur_bucket);
@@ -596,7 +597,8 @@ loop_top:

         /* If we deleted anything, try to compact free space */
         if (bucket_dirty)
-            _hash_squeezebucket(rel, cur_bucket, bucket_blkno);
+            _hash_squeezebucket(rel, cur_bucket, bucket_blkno,
+                                info->strategy);

         /* Release bucket lock */
         _hash_droplock(rel, bucket_blkno, HASH_EXCLUSIVE);
--- a/src/backend/access/hash/hashovfl.c
+++ b/src/backend/access/hash/hashovfl.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.57 2007/05/03 16:45:58 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashovfl.c,v 1.58 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *    Overflow pages look like ordinary relation pages.
@@ -362,6 +362,9 @@ _hash_firstfreebit(uint32 map)
  * Remove this overflow page from its bucket's chain, and mark the page as
  * free.  On entry, ovflbuf is write-locked; it is released before exiting.
  *
+ * Since this function is invoked in VACUUM, we provide an access strategy
+ * parameter that controls fetches of the bucket pages.
+ *
  * Returns the block number of the page that followed the given page
  * in the bucket, or InvalidBlockNumber if no following page.
  *
@@ -370,7 +373,8 @@ _hash_firstfreebit(uint32 map)
  * on the bucket, too.
  */
 BlockNumber
-_hash_freeovflpage(Relation rel, Buffer ovflbuf)
+_hash_freeovflpage(Relation rel, Buffer ovflbuf,
+                   BufferAccessStrategy bstrategy)
 {
     HashMetaPage metap;
     Buffer      metabuf;
@@ -413,8 +417,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
      */
     if (BlockNumberIsValid(prevblkno))
     {
-        Buffer      prevbuf = _hash_getbuf(rel, prevblkno, HASH_WRITE,
-                                           LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
+        Buffer      prevbuf = _hash_getbuf_with_strategy(rel,
+                                                         prevblkno,
+                                                         HASH_WRITE,
+                                                         LH_BUCKET_PAGE | LH_OVERFLOW_PAGE,
+                                                         bstrategy);
         Page        prevpage = BufferGetPage(prevbuf);
         HashPageOpaque prevopaque = (HashPageOpaque) PageGetSpecialPointer(prevpage);

@@ -424,8 +431,11 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
     }
     if (BlockNumberIsValid(nextblkno))
     {
-        Buffer      nextbuf = _hash_getbuf(rel, nextblkno, HASH_WRITE,
-                                           LH_OVERFLOW_PAGE);
+        Buffer      nextbuf = _hash_getbuf_with_strategy(rel,
+                                                         nextblkno,
+                                                         HASH_WRITE,
+                                                         LH_OVERFLOW_PAGE,
+                                                         bstrategy);
         Page        nextpage = BufferGetPage(nextbuf);
         HashPageOpaque nextopaque = (HashPageOpaque) PageGetSpecialPointer(nextpage);

@@ -434,6 +444,8 @@ _hash_freeovflpage(Relation rel, Buffer ovflbuf)
         _hash_wrtbuf(rel, nextbuf);
     }

+    /* Note: bstrategy is intentionally not used for metapage and bitmap */
+
     /* Read the metapage so we can determine which bitmap page to use */
     metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
     metap = (HashMetaPage) BufferGetPage(metabuf);
@@ -558,11 +570,15 @@ _hash_initbitmap(Relation rel, HashMetaPage metap, BlockNumber blkno)
  *
  * Caller must hold exclusive lock on the target bucket.  This allows
  * us to safely lock multiple pages in the bucket.
+ *
+ * Since this function is invoked in VACUUM, we provide an access strategy
+ * parameter that controls fetches of the bucket pages.
  */
 void
 _hash_squeezebucket(Relation rel,
                     Bucket bucket,
-                    BlockNumber bucket_blkno)
+                    BlockNumber bucket_blkno,
+                    BufferAccessStrategy bstrategy)
 {
     Buffer      wbuf;
     Buffer      rbuf = 0;
@@ -581,7 +597,11 @@ _hash_squeezebucket(Relation rel,
      * start squeezing into the base bucket page.
      */
     wblkno = bucket_blkno;
-    wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_BUCKET_PAGE);
+    wbuf = _hash_getbuf_with_strategy(rel,
+                                      wblkno,
+                                      HASH_WRITE,
+                                      LH_BUCKET_PAGE,
+                                      bstrategy);
     wpage = BufferGetPage(wbuf);
     wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);

@@ -595,8 +615,10 @@ _hash_squeezebucket(Relation rel,
     }

     /*
-     * find the last page in the bucket chain by starting at the base bucket
-     * page and working forward.
+     * Find the last page in the bucket chain by starting at the base bucket
+     * page and working forward.  Note: we assume that a hash bucket chain is
+     * usually smaller than the buffer ring being used by VACUUM, else using
+     * the access strategy here would be counterproductive.
      */
     ropaque = wopaque;
     do
@@ -604,7 +626,11 @@ _hash_squeezebucket(Relation rel,
         rblkno = ropaque->hasho_nextblkno;
         if (ropaque != wopaque)
             _hash_relbuf(rel, rbuf);
-        rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+        rbuf = _hash_getbuf_with_strategy(rel,
+                                          rblkno,
+                                          HASH_WRITE,
+                                          LH_OVERFLOW_PAGE,
+                                          bstrategy);
         rpage = BufferGetPage(rbuf);
         ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
         Assert(ropaque->hasho_bucket == bucket);
@@ -644,7 +670,11 @@ _hash_squeezebucket(Relation rel,
                 return;
             }

-            wbuf = _hash_getbuf(rel, wblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+            wbuf = _hash_getbuf_with_strategy(rel,
+                                              wblkno,
+                                              HASH_WRITE,
+                                              LH_OVERFLOW_PAGE,
+                                              bstrategy);
             wpage = BufferGetPage(wbuf);
             wopaque = (HashPageOpaque) PageGetSpecialPointer(wpage);
             Assert(wopaque->hasho_bucket == bucket);
@@ -688,15 +718,19 @@ _hash_squeezebucket(Relation rel,
             /* yes, so release wbuf lock first */
             _hash_wrtbuf(rel, wbuf);
             /* free this overflow page (releases rbuf) */
-            _hash_freeovflpage(rel, rbuf);
+            _hash_freeovflpage(rel, rbuf, bstrategy);
             /* done */
             return;
         }

         /* free this overflow page, then get the previous one */
-        _hash_freeovflpage(rel, rbuf);
+        _hash_freeovflpage(rel, rbuf, bstrategy);

-        rbuf = _hash_getbuf(rel, rblkno, HASH_WRITE, LH_OVERFLOW_PAGE);
+        rbuf = _hash_getbuf_with_strategy(rel,
+                                          rblkno,
+                                          HASH_WRITE,
+                                          LH_OVERFLOW_PAGE,
+                                          bstrategy);
         rpage = BufferGetPage(rbuf);
         ropaque = (HashPageOpaque) PageGetSpecialPointer(rpage);
         Assert(ropaque->hasho_bucket == bucket);
--- a/src/backend/access/hash/hashpage.c
+++ b/src/backend/access/hash/hashpage.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.67 2007/05/03 16:45:58 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/hash/hashpage.c,v 1.68 2007/05/30 20:11:51 tgl Exp $
  *
  * NOTES
  *    Postgres hash pages look like ordinary relation pages.  The opaque
@@ -214,6 +214,34 @@ _hash_getnewbuf(Relation rel, BlockNumber blkno)
     return buf;
 }

+/*
+ *  _hash_getbuf_with_strategy() -- Get a buffer with nondefault strategy.
+ *
+ *      This is identical to _hash_getbuf() but also allows a buffer access
+ *      strategy to be specified.  We use this for VACUUM operations.
+ */
+Buffer
+_hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+                           int access, int flags,
+                           BufferAccessStrategy bstrategy)
+{
+    Buffer      buf;
+
+    if (blkno == P_NEW)
+        elog(ERROR, "hash AM does not use P_NEW");
+
+    buf = ReadBufferWithStrategy(rel, blkno, bstrategy);
+
+    if (access != HASH_NOLOCK)
+        LockBuffer(buf, access);
+
+    /* ref count and lock type are correct */
+
+    _hash_checkpage(rel, buf, flags);
+
+    return buf;
+}
+
 /*
  *  _hash_relbuf() -- release a locked buffer.
  *
@@ -840,5 +868,5 @@ _hash_splitbucket(Relation rel,
     _hash_wrtbuf(rel, obuf);
     _hash_wrtbuf(rel, nbuf);

-    _hash_squeezebucket(rel, obucket, start_oblkno);
+    _hash_squeezebucket(rel, obucket, start_oblkno, NULL);
 }
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.233 2007/05/27 03:50:38 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.234 2007/05/30 20:11:53 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -83,6 +83,24 @@ initscan(HeapScanDesc scan, ScanKey key)
      */
     scan->rs_nblocks = RelationGetNumberOfBlocks(scan->rs_rd);

+    /*
+     * If the table is large relative to NBuffers, use a bulk-read access
+     * strategy, else use the default random-access strategy.  During a
+     * rescan, don't make a new strategy object if we don't have to.
+     */
+    if (scan->rs_nblocks > NBuffers / 4 &&
+        !scan->rs_rd->rd_istemp)
+    {
+        if (scan->rs_strategy == NULL)
+            scan->rs_strategy = GetAccessStrategy(BAS_BULKREAD);
+    }
+    else
+    {
+        if (scan->rs_strategy != NULL)
+            FreeAccessStrategy(scan->rs_strategy);
+        scan->rs_strategy = NULL;
+    }
+
     scan->rs_inited = false;
     scan->rs_ctup.t_data = NULL;
     ItemPointerSetInvalid(&scan->rs_ctup.t_self);
@@ -123,9 +141,17 @@ heapgetpage(HeapScanDesc scan, BlockNumber page)

     Assert(page < scan->rs_nblocks);

-    scan->rs_cbuf = ReleaseAndReadBuffer(scan->rs_cbuf,
-                                         scan->rs_rd,
-                                         page);
+    /* release previous scan buffer, if any */
+    if (BufferIsValid(scan->rs_cbuf))
+    {
+        ReleaseBuffer(scan->rs_cbuf);
+        scan->rs_cbuf = InvalidBuffer;
+    }
+
+    /* read page using selected strategy */
+    scan->rs_cbuf = ReadBufferWithStrategy(scan->rs_rd,
+                                           page,
+                                           scan->rs_strategy);
     scan->rs_cblock = page;

     if (!scan->rs_pageatatime)
@@ -938,6 +964,7 @@ heap_beginscan(Relation relation, Snapshot snapshot,
     scan->rs_rd = relation;
     scan->rs_snapshot = snapshot;
     scan->rs_nkeys = nkeys;
+    scan->rs_strategy = NULL;   /* set in initscan */

     /*
      * we can use page-at-a-time mode if it's an MVCC-safe snapshot
@@ -1007,6 +1034,9 @@ heap_endscan(HeapScanDesc scan)
     if (scan->rs_key)
         pfree(scan->rs_key);

+    if (scan->rs_strategy != NULL)
+        FreeAccessStrategy(scan->rs_strategy);
+
     pfree(scan);
 }

--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -12,7 +12,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.154 2007/01/05 22:19:23 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/access/nbtree/nbtree.c,v 1.155 2007/05/30 20:11:53 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -786,9 +786,10 @@ restart:
         /*
          * We can't use _bt_getbuf() here because it always applies
          * _bt_checkpage(), which will barf on an all-zero page. We want to
-         * recycle all-zero pages, not fail.
+         * recycle all-zero pages, not fail.  Also, we want to use a nondefault
+         * buffer access strategy.
          */
-        buf = ReadBuffer(rel, blkno);
+        buf = ReadBufferWithStrategy(rel, blkno, info->strategy);
         LockBuffer(buf, BT_READ);
         page = BufferGetPage(buf);
         opaque = (BTPageOpaque) PageGetSpecialPointer(page);
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.269 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.270 2007/05/30 20:11:55 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1799,6 +1799,36 @@ XLogFlush(XLogRecPtr record)
              LogwrtResult.Flush.xlogid, LogwrtResult.Flush.xrecoff);
 }

+/*
+ * Test whether XLOG data has been flushed up to (at least) the given position.
+ *
+ * Returns true if a flush is still needed.  (It may be that someone else
+ * is already in process of flushing that far, however.)
+ */
+bool
+XLogNeedsFlush(XLogRecPtr record)
+{
+    /* Quick exit if already known flushed */
+    if (XLByteLE(record, LogwrtResult.Flush))
+        return false;
+
+    /* read LogwrtResult and update local state */
+    {
+        /* use volatile pointer to prevent code rearrangement */
+        volatile XLogCtlData *xlogctl = XLogCtl;
+
+        SpinLockAcquire(&xlogctl->info_lck);
+        LogwrtResult = xlogctl->LogwrtResult;
+        SpinLockRelease(&xlogctl->info_lck);
+    }
+
+    /* check again */
+    if (XLByteLE(record, LogwrtResult.Flush))
+        return false;
+
+    return true;
+}
+
 /*
  * Create a new XLOG file segment, or open a pre-existing one.
  *
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.283 2007/05/16 17:28:20 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/catalog/index.c,v 1.284 2007/05/30 20:11:55 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -1658,6 +1658,7 @@ validate_index(Oid heapId, Oid indexId, Snapshot snapshot)
     ivinfo.vacuum_full = false;
     ivinfo.message_level = DEBUG2;
     ivinfo.num_heap_tuples = -1;
+    ivinfo.strategy = NULL;

     state.tuplesort = tuplesort_begin_datum(TIDOID,
                                             TIDLessOperator, false,
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.107 2007/04/30 03:23:48 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.108 2007/05/30 20:11:56 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -63,10 +63,13 @@ typedef struct AnlIndexData
 /* Default statistics target (GUC parameter) */
 int         default_statistics_target = 10;

+/* A few variables that don't seem worth passing around as parameters */
 static int  elevel = -1;

 static MemoryContext anl_context = NULL;

+static BufferAccessStrategy vac_strategy;
+

 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
                   int samplesize);
@@ -94,7 +97,8 @@ static bool std_typanalyze(VacAttrStats *stats);
  *  analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, VacuumStmt *vacstmt)
+analyze_rel(Oid relid, VacuumStmt *vacstmt,
+            BufferAccessStrategy bstrategy)
 {
     Relation    onerel;
     int         attr_cnt,
@@ -120,6 +124,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
     else
         elevel = DEBUG2;

+    vac_strategy = bstrategy;
+
     /*
      * Use the current context for storing analysis info.  vacuum.c ensures
      * that this context will be cleared when I return, thus releasing the
@@ -845,7 +851,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
          * looking at it.  We don't maintain a lock on the page, so tuples
          * could get added to it, but we ignore such tuples.
          */
-        targbuffer = ReadBuffer(onerel, targblock);
+        targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
         LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
         targpage = BufferGetPage(targbuffer);
         maxoffset = PageGetMaxOffsetNumber(targpage);
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.351 2007/05/17 15:28:29 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.352 2007/05/30 20:11:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -191,6 +191,7 @@ ExecContext_Finish(ExecContext ec)
 *----------------------------------------------------------------------
 */

+/* A few variables that don't seem worth passing around as parameters */
 static MemoryContext vac_context = NULL;

 static int  elevel = -1;
@@ -198,6 +199,8 @@ static int  elevel = -1;
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;

+static BufferAccessStrategy vac_strategy;
+

 /* non-export function prototypes */
 static List *get_rel_oids(List *relids, const RangeVar *vacrel,
@@ -257,14 +260,18 @@ static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
  * relation OIDs to be processed, and vacstmt->relation is ignored.
  * (The non-NIL case is currently only used by autovacuum.)
  *
+ * bstrategy is normally given as NULL, but in autovacuum it can be passed
+ * in to use the same buffer strategy object across multiple vacuum() calls.
+ *
  * isTopLevel should be passed down from ProcessUtility.
  *
- * It is the caller's responsibility that both vacstmt and relids
+ * It is the caller's responsibility that vacstmt, relids, and bstrategy
  * (if given) be allocated in a memory context that won't disappear
  * at transaction commit.
  */
 void
-vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
+vacuum(VacuumStmt *vacstmt, List *relids,
+       BufferAccessStrategy bstrategy, bool isTopLevel)
 {
     const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
     volatile MemoryContext anl_context = NULL;
@@ -319,6 +326,19 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
                                           ALLOCSET_DEFAULT_INITSIZE,
                                           ALLOCSET_DEFAULT_MAXSIZE);

+    /*
+     * If caller didn't give us a buffer strategy object, make one in the
+     * cross-transaction memory context.
+     */
+    if (bstrategy == NULL)
+    {
+        MemoryContext old_context = MemoryContextSwitchTo(vac_context);
+
+        bstrategy = GetAccessStrategy(BAS_VACUUM);
+        MemoryContextSwitchTo(old_context);
+    }
+    vac_strategy = bstrategy;
+
     /* Remember whether we are processing everything in the DB */
     all_rels = (relids == NIL && vacstmt->relation == NULL);

@@ -417,15 +437,7 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
                 else
                     old_context = MemoryContextSwitchTo(anl_context);

-                /*
-                 * Tell the buffer replacement strategy that vacuum is causing
-                 * the IO
-                 */
-                StrategyHintVacuum(true);
-
-                analyze_rel(relid, vacstmt);
-
-                StrategyHintVacuum(false);
+                analyze_rel(relid, vacstmt, vac_strategy);

                 if (use_own_xacts)
                     CommitTransactionCommand();
@@ -441,8 +453,6 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
     {
         /* Make sure cost accounting is turned off after error */
         VacuumCostActive = false;
-        /* And reset buffer replacement strategy, too */
-        StrategyHintVacuum(false);
         PG_RE_THROW();
     }
     PG_END_TRY();
@@ -1084,21 +1094,13 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
      */
     toast_relid = onerel->rd_rel->reltoastrelid;

-    /*
-     * Tell the cache replacement strategy that vacuum is causing all
-     * following IO
-     */
-    StrategyHintVacuum(true);
-
     /*
      * Do the actual work --- either FULL or "lazy" vacuum
      */
     if (vacstmt->full)
         full_vacuum_rel(onerel, vacstmt);
     else
-        lazy_vacuum_rel(onerel, vacstmt);
+        lazy_vacuum_rel(onerel, vacstmt, vac_strategy);

-    StrategyHintVacuum(false);
-
     /* all done with this class, but hold lock until commit */
     relation_close(onerel, NoLock);
@@ -1290,7 +1292,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,

         vacuum_delay_point();

-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
         page = BufferGetPage(buf);

         /*
@@ -1730,7 +1732,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
         /*
          * Process this page of relation.
          */
-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
         page = BufferGetPage(buf);

         vacpage->offsets_free = 0;
@@ -1954,8 +1956,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                     nextTid = tp.t_data->t_ctid;
                     priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
                     /* assume block# is OK (see heap_fetch comments) */
-                    nextBuf = ReadBuffer(onerel,
-                                 ItemPointerGetBlockNumber(&nextTid));
+                    nextBuf = ReadBufferWithStrategy(onerel,
+                                 ItemPointerGetBlockNumber(&nextTid),
+                                                     vac_strategy);
                     nextPage = BufferGetPage(nextBuf);
                     /* If bogus or unused slot, assume tp is end of chain */
                     nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
@@ -2091,8 +2094,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                         break;  /* out of check-all-items loop */
                     }
                     tp.t_self = vtlp->this_tid;
-                    Pbuf = ReadBuffer(onerel,
-                         ItemPointerGetBlockNumber(&(tp.t_self)));
+                    Pbuf = ReadBufferWithStrategy(onerel,
+                         ItemPointerGetBlockNumber(&(tp.t_self)),
+                                                  vac_strategy);
                     Ppage = BufferGetPage(Pbuf);
                     Pitemid = PageGetItemId(Ppage,
                                ItemPointerGetOffsetNumber(&(tp.t_self)));
@@ -2174,11 +2178,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,

             /* Get page to move from */
             tuple.t_self = vtmove[ti].tid;
-            Cbuf = ReadBuffer(onerel,
-                              ItemPointerGetBlockNumber(&(tuple.t_self)));
+            Cbuf = ReadBufferWithStrategy(onerel,
+                              ItemPointerGetBlockNumber(&(tuple.t_self)),
+                                          vac_strategy);

             /* Get page to move to */
-            dst_buffer = ReadBuffer(onerel, destvacpage->blkno);
+            dst_buffer = ReadBufferWithStrategy(onerel,
+                                                destvacpage->blkno,
+                                                vac_strategy);

             LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
             if (dst_buffer != Cbuf)
@@ -2239,7 +2246,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                 if (i == num_fraged_pages)
                     break;      /* can't move item anywhere */
                 dst_vacpage = fraged_pages->pagedesc[i];
-                dst_buffer = ReadBuffer(onerel, dst_vacpage->blkno);
+                dst_buffer = ReadBufferWithStrategy(onerel,
+                                                    dst_vacpage->blkno,
+                                                    vac_strategy);
                 LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
                 dst_page = BufferGetPage(dst_buffer);
                 /* if this page was not used before - clean it */
@@ -2386,7 +2395,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
             Page        page;

             /* this page was not used as a move target, so must clean it */
-            buf = ReadBuffer(onerel, (*curpage)->blkno);
+            buf = ReadBufferWithStrategy(onerel,
+                                         (*curpage)->blkno,
+                                         vac_strategy);
             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
             page = BufferGetPage(buf);
             if (!PageIsEmpty(page))
@@ -2470,7 +2481,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
             int         uncnt;
             int         num_tuples = 0;

-            buf = ReadBuffer(onerel, vacpage->blkno);
+            buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
             page = BufferGetPage(buf);
             maxoff = PageGetMaxOffsetNumber(page);
@@ -2859,7 +2870,7 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
             break;              /* no need to scan any further */
         if ((*curpage)->offsets_used == 0)
             continue;           /* this page was never used as a move dest */
-        buf = ReadBuffer(rel, (*curpage)->blkno);
+        buf = ReadBufferWithStrategy(rel, (*curpage)->blkno, vac_strategy);
         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
         page = BufferGetPage(buf);
         max_offset = PageGetMaxOffsetNumber(page);
@@ -2925,7 +2936,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)

         if ((*vacpage)->offsets_free > 0)
         {
-            buf = ReadBuffer(onerel, (*vacpage)->blkno);
+            buf = ReadBufferWithStrategy(onerel,
+                                         (*vacpage)->blkno,
+                                         vac_strategy);
             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
             vacuum_page(onerel, buf, *vacpage);
             UnlockReleaseBuffer(buf);
@@ -3012,6 +3025,7 @@ scan_index(Relation indrel, double num_tuples)
     ivinfo.vacuum_full = true;
     ivinfo.message_level = elevel;
     ivinfo.num_heap_tuples = num_tuples;
+    ivinfo.strategy = vac_strategy;

     stats = index_vacuum_cleanup(&ivinfo, NULL);

@@ -3077,6 +3091,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
     ivinfo.vacuum_full = true;
     ivinfo.message_level = elevel;
     ivinfo.num_heap_tuples = num_tuples + keep_tuples;
+    ivinfo.strategy = vac_strategy;

     /* Do bulk deletion */
     stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
--- a/src/backend/commands/vacuumlazy.c
+++ b/src/backend/commands/vacuumlazy.c
@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.89 2007/05/17 15:28:29 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -96,11 +96,14 @@ typedef struct LVRelStats
 } LVRelStats;


+/* A few variables that don't seem worth passing around as parameters */
 static int  elevel = -1;

 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;

+static BufferAccessStrategy vac_strategy;
+

 /* non-export function prototypes */
 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
@@ -138,7 +141,8 @@ static int  vac_cmp_page_spaces(const void *left, const void *right);
  *      and locked the relation.
  */
 void
-lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
+lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+                BufferAccessStrategy bstrategy)
 {
     LVRelStats *vacrelstats;
     Relation   *Irel;
@@ -158,6 +162,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
     else
         elevel = DEBUG2;

+    vac_strategy = bstrategy;
+
     vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
                           &OldestXmin, &FreezeLimit);

@@ -318,7 +324,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
             vacrelstats->num_index_scans++;
         }

-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

         /* Initially, we only need shared access to the buffer */
         LockBuffer(buf, BUFFER_LOCK_SHARE);
@@ -586,7 +592,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
         vacuum_delay_point();

         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
-        buf = ReadBuffer(onerel, tblk);
+        buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
         LockBufferForCleanup(buf);
         tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
         /* Now that we've compacted the page, record its available space */
@@ -684,6 +690,7 @@ lazy_vacuum_index(Relation indrel,
     ivinfo.message_level = elevel;
     /* We don't yet know rel_tuples, so pass -1 */
     ivinfo.num_heap_tuples = -1;
+    ivinfo.strategy = vac_strategy;

     /* Do bulk deletion */
     *stats = index_bulk_delete(&ivinfo, *stats,
@@ -713,6 +720,7 @@ lazy_cleanup_index(Relation indrel,
     ivinfo.vacuum_full = false;
     ivinfo.message_level = elevel;
     ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
+    ivinfo.strategy = vac_strategy;

     stats = index_vacuum_cleanup(&ivinfo, stats);

@@ -869,7 +877,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)

         blkno--;

-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

         /* In this phase we only need shared access to the buffer */
         LockBuffer(buf, BUFFER_LOCK_SHARE);
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.46 2007/05/07 20:41:24 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/postmaster/autovacuum.c,v 1.47 2007/05/30 20:11:57 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -218,7 +218,8 @@ static void relation_needs_vacanalyze(Oid relid, Form_pg_autovacuum avForm,
                       bool *doanalyze);

 static void autovacuum_do_vac_analyze(Oid relid, bool dovacuum,
-                      bool doanalyze, int freeze_min_age);
+                          bool doanalyze, int freeze_min_age,
+                          BufferAccessStrategy bstrategy);
 static HeapTuple get_pg_autovacuum_tuple_relid(Relation avRel, Oid relid);
 static PgStat_StatTabEntry *get_pgstat_tabentry_relid(Oid relid, bool isshared,
                       PgStat_StatDBEntry *shared,
@@ -1673,6 +1674,7 @@ do_autovacuum(void)
     ListCell   *cell;
     PgStat_StatDBEntry *shared;
     PgStat_StatDBEntry *dbentry;
+    BufferAccessStrategy bstrategy;

     /*
      * may be NULL if we couldn't find an entry (only happens if we
@@ -1812,6 +1814,13 @@ do_autovacuum(void)
     list_free(toast_oids);
     toast_oids = NIL;

+    /*
+     * Create a buffer access strategy object for VACUUM to use.  We want
+     * to use the same one across all the vacuum operations we perform,
+     * since the point is for VACUUM not to blow out the shared cache.
+     */
+    bstrategy = GetAccessStrategy(BAS_VACUUM);
+
     /*
      * Perform operations on collected tables.
      */
@@ -1910,7 +1919,8 @@ next_worker:
         autovacuum_do_vac_analyze(tab->at_relid,
                                   tab->at_dovacuum,
                                   tab->at_doanalyze,
-                                  tab->at_freeze_min_age);
+                                  tab->at_freeze_min_age,
+                                  bstrategy);
         /* be tidy */
         pfree(tab);
     }
@@ -2328,7 +2338,8 @@ relation_needs_vacanalyze(Oid relid,
  */
 static void
 autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
-                          int freeze_min_age)
+                          int freeze_min_age,
+                          BufferAccessStrategy bstrategy)
 {
     VacuumStmt  vacstmt;
     MemoryContext old_cxt;
@@ -2354,7 +2365,7 @@ autovacuum_do_vac_analyze(Oid relid, bool dovacuum, bool doanalyze,
     /* Let pgstat know what we're doing */
     autovac_report_activity(&vacstmt, relid);

-    vacuum(&vacstmt, list_make1_oid(relid), true);
+    vacuum(&vacstmt, list_make1_oid(relid), bstrategy, true);
     MemoryContextSwitchTo(old_cxt);
 }

--- a/src/backend/storage/buffer/README
+++ b/src/backend/storage/buffer/README
@@ -1,4 +1,4 @@
-$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.11 2006/07/23 03:07:58 tgl Exp $
+$PostgreSQL: pgsql/src/backend/storage/buffer/README,v 1.12 2007/05/30 20:11:58 tgl Exp $

 Notes about shared buffer access rules
 --------------------------------------
@@ -152,20 +152,21 @@ we could use per-backend LWLocks instead (a buffer header would then contain
 a field to show which backend is doing its I/O).


-Buffer replacement strategy
----------------------------
+Normal buffer replacement strategy
+----------------------------------

 There is a "free list" of buffers that are prime candidates for replacement.
 In particular, buffers that are completely free (contain no valid page) are
-always in this list.  We may also throw buffers into this list if we
-consider their pages unlikely to be needed soon.  The list is singly-linked
-using fields in the buffer headers; we maintain head and tail pointers in
-global variables.  (Note: although the list links are in the buffer headers,
-they are considered to be protected by the BufFreelistLock, not the
-buffer-header spinlocks.)  To choose a victim buffer to recycle when there
-are no free buffers available, we use a simple clock-sweep algorithm, which
-avoids the need to take system-wide locks during common operations.  It
-works like this:
+always in this list.  We could also throw buffers into this list if we
+consider their pages unlikely to be needed soon; however, the current
+algorithm never does that.  The list is singly-linked using fields in the
+buffer headers; we maintain head and tail pointers in global variables.
+(Note: although the list links are in the buffer headers, they are
+considered to be protected by the BufFreelistLock, not the buffer-header
+spinlocks.)  To choose a victim buffer to recycle when there are no free
+buffers available, we use a simple clock-sweep algorithm, which avoids the
+need to take system-wide locks during common operations.  It works like
+this:

 Each buffer header contains a usage counter, which is incremented (up to a
 small limit value) whenever the buffer is unpinned.  (This requires only the
@@ -199,22 +200,40 @@ before we can recycle it; if someone else pins the buffer meanwhile we will
 have to give up and try another buffer.  This however is not a concern
 of the basic select-a-victim-buffer algorithm.)

-A special provision is that while running VACUUM, a backend does not
-increment the usage count on buffers it accesses.  In fact, if ReleaseBuffer
-sees that it is dropping the pin count to zero and the usage count is zero,
-then it appends the buffer to the tail of the free list.  (This implies that
-VACUUM, but only VACUUM, must take the BufFreelistLock during ReleaseBuffer;
-this shouldn't create much of a contention problem.)  This provision
-encourages VACUUM to work in a relatively small number of buffers rather
-than blowing out the entire buffer cache.  It is reasonable since a page
-that has been touched only by VACUUM is unlikely to be needed again soon.
-
-Since VACUUM usually requests many pages very fast, the effect of this is that
-it will get back the very buffers it filled and possibly modified on the next
-call and will therefore do its work in a few shared memory buffers, while
-being able to use whatever it finds in the cache already.  This also implies
-that most of the write traffic caused by a VACUUM will be done by the VACUUM
-itself and not pushed off onto other processes.
+Buffer ring replacement strategy
+---------------------------------
+
+When running a query that needs to access a large number of pages just once,
+such as VACUUM or a large sequential scan, a different strategy is used.
+A page that has been touched only by such a scan is unlikely to be needed
+again soon, so instead of running the normal clock sweep algorithm and
+blowing out the entire buffer cache, a small ring of buffers is allocated
+using the normal clock sweep algorithm and those buffers are reused for the
+whole scan.  This also implies that much of the write traffic caused by such
+a statement will be done by the backend itself and not pushed off onto other
+processes.
+
+For sequential scans, a 256KB ring is used. That's small enough to fit in L2
+cache, which makes transferring pages from OS cache to shared buffer cache
+efficient.  Even less would often be enough, but the ring must be big enough
+to accommodate all pages in the scan that are pinned concurrently.  256KB
+should also be enough to leave a small cache trail for other backends to
+join in a synchronized seq scan.  If a ring buffer is dirtied and its LSN
+updated, we would normally have to write and flush WAL before we could
+re-use the buffer; in this case we instead discard the buffer from the ring
+and (later) choose a replacement using the normal clock-sweep algorithm.
+Hence this strategy works best for scans that are read-only (or at worst
+update hint bits).  In a scan that modifies every page in the scan, like a
+bulk UPDATE or DELETE, the buffers in the ring will always be dirtied and
+the ring strategy effectively degrades to the normal strategy.
+
+VACUUM uses a 256KB ring like sequential scans, but dirty pages are not
+removed from the ring.  Instead, WAL is flushed if needed to allow reuse of
+the buffers.  Before introducing the buffer ring strategy in 8.3, VACUUM's
+buffers were sent to the freelist, which was effectively a buffer ring of 1
+buffer, resulting in excessive WAL flushing.  Allowing VACUUM to update
+256KB between WAL flushes should be more efficient.


 Background writer's processing
@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.219 2007/05/27 03:50:39 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/storage/buffer/bufmgr.c,v 1.220 2007/05/30 20:11:58 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -90,11 +90,11 @@ static volatile BufferDesc *PinCountWaitBuf = NULL;
 
 
 static Buffer ReadBuffer_common(Relation reln, BlockNumber blockNum,
-                  bool zeroPage);
-static bool PinBuffer(volatile BufferDesc *buf);
+                  bool zeroPage,
+                  BufferAccessStrategy strategy);
+static bool PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy);
 static void PinBuffer_Locked(volatile BufferDesc *buf);
-static void UnpinBuffer(volatile BufferDesc *buf,
-            bool fixOwner, bool normalAccess);
+static void UnpinBuffer(volatile BufferDesc *buf, bool fixOwner);
 static bool SyncOneBuffer(int buf_id, bool skip_pinned);
 static void WaitIO(volatile BufferDesc *buf);
 static bool StartBufferIO(volatile BufferDesc *buf, bool forInput);
@ -102,7 +102,8 @@ static void TerminateBufferIO(volatile BufferDesc *buf, bool clear_dirty,
                   int set_flag_bits);
 static void buffer_write_error_callback(void *arg);
 static volatile BufferDesc *BufferAlloc(Relation reln, BlockNumber blockNum,
-            bool *foundPtr);
+            BufferAccessStrategy strategy,
+            bool *foundPtr);
 static void FlushBuffer(volatile BufferDesc *buf, SMgrRelation reln);
 static void AtProcExit_Buffers(int code, Datum arg);
 
@ -125,7 +126,18 @@ static void AtProcExit_Buffers(int code, Datum arg);
 Buffer
 ReadBuffer(Relation reln, BlockNumber blockNum)
 {
-    return ReadBuffer_common(reln, blockNum, false);
+    return ReadBuffer_common(reln, blockNum, false, NULL);
+}
+
+/*
+ * ReadBufferWithStrategy -- same as ReadBuffer, except caller can specify
+ *        a nondefault buffer access strategy.  See buffer/README for details.
+ */
+Buffer
+ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
+                       BufferAccessStrategy strategy)
+{
+    return ReadBuffer_common(reln, blockNum, false, strategy);
 }
 
 /*
@ -140,14 +152,15 @@ ReadBuffer(Relation reln, BlockNumber blockNum)
 Buffer
 ReadOrZeroBuffer(Relation reln, BlockNumber blockNum)
 {
-    return ReadBuffer_common(reln, blockNum, true);
+    return ReadBuffer_common(reln, blockNum, true, NULL);
 }
 
 /*
- * ReadBuffer_common -- common logic for ReadBuffer and ReadOrZeroBuffer
+ * ReadBuffer_common -- common logic for ReadBuffer variants
  */
 static Buffer
-ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
+ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage,
+                  BufferAccessStrategy strategy)
 {
     volatile BufferDesc *bufHdr;
     Block        bufBlock;
@ -185,7 +198,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
      * lookup the buffer.  IO_IN_PROGRESS is set if the requested block is
      * not currently in memory.
      */
-    bufHdr = BufferAlloc(reln, blockNum, &found);
+    bufHdr = BufferAlloc(reln, blockNum, strategy, &found);
     if (found)
         BufferHitCount++;
 }
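A hedged usage sketch, not part of the patch, of the new entry point: how a bulk sequential reader might drive ReadBufferWithStrategy with a ring strategy. Backend-style code; the function name and block-count argument are placeholders.

#include "postgres.h"
#include "storage/bufmgr.h"

/* Sketch: scan every block of "rel" through a small reusable buffer ring. */
static void
scan_relation_with_ring(Relation rel, BlockNumber nblocks)
{
    BufferAccessStrategy strategy = GetAccessStrategy(BAS_BULKREAD);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
    {
        Buffer      buf = ReadBufferWithStrategy(rel, blkno, strategy);

        /* ... inspect the page here ... */
        ReleaseBuffer(buf);
    }
    FreeAccessStrategy(strategy);
}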
@ -330,6 +343,10 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
  * buffer.  If no buffer exists already, selects a replacement
  * victim and evicts the old page, but does NOT read in new page.
  *
+ * "strategy" can be a buffer replacement strategy object, or NULL for
+ * the default strategy.  The selected buffer's usage_count is advanced when
+ * using the default strategy, but otherwise possibly not (see PinBuffer).
+ *
  * The returned buffer is pinned and is already marked as holding the
  * desired page.  If it already did have the desired page, *foundPtr is
  * set TRUE.  Otherwise, *foundPtr is set FALSE and the buffer is marked
@ -343,6 +360,7 @@ ReadBuffer_common(Relation reln, BlockNumber blockNum, bool zeroPage)
 static volatile BufferDesc *
 BufferAlloc(Relation reln,
             BlockNumber blockNum,
+            BufferAccessStrategy strategy,
             bool *foundPtr)
 {
     BufferTag    newTag;        /* identity of requested block */
@ -375,7 +393,7 @@ BufferAlloc(Relation reln,
          */
         buf = &BufferDescriptors[buf_id];
 
-        valid = PinBuffer(buf);
+        valid = PinBuffer(buf, strategy);
 
         /* Can release the mapping lock as soon as we've pinned it */
         LWLockRelease(newPartitionLock);
@ -413,13 +431,15 @@ BufferAlloc(Relation reln,
     /* Loop here in case we have to try another victim buffer */
     for (;;)
     {
+        bool        lock_held;
+
         /*
          * Select a victim buffer.  The buffer is returned with its header
-         * spinlock still held!  Also the BufFreelistLock is still held, since
-         * it would be bad to hold the spinlock while possibly waking up other
-         * processes.
+         * spinlock still held!  Also (in most cases) the BufFreelistLock is
+         * still held, since it would be bad to hold the spinlock while
+         * possibly waking up other processes.
          */
-        buf = StrategyGetBuffer();
+        buf = StrategyGetBuffer(strategy, &lock_held);
 
         Assert(buf->refcount == 0);
 
@ -430,7 +450,8 @@ BufferAlloc(Relation reln,
         PinBuffer_Locked(buf);
 
         /* Now it's safe to release the freelist lock */
-        LWLockRelease(BufFreelistLock);
+        if (lock_held)
+            LWLockRelease(BufFreelistLock);
 
         /*
          * If the buffer was dirty, try to write it out.  There is a race
@ -458,16 +479,34 @@ BufferAlloc(Relation reln,
              */
             if (LWLockConditionalAcquire(buf->content_lock, LW_SHARED))
             {
+                /*
+                 * If using a nondefault strategy, and writing the buffer
+                 * would require a WAL flush, let the strategy decide whether
+                 * to go ahead and write/reuse the buffer or to choose another
+                 * victim.  We need lock to inspect the page LSN, so this
+                 * can't be done inside StrategyGetBuffer.
+                 */
+                if (strategy != NULL &&
+                    XLogNeedsFlush(BufferGetLSN(buf)) &&
+                    StrategyRejectBuffer(strategy, buf))
+                {
+                    /* Drop lock/pin and loop around for another buffer */
+                    LWLockRelease(buf->content_lock);
+                    UnpinBuffer(buf, true);
+                    continue;
+                }
+
+                /* OK, do the I/O */
                 FlushBuffer(buf, NULL);
                 LWLockRelease(buf->content_lock);
             }
             else
             {
                 /*
-                 * Someone else has pinned the buffer, so give it up and loop
+                 * Someone else has locked the buffer, so give it up and loop
                  * back to get another one.
                  */
-                UnpinBuffer(buf, true, false /* evidently recently used */ );
+                UnpinBuffer(buf, true);
                 continue;
             }
         }
     }
@ -531,10 +570,9 @@ BufferAlloc(Relation reln,
              * Got a collision. Someone has already done what we were about to
              * do. We'll just handle this as if it were found in the buffer
              * pool in the first place.  First, give up the buffer we were
-             * planning to use.  Don't allow it to be thrown in the free list
-             * (we don't want to hold freelist and mapping locks at once).
+             * planning to use.
              */
-            UnpinBuffer(buf, true, false);
+            UnpinBuffer(buf, true);
 
             /* Can give up that buffer's mapping partition lock now */
             if ((oldFlags & BM_TAG_VALID) &&
@ -545,7 +583,7 @@ BufferAlloc(Relation reln,
 
             buf = &BufferDescriptors[buf_id];
 
-            valid = PinBuffer(buf);
+            valid = PinBuffer(buf, strategy);
 
             /* Can release the mapping lock as soon as we've pinned it */
             LWLockRelease(newPartitionLock);
@ -595,20 +633,21 @@ BufferAlloc(Relation reln,
             oldPartitionLock != newPartitionLock)
             LWLockRelease(oldPartitionLock);
         LWLockRelease(newPartitionLock);
-        UnpinBuffer(buf, true, false /* evidently recently used */ );
+        UnpinBuffer(buf, true);
     }
 
     /*
      * Okay, it's finally safe to rename the buffer.
      *
      * Clearing BM_VALID here is necessary, clearing the dirtybits is just
-     * paranoia.  We also clear the usage_count since any recency of use of
-     * the old content is no longer relevant.
+     * paranoia.  We also reset the usage_count since any recency of use of
+     * the old content is no longer relevant.  (The usage_count starts out
+     * at 1 so that the buffer can survive one clock-sweep pass.)
      */
     buf->tag = newTag;
     buf->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
     buf->flags |= BM_TAG_VALID;
-    buf->usage_count = 0;
+    buf->usage_count = 1;
 
     UnlockBufHdr(buf);
 
@ -736,7 +775,7 @@ retry:
     /*
     * Insert the buffer at the head of the list of free buffers.
     */
-    StrategyFreeBuffer(buf, true);
+    StrategyFreeBuffer(buf);
 }
 
 /*
@ -814,9 +853,6 @@ ReleaseAndReadBuffer(Buffer buffer,
                 return buffer;
             ResourceOwnerForgetBuffer(CurrentResourceOwner, buffer);
             LocalRefCount[-buffer - 1]--;
-            if (LocalRefCount[-buffer - 1] == 0 &&
-                bufHdr->usage_count < BM_MAX_USAGE_COUNT)
-                bufHdr->usage_count++;
         }
         else
         {
@ -826,7 +862,7 @@ ReleaseAndReadBuffer(Buffer buffer,
             if (bufHdr->tag.blockNum == blockNum &&
                 RelFileNodeEquals(bufHdr->tag.rnode, relation->rd_node))
                 return buffer;
-            UnpinBuffer(bufHdr, true, true);
+            UnpinBuffer(bufHdr, true);
         }
     }
 
@ -836,6 +872,14 @@ ReleaseAndReadBuffer(Buffer buffer,
 /*
  * PinBuffer -- make buffer unavailable for replacement.
  *
+ * For the default access strategy, the buffer's usage_count is incremented
+ * when we first pin it; for other strategies we just make sure the usage_count
+ * isn't zero.  (The idea of the latter is that we don't want synchronized
+ * heap scans to inflate the count, but we need it to not be zero to discourage
+ * other backends from stealing buffers from our ring.  As long as we cycle
+ * through the ring faster than the global clock-sweep cycles, buffers in
+ * our ring won't be chosen as victims for replacement by other backends.)
+ *
  * This should be applied only to shared buffers, never local ones.
  *
  * Note that ResourceOwnerEnlargeBuffers must have been done already.
@ -844,7 +888,7 @@ ReleaseAndReadBuffer(Buffer buffer,
  * some callers to avoid an extra spinlock cycle.
  */
 static bool
-PinBuffer(volatile BufferDesc *buf)
+PinBuffer(volatile BufferDesc *buf, BufferAccessStrategy strategy)
 {
     int            b = buf->buf_id;
     bool        result;
@ -853,6 +897,16 @@ PinBuffer(volatile BufferDesc *buf)
     {
         LockBufHdr(buf);
         buf->refcount++;
+        if (strategy == NULL)
+        {
+            if (buf->usage_count < BM_MAX_USAGE_COUNT)
+                buf->usage_count++;
+        }
+        else
+        {
+            if (buf->usage_count == 0)
+                buf->usage_count = 1;
+        }
         result = (buf->flags & BM_VALID) != 0;
         UnlockBufHdr(buf);
     }
@ -872,6 +926,11 @@ PinBuffer(volatile BufferDesc *buf)
  * PinBuffer_Locked -- as above, but caller already locked the buffer header.
  * The spinlock is released before return.
  *
+ * Currently, no callers of this function want to modify the buffer's
+ * usage_count at all, so there's no need for a strategy parameter.
+ * Also we don't bother with a BM_VALID test (the caller could check that for
+ * itself).
+ *
  * Note: use of this routine is frequently mandatory, not just an optimization
  * to save a spin lock/unlock cycle, because we need to pin a buffer before
  * its state can change under us.
@ -897,17 +956,9 @@ PinBuffer_Locked(volatile BufferDesc *buf)
  *
  * Most but not all callers want CurrentResourceOwner to be adjusted.
  * Those that don't should pass fixOwner = FALSE.
- *
- * normalAccess indicates that we are finishing a "normal" page access,
- * that is, one requested by something outside the buffer subsystem.
- * Passing FALSE means it's an internal access that should not update the
- * buffer's usage count nor cause a change in the freelist.
- *
- * If we are releasing a buffer during VACUUM, and it's not been otherwise
- * used recently, and normalAccess is true, we send the buffer to the freelist.
  */
 static void
-UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
+UnpinBuffer(volatile BufferDesc *buf, bool fixOwner)
 {
     int            b = buf->buf_id;
 
@ -919,8 +970,6 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
     PrivateRefCount[b]--;
     if (PrivateRefCount[b] == 0)
     {
-        bool        immed_free_buffer = false;
-
         /* I'd better not still hold any locks on the buffer */
         Assert(!LWLockHeldByMe(buf->content_lock));
         Assert(!LWLockHeldByMe(buf->io_in_progress_lock));
@ -931,22 +980,7 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
         Assert(buf->refcount > 0);
         buf->refcount--;
 
-        /* Update buffer usage info, unless this is an internal access */
-        if (normalAccess)
-        {
-            if (!strategy_hint_vacuum)
-            {
-                if (buf->usage_count < BM_MAX_USAGE_COUNT)
-                    buf->usage_count++;
-            }
-            else
-            {
-                /* VACUUM accesses don't bump usage count, instead... */
-                if (buf->refcount == 0 && buf->usage_count == 0)
-                    immed_free_buffer = true;
-            }
-        }
-
+        /* Support LockBufferForCleanup() */
         if ((buf->flags & BM_PIN_COUNT_WAITER) &&
             buf->refcount == 1)
         {
@ -959,14 +993,6 @@ UnpinBuffer(volatile BufferDesc *buf, bool fixOwner, bool normalAccess)
         }
         else
             UnlockBufHdr(buf);
-
-        /*
-         * If VACUUM is releasing an otherwise-unused buffer, send it to the
-         * freelist for near-term reuse.  We put it at the tail so that it
-         * won't be used before any invalid buffers that may exist.
-         */
-        if (immed_free_buffer)
-            StrategyFreeBuffer(buf, false);
     }
 }
 
@ -1150,7 +1176,7 @@ SyncOneBuffer(int buf_id, bool skip_pinned)
     FlushBuffer(bufHdr, NULL);
 
     LWLockRelease(bufHdr->content_lock);
-    UnpinBuffer(bufHdr, true, false /* don't change freelist */ );
+    UnpinBuffer(bufHdr, true);
 
     return true;
 }
@ -1266,7 +1292,7 @@ AtProcExit_Buffers(int code, Datum arg)
              * here, it suggests that ResourceOwners are messed up.
              */
            PrivateRefCount[i] = 1;        /* make sure we release shared pin */
-            UnpinBuffer(buf, false, false /* don't change freelist */ );
+            UnpinBuffer(buf, false);
             Assert(PrivateRefCount[i] == 0);
         }
     }
@ -1700,7 +1726,7 @@ FlushRelationBuffers(Relation rel)
             LWLockAcquire(bufHdr->content_lock, LW_SHARED);
             FlushBuffer(bufHdr, rel->rd_smgr);
             LWLockRelease(bufHdr->content_lock);
-            UnpinBuffer(bufHdr, true, false /* no freelist change */ );
+            UnpinBuffer(bufHdr, true);
         }
         else
             UnlockBufHdr(bufHdr);
@ -1723,11 +1749,7 @@ ReleaseBuffer(Buffer buffer)
     if (BufferIsLocal(buffer))
     {
         Assert(LocalRefCount[-buffer - 1] > 0);
-        bufHdr = &LocalBufferDescriptors[-buffer - 1];
         LocalRefCount[-buffer - 1]--;
-        if (LocalRefCount[-buffer - 1] == 0 &&
-            bufHdr->usage_count < BM_MAX_USAGE_COUNT)
-            bufHdr->usage_count++;
         return;
     }
 
@ -1738,7 +1760,7 @@ ReleaseBuffer(Buffer buffer)
     if (PrivateRefCount[buffer - 1] > 1)
         PrivateRefCount[buffer - 1]--;
     else
-        UnpinBuffer(bufHdr, false, true);
+        UnpinBuffer(bufHdr, false);
 }
 
 /*
@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.58 2007/01/05 22:19:37 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/storage/buffer/freelist.c,v 1.59 2007/05/30 20:11:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -39,8 +39,42 @@ typedef struct
 /* Pointers to shared state */
 static BufferStrategyControl *StrategyControl = NULL;
 
-/* Backend-local state about whether currently vacuuming */
-bool        strategy_hint_vacuum = false;
+/*
+ * Private (non-shared) state for managing a ring of shared buffers to re-use.
+ * This is currently the only kind of BufferAccessStrategy object, but someday
+ * we might have more kinds.
+ */
+typedef struct BufferAccessStrategyData
+{
+    /* Overall strategy type */
+    BufferAccessStrategyType btype;
+    /* Number of elements in buffers[] array */
+    int            ring_size;
+
+    /*
+     * Index of the "current" slot in the ring, ie, the one most recently
+     * returned by GetBufferFromRing.
+     */
+    int            current;
+
+    /*
+     * True if the buffer just returned by StrategyGetBuffer had been in
+     * the ring already.
+     */
+    bool        current_was_in_ring;
+
+    /*
+     * Array of buffer numbers.  InvalidBuffer (that is, zero) indicates
+     * we have not yet selected a buffer for this ring slot.  For allocation
+     * simplicity this is palloc'd together with the fixed fields of the
+     * struct.
+     */
+    Buffer        buffers[1];        /* VARIABLE SIZE ARRAY */
+} BufferAccessStrategyData;
+
+
+/* Prototypes for internal functions */
+static volatile BufferDesc *GetBufferFromRing(BufferAccessStrategy strategy);
+static void AddBufferToRing(BufferAccessStrategy strategy,
+                volatile BufferDesc *buf);
 
 
 /*
@ -50,17 +84,38 @@ bool        strategy_hint_vacuum = false;
  * BufferAlloc().  The only hard requirement BufferAlloc() has is that
  * the selected buffer must not currently be pinned by anyone.
  *
+ * strategy is a BufferAccessStrategy object, or NULL for default strategy.
+ *
  * To ensure that no one else can pin the buffer before we do, we must
- * return the buffer with the buffer header spinlock still held.  That
- * means that we return with the BufFreelistLock still held, as well;
- * the caller must release that lock once the spinlock is dropped.
+ * return the buffer with the buffer header spinlock still held.  If
+ * *lock_held is set on exit, we have returned with the BufFreelistLock
+ * still held, as well; the caller must release that lock once the spinlock
+ * is dropped.  We do it that way because releasing the BufFreelistLock
+ * might awaken other processes, and it would be bad to do the associated
+ * kernel calls while holding the buffer header spinlock.
  */
 volatile BufferDesc *
-StrategyGetBuffer(void)
+StrategyGetBuffer(BufferAccessStrategy strategy, bool *lock_held)
 {
     volatile BufferDesc *buf;
     int            trycounter;
 
+    /*
+     * If given a strategy object, see whether it can select a buffer.
+     * We assume strategy objects don't need the BufFreelistLock.
+     */
+    if (strategy != NULL)
+    {
+        buf = GetBufferFromRing(strategy);
+        if (buf != NULL)
+        {
+            *lock_held = false;
+            return buf;
+        }
+    }
+
+    /* Nope, so lock the freelist */
+    *lock_held = true;
     LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 
     /*
@ -82,11 +137,16 @@ StrategyGetBuffer(void)
          * If the buffer is pinned or has a nonzero usage_count, we cannot use
          * it; discard it and retry.  (This can only happen if VACUUM put a
          * valid buffer in the freelist and then someone else used it before
-         * we got to it.)
+         * we got to it.  It's probably impossible altogether as of 8.3,
+         * but we'd better check anyway.)
          */
         LockBufHdr(buf);
         if (buf->refcount == 0 && buf->usage_count == 0)
+        {
+            if (strategy != NULL)
+                AddBufferToRing(strategy, buf);
             return buf;
+        }
         UnlockBufHdr(buf);
     }
 
@ -101,15 +161,23 @@ StrategyGetBuffer(void)
 
         /*
          * If the buffer is pinned or has a nonzero usage_count, we cannot use
-         * it; decrement the usage_count and keep scanning.
+         * it; decrement the usage_count (unless pinned) and keep scanning.
          */
         LockBufHdr(buf);
-        if (buf->refcount == 0 && buf->usage_count == 0)
-            return buf;
-        if (buf->usage_count > 0)
+        if (buf->refcount == 0)
         {
-            buf->usage_count--;
-            trycounter = NBuffers;
+            if (buf->usage_count > 0)
+            {
+                buf->usage_count--;
+                trycounter = NBuffers;
+            }
+            else
+            {
+                /* Found a usable buffer */
+                if (strategy != NULL)
+                    AddBufferToRing(strategy, buf);
+                return buf;
+            }
         }
         else if (--trycounter == 0)
         {
@ -132,13 +200,9 @@ StrategyGetBuffer(void)
 
 /*
  * StrategyFreeBuffer: put a buffer on the freelist
- *
- * The buffer is added either at the head or the tail, according to the
- * at_head parameter.  This allows a small amount of control over how
- * quickly the buffer is reused.
  */
 void
-StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head)
+StrategyFreeBuffer(volatile BufferDesc *buf)
 {
     LWLockAcquire(BufFreelistLock, LW_EXCLUSIVE);
 
@ -148,22 +212,10 @@ StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head)
      */
     if (buf->freeNext == FREENEXT_NOT_IN_LIST)
     {
-        if (at_head)
-        {
-            buf->freeNext = StrategyControl->firstFreeBuffer;
-            if (buf->freeNext < 0)
-                StrategyControl->lastFreeBuffer = buf->buf_id;
-            StrategyControl->firstFreeBuffer = buf->buf_id;
-        }
-        else
-        {
-            buf->freeNext = FREENEXT_END_OF_LIST;
-            if (StrategyControl->firstFreeBuffer < 0)
-                StrategyControl->firstFreeBuffer = buf->buf_id;
-            else
-                BufferDescriptors[StrategyControl->lastFreeBuffer].freeNext = buf->buf_id;
-            StrategyControl->lastFreeBuffer = buf->buf_id;
-        }
+        buf->freeNext = StrategyControl->firstFreeBuffer;
+        if (buf->freeNext < 0)
+            StrategyControl->lastFreeBuffer = buf->buf_id;
+        StrategyControl->firstFreeBuffer = buf->buf_id;
     }
 
     LWLockRelease(BufFreelistLock);
@ -190,15 +242,6 @@ StrategySyncStart(void)
     return result;
 }
 
-/*
- * StrategyHintVacuum -- tell us whether VACUUM is active
- */
-void
-StrategyHintVacuum(bool vacuum_active)
-{
-    strategy_hint_vacuum = vacuum_active;
-}
-
 
 /*
  * StrategyShmemSize
@ -274,3 +317,172 @@ StrategyInitialize(bool init)
     else
         Assert(!init);
 }
+
+
+/* ----------------------------------------------------------------
+ *                Backend-private buffer ring management
+ * ----------------------------------------------------------------
+ */
+
+
+/*
+ * GetAccessStrategy -- create a BufferAccessStrategy object
+ *
+ * The object is allocated in the current memory context.
+ */
+BufferAccessStrategy
+GetAccessStrategy(BufferAccessStrategyType btype)
+{
+    BufferAccessStrategy strategy;
+    int            ring_size;
+
+    /*
+     * Select ring size to use.  See buffer/README for rationales.
+     * (Currently all cases are the same size, but keep this code
+     * structure for flexibility.)
+     */
+    switch (btype)
+    {
+        case BAS_NORMAL:
+            /* if someone asks for NORMAL, just give 'em a "default" object */
+            return NULL;
+
+        case BAS_BULKREAD:
+            ring_size = 256 * 1024 / BLCKSZ;
+            break;
+        case BAS_VACUUM:
+            ring_size = 256 * 1024 / BLCKSZ;
+            break;
+
+        default:
+            elog(ERROR, "unrecognized buffer access strategy: %d",
+                 (int) btype);
+            return NULL;        /* keep compiler quiet */
+    }
+
+    /* Make sure ring isn't an undue fraction of shared buffers */
+    ring_size = Min(NBuffers / 8, ring_size);
+
+    /* Allocate the object and initialize all elements to zeroes */
+    strategy = (BufferAccessStrategy)
+        palloc0(offsetof(BufferAccessStrategyData, buffers) +
+                ring_size * sizeof(Buffer));
+
+    /* Set fields that don't start out zero */
+    strategy->btype = btype;
+    strategy->ring_size = ring_size;
+
+    return strategy;
+}
+
+/*
+ * FreeAccessStrategy -- release a BufferAccessStrategy object
+ *
+ * A simple pfree would do at the moment, but we would prefer that callers
+ * don't assume that much about the representation of BufferAccessStrategy.
+ */
+void
+FreeAccessStrategy(BufferAccessStrategy strategy)
+{
+    /* don't crash if called on a "default" strategy */
+    if (strategy != NULL)
+        pfree(strategy);
+}
+
+/*
+ * GetBufferFromRing -- returns a buffer from the ring, or NULL if the
+ *        ring is empty.
+ *
+ * The bufhdr spin lock is held on the returned buffer.
+ */
+static volatile BufferDesc *
+GetBufferFromRing(BufferAccessStrategy strategy)
+{
+    volatile BufferDesc *buf;
+    Buffer        bufnum;
+
+    /* Advance to next ring slot */
+    if (++strategy->current >= strategy->ring_size)
+        strategy->current = 0;
+
+    /*
+     * If the slot hasn't been filled yet, tell the caller to allocate
+     * a new buffer with the normal allocation strategy.  He will then
+     * fill this slot by calling AddBufferToRing with the new buffer.
+     */
+    bufnum = strategy->buffers[strategy->current];
+    if (bufnum == InvalidBuffer)
+    {
+        strategy->current_was_in_ring = false;
+        return NULL;
+    }
+
+    /*
+     * If the buffer is pinned we cannot use it under any circumstances.
+     *
+     * If usage_count is 0 or 1 then the buffer is fair game (we expect 1,
+     * since our own previous usage of the ring element would have left it
+     * there, but it might've been decremented by clock sweep since then).
+     * A higher usage_count indicates someone else has touched the buffer,
+     * so we shouldn't re-use it.
+     */
+    buf = &BufferDescriptors[bufnum - 1];
+    LockBufHdr(buf);
+    if (buf->refcount == 0 && buf->usage_count <= 1)
+    {
+        strategy->current_was_in_ring = true;
+        return buf;
+    }
+    UnlockBufHdr(buf);
+
+    /*
+     * Tell caller to allocate a new buffer with the normal allocation
+     * strategy.  He'll then replace this ring element via AddBufferToRing.
+     */
+    strategy->current_was_in_ring = false;
+    return NULL;
+}
+
+/*
+ * AddBufferToRing -- add a buffer to the buffer ring
+ *
+ * Caller must hold the buffer header spinlock on the buffer.  Since this
+ * is called with the spinlock held, it had better be quite cheap.
+ */
+static void
+AddBufferToRing(BufferAccessStrategy strategy, volatile BufferDesc *buf)
+{
+    strategy->buffers[strategy->current] = BufferDescriptorGetBuffer(buf);
+}
+
+/*
+ * StrategyRejectBuffer -- consider rejecting a dirty buffer
+ *
+ * When a nondefault strategy is used, the buffer manager calls this function
+ * when it turns out that the buffer selected by StrategyGetBuffer needs to
+ * be written out and doing so would require flushing WAL too.  This gives us
+ * a chance to choose a different victim.
+ *
+ * Returns true if buffer manager should ask for a new victim, and false
+ * if this buffer should be written and re-used.
+ */
+bool
+StrategyRejectBuffer(BufferAccessStrategy strategy, volatile BufferDesc *buf)
+{
+    /* We only do this in bulkread mode */
+    if (strategy->btype != BAS_BULKREAD)
+        return false;
+
+    /* Don't muck with behavior of normal buffer-replacement strategy */
+    if (!strategy->current_was_in_ring ||
+        strategy->buffers[strategy->current] != BufferDescriptorGetBuffer(buf))
+        return false;
+
+    /*
+     * Remove the dirty buffer from the ring; necessary to prevent infinite
+     * loop if all ring members are dirty.
+     */
+    strategy->buffers[strategy->current] = InvalidBuffer;
+
+    return true;
+}
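To make the ring mechanics above easier to follow, here is a small standalone simulation, not PostgreSQL code; it assumes a toy 4-slot ring and 16-buffer pool and mimics the GetBufferFromRing/AddBufferToRing decisions. It shows that a scan touching many pages keeps recycling the same few buffers.

#include <stdio.h>

#define NBUFFERS   16           /* toy buffer pool */
#define RING_SIZE  4            /* toy ring, cf. 256KB/BLCKSZ in the patch */

static int usage_count[NBUFFERS];   /* toy clock-sweep counters */
static int ring[RING_SIZE];         /* -1 means "slot not filled yet" */
static int current = -1;
static int next_victim = 0;         /* toy clock hand */

/* Mimics GetBufferFromRing: reuse the next ring slot if it is still "cold". */
static int
get_buffer_from_ring(void)
{
    int b;

    current = (current + 1) % RING_SIZE;
    b = ring[current];
    if (b >= 0 && usage_count[b] <= 1)
        return b;
    return -1;                  /* caller must run the normal clock sweep */
}

/* Mimics the clock sweep plus AddBufferToRing when the ring cannot supply one. */
static int
get_buffer_from_pool(void)
{
    for (;;)
    {
        int b = next_victim;

        next_victim = (next_victim + 1) % NBUFFERS;
        if (usage_count[b] == 0)
        {
            ring[current] = b;  /* remember it in the current ring slot */
            return b;
        }
        usage_count[b]--;       /* age it, as the modified sweep does */
    }
}

int
main(void)
{
    int i, page;

    for (i = 0; i < RING_SIZE; i++)
        ring[i] = -1;

    /* "Scan" 12 pages; with the ring we only ever touch RING_SIZE buffers. */
    for (page = 0; page < 12; page++)
    {
        int b = get_buffer_from_ring();

        if (b < 0)
            b = get_buffer_from_pool();
        usage_count[b] = 1;     /* PinBuffer with a strategy: just make it nonzero */
        printf("page %2d -> buffer %d\n", page, b);
    }
    return 0;
}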
@ -9,7 +9,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.76 2007/01/05 22:19:37 momjian Exp $
+ *    $PostgreSQL: pgsql/src/backend/storage/buffer/localbuf.c,v 1.77 2007/05/30 20:11:59 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -57,7 +57,8 @@ static Block GetLocalBufferStorage(void);
  *
  * API is similar to bufmgr.c's BufferAlloc, except that we do not need
  * to do any locking since this is all local.  Also, IO_IN_PROGRESS
- * does not get set.
+ * does not get set.  Lastly, we support only default access strategy
+ * (hence, usage_count is always advanced).
  */
 BufferDesc *
 LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
@ -88,7 +89,12 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
         fprintf(stderr, "LB ALLOC (%u,%d) %d\n",
                 RelationGetRelid(reln), blockNum, -b - 1);
 #endif
+        /* this part is equivalent to PinBuffer for a shared buffer */
+        if (LocalRefCount[b] == 0)
+        {
+            if (bufHdr->usage_count < BM_MAX_USAGE_COUNT)
+                bufHdr->usage_count++;
+        }
         LocalRefCount[b]++;
         ResourceOwnerRememberBuffer(CurrentResourceOwner,
                                     BufferDescriptorGetBuffer(bufHdr));
@ -121,18 +127,21 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
 
         bufHdr = &LocalBufferDescriptors[b];
 
-        if (LocalRefCount[b] == 0 && bufHdr->usage_count == 0)
+        if (LocalRefCount[b] == 0)
         {
-            LocalRefCount[b]++;
-            ResourceOwnerRememberBuffer(CurrentResourceOwner,
-                                        BufferDescriptorGetBuffer(bufHdr));
-            break;
-        }
-
-        if (bufHdr->usage_count > 0)
-        {
-            bufHdr->usage_count--;
-            trycounter = NLocBuffer;
+            if (bufHdr->usage_count > 0)
+            {
+                bufHdr->usage_count--;
+                trycounter = NLocBuffer;
+            }
+            else
+            {
+                /* Found a usable buffer */
+                LocalRefCount[b]++;
+                ResourceOwnerRememberBuffer(CurrentResourceOwner,
+                                            BufferDescriptorGetBuffer(bufHdr));
+                break;
+            }
         }
         else if (--trycounter == 0)
             ereport(ERROR,
@ -199,7 +208,7 @@ LocalBufferAlloc(Relation reln, BlockNumber blockNum, bool *foundPtr)
     bufHdr->tag = newTag;
     bufHdr->flags &= ~(BM_VALID | BM_DIRTY | BM_JUST_DIRTIED | BM_IO_ERROR);
     bufHdr->flags |= BM_TAG_VALID;
-    bufHdr->usage_count = 0;
+    bufHdr->usage_count = 1;
 
     *foundPtr = FALSE;
     return bufHdr;
@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *      $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.279 2007/04/27 22:05:49 tgl Exp $
+ *      $PostgreSQL: pgsql/src/backend/tcop/utility.c,v 1.280 2007/05/30 20:12:01 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -931,7 +931,7 @@ ProcessUtility(Node *parsetree,
             break;
 
         case T_VacuumStmt:
-            vacuum((VacuumStmt *) parsetree, NIL, isTopLevel);
+            vacuum((VacuumStmt *) parsetree, NIL, NULL, isTopLevel);
             break;
 
         case T_ExplainStmt:
@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.66 2007/01/05 22:19:50 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/access/genam.h,v 1.67 2007/05/30 20:12:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -40,6 +40,7 @@ typedef struct IndexVacuumInfo
     bool        vacuum_full;        /* VACUUM FULL (we have exclusive lock) */
     int            message_level;        /* ereport level for progress messages */
     double        num_heap_tuples;    /* tuples remaining in heap */
+    BufferAccessStrategy strategy;        /* access strategy for reads */
 } IndexVacuumInfo;
 
 /*
@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.80 2007/05/03 16:45:58 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/hash.h,v 1.81 2007/05/30 20:12:02 tgl Exp $
  *
  * NOTES
  *        modeled after Margo Seltzer's hash implementation for unix.
@ -273,11 +273,13 @@ extern void _hash_doinsert(Relation rel, IndexTuple itup);
 
 /* hashovfl.c */
 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
-extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf);
+extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
+                    BufferAccessStrategy bstrategy);
 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
                  BlockNumber blkno);
 extern void _hash_squeezebucket(Relation rel,
-                    Bucket bucket, BlockNumber bucket_blkno);
+                    Bucket bucket, BlockNumber bucket_blkno,
+                    BufferAccessStrategy bstrategy);
 
 /* hashpage.c */
 extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
@ -287,6 +289,9 @@ extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
              int access, int flags);
 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno);
+extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
+                          int access, int flags,
+                          BufferAccessStrategy bstrategy);
 extern void _hash_relbuf(Relation rel, Buffer buf);
 extern void _hash_dropbuf(Relation rel, Buffer buf);
 extern void _hash_wrtbuf(Relation rel, Buffer buf);
@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.53 2007/05/27 03:50:39 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/relscan.h,v 1.54 2007/05/30 20:12:02 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -27,6 +27,7 @@ typedef struct HeapScanDescData
     int            rs_nkeys;        /* number of scan keys */
     ScanKey        rs_key;            /* array of scan key descriptors */
     BlockNumber rs_nblocks;        /* number of blocks to scan */
+    BufferAccessStrategy rs_strategy;    /* access strategy for reads */
     bool        rs_pageatatime; /* verify visibility page-at-a-time? */
 
     /* scan current state */
@ -6,7 +6,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.77 2007/05/20 21:08:19 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/access/xlog.h,v 1.78 2007/05/30 20:12:02 tgl Exp $
  */
 #ifndef XLOG_H
 #define XLOG_H
@ -159,6 +159,7 @@ extern bool XLOG_DEBUG;
 
 extern XLogRecPtr XLogInsert(RmgrId rmid, uint8 info, XLogRecData *rdata);
 extern void XLogFlush(XLogRecPtr RecPtr);
+extern bool XLogNeedsFlush(XLogRecPtr RecPtr);
 
 extern void xlog_redo(XLogRecPtr lsn, XLogRecord *record);
 extern void xlog_desc(StringInfo buf, uint8 xl_info, char *rec);
@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.71 2007/05/17 15:28:29 alvherre Exp $
+ * $PostgreSQL: pgsql/src/include/commands/vacuum.h,v 1.72 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -18,9 +18,11 @@
 #include "catalog/pg_statistic.h"
 #include "catalog/pg_type.h"
 #include "nodes/parsenodes.h"
+#include "storage/buf.h"
 #include "storage/lock.h"
 #include "utils/rel.h"
 
 
 /*----------
  * ANALYZE builds one of these structs for each attribute (column) that is
  * to be analyzed.  The struct and subsidiary data are in anl_context,
@ -110,7 +112,8 @@ extern int vacuum_freeze_min_age;
 
 
 /* in commands/vacuum.c */
-extern void vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel);
+extern void vacuum(VacuumStmt *vacstmt, List *relids,
+       BufferAccessStrategy bstrategy, bool isTopLevel);
 extern void vac_open_indexes(Relation relation, LOCKMODE lockmode,
                  int *nindexes, Relation **Irel);
 extern void vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode);
@ -127,9 +130,11 @@ extern bool vac_is_partial_index(Relation indrel);
 extern void vacuum_delay_point(void);
 
 /* in commands/vacuumlazy.c */
-extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt);
+extern void lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+               BufferAccessStrategy bstrategy);
 
 /* in commands/analyze.c */
-extern void analyze_rel(Oid relid, VacuumStmt *vacstmt);
+extern void analyze_rel(Oid relid, VacuumStmt *vacstmt,
+            BufferAccessStrategy bstrategy);
 
 #endif   /* VACUUM_H */
@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.21 2007/01/05 22:19:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf.h,v 1.22 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -36,4 +36,11 @@ typedef int Buffer;
  */
 #define BufferIsLocal(buffer)    ((buffer) < 0)
 
+/*
+ * Buffer access strategy objects.
+ *
+ * BufferAccessStrategyData is private to freelist.c
+ */
+typedef struct BufferAccessStrategyData *BufferAccessStrategy;
+
 #endif   /* BUF_H */
@ -8,7 +8,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.89 2007/01/05 22:19:57 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/storage/buf_internals.h,v 1.90 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -167,9 +167,6 @@ extern DLLIMPORT BufferDesc *BufferDescriptors;
 /* in localbuf.c */
 extern BufferDesc *LocalBufferDescriptors;
 
-/* in freelist.c */
-extern bool strategy_hint_vacuum;
-
 /* event counters in buf_init.c */
 extern long int ReadBufferCount;
 extern long int ReadLocalBufferCount;
@ -184,8 +181,12 @@ extern long int LocalBufferFlushCount;
  */
 
 /* freelist.c */
-extern volatile BufferDesc *StrategyGetBuffer(void);
-extern void StrategyFreeBuffer(volatile BufferDesc *buf, bool at_head);
+extern volatile BufferDesc *StrategyGetBuffer(BufferAccessStrategy strategy,
+                  bool *lock_held);
+extern void StrategyFreeBuffer(volatile BufferDesc *buf);
+extern bool StrategyRejectBuffer(BufferAccessStrategy strategy,
+                     volatile BufferDesc *buf);
+
 extern int    StrategySyncStart(void);
 extern Size StrategyShmemSize(void);
 extern void StrategyInitialize(bool init);
@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.103 2007/05/02 23:18:03 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/storage/bufmgr.h,v 1.104 2007/05/30 20:12:03 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@ -19,6 +19,14 @@
 
 typedef void *Block;
 
+/* Possible arguments for GetAccessStrategy() */
+typedef enum BufferAccessStrategyType
+{
+    BAS_NORMAL,                    /* Normal random access */
+    BAS_BULKREAD,                /* Large read-only scan (hint bit updates are ok) */
+    BAS_VACUUM                    /* VACUUM */
+} BufferAccessStrategyType;
+
 /* in globals.c ... this duplicates miscadmin.h */
 extern DLLIMPORT int NBuffers;
 
@ -111,6 +119,8 @@ extern DLLIMPORT int32 *LocalRefCount;
  * prototypes for functions in bufmgr.c
  */
 extern Buffer ReadBuffer(Relation reln, BlockNumber blockNum);
+extern Buffer ReadBufferWithStrategy(Relation reln, BlockNumber blockNum,
+                       BufferAccessStrategy strategy);
 extern Buffer ReadOrZeroBuffer(Relation reln, BlockNumber blockNum);
 extern void ReleaseBuffer(Buffer buffer);
 extern void UnlockReleaseBuffer(Buffer buffer);
@ -157,6 +167,7 @@ extern void BgBufferSync(void);
 extern void AtProcExit_LocalBuffers(void);
 
 /* in freelist.c */
-extern void StrategyHintVacuum(bool vacuum_active);
+extern BufferAccessStrategy GetAccessStrategy(BufferAccessStrategyType btype);
+extern void FreeAccessStrategy(BufferAccessStrategy strategy);
 
 #endif
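A hedged end-to-end sketch, not part of the patch, showing how the public pieces declared above fit together for a VACUUM-style caller: the strategy object is created once, threaded through the page accesses, and freed when the pass is done. Function and variable names here are placeholders.

#include "postgres.h"
#include "storage/bufmgr.h"
#include "utils/rel.h"

/*
 * Sketch: process one block of "onerel" the way a vacuum-driven reader
 * would, reusing a private ring instead of flooding shared buffers.
 */
static void
vacuum_one_block(Relation onerel, BlockNumber blkno,
                 BufferAccessStrategy vac_strategy)
{
    Buffer      buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);

    /* ... prune/defragment the page here ... */
    ReleaseBuffer(buf);
}

/* Caller creates the strategy once and frees it after the whole pass. */
static void
vacuum_pass(Relation onerel, BlockNumber nblocks)
{
    BufferAccessStrategy vac_strategy = GetAccessStrategy(BAS_VACUUM);
    BlockNumber blkno;

    for (blkno = 0; blkno < nblocks; blkno++)
        vacuum_one_block(onerel, blkno, vac_strategy);
    FreeAccessStrategy(vac_strategy);
}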