mirror of
https://github.com/postgres/postgres.git
synced 2025-07-17 06:41:09 +03:00
Make large sequential scans and VACUUMs work in a limited-size "ring" of buffers, rather than blowing out the whole shared-buffer arena.

Aside from avoiding cache spoliation, this fixes the problem that VACUUM formerly tended to cause a WAL flush for every page it modified, because we had it hacked to use only a single buffer. Those flushes will now occur only once per ring-ful. The exact ring size, and the threshold for seqscans to switch into the ring usage pattern, remain under debate; but the infrastructure seems done. The key bit of infrastructure is a new optional BufferAccessStrategy object that can be passed to ReadBuffer operations; this replaces the former StrategyHintVacuum API.

This patch also changes the buffer usage-count methodology a bit: we now advance usage_count when first pinning a buffer, rather than when last unpinning it. To preserve the behavior that a buffer's lifetime starts to decrease when it's released, the clock sweep code is modified to not decrement usage_count of pinned buffers.

Work not done in this commit: teach GiST and GIN indexes to use the vacuum BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
This commit is contained in:
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.107 2007/04/30 03:23:48 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.108 2007/05/30 20:11:56 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -63,10 +63,13 @@ typedef struct AnlIndexData
|
||||
/* Default statistics target (GUC parameter) */
|
||||
int default_statistics_target = 10;
|
||||
|
||||
/* A few variables that don't seem worth passing around as parameters */
|
||||
static int elevel = -1;
|
||||
|
||||
static MemoryContext anl_context = NULL;
|
||||
|
||||
static BufferAccessStrategy vac_strategy;
|
||||
|
||||
|
||||
static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
|
||||
int samplesize);
|
||||
@ -94,7 +97,8 @@ static bool std_typanalyze(VacAttrStats *stats);
|
||||
* analyze_rel() -- analyze one relation
|
||||
*/
|
||||
void
|
||||
analyze_rel(Oid relid, VacuumStmt *vacstmt)
|
||||
analyze_rel(Oid relid, VacuumStmt *vacstmt,
|
||||
BufferAccessStrategy bstrategy)
|
||||
{
|
||||
Relation onerel;
|
||||
int attr_cnt,
|
||||
@ -120,6 +124,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
|
||||
else
|
||||
elevel = DEBUG2;
|
||||
|
||||
vac_strategy = bstrategy;
|
||||
|
||||
/*
|
||||
* Use the current context for storing analysis info. vacuum.c ensures
|
||||
* that this context will be cleared when I return, thus releasing the
|
||||
@ -845,7 +851,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
|
||||
* looking at it. We don't maintain a lock on the page, so tuples
|
||||
* could get added to it, but we ignore such tuples.
|
||||
*/
|
||||
targbuffer = ReadBuffer(onerel, targblock);
|
||||
targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
|
||||
LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
|
||||
targpage = BufferGetPage(targbuffer);
|
||||
maxoffset = PageGetMaxOffsetNumber(targpage);
|
||||
|
@ -13,7 +13,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.351 2007/05/17 15:28:29 alvherre Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.352 2007/05/30 20:11:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -191,6 +191,7 @@ ExecContext_Finish(ExecContext ec)
|
||||
*----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
/* A few variables that don't seem worth passing around as parameters */
|
||||
static MemoryContext vac_context = NULL;
|
||||
|
||||
static int elevel = -1;
|
||||
@ -198,6 +199,8 @@ static int elevel = -1;
|
||||
static TransactionId OldestXmin;
|
||||
static TransactionId FreezeLimit;
|
||||
|
||||
static BufferAccessStrategy vac_strategy;
|
||||
|
||||
|
||||
/* non-export function prototypes */
|
||||
static List *get_rel_oids(List *relids, const RangeVar *vacrel,
|
||||
@ -257,14 +260,18 @@ static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
|
||||
* relation OIDs to be processed, and vacstmt->relation is ignored.
|
||||
* (The non-NIL case is currently only used by autovacuum.)
|
||||
*
|
||||
* bstrategy is normally given as NULL, but in autovacuum it can be passed
|
||||
* in to use the same buffer strategy object across multiple vacuum() calls.
|
||||
*
|
||||
* isTopLevel should be passed down from ProcessUtility.
|
||||
*
|
||||
* It is the caller's responsibility that both vacstmt and relids
|
||||
* It is the caller's responsibility that vacstmt, relids, and bstrategy
|
||||
* (if given) be allocated in a memory context that won't disappear
|
||||
* at transaction commit.
|
||||
*/
|
||||
void
|
||||
vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
|
||||
vacuum(VacuumStmt *vacstmt, List *relids,
|
||||
BufferAccessStrategy bstrategy, bool isTopLevel)
|
||||
{
|
||||
const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
|
||||
volatile MemoryContext anl_context = NULL;
|
||||
@ -319,6 +326,19 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
|
||||
ALLOCSET_DEFAULT_INITSIZE,
|
||||
ALLOCSET_DEFAULT_MAXSIZE);
|
||||
|
||||
/*
|
||||
* If caller didn't give us a buffer strategy object, make one in the
|
||||
* cross-transaction memory context.
|
||||
*/
|
||||
if (bstrategy == NULL)
|
||||
{
|
||||
MemoryContext old_context = MemoryContextSwitchTo(vac_context);
|
||||
|
||||
bstrategy = GetAccessStrategy(BAS_VACUUM);
|
||||
MemoryContextSwitchTo(old_context);
|
||||
}
|
||||
vac_strategy = bstrategy;
|
||||
|
||||
/* Remember whether we are processing everything in the DB */
|
||||
all_rels = (relids == NIL && vacstmt->relation == NULL);
|
||||
|
||||
@ -417,15 +437,7 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
|
||||
else
|
||||
old_context = MemoryContextSwitchTo(anl_context);
|
||||
|
||||
/*
|
||||
* Tell the buffer replacement strategy that vacuum is causing
|
||||
* the IO
|
||||
*/
|
||||
StrategyHintVacuum(true);
|
||||
|
||||
analyze_rel(relid, vacstmt);
|
||||
|
||||
StrategyHintVacuum(false);
|
||||
analyze_rel(relid, vacstmt, vac_strategy);
|
||||
|
||||
if (use_own_xacts)
|
||||
CommitTransactionCommand();
|
||||
@ -441,8 +453,6 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
|
||||
{
|
||||
/* Make sure cost accounting is turned off after error */
|
||||
VacuumCostActive = false;
|
||||
/* And reset buffer replacement strategy, too */
|
||||
StrategyHintVacuum(false);
|
||||
PG_RE_THROW();
|
||||
}
|
||||
PG_END_TRY();
|
||||
@ -1084,21 +1094,13 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
|
||||
*/
|
||||
toast_relid = onerel->rd_rel->reltoastrelid;
|
||||
|
||||
/*
|
||||
* Tell the cache replacement strategy that vacuum is causing all
|
||||
* following IO
|
||||
*/
|
||||
StrategyHintVacuum(true);
|
||||
|
||||
/*
|
||||
* Do the actual work --- either FULL or "lazy" vacuum
|
||||
*/
|
||||
if (vacstmt->full)
|
||||
full_vacuum_rel(onerel, vacstmt);
|
||||
else
|
||||
lazy_vacuum_rel(onerel, vacstmt);
|
||||
|
||||
StrategyHintVacuum(false);
|
||||
lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
|
||||
|
||||
/* all done with this class, but hold lock until commit */
|
||||
relation_close(onerel, NoLock);
|
||||
@ -1290,7 +1292,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
|
||||
|
||||
vacuum_delay_point();
|
||||
|
||||
buf = ReadBuffer(onerel, blkno);
|
||||
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
|
||||
page = BufferGetPage(buf);
|
||||
|
||||
/*
|
||||
@ -1730,7 +1732,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
/*
|
||||
* Process this page of relation.
|
||||
*/
|
||||
buf = ReadBuffer(onerel, blkno);
|
||||
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
|
||||
page = BufferGetPage(buf);
|
||||
|
||||
vacpage->offsets_free = 0;
|
||||
@ -1954,8 +1956,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
nextTid = tp.t_data->t_ctid;
|
||||
priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
|
||||
/* assume block# is OK (see heap_fetch comments) */
|
||||
nextBuf = ReadBuffer(onerel,
|
||||
ItemPointerGetBlockNumber(&nextTid));
|
||||
nextBuf = ReadBufferWithStrategy(onerel,
|
||||
ItemPointerGetBlockNumber(&nextTid),
|
||||
vac_strategy);
|
||||
nextPage = BufferGetPage(nextBuf);
|
||||
/* If bogus or unused slot, assume tp is end of chain */
|
||||
nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
|
||||
@ -2091,8 +2094,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
break; /* out of check-all-items loop */
|
||||
}
|
||||
tp.t_self = vtlp->this_tid;
|
||||
Pbuf = ReadBuffer(onerel,
|
||||
ItemPointerGetBlockNumber(&(tp.t_self)));
|
||||
Pbuf = ReadBufferWithStrategy(onerel,
|
||||
ItemPointerGetBlockNumber(&(tp.t_self)),
|
||||
vac_strategy);
|
||||
Ppage = BufferGetPage(Pbuf);
|
||||
Pitemid = PageGetItemId(Ppage,
|
||||
ItemPointerGetOffsetNumber(&(tp.t_self)));
|
||||
@ -2174,11 +2178,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
|
||||
/* Get page to move from */
|
||||
tuple.t_self = vtmove[ti].tid;
|
||||
Cbuf = ReadBuffer(onerel,
|
||||
ItemPointerGetBlockNumber(&(tuple.t_self)));
|
||||
Cbuf = ReadBufferWithStrategy(onerel,
|
||||
ItemPointerGetBlockNumber(&(tuple.t_self)),
|
||||
vac_strategy);
|
||||
|
||||
/* Get page to move to */
|
||||
dst_buffer = ReadBuffer(onerel, destvacpage->blkno);
|
||||
dst_buffer = ReadBufferWithStrategy(onerel,
|
||||
destvacpage->blkno,
|
||||
vac_strategy);
|
||||
|
||||
LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
if (dst_buffer != Cbuf)
|
||||
@ -2239,7 +2246,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
if (i == num_fraged_pages)
|
||||
break; /* can't move item anywhere */
|
||||
dst_vacpage = fraged_pages->pagedesc[i];
|
||||
dst_buffer = ReadBuffer(onerel, dst_vacpage->blkno);
|
||||
dst_buffer = ReadBufferWithStrategy(onerel,
|
||||
dst_vacpage->blkno,
|
||||
vac_strategy);
|
||||
LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
|
||||
dst_page = BufferGetPage(dst_buffer);
|
||||
/* if this page was not used before - clean it */
|
||||
@ -2386,7 +2395,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
Page page;
|
||||
|
||||
/* this page was not used as a move target, so must clean it */
|
||||
buf = ReadBuffer(onerel, (*curpage)->blkno);
|
||||
buf = ReadBufferWithStrategy(onerel,
|
||||
(*curpage)->blkno,
|
||||
vac_strategy);
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
page = BufferGetPage(buf);
|
||||
if (!PageIsEmpty(page))
|
||||
@ -2470,7 +2481,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
|
||||
int uncnt;
|
||||
int num_tuples = 0;
|
||||
|
||||
buf = ReadBuffer(onerel, vacpage->blkno);
|
||||
buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
page = BufferGetPage(buf);
|
||||
maxoff = PageGetMaxOffsetNumber(page);
|
||||
@ -2859,7 +2870,7 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
|
||||
break; /* no need to scan any further */
|
||||
if ((*curpage)->offsets_used == 0)
|
||||
continue; /* this page was never used as a move dest */
|
||||
buf = ReadBuffer(rel, (*curpage)->blkno);
|
||||
buf = ReadBufferWithStrategy(rel, (*curpage)->blkno, vac_strategy);
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
page = BufferGetPage(buf);
|
||||
max_offset = PageGetMaxOffsetNumber(page);
|
||||
@ -2925,7 +2936,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
|
||||
|
||||
if ((*vacpage)->offsets_free > 0)
|
||||
{
|
||||
buf = ReadBuffer(onerel, (*vacpage)->blkno);
|
||||
buf = ReadBufferWithStrategy(onerel,
|
||||
(*vacpage)->blkno,
|
||||
vac_strategy);
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
vacuum_page(onerel, buf, *vacpage);
|
||||
UnlockReleaseBuffer(buf);
|
||||
@ -3012,6 +3025,7 @@ scan_index(Relation indrel, double num_tuples)
|
||||
ivinfo.vacuum_full = true;
|
||||
ivinfo.message_level = elevel;
|
||||
ivinfo.num_heap_tuples = num_tuples;
|
||||
ivinfo.strategy = vac_strategy;
|
||||
|
||||
stats = index_vacuum_cleanup(&ivinfo, NULL);
|
||||
|
||||
@ -3077,6 +3091,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
|
||||
ivinfo.vacuum_full = true;
|
||||
ivinfo.message_level = elevel;
|
||||
ivinfo.num_heap_tuples = num_tuples + keep_tuples;
|
||||
ivinfo.strategy = vac_strategy;
|
||||
|
||||
/* Do bulk deletion */
|
||||
stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);
|
||||
|
@ -36,7 +36,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.89 2007/05/17 15:28:29 alvherre Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -96,11 +96,14 @@ typedef struct LVRelStats
|
||||
} LVRelStats;
|
||||
|
||||
|
||||
/* A few variables that don't seem worth passing around as parameters */
|
||||
static int elevel = -1;
|
||||
|
||||
static TransactionId OldestXmin;
|
||||
static TransactionId FreezeLimit;
|
||||
|
||||
static BufferAccessStrategy vac_strategy;
|
||||
|
||||
|
||||
/* non-export function prototypes */
|
||||
static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
||||
@ -138,7 +141,8 @@ static int vac_cmp_page_spaces(const void *left, const void *right);
|
||||
* and locked the relation.
|
||||
*/
|
||||
void
|
||||
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
|
||||
lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
|
||||
BufferAccessStrategy bstrategy)
|
||||
{
|
||||
LVRelStats *vacrelstats;
|
||||
Relation *Irel;
|
||||
@ -158,6 +162,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
|
||||
else
|
||||
elevel = DEBUG2;
|
||||
|
||||
vac_strategy = bstrategy;
|
||||
|
||||
vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
|
||||
&OldestXmin, &FreezeLimit);
|
||||
|
||||
@ -318,7 +324,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
|
||||
vacrelstats->num_index_scans++;
|
||||
}
|
||||
|
||||
buf = ReadBuffer(onerel, blkno);
|
||||
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
|
||||
|
||||
/* Initially, we only need shared access to the buffer */
|
||||
LockBuffer(buf, BUFFER_LOCK_SHARE);
|
||||
@ -586,7 +592,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
|
||||
vacuum_delay_point();
|
||||
|
||||
tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
|
||||
buf = ReadBuffer(onerel, tblk);
|
||||
buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
|
||||
LockBufferForCleanup(buf);
|
||||
tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
|
||||
/* Now that we've compacted the page, record its available space */
|
||||
@ -684,6 +690,7 @@ lazy_vacuum_index(Relation indrel,
|
||||
ivinfo.message_level = elevel;
|
||||
/* We don't yet know rel_tuples, so pass -1 */
|
||||
ivinfo.num_heap_tuples = -1;
|
||||
ivinfo.strategy = vac_strategy;
|
||||
|
||||
/* Do bulk deletion */
|
||||
*stats = index_bulk_delete(&ivinfo, *stats,
|
||||
@ -713,6 +720,7 @@ lazy_cleanup_index(Relation indrel,
|
||||
ivinfo.vacuum_full = false;
|
||||
ivinfo.message_level = elevel;
|
||||
ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
|
||||
ivinfo.strategy = vac_strategy;
|
||||
|
||||
stats = index_vacuum_cleanup(&ivinfo, stats);
|
||||
|
||||
@ -869,7 +877,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
|
||||
|
||||
blkno--;
|
||||
|
||||
buf = ReadBuffer(onerel, blkno);
|
||||
buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
|
||||
|
||||
/* In this phase we only need shared access to the buffer */
|
||||
LockBuffer(buf, BUFFER_LOCK_SHARE);
|
||||
|
Reference in New Issue
Block a user