Make large sequential scans and VACUUMs work in a limited-size "ring" of
buffers, rather than blowing out the whole shared-buffer arena.  Aside from
avoiding cache spoliation, this fixes the problem that VACUUM formerly tended
to cause a WAL flush for every page it modified, because we had it hacked to
use only a single buffer.  Those flushes will now occur only once per
ring-ful.  The exact ring size, and the threshold for seqscans to switch into
the ring usage pattern, remain under debate; but the infrastructure seems
done.  The key bit of infrastructure is a new optional BufferAccessStrategy
object that can be passed to ReadBuffer operations; this replaces the former
StrategyHintVacuum API.
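
As a rough illustration of the new API shape (a minimal sketch, not code from
this commit: the helper name scan_with_vacuum_ring and its loop are invented
for the example, while GetAccessStrategy, ReadBufferWithStrategy, and
FreeAccessStrategy are the entry points the diff below introduces or uses):

    /* Sketch: bulk-read a relation through a private BAS_VACUUM ring */
    static void
    scan_with_vacuum_ring(Relation rel, BlockNumber nblocks)
    {
        BufferAccessStrategy strategy = GetAccessStrategy(BAS_VACUUM);
        BlockNumber blkno;

        for (blkno = 0; blkno < nblocks; blkno++)
        {
            /*
             * The read recycles buffers within the small ring instead of
             * evicting arbitrary pages from the shared-buffer arena.
             */
            Buffer  buf = ReadBufferWithStrategy(rel, blkno, strategy);

            LockBuffer(buf, BUFFER_LOCK_SHARE);
            /* ... examine the page ... */
            UnlockReleaseBuffer(buf);
        }

        FreeAccessStrategy(strategy);
    }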

This patch also changes the buffer usage-count methodology a bit: we now
advance usage_count when first pinning a buffer, rather than when last
unpinning it.  To preserve the behavior that a buffer's lifetime starts to
decrease when it's released, the clock sweep code is modified to not decrement
usage_count of pinned buffers.
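
Conceptually, the revised sweep behaves like the sketch below (a simplified
illustration, not the actual bufmgr code: the descriptor array, field names,
and next_victim_buffer() are stand-ins for the real internals):

    /* Sketch: pick an eviction victim, aging only unpinned buffers */
    static BufferDesc *
    clock_sweep_pick_victim(void)
    {
        for (;;)
        {
            BufferDesc *buf = &BufferDescriptors[next_victim_buffer()];

            if (buf->refcount == 0)
            {
                if (buf->usage_count > 0)
                    buf->usage_count--;     /* unpinned: lifetime decays */
                else
                    return buf;             /* unpinned and cold: evict */
            }
            /*
             * Pinned buffers are skipped without decrementing usage_count,
             * so a buffer's lifetime starts to decrease only once released.
             */
        }
    }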

Work not done in this commit: teach GiST and GIN indexes to use the vacuum
BufferAccessStrategy for vacuum-driven fetches.

Original patch by Simon, reworked by Heikki and again by Tom.
Tom Lane
2007-05-30 20:12:03 +00:00
parent 0a6f2ee84d
commit d526575f89
24 changed files with 722 additions and 262 deletions

src/backend/commands/analyze.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.107 2007/04/30 03:23:48 tgl Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/analyze.c,v 1.108 2007/05/30 20:11:56 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -63,10 +63,13 @@ typedef struct AnlIndexData
 /* Default statistics target (GUC parameter) */
 int         default_statistics_target = 10;
 
+/* A few variables that don't seem worth passing around as parameters */
 static int  elevel = -1;
 
 static MemoryContext anl_context = NULL;
 
+static BufferAccessStrategy vac_strategy;
+
 
 static void BlockSampler_Init(BlockSampler bs, BlockNumber nblocks,
               int samplesize);
@@ -94,7 +97,8 @@ static bool std_typanalyze(VacAttrStats *stats);
  *  analyze_rel() -- analyze one relation
  */
 void
-analyze_rel(Oid relid, VacuumStmt *vacstmt)
+analyze_rel(Oid relid, VacuumStmt *vacstmt,
+            BufferAccessStrategy bstrategy)
 {
     Relation    onerel;
     int         attr_cnt,
@@ -120,6 +124,8 @@ analyze_rel(Oid relid, VacuumStmt *vacstmt)
     else
         elevel = DEBUG2;
 
+    vac_strategy = bstrategy;
+
     /*
      * Use the current context for storing analysis info.  vacuum.c ensures
      * that this context will be cleared when I return, thus releasing the
@@ -845,7 +851,7 @@ acquire_sample_rows(Relation onerel, HeapTuple *rows, int targrows,
          * looking at it.  We don't maintain a lock on the page, so tuples
          * could get added to it, but we ignore such tuples.
          */
-        targbuffer = ReadBuffer(onerel, targblock);
+        targbuffer = ReadBufferWithStrategy(onerel, targblock, vac_strategy);
         LockBuffer(targbuffer, BUFFER_LOCK_SHARE);
         targpage = BufferGetPage(targbuffer);
         maxoffset = PageGetMaxOffsetNumber(targpage);

src/backend/commands/vacuum.c

@@ -13,7 +13,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.351 2007/05/17 15:28:29 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/vacuum.c,v 1.352 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -191,6 +191,7 @@ ExecContext_Finish(ExecContext ec)
  *----------------------------------------------------------------------
  */
 
+/* A few variables that don't seem worth passing around as parameters */
 static MemoryContext vac_context = NULL;
 static int  elevel = -1;
 
@@ -198,6 +199,8 @@ static int  elevel = -1;
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;
 
+static BufferAccessStrategy vac_strategy;
+
 
 /* non-export function prototypes */
 static List *get_rel_oids(List *relids, const RangeVar *vacrel,
@@ -257,14 +260,18 @@ static Size PageGetFreeSpaceWithFillFactor(Relation relation, Page page);
  * relation OIDs to be processed, and vacstmt->relation is ignored.
  * (The non-NIL case is currently only used by autovacuum.)
  *
+ * bstrategy is normally given as NULL, but in autovacuum it can be passed
+ * in to use the same buffer strategy object across multiple vacuum() calls.
+ *
  * isTopLevel should be passed down from ProcessUtility.
  *
- * It is the caller's responsibility that both vacstmt and relids
+ * It is the caller's responsibility that vacstmt, relids, and bstrategy
  * (if given) be allocated in a memory context that won't disappear
  * at transaction commit.
  */
 void
-vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
+vacuum(VacuumStmt *vacstmt, List *relids,
+       BufferAccessStrategy bstrategy, bool isTopLevel)
 {
     const char *stmttype = vacstmt->vacuum ? "VACUUM" : "ANALYZE";
     volatile MemoryContext anl_context = NULL;
@@ -319,6 +326,19 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
                                       ALLOCSET_DEFAULT_INITSIZE,
                                       ALLOCSET_DEFAULT_MAXSIZE);
 
+    /*
+     * If caller didn't give us a buffer strategy object, make one in the
+     * cross-transaction memory context.
+     */
+    if (bstrategy == NULL)
+    {
+        MemoryContext old_context = MemoryContextSwitchTo(vac_context);
+
+        bstrategy = GetAccessStrategy(BAS_VACUUM);
+        MemoryContextSwitchTo(old_context);
+    }
+    vac_strategy = bstrategy;
+
     /* Remember whether we are processing everything in the DB */
     all_rels = (relids == NIL && vacstmt->relation == NULL);
 
@@ -417,15 +437,7 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
             else
                 old_context = MemoryContextSwitchTo(anl_context);
 
-            /*
-             * Tell the buffer replacement strategy that vacuum is causing
-             * the IO
-             */
-            StrategyHintVacuum(true);
-
-            analyze_rel(relid, vacstmt);
-
-            StrategyHintVacuum(false);
+            analyze_rel(relid, vacstmt, vac_strategy);
 
             if (use_own_xacts)
                 CommitTransactionCommand();
@@ -441,8 +453,6 @@ vacuum(VacuumStmt *vacstmt, List *relids, bool isTopLevel)
     {
         /* Make sure cost accounting is turned off after error */
         VacuumCostActive = false;
-        /* And reset buffer replacement strategy, too */
-        StrategyHintVacuum(false);
         PG_RE_THROW();
     }
     PG_END_TRY();
@@ -1084,21 +1094,13 @@ vacuum_rel(Oid relid, VacuumStmt *vacstmt, char expected_relkind)
      */
     toast_relid = onerel->rd_rel->reltoastrelid;
 
-    /*
-     * Tell the cache replacement strategy that vacuum is causing all
-     * following IO
-     */
-    StrategyHintVacuum(true);
-
     /*
      * Do the actual work --- either FULL or "lazy" vacuum
      */
     if (vacstmt->full)
         full_vacuum_rel(onerel, vacstmt);
     else
-        lazy_vacuum_rel(onerel, vacstmt);
-
-    StrategyHintVacuum(false);
+        lazy_vacuum_rel(onerel, vacstmt, vac_strategy);
 
     /* all done with this class, but hold lock until commit */
     relation_close(onerel, NoLock);
@@ -1290,7 +1292,7 @@ scan_heap(VRelStats *vacrelstats, Relation onerel,
 
         vacuum_delay_point();
 
-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
         page = BufferGetPage(buf);
 
         /*
@@ -1730,7 +1732,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
         /*
          * Process this page of relation.
          */
-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
         page = BufferGetPage(buf);
         vacpage->offsets_free = 0;
 
@@ -1954,8 +1956,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                 nextTid = tp.t_data->t_ctid;
                 priorXmax = HeapTupleHeaderGetXmax(tp.t_data);
                 /* assume block# is OK (see heap_fetch comments) */
-                nextBuf = ReadBuffer(onerel,
-                                ItemPointerGetBlockNumber(&nextTid));
+                nextBuf = ReadBufferWithStrategy(onerel,
+                                ItemPointerGetBlockNumber(&nextTid),
+                                vac_strategy);
                 nextPage = BufferGetPage(nextBuf);
                 /* If bogus or unused slot, assume tp is end of chain */
                 nextOffnum = ItemPointerGetOffsetNumber(&nextTid);
@@ -2091,8 +2094,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                     break;      /* out of check-all-items loop */
                 }
                 tp.t_self = vtlp->this_tid;
-                Pbuf = ReadBuffer(onerel,
-                                ItemPointerGetBlockNumber(&(tp.t_self)));
+                Pbuf = ReadBufferWithStrategy(onerel,
+                                ItemPointerGetBlockNumber(&(tp.t_self)),
+                                vac_strategy);
                 Ppage = BufferGetPage(Pbuf);
                 Pitemid = PageGetItemId(Ppage,
                                 ItemPointerGetOffsetNumber(&(tp.t_self)));
@@ -2174,11 +2178,14 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
 
                 /* Get page to move from */
                 tuple.t_self = vtmove[ti].tid;
-                Cbuf = ReadBuffer(onerel,
-                                ItemPointerGetBlockNumber(&(tuple.t_self)));
+                Cbuf = ReadBufferWithStrategy(onerel,
+                                ItemPointerGetBlockNumber(&(tuple.t_self)),
+                                vac_strategy);
 
                 /* Get page to move to */
-                dst_buffer = ReadBuffer(onerel, destvacpage->blkno);
+                dst_buffer = ReadBufferWithStrategy(onerel,
+                                destvacpage->blkno,
+                                vac_strategy);
 
                 LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
                 if (dst_buffer != Cbuf)
@@ -2239,7 +2246,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
                 if (i == num_fraged_pages)
                     break;      /* can't move item anywhere */
                 dst_vacpage = fraged_pages->pagedesc[i];
-                dst_buffer = ReadBuffer(onerel, dst_vacpage->blkno);
+                dst_buffer = ReadBufferWithStrategy(onerel,
+                                dst_vacpage->blkno,
+                                vac_strategy);
                 LockBuffer(dst_buffer, BUFFER_LOCK_EXCLUSIVE);
                 dst_page = BufferGetPage(dst_buffer);
                 /* if this page was not used before - clean it */
@@ -2386,7 +2395,9 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
             Page        page;
 
             /* this page was not used as a move target, so must clean it */
-            buf = ReadBuffer(onerel, (*curpage)->blkno);
+            buf = ReadBufferWithStrategy(onerel,
+                            (*curpage)->blkno,
+                            vac_strategy);
             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
             page = BufferGetPage(buf);
             if (!PageIsEmpty(page))
@@ -2470,7 +2481,7 @@ repair_frag(VRelStats *vacrelstats, Relation onerel,
             int         uncnt;
             int         num_tuples = 0;
 
-            buf = ReadBuffer(onerel, vacpage->blkno);
+            buf = ReadBufferWithStrategy(onerel, vacpage->blkno, vac_strategy);
             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
             page = BufferGetPage(buf);
             maxoff = PageGetMaxOffsetNumber(page);
@@ -2859,7 +2870,7 @@ update_hint_bits(Relation rel, VacPageList fraged_pages, int num_fraged_pages,
             break;              /* no need to scan any further */
         if ((*curpage)->offsets_used == 0)
             continue;           /* this page was never used as a move dest */
-        buf = ReadBuffer(rel, (*curpage)->blkno);
+        buf = ReadBufferWithStrategy(rel, (*curpage)->blkno, vac_strategy);
         LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
         page = BufferGetPage(buf);
         max_offset = PageGetMaxOffsetNumber(page);
@@ -2925,7 +2936,9 @@ vacuum_heap(VRelStats *vacrelstats, Relation onerel, VacPageList vacuum_pages)
 
         if ((*vacpage)->offsets_free > 0)
         {
-            buf = ReadBuffer(onerel, (*vacpage)->blkno);
+            buf = ReadBufferWithStrategy(onerel,
+                            (*vacpage)->blkno,
+                            vac_strategy);
             LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
             vacuum_page(onerel, buf, *vacpage);
             UnlockReleaseBuffer(buf);
@@ -3012,6 +3025,7 @@ scan_index(Relation indrel, double num_tuples)
     ivinfo.vacuum_full = true;
     ivinfo.message_level = elevel;
     ivinfo.num_heap_tuples = num_tuples;
+    ivinfo.strategy = vac_strategy;
 
     stats = index_vacuum_cleanup(&ivinfo, NULL);
 
@@ -3077,6 +3091,7 @@ vacuum_index(VacPageList vacpagelist, Relation indrel,
     ivinfo.vacuum_full = true;
     ivinfo.message_level = elevel;
     ivinfo.num_heap_tuples = num_tuples + keep_tuples;
+    ivinfo.strategy = vac_strategy;
 
     /* Do bulk deletion */
     stats = index_bulk_delete(&ivinfo, NULL, tid_reaped, (void *) vacpagelist);

src/backend/commands/vacuumlazy.c

@@ -36,7 +36,7 @@
  *
  *
  * IDENTIFICATION
- *    $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.89 2007/05/17 15:28:29 alvherre Exp $
+ *    $PostgreSQL: pgsql/src/backend/commands/vacuumlazy.c,v 1.90 2007/05/30 20:11:57 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -96,11 +96,14 @@ typedef struct LVRelStats
 } LVRelStats;
 
 
+/* A few variables that don't seem worth passing around as parameters */
 static int  elevel = -1;
 
 static TransactionId OldestXmin;
 static TransactionId FreezeLimit;
 
+static BufferAccessStrategy vac_strategy;
+
 
 /* non-export function prototypes */
 static void lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
@@ -138,7 +141,8 @@ static int  vac_cmp_page_spaces(const void *left, const void *right);
  *      and locked the relation.
  */
 void
-lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
+lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt,
+                BufferAccessStrategy bstrategy)
 {
     LVRelStats *vacrelstats;
     Relation   *Irel;
@@ -158,6 +162,8 @@ lazy_vacuum_rel(Relation onerel, VacuumStmt *vacstmt)
     else
         elevel = DEBUG2;
 
+    vac_strategy = bstrategy;
+
     vacuum_set_xid_limits(vacstmt->freeze_min_age, onerel->rd_rel->relisshared,
                           &OldestXmin, &FreezeLimit);
@@ -318,7 +324,7 @@ lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
             vacrelstats->num_index_scans++;
         }
 
-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
 
         /* Initially, we only need shared access to the buffer */
         LockBuffer(buf, BUFFER_LOCK_SHARE);
@@ -586,7 +592,7 @@ lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats)
         vacuum_delay_point();
 
         tblk = ItemPointerGetBlockNumber(&vacrelstats->dead_tuples[tupindex]);
-        buf = ReadBuffer(onerel, tblk);
+        buf = ReadBufferWithStrategy(onerel, tblk, vac_strategy);
         LockBufferForCleanup(buf);
         tupindex = lazy_vacuum_page(onerel, tblk, buf, tupindex, vacrelstats);
         /* Now that we've compacted the page, record its available space */
@@ -684,6 +690,7 @@ lazy_vacuum_index(Relation indrel,
     ivinfo.message_level = elevel;
     /* We don't yet know rel_tuples, so pass -1 */
     ivinfo.num_heap_tuples = -1;
+    ivinfo.strategy = vac_strategy;
 
     /* Do bulk deletion */
     *stats = index_bulk_delete(&ivinfo, *stats,
@@ -713,6 +720,7 @@ lazy_cleanup_index(Relation indrel,
     ivinfo.vacuum_full = false;
     ivinfo.message_level = elevel;
     ivinfo.num_heap_tuples = vacrelstats->rel_tuples;
+    ivinfo.strategy = vac_strategy;
 
     stats = index_vacuum_cleanup(&ivinfo, stats);
 
@@ -869,7 +877,7 @@ count_nondeletable_pages(Relation onerel, LVRelStats *vacrelstats)
 
         blkno--;
 
-        buf = ReadBuffer(onerel, blkno);
+        buf = ReadBufferWithStrategy(onerel, blkno, vac_strategy);
 
         /* In this phase we only need shared access to the buffer */
         LockBuffer(buf, BUFFER_LOCK_SHARE);