1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Use streaming read I/O in VACUUM's first phase

Make vacuum's first phase, which prunes and freezes tuples and records
dead TIDs, use the read stream API by converting
heap_vac_scan_next_block() to a read stream callback.

Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com
This commit is contained in:
Melanie Plageman
2025-02-14 12:56:57 -05:00
parent 32acad7d1d
commit 9256822608

View File

@ -153,6 +153,7 @@
#include "storage/bufmgr.h"
#include "storage/freespace.h"
#include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/lsyscache.h"
#include "utils/pg_rusage.h"
#include "utils/timestamp.h"
@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo
static void lazy_scan_heap(LVRelState *vacrel);
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
VacuumParams *params);
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
uint8 *blk_info);
static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
void *callback_private_data,
void *per_buffer_data);
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
static void
lazy_scan_heap(LVRelState *vacrel)
{
ReadStream *stream;
BlockNumber rel_pages = vacrel->rel_pages,
blkno,
blkno = 0,
next_fsm_block_to_vacuum = 0;
uint8 blk_info = 0;
void *per_buffer_data = NULL;
BlockNumber orig_eager_scan_success_limit =
vacrel->eager_scan_remaining_successes; /* for logging */
Buffer vmbuffer = InvalidBuffer;
@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel)
vacrel->next_unskippable_eager_scanned = false;
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info))
/* Set up the read stream for vacuum's first pass through the heap */
stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
vacrel->bstrategy,
vacrel->rel,
MAIN_FORKNUM,
heap_vac_scan_next_block,
vacrel,
sizeof(uint8));
while (true)
{
Buffer buf;
Page page;
uint8 blk_info = 0;
bool has_lpdead_items;
bool vm_page_frozen = false;
bool got_cleanup_lock = false;
vacrel->scanned_pages++;
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
vacrel->eager_scanned_pages++;
/* Report as block scanned, update error traceback information */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
blkno, InvalidOffsetNumber);
vacuum_delay_point(false);
/*
@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel)
* one-pass strategy, and the two-pass strategy with the index_cleanup
* param set to 'off'.
*/
if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
if (vacrel->scanned_pages > 0 &&
vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
lazy_check_wraparound_failsafe(vacrel);
/*
@ -1258,10 +1263,11 @@ lazy_scan_heap(LVRelState *vacrel)
/*
* Vacuum the Free Space Map to make newly-freed space visible on
* upper-level FSM pages. Note we have not yet processed blkno.
* upper-level FSM pages. Note that blkno is the previously
* processed block.
*/
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
blkno);
blkno + 1);
next_fsm_block_to_vacuum = blkno;
/* Report that we are once again scanning the heap */
@ -1269,6 +1275,26 @@ lazy_scan_heap(LVRelState *vacrel)
PROGRESS_VACUUM_PHASE_SCAN_HEAP);
}
buf = read_stream_next_buffer(stream, &per_buffer_data);
/* The relation is exhausted. */
if (!BufferIsValid(buf))
break;
blk_info = *((uint8 *) per_buffer_data);
CheckBufferIsPinnedOnce(buf);
page = BufferGetPage(buf);
blkno = BufferGetBlockNumber(buf);
vacrel->scanned_pages++;
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
vacrel->eager_scanned_pages++;
/* Report as block scanned, update error traceback information */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
blkno, InvalidOffsetNumber);
/*
* Pin the visibility map page in case we need to mark the page
* all-visible. In most cases this will be very cheap, because we'll
@ -1276,10 +1302,6 @@ lazy_scan_heap(LVRelState *vacrel)
*/
visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
vacrel->bstrategy);
page = BufferGetPage(buf);
/*
* We need a buffer cleanup lock to prune HOT chains and defragment
* the page in lazy_scan_prune. But when it's not possible to acquire
@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel)
if (BufferIsValid(vmbuffer))
ReleaseBuffer(vmbuffer);
/* report that everything is now scanned */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
/*
* Report that everything is now scanned. We never skip scanning the last
* block in the relation, so we can pass rel_pages here.
*/
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
rel_pages);
/* now we can compute the new value for pg_class.reltuples */
vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel)
Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
vacrel->missed_dead_tuples;
read_stream_end(stream);
/*
* Do index vacuuming (call each index's ambulkdelete routine), then do
* related heap vacuuming
@ -1465,12 +1493,14 @@ lazy_scan_heap(LVRelState *vacrel)
/*
* Vacuum the remainder of the Free Space Map. We must do this whether or
* not there were indexes, and whether or not we bypassed index vacuuming.
* We can pass rel_pages here because we never skip scanning the last
* block of the relation.
*/
if (blkno > next_fsm_block_to_vacuum)
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
if (rel_pages > next_fsm_block_to_vacuum)
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
/* report all blocks vacuumed */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
/* Do final index cleanup (call each index's amvacuumcleanup routine) */
if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
@ -1478,36 +1508,41 @@ lazy_scan_heap(LVRelState *vacrel)
}
/*
* heap_vac_scan_next_block() -- get next block for vacuum to process
* heap_vac_scan_next_block() -- read stream callback to get the next block
* for vacuum to process
*
* lazy_scan_heap() calls here every time it needs to get the next block to
* prune and vacuum. The function uses the visibility map, vacuum options,
* and various thresholds to skip blocks which do not need to be processed and
* sets blkno to the next block to process.
* Every time lazy_scan_heap() needs a new block to process during its first
* phase, it invokes read_stream_next_buffer() with a stream set up to call
* heap_vac_scan_next_block() to get the next block.
*
* The block number of the next block to process is set in *blkno and its
* visibility status and whether or not it was eager scanned is set in
* *blk_info.
* heap_vac_scan_next_block() uses the visibility map, vacuum options, and
* various thresholds to skip blocks which do not need to be processed and
* returns the next block to process or InvalidBlockNumber if there are no
* remaining blocks.
*
* The return value is false if there are no further blocks to process.
* The visibility status of the next block to process and whether or not it
* was eager scanned is set in the per_buffer_data.
*
* vacrel is an in/out parameter here. Vacuum options and information about
* the relation are read. vacrel->skippedallvis is set if we skip a block
* that's all-visible but not all-frozen, to ensure that we don't update
* relfrozenxid in that case. vacrel also holds information about the next
* unskippable block, as bookkeeping for this function.
* callback_private_data contains a reference to the LVRelState, passed to the
* read stream API during stream setup. The LVRelState is an in/out parameter
* here (locally named `vacrel`). Vacuum options and information about the
* relation are read from it. vacrel->skippedallvis is set if we skip a block
* that's all-visible but not all-frozen (to ensure that we don't update
* relfrozenxid in that case). vacrel also holds information about the next
* unskippable block -- as bookkeeping for this function.
*/
static bool
heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
uint8 *blk_info)
static BlockNumber
heap_vac_scan_next_block(ReadStream *stream,
void *callback_private_data,
void *per_buffer_data)
{
BlockNumber next_block;
LVRelState *vacrel = callback_private_data;
uint8 blk_info = 0;
/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
next_block = vacrel->current_block + 1;
*blk_info = 0;
/* Have we reached the end of the relation? */
if (next_block >= vacrel->rel_pages)
{
@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
}
*blkno = vacrel->rel_pages;
return false;
return InvalidBlockNumber;
}
/*
@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
* but chose not to. We know that they are all-visible in the VM,
* otherwise they would've been unskippable.
*/
*blkno = vacrel->current_block = next_block;
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
return true;
vacrel->current_block = next_block;
blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
*((uint8 *) per_buffer_data) = blk_info;
return vacrel->current_block;
}
else
{
@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
*/
Assert(next_block == vacrel->next_unskippable_block);
*blkno = vacrel->current_block = next_block;
vacrel->current_block = next_block;
if (vacrel->next_unskippable_allvis)
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
if (vacrel->next_unskippable_eager_scanned)
*blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
return true;
blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
*((uint8 *) per_buffer_data) = blk_info;
return vacrel->current_block;
}
}