1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Use streaming read I/O in VACUUM's first phase

Make vacuum's first phase, which prunes and freezes tuples and records
dead TIDs, use the read stream API by converting
heap_vac_scan_next_block() to a read stream callback.

Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com>
Reviewed-by: Thomas Munro <thomas.munro@gmail.com>
Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com
This commit is contained in:
Melanie Plageman
2025-02-14 12:56:57 -05:00
parent 32acad7d1d
commit 9256822608

View File

@ -153,6 +153,7 @@
#include "storage/bufmgr.h" #include "storage/bufmgr.h"
#include "storage/freespace.h" #include "storage/freespace.h"
#include "storage/lmgr.h" #include "storage/lmgr.h"
#include "storage/read_stream.h"
#include "utils/lsyscache.h" #include "utils/lsyscache.h"
#include "utils/pg_rusage.h" #include "utils/pg_rusage.h"
#include "utils/timestamp.h" #include "utils/timestamp.h"
@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo
static void lazy_scan_heap(LVRelState *vacrel); static void lazy_scan_heap(LVRelState *vacrel);
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
VacuumParams *params); VacuumParams *params);
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
uint8 *blk_info); void *callback_private_data,
void *per_buffer_data);
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page, BlockNumber blkno, Page page,
@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
static void static void
lazy_scan_heap(LVRelState *vacrel) lazy_scan_heap(LVRelState *vacrel)
{ {
ReadStream *stream;
BlockNumber rel_pages = vacrel->rel_pages, BlockNumber rel_pages = vacrel->rel_pages,
blkno, blkno = 0,
next_fsm_block_to_vacuum = 0; next_fsm_block_to_vacuum = 0;
uint8 blk_info = 0; void *per_buffer_data = NULL;
BlockNumber orig_eager_scan_success_limit = BlockNumber orig_eager_scan_success_limit =
vacrel->eager_scan_remaining_successes; /* for logging */ vacrel->eager_scan_remaining_successes; /* for logging */
Buffer vmbuffer = InvalidBuffer; Buffer vmbuffer = InvalidBuffer;
@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel)
vacrel->next_unskippable_eager_scanned = false; vacrel->next_unskippable_eager_scanned = false;
vacrel->next_unskippable_vmbuffer = InvalidBuffer; vacrel->next_unskippable_vmbuffer = InvalidBuffer;
while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info)) /* Set up the read stream for vacuum's first pass through the heap */
stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
vacrel->bstrategy,
vacrel->rel,
MAIN_FORKNUM,
heap_vac_scan_next_block,
vacrel,
sizeof(uint8));
while (true)
{ {
Buffer buf; Buffer buf;
Page page; Page page;
uint8 blk_info = 0;
bool has_lpdead_items; bool has_lpdead_items;
bool vm_page_frozen = false; bool vm_page_frozen = false;
bool got_cleanup_lock = false; bool got_cleanup_lock = false;
vacrel->scanned_pages++;
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
vacrel->eager_scanned_pages++;
/* Report as block scanned, update error traceback information */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
blkno, InvalidOffsetNumber);
vacuum_delay_point(false); vacuum_delay_point(false);
/* /*
@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel)
* one-pass strategy, and the two-pass strategy with the index_cleanup * one-pass strategy, and the two-pass strategy with the index_cleanup
* param set to 'off'. * param set to 'off'.
*/ */
if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) if (vacrel->scanned_pages > 0 &&
vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
lazy_check_wraparound_failsafe(vacrel); lazy_check_wraparound_failsafe(vacrel);
/* /*
@ -1258,10 +1263,11 @@ lazy_scan_heap(LVRelState *vacrel)
/* /*
* Vacuum the Free Space Map to make newly-freed space visible on * Vacuum the Free Space Map to make newly-freed space visible on
* upper-level FSM pages. Note we have not yet processed blkno. * upper-level FSM pages. Note that blkno is the previously
* processed block.
*/ */
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
blkno); blkno + 1);
next_fsm_block_to_vacuum = blkno; next_fsm_block_to_vacuum = blkno;
/* Report that we are once again scanning the heap */ /* Report that we are once again scanning the heap */
@ -1269,6 +1275,26 @@ lazy_scan_heap(LVRelState *vacrel)
PROGRESS_VACUUM_PHASE_SCAN_HEAP); PROGRESS_VACUUM_PHASE_SCAN_HEAP);
} }
buf = read_stream_next_buffer(stream, &per_buffer_data);
/* The relation is exhausted. */
if (!BufferIsValid(buf))
break;
blk_info = *((uint8 *) per_buffer_data);
CheckBufferIsPinnedOnce(buf);
page = BufferGetPage(buf);
blkno = BufferGetBlockNumber(buf);
vacrel->scanned_pages++;
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
vacrel->eager_scanned_pages++;
/* Report as block scanned, update error traceback information */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
blkno, InvalidOffsetNumber);
/* /*
* Pin the visibility map page in case we need to mark the page * Pin the visibility map page in case we need to mark the page
* all-visible. In most cases this will be very cheap, because we'll * all-visible. In most cases this will be very cheap, because we'll
@ -1276,10 +1302,6 @@ lazy_scan_heap(LVRelState *vacrel)
*/ */
visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
vacrel->bstrategy);
page = BufferGetPage(buf);
/* /*
* We need a buffer cleanup lock to prune HOT chains and defragment * We need a buffer cleanup lock to prune HOT chains and defragment
* the page in lazy_scan_prune. But when it's not possible to acquire * the page in lazy_scan_prune. But when it's not possible to acquire
@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel)
if (BufferIsValid(vmbuffer)) if (BufferIsValid(vmbuffer))
ReleaseBuffer(vmbuffer); ReleaseBuffer(vmbuffer);
/* report that everything is now scanned */ /*
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); * Report that everything is now scanned. We never skip scanning the last
* block in the relation, so we can pass rel_pages here.
*/
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
rel_pages);
/* now we can compute the new value for pg_class.reltuples */ /* now we can compute the new value for pg_class.reltuples */
vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel)
Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
vacrel->missed_dead_tuples; vacrel->missed_dead_tuples;
read_stream_end(stream);
/* /*
* Do index vacuuming (call each index's ambulkdelete routine), then do * Do index vacuuming (call each index's ambulkdelete routine), then do
* related heap vacuuming * related heap vacuuming
@ -1465,12 +1493,14 @@ lazy_scan_heap(LVRelState *vacrel)
/* /*
* Vacuum the remainder of the Free Space Map. We must do this whether or * Vacuum the remainder of the Free Space Map. We must do this whether or
* not there were indexes, and whether or not we bypassed index vacuuming. * not there were indexes, and whether or not we bypassed index vacuuming.
* We can pass rel_pages here because we never skip scanning the last
* block of the relation.
*/ */
if (blkno > next_fsm_block_to_vacuum) if (rel_pages > next_fsm_block_to_vacuum)
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
/* report all blocks vacuumed */ /* report all blocks vacuumed */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
/* Do final index cleanup (call each index's amvacuumcleanup routine) */ /* Do final index cleanup (call each index's amvacuumcleanup routine) */
if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
@ -1478,36 +1508,41 @@ lazy_scan_heap(LVRelState *vacrel)
} }
/* /*
* heap_vac_scan_next_block() -- get next block for vacuum to process * heap_vac_scan_next_block() -- read stream callback to get the next block
* for vacuum to process
* *
* lazy_scan_heap() calls here every time it needs to get the next block to * Every time lazy_scan_heap() needs a new block to process during its first
* prune and vacuum. The function uses the visibility map, vacuum options, * phase, it invokes read_stream_next_buffer() with a stream set up to call
* and various thresholds to skip blocks which do not need to be processed and * heap_vac_scan_next_block() to get the next block.
* sets blkno to the next block to process.
* *
* The block number of the next block to process is set in *blkno and its * heap_vac_scan_next_block() uses the visibility map, vacuum options, and
* visibility status and whether or not it was eager scanned is set in * various thresholds to skip blocks which do not need to be processed and
* *blk_info. * returns the next block to process or InvalidBlockNumber if there are no
* remaining blocks.
* *
* The return value is false if there are no further blocks to process. * The visibility status of the next block to process and whether or not it
* was eager scanned is set in the per_buffer_data.
* *
* vacrel is an in/out parameter here. Vacuum options and information about * callback_private_data contains a reference to the LVRelState, passed to the
* the relation are read. vacrel->skippedallvis is set if we skip a block * read stream API during stream setup. The LVRelState is an in/out parameter
* that's all-visible but not all-frozen, to ensure that we don't update * here (locally named `vacrel`). Vacuum options and information about the
* relfrozenxid in that case. vacrel also holds information about the next * relation are read from it. vacrel->skippedallvis is set if we skip a block
* unskippable block, as bookkeeping for this function. * that's all-visible but not all-frozen (to ensure that we don't update
* relfrozenxid in that case). vacrel also holds information about the next
* unskippable block -- as bookkeeping for this function.
*/ */
static bool static BlockNumber
heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, heap_vac_scan_next_block(ReadStream *stream,
uint8 *blk_info) void *callback_private_data,
void *per_buffer_data)
{ {
BlockNumber next_block; BlockNumber next_block;
LVRelState *vacrel = callback_private_data;
uint8 blk_info = 0;
/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ /* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
next_block = vacrel->current_block + 1; next_block = vacrel->current_block + 1;
*blk_info = 0;
/* Have we reached the end of the relation? */ /* Have we reached the end of the relation? */
if (next_block >= vacrel->rel_pages) if (next_block >= vacrel->rel_pages)
{ {
@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
ReleaseBuffer(vacrel->next_unskippable_vmbuffer); ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
vacrel->next_unskippable_vmbuffer = InvalidBuffer; vacrel->next_unskippable_vmbuffer = InvalidBuffer;
} }
*blkno = vacrel->rel_pages; return InvalidBlockNumber;
return false;
} }
/* /*
@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
* but chose not to. We know that they are all-visible in the VM, * but chose not to. We know that they are all-visible in the VM,
* otherwise they would've been unskippable. * otherwise they would've been unskippable.
*/ */
*blkno = vacrel->current_block = next_block; vacrel->current_block = next_block;
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
return true; *((uint8 *) per_buffer_data) = blk_info;
return vacrel->current_block;
} }
else else
{ {
@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
*/ */
Assert(next_block == vacrel->next_unskippable_block); Assert(next_block == vacrel->next_unskippable_block);
*blkno = vacrel->current_block = next_block; vacrel->current_block = next_block;
if (vacrel->next_unskippable_allvis) if (vacrel->next_unskippable_allvis)
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
if (vacrel->next_unskippable_eager_scanned) if (vacrel->next_unskippable_eager_scanned)
*blk_info |= VAC_BLK_WAS_EAGER_SCANNED; blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
return true; *((uint8 *) per_buffer_data) = blk_info;
return vacrel->current_block;
} }
} }