mirror of
https://github.com/postgres/postgres.git
synced 2025-06-16 06:01:02 +03:00
Use streaming read I/O in VACUUM's first phase
Make vacuum's first phase, which prunes and freezes tuples and records dead TIDs, use the read stream API by converting heap_vac_scan_next_block() to a read stream callback. Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com
This commit is contained in:
@ -153,6 +153,7 @@
|
|||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
#include "storage/freespace.h"
|
#include "storage/freespace.h"
|
||||||
#include "storage/lmgr.h"
|
#include "storage/lmgr.h"
|
||||||
|
#include "storage/read_stream.h"
|
||||||
#include "utils/lsyscache.h"
|
#include "utils/lsyscache.h"
|
||||||
#include "utils/pg_rusage.h"
|
#include "utils/pg_rusage.h"
|
||||||
#include "utils/timestamp.h"
|
#include "utils/timestamp.h"
|
||||||
@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo
|
|||||||
static void lazy_scan_heap(LVRelState *vacrel);
|
static void lazy_scan_heap(LVRelState *vacrel);
|
||||||
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
|
static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
|
||||||
VacuumParams *params);
|
VacuumParams *params);
|
||||||
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
|
static BlockNumber heap_vac_scan_next_block(ReadStream *stream,
|
||||||
uint8 *blk_info);
|
void *callback_private_data,
|
||||||
|
void *per_buffer_data);
|
||||||
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
|
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
|
||||||
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
|
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
|
||||||
BlockNumber blkno, Page page,
|
BlockNumber blkno, Page page,
|
||||||
@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
|
|||||||
static void
|
static void
|
||||||
lazy_scan_heap(LVRelState *vacrel)
|
lazy_scan_heap(LVRelState *vacrel)
|
||||||
{
|
{
|
||||||
|
ReadStream *stream;
|
||||||
BlockNumber rel_pages = vacrel->rel_pages,
|
BlockNumber rel_pages = vacrel->rel_pages,
|
||||||
blkno,
|
blkno = 0,
|
||||||
next_fsm_block_to_vacuum = 0;
|
next_fsm_block_to_vacuum = 0;
|
||||||
uint8 blk_info = 0;
|
void *per_buffer_data = NULL;
|
||||||
BlockNumber orig_eager_scan_success_limit =
|
BlockNumber orig_eager_scan_success_limit =
|
||||||
vacrel->eager_scan_remaining_successes; /* for logging */
|
vacrel->eager_scan_remaining_successes; /* for logging */
|
||||||
Buffer vmbuffer = InvalidBuffer;
|
Buffer vmbuffer = InvalidBuffer;
|
||||||
@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
vacrel->next_unskippable_eager_scanned = false;
|
vacrel->next_unskippable_eager_scanned = false;
|
||||||
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
|
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
|
||||||
|
|
||||||
while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info))
|
/* Set up the read stream for vacuum's first pass through the heap */
|
||||||
|
stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE,
|
||||||
|
vacrel->bstrategy,
|
||||||
|
vacrel->rel,
|
||||||
|
MAIN_FORKNUM,
|
||||||
|
heap_vac_scan_next_block,
|
||||||
|
vacrel,
|
||||||
|
sizeof(uint8));
|
||||||
|
|
||||||
|
while (true)
|
||||||
{
|
{
|
||||||
Buffer buf;
|
Buffer buf;
|
||||||
Page page;
|
Page page;
|
||||||
|
uint8 blk_info = 0;
|
||||||
bool has_lpdead_items;
|
bool has_lpdead_items;
|
||||||
bool vm_page_frozen = false;
|
bool vm_page_frozen = false;
|
||||||
bool got_cleanup_lock = false;
|
bool got_cleanup_lock = false;
|
||||||
|
|
||||||
vacrel->scanned_pages++;
|
|
||||||
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
|
|
||||||
vacrel->eager_scanned_pages++;
|
|
||||||
|
|
||||||
/* Report as block scanned, update error traceback information */
|
|
||||||
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
|
|
||||||
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
|
|
||||||
blkno, InvalidOffsetNumber);
|
|
||||||
|
|
||||||
vacuum_delay_point(false);
|
vacuum_delay_point(false);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
* one-pass strategy, and the two-pass strategy with the index_cleanup
|
* one-pass strategy, and the two-pass strategy with the index_cleanup
|
||||||
* param set to 'off'.
|
* param set to 'off'.
|
||||||
*/
|
*/
|
||||||
if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
|
if (vacrel->scanned_pages > 0 &&
|
||||||
|
vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0)
|
||||||
lazy_check_wraparound_failsafe(vacrel);
|
lazy_check_wraparound_failsafe(vacrel);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1258,10 +1263,11 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Vacuum the Free Space Map to make newly-freed space visible on
|
* Vacuum the Free Space Map to make newly-freed space visible on
|
||||||
* upper-level FSM pages. Note we have not yet processed blkno.
|
* upper-level FSM pages. Note that blkno is the previously
|
||||||
|
* processed block.
|
||||||
*/
|
*/
|
||||||
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
|
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum,
|
||||||
blkno);
|
blkno + 1);
|
||||||
next_fsm_block_to_vacuum = blkno;
|
next_fsm_block_to_vacuum = blkno;
|
||||||
|
|
||||||
/* Report that we are once again scanning the heap */
|
/* Report that we are once again scanning the heap */
|
||||||
@ -1269,6 +1275,26 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
PROGRESS_VACUUM_PHASE_SCAN_HEAP);
|
PROGRESS_VACUUM_PHASE_SCAN_HEAP);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
buf = read_stream_next_buffer(stream, &per_buffer_data);
|
||||||
|
|
||||||
|
/* The relation is exhausted. */
|
||||||
|
if (!BufferIsValid(buf))
|
||||||
|
break;
|
||||||
|
|
||||||
|
blk_info = *((uint8 *) per_buffer_data);
|
||||||
|
CheckBufferIsPinnedOnce(buf);
|
||||||
|
page = BufferGetPage(buf);
|
||||||
|
blkno = BufferGetBlockNumber(buf);
|
||||||
|
|
||||||
|
vacrel->scanned_pages++;
|
||||||
|
if (blk_info & VAC_BLK_WAS_EAGER_SCANNED)
|
||||||
|
vacrel->eager_scanned_pages++;
|
||||||
|
|
||||||
|
/* Report as block scanned, update error traceback information */
|
||||||
|
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
|
||||||
|
update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP,
|
||||||
|
blkno, InvalidOffsetNumber);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Pin the visibility map page in case we need to mark the page
|
* Pin the visibility map page in case we need to mark the page
|
||||||
* all-visible. In most cases this will be very cheap, because we'll
|
* all-visible. In most cases this will be very cheap, because we'll
|
||||||
@ -1276,10 +1302,6 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
*/
|
*/
|
||||||
visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
|
visibilitymap_pin(vacrel->rel, blkno, &vmbuffer);
|
||||||
|
|
||||||
buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
|
|
||||||
vacrel->bstrategy);
|
|
||||||
page = BufferGetPage(buf);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We need a buffer cleanup lock to prune HOT chains and defragment
|
* We need a buffer cleanup lock to prune HOT chains and defragment
|
||||||
* the page in lazy_scan_prune. But when it's not possible to acquire
|
* the page in lazy_scan_prune. But when it's not possible to acquire
|
||||||
@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
if (BufferIsValid(vmbuffer))
|
if (BufferIsValid(vmbuffer))
|
||||||
ReleaseBuffer(vmbuffer);
|
ReleaseBuffer(vmbuffer);
|
||||||
|
|
||||||
/* report that everything is now scanned */
|
/*
|
||||||
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
|
* Report that everything is now scanned. We never skip scanning the last
|
||||||
|
* block in the relation, so we can pass rel_pages here.
|
||||||
|
*/
|
||||||
|
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED,
|
||||||
|
rel_pages);
|
||||||
|
|
||||||
/* now we can compute the new value for pg_class.reltuples */
|
/* now we can compute the new value for pg_class.reltuples */
|
||||||
vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
|
vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages,
|
||||||
@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
|
Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
|
||||||
vacrel->missed_dead_tuples;
|
vacrel->missed_dead_tuples;
|
||||||
|
|
||||||
|
read_stream_end(stream);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Do index vacuuming (call each index's ambulkdelete routine), then do
|
* Do index vacuuming (call each index's ambulkdelete routine), then do
|
||||||
* related heap vacuuming
|
* related heap vacuuming
|
||||||
@ -1465,12 +1493,14 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
/*
|
/*
|
||||||
* Vacuum the remainder of the Free Space Map. We must do this whether or
|
* Vacuum the remainder of the Free Space Map. We must do this whether or
|
||||||
* not there were indexes, and whether or not we bypassed index vacuuming.
|
* not there were indexes, and whether or not we bypassed index vacuuming.
|
||||||
|
* We can pass rel_pages here because we never skip scanning the last
|
||||||
|
* block of the relation.
|
||||||
*/
|
*/
|
||||||
if (blkno > next_fsm_block_to_vacuum)
|
if (rel_pages > next_fsm_block_to_vacuum)
|
||||||
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno);
|
FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages);
|
||||||
|
|
||||||
/* report all blocks vacuumed */
|
/* report all blocks vacuumed */
|
||||||
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno);
|
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages);
|
||||||
|
|
||||||
/* Do final index cleanup (call each index's amvacuumcleanup routine) */
|
/* Do final index cleanup (call each index's amvacuumcleanup routine) */
|
||||||
if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
|
if (vacrel->nindexes > 0 && vacrel->do_index_cleanup)
|
||||||
@ -1478,36 +1508,41 @@ lazy_scan_heap(LVRelState *vacrel)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* heap_vac_scan_next_block() -- get next block for vacuum to process
|
* heap_vac_scan_next_block() -- read stream callback to get the next block
|
||||||
|
* for vacuum to process
|
||||||
*
|
*
|
||||||
* lazy_scan_heap() calls here every time it needs to get the next block to
|
* Every time lazy_scan_heap() needs a new block to process during its first
|
||||||
* prune and vacuum. The function uses the visibility map, vacuum options,
|
* phase, it invokes read_stream_next_buffer() with a stream set up to call
|
||||||
* and various thresholds to skip blocks which do not need to be processed and
|
* heap_vac_scan_next_block() to get the next block.
|
||||||
* sets blkno to the next block to process.
|
|
||||||
*
|
*
|
||||||
* The block number of the next block to process is set in *blkno and its
|
* heap_vac_scan_next_block() uses the visibility map, vacuum options, and
|
||||||
* visibility status and whether or not it was eager scanned is set in
|
* various thresholds to skip blocks which do not need to be processed and
|
||||||
* *blk_info.
|
* returns the next block to process or InvalidBlockNumber if there are no
|
||||||
|
* remaining blocks.
|
||||||
*
|
*
|
||||||
* The return value is false if there are no further blocks to process.
|
* The visibility status of the next block to process and whether or not it
|
||||||
|
* was eager scanned is set in the per_buffer_data.
|
||||||
*
|
*
|
||||||
* vacrel is an in/out parameter here. Vacuum options and information about
|
* callback_private_data contains a reference to the LVRelState, passed to the
|
||||||
* the relation are read. vacrel->skippedallvis is set if we skip a block
|
* read stream API during stream setup. The LVRelState is an in/out parameter
|
||||||
* that's all-visible but not all-frozen, to ensure that we don't update
|
* here (locally named `vacrel`). Vacuum options and information about the
|
||||||
* relfrozenxid in that case. vacrel also holds information about the next
|
* relation are read from it. vacrel->skippedallvis is set if we skip a block
|
||||||
* unskippable block, as bookkeeping for this function.
|
* that's all-visible but not all-frozen (to ensure that we don't update
|
||||||
|
* relfrozenxid in that case). vacrel also holds information about the next
|
||||||
|
* unskippable block -- as bookkeeping for this function.
|
||||||
*/
|
*/
|
||||||
static bool
|
static BlockNumber
|
||||||
heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
|
heap_vac_scan_next_block(ReadStream *stream,
|
||||||
uint8 *blk_info)
|
void *callback_private_data,
|
||||||
|
void *per_buffer_data)
|
||||||
{
|
{
|
||||||
BlockNumber next_block;
|
BlockNumber next_block;
|
||||||
|
LVRelState *vacrel = callback_private_data;
|
||||||
|
uint8 blk_info = 0;
|
||||||
|
|
||||||
/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
|
/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
|
||||||
next_block = vacrel->current_block + 1;
|
next_block = vacrel->current_block + 1;
|
||||||
|
|
||||||
*blk_info = 0;
|
|
||||||
|
|
||||||
/* Have we reached the end of the relation? */
|
/* Have we reached the end of the relation? */
|
||||||
if (next_block >= vacrel->rel_pages)
|
if (next_block >= vacrel->rel_pages)
|
||||||
{
|
{
|
||||||
@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
|
|||||||
ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
|
ReleaseBuffer(vacrel->next_unskippable_vmbuffer);
|
||||||
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
|
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
|
||||||
}
|
}
|
||||||
*blkno = vacrel->rel_pages;
|
return InvalidBlockNumber;
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
|
|||||||
* but chose not to. We know that they are all-visible in the VM,
|
* but chose not to. We know that they are all-visible in the VM,
|
||||||
* otherwise they would've been unskippable.
|
* otherwise they would've been unskippable.
|
||||||
*/
|
*/
|
||||||
*blkno = vacrel->current_block = next_block;
|
vacrel->current_block = next_block;
|
||||||
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
|
blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
|
||||||
return true;
|
*((uint8 *) per_buffer_data) = blk_info;
|
||||||
|
return vacrel->current_block;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
|
|||||||
*/
|
*/
|
||||||
Assert(next_block == vacrel->next_unskippable_block);
|
Assert(next_block == vacrel->next_unskippable_block);
|
||||||
|
|
||||||
*blkno = vacrel->current_block = next_block;
|
vacrel->current_block = next_block;
|
||||||
if (vacrel->next_unskippable_allvis)
|
if (vacrel->next_unskippable_allvis)
|
||||||
*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
|
blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM;
|
||||||
if (vacrel->next_unskippable_eager_scanned)
|
if (vacrel->next_unskippable_eager_scanned)
|
||||||
*blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
|
blk_info |= VAC_BLK_WAS_EAGER_SCANNED;
|
||||||
return true;
|
*((uint8 *) per_buffer_data) = blk_info;
|
||||||
|
return vacrel->current_block;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user