mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Use streaming read I/O in VACUUM's first phase
Make vacuum's first phase, which prunes and freezes tuples and records dead TIDs, use the read stream API by converting heap_vac_scan_next_block() to a read stream callback. Reviewed-by: Masahiko Sawada <sawada.mshk@gmail.com> Reviewed-by: Thomas Munro <thomas.munro@gmail.com> Discussion: https://postgr.es/m/CAAKRu_aLwANZpxHc0tC-6OT0OQT4TftDGkKAO5yigMUOv_Tcsw%40mail.gmail.com
This commit is contained in:
		| @@ -153,6 +153,7 @@ | |||||||
| #include "storage/bufmgr.h" | #include "storage/bufmgr.h" | ||||||
| #include "storage/freespace.h" | #include "storage/freespace.h" | ||||||
| #include "storage/lmgr.h" | #include "storage/lmgr.h" | ||||||
|  | #include "storage/read_stream.h" | ||||||
| #include "utils/lsyscache.h" | #include "utils/lsyscache.h" | ||||||
| #include "utils/pg_rusage.h" | #include "utils/pg_rusage.h" | ||||||
| #include "utils/timestamp.h" | #include "utils/timestamp.h" | ||||||
| @@ -423,8 +424,9 @@ typedef struct LVSavedErrInfo | |||||||
| static void lazy_scan_heap(LVRelState *vacrel); | static void lazy_scan_heap(LVRelState *vacrel); | ||||||
| static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, | static void heap_vacuum_eager_scan_setup(LVRelState *vacrel, | ||||||
| 										 VacuumParams *params); | 										 VacuumParams *params); | ||||||
| static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, | static BlockNumber heap_vac_scan_next_block(ReadStream *stream, | ||||||
| 									 uint8 *blk_info); | 											void *callback_private_data, | ||||||
|  | 											void *per_buffer_data); | ||||||
| static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); | static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis); | ||||||
| static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, | static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, | ||||||
| 								   BlockNumber blkno, Page page, | 								   BlockNumber blkno, Page page, | ||||||
| @@ -1174,10 +1176,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params, | |||||||
| static void | static void | ||||||
| lazy_scan_heap(LVRelState *vacrel) | lazy_scan_heap(LVRelState *vacrel) | ||||||
| { | { | ||||||
|  | 	ReadStream *stream; | ||||||
| 	BlockNumber rel_pages = vacrel->rel_pages, | 	BlockNumber rel_pages = vacrel->rel_pages, | ||||||
| 				blkno, | 				blkno = 0, | ||||||
| 				next_fsm_block_to_vacuum = 0; | 				next_fsm_block_to_vacuum = 0; | ||||||
| 	uint8		blk_info = 0; | 	void	   *per_buffer_data = NULL; | ||||||
| 	BlockNumber orig_eager_scan_success_limit = | 	BlockNumber orig_eager_scan_success_limit = | ||||||
| 		vacrel->eager_scan_remaining_successes; /* for logging */ | 		vacrel->eager_scan_remaining_successes; /* for logging */ | ||||||
| 	Buffer		vmbuffer = InvalidBuffer; | 	Buffer		vmbuffer = InvalidBuffer; | ||||||
| @@ -1201,23 +1204,24 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 	vacrel->next_unskippable_eager_scanned = false; | 	vacrel->next_unskippable_eager_scanned = false; | ||||||
| 	vacrel->next_unskippable_vmbuffer = InvalidBuffer; | 	vacrel->next_unskippable_vmbuffer = InvalidBuffer; | ||||||
|  |  | ||||||
| 	while (heap_vac_scan_next_block(vacrel, &blkno, &blk_info)) | 	/* Set up the read stream for vacuum's first pass through the heap */ | ||||||
|  | 	stream = read_stream_begin_relation(READ_STREAM_MAINTENANCE, | ||||||
|  | 										vacrel->bstrategy, | ||||||
|  | 										vacrel->rel, | ||||||
|  | 										MAIN_FORKNUM, | ||||||
|  | 										heap_vac_scan_next_block, | ||||||
|  | 										vacrel, | ||||||
|  | 										sizeof(uint8)); | ||||||
|  |  | ||||||
|  | 	while (true) | ||||||
| 	{ | 	{ | ||||||
| 		Buffer		buf; | 		Buffer		buf; | ||||||
| 		Page		page; | 		Page		page; | ||||||
|  | 		uint8		blk_info = 0; | ||||||
| 		bool		has_lpdead_items; | 		bool		has_lpdead_items; | ||||||
| 		bool		vm_page_frozen = false; | 		bool		vm_page_frozen = false; | ||||||
| 		bool		got_cleanup_lock = false; | 		bool		got_cleanup_lock = false; | ||||||
|  |  | ||||||
| 		vacrel->scanned_pages++; |  | ||||||
| 		if (blk_info & VAC_BLK_WAS_EAGER_SCANNED) |  | ||||||
| 			vacrel->eager_scanned_pages++; |  | ||||||
|  |  | ||||||
| 		/* Report as block scanned, update error traceback information */ |  | ||||||
| 		pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); |  | ||||||
| 		update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, |  | ||||||
| 								 blkno, InvalidOffsetNumber); |  | ||||||
|  |  | ||||||
| 		vacuum_delay_point(false); | 		vacuum_delay_point(false); | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| @@ -1229,7 +1233,8 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 		 * one-pass strategy, and the two-pass strategy with the index_cleanup | 		 * one-pass strategy, and the two-pass strategy with the index_cleanup | ||||||
| 		 * param set to 'off'. | 		 * param set to 'off'. | ||||||
| 		 */ | 		 */ | ||||||
| 		if (vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) | 		if (vacrel->scanned_pages > 0 && | ||||||
|  | 			vacrel->scanned_pages % FAILSAFE_EVERY_PAGES == 0) | ||||||
| 			lazy_check_wraparound_failsafe(vacrel); | 			lazy_check_wraparound_failsafe(vacrel); | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| @@ -1258,10 +1263,11 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
|  |  | ||||||
| 			/* | 			/* | ||||||
| 			 * Vacuum the Free Space Map to make newly-freed space visible on | 			 * Vacuum the Free Space Map to make newly-freed space visible on | ||||||
| 			 * upper-level FSM pages.  Note we have not yet processed blkno. | 			 * upper-level FSM pages. Note that blkno is the previously | ||||||
|  | 			 * processed block. | ||||||
| 			 */ | 			 */ | ||||||
| 			FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, | 			FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, | ||||||
| 									blkno); | 									blkno + 1); | ||||||
| 			next_fsm_block_to_vacuum = blkno; | 			next_fsm_block_to_vacuum = blkno; | ||||||
|  |  | ||||||
| 			/* Report that we are once again scanning the heap */ | 			/* Report that we are once again scanning the heap */ | ||||||
| @@ -1269,6 +1275,26 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 										 PROGRESS_VACUUM_PHASE_SCAN_HEAP); | 										 PROGRESS_VACUUM_PHASE_SCAN_HEAP); | ||||||
| 		} | 		} | ||||||
|  |  | ||||||
|  | 		buf = read_stream_next_buffer(stream, &per_buffer_data); | ||||||
|  |  | ||||||
|  | 		/* The relation is exhausted. */ | ||||||
|  | 		if (!BufferIsValid(buf)) | ||||||
|  | 			break; | ||||||
|  |  | ||||||
|  | 		blk_info = *((uint8 *) per_buffer_data); | ||||||
|  | 		CheckBufferIsPinnedOnce(buf); | ||||||
|  | 		page = BufferGetPage(buf); | ||||||
|  | 		blkno = BufferGetBlockNumber(buf); | ||||||
|  |  | ||||||
|  | 		vacrel->scanned_pages++; | ||||||
|  | 		if (blk_info & VAC_BLK_WAS_EAGER_SCANNED) | ||||||
|  | 			vacrel->eager_scanned_pages++; | ||||||
|  |  | ||||||
|  | 		/* Report as block scanned, update error traceback information */ | ||||||
|  | 		pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); | ||||||
|  | 		update_vacuum_error_info(vacrel, NULL, VACUUM_ERRCB_PHASE_SCAN_HEAP, | ||||||
|  | 								 blkno, InvalidOffsetNumber); | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * Pin the visibility map page in case we need to mark the page | 		 * Pin the visibility map page in case we need to mark the page | ||||||
| 		 * all-visible.  In most cases this will be very cheap, because we'll | 		 * all-visible.  In most cases this will be very cheap, because we'll | ||||||
| @@ -1276,10 +1302,6 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 		 */ | 		 */ | ||||||
| 		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); | 		visibilitymap_pin(vacrel->rel, blkno, &vmbuffer); | ||||||
|  |  | ||||||
| 		buf = ReadBufferExtended(vacrel->rel, MAIN_FORKNUM, blkno, RBM_NORMAL, |  | ||||||
| 								 vacrel->bstrategy); |  | ||||||
| 		page = BufferGetPage(buf); |  | ||||||
|  |  | ||||||
| 		/* | 		/* | ||||||
| 		 * We need a buffer cleanup lock to prune HOT chains and defragment | 		 * We need a buffer cleanup lock to prune HOT chains and defragment | ||||||
| 		 * the page in lazy_scan_prune.  But when it's not possible to acquire | 		 * the page in lazy_scan_prune.  But when it's not possible to acquire | ||||||
| @@ -1439,8 +1461,12 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 	if (BufferIsValid(vmbuffer)) | 	if (BufferIsValid(vmbuffer)) | ||||||
| 		ReleaseBuffer(vmbuffer); | 		ReleaseBuffer(vmbuffer); | ||||||
|  |  | ||||||
| 	/* report that everything is now scanned */ | 	/* | ||||||
| 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno); | 	 * Report that everything is now scanned. We never skip scanning the last | ||||||
|  | 	 * block in the relation, so we can pass rel_pages here. | ||||||
|  | 	 */ | ||||||
|  | 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, | ||||||
|  | 								 rel_pages); | ||||||
|  |  | ||||||
| 	/* now we can compute the new value for pg_class.reltuples */ | 	/* now we can compute the new value for pg_class.reltuples */ | ||||||
| 	vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, | 	vacrel->new_live_tuples = vac_estimate_reltuples(vacrel->rel, rel_pages, | ||||||
| @@ -1455,6 +1481,8 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 		Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + | 		Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples + | ||||||
| 		vacrel->missed_dead_tuples; | 		vacrel->missed_dead_tuples; | ||||||
|  |  | ||||||
|  | 	read_stream_end(stream); | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| 	 * Do index vacuuming (call each index's ambulkdelete routine), then do | 	 * Do index vacuuming (call each index's ambulkdelete routine), then do | ||||||
| 	 * related heap vacuuming | 	 * related heap vacuuming | ||||||
| @@ -1465,12 +1493,14 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| 	/* | 	/* | ||||||
| 	 * Vacuum the remainder of the Free Space Map.  We must do this whether or | 	 * Vacuum the remainder of the Free Space Map.  We must do this whether or | ||||||
| 	 * not there were indexes, and whether or not we bypassed index vacuuming. | 	 * not there were indexes, and whether or not we bypassed index vacuuming. | ||||||
|  | 	 * We can pass rel_pages here because we never skip scanning the last | ||||||
|  | 	 * block of the relation. | ||||||
| 	 */ | 	 */ | ||||||
| 	if (blkno > next_fsm_block_to_vacuum) | 	if (rel_pages > next_fsm_block_to_vacuum) | ||||||
| 		FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, blkno); | 		FreeSpaceMapVacuumRange(vacrel->rel, next_fsm_block_to_vacuum, rel_pages); | ||||||
|  |  | ||||||
| 	/* report all blocks vacuumed */ | 	/* report all blocks vacuumed */ | ||||||
| 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, blkno); | 	pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_VACUUMED, rel_pages); | ||||||
|  |  | ||||||
| 	/* Do final index cleanup (call each index's amvacuumcleanup routine) */ | 	/* Do final index cleanup (call each index's amvacuumcleanup routine) */ | ||||||
| 	if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) | 	if (vacrel->nindexes > 0 && vacrel->do_index_cleanup) | ||||||
| @@ -1478,36 +1508,41 @@ lazy_scan_heap(LVRelState *vacrel) | |||||||
| } | } | ||||||
|  |  | ||||||
| /* | /* | ||||||
|  *	heap_vac_scan_next_block() -- get next block for vacuum to process |  *	heap_vac_scan_next_block() -- read stream callback to get the next block | ||||||
|  |  *	for vacuum to process | ||||||
|  * |  * | ||||||
|  * lazy_scan_heap() calls here every time it needs to get the next block to |  * Every time lazy_scan_heap() needs a new block to process during its first | ||||||
|  * prune and vacuum.  The function uses the visibility map, vacuum options, |  * phase, it invokes read_stream_next_buffer() with a stream set up to call | ||||||
|  * and various thresholds to skip blocks which do not need to be processed and |  * heap_vac_scan_next_block() to get the next block. | ||||||
|  * sets blkno to the next block to process. |  | ||||||
|  * |  * | ||||||
|  * The block number of the next block to process is set in *blkno and its |  * heap_vac_scan_next_block() uses the visibility map, vacuum options, and | ||||||
|  * visibility status and whether or not it was eager scanned is set in |  * various thresholds to skip blocks which do not need to be processed and | ||||||
|  * *blk_info. |  * returns the next block to process or InvalidBlockNumber if there are no | ||||||
|  |  * remaining blocks. | ||||||
|  * |  * | ||||||
|  * The return value is false if there are no further blocks to process. |  * The visibility status of the next block to process and whether or not it | ||||||
|  |  * was eager scanned is set in the per_buffer_data. | ||||||
|  * |  * | ||||||
|  * vacrel is an in/out parameter here.  Vacuum options and information about |  * callback_private_data contains a reference to the LVRelState, passed to the | ||||||
|  * the relation are read.  vacrel->skippedallvis is set if we skip a block |  * read stream API during stream setup. The LVRelState is an in/out parameter | ||||||
|  * that's all-visible but not all-frozen, to ensure that we don't update |  * here (locally named `vacrel`). Vacuum options and information about the | ||||||
|  * relfrozenxid in that case.  vacrel also holds information about the next |  * relation are read from it. vacrel->skippedallvis is set if we skip a block | ||||||
|  * unskippable block, as bookkeeping for this function. |  * that's all-visible but not all-frozen (to ensure that we don't update | ||||||
|  |  * relfrozenxid in that case). vacrel also holds information about the next | ||||||
|  |  * unskippable block -- as bookkeeping for this function. | ||||||
|  */ |  */ | ||||||
| static bool | static BlockNumber | ||||||
| heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, | heap_vac_scan_next_block(ReadStream *stream, | ||||||
| 						 uint8 *blk_info) | 						 void *callback_private_data, | ||||||
|  | 						 void *per_buffer_data) | ||||||
| { | { | ||||||
| 	BlockNumber next_block; | 	BlockNumber next_block; | ||||||
|  | 	LVRelState *vacrel = callback_private_data; | ||||||
|  | 	uint8		blk_info = 0; | ||||||
|  |  | ||||||
| 	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ | 	/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */ | ||||||
| 	next_block = vacrel->current_block + 1; | 	next_block = vacrel->current_block + 1; | ||||||
|  |  | ||||||
| 	*blk_info = 0; |  | ||||||
|  |  | ||||||
| 	/* Have we reached the end of the relation? */ | 	/* Have we reached the end of the relation? */ | ||||||
| 	if (next_block >= vacrel->rel_pages) | 	if (next_block >= vacrel->rel_pages) | ||||||
| 	{ | 	{ | ||||||
| @@ -1516,8 +1551,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, | |||||||
| 			ReleaseBuffer(vacrel->next_unskippable_vmbuffer); | 			ReleaseBuffer(vacrel->next_unskippable_vmbuffer); | ||||||
| 			vacrel->next_unskippable_vmbuffer = InvalidBuffer; | 			vacrel->next_unskippable_vmbuffer = InvalidBuffer; | ||||||
| 		} | 		} | ||||||
| 		*blkno = vacrel->rel_pages; | 		return InvalidBlockNumber; | ||||||
| 		return false; |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	/* | 	/* | ||||||
| @@ -1566,9 +1600,10 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, | |||||||
| 		 * but chose not to.  We know that they are all-visible in the VM, | 		 * but chose not to.  We know that they are all-visible in the VM, | ||||||
| 		 * otherwise they would've been unskippable. | 		 * otherwise they would've been unskippable. | ||||||
| 		 */ | 		 */ | ||||||
| 		*blkno = vacrel->current_block = next_block; | 		vacrel->current_block = next_block; | ||||||
| 		*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; | 		blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; | ||||||
| 		return true; | 		*((uint8 *) per_buffer_data) = blk_info; | ||||||
|  | 		return vacrel->current_block; | ||||||
| 	} | 	} | ||||||
| 	else | 	else | ||||||
| 	{ | 	{ | ||||||
| @@ -1578,12 +1613,13 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno, | |||||||
| 		 */ | 		 */ | ||||||
| 		Assert(next_block == vacrel->next_unskippable_block); | 		Assert(next_block == vacrel->next_unskippable_block); | ||||||
|  |  | ||||||
| 		*blkno = vacrel->current_block = next_block; | 		vacrel->current_block = next_block; | ||||||
| 		if (vacrel->next_unskippable_allvis) | 		if (vacrel->next_unskippable_allvis) | ||||||
| 			*blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; | 			blk_info |= VAC_BLK_ALL_VISIBLE_ACCORDING_TO_VM; | ||||||
| 		if (vacrel->next_unskippable_eager_scanned) | 		if (vacrel->next_unskippable_eager_scanned) | ||||||
| 			*blk_info |= VAC_BLK_WAS_EAGER_SCANNED; | 			blk_info |= VAC_BLK_WAS_EAGER_SCANNED; | ||||||
| 		return true; | 		*((uint8 *) per_buffer_data) = blk_info; | ||||||
|  | 		return vacrel->current_block; | ||||||
| 	} | 	} | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user