Use TidStore for dead tuple TIDs storage during lazy vacuum.
Previously, we used a simple array for storing dead tuple IDs during
lazy vacuum, which had a number of problems:
* The array used a single allocation and so was limited to 1GB.
* The allocation was pessimistically sized according to table size (see the
sketch after this list).
* Lookup with binary search was slow because of poor CPU cache and
branch prediction behavior.
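To make the pessimistic sizing concrete, here is a sketch of the old up-front
computation. The helper name is hypothetical and this is not the removed code
itself, but the constants are real: ItemPointerData is 6 bytes, a single
palloc is capped at MaxAllocSize (a bit under 1GB), and MaxHeapTuplesPerPage
bounds the TIDs a heap page can hold. Every page was assumed to be entirely
dead, so a large table reserved its whole memory budget immediately:

	#include "postgres.h"
	#include "access/htup_details.h"	/* MaxHeapTuplesPerPage */
	#include "storage/itemptr.h"		/* ItemPointerData (6 bytes) */
	#include "utils/memutils.h"			/* MaxAllocSize */

	/* Hypothetical condensation of the removed sizing logic. */
	static int64
	old_max_dead_items(BlockNumber rel_pages, int vac_work_mem_kb)
	{
		int64		max_items;

		/* Fit as many 6-byte TIDs as the memory budget allows ... */
		max_items = ((int64) vac_work_mem_kb * 1024) / sizeof(ItemPointerData);

		/* ... but one allocation cannot exceed MaxAllocSize: the 1GB limit ... */
		max_items = Min(max_items, (int64) (MaxAllocSize / sizeof(ItemPointerData)));

		/* ... and never more TIDs than the table could theoretically contain. */
		max_items = Min(max_items, (int64) rel_pages * MaxHeapTuplesPerPage);

		return max_items;		/* the array was then allocated at full size */
	}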
This commit replaces that array with the TID store from commit
30e144287a.
Since the backing radix tree makes small allocations as needed, the
1GB limit is now gone. Further, the total memory used is now often
smaller by an order of magnitude or more, depending on the
distribution of blocks and offsets. These two features should make
multiple rounds of heap scanning and index cleanup an extremely rare
event. TID lookup during index cleanup is also several times faster,
even more so when index order is correlated with heap tuple order.
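For illustration, a minimal sketch of using the TID store from a single
backend. This is not code from this commit; the signatures follow tidstore.h
as of PostgreSQL 17 (e.g. the insert_only flag of TidStoreCreateLocal) and
may differ slightly in the tree as of this commit:

	#include "postgres.h"
	#include "access/tidstore.h"

	static void
	tidstore_sketch(void)
	{
		TidStore   *ts;
		OffsetNumber offsets[] = {1, 5, 42};	/* must be sorted */
		ItemPointerData tid;

		/* Backend-local store capped at 64MB; memory is allocated as needed. */
		ts = TidStoreCreateLocal(64 * 1024 * 1024, true);

		/* All dead TIDs of one heap block are stored under its block number. */
		TidStoreSetBlockOffsets(ts, (BlockNumber) 10, offsets, lengthof(offsets));

		/* During index cleanup: does this index entry point to a dead tuple? */
		ItemPointerSet(&tid, 10, 5);
		if (TidStoreIsMember(ts, &tid))
			elog(DEBUG1, "index entry points to a dead tuple");

		/* Accounting is byte-based, which motivates the new progress columns. */
		elog(DEBUG1, "dead item storage uses %zu bytes", TidStoreMemoryUsage(ts));

		TidStoreDestroy(ts);
	}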
Since there is no longer a predictable relationship between the number
of dead tuples vacuumed and the space taken up by their TIDs, the
number of tuples no longer provides any meaningful insights for users,
nor is the maximum number predictable. For that reason this commit
also changes to byte-based progress reporting, with the relevant
columns of pg_stat_progress_vacuum renamed accordingly to
max_dead_tuple_bytes and dead_tuple_bytes.
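A sketch of the byte-based reporting, assuming the renamed
PROGRESS_VACUUM_DEAD_TUPLE_BYTES symbol in commands/progress.h and the
long-standing pgstat_progress_update_param() call; the helper name is
hypothetical:

	#include "postgres.h"
	#include "access/tidstore.h"
	#include "commands/progress.h"
	#include "pgstat.h"

	/* Report current dead-item memory usage; surfaced as dead_tuple_bytes. */
	static void
	report_dead_tuple_bytes(TidStore *dead_items)
	{
		pgstat_progress_update_param(PROGRESS_VACUUM_DEAD_TUPLE_BYTES,
									 TidStoreMemoryUsage(dead_items));
	}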
For parallel vacuum, both the TID store and supplemental information
specific to vacuum are shared among the parallel vacuum workers. As
with the previous array, we don't take any locks on TidStore during
parallel vacuum since writes are still only done by the leader
process.
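Condensed from the diff below: the leader creates the shared store and
publishes two handles through PVShared, and each worker attaches (and later
detaches). The wrapper function names here are illustrative only, and the
sketch assumes it lives inside vacuumparallel.c, where PVShared is declared:

	#include "postgres.h"
	#include "access/tidstore.h"
	#include "storage/lwlock.h"
	#include "utils/dsa.h"

	/* Leader: create the shared TidStore and export its handles via PVShared. */
	static TidStore *
	leader_create_dead_items(PVShared *shared)
	{
		TidStore   *dead_items;

		dead_items = TidStoreCreateShared(shared->dead_items_info.max_bytes,
										  LWTRANCHE_PARALLEL_VACUUM_DSA);
		shared->dead_items_handle = TidStoreGetHandle(dead_items);
		shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(dead_items));
		return dead_items;
	}

	/* Worker: attach to the same store; only the leader writes, so no locking. */
	static TidStore *
	worker_attach_dead_items(PVShared *shared)
	{
		return TidStoreAttach(shared->dead_items_dsa_handle,
							  shared->dead_items_handle);
	}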
Bump catalog version.
Reviewed-by: John Naylor, (in an earlier version) Dilip Kumar
Discussion: https://postgr.es/m/CAD21AoAfOZvmfR0j8VmZorZjL7RhTiQdVttNuC4W-Shdc2a-AA%40mail.gmail.com
src/backend/commands/vacuumparallel.c
@@ -8,8 +8,8 @@
  *
  * In a parallel vacuum, we perform both index bulk deletion and index cleanup
  * with parallel worker processes. Individual indexes are processed by one
- * vacuum process. ParallelVacuumState contains shared information as well as
- * the memory space for storing dead items allocated in the DSM segment. We
+ * vacuum process. ParallelVacuumState contains shared information as well as
+ * the memory space for storing dead items allocated in the DSA area. We
  * launch parallel worker processes at the start of parallel index
  * bulk-deletion and index cleanup and once all indexes are processed, the
  * parallel worker processes exit. Each time we process indexes in parallel,
@@ -45,11 +45,10 @@
  * use small integers.
  */
 #define PARALLEL_VACUUM_KEY_SHARED			1
-#define PARALLEL_VACUUM_KEY_DEAD_ITEMS		2
-#define PARALLEL_VACUUM_KEY_QUERY_TEXT		3
-#define PARALLEL_VACUUM_KEY_BUFFER_USAGE	4
-#define PARALLEL_VACUUM_KEY_WAL_USAGE		5
-#define PARALLEL_VACUUM_KEY_INDEX_STATS		6
+#define PARALLEL_VACUUM_KEY_QUERY_TEXT		2
+#define PARALLEL_VACUUM_KEY_BUFFER_USAGE	3
+#define PARALLEL_VACUUM_KEY_WAL_USAGE		4
+#define PARALLEL_VACUUM_KEY_INDEX_STATS		5
 
 /*
  * Shared information among parallel workers. So this is allocated in the DSM
@@ -110,6 +109,15 @@ typedef struct PVShared
 
 	/* Counter for vacuuming and cleanup */
 	pg_atomic_uint32 idx;
+
+	/* DSA handle where the TidStore lives */
+	dsa_handle	dead_items_dsa_handle;
+
+	/* DSA pointer to the shared TidStore */
+	dsa_pointer dead_items_handle;
+
+	/* Statistics of shared dead items */
+	VacDeadItemsInfo dead_items_info;
 } PVShared;
 
 /* Status used during parallel index vacuum or cleanup */
@@ -176,7 +184,7 @@ struct ParallelVacuumState
 	PVIndStats *indstats;
 
 	/* Shared dead items space among parallel vacuum workers */
-	VacDeadItems *dead_items;
+	TidStore   *dead_items;
 
 	/* Points to buffer usage area in DSM */
 	BufferUsage *buffer_usage;
@@ -232,20 +240,19 @@ static void parallel_vacuum_error_callback(void *arg);
  */
 ParallelVacuumState *
 parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
-					 int nrequested_workers, int max_items,
+					 int nrequested_workers, int vac_work_mem,
 					 int elevel, BufferAccessStrategy bstrategy)
 {
 	ParallelVacuumState *pvs;
 	ParallelContext *pcxt;
 	PVShared   *shared;
-	VacDeadItems *dead_items;
+	TidStore   *dead_items;
 	PVIndStats *indstats;
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
 	bool	   *will_parallel_vacuum;
 	Size		est_indstats_len;
 	Size		est_shared_len;
-	Size		est_dead_items_len;
 	int			nindexes_mwm = 0;
 	int			parallel_workers = 0;
 	int			querylen;
@@ -294,11 +301,6 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shm_toc_estimate_chunk(&pcxt->estimator, est_shared_len);
 	shm_toc_estimate_keys(&pcxt->estimator, 1);
 
-	/* Estimate size for dead_items -- PARALLEL_VACUUM_KEY_DEAD_ITEMS */
-	est_dead_items_len = vac_max_items_to_alloc_size(max_items);
-	shm_toc_estimate_chunk(&pcxt->estimator, est_dead_items_len);
-	shm_toc_estimate_keys(&pcxt->estimator, 1);
-
 	/*
 	 * Estimate space for BufferUsage and WalUsage --
 	 * PARALLEL_VACUUM_KEY_BUFFER_USAGE and PARALLEL_VACUUM_KEY_WAL_USAGE.
@@ -371,6 +373,14 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 		(nindexes_mwm > 0) ?
 		maintenance_work_mem / Min(parallel_workers, nindexes_mwm) :
 		maintenance_work_mem;
+	shared->dead_items_info.max_bytes = vac_work_mem * 1024L;
+
+	/* Prepare DSA space for dead items */
+	dead_items = TidStoreCreateShared(shared->dead_items_info.max_bytes,
+									  LWTRANCHE_PARALLEL_VACUUM_DSA);
+	pvs->dead_items = dead_items;
+	shared->dead_items_handle = TidStoreGetHandle(dead_items);
+	shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(dead_items));
 
 	/* Use the same buffer size for all workers */
 	shared->ring_nbuffers = GetAccessStrategyBufferCount(bstrategy);
@@ -382,15 +392,6 @@ parallel_vacuum_init(Relation rel, Relation *indrels, int nindexes,
 	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_SHARED, shared);
 	pvs->shared = shared;
 
-	/* Prepare the dead_items space */
-	dead_items = (VacDeadItems *) shm_toc_allocate(pcxt->toc,
-												   est_dead_items_len);
-	dead_items->max_items = max_items;
-	dead_items->num_items = 0;
-	MemSet(dead_items->items, 0, sizeof(ItemPointerData) * max_items);
-	shm_toc_insert(pcxt->toc, PARALLEL_VACUUM_KEY_DEAD_ITEMS, dead_items);
-	pvs->dead_items = dead_items;
-
 	/*
 	 * Allocate space for each worker's BufferUsage and WalUsage; no need to
 	 * initialize
@@ -448,6 +449,8 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
 		istats[i] = NULL;
 	}
 
+	TidStoreDestroy(pvs->dead_items);
+
 	DestroyParallelContext(pvs->pcxt);
 	ExitParallelMode();
 
@@ -455,13 +458,41 @@ parallel_vacuum_end(ParallelVacuumState *pvs, IndexBulkDeleteResult **istats)
 	pfree(pvs);
 }
 
-/* Returns the dead items space */
-VacDeadItems *
-parallel_vacuum_get_dead_items(ParallelVacuumState *pvs)
+/*
+ * Returns the dead items space and dead items information.
+ */
+TidStore *
+parallel_vacuum_get_dead_items(ParallelVacuumState *pvs, VacDeadItemsInfo **dead_items_info_p)
 {
+	*dead_items_info_p = &(pvs->shared->dead_items_info);
 	return pvs->dead_items;
 }
 
+/* Forget all items in dead_items */
+void
+parallel_vacuum_reset_dead_items(ParallelVacuumState *pvs)
+{
+	TidStore   *dead_items = pvs->dead_items;
+	VacDeadItemsInfo *dead_items_info = &(pvs->shared->dead_items_info);
+
+	/*
+	 * Free the current tidstore and return allocated DSA segments to the
+	 * operating system. Then we recreate the tidstore with the same max_bytes
+	 * limitation we just used.
+	 */
+	TidStoreDestroy(dead_items);
+	dead_items = TidStoreCreateShared(dead_items_info->max_bytes,
+									  LWTRANCHE_PARALLEL_VACUUM_DSA);
+	pvs->dead_items = dead_items;
+
+	/* Update the DSA pointer for dead_items to the new one */
+	pvs->shared->dead_items_dsa_handle = dsa_get_handle(TidStoreGetDSA(dead_items));
+	pvs->shared->dead_items_handle = TidStoreGetHandle(dead_items);
+
+	/* Reset the counter */
+	dead_items_info->num_items = 0;
+}
+
 /*
  * Do parallel index bulk-deletion with parallel workers.
  */
@@ -861,7 +891,8 @@ parallel_vacuum_process_one_index(ParallelVacuumState *pvs, Relation indrel,
 	switch (indstats->status)
 	{
 		case PARALLEL_INDVAC_STATUS_NEED_BULKDELETE:
-			istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items);
+			istat_res = vac_bulkdel_one_index(&ivinfo, istat, pvs->dead_items,
+											  &pvs->shared->dead_items_info);
 			break;
 		case PARALLEL_INDVAC_STATUS_NEED_CLEANUP:
 			istat_res = vac_cleanup_one_index(&ivinfo, istat);
@@ -961,7 +992,7 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	Relation   *indrels;
 	PVIndStats *indstats;
 	PVShared   *shared;
-	VacDeadItems *dead_items;
+	TidStore   *dead_items;
 	BufferUsage *buffer_usage;
 	WalUsage   *wal_usage;
 	int			nindexes;
@@ -1005,10 +1036,9 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 										   PARALLEL_VACUUM_KEY_INDEX_STATS,
 										   false);
 
-	/* Set dead_items space */
-	dead_items = (VacDeadItems *) shm_toc_lookup(toc,
-												 PARALLEL_VACUUM_KEY_DEAD_ITEMS,
-												 false);
+	/* Find dead_items in shared memory */
+	dead_items = TidStoreAttach(shared->dead_items_dsa_handle,
+								shared->dead_items_handle);
 
 	/* Set cost-based vacuum delay */
 	VacuumUpdateCosts();
@@ -1056,6 +1086,8 @@ parallel_vacuum_main(dsm_segment *seg, shm_toc *toc)
 	InstrEndParallelQuery(&buffer_usage[ParallelWorkerNumber],
 						  &wal_usage[ParallelWorkerNumber]);
 
+	TidStoreDetach(dead_items);
+
 	/* Pop the error context stack */
 	error_context_stack = errcallback.previous;