1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-11 10:01:57 +03:00

Allow vacuum command to process indexes in parallel.

This feature allows the vacuum to leverage multiple CPUs in order to
process indexes.  This enables us to perform index vacuuming and index
cleanup with background workers.  This adds a PARALLEL option to VACUUM
command where the user can specify the number of workers that can be used
to perform the command which is limited by the number of indexes on a
table.  Specifying zero as a number of workers will disable parallelism.
This option can't be used with the FULL option.

Each index is processed by at most one vacuum process.  Therefore parallel
vacuum can be used when the table has at least two indexes.

The parallel degree is either specified by the user or determined based on
the number of indexes that the table has, and further limited by
max_parallel_maintenance_workers.  An index can participate in parallel
vacuum iff its size is greater than min_parallel_index_scan_size.

Author: Masahiko Sawada and Amit Kapila
Reviewed-by: Dilip Kumar, Amit Kapila, Robert Haas, Tomas Vondra,
Mahendra Singh and Sergei Kornilov
Tested-by: Mahendra Singh and Prabhat Sahu
Discussion:
https://postgr.es/m/CAD21AoDTPMgzSkV4E3SFo1CH_x50bf5PqZFQf4jmqjk-C03BWg@mail.gmail.com
https://postgr.es/m/CAA4eK1J-VoR9gzS5E75pcD-OH0mEyCdp8RihcwKrcuw7J-Q0+w@mail.gmail.com
This commit is contained in:
Amit Kapila
2020-01-20 07:57:49 +05:30
parent 44f1fc8df5
commit 40d964ec99
13 changed files with 1452 additions and 136 deletions

File diff suppressed because it is too large Load Diff

View File

@ -14,6 +14,7 @@
#include "postgres.h"
#include "access/heapam.h"
#include "access/nbtree.h"
#include "access/parallel.h"
#include "access/session.h"
@ -139,6 +140,9 @@ static const struct
},
{
"_bt_parallel_build_main", _bt_parallel_build_main
},
{
"parallel_vacuum_main", parallel_vacuum_main
}
};
@ -174,6 +178,7 @@ CreateParallelContext(const char *library_name, const char *function_name,
pcxt = palloc0(sizeof(ParallelContext));
pcxt->subid = GetCurrentSubTransactionId();
pcxt->nworkers = nworkers;
pcxt->nworkers_to_launch = nworkers;
pcxt->library_name = pstrdup(library_name);
pcxt->function_name = pstrdup(function_name);
pcxt->error_context_stack = error_context_stack;
@ -486,6 +491,23 @@ ReinitializeParallelDSM(ParallelContext *pcxt)
}
}
/*
 * Reinitialize parallel workers for a parallel context such that we could
 * launch a different number of workers.  This is required for cases where
 * we need to reuse the same DSM segment, but the number of workers can
 * vary from run-to-run.
 *
 * pcxt is the parallel context to adjust; nworkers_to_launch is the number
 * of workers the next LaunchParallelWorkers() call should try to start.
 */
void
ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch)
{
	/*
	 * The number of workers to launch must not exceed the number of workers
	 * with which the parallel context was initialized (equal is fine).
	 * LaunchParallelWorkers() registers exactly nworkers_to_launch workers,
	 * so an assertion alone would leave an oversized request unchecked in
	 * builds without assertions.  Trim the request instead.
	 */
	if (nworkers_to_launch > pcxt->nworkers)
		nworkers_to_launch = pcxt->nworkers;

	pcxt->nworkers_to_launch = nworkers_to_launch;
}
/*
* Launch parallel workers.
*/
@ -498,7 +520,7 @@ LaunchParallelWorkers(ParallelContext *pcxt)
bool any_registrations_failed = false;
/* Skip this if we have no workers. */
if (pcxt->nworkers == 0)
if (pcxt->nworkers == 0 || pcxt->nworkers_to_launch == 0)
return;
/* We need to be a lock group leader. */
@ -533,7 +555,7 @@ LaunchParallelWorkers(ParallelContext *pcxt)
* fails. It wouldn't help much anyway, because registering the worker in
* no way guarantees that it will start up and initialize successfully.
*/
for (i = 0; i < pcxt->nworkers; ++i)
for (i = 0; i < pcxt->nworkers_to_launch; ++i)
{
memcpy(worker.bgw_extra, &i, sizeof(int));
if (!any_registrations_failed &&

View File

@ -42,6 +42,7 @@
#include "nodes/makefuncs.h"
#include "pgstat.h"
#include "postmaster/autovacuum.h"
#include "postmaster/bgworker_internals.h"
#include "storage/bufmgr.h"
#include "storage/lmgr.h"
#include "storage/proc.h"
@ -68,6 +69,14 @@ static MemoryContext vac_context = NULL;
static BufferAccessStrategy vac_strategy;
/*
* Variables for cost-based parallel vacuum. See comments atop
* compute_parallel_delay to understand how it works.
*/
pg_atomic_uint32 *VacuumSharedCostBalance = NULL;
pg_atomic_uint32 *VacuumActiveNWorkers = NULL;
int VacuumCostBalanceLocal = 0;
/* non-export function prototypes */
static List *expand_vacuum_rel(VacuumRelation *vrel, int options);
static List *get_all_vacuum_rels(int options);
@ -76,6 +85,7 @@ static void vac_truncate_clog(TransactionId frozenXID,
TransactionId lastSaneFrozenXid,
MultiXactId lastSaneMinMulti);
static bool vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params);
static double compute_parallel_delay(void);
static VacOptTernaryValue get_vacopt_ternary_value(DefElem *def);
/*
@ -94,12 +104,16 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
bool freeze = false;
bool full = false;
bool disable_page_skipping = false;
bool parallel_option = false;
ListCell *lc;
/* Set default value */
params.index_cleanup = VACOPT_TERNARY_DEFAULT;
params.truncate = VACOPT_TERNARY_DEFAULT;
/* By default parallel vacuum is enabled */
params.nworkers = 0;
/* Parse options list */
foreach(lc, vacstmt->options)
{
@ -129,6 +143,39 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
params.index_cleanup = get_vacopt_ternary_value(opt);
else if (strcmp(opt->defname, "truncate") == 0)
params.truncate = get_vacopt_ternary_value(opt);
else if (strcmp(opt->defname, "parallel") == 0)
{
parallel_option = true;
if (opt->arg == NULL)
{
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("parallel option requires a value between 0 and %d",
MAX_PARALLEL_WORKER_LIMIT),
parser_errposition(pstate, opt->location)));
}
else
{
int nworkers;
nworkers = defGetInt32(opt);
if (nworkers < 0 || nworkers > MAX_PARALLEL_WORKER_LIMIT)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("parallel vacuum degree must be between 0 and %d",
MAX_PARALLEL_WORKER_LIMIT),
parser_errposition(pstate, opt->location)));
/*
* Disable parallel vacuum, if user has specified parallel
* degree as zero.
*/
if (nworkers == 0)
params.nworkers = -1;
else
params.nworkers = nworkers;
}
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
@ -152,6 +199,11 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
!(params.options & (VACOPT_FULL | VACOPT_FREEZE)));
Assert(!(params.options & VACOPT_SKIPTOAST));
if ((params.options & VACOPT_FULL) && parallel_option)
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot specify both FULL and PARALLEL options")));
/*
* Make sure VACOPT_ANALYZE is specified if any column lists are present.
*/
@ -383,6 +435,9 @@ vacuum(List *relations, VacuumParams *params,
VacuumPageHit = 0;
VacuumPageMiss = 0;
VacuumPageDirty = 0;
VacuumCostBalanceLocal = 0;
VacuumSharedCostBalance = NULL;
VacuumActiveNWorkers = NULL;
/*
* Loop to process each selected relation.
@ -1941,16 +1996,26 @@ vac_close_indexes(int nindexes, Relation *Irel, LOCKMODE lockmode)
void
vacuum_delay_point(void)
{
double msec = 0;
/* Always check for interrupts */
CHECK_FOR_INTERRUPTS();
/* Nap if appropriate */
if (VacuumCostActive && !InterruptPending &&
VacuumCostBalance >= VacuumCostLimit)
{
double msec;
if (!VacuumCostActive || InterruptPending)
return;
/*
* For parallel vacuum, the delay is computed based on the shared cost
* balance. See compute_parallel_delay.
*/
if (VacuumSharedCostBalance != NULL)
msec = compute_parallel_delay();
else if (VacuumCostBalance >= VacuumCostLimit)
msec = VacuumCostDelay * VacuumCostBalance / VacuumCostLimit;
/* Nap if appropriate */
if (msec > 0)
{
if (msec > VacuumCostDelay * 4)
msec = VacuumCostDelay * 4;
@ -1966,6 +2031,66 @@ vacuum_delay_point(void)
}
}
/*
 * Work out how long the calling process should nap during a parallel vacuum.
 *
 * Cost-based delay for parallel vacuum lets each participant (the leader
 * included) sleep in proportion to the work it has done itself.  All
 * participants share one view of the accumulated cost
 * (VacuumSharedCostBalance).  Each of them adds the cost it has incurred to
 * that shared value and then decides whether it has to sleep.  The sleep
 * time is derived from the cost this process accumulated on its own
 * (VacuumCostBalanceLocal), and that same amount is then taken back out of
 * the shared balance.  As a result, a process that did little or no I/O is
 * not put to sleep on behalf of the others, while the ones doing more I/O
 * are throttled more.
 *
 * A process sleeps only when both of these hold: the overall shared balance
 * has reached VacuumCostLimit, and its own balance is above a threshold
 * derived from the number of active workers (VacuumActiveNWorkers).  Per the
 * original testing, the required throttling is achieved when a process that
 * has done more than 50% of its share of the work is allowed to sleep.
 *
 * Returns the delay in milliseconds, or 0 when no sleep is needed.
 */
static double
compute_parallel_delay(void)
{
	uint32		total_balance;
	int			active_workers;
	double		delay_msec;

	/* Parallel vacuum must be active */
	Assert(VacuumSharedCostBalance);

	active_workers = pg_atomic_read_u32(VacuumActiveNWorkers);

	/* The caller itself is always counted */
	Assert(active_workers >= 1);

	/* Publish the pending cost to the shared balance atomically ... */
	total_balance = pg_atomic_add_fetch_u32(VacuumSharedCostBalance, VacuumCostBalance);

	/* ... and remember it as part of this process's own running total */
	VacuumCostBalanceLocal += VacuumCostBalance;

	/* The pending cost now lives in the shared value, so start over */
	VacuumCostBalance = 0;

	/* No nap unless the global limit is hit and we did enough of the work */
	if (total_balance < VacuumCostLimit ||
		VacuumCostBalanceLocal <= 0.5 * (VacuumCostLimit / active_workers))
		return 0;

	/* Sleep time is based on the local cost balance only */
	delay_msec = VacuumCostDelay * VacuumCostBalanceLocal / VacuumCostLimit;

	/* Give back to the shared balance what we are about to sleep off */
	pg_atomic_sub_fetch_u32(VacuumSharedCostBalance, VacuumCostBalanceLocal);
	VacuumCostBalanceLocal = 0;

	return delay_msec;
}
/*
* A wrapper function of defGetBoolean().
*

View File

@ -2886,6 +2886,8 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
(!wraparound ? VACOPT_SKIP_LOCKED : 0);
tab->at_params.index_cleanup = VACOPT_TERNARY_DEFAULT;
tab->at_params.truncate = VACOPT_TERNARY_DEFAULT;
/* As of now, we don't support parallel vacuum for autovacuum */
tab->at_params.nworkers = -1;
tab->at_params.freeze_min_age = freeze_min_age;
tab->at_params.freeze_table_age = freeze_table_age;
tab->at_params.multixact_freeze_min_age = multixact_freeze_min_age;

View File

@ -3597,7 +3597,7 @@ psql_completion(const char *text, int start, int end)
if (ends_with(prev_wd, '(') || ends_with(prev_wd, ','))
COMPLETE_WITH("FULL", "FREEZE", "ANALYZE", "VERBOSE",
"DISABLE_PAGE_SKIPPING", "SKIP_LOCKED",
"INDEX_CLEANUP", "TRUNCATE");
"INDEX_CLEANUP", "TRUNCATE", "PARALLEL");
else if (TailMatches("FULL|FREEZE|ANALYZE|VERBOSE|DISABLE_PAGE_SKIPPING|SKIP_LOCKED|INDEX_CLEANUP|TRUNCATE"))
COMPLETE_WITH("ON", "OFF");
}

View File

@ -23,7 +23,9 @@
#include "nodes/lockoptions.h"
#include "nodes/primnodes.h"
#include "storage/bufpage.h"
#include "storage/dsm.h"
#include "storage/lockdefs.h"
#include "storage/shm_toc.h"
#include "utils/relcache.h"
#include "utils/snapshot.h"
@ -193,6 +195,7 @@ extern Size SyncScanShmemSize(void);
struct VacuumParams;
extern void heap_vacuum_rel(Relation onerel,
struct VacuumParams *params, BufferAccessStrategy bstrategy);
extern void parallel_vacuum_main(dsm_segment *seg, shm_toc *toc);
/* in heap/heapam_visibility.c */
extern bool HeapTupleSatisfiesVisibility(HeapTuple stup, Snapshot snapshot,

View File

@ -33,7 +33,8 @@ typedef struct ParallelContext
{
dlist_node node;
SubTransactionId subid;
int nworkers;
int nworkers; /* Maximum number of workers to launch */
int nworkers_to_launch; /* Actual number of workers to launch */
int nworkers_launched;
char *library_name;
char *function_name;
@ -63,6 +64,7 @@ extern ParallelContext *CreateParallelContext(const char *library_name,
const char *function_name, int nworkers);
extern void InitializeParallelDSM(ParallelContext *pcxt);
extern void ReinitializeParallelDSM(ParallelContext *pcxt);
extern void ReinitializeParallelWorkers(ParallelContext *pcxt, int nworkers_to_launch);
extern void LaunchParallelWorkers(ParallelContext *pcxt);
extern void WaitForParallelWorkersToAttach(ParallelContext *pcxt);
extern void WaitForParallelWorkersToFinish(ParallelContext *pcxt);

View File

@ -222,6 +222,13 @@ typedef struct VacuumParams
* default value depends on reloptions */
VacOptTernaryValue truncate; /* Truncate empty pages at the end,
* default value depends on reloptions */
/*
* The number of parallel vacuum workers. 0 by default, which means the
* number is chosen based on the number of indexes. -1 indicates that
* parallel vacuum is disabled.
*/
int nworkers;
} VacuumParams;
/* GUC parameters */
@ -231,6 +238,11 @@ extern int vacuum_freeze_table_age;
extern int vacuum_multixact_freeze_min_age;
extern int vacuum_multixact_freeze_table_age;
/* Variables for cost-based parallel vacuum */
extern pg_atomic_uint32 *VacuumSharedCostBalance;
extern pg_atomic_uint32 *VacuumActiveNWorkers;
extern int VacuumCostBalanceLocal;
/* in commands/vacuum.c */
extern void ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel);

View File

@ -92,6 +92,40 @@ CONTEXT: SQL function "do_analyze" statement 1
SQL function "wrap_do_analyze" statement 1
VACUUM FULL vactst;
VACUUM (DISABLE_PAGE_SKIPPING) vaccluster;
-- PARALLEL option
CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off);
INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,1000) i;
CREATE INDEX btree_pvactst ON pvactst USING btree (i);
CREATE INDEX hash_pvactst ON pvactst USING hash (i);
CREATE INDEX brin_pvactst ON pvactst USING brin (i);
CREATE INDEX gin_pvactst ON pvactst USING gin (a);
CREATE INDEX gist_pvactst ON pvactst USING gist (p);
CREATE INDEX spgist_pvactst ON pvactst USING spgist (p);
-- VACUUM invokes parallel index cleanup
SET min_parallel_index_scan_size to 0;
VACUUM (PARALLEL 2) pvactst;
-- VACUUM invokes parallel bulk-deletion
UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 2) pvactst;
UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 0) pvactst; -- disable parallel vacuum
VACUUM (PARALLEL -1) pvactst; -- error
ERROR: parallel vacuum degree must be between 0 and 1024
LINE 1: VACUUM (PARALLEL -1) pvactst;
^
VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst;
VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL
ERROR: cannot specify both FULL and PARALLEL options
VACUUM (PARALLEL) pvactst; -- error, cannot use PARALLEL option without parallel degree
ERROR: parallel option requires a value between 0 and 1024
LINE 1: VACUUM (PARALLEL) pvactst;
^
CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);
CREATE INDEX tmp_idx1 ON tmp (a);
VACUUM (PARALLEL 1) tmp; -- disables parallel vacuum option
WARNING: disabling parallel option of vacuum on "tmp" --- cannot vacuum temporary tables in parallel
RESET min_parallel_index_scan_size;
DROP TABLE pvactst;
-- INDEX_CLEANUP option
CREATE TABLE no_index_cleanup (i INT PRIMARY KEY, t TEXT);
-- Use uncompressed data stored in toast.

View File

@ -75,6 +75,37 @@ VACUUM FULL vactst;
VACUUM (DISABLE_PAGE_SKIPPING) vaccluster;
-- PARALLEL option
CREATE TABLE pvactst (i INT, a INT[], p POINT) with (autovacuum_enabled = off);
INSERT INTO pvactst SELECT i, array[1,2,3], point(i, i+1) FROM generate_series(1,1000) i;
CREATE INDEX btree_pvactst ON pvactst USING btree (i);
CREATE INDEX hash_pvactst ON pvactst USING hash (i);
CREATE INDEX brin_pvactst ON pvactst USING brin (i);
CREATE INDEX gin_pvactst ON pvactst USING gin (a);
CREATE INDEX gist_pvactst ON pvactst USING gist (p);
CREATE INDEX spgist_pvactst ON pvactst USING spgist (p);
-- VACUUM invokes parallel index cleanup
SET min_parallel_index_scan_size to 0;
VACUUM (PARALLEL 2) pvactst;
-- VACUUM invokes parallel bulk-deletion
UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 2) pvactst;
UPDATE pvactst SET i = i WHERE i < 1000;
VACUUM (PARALLEL 0) pvactst; -- disable parallel vacuum
VACUUM (PARALLEL -1) pvactst; -- error
VACUUM (PARALLEL 2, INDEX_CLEANUP FALSE) pvactst;
VACUUM (PARALLEL 2, FULL TRUE) pvactst; -- error, cannot use both PARALLEL and FULL
VACUUM (PARALLEL) pvactst; -- error, cannot use PARALLEL option without parallel degree
CREATE TEMPORARY TABLE tmp (a int PRIMARY KEY);
CREATE INDEX tmp_idx1 ON tmp (a);
VACUUM (PARALLEL 1) tmp; -- disables parallel vacuum option
RESET min_parallel_index_scan_size;
DROP TABLE pvactst;
-- INDEX_CLEANUP option
CREATE TABLE no_index_cleanup (i INT PRIMARY KEY, t TEXT);
-- Use uncompressed data stored in toast.

View File

@ -1216,7 +1216,11 @@ LPVOID
LPWSTR
LSEG
LUID
LVDeadTuples
LVParallelState
LVRelStats
LVShared
LVSharedIndStats
LWLock
LWLockHandle
LWLockMinimallyPadded