diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 17795616b5d..fc186657a53 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -9147,6 +9147,45 @@ COPY postgres_log FROM '/full/path/to/logfile.csv' WITH csv;
+
+ vacuum_max_eager_freeze_failure_rate (floating point)
+
+ vacuum_max_eager_freeze_failure_rate configuration parameter
+
+
+
+
+ Specifies the maximum number of pages (as a fraction of total pages in
+ the relation) that VACUUM may scan and
+ fail to set all-frozen in the visibility map
+ before disabling eager scanning. A value of 0
+ disables eager scanning altogether. The default is
+ 0.03 (3%).
+
+
+
+ Note that when eager scanning is enabled, successful page freezes do
+ not count against the cap on eager freeze failures. Successful page
+ freezes are capped internally at 20% of the all-visible but not
+ all-frozen pages in the relation. Capping successful page freezes helps
+ amortize the overhead across multiple normal vacuums and limits the
+ potential downside of wasted eager freezes of pages that are modified
+ again before the next aggressive vacuum.
+
+
+
+ This parameter can only be set in the
+ postgresql.conf file or on the server command
+ line; but the setting can be overridden for individual tables by
+ changing the
+
+ corresponding table storage parameter.
+ For more information on tuning vacuum's freezing behavior,
+ see <xref linkend="vacuum-for-wraparound"/>.
+
+
+
+
diff --git a/doc/src/sgml/maintenance.sgml b/doc/src/sgml/maintenance.sgml
index f84ad7557d9..b5b9da7f8a9 100644
--- a/doc/src/sgml/maintenance.sgml
+++ b/doc/src/sgml/maintenance.sgml
@@ -496,9 +496,25 @@
When that happens, VACUUM will eventually need to perform an
aggressive vacuum, which will freeze all eligible unfrozen
XID and MXID values, including those from all-visible but not all-frozen pages.
- In practice most tables require periodic aggressive vacuuming.
+
+
+
+ If a table is building up a backlog of all-visible but not all-frozen
+ pages, a normal vacuum may choose to scan skippable pages in an effort to
+ freeze them. Doing so decreases the number of pages the next aggressive
+ vacuum must scan. These are referred to as eagerly
+ scanned pages. Eager scanning can be tuned to attempt to freeze
+ more all-visible pages by increasing <xref linkend="guc-vacuum-max-eager-freeze-failure-rate"/>. Even if eager
+ scanning has kept the number of all-visible but not all-frozen pages to a
+ minimum, most tables still require periodic aggressive vacuuming. However,
+ any pages successfully eager frozen may be skipped during an aggressive
+ vacuum, so eager freezing may minimize the overhead of aggressive vacuums.
+
+
+
- <xref linkend="guc-vacuum-freeze-table-age"/> controls when VACUUM does that: all-visible but not all-frozen
+ <xref linkend="guc-vacuum-freeze-table-age"/> controls when a table is aggressively vacuumed. All all-visible but not all-frozen
pages are scanned if the number of transactions that have passed since the
last such scan is greater than vacuum_freeze_table_age minus
vacuum_freeze_min_age. Setting
@@ -626,10 +642,12 @@ SELECT datname, age(datfrozenxid) FROM pg_database;
- VACUUM normally only scans pages that have been modified
- since the last vacuum, but relfrozenxid can only be
- advanced when every page of the table
- that might contain unfrozen XIDs is scanned. This happens when
+ While VACUUM scans mostly pages that have been
+ modified since the last vacuum, it may also eagerly scan some
+ all-visible but not all-frozen pages in an attempt to freeze them, but
+ the relfrozenxid will only be advanced when
+ every page of the table that might contain unfrozen XIDs is scanned.
+ This happens when
relfrozenxid is more than
vacuum_freeze_table_age transactions old, when
VACUUM's FREEZE option is used, or when all
@@ -931,8 +949,7 @@ vacuum insert threshold = vacuum base insert threshold + vacuum insert scale fac
If the relfrozenxid value of the table
is more than vacuum_freeze_table_age transactions old,
an aggressive vacuum is performed to freeze old tuples and advance
- relfrozenxid; otherwise, only pages that have been modified
- since the last vacuum are scanned.
+ relfrozenxid.
diff --git a/doc/src/sgml/ref/create_table.sgml b/doc/src/sgml/ref/create_table.sgml
index 9acbc4dd34d..0a3e520f215 100644
--- a/doc/src/sgml/ref/create_table.sgml
+++ b/doc/src/sgml/ref/create_table.sgml
@@ -1950,6 +1950,21 @@ WITH ( MODULUS numeric_literal, REM
+
+ vacuum_max_eager_freeze_failure_rate, toast.vacuum_max_eager_freeze_failure_rate (floating point)
+
+ vacuum_max_eager_freeze_failure_rate
+ storage parameter
+
+
+
+
+ Per-table value for <xref linkend="guc-vacuum-max-eager-freeze-failure-rate"/>
+ parameter.
+
+
+
+
user_catalog_table (boolean)
diff --git a/src/backend/access/common/reloptions.c b/src/backend/access/common/reloptions.c
index 5731cf42f54..59fb53e7707 100644
--- a/src/backend/access/common/reloptions.c
+++ b/src/backend/access/common/reloptions.c
@@ -432,6 +432,16 @@ static relopt_real realRelOpts[] =
},
-1, 0.0, 100.0
},
+ {
+ {
+ "vacuum_max_eager_freeze_failure_rate",
+ "Fraction of pages in a relation vacuum can scan and fail to freeze before disabling eager scanning.",
+ RELOPT_KIND_HEAP | RELOPT_KIND_TOAST,
+ ShareUpdateExclusiveLock
+ },
+ -1, 0.0, 1.0
+ },
+
{
{
"seq_page_cost",
@@ -1891,7 +1901,9 @@ default_reloptions(Datum reloptions, bool validate, relopt_kind kind)
{"vacuum_index_cleanup", RELOPT_TYPE_ENUM,
offsetof(StdRdOptions, vacuum_index_cleanup)},
{"vacuum_truncate", RELOPT_TYPE_BOOL,
- offsetof(StdRdOptions, vacuum_truncate)}
+ offsetof(StdRdOptions, vacuum_truncate)},
+ {"vacuum_max_eager_freeze_failure_rate", RELOPT_TYPE_REAL,
+ offsetof(StdRdOptions, vacuum_max_eager_freeze_failure_rate)}
};
return (bytea *) build_reloptions(reloptions, validate, kind,
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 075af385cd1..8c387ae557e 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -17,9 +17,9 @@
* failsafe mechanism has triggered (to avoid transaction ID wraparound),
* vacuum may skip phases II and III.
*
- * If the TID store fills up in phase I, vacuum suspends phase I, proceeds to
- * phases II and II, cleaning up the dead tuples referenced in the current TID
- * store. This empties the TID store resumes phase I.
+ * If the TID store fills up in phase I, vacuum suspends phase I and proceeds
+ * to phases II and III, cleaning up the dead tuples referenced in the current
+ * TID store. This empties the TID store, allowing vacuum to resume phase I.
*
* In a way, the phases are more like states in a state machine, but they have
* been referred to colloquially as phases for so long that they are referred
@@ -41,9 +41,53 @@
* to the end, skipping pages as permitted by their visibility status, vacuum
* options, and various other requirements.
*
- * When page skipping is not disabled, a non-aggressive vacuum may scan pages
- * that are marked all-visible (and even all-frozen) in the visibility map if
- * the range of skippable pages is below SKIP_PAGES_THRESHOLD.
+ * Vacuums are either aggressive or normal. Aggressive vacuums must scan every
+ * unfrozen tuple in order to advance relfrozenxid and avoid transaction ID
+ * wraparound. Normal vacuums may scan otherwise skippable pages for one of
+ * two reasons:
+ *
+ * When page skipping is not disabled, a normal vacuum may scan pages that are
+ * marked all-visible (and even all-frozen) in the visibility map if the range
+ * of skippable pages is below SKIP_PAGES_THRESHOLD. This is primarily for the
+ * benefit of kernel readahead (see comment in heap_vac_scan_next_block()).
+ *
+ * A normal vacuum may also scan skippable pages in an effort to freeze them
+ * and decrease the backlog of all-visible but not all-frozen pages that have
+ * to be processed by the next aggressive vacuum. These are referred to as
+ * eagerly scanned pages. Pages scanned due to SKIP_PAGES_THRESHOLD do not
+ * count as eagerly scanned pages.
+ *
+ * Eagerly scanned pages that are set all-frozen in the VM are successful
+ * eager freezes and those not set all-frozen in the VM are failed eager
+ * freezes.
+ *
+ * Because we want to amortize the overhead of freezing pages over multiple
+ * vacuums, normal vacuums cap the number of successful eager freezes to
+ * MAX_EAGER_FREEZE_SUCCESS_RATE of the number of all-visible but not
+ * all-frozen pages at the beginning of the vacuum. Since eagerly frozen pages
+ * may be unfrozen before the next aggressive vacuum, capping the number of
+ * successful eager freezes also caps the downside of eager freezing:
+ * potentially wasted work.
+ *
+ * Once the success cap has been hit, eager scanning is disabled for the
+ * remainder of the vacuum of the relation.
+ *
+ * Success is capped globally because we don't want to limit our successes if
+ * old data happens to be concentrated in a particular part of the table. This
+ * is especially likely to happen for append-mostly workloads where the oldest
+ * data is at the beginning of the unfrozen portion of the relation.
+ *
+ * On the assumption that different regions of the table are likely to contain
+ * similarly aged data, normal vacuums use a localized eager freeze failure
+ * cap. The failure count is reset for each region of the table -- comprised
+ * of EAGER_SCAN_REGION_SIZE blocks. In each region, we tolerate
+ * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE failures
+ * before suspending eager scanning until the end of the region.
+ * vacuum_max_eager_freeze_failure_rate is configurable both globally and per
+ * table.
+ *
+ * Aggressive vacuums must examine every unfrozen tuple and thus are not
+ * subject to any of the limits imposed by the eager scanning algorithm.
*
* Once vacuum has decided to scan a given block, it must read the block and
* obtain a cleanup lock to prune tuples on the page. A non-aggressive vacuum
@@ -100,6 +144,7 @@
#include "commands/progress.h"
#include "commands/vacuum.h"
#include "common/int.h"
+#include "common/pg_prng.h"
#include "executor/instrument.h"
#include "miscadmin.h"
#include "pgstat.h"
@@ -185,6 +230,24 @@ typedef enum
VACUUM_ERRCB_PHASE_TRUNCATE,
} VacErrPhase;
+/*
+ * An eager scan of a page that is set all-frozen in the VM is considered
+ * "successful". To spread out freezing overhead across multiple normal
+ * vacuums, we limit the number of successful eager page freezes. The maximum
+ * number of eager page freezes is calculated as a ratio of the all-visible
+ * but not all-frozen pages at the beginning of the vacuum.
+ */
+#define MAX_EAGER_FREEZE_SUCCESS_RATE 0.2
+
+/*
+ * On the assumption that different regions of the table tend to have
+ * similarly aged data, once vacuum fails to freeze
+ * vacuum_max_eager_freeze_failure_rate of the blocks in a region of size
+ * EAGER_SCAN_REGION_SIZE, it suspends eager scanning until it has progressed
+ * to another region of the table with potentially older data.
+ */
+#define EAGER_SCAN_REGION_SIZE 4096
+
typedef struct LVRelState
{
/* Target heap relation and its indexes */
@@ -241,6 +304,13 @@ typedef struct LVRelState
BlockNumber rel_pages; /* total number of pages */
BlockNumber scanned_pages; /* # pages examined (not skipped via VM) */
+
+ /*
+ * Count of all-visible blocks eagerly scanned (for logging only). This
+ * does not include skippable blocks scanned due to SKIP_PAGES_THRESHOLD.
+ */
+ BlockNumber eager_scanned_pages;
+
BlockNumber removed_pages; /* # pages removed by relation truncation */
BlockNumber new_frozen_tuple_pages; /* # pages with newly frozen tuples */
@@ -282,9 +352,57 @@ typedef struct LVRelState
BlockNumber current_block; /* last block returned */
BlockNumber next_unskippable_block; /* next unskippable block */
bool next_unskippable_allvis; /* its visibility status */
+ bool next_unskippable_eager_scanned; /* if it was eagerly scanned */
Buffer next_unskippable_vmbuffer; /* buffer containing its VM bit */
+
+ /* State related to managing eager scanning of all-visible pages */
+
+ /*
+ * A normal vacuum that has failed to freeze too many eagerly scanned
+ * blocks in a region suspends eager scanning.
+ * next_eager_scan_region_start is the block number of the first block
+ * eligible for resumed eager scanning.
+ *
+ * When eager scanning is permanently disabled, either initially
+ * (including for aggressive vacuum) or due to hitting the success cap,
+ * this is set to InvalidBlockNumber.
+ */
+ BlockNumber next_eager_scan_region_start;
+
+ /*
+ * The remaining number of eagerly scanned blocks a normal vacuum may
+ * successfully freeze. When eager scanning is enabled, this is
+ * initialized to MAX_EAGER_FREEZE_SUCCESS_RATE of the total number of
+ * all-visible but not all-frozen pages. For each eager freeze success,
+ * this is decremented. Once it hits 0, eager scanning is permanently
+ * disabled. It is initialized to 0 if eager scanning starts out disabled
+ * (including for aggressive vacuum).
+ */
+ BlockNumber eager_scan_remaining_successes;
+
+ /*
+ * The maximum number of blocks which may be eagerly scanned and not
+ * frozen before eager scanning is temporarily suspended. This is
+ * configurable both globally, via the
+ * vacuum_max_eager_freeze_failure_rate GUC, and per table, with a table
+ * storage parameter of the same name. It is calculated as
+ * vacuum_max_eager_freeze_failure_rate of EAGER_SCAN_REGION_SIZE blocks.
+ * It is 0 when eager scanning is disabled.
+ */
+ BlockNumber eager_scan_max_fails_per_region;
+
+ /*
+ * The number of additional eagerly scanned blocks vacuum may fail to
+ * freeze (due to age) in the current eager scan region. Vacuum resets it
+ * to eager_scan_max_fails_per_region each time it enters a new region of
+ * the relation. If eager_scan_remaining_fails hits 0, eager scanning is
+ * suspended until the next region. It is also 0 if eager scanning has
+ * been permanently disabled.
+ */
+ BlockNumber eager_scan_remaining_fails;
} LVRelState;
+
/* Struct for saving and restoring vacuum error information. */
typedef struct LVSavedErrInfo
{
@@ -296,8 +414,11 @@ typedef struct LVSavedErrInfo
/* non-export function prototypes */
static void lazy_scan_heap(LVRelState *vacrel);
+static void heap_vacuum_eager_scan_setup(LVRelState *vacrel,
+ VacuumParams *params);
static bool heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
- bool *all_visible_according_to_vm);
+ bool *all_visible_according_to_vm,
+ bool *was_eager_scanned);
static void find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis);
static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
@@ -305,7 +426,7 @@ static bool lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf,
static void lazy_scan_prune(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
Buffer vmbuffer, bool all_visible_according_to_vm,
- bool *has_lpdead_items);
+ bool *has_lpdead_items, bool *vm_page_frozen);
static bool lazy_scan_noprune(LVRelState *vacrel, Buffer buf,
BlockNumber blkno, Page page,
bool *has_lpdead_items);
@@ -347,6 +468,130 @@ static void restore_vacuum_error_info(LVRelState *vacrel,
const LVSavedErrInfo *saved_vacrel);
+
+/*
+ * Helper to set up the eager scanning state for vacuuming a single relation.
+ * Initializes the eager scan management related members of the LVRelState.
+ *
+ * The caller must already have set vacrel->aggressive, vacrel->rel_pages,
+ * and vacrel->cutoffs, since all three are consulted here.
+ */
+static void
+heap_vacuum_eager_scan_setup(LVRelState *vacrel, VacuumParams *params)
+{
+ uint32 randseed;
+ BlockNumber allvisible;
+ BlockNumber allfrozen;
+ float first_region_ratio;
+ bool oldest_unfrozen_before_cutoff = false;
+
+ /*
+ * Initialize eager scan management fields to their disabled values.
+ * Aggressive vacuums, normal vacuums of small tables, and normal vacuums
+ * of tables without sufficiently old tuples disable eager scanning.
+ */
+ vacrel->next_eager_scan_region_start = InvalidBlockNumber;
+ vacrel->eager_scan_max_fails_per_region = 0;
+ vacrel->eager_scan_remaining_fails = 0;
+ vacrel->eager_scan_remaining_successes = 0;
+
+ /* If eager scanning is explicitly disabled, just return. */
+ if (params->max_eager_freeze_failure_rate == 0)
+ return;
+
+ /*
+ * The caller will have determined whether or not an aggressive vacuum is
+ * required by either the vacuum parameters or the relative age of the
+ * oldest unfrozen transaction IDs. An aggressive vacuum must scan every
+ * all-visible page to safely advance the relfrozenxid and/or relminmxid,
+ * so scans of all-visible pages are not considered eager.
+ */
+ if (vacrel->aggressive)
+ return;
+
+ /*
+ * Aggressively vacuuming a small relation shouldn't take long, so it
+ * isn't worth amortizing. We use two times the region size as the size
+ * cutoff because the eager scan start block is a random spot somewhere in
+ * the first region, making the second region the first to be eager
+ * scanned normally.
+ */
+ if (vacrel->rel_pages < 2 * EAGER_SCAN_REGION_SIZE)
+ return;
+
+ /*
+ * We only want to enable eager scanning if we are likely to be able to
+ * freeze some of the pages in the relation.
+ *
+ * Tuples with XIDs older than OldestXmin or MXIDs older than OldestMxact
+ * are technically freezable, but we won't freeze them unless the criteria
+ * for opportunistic freezing is met. Only tuples with XIDs/MXIDs older
+ * than the FreezeLimit/MultiXactCutoff are frozen in the common case.
+ *
+ * So, as a heuristic, we wait until the FreezeLimit has advanced past the
+ * relfrozenxid or the MultiXactCutoff has advanced past the relminmxid to
+ * enable eager scanning.
+ */
+ if (TransactionIdIsNormal(vacrel->cutoffs.relfrozenxid) &&
+ TransactionIdPrecedes(vacrel->cutoffs.relfrozenxid,
+ vacrel->cutoffs.FreezeLimit))
+ oldest_unfrozen_before_cutoff = true;
+
+ if (!oldest_unfrozen_before_cutoff &&
+ MultiXactIdIsValid(vacrel->cutoffs.relminmxid) &&
+ MultiXactIdPrecedes(vacrel->cutoffs.relminmxid,
+ vacrel->cutoffs.MultiXactCutoff))
+ oldest_unfrozen_before_cutoff = true;
+
+ if (!oldest_unfrozen_before_cutoff)
+ return;
+
+ /* We have met the criteria to eagerly scan some pages. */
+
+ /*
+ * Our success cap is MAX_EAGER_FREEZE_SUCCESS_RATE of the number of
+ * all-visible but not all-frozen blocks in the relation.
+ */
+ visibilitymap_count(vacrel->rel, &allvisible, &allfrozen);
+
+ vacrel->eager_scan_remaining_successes =
+ (BlockNumber) (MAX_EAGER_FREEZE_SUCCESS_RATE *
+ (allvisible - allfrozen));
+
+ /* If the success cap truncates to 0, eager scanning stays disabled. */
+ if (vacrel->eager_scan_remaining_successes == 0)
+ return;
+
+ /*
+ * Now calculate the bounds of the first eager scan region. Its end block
+ * will be a random spot somewhere in the first EAGER_SCAN_REGION_SIZE
+ * blocks. This affects the bounds of all subsequent regions and avoids
+ * eager scanning and failing to freeze the same blocks each vacuum of the
+ * relation.
+ */
+ randseed = pg_prng_uint32(&pg_global_prng_state);
+
+ vacrel->next_eager_scan_region_start = randseed % EAGER_SCAN_REGION_SIZE;
+
+ Assert(params->max_eager_freeze_failure_rate > 0 &&
+ params->max_eager_freeze_failure_rate <= 1);
+
+ vacrel->eager_scan_max_fails_per_region =
+ params->max_eager_freeze_failure_rate *
+ EAGER_SCAN_REGION_SIZE;
+
+ /*
+ * The first region will be smaller than subsequent regions. As such,
+ * adjust the eager freeze failures tolerated for this region.
+ */
+ first_region_ratio = 1 - (float) vacrel->next_eager_scan_region_start /
+ EAGER_SCAN_REGION_SIZE;
+
+ vacrel->eager_scan_remaining_fails =
+ vacrel->eager_scan_max_fails_per_region *
+ first_region_ratio;
+}
+
/*
* heap_vacuum_rel() -- perform VACUUM for one heap relation
*
@@ -477,6 +722,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
/* Initialize page counters explicitly (be tidy) */
vacrel->scanned_pages = 0;
+ vacrel->eager_scanned_pages = 0;
vacrel->removed_pages = 0;
vacrel->new_frozen_tuple_pages = 0;
vacrel->lpdead_item_pages = 0;
@@ -502,6 +748,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->vm_new_visible_pages = 0;
vacrel->vm_new_visible_frozen_pages = 0;
vacrel->vm_new_frozen_pages = 0;
+ vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
/*
* Get cutoffs that determine which deleted tuples are considered DEAD,
@@ -520,11 +767,16 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
* to increase the number of dead tuples it can prune away.)
*/
vacrel->aggressive = vacuum_get_cutoffs(rel, params, &vacrel->cutoffs);
- vacrel->rel_pages = orig_rel_pages = RelationGetNumberOfBlocks(rel);
vacrel->vistest = GlobalVisTestFor(rel);
/* Initialize state used to track oldest extant XID/MXID */
vacrel->NewRelfrozenXid = vacrel->cutoffs.OldestXmin;
vacrel->NewRelminMxid = vacrel->cutoffs.OldestMxact;
+
+ /*
+ * Initialize state related to tracking all-visible page skipping. This is
+ * very important to determine whether or not it is safe to advance the
+ * relfrozenxid/relminmxid.
+ */
vacrel->skippedallvis = false;
skipwithvm = true;
if (params->options & VACOPT_DISABLE_PAGE_SKIPPING)
@@ -539,6 +791,13 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->skipwithvm = skipwithvm;
+ /*
+ * Set up eager scan tracking state. This must happen after determining
+ * whether or not the vacuum must be aggressive, because only normal
+ * vacuums use the eager scan algorithm.
+ */
+ heap_vacuum_eager_scan_setup(vacrel, params);
+
if (verbose)
{
if (vacrel->aggressive)
@@ -734,12 +993,14 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
vacrel->relnamespace,
vacrel->relname,
vacrel->num_index_scans);
- appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total)\n"),
+ appendStringInfo(&buf, _("pages: %u removed, %u remain, %u scanned (%.2f%% of total), %u eagerly scanned\n"),
vacrel->removed_pages,
new_rel_pages,
vacrel->scanned_pages,
orig_rel_pages == 0 ? 100.0 :
- 100.0 * vacrel->scanned_pages / orig_rel_pages);
+ 100.0 * vacrel->scanned_pages /
+ orig_rel_pages,
+ vacrel->eager_scanned_pages);
appendStringInfo(&buf,
_("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
(long long) vacrel->tuples_deleted,
@@ -910,8 +1171,10 @@ lazy_scan_heap(LVRelState *vacrel)
BlockNumber rel_pages = vacrel->rel_pages,
blkno,
next_fsm_block_to_vacuum = 0;
- bool all_visible_according_to_vm;
-
+ bool all_visible_according_to_vm,
+ was_eager_scanned = false;
+ BlockNumber orig_eager_scan_success_limit =
+ vacrel->eager_scan_remaining_successes; /* for logging */
Buffer vmbuffer = InvalidBuffer;
const int initprog_index[] = {
PROGRESS_VACUUM_PHASE,
@@ -930,16 +1193,21 @@ lazy_scan_heap(LVRelState *vacrel)
vacrel->current_block = InvalidBlockNumber;
vacrel->next_unskippable_block = InvalidBlockNumber;
vacrel->next_unskippable_allvis = false;
+ vacrel->next_unskippable_eager_scanned = false;
vacrel->next_unskippable_vmbuffer = InvalidBuffer;
- while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm))
+ while (heap_vac_scan_next_block(vacrel, &blkno, &all_visible_according_to_vm,
+ &was_eager_scanned))
{
Buffer buf;
Page page;
bool has_lpdead_items;
+ bool vm_page_frozen = false;
bool got_cleanup_lock = false;
vacrel->scanned_pages++;
+ if (was_eager_scanned)
+ vacrel->eager_scanned_pages++;
/* Report as block scanned, update error traceback information */
pgstat_progress_update_param(PROGRESS_VACUUM_HEAP_BLKS_SCANNED, blkno);
@@ -1064,7 +1332,56 @@ lazy_scan_heap(LVRelState *vacrel)
if (got_cleanup_lock)
lazy_scan_prune(vacrel, buf, blkno, page,
vmbuffer, all_visible_according_to_vm,
- &has_lpdead_items);
+ &has_lpdead_items, &vm_page_frozen);
+
+ /*
+ * Count an eagerly scanned page as a failure or a success.
+ *
+ * Only lazy_scan_prune() freezes pages, so if we didn't get the
+ * cleanup lock, we won't have frozen the page. However, we only count
+ * pages that were too new to require freezing as eager freeze
+ * failures.
+ *
+ * We could gather more information from lazy_scan_noprune() about
+ * whether or not there were tuples with XIDs or MXIDs older than the
+ * FreezeLimit or MultiXactCutoff. However, for simplicity, we simply
+ * exclude pages skipped due to cleanup lock contention from eager
+ * freeze algorithm caps.
+ */
+ if (got_cleanup_lock && was_eager_scanned)
+ {
+ /* Aggressive vacuums do not eager scan. */
+ Assert(!vacrel->aggressive);
+
+ if (vm_page_frozen)
+ {
+ Assert(vacrel->eager_scan_remaining_successes > 0);
+ vacrel->eager_scan_remaining_successes--;
+
+ if (vacrel->eager_scan_remaining_successes == 0)
+ {
+ /*
+ * If we hit our success cap, permanently disable eager
+ * scanning by setting the other eager scan management
+ * fields to their disabled values.
+ */
+ vacrel->eager_scan_remaining_fails = 0;
+ vacrel->next_eager_scan_region_start = InvalidBlockNumber;
+ vacrel->eager_scan_max_fails_per_region = 0;
+
+ ereport(vacrel->verbose ? INFO : DEBUG2,
+ (errmsg("disabling eager scanning after freezing %u eagerly scanned blocks of \"%s.%s.%s\"",
+ orig_eager_scan_success_limit,
+ vacrel->dbname, vacrel->relnamespace,
+ vacrel->relname)));
+ }
+ }
+ else
+ {
+ Assert(vacrel->eager_scan_remaining_fails > 0);
+ vacrel->eager_scan_remaining_fails--;
+ }
+ }
/*
* Now drop the buffer lock and, potentially, update the FSM.
@@ -1164,7 +1481,9 @@ lazy_scan_heap(LVRelState *vacrel)
*
* The block number and visibility status of the next block to process are set
* in *blkno and *all_visible_according_to_vm. The return value is false if
- * there are no further blocks to process.
+ * there are no further blocks to process. If the block is being eagerly
+ * scanned, was_eager_scanned is set so that the caller can count whether or
+ * not an eagerly scanned page is successfully frozen.
*
* vacrel is an in/out parameter here. Vacuum options and information about
* the relation are read. vacrel->skippedallvis is set if we skip a block
@@ -1174,13 +1493,16 @@ lazy_scan_heap(LVRelState *vacrel)
*/
static bool
heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
- bool *all_visible_according_to_vm)
+ bool *all_visible_according_to_vm,
+ bool *was_eager_scanned)
{
BlockNumber next_block;
/* relies on InvalidBlockNumber + 1 overflowing to 0 on first call */
next_block = vacrel->current_block + 1;
+ *was_eager_scanned = false;
+
/* Have we reached the end of the relation? */
if (next_block >= vacrel->rel_pages)
{
@@ -1253,6 +1575,7 @@ heap_vac_scan_next_block(LVRelState *vacrel, BlockNumber *blkno,
*blkno = vacrel->current_block = next_block;
*all_visible_according_to_vm = vacrel->next_unskippable_allvis;
+ *was_eager_scanned = vacrel->next_unskippable_eager_scanned;
return true;
}
}
@@ -1276,11 +1599,12 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
BlockNumber rel_pages = vacrel->rel_pages;
BlockNumber next_unskippable_block = vacrel->next_unskippable_block + 1;
Buffer next_unskippable_vmbuffer = vacrel->next_unskippable_vmbuffer;
+ bool next_unskippable_eager_scanned = false;
bool next_unskippable_allvis;
*skipsallvis = false;
- for (;;)
+ for (;; next_unskippable_block++)
{
uint8 mapbits = visibilitymap_get_status(vacrel->rel,
next_unskippable_block,
@@ -1288,6 +1612,19 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
next_unskippable_allvis = (mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0;
+ /*
+ * At the start of each eager scan region, normal vacuums with eager
+ * scanning enabled reset the failure counter, allowing vacuum to
+ * resume eager scanning if it had been suspended in the previous
+ * region.
+ */
+ if (next_unskippable_block >= vacrel->next_eager_scan_region_start)
+ {
+ vacrel->eager_scan_remaining_fails =
+ vacrel->eager_scan_max_fails_per_region;
+ vacrel->next_eager_scan_region_start += EAGER_SCAN_REGION_SIZE;
+ }
+
/*
* A block is unskippable if it is not all visible according to the
* visibility map.
@@ -1316,28 +1653,41 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
break;
/*
- * Aggressive VACUUM caller can't skip pages just because they are
- * all-visible. They may still skip all-frozen pages, which can't
- * contain XIDs < OldestXmin (XIDs that aren't already frozen by now).
+ * All-frozen pages cannot contain XIDs < OldestXmin (XIDs that aren't
+ * already frozen by now), so this page can be skipped.
*/
- if ((mapbits & VISIBILITYMAP_ALL_FROZEN) == 0)
- {
- if (vacrel->aggressive)
- break;
+ if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
+ continue;
- /*
- * All-visible block is safe to skip in non-aggressive case. But
- * remember that the final range contains such a block for later.
- */
- *skipsallvis = true;
+ /*
+ * Aggressive vacuums cannot skip any all-visible pages that are not
+ * also all-frozen.
+ */
+ if (vacrel->aggressive)
+ break;
+
+ /*
+ * Normal vacuums with eager scanning enabled only skip all-visible
+ * but not all-frozen pages if they have hit the failure limit for the
+ * current eager scan region.
+ */
+ if (vacrel->eager_scan_remaining_fails > 0)
+ {
+ next_unskippable_eager_scanned = true;
+ break;
}
- next_unskippable_block++;
+ /*
+ * All-visible blocks are safe to skip in a normal vacuum. But
+ * remember that the final range contains such a block for later.
+ */
+ *skipsallvis = true;
}
/* write the local variables back to vacrel */
vacrel->next_unskippable_block = next_unskippable_block;
vacrel->next_unskippable_allvis = next_unskippable_allvis;
+ vacrel->next_unskippable_eager_scanned = next_unskippable_eager_scanned;
vacrel->next_unskippable_vmbuffer = next_unskippable_vmbuffer;
}
@@ -1368,6 +1718,12 @@ find_next_unskippable_block(LVRelState *vacrel, bool *skipsallvis)
* lazy_scan_prune (or lazy_scan_noprune). Otherwise returns true, indicating
* that lazy_scan_heap is done processing the page, releasing lock on caller's
* behalf.
+ *
+ * No vm_page_frozen output parameter (like that passed to lazy_scan_prune())
+ * is passed here because neither empty nor new pages can be eagerly frozen.
+ * New pages are never frozen. Empty pages are always set frozen in the VM at
+ * the same time that they are set all-visible, and we don't eagerly scan
+ * frozen pages.
*/
static bool
lazy_scan_new_or_empty(LVRelState *vacrel, Buffer buf, BlockNumber blkno,
@@ -1507,6 +1863,10 @@ cmpOffsetNumbers(const void *a, const void *b)
*
* *has_lpdead_items is set to true or false depending on whether, upon return
* from this function, any LP_DEAD items are still present on the page.
+ *
+ * *vm_page_frozen is set to true if the page is newly set all-frozen in the
+ * VM. The caller currently only uses this for determining whether an eagerly
+ * scanned page was successfully set all-frozen.
*/
static void
lazy_scan_prune(LVRelState *vacrel,
@@ -1515,7 +1875,8 @@ lazy_scan_prune(LVRelState *vacrel,
Page page,
Buffer vmbuffer,
bool all_visible_according_to_vm,
- bool *has_lpdead_items)
+ bool *has_lpdead_items,
+ bool *vm_page_frozen)
{
Relation rel = vacrel->rel;
PruneFreezeResult presult;
@@ -1667,11 +2028,17 @@ lazy_scan_prune(LVRelState *vacrel,
{
vacrel->vm_new_visible_pages++;
if (presult.all_frozen)
+ {
vacrel->vm_new_visible_frozen_pages++;
+ *vm_page_frozen = true;
+ }
}
else if ((old_vmbits & VISIBILITYMAP_ALL_FROZEN) == 0 &&
presult.all_frozen)
+ {
vacrel->vm_new_frozen_pages++;
+ *vm_page_frozen = true;
+ }
}
/*
@@ -1759,6 +2126,7 @@ lazy_scan_prune(LVRelState *vacrel,
{
vacrel->vm_new_visible_pages++;
vacrel->vm_new_visible_frozen_pages++;
+ *vm_page_frozen = true;
}
/*
@@ -1766,7 +2134,10 @@ lazy_scan_prune(LVRelState *vacrel,
* above, so we don't need to test the value of old_vmbits.
*/
else
+ {
vacrel->vm_new_frozen_pages++;
+ *vm_page_frozen = true;
+ }
}
}
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e6745e6145c..a13a2d7f222 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -69,6 +69,7 @@ int vacuum_multixact_freeze_min_age;
int vacuum_multixact_freeze_table_age;
int vacuum_failsafe_age;
int vacuum_multixact_failsafe_age;
+double vacuum_max_eager_freeze_failure_rate;
/*
* Variables for cost-based vacuum delay. The defaults differ between
@@ -405,6 +406,11 @@ ExecVacuum(ParseState *pstate, VacuumStmt *vacstmt, bool isTopLevel)
/* user-invoked vacuum uses VACOPT_VERBOSE instead of log_min_duration */
params.log_min_duration = -1;
+ /*
+ * Later, in vacuum_rel(), we check if a reloption override was specified.
+ */
+ params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
+
/*
* Create special memory context for cross-transaction storage.
*
@@ -2165,6 +2171,15 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params,
}
}
+ /*
+ * Check if the vacuum_max_eager_freeze_failure_rate table storage
+ * parameter was specified (i.e. is >= 0). If so, it overrides the GUC value.
+ */
+ if (rel->rd_options != NULL &&
+ ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate >= 0)
+ params->max_eager_freeze_failure_rate =
+ ((StdRdOptions *) rel->rd_options)->vacuum_max_eager_freeze_failure_rate;
+
/*
* Set truncate option based on truncate reloption if it wasn't specified
* in VACUUM command, or when running in an autovacuum worker
diff --git a/src/backend/postmaster/autovacuum.c b/src/backend/postmaster/autovacuum.c
index 09ec9bb6990..ade2708b59e 100644
--- a/src/backend/postmaster/autovacuum.c
+++ b/src/backend/postmaster/autovacuum.c
@@ -2827,6 +2827,12 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
tab->at_params.is_wraparound = wraparound;
tab->at_params.log_min_duration = log_min_duration;
tab->at_params.toast_parent = InvalidOid;
+
+ /*
+ * Later, in vacuum_rel(), we check reloptions for any
+ * vacuum_max_eager_freeze_failure_rate override.
+ */
+ tab->at_params.max_eager_freeze_failure_rate = vacuum_max_eager_freeze_failure_rate;
tab->at_storage_param_vac_cost_limit = avopts ?
avopts->vacuum_cost_limit : 0;
tab->at_storage_param_vac_cost_delay = avopts ?
diff --git a/src/backend/utils/misc/guc_tables.c b/src/backend/utils/misc/guc_tables.c
index b59a3de49c5..382c774b245 100644
--- a/src/backend/utils/misc/guc_tables.c
+++ b/src/backend/utils/misc/guc_tables.c
@@ -4034,6 +4034,16 @@ struct config_real ConfigureNamesReal[] =
NULL, NULL, NULL
},
+ {
+ {"vacuum_max_eager_freeze_failure_rate", PGC_USERSET, VACUUM_FREEZING,
+ gettext_noop("Fraction of pages in a relation vacuum can scan and fail to freeze before disabling eager scanning."),
+ gettext_noop("A value of 0.0 disables eager scanning and a value of 1.0 will eagerly scan up to 100 percent of the all-visible pages in the relation. If vacuum successfully freezes these pages, the cap is lower than 100 percent, because the goal is to amortize page freezing across multiple vacuums.")
+ },
+ &vacuum_max_eager_freeze_failure_rate,
+ 0.03, 0.0, 1.0,
+ NULL, NULL, NULL
+ },
+
/* End-of-list marker */
{
{NULL, 0, 0, NULL, NULL}, NULL, 0.0, 0.0, 0.0, NULL, NULL, NULL
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 43492472455..f039eaa0c62 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -704,6 +704,7 @@ autovacuum_worker_slots = 16 # autovacuum worker slots to allocate
#vacuum_multixact_freeze_table_age = 150000000
#vacuum_multixact_freeze_min_age = 5000000
#vacuum_multixact_failsafe_age = 1600000000
+#vacuum_max_eager_freeze_failure_rate = 0.03 # 0 disables eager scanning
#------------------------------------------------------------------------------
# CLIENT CONNECTION DEFAULTS
diff --git a/src/bin/psql/tab-complete.in.c b/src/bin/psql/tab-complete.in.c
index 5f6897c8486..a9a81ab3c14 100644
--- a/src/bin/psql/tab-complete.in.c
+++ b/src/bin/psql/tab-complete.in.c
@@ -1390,10 +1390,12 @@ static const char *const table_storage_parameters[] = {
"toast.autovacuum_vacuum_threshold",
"toast.log_autovacuum_min_duration",
"toast.vacuum_index_cleanup",
+ "toast.vacuum_max_eager_freeze_failure_rate",
"toast.vacuum_truncate",
"toast_tuple_target",
"user_catalog_table",
"vacuum_index_cleanup",
+ "vacuum_max_eager_freeze_failure_rate",
"vacuum_truncate",
NULL
};
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 12d0b61950d..7dad14319a1 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -231,6 +231,13 @@ typedef struct VacuumParams
VacOptValue truncate; /* Truncate empty pages at the end */
Oid toast_parent; /* for privilege checks when recursing */
+ /*
+ * Fraction of pages in a relation that vacuum can eagerly scan and fail
+ * to freeze. Only applicable for table AMs using visibility maps. Derived
+ * from GUC or table storage parameter. 0 if disabled.
+ */
+ double max_eager_freeze_failure_rate;
+
/*
* The number of parallel vacuum workers. 0 by default which means choose
* based on the number of indexes. -1 indicates parallel vacuum is
@@ -297,6 +304,16 @@ extern PGDLLIMPORT int vacuum_multixact_freeze_table_age;
extern PGDLLIMPORT int vacuum_failsafe_age;
extern PGDLLIMPORT int vacuum_multixact_failsafe_age;
+/*
+ * Relevant for vacuums implementing eager scanning. Normal vacuums may
+ * eagerly scan some all-visible but not all-frozen pages. Since the goal
+ * is to freeze these pages, an eager scan that fails to set the page
+ * all-frozen in the VM is considered to have "failed". This is the
+ * fraction of pages in the relation vacuum may scan and fail to freeze
+ * before disabling eager scanning.
+ */
+extern PGDLLIMPORT double vacuum_max_eager_freeze_failure_rate;
+
/*
* Maximum value for default_statistics_target and per-column statistics
* targets. This is fairly arbitrary, mainly to prevent users from creating
diff --git a/src/include/utils/rel.h b/src/include/utils/rel.h
index 48b95f211f3..db3e504c3d2 100644
--- a/src/include/utils/rel.h
+++ b/src/include/utils/rel.h
@@ -344,6 +344,12 @@ typedef struct StdRdOptions
int parallel_workers; /* max number of parallel workers */
StdRdOptIndexCleanup vacuum_index_cleanup; /* controls index vacuuming */
bool vacuum_truncate; /* enables vacuum to truncate a relation */
+
+ /*
+ * Fraction of pages in a relation that vacuum can eagerly scan and fail
+ * to freeze. 0 if disabled, -1 if unspecified (the GUC value is then used).
+ */
+ double vacuum_max_eager_freeze_failure_rate;
} StdRdOptions;
#define HEAP_MIN_FILLFACTOR 10