1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-24 01:29:19 +03:00

Teach autovacuum about multixact member wraparound.

The logic introduced in commit b69bf30b9b
and repaired in commits 669c7d20e6 and
7be47c56af helps to ensure that we don't
overwrite old multixact member information while it is still needed,
but a user who creates many large multixacts can still exhaust the
member space (and thus start getting errors) while autovacuum stands
idly by.

To fix this, progressively ramp down the effective value (but not the
actual contents) of autovacuum_multixact_freeze_max_age as member space
utilization increases.  This makes autovacuum more aggressive and also
reduces the threshold for a manual VACUUM to perform a full-table scan.

This patch leaves unsolved the problem of ensuring that emergency
autovacuums are triggered even when autovacuum=off.  We'll need to fix
that via a separate patch.

Thomas Munro and Robert Haas
This commit is contained in:
Robert Haas
2015-05-08 12:09:14 -04:00
parent 32c50af4cf
commit 3ecab37d97
5 changed files with 130 additions and 11 deletions

View File

@@ -628,6 +628,9 @@ HINT: Stop the postmaster and vacuum that database in single-user mode.
Like transaction IDs, multixact IDs are implemented as a Like transaction IDs, multixact IDs are implemented as a
32-bit counter and corresponding storage, all of which requires 32-bit counter and corresponding storage, all of which requires
careful aging management, storage cleanup, and wraparound handling. careful aging management, storage cleanup, and wraparound handling.
There is a separate storage area which holds the list of members in
each multixact, which also uses a 32-bit counter and which must also
be managed.
</para> </para>
<para> <para>
@@ -651,7 +654,10 @@ HINT: Stop the postmaster and vacuum that database in single-user mode.
As a safety device, a whole-table vacuum scan will occur for any table As a safety device, a whole-table vacuum scan will occur for any table
whose multixact-age is greater than whose multixact-age is greater than
<xref linkend="guc-autovacuum-multixact-freeze-max-age">. <xref linkend="guc-autovacuum-multixact-freeze-max-age">.
This will occur even if autovacuum is nominally disabled. This will occur even if autovacuum is nominally disabled. Whole-table
vacuum scans will also occur progressively for all tables, starting with
those that have the oldest multixact-age, if the amount of used member
storage space exceeds 25% of the addressable storage space.
</para> </para>
</sect3> </sect3>
</sect2> </sect2>

View File

@@ -166,6 +166,11 @@
(MXOffsetToFlagsOffset(xid) + MULTIXACT_FLAGBYTES_PER_GROUP + \ (MXOffsetToFlagsOffset(xid) + MULTIXACT_FLAGBYTES_PER_GROUP + \
((xid) % MULTIXACT_MEMBERS_PER_MEMBERGROUP) * sizeof(TransactionId)) ((xid) % MULTIXACT_MEMBERS_PER_MEMBERGROUP) * sizeof(TransactionId))
/* Multixact members wraparound thresholds. */
/*
 * Member-space usage at or below this value (one quarter of
 * MaxMultiXactOffset) requires no special action: the configured
 * autovacuum_multixact_freeze_max_age is used unchanged.
 */
#define MULTIXACT_MEMBER_SAFE_THRESHOLD (MaxMultiXactOffset / 4)
/*
 * Member-space usage at which the effective freeze max age has been ramped
 * all the way down to zero (MaxMultiXactOffset less one quarter of itself).
 * Between the two thresholds the effective value is scaled linearly.
 */
#define MULTIXACT_MEMBER_DANGER_THRESHOLD \
(MaxMultiXactOffset - MaxMultiXactOffset / 4)
/* /*
* Links to shared-memory data structures for MultiXact control * Links to shared-memory data structures for MultiXact control
@@ -2597,6 +2602,89 @@ find_multixact_start(MultiXactId multi)
return offset; return offset;
} }
/*
 * Report how many multixacts currently exist, and how many member entries
 * they occupy.
 *
 * *multixacts receives the distance from the oldest multixact ID to the
 * next one to be assigned; *members receives the distance from the start
 * offset of the oldest multixact to the next member offset to be assigned.
 */
static void
ReadMultiXactCounts(uint32 *multixacts, MultiXactOffset *members)
{
	MultiXactId firstMulti;
	MultiXactId upcomingMulti;
	MultiXactOffset upcomingOffset;
	MultiXactOffset firstOffset;

	/* Copy the shared counters while holding the generation lock. */
	LWLockAcquire(MultiXactGenLock, LW_SHARED);
	upcomingMulti = MultiXactState->nextMXact;
	firstMulti = MultiXactState->oldestMultiXactId;
	upcomingOffset = MultiXactState->nextOffset;
	LWLockRelease(MultiXactGenLock);

	/* The offset lookup is done only after the lock has been released. */
	firstOffset = find_multixact_start(firstMulti);

	*members = upcomingOffset - firstOffset;
	*multixacts = upcomingMulti - firstMulti;
}
/*
* Multixact members can be removed once the multixacts that refer to them
* are older than every datminxmid. autovacuum_multixact_freeze_max_age and
* vacuum_multixact_freeze_table_age work together to make sure we never have
* too many multixacts; we hope that, at least under normal circumstances,
* this will also be sufficient to keep us from using too many offsets.
* However, if the average multixact has many members, we might exhaust the
* members space while still using few enough members that these limits fail
* to trigger full table scans for relminmxid advancement. At that point,
* we'd have no choice but to start failing multixact-creating operations
* with an error.
*
* To prevent that, if more than a threshold portion of the members space is
* used, we effectively reduce autovacuum_multixact_freeze_max_age and
* to a value just less than the number of multixacts in use. We hope that
* this will quickly trigger autovacuuming on the table or tables with the
* oldest relminmxid, thus allowing datminmxid values to advance and removing
* some members.
*
* As the fraction of the member space currently in use grows, we become
* more aggressive in clamping this value. That not only causes autovacuum
* to ramp up, but also makes any manual vacuums the user issues more
* aggressive. This happens because vacuum_set_xid_limits() clamps the
* freeze table and and the minimum freeze age based on the effective
* autovacuum_multixact_freeze_max_age this function returns. In the worst
* case, we'll claim the freeze_max_age to zero, and every vacuum of any
* table will try to freeze every multixact.
*
* It's possible that these thresholds should be user-tunable, but for now
* we keep it simple.
*/
int
MultiXactMemberFreezeThreshold(void)
{
MultiXactOffset members;
uint32 multixacts;
uint32 victim_multixacts;
double fraction;
ReadMultiXactCounts(&multixacts, &members);
/* If member space utilization is low, no special action is required. */
if (members <= MULTIXACT_MEMBER_SAFE_THRESHOLD)
return autovacuum_multixact_freeze_max_age;
/*
* Compute a target for relminmxid advancement. The number of multixacts
* we try to eliminate from the system is based on how far we are past
* MULTIXACT_MEMBER_SAFE_THRESHOLD.
*/
fraction = (double) (members - MULTIXACT_MEMBER_SAFE_THRESHOLD) /
(MULTIXACT_MEMBER_DANGER_THRESHOLD - MULTIXACT_MEMBER_SAFE_THRESHOLD);
victim_multixacts = multixacts * fraction;
/* fraction could be > 1.0, but lowest possible freeze age is zero */
if (victim_multixacts > multixacts)
return 0;
return multixacts - victim_multixacts;
}
/* /*
* SlruScanDirectory callback. * SlruScanDirectory callback.
* This callback deletes segments that are outside the range determined by * This callback deletes segments that are outside the range determined by

View File

@@ -426,6 +426,7 @@ vacuum_set_xid_limits(Relation rel,
{ {
int freezemin; int freezemin;
int mxid_freezemin; int mxid_freezemin;
int effective_multixact_freeze_max_age;
TransactionId limit; TransactionId limit;
TransactionId safeLimit; TransactionId safeLimit;
MultiXactId mxactLimit; MultiXactId mxactLimit;
@@ -482,17 +483,24 @@ vacuum_set_xid_limits(Relation rel,
*freezeLimit = limit; *freezeLimit = limit;
/*
* Compute the multixact age for which freezing is urgent. This is
* normally autovacuum_multixact_freeze_max_age, but may be less if we
* are short of multixact member space.
*/
effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
/* /*
* Determine the minimum multixact freeze age to use: as specified by * Determine the minimum multixact freeze age to use: as specified by
* caller, or vacuum_multixact_freeze_min_age, but in any case not more * caller, or vacuum_multixact_freeze_min_age, but in any case not more
* than half autovacuum_multixact_freeze_max_age, so that autovacuums to * than half effective_multixact_freeze_max_age, so that autovacuums to
* prevent MultiXact wraparound won't occur too frequently. * prevent MultiXact wraparound won't occur too frequently.
*/ */
mxid_freezemin = multixact_freeze_min_age; mxid_freezemin = multixact_freeze_min_age;
if (mxid_freezemin < 0) if (mxid_freezemin < 0)
mxid_freezemin = vacuum_multixact_freeze_min_age; mxid_freezemin = vacuum_multixact_freeze_min_age;
mxid_freezemin = Min(mxid_freezemin, mxid_freezemin = Min(mxid_freezemin,
autovacuum_multixact_freeze_max_age / 2); effective_multixact_freeze_max_age / 2);
Assert(mxid_freezemin >= 0); Assert(mxid_freezemin >= 0);
/* compute the cutoff multi, being careful to generate a valid value */ /* compute the cutoff multi, being careful to generate a valid value */
@@ -501,7 +509,7 @@ vacuum_set_xid_limits(Relation rel,
mxactLimit = FirstMultiXactId; mxactLimit = FirstMultiXactId;
safeMxactLimit = safeMxactLimit =
ReadNextMultiXactId() - autovacuum_multixact_freeze_max_age; ReadNextMultiXactId() - effective_multixact_freeze_max_age;
if (safeMxactLimit < FirstMultiXactId) if (safeMxactLimit < FirstMultiXactId)
safeMxactLimit = FirstMultiXactId; safeMxactLimit = FirstMultiXactId;
@@ -556,7 +564,7 @@ vacuum_set_xid_limits(Relation rel,
if (freezetable < 0) if (freezetable < 0)
freezetable = vacuum_multixact_freeze_table_age; freezetable = vacuum_multixact_freeze_table_age;
freezetable = Min(freezetable, freezetable = Min(freezetable,
autovacuum_multixact_freeze_max_age * 0.95); effective_multixact_freeze_max_age * 0.95);
Assert(freezetable >= 0); Assert(freezetable >= 0);
/* /*

View File

@@ -303,10 +303,12 @@ static void do_autovacuum(void);
static void FreeWorkerInfo(int code, Datum arg); static void FreeWorkerInfo(int code, Datum arg);
static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map, static autovac_table *table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc); TupleDesc pg_class_desc,
int effective_multixact_freeze_max_age);
static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts, static void relation_needs_vacanalyze(Oid relid, AutoVacOpts *relopts,
Form_pg_class classForm, Form_pg_class classForm,
PgStat_StatTabEntry *tabentry, PgStat_StatTabEntry *tabentry,
int effective_multixact_freeze_max_age,
bool *dovacuum, bool *doanalyze, bool *wraparound); bool *dovacuum, bool *doanalyze, bool *wraparound);
static void autovacuum_do_vac_analyze(autovac_table *tab, static void autovacuum_do_vac_analyze(autovac_table *tab,
@@ -1147,7 +1149,7 @@ do_start_worker(void)
/* Also determine the oldest datminmxid we will consider. */ /* Also determine the oldest datminmxid we will consider. */
recentMulti = ReadNextMultiXactId(); recentMulti = ReadNextMultiXactId();
multiForceLimit = recentMulti - autovacuum_multixact_freeze_max_age; multiForceLimit = recentMulti - MultiXactMemberFreezeThreshold();
if (multiForceLimit < FirstMultiXactId) if (multiForceLimit < FirstMultiXactId)
multiForceLimit -= FirstMultiXactId; multiForceLimit -= FirstMultiXactId;
@@ -1936,6 +1938,7 @@ do_autovacuum(void)
BufferAccessStrategy bstrategy; BufferAccessStrategy bstrategy;
ScanKeyData key; ScanKeyData key;
TupleDesc pg_class_desc; TupleDesc pg_class_desc;
int effective_multixact_freeze_max_age;
/* /*
* StartTransactionCommand and CommitTransactionCommand will automatically * StartTransactionCommand and CommitTransactionCommand will automatically
@@ -1965,6 +1968,13 @@ do_autovacuum(void)
*/ */
pgstat_vacuum_stat(); pgstat_vacuum_stat();
/*
* Compute the multixact age for which freezing is urgent. This is
* normally autovacuum_multixact_freeze_max_age, but may be less if we
* are short of multixact member space.
*/
effective_multixact_freeze_max_age = MultiXactMemberFreezeThreshold();
/* /*
* Find the pg_database entry and select the default freeze ages. We use * Find the pg_database entry and select the default freeze ages. We use
* zero in template and nonconnectable databases, else the system-wide * zero in template and nonconnectable databases, else the system-wide
@@ -2057,6 +2067,7 @@ do_autovacuum(void)
/* Check if it needs vacuum or analyze */ /* Check if it needs vacuum or analyze */
relation_needs_vacanalyze(relid, relopts, classForm, tabentry, relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
effective_multixact_freeze_max_age,
&dovacuum, &doanalyze, &wraparound); &dovacuum, &doanalyze, &wraparound);
/* /*
@@ -2185,6 +2196,7 @@ do_autovacuum(void)
shared, dbentry); shared, dbentry);
relation_needs_vacanalyze(relid, relopts, classForm, tabentry, relation_needs_vacanalyze(relid, relopts, classForm, tabentry,
effective_multixact_freeze_max_age,
&dovacuum, &doanalyze, &wraparound); &dovacuum, &doanalyze, &wraparound);
/* ignore analyze for toast tables */ /* ignore analyze for toast tables */
@@ -2275,7 +2287,8 @@ do_autovacuum(void)
* the race condition is not closed but it is very small. * the race condition is not closed but it is very small.
*/ */
MemoryContextSwitchTo(AutovacMemCxt); MemoryContextSwitchTo(AutovacMemCxt);
tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc); tab = table_recheck_autovac(relid, table_toast_map, pg_class_desc,
effective_multixact_freeze_max_age);
if (tab == NULL) if (tab == NULL)
{ {
/* someone else vacuumed the table, or it went away */ /* someone else vacuumed the table, or it went away */
@@ -2482,7 +2495,8 @@ get_pgstat_tabentry_relid(Oid relid, bool isshared, PgStat_StatDBEntry *shared,
*/ */
static autovac_table * static autovac_table *
table_recheck_autovac(Oid relid, HTAB *table_toast_map, table_recheck_autovac(Oid relid, HTAB *table_toast_map,
TupleDesc pg_class_desc) TupleDesc pg_class_desc,
int effective_multixact_freeze_max_age)
{ {
Form_pg_class classForm; Form_pg_class classForm;
HeapTuple classTup; HeapTuple classTup;
@@ -2528,6 +2542,7 @@ table_recheck_autovac(Oid relid, HTAB *table_toast_map,
shared, dbentry); shared, dbentry);
relation_needs_vacanalyze(relid, avopts, classForm, tabentry, relation_needs_vacanalyze(relid, avopts, classForm, tabentry,
effective_multixact_freeze_max_age,
&dovacuum, &doanalyze, &wraparound); &dovacuum, &doanalyze, &wraparound);
/* ignore ANALYZE for toast tables */ /* ignore ANALYZE for toast tables */
@@ -2655,6 +2670,7 @@ relation_needs_vacanalyze(Oid relid,
AutoVacOpts *relopts, AutoVacOpts *relopts,
Form_pg_class classForm, Form_pg_class classForm,
PgStat_StatTabEntry *tabentry, PgStat_StatTabEntry *tabentry,
int effective_multixact_freeze_max_age,
/* output params below */ /* output params below */
bool *dovacuum, bool *dovacuum,
bool *doanalyze, bool *doanalyze,
@@ -2715,8 +2731,8 @@ relation_needs_vacanalyze(Oid relid,
: autovacuum_freeze_max_age; : autovacuum_freeze_max_age;
multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0) multixact_freeze_max_age = (relopts && relopts->multixact_freeze_max_age >= 0)
? Min(relopts->multixact_freeze_max_age, autovacuum_multixact_freeze_max_age) ? Min(relopts->multixact_freeze_max_age, effective_multixact_freeze_max_age)
: autovacuum_multixact_freeze_max_age; : effective_multixact_freeze_max_age;
av_enabled = (relopts ? relopts->enabled : true); av_enabled = (relopts ? relopts->enabled : true);

View File

@@ -126,6 +126,7 @@ extern void MultiXactAdvanceNextMXact(MultiXactId minMulti,
MultiXactOffset minMultiOffset); MultiXactOffset minMultiOffset);
extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB); extern void MultiXactAdvanceOldest(MultiXactId oldestMulti, Oid oldestMultiDB);
extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti); extern void MultiXactSetSafeTruncate(MultiXactId safeTruncateMulti);
extern int MultiXactMemberFreezeThreshold(void);
extern void multixact_twophase_recover(TransactionId xid, uint16 info, extern void multixact_twophase_recover(TransactionId xid, uint16 info,
void *recdata, uint32 len); void *recdata, uint32 len);