Prevent concurrent SimpleLruTruncate() for any given SLRU.

The SimpleLruTruncate() header comment states the new coding rule.  To
achieve this, add locktype "frozenid" and two LWLocks.  This closes a
rare opportunity for data loss, which manifested as "apparent
wraparound" or "could not access status of transaction" errors.  Data
loss is more likely in pg_multixact, due to released branches' thin
margin between multiStopLimit and multiWrapLimit.  If a user's physical
replication primary logged ":  apparent wraparound" messages, the user
should rebuild standbys of that primary regardless of symptoms.  At less
risk is a cluster having emitted "not accepting commands" errors or
"must be vacuumed" warnings at some point.  One can test a cluster for
this data loss by running VACUUM FREEZE in every database.  Back-patch
to 9.5 (all supported versions).

Discussion: https://postgr.es/m/20190218073103.GA1434723@rfd.leadboat.com
Noah Misch
2020-08-15 10:15:53 -07:00
parent d4d443b3bb
commit 566372b3d6
11 changed files with 117 additions and 13 deletions
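
The coding rule the new SimpleLruTruncate() header comment states reduces to a simple caller pattern: whoever truncates a given SLRU must first take a lock that at most one backend per cluster can hold, and must keep it across both the cutoff computation and the truncation itself. The sketch below illustrates that pattern only and is not code from the patch; SomeTruncateLock, SomeCtl and compute_cutoff_page() are placeholder names.

    static void
    truncate_some_slru(void)
    {
        int         cutoffPage;

        /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
        LWLockAcquire(SomeTruncateLock, LW_EXCLUSIVE);

        /* Compute the cutoff while no other backend can truncate this SLRU. */
        cutoffPage = compute_cutoff_page();

        /* Remove segments older than the cutoff page. */
        SimpleLruTruncate(SomeCtl, cutoffPage);

        LWLockRelease(SomeTruncateLock);
    }

Both files changed below follow this shape: asyncQueueAdvanceTail() serializes on the new NotifyQueueTailLock, and vac_truncate_clog() serializes on the new WrapLimitsVacuumLock.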

src/backend/commands/async.c

@@ -244,19 +244,22 @@ typedef struct QueueBackendStatus
 /*
  * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
  *
- * The AsyncQueueControl structure is protected by the NotifyQueueLock.
+ * The AsyncQueueControl structure is protected by the NotifyQueueLock and
+ * NotifyQueueTailLock.
  *
- * When holding the lock in SHARED mode, backends may only inspect their own
- * entries as well as the head and tail pointers. Consequently we can allow a
- * backend to update its own record while holding only SHARED lock (since no
- * other backend will inspect it).
+ * When holding NotifyQueueLock in SHARED mode, backends may only inspect
+ * their own entries as well as the head and tail pointers. Consequently we
+ * can allow a backend to update its own record while holding only SHARED lock
+ * (since no other backend will inspect it).
  *
- * When holding the lock in EXCLUSIVE mode, backends can inspect the entries
- * of other backends and also change the head and tail pointers.
+ * When holding NotifyQueueLock in EXCLUSIVE mode, backends can inspect the
+ * entries of other backends and also change the head pointer. When holding
+ * both NotifyQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends
+ * can change the tail pointer.
  *
  * NotifySLRULock is used as the control lock for the pg_notify SLRU buffers.
- * In order to avoid deadlocks, whenever we need both locks, we always first
- * get NotifyQueueLock and then NotifySLRULock.
+ * In order to avoid deadlocks, whenever we need multiple locks, we first get
+ * NotifyQueueTailLock, then NotifyQueueLock, and lastly NotifySLRULock.
  *
  * Each backend uses the backend[] array entry with index equal to its
  * BackendId (which can range from 1 to MaxBackends). We rely on this to make
@@ -2177,6 +2180,10 @@ asyncQueueAdvanceTail(void)
     int         newtailpage;
     int         boundary;
 
+    /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+    LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
+
+    /* Compute the new tail. */
     LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
     min = QUEUE_HEAD;
     for (BackendId i = QUEUE_FIRST_LISTENER; i > 0; i = QUEUE_NEXT_LISTENER(i))
@@ -2185,7 +2192,6 @@ asyncQueueAdvanceTail(void)
         min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
     }
     oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
-    QUEUE_TAIL = min;
     LWLockRelease(NotifyQueueLock);
 
     /*
@@ -2205,6 +2211,17 @@ asyncQueueAdvanceTail(void)
          */
         SimpleLruTruncate(NotifyCtl, newtailpage);
     }
+
+    /*
+     * Advertise the new tail. This changes asyncQueueIsFull()'s verdict for
+     * the segment immediately prior to the new tail, allowing fresh data into
+     * that segment.
+     */
+    LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
+    QUEUE_TAIL = min;
+    LWLockRelease(NotifyQueueLock);
+
+    LWLockRelease(NotifyQueueTailLock);
 }
 
 /*
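
The revised comment in this file fixes one lock-ordering rule for pg_notify: NotifyQueueTailLock, then NotifyQueueLock, then NotifySLRULock. No code path in the hunks above holds all three at once (SimpleLruTruncate() takes the SLRU control lock internally), so the following is only an illustration of the documented order for a hypothetical caller that needed all of them:

    /* Acquire in the documented order to avoid deadlock. */
    LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
    LWLockAcquire(NotifyQueueLock, LW_EXCLUSIVE);
    LWLockAcquire(NotifySLRULock, LW_EXCLUSIVE);

    /* ... work touching the tail pointer, per-backend entries, and SLRU buffers ... */

    /* Release in reverse order (not required for LWLocks, but conventional). */
    LWLockRelease(NotifySLRULock);
    LWLockRelease(NotifyQueueLock);
    LWLockRelease(NotifyQueueTailLock);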

src/backend/commands/vacuum.c

@@ -1361,6 +1361,14 @@ vac_update_datfrozenxid(void)
     bool        bogus = false;
     bool        dirty = false;
 
+    /*
+     * Restrict this task to one backend per database. This avoids race
+     * conditions that would move datfrozenxid or datminmxid backward. It
+     * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+     * datfrozenxid passed to an earlier vac_truncate_clog() call.
+     */
+    LockDatabaseFrozenIds(ExclusiveLock);
+
     /*
      * Initialize the "min" calculation with
      * GetOldestNonRemovableTransactionId(), which is a reasonable
@@ -1551,6 +1559,9 @@ vac_truncate_clog(TransactionId frozenXID,
     bool        bogus = false;
     bool        frozenAlreadyWrapped = false;
 
+    /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+    LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
+
     /* init oldest datoids to sync with my frozenXID/minMulti values */
     oldestxid_datoid = MyDatabaseId;
     minmulti_datoid = MyDatabaseId;
@@ -1660,6 +1671,8 @@ vac_truncate_clog(TransactionId frozenXID,
      */
     SetTransactionIdLimit(frozenXID, oldestxid_datoid);
     SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+
+    LWLockRelease(WrapLimitsVacuumLock);
 }
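
LockDatabaseFrozenIds() is introduced by this commit, but its definition lies outside the hunks shown here. Given the commit message's new "frozenid" locktype, the helper is presumably a thin wrapper that takes a heavyweight lock tagged with the current database; the macro name SET_LOCKTAG_DATABASE_FROZEN_IDS below is an assumption, not quoted from the patch.

    /* Sketch: serialize vac_update_datfrozenxid() to one backend per database. */
    void
    LockDatabaseFrozenIds(LOCKMODE lockmode)
    {
        LOCKTAG     tag;

        /* Assumed lock-tag macro; keys the lock on the current database OID. */
        SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId);

        (void) LockAcquire(&tag, lockmode, false, false);
    }

Because this is a regular heavyweight lock rather than an LWLock, it is released automatically at transaction end, which is why vac_update_datfrozenxid() needs no explicit unlock in the hunk above.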