mirror of
https://github.com/postgres/postgres.git
synced 2025-07-03 20:02:46 +03:00
Prevent concurrent SimpleLruTruncate() for any given SLRU.
The SimpleLruTruncate() header comment states the new coding rule. To achieve this, add locktype "frozenid" and two LWLocks. This closes a rare opportunity for data loss, which manifested as "apparent wraparound" or "could not access status of transaction" errors. Data loss is more likely in pg_multixact, due to released branches' thin margin between multiStopLimit and multiWrapLimit. If a user's physical replication primary logged ": apparent wraparound" messages, the user should rebuild standbys of that primary regardless of symptoms. At less risk is a cluster having emitted "not accepting commands" errors or "must be vacuumed" warnings at some point. One can test a cluster for this data loss by running VACUUM FREEZE in every database. Back-patch to 9.5 (all supported versions). Discussion: https://postgr.es/m/20190218073103.GA1434723@rfd.leadboat.com
This commit is contained in:
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1180,6 +1180,14 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
 
 /*
  * Remove all segments before the one holding the passed page number
+ *
+ * All SLRUs prevent concurrent calls to this function, either with an LWLock
+ * or by calling it only as part of a checkpoint. Mutual exclusion must begin
+ * before computing cutoffPage. Mutual exclusion must end after any limit
+ * update that would permit other backends to write fresh data into the
+ * segment immediately preceding the one containing cutoffPage. Otherwise,
+ * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
+ * after it has accrued freshly-written data.
  */
 void
 SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -349,8 +349,8 @@ ExtendSUBTRANS(TransactionId newestXact)
 /*
  * Remove all SUBTRANS segments before the one holding the passed transaction ID
  *
- * This is normally called during checkpoint, with oldestXact being the
- * oldest TransactionXmin of any running transaction.
+ * oldestXact is the oldest TransactionXmin of any running transaction. This
+ * is called only during checkpoint.
  */
 void
 TruncateSUBTRANS(TransactionId oldestXact)
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -223,19 +223,22 @@ typedef struct QueueBackendStatus
 /*
  * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
  *
- * The AsyncQueueControl structure is protected by the AsyncQueueLock.
+ * The AsyncQueueControl structure is protected by the AsyncQueueLock and
+ * NotifyQueueTailLock.
  *
- * When holding the lock in SHARED mode, backends may only inspect their own
- * entries as well as the head and tail pointers. Consequently we can allow a
- * backend to update its own record while holding only SHARED lock (since no
- * other backend will inspect it).
+ * When holding AsyncQueueLock in SHARED mode, backends may only inspect their
+ * own entries as well as the head and tail pointers. Consequently we can
+ * allow a backend to update its own record while holding only SHARED lock
+ * (since no other backend will inspect it).
  *
- * When holding the lock in EXCLUSIVE mode, backends can inspect the entries
- * of other backends and also change the head and tail pointers.
+ * When holding AsyncQueueLock in EXCLUSIVE mode, backends can inspect the
+ * entries of other backends and also change the head pointer. When holding
+ * both AsyncQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends can
+ * change the tail pointer.
  *
  * AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
- * In order to avoid deadlocks, whenever we need both locks, we always first
- * get AsyncQueueLock and then AsyncCtlLock.
+ * In order to avoid deadlocks, whenever we need multiple locks, we first get
+ * NotifyQueueTailLock, then AsyncQueueLock, and lastly AsyncCtlLock.
  *
  * Each backend uses the backend[] array entry with index equal to its
  * BackendId (which can range from 1 to MaxBackends). We rely on this to make
@@ -2012,6 +2015,10 @@ asyncQueueAdvanceTail(void)
 	int			newtailpage;
 	int			boundary;
 
+	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+	LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
+
+	/* Compute the new tail. */
 	LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
 	min = QUEUE_HEAD;
 	for (i = 1; i <= MaxBackends; i++)
@@ -2020,7 +2027,6 @@ asyncQueueAdvanceTail(void)
 		min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
 	}
 	oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
-	QUEUE_TAIL = min;
 	LWLockRelease(AsyncQueueLock);
 
 	/*
@@ -2040,6 +2046,17 @@ asyncQueueAdvanceTail(void)
 		 */
 		SimpleLruTruncate(AsyncCtl, newtailpage);
 	}
+
+	/*
+	 * Advertise the new tail. This changes asyncQueueIsFull()'s verdict for
+	 * the segment immediately prior to the new tail, allowing fresh data into
+	 * that segment.
+	 */
+	LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
+	QUEUE_TAIL = min;
+	LWLockRelease(AsyncQueueLock);
+
+	LWLockRelease(NotifyQueueTailLock);
 }
 
 /*
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1295,6 +1295,14 @@ vac_update_datfrozenxid(void)
 	bool		bogus = false;
 	bool		dirty = false;
 
+	/*
+	 * Restrict this task to one backend per database. This avoids race
+	 * conditions that would move datfrozenxid or datminmxid backward. It
+	 * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+	 * datfrozenxid passed to an earlier vac_truncate_clog() call.
+	 */
+	LockDatabaseFrozenIds(ExclusiveLock);
+
 	/*
 	 * Initialize the "min" calculation with GetOldestXmin, which is a
 	 * reasonable approximation to the minimum relfrozenxid for not-yet-
@@ -1484,6 +1492,9 @@ vac_truncate_clog(TransactionId frozenXID,
 	bool		bogus = false;
 	bool		frozenAlreadyWrapped = false;
 
+	/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+	LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
+
 	/* init oldest datoids to sync with my frozenXID/minMulti values */
 	oldestxid_datoid = MyDatabaseId;
 	minmulti_datoid = MyDatabaseId;
@@ -1593,6 +1604,8 @@ vac_truncate_clog(TransactionId frozenXID,
 	 */
 	SetTransactionIdLimit(frozenXID, oldestxid_datoid);
 	SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+
+	LWLockRelease(WrapLimitsVacuumLock);
 }
 
 /*
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -460,6 +460,21 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
 	LockRelease(&tag, lockmode, false);
 }
 
+/*
+ * LockDatabaseFrozenIds
+ *
+ * This allows one backend per database to execute vac_update_datfrozenxid().
+ */
+void
+LockDatabaseFrozenIds(LOCKMODE lockmode)
+{
+	LOCKTAG		tag;
+
+	SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId);
+
+	(void) LockAcquire(&tag, lockmode, false, false);
+}
+
 /*
  * LockPage
  *
@@ -1098,6 +1113,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
 							 tag->locktag_field2,
 							 tag->locktag_field1);
 			break;
+		case LOCKTAG_DATABASE_FROZEN_IDS:
+			appendStringInfo(buf,
+							 _("pg_database.datfrozenxid of database %u"),
+							 tag->locktag_field1);
+			break;
 		case LOCKTAG_PAGE:
 			appendStringInfo(buf,
 							 _("page %u of relation %u of database %u"),
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -49,3 +49,6 @@ MultiXactTruncationLock 41
 OldSnapshotTimeMapLock 42
 LogicalRepWorkerLock 43
 CLogTruncationLock 44
+# 45 was CLogTruncationLock until removal of BackendRandomLock
+WrapLimitsVacuumLock 46
+NotifyQueueTailLock 47
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -26,6 +26,7 @@
 const char *const LockTagTypeNames[] = {
 	"relation",
 	"extend",
+	"frozenid",
 	"page",
 	"tuple",
 	"transactionid",
@@ -245,6 +246,17 @@ pg_lock_status(PG_FUNCTION_ARGS)
 				nulls[8] = true;
 				nulls[9] = true;
 				break;
+			case LOCKTAG_DATABASE_FROZEN_IDS:
+				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+				nulls[2] = true;
+				nulls[3] = true;
+				nulls[4] = true;
+				nulls[5] = true;
+				nulls[6] = true;
+				nulls[7] = true;
+				nulls[8] = true;
+				nulls[9] = true;
+				break;
 			case LOCKTAG_PAGE:
 				values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
 				values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
Reference in New Issue
Block a user