diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml index f314f93051d..96e55193f19 100644 --- a/doc/src/sgml/catalogs.sgml +++ b/doc/src/sgml/catalogs.sgml @@ -8886,7 +8886,8 @@ SCRAM-SHA-256$<iteration count>:<salt>< and general database objects (identified by class OID and object OID, in the same way as in pg_description or pg_depend). Also, the right to extend a - relation is represented as a separate lockable object. + relation is represented as a separate lockable object, as is the right to + update pg_database.datfrozenxid. Also, advisory locks can be taken on numbers that have user-defined meanings. @@ -8912,6 +8913,7 @@ SCRAM-SHA-256$<iteration count>:<salt>< Type of the lockable object: relation, extend, + frozenid, page, tuple, transactionid, diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml index c492114f331..601616ec2ce 100644 --- a/doc/src/sgml/monitoring.sgml +++ b/doc/src/sgml/monitoring.sgml @@ -845,7 +845,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser - LWLock + LWLock ShmemIndexLock Waiting to find or allocate space in shared memory. @@ -1043,6 +1043,16 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser Waiting to execute txid_status or update the oldest transaction id available to it. + + WrapLimitsVacuumLock + Waiting to update limits on transaction id and multixact + consumption. + + + NotifyQueueTailLock + Waiting to update limit on notification message + storage. + clog Waiting for I/O on a clog (transaction status) buffer. @@ -1118,7 +1128,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser Waiting for TBM shared iterator lock. - Lock + Lock relation Waiting to acquire a lock on a relation. @@ -1126,6 +1136,12 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser extend Waiting to extend a relation. + + frozenid + Waiting to + update pg_database.datfrozenxid + and pg_database.datminmxid. + page Waiting to acquire a lock on page of a relation. diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c index 56ca77c4074..c7e3a76b35a 100644 --- a/src/backend/access/transam/slru.c +++ b/src/backend/access/transam/slru.c @@ -1164,6 +1164,14 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied) /* * Remove all segments before the one holding the passed page number + * + * All SLRUs prevent concurrent calls to this function, either with an LWLock + * or by calling it only as part of a checkpoint. Mutual exclusion must begin + * before computing cutoffPage. Mutual exclusion must end after any limit + * update that would permit other backends to write fresh data into the + * segment immediately preceding the one containing cutoffPage. Otherwise, + * when the SLRU is quite full, SimpleLruTruncate() might delete that segment + * after it has accrued freshly-written data. */ void SimpleLruTruncate(SlruCtl ctl, int cutoffPage) diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c index f6406611301..cfcab811a82 100644 --- a/src/backend/access/transam/subtrans.c +++ b/src/backend/access/transam/subtrans.c @@ -347,8 +347,8 @@ ExtendSUBTRANS(TransactionId newestXact) /* * Remove all SUBTRANS segments before the one holding the passed transaction ID * - * This is normally called during checkpoint, with oldestXact being the - * oldest TransactionXmin of any running transaction. + * oldestXact is the oldest TransactionXmin of any running transaction. This + * is called only during checkpoint. */ void TruncateSUBTRANS(TransactionId oldestXact) diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c index 9b5273a6de8..3d1f60c632a 100644 --- a/src/backend/commands/async.c +++ b/src/backend/commands/async.c @@ -224,19 +224,22 @@ typedef struct QueueBackendStatus /* * Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff) * - * The AsyncQueueControl structure is protected by the AsyncQueueLock. + * The AsyncQueueControl structure is protected by the AsyncQueueLock and + * NotifyQueueTailLock. * - * When holding the lock in SHARED mode, backends may only inspect their own - * entries as well as the head and tail pointers. Consequently we can allow a - * backend to update its own record while holding only SHARED lock (since no - * other backend will inspect it). + * When holding AsyncQueueLock in SHARED mode, backends may only inspect their + * own entries as well as the head and tail pointers. Consequently we can + * allow a backend to update its own record while holding only SHARED lock + * (since no other backend will inspect it). * - * When holding the lock in EXCLUSIVE mode, backends can inspect the entries - * of other backends and also change the head and tail pointers. + * When holding AsyncQueueLock in EXCLUSIVE mode, backends can inspect the + * entries of other backends and also change the head pointer. When holding + * both AsyncQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends can + * change the tail pointer. * * AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers. - * In order to avoid deadlocks, whenever we need both locks, we always first - * get AsyncQueueLock and then AsyncCtlLock. + * In order to avoid deadlocks, whenever we need multiple locks, we first get + * NotifyQueueTailLock, then AsyncQueueLock, and lastly AsyncCtlLock. * * Each backend uses the backend[] array entry with index equal to its * BackendId (which can range from 1 to MaxBackends). We rely on this to make @@ -2013,6 +2016,10 @@ asyncQueueAdvanceTail(void) int newtailpage; int boundary; + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE); + + /* Compute the new tail. */ LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE); min = QUEUE_HEAD; for (i = 1; i <= MaxBackends; i++) @@ -2021,7 +2028,6 @@ asyncQueueAdvanceTail(void) min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i)); } oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL); - QUEUE_TAIL = min; LWLockRelease(AsyncQueueLock); /* @@ -2041,6 +2047,17 @@ asyncQueueAdvanceTail(void) */ SimpleLruTruncate(AsyncCtl, newtailpage); } + + /* + * Advertise the new tail. This changes asyncQueueIsFull()'s verdict for + * the segment immediately prior to the new tail, allowing fresh data into + * that segment. + */ + LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE); + QUEUE_TAIL = min; + LWLockRelease(AsyncQueueLock); + + LWLockRelease(NotifyQueueTailLock); } /* diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c index 83e807e32aa..fa5a3a34bf4 100644 --- a/src/backend/commands/vacuum.c +++ b/src/backend/commands/vacuum.c @@ -933,6 +933,14 @@ vac_update_datfrozenxid(void) bool bogus = false; bool dirty = false; + /* + * Restrict this task to one backend per database. This avoids race + * conditions that would move datfrozenxid or datminmxid backward. It + * avoids calling vac_truncate_clog() with a datfrozenxid preceding a + * datfrozenxid passed to an earlier vac_truncate_clog() call. + */ + LockDatabaseFrozenIds(ExclusiveLock); + /* * Initialize the "min" calculation with GetOldestXmin, which is a * reasonable approximation to the minimum relfrozenxid for not-yet- @@ -1097,6 +1105,9 @@ vac_truncate_clog(TransactionId frozenXID, bool bogus = false; bool frozenAlreadyWrapped = false; + /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */ + LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE); + /* init oldest datoids to sync with my frozenXID/minMulti values */ oldestxid_datoid = MyDatabaseId; minmulti_datoid = MyDatabaseId; @@ -1206,6 +1217,8 @@ vac_truncate_clog(TransactionId frozenXID, */ SetTransactionIdLimit(frozenXID, oldestxid_datoid); SetMultiXactIdLimit(minMulti, minmulti_datoid, false); + + LWLockRelease(WrapLimitsVacuumLock); } diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c index 10e2d028fa8..9510442628c 100644 --- a/src/backend/storage/lmgr/lmgr.c +++ b/src/backend/storage/lmgr/lmgr.c @@ -412,6 +412,21 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode) LockRelease(&tag, lockmode, false); } +/* + * LockDatabaseFrozenIds + * + * This allows one backend per database to execute vac_update_datfrozenxid(). + */ +void +LockDatabaseFrozenIds(LOCKMODE lockmode) +{ + LOCKTAG tag; + + SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId); + + (void) LockAcquire(&tag, lockmode, false, false); +} + /* * LockPage * @@ -1015,6 +1030,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag) tag->locktag_field2, tag->locktag_field1); break; + case LOCKTAG_DATABASE_FROZEN_IDS: + appendStringInfo(buf, + _("pg_database.datfrozenxid of database %u"), + tag->locktag_field1); + break; case LOCKTAG_PAGE: appendStringInfo(buf, _("page %u of relation %u of database %u"), diff --git a/src/backend/storage/lmgr/lwlock.c b/src/backend/storage/lmgr/lwlock.c index f40af6a1d47..1965e093b04 100644 --- a/src/backend/storage/lmgr/lwlock.c +++ b/src/backend/storage/lmgr/lwlock.c @@ -495,7 +495,7 @@ RegisterLWLockTranches(void) if (LWLockTrancheArray == NULL) { - LWLockTranchesAllocated = 64; + LWLockTranchesAllocated = 128; LWLockTrancheArray = (char **) MemoryContextAllocZero(TopMemoryContext, LWLockTranchesAllocated * sizeof(char *)); diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt index e6025ecedb3..04a1786d372 100644 --- a/src/backend/storage/lmgr/lwlocknames.txt +++ b/src/backend/storage/lmgr/lwlocknames.txt @@ -50,3 +50,5 @@ OldSnapshotTimeMapLock 42 BackendRandomLock 43 LogicalRepWorkerLock 44 CLogTruncationLock 45 +WrapLimitsVacuumLock 46 +NotifyQueueTailLock 47 diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c index 9e0a8ab79d4..4ed5bf15f9e 100644 --- a/src/backend/utils/adt/lockfuncs.c +++ b/src/backend/utils/adt/lockfuncs.c @@ -26,6 +26,7 @@ const char *const LockTagTypeNames[] = { "relation", "extend", + "frozenid", "page", "tuple", "transactionid", @@ -245,6 +246,17 @@ pg_lock_status(PG_FUNCTION_ARGS) nulls[8] = true; nulls[9] = true; break; + case LOCKTAG_DATABASE_FROZEN_IDS: + values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + nulls[7] = true; + nulls[8] = true; + nulls[9] = true; + break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h index 0b923227a22..a37107903a6 100644 --- a/src/include/storage/lmgr.h +++ b/src/include/storage/lmgr.h @@ -57,6 +57,9 @@ extern bool ConditionalLockRelationForExtension(Relation relation, LOCKMODE lockmode); extern int RelationExtensionLockWaiterCount(Relation relation); +/* Lock to recompute pg_database.datfrozenxid in the current database */ +extern void LockDatabaseFrozenIds(LOCKMODE lockmode); + /* Lock a page (currently only used within indexes) */ extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode); diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h index 1cd3685fae8..cf1bed6ea4c 100644 --- a/src/include/storage/lock.h +++ b/src/include/storage/lock.h @@ -141,6 +141,8 @@ typedef enum LockTagType /* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */ LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */ /* same ID info as RELATION */ + LOCKTAG_DATABASE_FROZEN_IDS, /* pg_database.datfrozenxid */ + /* ID info for frozen IDs is DB OID */ LOCKTAG_PAGE, /* one page of a relation */ /* ID info for a page is RELATION info + BlockNumber */ LOCKTAG_TUPLE, /* one physical tuple */ @@ -206,6 +208,14 @@ typedef struct LOCKTAG (locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) +#define SET_LOCKTAG_DATABASE_FROZEN_IDS(locktag,dboid) \ + ((locktag).locktag_field1 = (dboid), \ + (locktag).locktag_field2 = 0, \ + (locktag).locktag_field3 = 0, \ + (locktag).locktag_field4 = 0, \ + (locktag).locktag_type = LOCKTAG_DATABASE_FROZEN_IDS, \ + (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD) + #define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \ ((locktag).locktag_field1 = (dboid), \ (locktag).locktag_field2 = (reloid), \