diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index 791c7476387..25478510f3a 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -9095,7 +9095,8 @@ SCRAM-SHA-256$<iteration count>:&l
and general database objects (identified by class OID and object OID,
in the same way as in pg_description or
pg_depend). Also, the right to extend a
- relation is represented as a separate lockable object.
+ relation is represented as a separate lockable object, as is the right to
+ update pg_database.datfrozenxid.
Also, advisory
locks can be taken on numbers that have
user-defined meanings.
@@ -9121,6 +9122,7 @@ SCRAM-SHA-256$<iteration count>:&l
Type of the lockable object:
relation,
extend,
+ frozenid,
page,
tuple,
transactionid,
diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 4e87de2236a..0de92cd1dd8 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -885,7 +885,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
- LWLock
+ LWLock
ShmemIndexLock
Waiting to find or allocate space in shared memory.
@@ -1079,6 +1079,16 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
Waiting to execute txid_status or update
the oldest transaction id available to it.
+
+ WrapLimitsVacuumLock
+ Waiting to update limits on transaction id and multixact
+ consumption.
+
+
+ NotifyQueueTailLock
+ Waiting to update limit on notification message
+ storage.
+
clog
Waiting for I/O on a clog (transaction status) buffer.
@@ -1169,7 +1179,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
counters during Parallel Hash plan execution.
- Lock
+ Lock
relation
Waiting to acquire a lock on a relation.
@@ -1177,6 +1187,12 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
extend
Waiting to extend a relation.
+
+ frozenid
+ Waiting to
+ update pg_database.datfrozenxid
+ and pg_database.datminmxid.
+
page
Waiting to acquire a lock on page of a relation.
diff --git a/src/backend/access/transam/slru.c b/src/backend/access/transam/slru.c
index 3903a130a1f..f78a90f3336 100644
--- a/src/backend/access/transam/slru.c
+++ b/src/backend/access/transam/slru.c
@@ -1180,6 +1180,14 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
/*
* Remove all segments before the one holding the passed page number
+ *
+ * All SLRUs prevent concurrent calls to this function, either with an LWLock
+ * or by calling it only as part of a checkpoint. Mutual exclusion must begin
+ * before computing cutoffPage. Mutual exclusion must end after any limit
+ * update that would permit other backends to write fresh data into the
+ * segment immediately preceding the one containing cutoffPage. Otherwise,
+ * when the SLRU is quite full, SimpleLruTruncate() might delete that segment
+ * after it has accrued freshly-written data.
*/
void
SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
diff --git a/src/backend/access/transam/subtrans.c b/src/backend/access/transam/subtrans.c
index e667fd02385..81594271b61 100644
--- a/src/backend/access/transam/subtrans.c
+++ b/src/backend/access/transam/subtrans.c
@@ -349,8 +349,8 @@ ExtendSUBTRANS(TransactionId newestXact)
/*
* Remove all SUBTRANS segments before the one holding the passed transaction ID
*
- * This is normally called during checkpoint, with oldestXact being the
- * oldest TransactionXmin of any running transaction.
+ * oldestXact is the oldest TransactionXmin of any running transaction. This
+ * is called only during checkpoint.
*/
void
TruncateSUBTRANS(TransactionId oldestXact)
diff --git a/src/backend/commands/async.c b/src/backend/commands/async.c
index fb944911ee9..8e1c821cc5d 100644
--- a/src/backend/commands/async.c
+++ b/src/backend/commands/async.c
@@ -223,19 +223,22 @@ typedef struct QueueBackendStatus
/*
* Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
*
- * The AsyncQueueControl structure is protected by the AsyncQueueLock.
+ * The AsyncQueueControl structure is protected by the AsyncQueueLock and
+ * NotifyQueueTailLock.
*
- * When holding the lock in SHARED mode, backends may only inspect their own
- * entries as well as the head and tail pointers. Consequently we can allow a
- * backend to update its own record while holding only SHARED lock (since no
- * other backend will inspect it).
+ * When holding AsyncQueueLock in SHARED mode, backends may only inspect their
+ * own entries as well as the head and tail pointers. Consequently we can
+ * allow a backend to update its own record while holding only SHARED lock
+ * (since no other backend will inspect it).
*
- * When holding the lock in EXCLUSIVE mode, backends can inspect the entries
- * of other backends and also change the head and tail pointers.
+ * When holding AsyncQueueLock in EXCLUSIVE mode, backends can inspect the
+ * entries of other backends and also change the head pointer. When holding
+ * both AsyncQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends can
+ * change the tail pointer.
*
* AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
- * In order to avoid deadlocks, whenever we need both locks, we always first
- * get AsyncQueueLock and then AsyncCtlLock.
+ * In order to avoid deadlocks, whenever we need multiple locks, we first get
+ * NotifyQueueTailLock, then AsyncQueueLock, and lastly AsyncCtlLock.
*
* Each backend uses the backend[] array entry with index equal to its
* BackendId (which can range from 1 to MaxBackends). We rely on this to make
@@ -2012,6 +2015,10 @@ asyncQueueAdvanceTail(void)
int newtailpage;
int boundary;
+ /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+ LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
+
+ /* Compute the new tail. */
LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
min = QUEUE_HEAD;
for (i = 1; i <= MaxBackends; i++)
@@ -2020,7 +2027,6 @@ asyncQueueAdvanceTail(void)
min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
}
oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
- QUEUE_TAIL = min;
LWLockRelease(AsyncQueueLock);
/*
@@ -2040,6 +2046,17 @@ asyncQueueAdvanceTail(void)
*/
SimpleLruTruncate(AsyncCtl, newtailpage);
}
+
+ /*
+ * Advertise the new tail. This changes asyncQueueIsFull()'s verdict for
+ * the segment immediately prior to the new tail, allowing fresh data into
+ * that segment.
+ */
+ LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
+ QUEUE_TAIL = min;
+ LWLockRelease(AsyncQueueLock);
+
+ LWLockRelease(NotifyQueueTailLock);
}
/*
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 7c2ef933352..f5a0b600f9e 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1295,6 +1295,14 @@ vac_update_datfrozenxid(void)
bool bogus = false;
bool dirty = false;
+ /*
+ * Restrict this task to one backend per database. This avoids race
+ * conditions that would move datfrozenxid or datminmxid backward. It
+ * avoids calling vac_truncate_clog() with a datfrozenxid preceding a
+ * datfrozenxid passed to an earlier vac_truncate_clog() call.
+ */
+ LockDatabaseFrozenIds(ExclusiveLock);
+
/*
* Initialize the "min" calculation with GetOldestXmin, which is a
* reasonable approximation to the minimum relfrozenxid for not-yet-
@@ -1484,6 +1492,9 @@ vac_truncate_clog(TransactionId frozenXID,
bool bogus = false;
bool frozenAlreadyWrapped = false;
+ /* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
+ LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
+
/* init oldest datoids to sync with my frozenXID/minMulti values */
oldestxid_datoid = MyDatabaseId;
minmulti_datoid = MyDatabaseId;
@@ -1593,6 +1604,8 @@ vac_truncate_clog(TransactionId frozenXID,
*/
SetTransactionIdLimit(frozenXID, oldestxid_datoid);
SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
+
+ LWLockRelease(WrapLimitsVacuumLock);
}
diff --git a/src/backend/storage/lmgr/lmgr.c b/src/backend/storage/lmgr/lmgr.c
index 889841f4cc5..863a06845cc 100644
--- a/src/backend/storage/lmgr/lmgr.c
+++ b/src/backend/storage/lmgr/lmgr.c
@@ -460,6 +460,21 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
LockRelease(&tag, lockmode, false);
}
+/*
+ * LockDatabaseFrozenIds
+ *
+ * This allows one backend per database to execute vac_update_datfrozenxid().
+ */
+void
+LockDatabaseFrozenIds(LOCKMODE lockmode)
+{
+ LOCKTAG tag;
+
+ SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId);
+
+ (void) LockAcquire(&tag, lockmode, false, false);
+}
+
/*
* LockPage
*
@@ -1098,6 +1113,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
tag->locktag_field2,
tag->locktag_field1);
break;
+ case LOCKTAG_DATABASE_FROZEN_IDS:
+ appendStringInfo(buf,
+ _("pg_database.datfrozenxid of database %u"),
+ tag->locktag_field1);
+ break;
case LOCKTAG_PAGE:
appendStringInfo(buf,
_("page %u of relation %u of database %u"),
diff --git a/src/backend/storage/lmgr/lwlocknames.txt b/src/backend/storage/lmgr/lwlocknames.txt
index db478432291..9cfa7ef9c32 100644
--- a/src/backend/storage/lmgr/lwlocknames.txt
+++ b/src/backend/storage/lmgr/lwlocknames.txt
@@ -49,3 +49,6 @@ MultiXactTruncationLock 41
OldSnapshotTimeMapLock 42
LogicalRepWorkerLock 43
CLogTruncationLock 44
+# 45 was CLogTruncationLock until removal of BackendRandomLock
+WrapLimitsVacuumLock 46
+NotifyQueueTailLock 47
diff --git a/src/backend/utils/adt/lockfuncs.c b/src/backend/utils/adt/lockfuncs.c
index ffd1970f589..bb1add7dcfb 100644
--- a/src/backend/utils/adt/lockfuncs.c
+++ b/src/backend/utils/adt/lockfuncs.c
@@ -26,6 +26,7 @@
const char *const LockTagTypeNames[] = {
"relation",
"extend",
+ "frozenid",
"page",
"tuple",
"transactionid",
@@ -245,6 +246,17 @@ pg_lock_status(PG_FUNCTION_ARGS)
nulls[8] = true;
nulls[9] = true;
break;
+ case LOCKTAG_DATABASE_FROZEN_IDS:
+ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
+ nulls[2] = true;
+ nulls[3] = true;
+ nulls[4] = true;
+ nulls[5] = true;
+ nulls[6] = true;
+ nulls[7] = true;
+ nulls[8] = true;
+ nulls[9] = true;
+ break;
case LOCKTAG_PAGE:
values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
diff --git a/src/include/storage/lmgr.h b/src/include/storage/lmgr.h
index 099e18f2b75..3f42000bda8 100644
--- a/src/include/storage/lmgr.h
+++ b/src/include/storage/lmgr.h
@@ -59,6 +59,9 @@ extern bool ConditionalLockRelationForExtension(Relation relation,
LOCKMODE lockmode);
extern int RelationExtensionLockWaiterCount(Relation relation);
+/* Lock to recompute pg_database.datfrozenxid in the current database */
+extern void LockDatabaseFrozenIds(LOCKMODE lockmode);
+
/* Lock a page (currently only used within indexes) */
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
diff --git a/src/include/storage/lock.h b/src/include/storage/lock.h
index 986bb6433a0..5dc7f873680 100644
--- a/src/include/storage/lock.h
+++ b/src/include/storage/lock.h
@@ -139,6 +139,7 @@ typedef enum LockTagType
{
LOCKTAG_RELATION, /* whole relation */
LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */
+ LOCKTAG_DATABASE_FROZEN_IDS, /* pg_database.datfrozenxid */
LOCKTAG_PAGE, /* one page of a relation */
LOCKTAG_TUPLE, /* one physical tuple */
LOCKTAG_TRANSACTION, /* transaction (for waiting for xact done) */
@@ -195,6 +196,15 @@ typedef struct LOCKTAG
(locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+/* ID info for frozen IDs is DB OID */
+#define SET_LOCKTAG_DATABASE_FROZEN_IDS(locktag,dboid) \
+ ((locktag).locktag_field1 = (dboid), \
+ (locktag).locktag_field2 = 0, \
+ (locktag).locktag_field3 = 0, \
+ (locktag).locktag_field4 = 0, \
+ (locktag).locktag_type = LOCKTAG_DATABASE_FROZEN_IDS, \
+ (locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
+
/* ID info for a page is RELATION info + BlockNumber */
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
((locktag).locktag_field1 = (dboid), \