mirror of
https://github.com/postgres/postgres.git
synced 2025-04-29 13:56:47 +03:00
Prevent concurrent SimpleLruTruncate() for any given SLRU.
The SimpleLruTruncate() header comment states the new coding rule. To achieve this, add locktype "frozenid" and two LWLocks. This closes a rare opportunity for data loss, which manifested as "apparent wraparound" or "could not access status of transaction" errors. Data loss is more likely in pg_multixact, due to released branches' thin margin between multiStopLimit and multiWrapLimit. If a user's physical replication primary logged ": apparent wraparound" messages, the user should rebuild standbys of that primary regardless of symptoms. At less risk is a cluster having emitted "not accepting commands" errors or "must be vacuumed" warnings at some point. One can test a cluster for this data loss by running VACUUM FREEZE in every database. Back-patch to 9.5 (all supported versions).

Discussion: https://postgr.es/m/20190218073103.GA1434723@rfd.leadboat.com
This commit is contained in:
parent
dea07098af
commit
e525770dd5
@ -8886,7 +8886,8 @@ SCRAM-SHA-256$<replaceable><iteration count></>:<replaceable><salt><
|
|||||||
and general database objects (identified by class OID and object OID,
|
and general database objects (identified by class OID and object OID,
|
||||||
in the same way as in <structname>pg_description</structname> or
|
in the same way as in <structname>pg_description</structname> or
|
||||||
<structname>pg_depend</structname>). Also, the right to extend a
|
<structname>pg_depend</structname>). Also, the right to extend a
|
||||||
relation is represented as a separate lockable object.
|
relation is represented as a separate lockable object, as is the right to
|
||||||
|
update <structname>pg_database</structname>.<structfield>datfrozenxid</structfield> and <structname>pg_database</structname>.<structfield>datminmxid</structfield>.
|
||||||
Also, <quote>advisory</> locks can be taken on numbers that have
|
Also, <quote>advisory</> locks can be taken on numbers that have
|
||||||
user-defined meanings.
|
user-defined meanings.
|
||||||
</para>
|
</para>
|
||||||
@ -8912,6 +8913,7 @@ SCRAM-SHA-256$<replaceable><iteration count></>:<replaceable><salt><
|
|||||||
Type of the lockable object:
|
Type of the lockable object:
|
||||||
<literal>relation</>,
|
<literal>relation</>,
|
||||||
<literal>extend</>,
|
<literal>extend</>,
|
||||||
|
<literal>frozenid</literal>,
|
||||||
<literal>page</>,
|
<literal>page</>,
|
||||||
<literal>tuple</>,
|
<literal>tuple</>,
|
||||||
<literal>transactionid</>,
|
<literal>transactionid</>,
|
||||||
|
@ -845,7 +845,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
|||||||
|
|
||||||
<tbody>
|
<tbody>
|
||||||
<row>
|
<row>
|
||||||
<entry morerows="62"><literal>LWLock</></entry>
|
<entry morerows="64"><literal>LWLock</></entry>
|
||||||
<entry><literal>ShmemIndexLock</></entry>
|
<entry><literal>ShmemIndexLock</></entry>
|
||||||
<entry>Waiting to find or allocate space in shared memory.</entry>
|
<entry>Waiting to find or allocate space in shared memory.</entry>
|
||||||
</row>
|
</row>
|
||||||
@ -1043,6 +1043,16 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
|||||||
<entry>Waiting to execute <function>txid_status</function> or update
|
<entry>Waiting to execute <function>txid_status</function> or update
|
||||||
the oldest transaction id available to it.</entry>
|
the oldest transaction id available to it.</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><literal>WrapLimitsVacuumLock</literal></entry>
|
||||||
|
<entry>Waiting to update limits on transaction id and multixact
|
||||||
|
consumption.</entry>
|
||||||
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><literal>NotifyQueueTailLock</literal></entry>
|
||||||
|
<entry>Waiting to update limit on notification message
|
||||||
|
storage.</entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry><literal>clog</></entry>
|
<entry><literal>clog</></entry>
|
||||||
<entry>Waiting for I/O on a clog (transaction status) buffer.</entry>
|
<entry>Waiting for I/O on a clog (transaction status) buffer.</entry>
|
||||||
@ -1118,7 +1128,7 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
|||||||
<entry>Waiting for TBM shared iterator lock.</entry>
|
<entry>Waiting for TBM shared iterator lock.</entry>
|
||||||
</row>
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry morerows="9"><literal>Lock</></entry>
|
<entry morerows="10"><literal>Lock</></entry>
|
||||||
<entry><literal>relation</></entry>
|
<entry><literal>relation</></entry>
|
||||||
<entry>Waiting to acquire a lock on a relation.</entry>
|
<entry>Waiting to acquire a lock on a relation.</entry>
|
||||||
</row>
|
</row>
|
||||||
@ -1126,6 +1136,12 @@ postgres 27093 0.0 0.0 30096 2752 ? Ss 11:34 0:00 postgres: ser
|
|||||||
<entry><literal>extend</></entry>
|
<entry><literal>extend</></entry>
|
||||||
<entry>Waiting to extend a relation.</entry>
|
<entry>Waiting to extend a relation.</entry>
|
||||||
</row>
|
</row>
|
||||||
|
<row>
|
||||||
|
<entry><literal>frozenid</literal></entry>
|
||||||
|
<entry>Waiting to
|
||||||
|
update <structname>pg_database</structname>.<structfield>datfrozenxid</structfield>
|
||||||
|
and <structname>pg_database</structname>.<structfield>datminmxid</structfield>.</entry>
|
||||||
|
</row>
|
||||||
<row>
|
<row>
|
||||||
<entry><literal>page</></entry>
|
<entry><literal>page</></entry>
|
||||||
<entry>Waiting to acquire a lock on a page of a relation.</entry>
|
<entry>Waiting to acquire a lock on a page of a relation.</entry>
|
||||||
|
@ -1164,6 +1164,14 @@ SimpleLruFlush(SlruCtl ctl, bool allow_redirtied)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Remove all segments before the one holding the passed page number
|
* Remove all segments before the one holding the passed page number
|
||||||
|
*
|
||||||
|
* All SLRUs prevent concurrent calls to this function, either with an LWLock
|
||||||
|
* or by calling it only as part of a checkpoint. Mutual exclusion must begin
|
||||||
|
* before computing cutoffPage. Mutual exclusion must end after any limit
|
||||||
|
* update that would permit other backends to write fresh data into the
|
||||||
|
* segment immediately preceding the one containing cutoffPage. Otherwise,
|
||||||
|
* when the SLRU is quite full, SimpleLruTruncate() might delete that segment
|
||||||
|
* after it has accrued freshly-written data.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
|
SimpleLruTruncate(SlruCtl ctl, int cutoffPage)
|
||||||
|
@ -347,8 +347,8 @@ ExtendSUBTRANS(TransactionId newestXact)
|
|||||||
/*
|
/*
|
||||||
* Remove all SUBTRANS segments before the one holding the passed transaction ID
|
* Remove all SUBTRANS segments before the one holding the passed transaction ID
|
||||||
*
|
*
|
||||||
* This is normally called during checkpoint, with oldestXact being the
|
* oldestXact is the oldest TransactionXmin of any running transaction. This
|
||||||
* oldest TransactionXmin of any running transaction.
|
* is called only during checkpoint.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
TruncateSUBTRANS(TransactionId oldestXact)
|
TruncateSUBTRANS(TransactionId oldestXact)
|
||||||
|
@ -224,19 +224,22 @@ typedef struct QueueBackendStatus
|
|||||||
/*
|
/*
|
||||||
* Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
|
* Shared memory state for LISTEN/NOTIFY (excluding its SLRU stuff)
|
||||||
*
|
*
|
||||||
* The AsyncQueueControl structure is protected by the AsyncQueueLock.
|
* The AsyncQueueControl structure is protected by the AsyncQueueLock and
|
||||||
|
* NotifyQueueTailLock.
|
||||||
*
|
*
|
||||||
* When holding the lock in SHARED mode, backends may only inspect their own
|
* When holding AsyncQueueLock in SHARED mode, backends may only inspect their
|
||||||
* entries as well as the head and tail pointers. Consequently we can allow a
|
* own entries as well as the head and tail pointers. Consequently we can
|
||||||
* backend to update its own record while holding only SHARED lock (since no
|
* allow a backend to update its own record while holding only SHARED lock
|
||||||
* other backend will inspect it).
|
* (since no other backend will inspect it).
|
||||||
*
|
*
|
||||||
* When holding the lock in EXCLUSIVE mode, backends can inspect the entries
|
* When holding AsyncQueueLock in EXCLUSIVE mode, backends can inspect the
|
||||||
* of other backends and also change the head and tail pointers.
|
* entries of other backends and also change the head pointer. When holding
|
||||||
|
* both AsyncQueueLock and NotifyQueueTailLock in EXCLUSIVE mode, backends can
|
||||||
|
* change the tail pointer.
|
||||||
*
|
*
|
||||||
* AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
|
* AsyncCtlLock is used as the control lock for the pg_notify SLRU buffers.
|
||||||
* In order to avoid deadlocks, whenever we need both locks, we always first
|
* In order to avoid deadlocks, whenever we need multiple locks, we first get
|
||||||
* get AsyncQueueLock and then AsyncCtlLock.
|
* NotifyQueueTailLock, then AsyncQueueLock, and lastly AsyncCtlLock.
|
||||||
*
|
*
|
||||||
* Each backend uses the backend[] array entry with index equal to its
|
* Each backend uses the backend[] array entry with index equal to its
|
||||||
* BackendId (which can range from 1 to MaxBackends). We rely on this to make
|
* BackendId (which can range from 1 to MaxBackends). We rely on this to make
|
||||||
@ -2013,6 +2016,10 @@ asyncQueueAdvanceTail(void)
|
|||||||
int newtailpage;
|
int newtailpage;
|
||||||
int boundary;
|
int boundary;
|
||||||
|
|
||||||
|
/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
|
||||||
|
LWLockAcquire(NotifyQueueTailLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
|
/* Compute the new tail. */
|
||||||
LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
|
LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
|
||||||
min = QUEUE_HEAD;
|
min = QUEUE_HEAD;
|
||||||
for (i = 1; i <= MaxBackends; i++)
|
for (i = 1; i <= MaxBackends; i++)
|
||||||
@ -2021,7 +2028,6 @@ asyncQueueAdvanceTail(void)
|
|||||||
min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
|
min = QUEUE_POS_MIN(min, QUEUE_BACKEND_POS(i));
|
||||||
}
|
}
|
||||||
oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
|
oldtailpage = QUEUE_POS_PAGE(QUEUE_TAIL);
|
||||||
QUEUE_TAIL = min;
|
|
||||||
LWLockRelease(AsyncQueueLock);
|
LWLockRelease(AsyncQueueLock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -2041,6 +2047,17 @@ asyncQueueAdvanceTail(void)
|
|||||||
*/
|
*/
|
||||||
SimpleLruTruncate(AsyncCtl, newtailpage);
|
SimpleLruTruncate(AsyncCtl, newtailpage);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Advertise the new tail. This changes asyncQueueIsFull()'s verdict for
|
||||||
|
* the segment immediately prior to the new tail, allowing fresh data into
|
||||||
|
* that segment.
|
||||||
|
*/
|
||||||
|
LWLockAcquire(AsyncQueueLock, LW_EXCLUSIVE);
|
||||||
|
QUEUE_TAIL = min;
|
||||||
|
LWLockRelease(AsyncQueueLock);
|
||||||
|
|
||||||
|
LWLockRelease(NotifyQueueTailLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -933,6 +933,14 @@ vac_update_datfrozenxid(void)
|
|||||||
bool bogus = false;
|
bool bogus = false;
|
||||||
bool dirty = false;
|
bool dirty = false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Restrict this task to one backend per database. This avoids race
|
||||||
|
* conditions that would move datfrozenxid or datminmxid backward. It
|
||||||
|
* avoids calling vac_truncate_clog() with a datfrozenxid preceding a
|
||||||
|
* datfrozenxid passed to an earlier vac_truncate_clog() call.
|
||||||
|
*/
|
||||||
|
LockDatabaseFrozenIds(ExclusiveLock);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the "min" calculation with GetOldestXmin, which is a
|
* Initialize the "min" calculation with GetOldestXmin, which is a
|
||||||
* reasonable approximation to the minimum relfrozenxid for not-yet-
|
* reasonable approximation to the minimum relfrozenxid for not-yet-
|
||||||
@ -1097,6 +1105,9 @@ vac_truncate_clog(TransactionId frozenXID,
|
|||||||
bool bogus = false;
|
bool bogus = false;
|
||||||
bool frozenAlreadyWrapped = false;
|
bool frozenAlreadyWrapped = false;
|
||||||
|
|
||||||
|
/* Restrict task to one backend per cluster; see SimpleLruTruncate(). */
|
||||||
|
LWLockAcquire(WrapLimitsVacuumLock, LW_EXCLUSIVE);
|
||||||
|
|
||||||
/* init oldest datoids to sync with my frozenXID/minMulti values */
|
/* init oldest datoids to sync with my frozenXID/minMulti values */
|
||||||
oldestxid_datoid = MyDatabaseId;
|
oldestxid_datoid = MyDatabaseId;
|
||||||
minmulti_datoid = MyDatabaseId;
|
minmulti_datoid = MyDatabaseId;
|
||||||
@ -1206,6 +1217,8 @@ vac_truncate_clog(TransactionId frozenXID,
|
|||||||
*/
|
*/
|
||||||
SetTransactionIdLimit(frozenXID, oldestxid_datoid);
|
SetTransactionIdLimit(frozenXID, oldestxid_datoid);
|
||||||
SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
|
SetMultiXactIdLimit(minMulti, minmulti_datoid, false);
|
||||||
|
|
||||||
|
LWLockRelease(WrapLimitsVacuumLock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -412,6 +412,21 @@ UnlockRelationForExtension(Relation relation, LOCKMODE lockmode)
|
|||||||
LockRelease(&tag, lockmode, false);
|
LockRelease(&tag, lockmode, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* LockDatabaseFrozenIds
|
||||||
|
*
|
||||||
|
* This restricts vac_update_datfrozenxid() to one backend per database at a time.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
LockDatabaseFrozenIds(LOCKMODE lockmode)
|
||||||
|
{
|
||||||
|
LOCKTAG tag;
|
||||||
|
|
||||||
|
SET_LOCKTAG_DATABASE_FROZEN_IDS(tag, MyDatabaseId);
|
||||||
|
|
||||||
|
(void) LockAcquire(&tag, lockmode, false, false);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* LockPage
|
* LockPage
|
||||||
*
|
*
|
||||||
@ -1015,6 +1030,11 @@ DescribeLockTag(StringInfo buf, const LOCKTAG *tag)
|
|||||||
tag->locktag_field2,
|
tag->locktag_field2,
|
||||||
tag->locktag_field1);
|
tag->locktag_field1);
|
||||||
break;
|
break;
|
||||||
|
case LOCKTAG_DATABASE_FROZEN_IDS:
|
||||||
|
appendStringInfo(buf,
|
||||||
|
_("pg_database.datfrozenxid of database %u"),
|
||||||
|
tag->locktag_field1);
|
||||||
|
break;
|
||||||
case LOCKTAG_PAGE:
|
case LOCKTAG_PAGE:
|
||||||
appendStringInfo(buf,
|
appendStringInfo(buf,
|
||||||
_("page %u of relation %u of database %u"),
|
_("page %u of relation %u of database %u"),
|
||||||
|
@ -495,7 +495,7 @@ RegisterLWLockTranches(void)
|
|||||||
|
|
||||||
if (LWLockTrancheArray == NULL)
|
if (LWLockTrancheArray == NULL)
|
||||||
{
|
{
|
||||||
LWLockTranchesAllocated = 64;
|
LWLockTranchesAllocated = 128;
|
||||||
LWLockTrancheArray = (char **)
|
LWLockTrancheArray = (char **)
|
||||||
MemoryContextAllocZero(TopMemoryContext,
|
MemoryContextAllocZero(TopMemoryContext,
|
||||||
LWLockTranchesAllocated * sizeof(char *));
|
LWLockTranchesAllocated * sizeof(char *));
|
||||||
|
@ -50,3 +50,5 @@ OldSnapshotTimeMapLock 42
|
|||||||
BackendRandomLock 43
|
BackendRandomLock 43
|
||||||
LogicalRepWorkerLock 44
|
LogicalRepWorkerLock 44
|
||||||
CLogTruncationLock 45
|
CLogTruncationLock 45
|
||||||
|
WrapLimitsVacuumLock 46
|
||||||
|
NotifyQueueTailLock 47
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
const char *const LockTagTypeNames[] = {
|
const char *const LockTagTypeNames[] = {
|
||||||
"relation",
|
"relation",
|
||||||
"extend",
|
"extend",
|
||||||
|
"frozenid",
|
||||||
"page",
|
"page",
|
||||||
"tuple",
|
"tuple",
|
||||||
"transactionid",
|
"transactionid",
|
||||||
@ -245,6 +246,17 @@ pg_lock_status(PG_FUNCTION_ARGS)
|
|||||||
nulls[8] = true;
|
nulls[8] = true;
|
||||||
nulls[9] = true;
|
nulls[9] = true;
|
||||||
break;
|
break;
|
||||||
|
case LOCKTAG_DATABASE_FROZEN_IDS:
|
||||||
|
values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
|
||||||
|
nulls[2] = true;
|
||||||
|
nulls[3] = true;
|
||||||
|
nulls[4] = true;
|
||||||
|
nulls[5] = true;
|
||||||
|
nulls[6] = true;
|
||||||
|
nulls[7] = true;
|
||||||
|
nulls[8] = true;
|
||||||
|
nulls[9] = true;
|
||||||
|
break;
|
||||||
case LOCKTAG_PAGE:
|
case LOCKTAG_PAGE:
|
||||||
values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
|
values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1);
|
||||||
values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
|
values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2);
|
||||||
|
@ -57,6 +57,9 @@ extern bool ConditionalLockRelationForExtension(Relation relation,
|
|||||||
LOCKMODE lockmode);
|
LOCKMODE lockmode);
|
||||||
extern int RelationExtensionLockWaiterCount(Relation relation);
|
extern int RelationExtensionLockWaiterCount(Relation relation);
|
||||||
|
|
||||||
|
/* Lock to recompute pg_database.datfrozenxid and datminmxid in the current database */
|
||||||
|
extern void LockDatabaseFrozenIds(LOCKMODE lockmode);
|
||||||
|
|
||||||
/* Lock a page (currently only used within indexes) */
|
/* Lock a page (currently only used within indexes) */
|
||||||
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
extern void LockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
||||||
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
extern bool ConditionalLockPage(Relation relation, BlockNumber blkno, LOCKMODE lockmode);
|
||||||
|
@ -141,6 +141,8 @@ typedef enum LockTagType
|
|||||||
/* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */
|
/* ID info for a relation is DB OID + REL OID; DB OID = 0 if shared */
|
||||||
LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */
|
LOCKTAG_RELATION_EXTEND, /* the right to extend a relation */
|
||||||
/* same ID info as RELATION */
|
/* same ID info as RELATION */
|
||||||
|
LOCKTAG_DATABASE_FROZEN_IDS, /* pg_database.datfrozenxid and datminmxid */
|
||||||
|
/* ID info for frozen IDs is DB OID */
|
||||||
LOCKTAG_PAGE, /* one page of a relation */
|
LOCKTAG_PAGE, /* one page of a relation */
|
||||||
/* ID info for a page is RELATION info + BlockNumber */
|
/* ID info for a page is RELATION info + BlockNumber */
|
||||||
LOCKTAG_TUPLE, /* one physical tuple */
|
LOCKTAG_TUPLE, /* one physical tuple */
|
||||||
@ -206,6 +208,14 @@ typedef struct LOCKTAG
|
|||||||
(locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
|
(locktag).locktag_type = LOCKTAG_RELATION_EXTEND, \
|
||||||
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||||
|
|
||||||
|
#define SET_LOCKTAG_DATABASE_FROZEN_IDS(locktag,dboid) \
|
||||||
|
((locktag).locktag_field1 = (dboid), \
|
||||||
|
(locktag).locktag_field2 = 0, \
|
||||||
|
(locktag).locktag_field3 = 0, \
|
||||||
|
(locktag).locktag_field4 = 0, \
|
||||||
|
(locktag).locktag_type = LOCKTAG_DATABASE_FROZEN_IDS, \
|
||||||
|
(locktag).locktag_lockmethodid = DEFAULT_LOCKMETHOD)
|
||||||
|
|
||||||
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
|
#define SET_LOCKTAG_PAGE(locktag,dboid,reloid,blocknum) \
|
||||||
((locktag).locktag_field1 = (dboid), \
|
((locktag).locktag_field1 = (dboid), \
|
||||||
(locktag).locktag_field2 = (reloid), \
|
(locktag).locktag_field2 = (reloid), \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user