diff --git a/src/backend/access/heap/README.tuplock b/src/backend/access/heap/README.tuplock
index 750684d3398..ad835ff4820 100644
--- a/src/backend/access/heap/README.tuplock
+++ b/src/backend/access/heap/README.tuplock
@@ -201,3 +201,35 @@ wider than four bytes, and current readers don't need consistency across
 fields. Hence, they get by with just fetching each field once. XXX such a
 caller may also read a value that has not reached WAL; see
 systable_inplace_update_finish().
+
+During logical decoding, caches reflect an inplace update no later than the
+next XLOG_XACT_INVALIDATIONS. That record witnesses the end of a command.
+Tuples of its cmin are then visible to decoding, as are inplace updates of any
+lower LSN. Inplace updates of a higher LSN may also be visible, even if those
+updates would have been invisible to a non-historic snapshot matching
+decoding's historic snapshot. (In other words, decoding may see inplace
+updates that were not visible to a similar snapshot taken during original
+transaction processing.) That's a consequence of inplace update violating
+MVCC: there are no snapshot-specific versions of inplace-updated values. This
+all makes it hard to reason about inplace-updated column reads during logical
+decoding, but the behavior does suffice for relhasindex. A relhasindex=t in
+CREATE INDEX becomes visible no later than the new pg_index row. While it may
+be visible earlier, that's harmless. Finding zero indexes despite
+relhasindex=t is normal in more cases than this, e.g. after DROP INDEX.
+Example of a case that meaningfully reacts to the inplace inval:
+
+CREATE TABLE cat (c int) WITH (user_catalog_table = true);
+CREATE TABLE normal (d int);
+...
+CREATE INDEX ON cat (c)\; INSERT INTO normal VALUES (1);
+
+If the output plugin reads "cat" during decoding of the INSERT, it's fair to
+want that read to see relhasindex=t and use the new index.
+
+An alternative would be to have decoding of XLOG_HEAP_INPLACE immediately
+execute its invals. That would behave more like invals during original
+transaction processing. It would remove the decoding-specific delay in e.g. a
+decoding plugin witnessing a relfrozenxid change. However, a good use case
+for that is unlikely, since the plugin would still witness relfrozenxid
+changes prematurely. Hence, inplace update takes the trivial approach of
+delegating to XLOG_XACT_INVALIDATIONS.
diff --git a/src/backend/access/heap/heapam.c b/src/backend/access/heap/heapam.c
index cce38f482bd..731d504489b 100644
--- a/src/backend/access/heap/heapam.c
+++ b/src/backend/access/heap/heapam.c
@@ -6320,6 +6320,19 @@ heap_inplace_lock(Relation relation,
 
     Assert(BufferIsValid(buffer));
 
+    /*
+     * Register shared cache invals if necessary. Other sessions may finish
+     * inplace updates of this tuple between this step and LockTuple(). Since
+     * inplace updates don't change cache keys, that's harmless.
+     *
+     * While it's tempting to register invals only after confirming we can
+     * return true, the following obstacle precludes reordering steps that
+     * way. Registering invals might reach a CatalogCacheInitializeCache()
+     * that locks "buffer". That would hang indefinitely if running after our
+     * own LockBuffer(). Hence, we must register invals before LockBuffer().
+     */
+    CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);
+
     LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
     LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
 
@@ -6415,6 +6428,7 @@ heap_inplace_lock(Relation relation,
     if (!ret)
     {
         UnlockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
+        ForgetInplace_Inval();
         InvalidateCatalogSnapshot();
     }
     return ret;
@@ -6443,6 +6457,16 @@ heap_inplace_update_and_unlock(Relation relation,
     if (oldlen != newlen || htup->t_hoff != tuple->t_data->t_hoff)
         elog(ERROR, "wrong tuple length");
 
+    /*
+     * Unlink relcache init files as needed. If unlinking, acquire
+     * RelCacheInitLock until after associated invalidations. By doing this
+     * in advance, if we checkpoint and then crash between inplace
+     * XLogInsert() and inval, we don't rely on StartupXLOG() ->
+     * RelationCacheInitFileRemove(). That uses elevel==LOG, so replay would
+     * neglect to PANIC on EIO.
+     */
+    PreInplace_Inval();
+
     /* NO EREPORT(ERROR) from here till changes are logged */
     START_CRIT_SECTION();
 
@@ -6486,17 +6510,24 @@ heap_inplace_update_and_unlock(Relation relation,
         PageSetLSN(BufferGetPage(buffer), recptr);
     }
 
-    END_CRIT_SECTION();
-
-    heap_inplace_unlock(relation, oldtup, buffer);
+    LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
 
     /*
-     * Send out shared cache inval if necessary. Note that because we only
-     * pass the new version of the tuple, this mustn't be used for any
-     * operations that could change catcache lookup keys. But we aren't
-     * bothering with index updates either, so that's true a fortiori.
-     *
-     * XXX ROLLBACK discards the invalidation. See test inplace-inval.spec.
+     * Send invalidations to shared queue. SearchSysCacheLocked1() assumes we
+     * do this before UnlockTuple().
+     */
+    AtInplace_Inval();
+
+    END_CRIT_SECTION();
+    UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
+
+    AcceptInvalidationMessages();   /* local processing of just-sent inval */
+
+    /*
+     * Queue a transactional inval, for logical decoding and for third-party
+     * code that might have been relying on it since long before inplace
+     * update adopted immediate invalidation. See README.tuplock section
+     * "Reading inplace-updated columns" for logical decoding details.
      */
     if (!IsBootstrapProcessingMode())
         CacheInvalidateHeapTuple(relation, tuple, NULL);
@@ -6511,6 +6542,7 @@ heap_inplace_unlock(Relation relation,
 {
     LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
     UnlockTuple(relation, &oldtup->t_self, InplaceUpdateTupleLock);
+    ForgetInplace_Inval();
 }
 
 /*
diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c
index 4cecf630060..053a200d9cb 100644
--- a/src/backend/access/transam/xact.c
+++ b/src/backend/access/transam/xact.c
@@ -1358,14 +1358,24 @@ RecordTransactionCommit(void)
 
         /*
          * Transactions without an assigned xid can contain invalidation
-         * messages (e.g. explicit relcache invalidations or catcache
-         * invalidations for inplace updates); standbys need to process those.
-         * We can't emit a commit record without an xid, and we don't want to
-         * force assigning an xid, because that'd be problematic for e.g.
-         * vacuum. Hence we emit a bespoke record for the invalidations. We
-         * don't want to use that in case a commit record is emitted, so they
-         * happen synchronously with commits (besides not wanting to emit more
-         * WAL records).
+         * messages. While inplace updates do this, this is not known to be
+         * necessary; see comment at inplace CacheInvalidateHeapTuple().
+         * Extensions might still rely on this capability, and standbys may
+         * need to process those invals. We can't emit a commit record
+         * without an xid, and we don't want to force assigning an xid,
+         * because that'd be problematic for e.g. vacuum. Hence we emit a
+         * bespoke record for the invalidations. We don't want to use that in
+         * case a commit record is emitted, so they happen synchronously with
+         * commits (besides not wanting to emit more WAL records).
+         *
+         * XXX Every known use of this capability is a defect. Since an XID
+         * isn't controlling visibility of the change that prompted invals,
+         * other sessions need the inval even if this transaction aborts.
+         *
+         * ON COMMIT DELETE ROWS does a nontransactional index_build(), which
+         * queues a relcache inval, including in transactions without an xid
+         * that had read the (empty) table. Standbys don't need any ON COMMIT
+         * DELETE ROWS invals, but we've not done the work to withhold them.
          */
         if (nmsgs != 0)
         {
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index abd8eef0865..b625e8a3907 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2905,12 +2905,19 @@ index_update_stats(Relation rel,
     if (dirty)
     {
         systable_inplace_update_finish(state, tuple);
-        /* the above sends a cache inval message */
+        /* the above sends transactional and immediate cache inval messages */
     }
     else
     {
         systable_inplace_update_cancel(state);
-        /* no need to change tuple, but force relcache inval anyway */
+
+        /*
+         * While we didn't change relhasindex, CREATE INDEX needs a
+         * transactional inval for when the new index's catalog rows become
+         * visible. Other CREATE INDEX and REINDEX code happens to also queue
+         * this inval, but keep this in case rare callers rely on this part of
+         * our API contract.
+         */
         CacheInvalidateRelcacheByTuple(tuple);
     }
 
diff --git a/src/backend/commands/event_trigger.c b/src/backend/commands/event_trigger.c
index 72f975a7363..a87b77e0b1b 100644
--- a/src/backend/commands/event_trigger.c
+++ b/src/backend/commands/event_trigger.c
@@ -979,11 +979,6 @@ EventTriggerOnLogin(void)
                  * this instead of regular updates serves two purposes. First,
                  * that avoids possible waiting on the row-level lock. Second,
                  * that avoids dealing with TOAST.
-                 *
-                 * Changes made by inplace update may be lost due to
-                 * concurrent normal updates; see inplace-inval.spec. However,
-                 * we are OK with that. The subsequent connections will still
-                 * have a chance to set "dathasloginevt" to false.
                  */
                 systable_inplace_update_finish(state, tuple);
             }
diff --git a/src/backend/replication/logical/decode.c b/src/backend/replication/logical/decode.c
index 4911e98ec2d..02d4e4fd8ee 100644
--- a/src/backend/replication/logical/decode.c
+++ b/src/backend/replication/logical/decode.c
@@ -521,20 +521,13 @@ heap_decode(LogicalDecodingContext *ctx, XLogRecordBuffer *buf)
             /*
              * Inplace updates are only ever performed on catalog tuples and
              * can, per definition, not change tuple visibility. Since we
-             * don't decode catalog tuples, we're not interested in the
+             * also don't decode catalog tuples, we're not interested in the
              * record's contents.
-             *
-             * In-place updates can be used either by XID-bearing transactions
-             * (e.g. in CREATE INDEX CONCURRENTLY) or by XID-less
-             * transactions (e.g. VACUUM). In the former case, the commit
-             * record will include cache invalidations, so we mark the
-             * transaction as catalog modifying here. Currently that's
-             * redundant because the commit will do that as well, but once we
-             * support decoding in-progress relations, this will be important.
              */
             if (!TransactionIdIsValid(xid))
                 break;
 
+            /* PostgreSQL 13 was the last to need these actions. */
             (void) SnapBuildProcessChange(builder, xid, buf->origptr);
             ReorderBufferXidSetCatalogChanges(ctx->reorder, xid, buf->origptr);
             break;
diff --git a/src/backend/utils/cache/catcache.c b/src/backend/utils/cache/catcache.c
index 59d3825917b..5169ca72bc0 100644
--- a/src/backend/utils/cache/catcache.c
+++ b/src/backend/utils/cache/catcache.c
@@ -2356,7 +2356,8 @@ void
 PrepareToInvalidateCacheTuple(Relation relation,
                               HeapTuple tuple,
                               HeapTuple newtuple,
-                              void (*function) (int, uint32, Oid))
+                              void (*function) (int, uint32, Oid, void *),
+                              void *context)
 {
     slist_iter  iter;
     Oid         reloid;
@@ -2397,7 +2398,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
         hashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys,
                                                       tuple);
         dbid = ccp->cc_relisshared ? (Oid) 0 : MyDatabaseId;
 
-        (*function) (ccp->id, hashvalue, dbid);
+        (*function) (ccp->id, hashvalue, dbid, context);
 
         if (newtuple)
         {
@@ -2406,7 +2407,7 @@ PrepareToInvalidateCacheTuple(Relation relation,
             newhashvalue = CatalogCacheComputeTupleHashValue(ccp, ccp->cc_nkeys,
                                                              newtuple);
 
             if (newhashvalue != hashvalue)
-                (*function) (ccp->id, newhashvalue, dbid);
+                (*function) (ccp->id, newhashvalue, dbid, context);
         }
     }
 }
diff --git a/src/backend/utils/cache/inval.c b/src/backend/utils/cache/inval.c
index 66e04f973f6..4c8715bf672 100644
--- a/src/backend/utils/cache/inval.c
+++ b/src/backend/utils/cache/inval.c
@@ -94,6 +94,10 @@
  * worth trying to avoid sending such inval traffic in the future, if those
  * problems can be overcome cheaply.
  *
+ * When making a nontransactional change to a cacheable object, we must
+ * likewise send the invalidation immediately, before ending the change's
+ * critical section. This includes inplace heap updates, relmap, and smgr.
+ *
  * When wal_level=logical, write invalidations into WAL at each command end to
  * support the decoding of the in-progress transactions. See
  * CommandEndInvalidationMessages.
@@ -131,13 +135,15 @@
 
 /*
  * Pending requests are stored as ready-to-send SharedInvalidationMessages.
- * We keep the messages themselves in arrays in TopTransactionContext
- * (there are separate arrays for catcache and relcache messages). Control
- * information is kept in a chain of TransInvalidationInfo structs, also
- * allocated in TopTransactionContext. (We could keep a subtransaction's
- * TransInvalidationInfo in its CurTransactionContext; but that's more
- * wasteful not less so, since in very many scenarios it'd be the only
- * allocation in the subtransaction's CurTransactionContext.)
+ * We keep the messages themselves in arrays in TopTransactionContext (there
+ * are separate arrays for catcache and relcache messages). For transactional
+ * messages, control information is kept in a chain of TransInvalidationInfo
+ * structs, also allocated in TopTransactionContext. (We could keep a
+ * subtransaction's TransInvalidationInfo in its CurTransactionContext; but
+ * that's more wasteful not less so, since in very many scenarios it'd be the
+ * only allocation in the subtransaction's CurTransactionContext.) For
+ * inplace update messages, control information appears in an
+ * InvalidationInfo, allocated in CurrentMemoryContext.
  *
  * We can store the message arrays densely, and yet avoid moving data around
  * within an array, because within any one subtransaction we need only
@@ -148,7 +154,9 @@
  * struct. Similarly, we need distinguish messages of prior subtransactions
  * from those of the current subtransaction only until the subtransaction
  * completes, after which we adjust the array indexes in the parent's
- * TransInvalidationInfo to include the subtransaction's messages.
+ * TransInvalidationInfo to include the subtransaction's messages. Inplace
+ * invalidations don't need a concept of command or subtransaction boundaries,
+ * since we send them during the WAL insertion critical section.
  *
  * The ordering of the individual messages within a command's or
  * subtransaction's output is not considered significant, although this
@@ -201,7 +209,7 @@ typedef struct InvalidationMsgsGroup
 
 /*----------------
- * Invalidation messages are divided into two groups:
+ * Transactional invalidation messages are divided into two groups:
  *  1) events so far in current command, not yet reflected to caches.
  *  2) events in previous commands of current transaction; these have
  *     been reflected to local caches, and must be either broadcast to
@@ -217,26 +225,36 @@ typedef struct InvalidationMsgsGroup
  *----------------
  */
 
+/* fields common to both transactional and inplace invalidation */
+typedef struct InvalidationInfo
+{
+    /* Events emitted by current command */
+    InvalidationMsgsGroup CurrentCmdInvalidMsgs;
+
+    /* init file must be invalidated? */
+    bool        RelcacheInitFileInval;
+} InvalidationInfo;
+
+/* subclass adding fields specific to transactional invalidation */
 typedef struct TransInvalidationInfo
 {
+    /* Base class */
+    struct InvalidationInfo ii;
+
+    /* Events emitted by previous commands of this (sub)transaction */
+    InvalidationMsgsGroup PriorCmdInvalidMsgs;
+
     /* Back link to parent transaction's info */
     struct TransInvalidationInfo *parent;
 
     /* Subtransaction nesting depth */
     int         my_level;
-
-    /* Events emitted by current command */
-    InvalidationMsgsGroup CurrentCmdInvalidMsgs;
-
-    /* Events emitted by previous commands of this (sub)transaction */
-    InvalidationMsgsGroup PriorCmdInvalidMsgs;
-
-    /* init file must be invalidated? */
-    bool        RelcacheInitFileInval;
 } TransInvalidationInfo;
 
 static TransInvalidationInfo *transInvalInfo = NULL;
 
+static InvalidationInfo *inplaceInvalInfo = NULL;
+
 /* GUC storage */
 int         debug_discard_caches = 0;
 
@@ -544,9 +562,12 @@ ProcessInvalidationMessagesMulti(InvalidationMsgsGroup *group,
 static void
 RegisterCatcacheInvalidation(int cacheId,
                              uint32 hashValue,
-                             Oid dbId)
+                             Oid dbId,
+                             void *context)
 {
-    AddCatcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
+    InvalidationInfo *info = (InvalidationInfo *) context;
+
+    AddCatcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs,
                                    cacheId, hashValue, dbId);
 }
 
@@ -556,10 +577,9 @@ RegisterCatcacheInvalidation(int cacheId,
  * Register an invalidation event for all catcache entries from a catalog.
  */
 static void
-RegisterCatalogInvalidation(Oid dbId, Oid catId)
+RegisterCatalogInvalidation(InvalidationInfo *info, Oid dbId, Oid catId)
 {
-    AddCatalogInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                  dbId, catId);
+    AddCatalogInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, catId);
 }
 
 /*
@@ -568,10 +588,9 @@ RegisterCatalogInvalidation(Oid dbId, Oid catId)
  * As above, but register a relcache invalidation event.
  */
 static void
-RegisterRelcacheInvalidation(Oid dbId, Oid relId)
+RegisterRelcacheInvalidation(InvalidationInfo *info, Oid dbId, Oid relId)
 {
-    AddRelcacheInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                   dbId, relId);
+    AddRelcacheInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId);
 
     /*
      * Most of the time, relcache invalidation is associated with system
@@ -588,7 +607,7 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
      * as well. Also zap when we are invalidating whole relcache.
      */
     if (relId == InvalidOid || RelationIdIsInInitFile(relId))
-        transInvalInfo->RelcacheInitFileInval = true;
+        info->RelcacheInitFileInval = true;
 }
 
 /*
@@ -598,24 +617,27 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
  * Only needed for catalogs that don't have catcaches.
  */
 static void
-RegisterSnapshotInvalidation(Oid dbId, Oid relId)
+RegisterSnapshotInvalidation(InvalidationInfo *info, Oid dbId, Oid relId)
 {
-    AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
-                                   dbId, relId);
+    AddSnapshotInvalidationMessage(&info->CurrentCmdInvalidMsgs, dbId, relId);
 }
 
 /*
  * PrepareInvalidationState
  *      Initialize inval data for the current (sub)transaction.
  */
-static void
+static InvalidationInfo *
 PrepareInvalidationState(void)
 {
     TransInvalidationInfo *myInfo;
 
+    Assert(IsTransactionState());
+    /* Can't queue transactional message while collecting inplace messages. */
+    Assert(inplaceInvalInfo == NULL);
+
     if (transInvalInfo != NULL &&
         transInvalInfo->my_level == GetCurrentTransactionNestLevel())
-        return;
+        return (InvalidationInfo *) transInvalInfo;
 
     myInfo = (TransInvalidationInfo *)
         MemoryContextAllocZero(TopTransactionContext,
@@ -638,7 +660,7 @@ PrepareInvalidationState(void)
      * counter. This is a convenient place to check for that, as well as
      * being important to keep management of the message arrays simple.
      */
-    if (NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs) != 0)
+    if (NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs) != 0)
         elog(ERROR, "cannot start a subtransaction when there are unprocessed inval messages");
 
     /*
@@ -647,8 +669,8 @@ PrepareInvalidationState(void)
      * to update them to follow whatever is already in the arrays.
      */
     SetGroupToFollow(&myInfo->PriorCmdInvalidMsgs,
-                     &transInvalInfo->CurrentCmdInvalidMsgs);
-    SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs,
+                     &transInvalInfo->ii.CurrentCmdInvalidMsgs);
+    SetGroupToFollow(&myInfo->ii.CurrentCmdInvalidMsgs,
                      &myInfo->PriorCmdInvalidMsgs);
     }
     else
@@ -664,6 +686,41 @@ PrepareInvalidationState(void)
     }
 
     transInvalInfo = myInfo;
+    return (InvalidationInfo *) myInfo;
+}
+
+/*
+ * PrepareInplaceInvalidationState
+ *      Initialize inval data for an inplace update.
+ *
+ * See previous function for more background.
+ */
+static InvalidationInfo *
+PrepareInplaceInvalidationState(void)
+{
+    InvalidationInfo *myInfo;
+
+    Assert(IsTransactionState());
+    /* limit of one inplace update under assembly */
+    Assert(inplaceInvalInfo == NULL);
+
+    /* gone after WAL insertion CritSection ends, so use current context */
+    myInfo = (InvalidationInfo *) palloc0(sizeof(InvalidationInfo));
+
+    /* Stash our messages past end of the transactional messages, if any. */
+    if (transInvalInfo != NULL)
+        SetGroupToFollow(&myInfo->CurrentCmdInvalidMsgs,
+                         &transInvalInfo->ii.CurrentCmdInvalidMsgs);
+    else
+    {
+        InvalMessageArrays[CatCacheMsgs].msgs = NULL;
+        InvalMessageArrays[CatCacheMsgs].maxmsgs = 0;
+        InvalMessageArrays[RelCacheMsgs].msgs = NULL;
+        InvalMessageArrays[RelCacheMsgs].maxmsgs = 0;
+    }
+
+    inplaceInvalInfo = myInfo;
+    return myInfo;
 }
 
 /* ----------------------------------------------------------------
@@ -903,7 +960,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
      * after we send the SI messages. However, we need not do anything unless
      * we committed.
      */
-    *RelcacheInitFileInval = transInvalInfo->RelcacheInitFileInval;
+    *RelcacheInitFileInval = transInvalInfo->ii.RelcacheInitFileInval;
 
     /*
      * Collect all the pending messages into a single contiguous array of
@@ -914,7 +971,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
      * not new ones.
      */
     nummsgs = NumMessagesInGroup(&transInvalInfo->PriorCmdInvalidMsgs) +
-        NumMessagesInGroup(&transInvalInfo->CurrentCmdInvalidMsgs);
+        NumMessagesInGroup(&transInvalInfo->ii.CurrentCmdInvalidMsgs);
 
     *msgs = msgarray = (SharedInvalidationMessage *)
         MemoryContextAlloc(CurTransactionContext,
@@ -927,7 +984,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
                                          msgs,
                                          n * sizeof(SharedInvalidationMessage)),
                                   nmsgs += n));
-    ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+    ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 CatCacheMsgs,
                                 (memcpy(msgarray + nmsgs,
                                         msgs,
@@ -939,7 +996,7 @@ xactGetCommittedInvalidationMessages(SharedInvalidationMessage **msgs,
                                          msgs,
                                          n * sizeof(SharedInvalidationMessage)),
                                   nmsgs += n));
-    ProcessMessageSubGroupMulti(&transInvalInfo->CurrentCmdInvalidMsgs,
+    ProcessMessageSubGroupMulti(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 RelCacheMsgs,
                                 (memcpy(msgarray + nmsgs,
                                         msgs,
@@ -1025,7 +1082,9 @@ ProcessCommittedInvalidationMessages(SharedInvalidationMessage *msgs,
 void
 AtEOXact_Inval(bool isCommit)
 {
-    /* Quick exit if no messages */
+    inplaceInvalInfo = NULL;
+
+    /* Quick exit if no transactional messages */
     if (transInvalInfo == NULL)
         return;
 
@@ -1041,16 +1100,16 @@ AtEOXact_Inval(bool isCommit)
          * after we send the SI messages. However, we need not do anything
          * unless we committed.
          */
-        if (transInvalInfo->RelcacheInitFileInval)
+        if (transInvalInfo->ii.RelcacheInitFileInval)
             RelationCacheInitFilePreInvalidate();
 
         AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
-                                   &transInvalInfo->CurrentCmdInvalidMsgs);
+                                   &transInvalInfo->ii.CurrentCmdInvalidMsgs);
 
         ProcessInvalidationMessagesMulti(&transInvalInfo->PriorCmdInvalidMsgs,
                                          SendSharedInvalidMessages);
 
-        if (transInvalInfo->RelcacheInitFileInval)
+        if (transInvalInfo->ii.RelcacheInitFileInval)
             RelationCacheInitFilePostInvalidate();
     }
     else
@@ -1063,6 +1122,56 @@ AtEOXact_Inval(bool isCommit)
     transInvalInfo = NULL;
 }
 
+/*
+ * PreInplace_Inval
+ *      Process queued-up invalidations before inplace update critical section.
+ *
+ * Tasks belong here if they are safe even if the inplace update does not
+ * complete. Currently, this just unlinks a cache file, which can fail. The
+ * sum of this and AtInplace_Inval() mirrors AtEOXact_Inval(isCommit=true).
+ */
+void
+PreInplace_Inval(void)
+{
+    Assert(CritSectionCount == 0);
+
+    if (inplaceInvalInfo && inplaceInvalInfo->RelcacheInitFileInval)
+        RelationCacheInitFilePreInvalidate();
+}
+
+/*
+ * AtInplace_Inval
+ *      Process queued-up invalidations after inplace update buffer mutation.
+ */
+void
+AtInplace_Inval(void)
+{
+    Assert(CritSectionCount > 0);
+
+    if (inplaceInvalInfo == NULL)
+        return;
+
+    ProcessInvalidationMessagesMulti(&inplaceInvalInfo->CurrentCmdInvalidMsgs,
+                                     SendSharedInvalidMessages);
+
+    if (inplaceInvalInfo->RelcacheInitFileInval)
+        RelationCacheInitFilePostInvalidate();
+
+    inplaceInvalInfo = NULL;
+}
+
+/*
+ * ForgetInplace_Inval
+ *      Alternative to PreInplace_Inval()+AtInplace_Inval(): discard queued-up
+ *      invalidations. This lets inplace update enumerate invalidations
+ *      optimistically, before locking the buffer.
+ */
+void
+ForgetInplace_Inval(void)
+{
+    inplaceInvalInfo = NULL;
+}
+
 /*
  * AtEOSubXact_Inval
  *      Process queued-up invalidation messages at end of subtransaction.
@@ -1085,9 +1194,20 @@ void
 AtEOSubXact_Inval(bool isCommit)
 {
     int         my_level;
-    TransInvalidationInfo *myInfo = transInvalInfo;
+    TransInvalidationInfo *myInfo;
 
-    /* Quick exit if no messages. */
+    /*
+     * Successful inplace update must clear this, but we clear it on abort.
+     * Inplace updates allocate this in CurrentMemoryContext, which has
+     * lifespan <= subtransaction lifespan. Hence, don't free it explicitly.
+     */
+    if (isCommit)
+        Assert(inplaceInvalInfo == NULL);
+    else
+        inplaceInvalInfo = NULL;
+
+    /* Quick exit if no transactional messages. */
+    myInfo = transInvalInfo;
     if (myInfo == NULL)
         return;
 
@@ -1128,12 +1248,12 @@ AtEOSubXact_Inval(bool isCommit)
                                    &myInfo->PriorCmdInvalidMsgs);
 
         /* Must readjust parent's CurrentCmdInvalidMsgs indexes now */
-        SetGroupToFollow(&myInfo->parent->CurrentCmdInvalidMsgs,
+        SetGroupToFollow(&myInfo->parent->ii.CurrentCmdInvalidMsgs,
                          &myInfo->parent->PriorCmdInvalidMsgs);
 
         /* Pending relcache inval becomes parent's problem too */
-        if (myInfo->RelcacheInitFileInval)
-            myInfo->parent->RelcacheInitFileInval = true;
+        if (myInfo->ii.RelcacheInitFileInval)
+            myInfo->parent->ii.RelcacheInitFileInval = true;
 
         /* Pop the transaction state stack */
         transInvalInfo = myInfo->parent;
@@ -1180,7 +1300,7 @@ CommandEndInvalidationMessages(void)
     if (transInvalInfo == NULL)
         return;
 
-    ProcessInvalidationMessages(&transInvalInfo->CurrentCmdInvalidMsgs,
+    ProcessInvalidationMessages(&transInvalInfo->ii.CurrentCmdInvalidMsgs,
                                 LocalExecuteInvalidationMessage);
 
     /* WAL Log per-command invalidation messages for wal_level=logical */
@@ -1188,26 +1308,21 @@ CommandEndInvalidationMessages(void)
         LogLogicalInvalidations();
 
     AppendInvalidationMessages(&transInvalInfo->PriorCmdInvalidMsgs,
-                               &transInvalInfo->CurrentCmdInvalidMsgs);
+                               &transInvalInfo->ii.CurrentCmdInvalidMsgs);
 }
 
 /*
- * CacheInvalidateHeapTuple
- *      Register the given tuple for invalidation at end of command
- *      (ie, current command is creating or outdating this tuple).
- *      Also, detect whether a relcache invalidation is implied.
- *
- * For an insert or delete, tuple is the target tuple and newtuple is NULL.
- * For an update, we are called just once, with tuple being the old tuple
- * version and newtuple the new version. This allows avoidance of duplicate
- * effort during an update.
+ * CacheInvalidateHeapTupleCommon
+ *      Common logic for end-of-command and inplace variants.
  */
-void
-CacheInvalidateHeapTuple(Relation relation,
-                         HeapTuple tuple,
-                         HeapTuple newtuple)
+static void
+CacheInvalidateHeapTupleCommon(Relation relation,
+                               HeapTuple tuple,
+                               HeapTuple newtuple,
+                               InvalidationInfo *(*prepare_callback) (void))
 {
+    InvalidationInfo *info;
     Oid         tupleRelId;
     Oid         databaseId;
     Oid         relationId;
 
@@ -1231,11 +1346,8 @@ CacheInvalidateHeapTuple(Relation relation,
     if (IsToastRelation(relation))
         return;
 
-    /*
-     * If we're not prepared to queue invalidation messages for this
-     * subtransaction level, get ready now.
-     */
-    PrepareInvalidationState();
+    /* Allocate any required resources. */
+    info = prepare_callback();
 
     /*
      * First let the catcache do its thing
@@ -1244,11 +1356,12 @@ CacheInvalidateHeapTuple(Relation relation,
     if (RelationInvalidatesSnapshotsOnly(tupleRelId))
     {
         databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId;
-        RegisterSnapshotInvalidation(databaseId, tupleRelId);
+        RegisterSnapshotInvalidation(info, databaseId, tupleRelId);
     }
     else
         PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
-                                      RegisterCatcacheInvalidation);
+                                      RegisterCatcacheInvalidation,
+                                      (void *) info);
 
     /*
      * Now, is this tuple one of the primary definers of a relcache entry? See
@@ -1321,7 +1434,48 @@ CacheInvalidateHeapTuple(Relation relation,
     /*
      * Yes. We need to register a relcache invalidation event.
      */
-    RegisterRelcacheInvalidation(databaseId, relationId);
+    RegisterRelcacheInvalidation(info, databaseId, relationId);
+}
+
+/*
+ * CacheInvalidateHeapTuple
+ *      Register the given tuple for invalidation at end of command
+ *      (ie, current command is creating or outdating this tuple) and end of
+ *      transaction. Also, detect whether a relcache invalidation is implied.
+ *
+ * For an insert or delete, tuple is the target tuple and newtuple is NULL.
+ * For an update, we are called just once, with tuple being the old tuple
+ * version and newtuple the new version. This allows avoidance of duplicate
+ * effort during an update.
+ */
+void
+CacheInvalidateHeapTuple(Relation relation,
+                         HeapTuple tuple,
+                         HeapTuple newtuple)
+{
+    CacheInvalidateHeapTupleCommon(relation, tuple, newtuple,
+                                   PrepareInvalidationState);
+}
+
+/*
+ * CacheInvalidateHeapTupleInplace
+ *      Register the given tuple for nontransactional invalidation pertaining
+ *      to an inplace update. Also, detect whether a relcache invalidation is
+ *      implied.
+ *
+ * Like CacheInvalidateHeapTuple(), but for inplace updates.
+ *
+ * Just before and just after the inplace update, the tuple's cache keys must
+ * match those in key_equivalent_tuple. Cache keys consist of catcache lookup
+ * key columns and columns referencing pg_class.oid values,
+ * e.g. pg_constraint.conrelid, which would trigger relcache inval.
+ */
+void
+CacheInvalidateHeapTupleInplace(Relation relation,
+                                HeapTuple key_equivalent_tuple)
+{
+    CacheInvalidateHeapTupleCommon(relation, key_equivalent_tuple, NULL,
+                                   PrepareInplaceInvalidationState);
 }
 
 /*
@@ -1340,14 +1494,13 @@ CacheInvalidateCatalog(Oid catalogId)
 {
     Oid         databaseId;
 
-    PrepareInvalidationState();
-
     if (IsSharedRelation(catalogId))
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
 
-    RegisterCatalogInvalidation(databaseId, catalogId);
+    RegisterCatalogInvalidation(PrepareInvalidationState(),
+                                databaseId, catalogId);
 }
 
 /*
@@ -1365,15 +1518,14 @@ CacheInvalidateRelcache(Relation relation)
     Oid         databaseId;
     Oid         relationId;
 
-    PrepareInvalidationState();
-
     relationId = RelationGetRelid(relation);
     if (relation->rd_rel->relisshared)
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
 
-    RegisterRelcacheInvalidation(databaseId, relationId);
+    RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                 databaseId, relationId);
 }
 
 /*
@@ -1386,9 +1538,8 @@ CacheInvalidateRelcache(Relation relation)
 void
 CacheInvalidateRelcacheAll(void)
 {
-    PrepareInvalidationState();
-
-    RegisterRelcacheInvalidation(InvalidOid, InvalidOid);
+    RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                 InvalidOid, InvalidOid);
 }
 
 /*
@@ -1402,14 +1553,13 @@ CacheInvalidateRelcacheByTuple(HeapTuple classTuple)
     Oid         databaseId;
     Oid         relationId;
 
-    PrepareInvalidationState();
-
     relationId = classtup->oid;
     if (classtup->relisshared)
         databaseId = InvalidOid;
     else
         databaseId = MyDatabaseId;
 
-    RegisterRelcacheInvalidation(databaseId, relationId);
+    RegisterRelcacheInvalidation(PrepareInvalidationState(),
+                                 databaseId, relationId);
 }
 
 /*
@@ -1423,8 +1573,6 @@ CacheInvalidateRelcacheByRelid(Oid relid)
 {
     HeapTuple   tup;
 
-    PrepareInvalidationState();
-
     tup = SearchSysCache1(RELOID, ObjectIdGetDatum(relid));
     if (!HeapTupleIsValid(tup))
         elog(ERROR, "cache lookup failed for relation %u", relid);
@@ -1614,7 +1762,7 @@ LogLogicalInvalidations(void)
     if (transInvalInfo == NULL)
         return;
 
-    group = &transInvalInfo->CurrentCmdInvalidMsgs;
+    group = &transInvalInfo->ii.CurrentCmdInvalidMsgs;
     nmsgs = NumMessagesInGroup(group);
 
     if (nmsgs > 0)
diff --git a/src/backend/utils/cache/syscache.c b/src/backend/utils/cache/syscache.c
index 88c47a99965..f7f4f56a4d2 100644
--- a/src/backend/utils/cache/syscache.c
+++ b/src/backend/utils/cache/syscache.c
@@ -356,8 +356,7 @@ SearchSysCacheLocked1(int cacheId,
 
         /*
          * If an inplace update just finished, ensure we process the syscache
-         * inval. XXX this is insufficient: the inplace updater may not yet
-         * have reached AtEOXact_Inval(). See test at inplace-inval.spec.
+         * inval.
          *
          * If a heap_update() call just released its LOCKTAG_TUPLE, we'll
          * probably find the old tuple and reach "tuple concurrently updated".
diff --git a/src/include/utils/catcache.h b/src/include/utils/catcache.h
index 99169a93d91..b71d0109dfd 100644
--- a/src/include/utils/catcache.h
+++ b/src/include/utils/catcache.h
@@ -226,6 +226,7 @@ extern void CatCacheInvalidate(CatCache *cache, uint32 hashValue);
 extern void PrepareToInvalidateCacheTuple(Relation relation,
                                           HeapTuple tuple,
                                           HeapTuple newtuple,
-                                          void (*function) (int, uint32, Oid));
+                                          void (*function) (int, uint32, Oid, void *),
+                                          void *context);
 
 #endif                          /* CATCACHE_H */
diff --git a/src/include/utils/inval.h b/src/include/utils/inval.h
index 24695facf22..121ffa9b26c 100644
--- a/src/include/utils/inval.h
+++ b/src/include/utils/inval.h
@@ -28,6 +28,10 @@ extern void AcceptInvalidationMessages(void);
 
 extern void AtEOXact_Inval(bool isCommit);
 
+extern void PreInplace_Inval(void);
+extern void AtInplace_Inval(void);
+extern void ForgetInplace_Inval(void);
+
 extern void AtEOSubXact_Inval(bool isCommit);
 
 extern void PostPrepare_Inval(void);
@@ -37,6 +41,8 @@ extern void CommandEndInvalidationMessages(void);
 extern void CacheInvalidateHeapTuple(Relation relation,
                                      HeapTuple tuple,
                                      HeapTuple newtuple);
+extern void CacheInvalidateHeapTupleInplace(Relation relation,
+                                            HeapTuple key_equivalent_tuple);
 
 extern void CacheInvalidateCatalog(Oid catalogId);
diff --git a/src/test/isolation/expected/inplace-inval.out b/src/test/isolation/expected/inplace-inval.out
index e68eca5de98..c35895a8aa7 100644
--- a/src/test/isolation/expected/inplace-inval.out
+++ b/src/test/isolation/expected/inplace-inval.out
@@ -1,6 +1,6 @@
 Parsed test spec with 3 sessions
 
-starting permutation: cachefill3 cir1 cic2 ddl3
+starting permutation: cachefill3 cir1 cic2 ddl3 read1
 step cachefill3: TABLE newly_indexed;
 c
 -
@@ -9,6 +9,14 @@ c
 step cir1: BEGIN; CREATE INDEX i1 ON newly_indexed (c); ROLLBACK;
 step cic2: CREATE INDEX i2 ON newly_indexed (c);
 step ddl3: ALTER TABLE newly_indexed ADD extra int;
+step read1:
+	SELECT relhasindex FROM pg_class WHERE oid = 'newly_indexed'::regclass;
+
+relhasindex
+-----------
+t
+(1 row)
+
 
 starting permutation: cir1 cic2 ddl3 read1
 step cir1: BEGIN; CREATE INDEX i1 ON newly_indexed (c); ROLLBACK;
diff --git a/src/test/isolation/specs/inplace-inval.spec b/src/test/isolation/specs/inplace-inval.spec
index 96954fd86c4..b99112ddb88 100644
--- a/src/test/isolation/specs/inplace-inval.spec
+++ b/src/test/isolation/specs/inplace-inval.spec
@@ -1,7 +1,7 @@
-# If a heap_update() caller retrieves its oldtup from a cache, it's possible
-# for that cache entry to predate an inplace update, causing loss of that
-# inplace update. This arises because the transaction may abort before
-# sending the inplace invalidation message to the shared queue.
+# An inplace update had been able to abort before sending the inplace
+# invalidation message to the shared queue. If a heap_update() caller then
+# retrieved its oldtup from a cache, the heap_update() could revert the
+# inplace update.
 
 setup
 {
@@ -27,14 +27,12 @@ step cachefill3	{ TABLE newly_indexed; }
 step ddl3	{ ALTER TABLE newly_indexed ADD extra int; }
 
-# XXX shows an extant bug. Adding step read1 at the end would usually print
-# relhasindex=f (not wanted). This does not reach the unwanted behavior under
-# -DCATCACHE_FORCE_RELEASE and friends.
 permutation
 	cachefill3	# populates the pg_class row in the catcache
 	cir1	# sets relhasindex=true; rollback discards cache inval
 	cic2	# sees relhasindex=true, skips changing it (so no inval)
 	ddl3	# cached row as the oldtup of an update, losing relhasindex
+	read1	# observe damage
 
 # without cachefill3, no bug
 permutation cir1 cic2 ddl3 read1
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 72e6d3a9865..c0e48a7e200 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1254,6 +1254,7 @@ Interval
 IntervalAggState
 IntoClause
 InvalMessageArray
+InvalidationInfo
 InvalidationMsgsGroup
 IpcMemoryId
 IpcMemoryKey
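
Condensed sketch of the inval sequence the heapam.c hunks above establish for
an inplace update (assembled here from those hunks for reading convenience; it
is not an additional code change, and error handling plus unrelated steps are
omitted):

	/* heap_inplace_lock(): enumerate invals before locking the buffer */
	CacheInvalidateHeapTupleInplace(relation, oldtup_ptr);	/* queue only; nothing sent yet */
	LockTuple(relation, &oldtup.t_self, InplaceUpdateTupleLock);
	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	...
	if (!ret)
		ForgetInplace_Inval();	/* bail out: discard the queued invals */

	/* heap_inplace_update_and_unlock(): send invals while still holding the tuple lock */
	PreInplace_Inval();			/* unlink relcache init files if needed */
	START_CRIT_SECTION();
	...							/* mutate the tuple, XLogInsert(), PageSetLSN() */
	LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
	AtInplace_Inval();			/* send to the shared queue before UnlockTuple() */
	END_CRIT_SECTION();
	UnlockTuple(relation, &tuple->t_self, InplaceUpdateTupleLock);
	AcceptInvalidationMessages();	/* local processing of the just-sent inval */
	if (!IsBootstrapProcessingMode())
		CacheInvalidateHeapTuple(relation, tuple, NULL);	/* transactional inval, for logical decoding */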