1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-23 14:01:44 +03:00

Use an MVCC snapshot, rather than SnapshotNow, for catalog scans.

SnapshotNow scans have the undesirable property that, in the face of
concurrent updates, the scan can fail to see either the old or the new
versions of the row.  In many cases, we work around this by requiring
DDL operations to hold AccessExclusiveLock on the object being
modified; in some cases, the existing locking is inadequate and random
failures occur as a result.  This commit doesn't change anything
related to locking, but will hopefully pave the way to allowing lock
strength reductions in the future.

The major issue has held us back from making this change in the past
is that taking an MVCC snapshot is significantly more expensive than
using a static special snapshot such as SnapshotNow.  However, testing
of various worst-case scenarios reveals that this problem is not
severe except under fairly extreme workloads.  To mitigate those
problems, we avoid retaking the MVCC snapshot for each new scan;
instead, we take a new snapshot only when invalidation messages have
been processed.  The catcache machinery already requires that
invalidation messages be sent before releasing the related heavyweight
lock; else other backends might rely on locally-cached data rather
than scanning the catalog at all.  Thus, making snapshot reuse
dependent on the same guarantees shouldn't break anything that wasn't
already subtly broken.

Patch by me.  Review by Michael Paquier and Andres Freund.
This commit is contained in:
Robert Haas
2013-07-02 09:47:01 -04:00
parent 384f933046
commit 568d4138c6
69 changed files with 617 additions and 353 deletions

View File

@ -9,8 +9,8 @@
* consider that it is *still valid* so long as we are in the same command,
* ie, until the next CommandCounterIncrement() or transaction commit.
* (See utils/time/tqual.c, and note that system catalogs are generally
* scanned under SnapshotNow rules by the system, or plain user snapshots
* for user queries.) At the command boundary, the old tuple stops
* scanned under the most current snapshot available, rather than the
* transaction snapshot.) At the command boundary, the old tuple stops
* being valid and the new version, if any, becomes valid. Therefore,
* we cannot simply flush a tuple from the system caches during heap_update()
* or heap_delete(). The tuple is still good at that point; what's more,
@ -106,6 +106,7 @@
#include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/relmapper.h"
#include "utils/snapmgr.h"
#include "utils/syscache.h"
@ -372,6 +373,29 @@ AddRelcacheInvalidationMessage(InvalidationListHeader *hdr,
AddInvalidationMessage(&hdr->rclist, &msg);
}
/*
* Add a snapshot inval entry
*/
static void
AddSnapshotInvalidationMessage(InvalidationListHeader *hdr,
Oid dbId, Oid relId)
{
SharedInvalidationMessage msg;
/* Don't add a duplicate item */
/* We assume dbId need not be checked because it will never change */
ProcessMessageList(hdr->rclist,
if (msg->sn.id == SHAREDINVALSNAPSHOT_ID &&
msg->sn.relId == relId)
return);
/* OK, add the item */
msg.sn.id = SHAREDINVALSNAPSHOT_ID;
msg.sn.dbId = dbId;
msg.sn.relId = relId;
AddInvalidationMessage(&hdr->rclist, &msg);
}
/*
* Append one list of invalidation messages to another, resetting
* the source list to empty.
@ -468,6 +492,19 @@ RegisterRelcacheInvalidation(Oid dbId, Oid relId)
transInvalInfo->RelcacheInitFileInval = true;
}
/*
* RegisterSnapshotInvalidation
*
* Register a invalidation event for MVCC scans against a given catalog.
* Only needed for catalogs that don't have catcaches.
*/
static void
RegisterSnapshotInvalidation(Oid dbId, Oid relId)
{
AddSnapshotInvalidationMessage(&transInvalInfo->CurrentCmdInvalidMsgs,
dbId, relId);
}
/*
* LocalExecuteInvalidationMessage
*
@ -482,6 +519,8 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
{
if (msg->cc.dbId == MyDatabaseId || msg->cc.dbId == InvalidOid)
{
InvalidateCatalogSnapshot();
CatalogCacheIdInvalidate(msg->cc.id, msg->cc.hashValue);
CallSyscacheCallbacks(msg->cc.id, msg->cc.hashValue);
@ -491,6 +530,8 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
{
if (msg->cat.dbId == MyDatabaseId || msg->cat.dbId == InvalidOid)
{
InvalidateCatalogSnapshot();
CatalogCacheFlushCatalog(msg->cat.catId);
/* CatalogCacheFlushCatalog calls CallSyscacheCallbacks as needed */
@ -532,6 +573,14 @@ LocalExecuteInvalidationMessage(SharedInvalidationMessage *msg)
else if (msg->rm.dbId == MyDatabaseId)
RelationMapInvalidate(false);
}
else if (msg->id == SHAREDINVALSNAPSHOT_ID)
{
/* We only care about our own database and shared catalogs */
if (msg->rm.dbId == InvalidOid)
InvalidateCatalogSnapshot();
else if (msg->rm.dbId == MyDatabaseId)
InvalidateCatalogSnapshot();
}
else
elog(FATAL, "unrecognized SI message ID: %d", msg->id);
}
@ -552,6 +601,7 @@ InvalidateSystemCaches(void)
{
int i;
InvalidateCatalogSnapshot();
ResetCatalogCaches();
RelationCacheInvalidate(); /* gets smgr and relmap too */
@ -1006,8 +1056,15 @@ CacheInvalidateHeapTuple(Relation relation,
/*
* First let the catcache do its thing
*/
PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
RegisterCatcacheInvalidation);
tupleRelId = RelationGetRelid(relation);
if (RelationInvalidatesSnapshotsOnly(tupleRelId))
{
databaseId = IsSharedRelation(tupleRelId) ? InvalidOid : MyDatabaseId;
RegisterSnapshotInvalidation(databaseId, tupleRelId);
}
else
PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
RegisterCatcacheInvalidation);
/*
* Now, is this tuple one of the primary definers of a relcache entry?
@ -1015,8 +1072,6 @@ CacheInvalidateHeapTuple(Relation relation,
* Note we ignore newtuple here; we assume an update cannot move a tuple
* from being part of one relcache entry to being part of another.
*/
tupleRelId = RelationGetRelid(relation);
if (tupleRelId == RelationRelationId)
{
Form_pg_class classtup = (Form_pg_class) GETSTRUCT(tuple);