From d508e0ddd23a35b9cb30ec1a769de40e98d5561f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Wed, 11 Aug 2004 04:08:02 +0000 Subject: [PATCH] Fix failure to guarantee that a checkpoint will write out pg_clog updates for transaction commits that occurred just before the checkpoint. This is an EXTREMELY serious bug --- kudos to Satoshi Okada for creating a reproducible test case to prove its existence. --- src/backend/access/transam/xact.c | 36 +++++++++++++++++++++++++------ src/backend/access/transam/xlog.c | 18 +++++++++++++--- src/include/storage/lwlock.h | 3 ++- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/src/backend/access/transam/xact.c b/src/backend/access/transam/xact.c index e6767d4c4b1..3409de03a56 100644 --- a/src/backend/access/transam/xact.c +++ b/src/backend/access/transam/xact.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156 2003/10/16 16:50:41 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xact.c,v 1.156.2.1 2004/08/11 04:08:00 tgl Exp $ * * NOTES * Transaction aborts can now occur two ways: @@ -503,6 +503,7 @@ RecordTransactionCommit(void) if (MyXactMadeXLogEntry || MyXactMadeTempRelUpdate) { TransactionId xid = GetCurrentTransactionId(); + bool madeTCentries; XLogRecPtr recptr; /* Tell bufmgr and smgr to prepare for commit */ @@ -511,12 +512,29 @@ RecordTransactionCommit(void) START_CRIT_SECTION(); /* - * We only need to log the commit in xlog if the transaction made - * any transaction-controlled XLOG entries. (Otherwise, its XID - * appears nowhere in permanent storage, so no one else will ever - * care if it committed.) + * If our transaction made any transaction-controlled XLOG entries, + * we need to lock out checkpoint start between writing our XLOG + * record and updating pg_clog. Otherwise it is possible for the + * checkpoint to set REDO after the XLOG record but fail to flush the + * pg_clog update to disk, leading to loss of the transaction commit + * if we crash a little later. Slightly klugy fix for problem + * discovered 2004-08-10. + * + * (If it made no transaction-controlled XLOG entries, its XID + * appears nowhere in permanent storage, so no one else will ever care + * if it committed; so it doesn't matter if we lose the commit flag.) + * + * Note we only need a shared lock. */ - if (MyLastRecPtr.xrecoff != 0) + madeTCentries = (MyLastRecPtr.xrecoff != 0); + if (madeTCentries) + LWLockAcquire(CheckpointStartLock, LW_SHARED); + + /* + * We only need to log the commit in XLOG if the transaction made + * any transaction-controlled XLOG entries. + */ + if (madeTCentries) { /* Need to emit a commit record */ XLogRecData rdata; @@ -585,6 +603,10 @@ RecordTransactionCommit(void) if (MyLastRecPtr.xrecoff != 0 || MyXactMadeTempRelUpdate) TransactionIdCommit(xid); + /* Unlock checkpoint lock if we acquired it */ + if (madeTCentries) + LWLockRelease(CheckpointStartLock); + END_CRIT_SECTION(); } @@ -698,6 +720,8 @@ RecordTransactionAbort(void) * care if it committed.) We do not flush XLOG to disk in any * case, since the default assumption after a crash would be that * we aborted, anyway. + * For the same reason, we don't need to worry about interlocking + * against checkpoint start. */ if (MyLastRecPtr.xrecoff != 0) { diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 8eb154f7bab..a8acf758a8c 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.1 2004/02/23 23:03:43 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/transam/xlog.c,v 1.125.2.2 2004/08/11 04:08:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -3159,6 +3159,15 @@ CreateCheckPoint(bool shutdown, bool force) checkPoint.ThisStartUpID = ThisStartUpID; checkPoint.time = time(NULL); + /* + * We must hold CheckpointStartLock while determining the checkpoint + * REDO pointer. This ensures that any concurrent transaction commits + * will be either not yet logged, or logged and recorded in pg_clog. + * See notes in RecordTransactionCommit(). + */ + LWLockAcquire(CheckpointStartLock, LW_EXCLUSIVE); + + /* And we need WALInsertLock too */ LWLockAcquire(WALInsertLock, LW_EXCLUSIVE); /* @@ -3191,6 +3200,7 @@ CreateCheckPoint(bool shutdown, bool force) ControlFile->checkPointCopy.redo.xrecoff) { LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointStartLock); LWLockRelease(CheckpointLock); END_CRIT_SECTION(); return; @@ -3258,11 +3268,13 @@ CreateCheckPoint(bool shutdown, bool force) #endif /* - * Now we can release insert lock, allowing other xacts to proceed - * even while we are flushing disk buffers. + * Now we can release insert lock and checkpoint start lock, allowing + * other xacts to proceed even while we are flushing disk buffers. */ LWLockRelease(WALInsertLock); + LWLockRelease(CheckpointStartLock); + /* * Get the other info we need for the checkpoint record. */ diff --git a/src/include/storage/lwlock.h b/src/include/storage/lwlock.h index 7b2a4c92154..6f61eb20fdf 100644 --- a/src/include/storage/lwlock.h +++ b/src/include/storage/lwlock.h @@ -7,7 +7,7 @@ * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * - * $Id: lwlock.h,v 1.8 2003/08/04 02:40:14 momjian Exp $ + * $Id: lwlock.h,v 1.8.4.1 2004/08/11 04:08:02 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -37,6 +37,7 @@ typedef enum LWLockId WALWriteLock, ControlFileLock, CheckpointLock, + CheckpointStartLock, RelCacheInitLock, NumFixedLWLocks, /* must be last except for