1
0
mirror of https://github.com/postgres/postgres.git synced 2025-06-16 06:01:02 +03:00

Support an optional asynchronous commit mode, in which we don't flush WAL

before reporting a transaction committed.  Data consistency is still
guaranteed (unlike setting fsync = off), but a crash may lose the effects
of the last few transactions.  Patch by Simon, some editorialization by Tom.
This commit is contained in:
Tom Lane
2007-08-01 22:45:09 +00:00
parent c722628a43
commit 4a78cdeb6b
25 changed files with 998 additions and 303 deletions

View File

@ -14,17 +14,19 @@
* CLOG page is initialized to zeroes. Other writes of CLOG come from
* recording of transaction commit or abort in xact.c, which generates its
* own XLOG records for these events and will re-perform the status update
* on redo; so we need make no additional XLOG entry here. Also, the XLOG
* is guaranteed flushed through the XLOG commit record before we are called
* to log a commit, so the WAL rule "write xlog before data" is satisfied
* automatically for commits, and we don't really care for aborts. Therefore,
* we don't need to mark CLOG pages with LSN information; we have enough
* synchronization already.
* on redo; so we need make no additional XLOG entry here. For synchronous
* transaction commits, the XLOG is guaranteed flushed through the XLOG commit
* record before we are called to log a commit, so the WAL rule "write xlog
* before data" is satisfied automatically. However, for async commits we
* must track the latest LSN affecting each CLOG page, so that we can flush
* XLOG that far and satisfy the WAL rule. We don't have to worry about this
* for aborts (whether sync or async), since the post-crash assumption would
* be that such transactions failed anyway.
*
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.42 2007/01/05 22:19:23 momjian Exp $
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.43 2007/08/01 22:45:07 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -57,6 +59,13 @@
#define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
#define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
/* We store the latest async LSN for each group of transactions */
#define CLOG_XACTS_PER_LSN_GROUP 32 /* keep this a power of 2 */
#define CLOG_LSNS_PER_PAGE (CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)
#define GetLSNIndex(slotno, xid) ((slotno) * CLOG_LSNS_PER_PAGE + \
((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
/*
* Link to shared-memory data structures for CLOG control
@ -75,11 +84,16 @@ static void WriteTruncateXlogRec(int pageno);
/*
* Record the final state of a transaction in the commit log.
*
* lsn must be the WAL location of the commit record when recording an async
* commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
* caller guarantees the commit record is already flushed in that case. It
* should be InvalidXLogRecPtr for abort cases, too.
*
* NB: this is a low-level routine and is NOT the preferred entry point
* for most uses; TransactionLogUpdate() in transam.c is the intended caller.
*/
void
TransactionIdSetStatus(TransactionId xid, XidStatus status)
TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
{
int pageno = TransactionIdToPage(xid);
int byteno = TransactionIdToByte(xid);
@ -94,7 +108,16 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
/*
* If we're doing an async commit (ie, lsn is valid), then we must wait
* for any active write on the page slot to complete. Otherwise our
* update could reach disk in that write, which will not do since we
* mustn't let it reach disk until we've done the appropriate WAL flush.
* But when lsn is invalid, it's OK to scribble on a page while it is
* write-busy, since we don't care if the update reaches disk sooner than
* we think. Hence, pass write_ok = XLogRecPtrIsInvalid(lsn).
*/
slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
/* Current state should be 0, subcommitted or target state */
@ -110,22 +133,48 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
ClogCtl->shared->page_dirty[slotno] = true;
/*
* Update the group LSN if the transaction completion LSN is higher.
*
* Note: lsn will be invalid when supplied during InRecovery processing,
* so we don't need to do anything special to avoid LSN updates during
* recovery. After recovery completes the next clog change will set the
* LSN correctly.
*/
if (!XLogRecPtrIsInvalid(lsn))
{
int lsnindex = GetLSNIndex(slotno, xid);
if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn))
ClogCtl->shared->group_lsn[lsnindex] = lsn;
}
LWLockRelease(CLogControlLock);
}
/*
* Interrogate the state of a transaction in the commit log.
*
* Aside from the actual commit status, this function returns (into *lsn)
* an LSN that is late enough to be able to guarantee that if we flush up to
* that LSN then we will have flushed the transaction's commit record to disk.
* The result is not necessarily the exact LSN of the transaction's commit
* record! For example, for long-past transactions (those whose clog pages
* already migrated to disk), we'll return InvalidXLogRecPtr. Also, because
* we group transactions on the same clog page to conserve storage, we might
* return the LSN of a later transaction that falls into the same group.
*
* NB: this is a low-level routine and is NOT the preferred entry point
* for most uses; TransactionLogFetch() in transam.c is the intended caller.
*/
XidStatus
TransactionIdGetStatus(TransactionId xid)
TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
{
int pageno = TransactionIdToPage(xid);
int byteno = TransactionIdToByte(xid);
int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
int slotno;
int lsnindex;
char *byteptr;
XidStatus status;
@ -136,6 +185,9 @@ TransactionIdGetStatus(TransactionId xid)
status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
lsnindex = GetLSNIndex(slotno, xid);
*lsn = ClogCtl->shared->group_lsn[lsnindex];
LWLockRelease(CLogControlLock);
return status;
@ -148,14 +200,14 @@ TransactionIdGetStatus(TransactionId xid)
Size
CLOGShmemSize(void)
{
return SimpleLruShmemSize(NUM_CLOG_BUFFERS);
return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
}
void
CLOGShmemInit(void)
{
ClogCtl->PagePrecedes = CLOGPagePrecedes;
SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS,
SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
CLogControlLock, "pg_clog");
}
@ -240,7 +292,7 @@ StartupCLOG(void)
int slotno;
char *byteptr;
slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
slotno = SimpleLruReadPage(ClogCtl, pageno, false, xid);
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
/* Zero so-far-unused positions in the current byte */