mirror of
https://github.com/postgres/postgres.git
synced 2025-06-16 06:01:02 +03:00
Support an optional asynchronous commit mode, in which we don't flush WAL
before reporting a transaction committed. Data consistency is still guaranteed (unlike setting fsync = off), but a crash may lose the effects of the last few transactions. Patch by Simon, some editorialization by Tom.
This commit is contained in:
@ -14,17 +14,19 @@
|
||||
* CLOG page is initialized to zeroes. Other writes of CLOG come from
|
||||
* recording of transaction commit or abort in xact.c, which generates its
|
||||
* own XLOG records for these events and will re-perform the status update
|
||||
* on redo; so we need make no additional XLOG entry here. Also, the XLOG
|
||||
* is guaranteed flushed through the XLOG commit record before we are called
|
||||
* to log a commit, so the WAL rule "write xlog before data" is satisfied
|
||||
* automatically for commits, and we don't really care for aborts. Therefore,
|
||||
* we don't need to mark CLOG pages with LSN information; we have enough
|
||||
* synchronization already.
|
||||
* on redo; so we need make no additional XLOG entry here. For synchronous
|
||||
* transaction commits, the XLOG is guaranteed flushed through the XLOG commit
|
||||
* record before we are called to log a commit, so the WAL rule "write xlog
|
||||
* before data" is satisfied automatically. However, for async commits we
|
||||
* must track the latest LSN affecting each CLOG page, so that we can flush
|
||||
* XLOG that far and satisfy the WAL rule. We don't have to worry about this
|
||||
* for aborts (whether sync or async), since the post-crash assumption would
|
||||
* be that such transactions failed anyway.
|
||||
*
|
||||
* Portions Copyright (c) 1996-2007, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.42 2007/01/05 22:19:23 momjian Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.43 2007/08/01 22:45:07 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -57,6 +59,13 @@
|
||||
#define TransactionIdToByte(xid) (TransactionIdToPgIndex(xid) / CLOG_XACTS_PER_BYTE)
|
||||
#define TransactionIdToBIndex(xid) ((xid) % (TransactionId) CLOG_XACTS_PER_BYTE)
|
||||
|
||||
/* We store the latest async LSN for each group of transactions */
|
||||
#define CLOG_XACTS_PER_LSN_GROUP 32 /* keep this a power of 2 */
|
||||
#define CLOG_LSNS_PER_PAGE (CLOG_XACTS_PER_PAGE / CLOG_XACTS_PER_LSN_GROUP)
|
||||
|
||||
#define GetLSNIndex(slotno, xid) ((slotno) * CLOG_LSNS_PER_PAGE + \
|
||||
((xid) % (TransactionId) CLOG_XACTS_PER_PAGE) / CLOG_XACTS_PER_LSN_GROUP)
|
||||
|
||||
|
||||
/*
|
||||
* Link to shared-memory data structures for CLOG control
|
||||
@ -75,11 +84,16 @@ static void WriteTruncateXlogRec(int pageno);
|
||||
/*
|
||||
* Record the final state of a transaction in the commit log.
|
||||
*
|
||||
* lsn must be the WAL location of the commit record when recording an async
|
||||
* commit. For a synchronous commit it can be InvalidXLogRecPtr, since the
|
||||
* caller guarantees the commit record is already flushed in that case. It
|
||||
* should be InvalidXLogRecPtr for abort cases, too.
|
||||
*
|
||||
* NB: this is a low-level routine and is NOT the preferred entry point
|
||||
* for most uses; TransactionLogUpdate() in transam.c is the intended caller.
|
||||
*/
|
||||
void
|
||||
TransactionIdSetStatus(TransactionId xid, XidStatus status)
|
||||
TransactionIdSetStatus(TransactionId xid, XidStatus status, XLogRecPtr lsn)
|
||||
{
|
||||
int pageno = TransactionIdToPage(xid);
|
||||
int byteno = TransactionIdToByte(xid);
|
||||
@ -94,7 +108,16 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
|
||||
|
||||
LWLockAcquire(CLogControlLock, LW_EXCLUSIVE);
|
||||
|
||||
slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
|
||||
/*
|
||||
* If we're doing an async commit (ie, lsn is valid), then we must wait
|
||||
* for any active write on the page slot to complete. Otherwise our
|
||||
* update could reach disk in that write, which will not do since we
|
||||
* mustn't let it reach disk until we've done the appropriate WAL flush.
|
||||
* But when lsn is invalid, it's OK to scribble on a page while it is
|
||||
* write-busy, since we don't care if the update reaches disk sooner than
|
||||
* we think. Hence, pass write_ok = XLogRecPtrIsInvalid(lsn).
|
||||
*/
|
||||
slotno = SimpleLruReadPage(ClogCtl, pageno, XLogRecPtrIsInvalid(lsn), xid);
|
||||
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
|
||||
|
||||
/* Current state should be 0, subcommitted or target state */
|
||||
@ -110,22 +133,48 @@ TransactionIdSetStatus(TransactionId xid, XidStatus status)
|
||||
|
||||
ClogCtl->shared->page_dirty[slotno] = true;
|
||||
|
||||
/*
|
||||
* Update the group LSN if the transaction completion LSN is higher.
|
||||
*
|
||||
* Note: lsn will be invalid when supplied during InRecovery processing,
|
||||
* so we don't need to do anything special to avoid LSN updates during
|
||||
* recovery. After recovery completes the next clog change will set the
|
||||
* LSN correctly.
|
||||
*/
|
||||
if (!XLogRecPtrIsInvalid(lsn))
|
||||
{
|
||||
int lsnindex = GetLSNIndex(slotno, xid);
|
||||
|
||||
if (XLByteLT(ClogCtl->shared->group_lsn[lsnindex], lsn))
|
||||
ClogCtl->shared->group_lsn[lsnindex] = lsn;
|
||||
}
|
||||
|
||||
LWLockRelease(CLogControlLock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Interrogate the state of a transaction in the commit log.
|
||||
*
|
||||
* Aside from the actual commit status, this function returns (into *lsn)
|
||||
* an LSN that is late enough to be able to guarantee that if we flush up to
|
||||
* that LSN then we will have flushed the transaction's commit record to disk.
|
||||
* The result is not necessarily the exact LSN of the transaction's commit
|
||||
* record! For example, for long-past transactions (those whose clog pages
|
||||
* already migrated to disk), we'll return InvalidXLogRecPtr. Also, because
|
||||
* we group transactions on the same clog page to conserve storage, we might
|
||||
* return the LSN of a later transaction that falls into the same group.
|
||||
*
|
||||
* NB: this is a low-level routine and is NOT the preferred entry point
|
||||
* for most uses; TransactionLogFetch() in transam.c is the intended caller.
|
||||
*/
|
||||
XidStatus
|
||||
TransactionIdGetStatus(TransactionId xid)
|
||||
TransactionIdGetStatus(TransactionId xid, XLogRecPtr *lsn)
|
||||
{
|
||||
int pageno = TransactionIdToPage(xid);
|
||||
int byteno = TransactionIdToByte(xid);
|
||||
int bshift = TransactionIdToBIndex(xid) * CLOG_BITS_PER_XACT;
|
||||
int slotno;
|
||||
int lsnindex;
|
||||
char *byteptr;
|
||||
XidStatus status;
|
||||
|
||||
@ -136,6 +185,9 @@ TransactionIdGetStatus(TransactionId xid)
|
||||
|
||||
status = (*byteptr >> bshift) & CLOG_XACT_BITMASK;
|
||||
|
||||
lsnindex = GetLSNIndex(slotno, xid);
|
||||
*lsn = ClogCtl->shared->group_lsn[lsnindex];
|
||||
|
||||
LWLockRelease(CLogControlLock);
|
||||
|
||||
return status;
|
||||
@ -148,14 +200,14 @@ TransactionIdGetStatus(TransactionId xid)
|
||||
Size
|
||||
CLOGShmemSize(void)
|
||||
{
|
||||
return SimpleLruShmemSize(NUM_CLOG_BUFFERS);
|
||||
return SimpleLruShmemSize(NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE);
|
||||
}
|
||||
|
||||
void
|
||||
CLOGShmemInit(void)
|
||||
{
|
||||
ClogCtl->PagePrecedes = CLOGPagePrecedes;
|
||||
SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS,
|
||||
SimpleLruInit(ClogCtl, "CLOG Ctl", NUM_CLOG_BUFFERS, CLOG_LSNS_PER_PAGE,
|
||||
CLogControlLock, "pg_clog");
|
||||
}
|
||||
|
||||
@ -240,7 +292,7 @@ StartupCLOG(void)
|
||||
int slotno;
|
||||
char *byteptr;
|
||||
|
||||
slotno = SimpleLruReadPage(ClogCtl, pageno, xid);
|
||||
slotno = SimpleLruReadPage(ClogCtl, pageno, false, xid);
|
||||
byteptr = ClogCtl->shared->page_buffer[slotno] + byteno;
|
||||
|
||||
/* Zero so-far-unused positions in the current byte */
|
||||
|
Reference in New Issue
Block a user