mirror of https://github.com/postgres/postgres.git
Fix recently-understood problems with handling of XID freezing, particularly in PITR scenarios. We now WAL-log the replacement of old XIDs with FrozenTransactionId, so that such replacement is guaranteed to propagate to PITR slave databases. Also, rather than relying on hint-bit updates to be preserved, pg_clog is not truncated until all instances of an XID are known to have been replaced by FrozenTransactionId.

Add new GUC variables and pg_autovacuum columns to allow management of the freezing policy, so that users can trade off the size of pg_clog against the amount of freezing work done. Revise the already-existing code that forces autovacuum of tables approaching the wraparound point to make it more bulletproof; also, revise the autovacuum logic so that anti-wraparound vacuuming is done per-table rather than per-database.

initdb forced because of changes in pg_class, pg_database, and pg_autovacuum catalogs.

Heikki Linnakangas, Simon Riggs, and Tom Lane.
src/backend/access/heap/heapam.c

@@ -8,7 +8,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.220 2006/10/04 00:29:48 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.221 2006/11/05 22:42:07 tgl Exp $
  *
  *
  * INTERFACE ROUTINES
@@ -2809,6 +2809,166 @@ heap_inplace_update(Relation relation, HeapTuple tuple)
 }
 
 
+/*
+ * heap_freeze_tuple
+ *
+ * Check to see whether any of the XID fields of a tuple (xmin, xmax, xvac)
+ * are older than the specified cutoff XID. If so, replace them with
+ * FrozenTransactionId or InvalidTransactionId as appropriate, and return
+ * TRUE. Return FALSE if nothing was changed.
+ *
+ * It is assumed that the caller has checked the tuple with
+ * HeapTupleSatisfiesVacuum() and determined that it is not HEAPTUPLE_DEAD
+ * (else we should be removing the tuple, not freezing it).
+ *
+ * NB: cutoff_xid *must* be <= the current global xmin, to ensure that any
+ * XID older than it could neither be running nor seen as running by any
+ * open transaction. This ensures that the replacement will not change
+ * anyone's idea of the tuple state. Also, since we assume the tuple is
+ * not HEAPTUPLE_DEAD, the fact that an XID is not still running allows us
+ * to assume that it is either committed good or aborted, as appropriate;
+ * so we need no external state checks to decide what to do. (This is good
+ * because this function is applied during WAL recovery, when we don't have
+ * access to any such state, and can't depend on the hint bits to be set.)
+ *
+ * In lazy VACUUM, we call this while initially holding only a shared lock
+ * on the tuple's buffer. If any change is needed, we trade that in for an
+ * exclusive lock before making the change. Caller should pass the buffer ID
+ * if shared lock is held, InvalidBuffer if exclusive lock is already held.
+ *
+ * Note: it might seem we could make the changes without exclusive lock, since
+ * TransactionId read/write is assumed atomic anyway. However there is a race
+ * condition: someone who just fetched an old XID that we overwrite here could
+ * conceivably not finish checking the XID against pg_clog before we finish
+ * the VACUUM and perhaps truncate off the part of pg_clog he needs. Getting
+ * exclusive lock ensures no other backend is in process of checking the
+ * tuple status. Also, getting exclusive lock makes it safe to adjust the
+ * infomask bits.
+ */
+bool
+heap_freeze_tuple(HeapTupleHeader tuple, TransactionId cutoff_xid,
+                  Buffer buf)
+{
+    bool        changed = false;
+    TransactionId xid;
+
+    xid = HeapTupleHeaderGetXmin(tuple);
+    if (TransactionIdIsNormal(xid) &&
+        TransactionIdPrecedes(xid, cutoff_xid))
+    {
+        if (buf != InvalidBuffer)
+        {
+            /* trade in share lock for exclusive lock */
+            LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+            LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+            buf = InvalidBuffer;
+        }
+        HeapTupleHeaderSetXmin(tuple, FrozenTransactionId);
+
+        /*
+         * Might as well fix the hint bits too; usually XMIN_COMMITTED will
+         * already be set here, but there's a small chance not.
+         */
+        Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
+        tuple->t_infomask |= HEAP_XMIN_COMMITTED;
+        changed = true;
+    }
+
+    /*
+     * When we release shared lock, it's possible for someone else to change
+     * xmax before we get the lock back, so repeat the check after acquiring
+     * exclusive lock. (We don't need this pushup for xmin, because only
+     * VACUUM could be interested in changing an existing tuple's xmin,
+     * and there's only one VACUUM allowed on a table at a time.)
+     */
+recheck_xmax:
+    if (!(tuple->t_infomask & HEAP_XMAX_IS_MULTI))
+    {
+        xid = HeapTupleHeaderGetXmax(tuple);
+        if (TransactionIdIsNormal(xid) &&
+            TransactionIdPrecedes(xid, cutoff_xid))
+        {
+            if (buf != InvalidBuffer)
+            {
+                /* trade in share lock for exclusive lock */
+                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+                buf = InvalidBuffer;
+                goto recheck_xmax;      /* see comment above */
+            }
+            HeapTupleHeaderSetXmax(tuple, InvalidTransactionId);
+
+            /*
+             * The tuple might be marked either XMAX_INVALID or
+             * XMAX_COMMITTED + LOCKED. Normalize to INVALID just to be
+             * sure no one gets confused.
+             */
+            tuple->t_infomask &= ~HEAP_XMAX_COMMITTED;
+            tuple->t_infomask |= HEAP_XMAX_INVALID;
+            changed = true;
+        }
+    }
+    else
+    {
+        /*----------
+         * XXX perhaps someday we should zero out very old MultiXactIds here?
+         *
+         * The only way a stale MultiXactId could pose a problem is if a
+         * tuple, having once been multiply-share-locked, is not touched by
+         * any vacuum or attempted lock or deletion for just over 4G MultiXact
+         * creations, and then in the probably-narrow window where its xmax
+         * is again a live MultiXactId, someone tries to lock or delete it.
+         * Even then, another share-lock attempt would work fine. An
+         * exclusive-lock or delete attempt would face unexpected delay, or
+         * in the very worst case get a deadlock error. This seems an
+         * extremely low-probability scenario with minimal downside even if
+         * it does happen, so for now we don't do the extra bookkeeping that
+         * would be needed to clean out MultiXactIds.
+         *----------
+         */
+    }
+
+    /*
+     * Although xvac per se could only be set by VACUUM, it shares physical
+     * storage space with cmax, and so could be wiped out by someone setting
+     * xmax. Hence recheck after changing lock, same as for xmax itself.
+     */
+recheck_xvac:
+    if (tuple->t_infomask & HEAP_MOVED)
+    {
+        xid = HeapTupleHeaderGetXvac(tuple);
+        if (TransactionIdIsNormal(xid) &&
+            TransactionIdPrecedes(xid, cutoff_xid))
+        {
+            if (buf != InvalidBuffer)
+            {
+                /* trade in share lock for exclusive lock */
+                LockBuffer(buf, BUFFER_LOCK_UNLOCK);
+                LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
+                buf = InvalidBuffer;
+                goto recheck_xvac;      /* see comment above */
+            }
+
+            /*
+             * If a MOVED_OFF tuple is not dead, the xvac transaction must
+             * have failed; whereas a non-dead MOVED_IN tuple must mean the
+             * xvac transaction succeeded.
+             */
+            if (tuple->t_infomask & HEAP_MOVED_OFF)
+                HeapTupleHeaderSetXvac(tuple, InvalidTransactionId);
+            else
+                HeapTupleHeaderSetXvac(tuple, FrozenTransactionId);
+
+            /*
+             * Might as well fix the hint bits too; usually XMIN_COMMITTED
+             * will already be set here, but there's a small chance not.
+             */
+            Assert(!(tuple->t_infomask & HEAP_XMIN_INVALID));
+            tuple->t_infomask |= HEAP_XMIN_COMMITTED;
+            changed = true;
+        }
+    }
+
+    return changed;
+}
+
+
 /* ----------------
  *		heap_markpos	- mark scan position
  * ----------------
@@ -2877,6 +3037,9 @@ heap_restrpos(HeapScanDesc scan)
 /*
  * Perform XLogInsert for a heap-clean operation. Caller must already
  * have modified the buffer and marked it dirty.
+ *
+ * Note: for historical reasons, the entries in the unused[] array should
+ * be zero-based tuple indexes, not one-based.
  */
 XLogRecPtr
 log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
@@ -2920,6 +3083,57 @@ log_heap_clean(Relation reln, Buffer buffer, OffsetNumber *unused, int uncnt)
 	return recptr;
 }
 
+/*
+ * Perform XLogInsert for a heap-freeze operation. Caller must already
+ * have modified the buffer and marked it dirty.
+ *
+ * Unlike log_heap_clean(), the offsets[] entries are one-based.
+ */
+XLogRecPtr
+log_heap_freeze(Relation reln, Buffer buffer,
+                TransactionId cutoff_xid,
+                OffsetNumber *offsets, int offcnt)
+{
+    xl_heap_freeze xlrec;
+    XLogRecPtr  recptr;
+    XLogRecData rdata[2];
+
+    /* Caller should not call me on a temp relation */
+    Assert(!reln->rd_istemp);
+
+    xlrec.node = reln->rd_node;
+    xlrec.block = BufferGetBlockNumber(buffer);
+    xlrec.cutoff_xid = cutoff_xid;
+
+    rdata[0].data = (char *) &xlrec;
+    rdata[0].len = SizeOfHeapFreeze;
+    rdata[0].buffer = InvalidBuffer;
+    rdata[0].next = &(rdata[1]);
+
+    /*
+     * The tuple-offsets array is not actually in the buffer, but pretend
+     * that it is. When XLogInsert stores the whole buffer, the offsets array
+     * need not be stored too.
+     */
+    if (offcnt > 0)
+    {
+        rdata[1].data = (char *) offsets;
+        rdata[1].len = offcnt * sizeof(OffsetNumber);
+    }
+    else
+    {
+        rdata[1].data = NULL;
+        rdata[1].len = 0;
+    }
+    rdata[1].buffer = buffer;
+    rdata[1].buffer_std = true;
+    rdata[1].next = NULL;
+
+    recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_FREEZE, rdata);
+
+    return recptr;
+}
+
 /*
  * Perform XLogInsert for a heap-update operation. Caller must already
  * have modified the buffer(s) and marked them dirty.
@@ -3057,6 +3271,7 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
 
 	while (unused < unend)
 	{
+		/* unused[] entries are zero-based */
 		lp = PageGetItemId(page, *unused + 1);
 		lp->lp_flags &= ~LP_USED;
 		unused++;
@@ -3071,6 +3286,55 @@ heap_xlog_clean(XLogRecPtr lsn, XLogRecord *record)
 	UnlockReleaseBuffer(buffer);
 }
 
+static void
+heap_xlog_freeze(XLogRecPtr lsn, XLogRecord *record)
+{
+    xl_heap_freeze *xlrec = (xl_heap_freeze *) XLogRecGetData(record);
+    TransactionId cutoff_xid = xlrec->cutoff_xid;
+    Relation    reln;
+    Buffer      buffer;
+    Page        page;
+
+    if (record->xl_info & XLR_BKP_BLOCK_1)
+        return;
+
+    reln = XLogOpenRelation(xlrec->node);
+    buffer = XLogReadBuffer(reln, xlrec->block, false);
+    if (!BufferIsValid(buffer))
+        return;
+    page = (Page) BufferGetPage(buffer);
+
+    if (XLByteLE(lsn, PageGetLSN(page)))
+    {
+        UnlockReleaseBuffer(buffer);
+        return;
+    }
+
+    if (record->xl_len > SizeOfHeapFreeze)
+    {
+        OffsetNumber *offsets;
+        OffsetNumber *offsets_end;
+
+        offsets = (OffsetNumber *) ((char *) xlrec + SizeOfHeapFreeze);
+        offsets_end = (OffsetNumber *) ((char *) xlrec + record->xl_len);
+
+        while (offsets < offsets_end)
+        {
+            /* offsets[] entries are one-based */
+            ItemId      lp = PageGetItemId(page, *offsets);
+            HeapTupleHeader tuple = (HeapTupleHeader) PageGetItem(page, lp);
+
+            (void) heap_freeze_tuple(tuple, cutoff_xid, InvalidBuffer);
+            offsets++;
+        }
+    }
+
+    PageSetLSN(page, lsn);
+    PageSetTLI(page, ThisTimeLineID);
+    MarkBufferDirty(buffer);
+    UnlockReleaseBuffer(buffer);
+}
+
 static void
 heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
 {
@@ -3546,6 +3810,18 @@ heap_redo(XLogRecPtr lsn, XLogRecord *record)
 		elog(PANIC, "heap_redo: unknown op code %u", info);
 }
 
+void
+heap2_redo(XLogRecPtr lsn, XLogRecord *record)
+{
+    uint8       info = record->xl_info & ~XLR_INFO_MASK;
+
+    info &= XLOG_HEAP_OPMASK;
+    if (info == XLOG_HEAP2_FREEZE)
+        heap_xlog_freeze(lsn, record);
+    else
+        elog(PANIC, "heap2_redo: unknown op code %u", info);
+}
+
 static void
 out_target(StringInfo buf, xl_heaptid *target)
 {
@@ -3645,3 +3921,22 @@ heap_desc(StringInfo buf, uint8 xl_info, char *rec)
 	else
 		appendStringInfo(buf, "UNKNOWN");
 }
+
+void
+heap2_desc(StringInfo buf, uint8 xl_info, char *rec)
+{
+    uint8       info = xl_info & ~XLR_INFO_MASK;
+
+    info &= XLOG_HEAP_OPMASK;
+    if (info == XLOG_HEAP2_FREEZE)
+    {
+        xl_heap_freeze *xlrec = (xl_heap_freeze *) rec;
+
+        appendStringInfo(buf, "freeze: rel %u/%u/%u; blk %u; cutoff %u",
+                         xlrec->node.spcNode, xlrec->node.dbNode,
+                         xlrec->node.relNode, xlrec->block,
+                         xlrec->cutoff_xid);
+    }
+    else
+        appendStringInfo(buf, "UNKNOWN");
+}
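To see how these pieces fit together, here is a sketch of a vacuum-side caller of the two new routines above: heap_freeze_tuple() applies the per-tuple XID replacements and log_heap_freeze() WAL-logs which line pointers were touched, so that heap_xlog_freeze() can redo the same replacements during PITR replay. This is only loosely modeled on what lazy VACUUM does after this commit; freeze_page() and its local variables are illustrative names rather than the actual vacuumlazy.c code, and a real caller would first have vetted each tuple with HeapTupleSatisfiesVacuum().

static void
freeze_page(Relation rel, Buffer buf, TransactionId freeze_cutoff)
{
    Page        page = BufferGetPage(buf);
    OffsetNumber frozen[MaxOffsetNumber];
    int         nfrozen = 0;
    OffsetNumber offnum,
                maxoff = PageGetMaxOffsetNumber(page);

    /* caller is assumed to hold at least a share lock on buf */
    for (offnum = FirstOffsetNumber; offnum <= maxoff; offnum++)
    {
        ItemId      itemid = PageGetItemId(page, offnum);
        HeapTupleHeader tuple;

        if (!ItemIdIsUsed(itemid))
            continue;
        tuple = (HeapTupleHeader) PageGetItem(page, itemid);

        /* pass buf so heap_freeze_tuple can trade our share lock up */
        if (heap_freeze_tuple(tuple, freeze_cutoff, buf))
            frozen[nfrozen++] = offnum; /* one-based, as log_heap_freeze expects */
    }

    if (nfrozen > 0)
    {
        MarkBufferDirty(buf);
        /* WAL-log the freeze unless the rel is temp (temp rels are not logged) */
        if (!rel->rd_istemp)
        {
            XLogRecPtr  recptr = log_heap_freeze(rel, buf, freeze_cutoff,
                                                 frozen, nfrozen);

            PageSetLSN(page, recptr);
            PageSetTLI(page, ThisTimeLineID);
        }
    }
}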
src/backend/access/transam/clog.c

@@ -24,7 +24,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.40 2006/10/04 00:29:49 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/clog.c,v 1.41 2006/11/05 22:42:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -69,6 +69,7 @@ static SlruCtlData ClogCtlData;
 static int	ZeroCLOGPage(int pageno, bool writeXlog);
 static bool CLOGPagePrecedes(int page1, int page2);
 static void WriteZeroPageXlogRec(int pageno);
+static void WriteTruncateXlogRec(int pageno);
 
 
 /*
@@ -309,16 +310,17 @@ ExtendCLOG(TransactionId newestXact)
 /*
  * Remove all CLOG segments before the one holding the passed transaction ID
  *
- * When this is called, we know that the database logically contains no
- * reference to transaction IDs older than oldestXact. However, we must
- * not truncate the CLOG until we have performed a checkpoint, to ensure
- * that no such references remain on disk either; else a crash just after
- * the truncation might leave us with a problem. Since CLOG segments hold
- * a large number of transactions, the opportunity to actually remove a
- * segment is fairly rare, and so it seems best not to do the checkpoint
- * unless we have confirmed that there is a removable segment. Therefore
- * we issue the checkpoint command here, not in higher-level code as might
- * seem cleaner.
+ * Before removing any CLOG data, we must flush XLOG to disk, to ensure
+ * that any recently-emitted HEAP_FREEZE records have reached disk; otherwise
+ * a crash and restart might leave us with some unfrozen tuples referencing
+ * removed CLOG data. We choose to emit a special TRUNCATE XLOG record too.
+ * Replaying the deletion from XLOG is not critical, since the files could
+ * just as well be removed later, but doing so prevents a long-running hot
+ * standby server from acquiring an unreasonably bloated CLOG directory.
+ *
+ * Since CLOG segments hold a large number of transactions, the opportunity to
+ * actually remove a segment is fairly rare, and so it seems best not to do
+ * the XLOG flush unless we have confirmed that there is a removable segment.
  */
 void
 TruncateCLOG(TransactionId oldestXact)
@@ -335,8 +337,8 @@ TruncateCLOG(TransactionId oldestXact)
 	if (!SlruScanDirectory(ClogCtl, cutoffPage, false))
 		return;					/* nothing to remove */
 
-	/* Perform a CHECKPOINT */
-	RequestCheckpoint(true, false);
+	/* Write XLOG record and flush XLOG to disk */
+	WriteTruncateXlogRec(cutoffPage);
 
 	/* Now we can remove the old CLOG segment(s) */
 	SimpleLruTruncate(ClogCtl, cutoffPage);
@@ -386,6 +388,29 @@ WriteZeroPageXlogRec(int pageno)
 	(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE | XLOG_NO_TRAN, &rdata);
 }
 
+/*
+ * Write a TRUNCATE xlog record
+ *
+ * We must flush the xlog record to disk before returning --- see notes
+ * in TruncateCLOG().
+ *
+ * Note: xlog record is marked as outside transaction control, since we
+ * want it to be redone whether the invoking transaction commits or not.
+ */
+static void
+WriteTruncateXlogRec(int pageno)
+{
+    XLogRecData rdata;
+    XLogRecPtr  recptr;
+
+    rdata.data = (char *) (&pageno);
+    rdata.len = sizeof(int);
+    rdata.buffer = InvalidBuffer;
+    rdata.next = NULL;
+    recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE | XLOG_NO_TRAN, &rdata);
+    XLogFlush(recptr);
+}
+
 /*
  * CLOG resource manager's routines
  */
@@ -409,6 +434,22 @@ clog_redo(XLogRecPtr lsn, XLogRecord *record)
 
 		LWLockRelease(CLogControlLock);
 	}
+	else if (info == CLOG_TRUNCATE)
+	{
+		int			pageno;
+
+		memcpy(&pageno, XLogRecGetData(record), sizeof(int));
+
+		/*
+		 * During XLOG replay, latest_page_number isn't set up yet; insert
+		 * a suitable value to bypass the sanity test in SimpleLruTruncate.
+		 */
+		ClogCtl->shared->latest_page_number = pageno;
+
+		SimpleLruTruncate(ClogCtl, pageno);
+	}
 	else
 		elog(PANIC, "clog_redo: unknown op code %u", info);
 }
 
 void
@@ -423,6 +464,13 @@ clog_desc(StringInfo buf, uint8 xl_info, char *rec)
 		memcpy(&pageno, rec, sizeof(int));
 		appendStringInfo(buf, "zeropage: %d", pageno);
 	}
+	else if (info == CLOG_TRUNCATE)
+	{
+		int			pageno;
+
+		memcpy(&pageno, rec, sizeof(int));
+		appendStringInfo(buf, "truncate before: %d", pageno);
+	}
 	else
 		appendStringInfo(buf, "UNKNOWN");
 }
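A side note on why removable segments are "fairly rare": each transaction's status occupies just two bits in pg_clog, so one SLRU segment file (32 pages of BLCKSZ bytes, i.e. a 256KB file) covers about a million transactions. A standalone back-of-envelope check; the constants mirror clog.c and slru.h and assume the default 8K BLCKSZ:

#include <stdio.h>

#define BLCKSZ                  8192
#define CLOG_BITS_PER_XACT      2
#define CLOG_XACTS_PER_BYTE     (8 / CLOG_BITS_PER_XACT)        /* 4 */
#define CLOG_XACTS_PER_PAGE     (BLCKSZ * CLOG_XACTS_PER_BYTE)  /* 32768 */
#define SLRU_PAGES_PER_SEGMENT  32

int
main(void)
{
    /* one 256KB segment file tracks over a million transactions */
    printf("xacts per CLOG segment: %d\n",
           CLOG_XACTS_PER_PAGE * SLRU_PAGES_PER_SEGMENT);       /* 1048576 */
    return 0;
}

So TruncateCLOG() gets a chance to unlink a file only about once per million transactions, which is why the XLOG flush is deferred until SlruScanDirectory() confirms there is actually something to remove.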
src/backend/access/transam/rmgr.c

@@ -3,7 +3,7 @@
  *
  * Resource managers definition
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.24 2006/08/07 16:57:56 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/rmgr.c,v 1.25 2006/11/05 22:42:07 tgl Exp $
  */
 #include "postgres.h"
 
@@ -32,7 +32,7 @@ const RmgrData RmgrTable[RM_MAX_ID + 1] = {
 	{"MultiXact", multixact_redo, multixact_desc, NULL, NULL, NULL},
 	{"Reserved 7", NULL, NULL, NULL, NULL, NULL},
 	{"Reserved 8", NULL, NULL, NULL, NULL, NULL},
-	{"Reserved 9", NULL, NULL, NULL, NULL, NULL},
+	{"Heap2", heap2_redo, heap2_desc, NULL, NULL, NULL},
 	{"Heap", heap_redo, heap_desc, NULL, NULL, NULL},
 	{"Btree", btree_redo, btree_desc, btree_xlog_startup, btree_xlog_cleanup, btree_safe_restartpoint},
 	{"Hash", hash_redo, hash_desc, NULL, NULL, NULL},
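This one-line rmgr.c change is what routes the new XLOG_HEAP2_FREEZE records to heap2_redo/heap2_desc during recovery: replay looks up the record's resource-manager ID in RmgrTable and calls through the stored function pointers. A minimal toy model of that dispatch, with simplified stand-in types (the real loop lives in xlog.c's recovery code; the slot number 9 is inferred from the table above, where "Heap2" replaces "Reserved 9"):

#include <stdio.h>

typedef struct XLogRecord
{
    unsigned char xl_rmid;      /* resource manager that owns this record */
} XLogRecord;

typedef void (*RedoFn) (XLogRecord *record);

static void
heap2_redo_stub(XLogRecord *record)
{
    printf("dispatched to heap2 redo\n");
}

/* toy stand-in for RmgrTable; slots 0-8 left NULL for brevity */
static const RedoFn RmgrRedoTable[] = {
    [9] = heap2_redo_stub,      /* RM_HEAP2_ID takes over "Reserved 9" */
};

int
main(void)
{
    XLogRecord rec = {.xl_rmid = 9};

    /* recovery dispatches each record by its rmid */
    RmgrRedoTable[rec.xl_rmid](&rec);
    return 0;
}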
src/backend/access/transam/varsup.c

@@ -6,7 +6,7 @@
  * Copyright (c) 2000-2006, PostgreSQL Global Development Group
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.75 2006/10/04 00:29:49 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/varsup.c,v 1.76 2006/11/05 22:42:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -17,6 +17,8 @@
 #include "access/subtrans.h"
 #include "access/transam.h"
 #include "miscadmin.h"
+#include "postmaster/autovacuum.h"
+#include "storage/pmsignal.h"
 #include "storage/proc.h"
 #include "utils/builtins.h"
 
@@ -47,20 +49,31 @@ GetNewTransactionId(bool isSubXact)
 
 	xid = ShmemVariableCache->nextXid;
 
-	/*
+	/*----------
 	 * Check to see if it's safe to assign another XID. This protects against
 	 * catastrophic data loss due to XID wraparound. The basic rules are:
-	 * warn if we're past xidWarnLimit, and refuse to execute transactions if
-	 * we're past xidStopLimit, unless we are running in a standalone backend
-	 * (which gives an escape hatch to the DBA who ignored all those
-	 * warnings).
+	 *
+	 * If we're past xidVacLimit, start trying to force autovacuum cycles.
+	 * If we're past xidWarnLimit, start issuing warnings.
+	 * If we're past xidStopLimit, refuse to execute transactions, unless
+	 * we are running in a standalone backend (which gives an escape hatch
+	 * to the DBA who somehow got past the earlier defenses).
 	 *
 	 * Test is coded to fall out as fast as possible during normal operation,
-	 * ie, when the warn limit is set and we haven't violated it.
+	 * ie, when the vac limit is set and we haven't violated it.
+	 *----------
 	 */
-	if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidWarnLimit) &&
-		TransactionIdIsValid(ShmemVariableCache->xidWarnLimit))
+	if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidVacLimit) &&
+		TransactionIdIsValid(ShmemVariableCache->xidVacLimit))
 	{
+		/*
+		 * To avoid swamping the postmaster with signals, we issue the
+		 * autovac request only once per 64K transaction starts. This
+		 * still gives plenty of chances before we get into real trouble.
+		 */
+		if (IsUnderPostmaster && (xid % 65536) == 0)
+			SendPostmasterSignal(PMSIGNAL_START_AUTOVAC);
+
 		if (IsUnderPostmaster &&
 			TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidStopLimit))
 			ereport(ERROR,
@@ -69,7 +82,7 @@ GetNewTransactionId(bool isSubXact)
 					NameStr(ShmemVariableCache->limit_datname)),
 			 errhint("Stop the postmaster and use a standalone backend to vacuum database \"%s\".",
 					 NameStr(ShmemVariableCache->limit_datname))));
-		else
+		else if (TransactionIdFollowsOrEquals(xid, ShmemVariableCache->xidWarnLimit))
 			ereport(WARNING,
 			(errmsg("database \"%s\" must be vacuumed within %u transactions",
 					NameStr(ShmemVariableCache->limit_datname),
@@ -178,28 +191,29 @@ ReadNewTransactionId(void)
 
 /*
  * Determine the last safe XID to allocate given the currently oldest
- * datminxid (ie, the oldest XID that might exist in any database
+ * datfrozenxid (ie, the oldest XID that might exist in any database
  * of our cluster).
  */
 void
-SetTransactionIdLimit(TransactionId oldest_datminxid,
+SetTransactionIdLimit(TransactionId oldest_datfrozenxid,
 					  Name oldest_datname)
 {
+	TransactionId xidVacLimit;
 	TransactionId xidWarnLimit;
 	TransactionId xidStopLimit;
 	TransactionId xidWrapLimit;
 	TransactionId curXid;
 
-	Assert(TransactionIdIsValid(oldest_datminxid));
+	Assert(TransactionIdIsNormal(oldest_datfrozenxid));
 
 	/*
 	 * The place where we actually get into deep trouble is halfway around
-	 * from the oldest existing XID. (This calculation is probably off by one
-	 * or two counts, because the special XIDs reduce the size of the loop a
-	 * little bit. But we throw in plenty of slop below, so it doesn't
-	 * matter.)
+	 * from the oldest potentially-existing XID. (This calculation is
+	 * probably off by one or two counts, because the special XIDs reduce the
+	 * size of the loop a little bit. But we throw in plenty of slop below,
+	 * so it doesn't matter.)
	 */
-	xidWrapLimit = oldest_datminxid + (MaxTransactionId >> 1);
+	xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1);
 	if (xidWrapLimit < FirstNormalTransactionId)
 		xidWrapLimit += FirstNormalTransactionId;
 
@@ -229,8 +243,28 @@ SetTransactionIdLimit(TransactionId oldest_datminxid,
 	if (xidWarnLimit < FirstNormalTransactionId)
 		xidWarnLimit -= FirstNormalTransactionId;
 
+	/*
+	 * We'll start trying to force autovacuums when oldest_datfrozenxid
+	 * gets to be more than autovacuum_freeze_max_age transactions old.
+	 *
+	 * Note: guc.c ensures that autovacuum_freeze_max_age is in a sane
+	 * range, so that xidVacLimit will be well before xidWarnLimit.
+	 *
+	 * Note: autovacuum_freeze_max_age is a PGC_POSTMASTER parameter so that
+	 * we don't have to worry about dealing with on-the-fly changes in its
+	 * value. It doesn't look practical to update shared state from a GUC
+	 * assign hook (too many processes would try to execute the hook,
+	 * resulting in race conditions as well as crashes of those not
+	 * connected to shared memory). Perhaps this can be improved someday.
+	 */
+	xidVacLimit = oldest_datfrozenxid + autovacuum_freeze_max_age;
+	if (xidVacLimit < FirstNormalTransactionId)
+		xidVacLimit += FirstNormalTransactionId;
+
 	/* Grab lock for just long enough to set the new limit values */
 	LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
+	ShmemVariableCache->oldestXid = oldest_datfrozenxid;
+	ShmemVariableCache->xidVacLimit = xidVacLimit;
 	ShmemVariableCache->xidWarnLimit = xidWarnLimit;
 	ShmemVariableCache->xidStopLimit = xidStopLimit;
 	ShmemVariableCache->xidWrapLimit = xidWrapLimit;
@@ -242,6 +276,18 @@ SetTransactionIdLimit(TransactionId oldest_datminxid,
 	ereport(DEBUG1,
 	   (errmsg("transaction ID wrap limit is %u, limited by database \"%s\"",
 			   xidWrapLimit, NameStr(*oldest_datname))));
+
+	/*
+	 * If past the autovacuum force point, immediately signal an autovac
+	 * request. The reason for this is that autovac only processes one
+	 * database per invocation. Once it's finished cleaning up the oldest
+	 * database, it'll call here, and we'll signal the postmaster to start
+	 * another iteration immediately if there are still any old databases.
+	 */
+	if (TransactionIdFollowsOrEquals(curXid, xidVacLimit) &&
+		IsUnderPostmaster)
+		SendPostmasterSignal(PMSIGNAL_START_AUTOVAC);
+
 	/* Give an immediate warning if past the wrap warn point */
 	if (TransactionIdFollowsOrEquals(curXid, xidWarnLimit))
 		ereport(WARNING,
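Taken together, the new code sets up a ladder of limits, all derived from the oldest datfrozenxid with wraparound-aware 32-bit arithmetic: xidVacLimit (force autovacuum) comes long before xidWarnLimit (complain), which comes before xidStopLimit (refuse new XIDs), which sits just short of xidWrapLimit (actual data loss). A standalone sketch with example numbers; the 1M/10M slop constants follow the code of this era and should be treated as assumptions, and autovacuum_freeze_max_age uses its documented default of 200 million:

#include <stdio.h>
#include <stdint.h>

typedef uint32_t TransactionId;

#define MaxTransactionId            ((TransactionId) 0xFFFFFFFF)
#define FirstNormalTransactionId    ((TransactionId) 3)

int
main(void)
{
    TransactionId oldest_datfrozenxid = 500000000;      /* example input */
    TransactionId autovacuum_freeze_max_age = 200000000;        /* default GUC */
    TransactionId xidWrapLimit, xidStopLimit, xidWarnLimit, xidVacLimit;

    /* deep trouble begins halfway around from the oldest existing XID */
    xidWrapLimit = oldest_datfrozenxid + (MaxTransactionId >> 1);
    if (xidWrapLimit < FirstNormalTransactionId)
        xidWrapLimit += FirstNormalTransactionId;       /* skip special XIDs */

    /* refuse to assign new XIDs beginning 1M transactions before wrap */
    xidStopLimit = xidWrapLimit - 1000000;
    if (xidStopLimit < FirstNormalTransactionId)
        xidStopLimit -= FirstNormalTransactionId;

    /* start complaining 10M transactions before the stop point */
    xidWarnLimit = xidStopLimit - 10000000;
    if (xidWarnLimit < FirstNormalTransactionId)
        xidWarnLimit -= FirstNormalTransactionId;

    /* start forcing autovacuums much earlier than any of the above */
    xidVacLimit = oldest_datfrozenxid + autovacuum_freeze_max_age;
    if (xidVacLimit < FirstNormalTransactionId)
        xidVacLimit += FirstNormalTransactionId;

    printf("vac=%u warn=%u stop=%u wrap=%u\n",
           xidVacLimit, xidWarnLimit, xidStopLimit, xidWrapLimit);
    return 0;
}

With these inputs the ladder comes out as vac=700000000, warn=2636483647, stop=2646483647, wrap=2647483647, i.e. autovacuum gets forced roughly 1.9 billion transactions before the warning zone even starts.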
src/backend/access/transam/xact.c

@@ -10,7 +10,7 @@
  *
  *
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.227 2006/10/04 00:29:49 momjian Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/transam/xact.c,v 1.228 2006/11/05 22:42:07 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -468,8 +468,12 @@ TransactionIdIsCurrentTransactionId(TransactionId xid)
 	 * is what we need during bootstrap. (Bootstrap mode only inserts tuples,
 	 * it never updates or deletes them, so all tuples can be presumed good
 	 * immediately.)
+	 *
+	 * Likewise, InvalidTransactionId and FrozenTransactionId are certainly
+	 * not my transaction ID, so we can just return "false" immediately for
+	 * any non-normal XID.
	 */
-	if (xid == BootstrapTransactionId)
+	if (!TransactionIdIsNormal(xid))
 		return false;
 
 	/*
src/backend/access/transam/xlog.c

@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2006, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.252 2006/10/18 22:44:11 tgl Exp $
+ * $PostgreSQL: pgsql/src/backend/access/transam/xlog.c,v 1.253 2006/11/05 22:42:08 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -5343,36 +5343,6 @@ GetLastSegSwitchTime(void)
 	return result;
 }
 
-/*
- * GetRecentNextXid - get the nextXid value saved by the most recent checkpoint
- *
- * This is currently used only by the autovacuum daemon. To check for
- * impending XID wraparound, autovac needs an approximate idea of the current
- * XID counter, and it needs it before choosing which DB to attach to, hence
- * before it sets up a PGPROC, hence before it can take any LWLocks. But it
- * has attached to shared memory, and so we can let it reach into the shared
- * ControlFile structure and pull out the last checkpoint nextXID.
- *
- * Since we don't take any sort of lock, we have to assume that reading a
- * TransactionId is atomic ... but that assumption is made elsewhere, too,
- * and in any case the worst possible consequence of a bogus result is that
- * autovac issues an unnecessary database-wide VACUUM.
- *
- * Note: we could also choose to read ShmemVariableCache->nextXid in an
- * unlocked fashion, thus getting a more up-to-date result; but since that
- * changes far more frequently than the controlfile checkpoint copy, it would
- * pose a far higher risk of bogus result if we did have a nonatomic-read
- * problem.
- *
- * A (theoretically) completely safe answer is to read the actual pg_control
- * file into local process memory, but that certainly seems like overkill.
- */
-TransactionId
-GetRecentNextXid(void)
-{
-	return ControlFile->checkPointCopy.nextXid;
-}
-
 /*
  * GetNextXidAndEpoch - get the current nextXid value and associated epoch
  *
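All of the cutoff tests in this commit (TransactionIdPrecedes, TransactionIdFollowsOrEquals) rest on circular XID comparison: XIDs live on a ring of 2^32 values, and "older" means "behind by less than 2^31". A standalone sketch of the core rule, simplified from transam.c (the real function first special-cases the non-normal XIDs that heap_freeze_tuple substitutes):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t TransactionId;

/* simplified TransactionIdPrecedes: is id1 logically older than id2? */
static int
TransactionIdPrecedes(TransactionId id1, TransactionId id2)
{
    int32_t     diff = (int32_t) (id1 - id2);

    return diff < 0;
}

int
main(void)
{
    /* the ordinary case: 100 is older than 200 */
    printf("%d\n", TransactionIdPrecedes(100, 200));    /* prints 1 */

    /* across wraparound: XID 10 of the next cycle is *newer* than 4000000000 */
    printf("%d\n", TransactionIdPrecedes(4000000000u, 10));     /* prints 1 */
    return 0;
}

This is exactly why frozen tuples must be stamped with the special FrozenTransactionId rather than left alone: once the counter laps them, their original XIDs would otherwise start comparing as being in the future.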