Mirror of https://github.com/postgres/postgres.git
pgindent run for 8.3.
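
pgindent is PostgreSQL's automated source formatter; a run like this one re-wraps multi-line comments to the standard width and re-aligns variable and prototype declarations without changing any behavior. As a representative sketch of the kind of change in this commit, the heap_tuple_attr_equals() hunk in heapam.c turns a pair of compact declarations into pgindent's one-declarator-per-line layout (the exact tab widths shown here are approximate, since the original whitespace was lost in rendering):

    /* before pgindent */
    Datum       value1, value2;
    bool        isnull1, isnull2;

    /* after pgindent: one declarator per line, aligned on the declaration column */
    Datum       value1,
                value2;
    bool        isnull1,
                isnull2;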
src/backend/access/heap/heapam.c  (1.244 -> 1.245)

@@ -8,7 +8,7 @@
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.244 2007/11/07 12:24:24 petere Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/heapam.c,v 1.245 2007/11/15 21:14:32 momjian Exp $

Several dozen further hunks in this file are pgindent formatting changes only: multi-line comments re-wrapped to the standard width, and local variable declarations, prototypes and trailing comments re-aligned. They fall in initscan, heapgettup, heapgettup_pagemode, heap_openrv, heap_hot_search_buffer, heap_hot_search, heap_get_latest_tid, heap_delete, heap_update, heap_tuple_attr_equals, HeapSatisfiesHOTUpdate, heap_lock_tuple, heap_freeze_tuple, log_heap_clean, log_heap_freeze, log_heap_move, heap_xlog_clean, heap_desc and heap_sync.
src/backend/access/heap/pruneheap.c  (1.3 -> 1.4)

@@ -8,7 +8,7 @@
  * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.3 2007/10/24 13:05:57 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/access/heap/pruneheap.c,v 1.4 2007/11/15 21:14:32 momjian Exp $

Roughly two dozen further hunks apply the same pgindent reformatting: the local function prototypes at the top of the file and the declarations in heap_page_prune, heap_prune_chain and heap_get_root_tuples are re-aligned, and comments in heap_page_prune_opt, heap_page_prune, heap_prune_chain and heap_prune_record_redirect are re-wrapped.
@ -10,7 +10,7 @@
|
||||
*
|
||||
* The caller is responsible for creating the new heap, all catalog
|
||||
* changes, supplying the tuples to be written to the new heap, and
|
||||
* rebuilding indexes. The caller must hold AccessExclusiveLock on the
|
||||
* rebuilding indexes. The caller must hold AccessExclusiveLock on the
|
||||
* target table, because we assume no one else is writing into it.
|
||||
*
|
||||
* To use the facility:
|
||||
@ -18,13 +18,13 @@
|
||||
* begin_heap_rewrite
|
||||
* while (fetch next tuple)
|
||||
* {
|
||||
* if (tuple is dead)
|
||||
* rewrite_heap_dead_tuple
|
||||
* else
|
||||
* {
|
||||
* // do any transformations here if required
|
||||
* rewrite_heap_tuple
|
||||
* }
|
||||
* if (tuple is dead)
|
||||
* rewrite_heap_dead_tuple
|
||||
* else
|
||||
* {
|
||||
* // do any transformations here if required
|
||||
* rewrite_heap_tuple
|
||||
* }
|
||||
* }
|
||||
* end_heap_rewrite
|
||||
*
|
||||
@ -43,7 +43,7 @@
|
||||
* to substitute the correct ctid instead.
|
||||
*
|
||||
* For each ctid reference from A -> B, we might encounter either A first
|
||||
* or B first. (Note that a tuple in the middle of a chain is both A and B
|
||||
* or B first. (Note that a tuple in the middle of a chain is both A and B
|
||||
* of different pairs.)
|
||||
*
|
||||
* If we encounter A first, we'll store the tuple in the unresolved_tups
|
||||
@ -58,11 +58,11 @@
|
||||
* and can write A immediately with the correct ctid.
|
||||
*
|
||||
* Entries in the hash tables can be removed as soon as the later tuple
|
||||
* is encountered. That helps to keep the memory usage down. At the end,
|
||||
* is encountered. That helps to keep the memory usage down. At the end,
|
||||
* both tables are usually empty; we should have encountered both A and B
|
||||
* of each pair. However, it's possible for A to be RECENTLY_DEAD and B
|
||||
* entirely DEAD according to HeapTupleSatisfiesVacuum, because the test
|
||||
* for deadness using OldestXmin is not exact. In such a case we might
|
||||
* for deadness using OldestXmin is not exact. In such a case we might
|
||||
* encounter B first, and skip it, and find A later. Then A would be added
|
||||
* to unresolved_tups, and stay there until end of the rewrite. Since
|
||||
* this case is very unusual, we don't worry about the memory usage.
|
||||
@ -78,7 +78,7 @@
|
||||
* of CLUSTERing on an unchanging key column, we'll see all the versions
|
||||
* of a given tuple together anyway, and so the peak memory usage is only
|
||||
* proportional to the number of RECENTLY_DEAD versions of a single row, not
|
||||
* in the whole table. Note that if we do fail halfway through a CLUSTER,
|
||||
* in the whole table. Note that if we do fail halfway through a CLUSTER,
|
||||
* the old table is still valid, so failure is not catastrophic.
|
||||
*
|
||||
* We can't use the normal heap_insert function to insert into the new
|
||||
@ -96,7 +96,7 @@
|
||||
* Portions Copyright (c) 1994-5, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.7 2007/09/20 17:56:30 tgl Exp $
|
||||
* $PostgreSQL: pgsql/src/backend/access/heap/rewriteheap.c,v 1.8 2007/11/15 21:14:32 momjian Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -116,20 +116,20 @@
|
||||
*/
|
||||
typedef struct RewriteStateData
|
||||
{
|
||||
Relation rs_new_rel; /* destination heap */
|
||||
Page rs_buffer; /* page currently being built */
|
||||
BlockNumber rs_blockno; /* block where page will go */
|
||||
bool rs_buffer_valid; /* T if any tuples in buffer */
|
||||
bool rs_use_wal; /* must we WAL-log inserts? */
|
||||
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
|
||||
Relation rs_new_rel; /* destination heap */
|
||||
Page rs_buffer; /* page currently being built */
|
||||
BlockNumber rs_blockno; /* block where page will go */
|
||||
bool rs_buffer_valid; /* T if any tuples in buffer */
|
||||
bool rs_use_wal; /* must we WAL-log inserts? */
|
||||
TransactionId rs_oldest_xmin; /* oldest xmin used by caller to
|
||||
* determine tuple visibility */
|
||||
TransactionId rs_freeze_xid; /* Xid that will be used as freeze
|
||||
* cutoff point */
|
||||
MemoryContext rs_cxt; /* for hash tables and entries and
|
||||
* tuples in them */
|
||||
HTAB *rs_unresolved_tups; /* unmatched A tuples */
|
||||
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
|
||||
} RewriteStateData;
|
||||
TransactionId rs_freeze_xid;/* Xid that will be used as freeze cutoff
|
||||
* point */
|
||||
MemoryContext rs_cxt; /* for hash tables and entries and tuples in
|
||||
* them */
|
||||
HTAB *rs_unresolved_tups; /* unmatched A tuples */
|
||||
HTAB *rs_old_new_tid_map; /* unmatched B tuples */
|
||||
} RewriteStateData;
|
||||
|
||||
/*
|
||||
* The lookup keys for the hash tables are tuple TID and xmin (we must check
|
||||
@ -139,27 +139,27 @@ typedef struct RewriteStateData
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
TransactionId xmin; /* tuple xmin */
|
||||
TransactionId xmin; /* tuple xmin */
|
||||
ItemPointerData tid; /* tuple location in old heap */
|
||||
} TidHashKey;
|
||||
} TidHashKey;
|
||||
|
||||
/*
|
||||
* Entry structures for the hash tables
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
TidHashKey key; /* expected xmin/old location of B tuple */
|
||||
TidHashKey key; /* expected xmin/old location of B tuple */
|
||||
ItemPointerData old_tid; /* A's location in the old heap */
|
||||
HeapTuple tuple; /* A's tuple contents */
|
||||
} UnresolvedTupData;
|
||||
HeapTuple tuple; /* A's tuple contents */
|
||||
} UnresolvedTupData;
|
||||
|
||||
typedef UnresolvedTupData *UnresolvedTup;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
TidHashKey key; /* actual xmin/old location of B tuple */
|
||||
TidHashKey key; /* actual xmin/old location of B tuple */
|
||||
ItemPointerData new_tid; /* where we put it in the new heap */
|
||||
} OldToNewMappingData;
|
||||
} OldToNewMappingData;
|
||||
|
||||
typedef OldToNewMappingData *OldToNewMapping;
|
||||
|
||||
@ -189,8 +189,8 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
|
||||
HASHCTL hash_ctl;
|
||||
|
||||
/*
|
||||
* To ease cleanup, make a separate context that will contain
|
||||
* the RewriteState struct itself plus all subsidiary data.
|
||||
* To ease cleanup, make a separate context that will contain the
|
||||
* RewriteState struct itself plus all subsidiary data.
|
||||
*/
|
||||
rw_cxt = AllocSetContextCreate(CurrentMemoryContext,
|
||||
"Table rewrite",
|
||||
@ -221,7 +221,7 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
|
||||
|
||||
state->rs_unresolved_tups =
|
||||
hash_create("Rewrite / Unresolved ctids",
|
||||
128, /* arbitrary initial size */
|
||||
128, /* arbitrary initial size */
|
||||
&hash_ctl,
|
||||
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
||||
|
||||
@ -229,7 +229,7 @@ begin_heap_rewrite(Relation new_heap, TransactionId oldest_xmin,
|
||||
|
||||
state->rs_old_new_tid_map =
|
||||
hash_create("Rewrite / Old to new tid map",
|
||||
128, /* arbitrary initial size */
|
||||
128, /* arbitrary initial size */
|
||||
&hash_ctl,
|
||||
HASH_ELEM | HASH_FUNCTION | HASH_CONTEXT);
|
||||
|
||||
@ -250,8 +250,8 @@ end_heap_rewrite(RewriteState state)
|
||||
UnresolvedTup unresolved;
|
||||
|
||||
/*
|
||||
* Write any remaining tuples in the UnresolvedTups table. If we have
|
||||
* any left, they should in fact be dead, but let's err on the safe side.
|
||||
* Write any remaining tuples in the UnresolvedTups table. If we have any
|
||||
* left, they should in fact be dead, but let's err on the safe side.
|
||||
*
|
||||
* XXX this really is a waste of code no?
|
||||
*/
|
||||
@ -276,15 +276,15 @@ end_heap_rewrite(RewriteState state)
|
||||
}
|
||||
|
||||
/*
|
||||
* If the rel isn't temp, must fsync before commit. We use heap_sync
|
||||
* to ensure that the toast table gets fsync'd too.
|
||||
* If the rel isn't temp, must fsync before commit. We use heap_sync to
|
||||
* ensure that the toast table gets fsync'd too.
|
||||
*
|
||||
* It's obvious that we must do this when not WAL-logging. It's less
|
||||
* obvious that we have to do it even if we did WAL-log the pages.
|
||||
* The reason is the same as in tablecmds.c's copy_relation_data():
|
||||
* we're writing data that's not in shared buffers, and so a CHECKPOINT
|
||||
* occurring during the rewriteheap operation won't have fsync'd data
|
||||
* we wrote before the checkpoint.
|
||||
* obvious that we have to do it even if we did WAL-log the pages. The
|
||||
* reason is the same as in tablecmds.c's copy_relation_data(): we're
|
||||
* writing data that's not in shared buffers, and so a CHECKPOINT
|
||||
* occurring during the rewriteheap operation won't have fsync'd data we
|
||||
* wrote before the checkpoint.
|
||||
*/
|
||||
if (!state->rs_new_rel->rd_istemp)
|
||||
heap_sync(state->rs_new_rel);
|
||||
@ -310,17 +310,17 @@ rewrite_heap_tuple(RewriteState state,
|
||||
{
|
||||
MemoryContext old_cxt;
|
||||
ItemPointerData old_tid;
|
||||
TidHashKey hashkey;
|
||||
bool found;
|
||||
bool free_new;
|
||||
TidHashKey hashkey;
|
||||
bool found;
|
||||
bool free_new;
|
||||
|
||||
old_cxt = MemoryContextSwitchTo(state->rs_cxt);
|
||||
|
||||
/*
|
||||
* Copy the original tuple's visibility information into new_tuple.
|
||||
*
|
||||
* XXX we might later need to copy some t_infomask2 bits, too?
|
||||
* Right now, we intentionally clear the HOT status bits.
|
||||
* XXX we might later need to copy some t_infomask2 bits, too? Right now,
|
||||
* we intentionally clear the HOT status bits.
|
||||
*/
|
||||
memcpy(&new_tuple->t_data->t_choice.t_heap,
|
||||
&old_tuple->t_data->t_choice.t_heap,
|
||||
@ -335,16 +335,16 @@ rewrite_heap_tuple(RewriteState state,
|
||||
* While we have our hands on the tuple, we may as well freeze any
|
||||
* very-old xmin or xmax, so that future VACUUM effort can be saved.
|
||||
*
|
||||
* Note we abuse heap_freeze_tuple() a bit here, since it's expecting
|
||||
* to be given a pointer to a tuple in a disk buffer. It happens
|
||||
* though that we can get the right things to happen by passing
|
||||
* InvalidBuffer for the buffer.
|
||||
* Note we abuse heap_freeze_tuple() a bit here, since it's expecting to
|
||||
* be given a pointer to a tuple in a disk buffer. It happens though that
|
||||
* we can get the right things to happen by passing InvalidBuffer for the
|
||||
* buffer.
|
||||
*/
|
||||
heap_freeze_tuple(new_tuple->t_data, state->rs_freeze_xid, InvalidBuffer);
|
||||
|
||||
/*
|
||||
* Invalid ctid means that ctid should point to the tuple itself.
|
||||
* We'll override it later if the tuple is part of an update chain.
|
||||
* Invalid ctid means that ctid should point to the tuple itself. We'll
|
||||
* override it later if the tuple is part of an update chain.
|
||||
*/
|
||||
ItemPointerSetInvalid(&new_tuple->t_data->t_ctid);
|
||||
|
||||
@ -369,9 +369,9 @@ rewrite_heap_tuple(RewriteState state,
|
||||
if (mapping != NULL)
|
||||
{
|
||||
/*
|
||||
* We've already copied the tuple that t_ctid points to, so we
|
||||
* can set the ctid of this tuple to point to the new location,
|
||||
* and insert it right away.
|
||||
* We've already copied the tuple that t_ctid points to, so we can
|
||||
* set the ctid of this tuple to point to the new location, and
|
||||
* insert it right away.
|
||||
*/
|
||||
new_tuple->t_data->t_ctid = mapping->new_tid;
|
||||
|
||||
@ -405,10 +405,10 @@ rewrite_heap_tuple(RewriteState state,
|
||||
}
|
||||
|
||||
/*
|
||||
* Now we will write the tuple, and then check to see if it is the
|
||||
* B tuple in any new or known pair. When we resolve a known pair,
|
||||
* we will be able to write that pair's A tuple, and then we have to
|
||||
* check if it resolves some other pair. Hence, we need a loop here.
|
||||
* Now we will write the tuple, and then check to see if it is the B tuple
|
||||
* in any new or known pair. When we resolve a known pair, we will be
|
||||
* able to write that pair's A tuple, and then we have to check if it
|
||||
* resolves some other pair. Hence, we need a loop here.
|
||||
*/
|
||||
old_tid = old_tuple->t_self;
|
||||
free_new = false;
|
||||
@ -422,13 +422,12 @@ rewrite_heap_tuple(RewriteState state,
new_tid = new_tuple->t_self;

/*
* If the tuple is the updated version of a row, and the prior
* version wouldn't be DEAD yet, then we need to either resolve
* the prior version (if it's waiting in rs_unresolved_tups),
* or make an entry in rs_old_new_tid_map (so we can resolve it
* when we do see it). The previous tuple's xmax would equal this
* one's xmin, so it's RECENTLY_DEAD if and only if the xmin is
* not before OldestXmin.
* If the tuple is the updated version of a row, and the prior version
* wouldn't be DEAD yet, then we need to either resolve the prior
* version (if it's waiting in rs_unresolved_tups), or make an entry
* in rs_old_new_tid_map (so we can resolve it when we do see it).
* The previous tuple's xmax would equal this one's xmin, so it's
* RECENTLY_DEAD if and only if the xmin is not before OldestXmin.
*/
if ((new_tuple->t_data->t_infomask & HEAP_UPDATED) &&
!TransactionIdPrecedes(HeapTupleHeaderGetXmin(new_tuple->t_data),
@ -449,9 +448,9 @@ rewrite_heap_tuple(RewriteState state,
if (unresolved != NULL)
{
/*
* We have seen and memorized the previous tuple already.
* Now that we know where we inserted the tuple its t_ctid
* points to, fix its t_ctid and insert it to the new heap.
* We have seen and memorized the previous tuple already. Now
* that we know where we inserted the tuple its t_ctid points
* to, fix its t_ctid and insert it to the new heap.
*/
if (free_new)
heap_freetuple(new_tuple);
@ -461,8 +460,8 @@ rewrite_heap_tuple(RewriteState state,
new_tuple->t_data->t_ctid = new_tid;

/*
* We don't need the hash entry anymore, but don't free
* its tuple just yet.
* We don't need the hash entry anymore, but don't free its
* tuple just yet.
*/
hash_search(state->rs_unresolved_tups, &hashkey,
HASH_REMOVE, &found);
@ -474,8 +473,8 @@ rewrite_heap_tuple(RewriteState state,
else
{
/*
* Remember the new tid of this tuple. We'll use it to set
* the ctid when we find the previous tuple in the chain.
* Remember the new tid of this tuple. We'll use it to set the
* ctid when we find the previous tuple in the chain.
*/
OldToNewMapping mapping;

@ -506,22 +505,22 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
{
/*
* If we have already seen an earlier tuple in the update chain that
* points to this tuple, let's forget about that earlier tuple. It's
* in fact dead as well, our simple xmax < OldestXmin test in
* HeapTupleSatisfiesVacuum just wasn't enough to detect it. It
* happens when xmin of a tuple is greater than xmax, which sounds
* points to this tuple, let's forget about that earlier tuple. It's in
* fact dead as well, our simple xmax < OldestXmin test in
* HeapTupleSatisfiesVacuum just wasn't enough to detect it. It happens
* when xmin of a tuple is greater than xmax, which sounds
* counter-intuitive but is perfectly valid.
*
* We don't bother to try to detect the situation the other way
* round, when we encounter the dead tuple first and then the
* recently dead one that points to it. If that happens, we'll
* have some unmatched entries in the UnresolvedTups hash table
* at the end. That can happen anyway, because a vacuum might
* have removed the dead tuple in the chain before us.
* We don't bother to try to detect the situation the other way round,
* when we encounter the dead tuple first and then the recently dead one
* that points to it. If that happens, we'll have some unmatched entries
* in the UnresolvedTups hash table at the end. That can happen anyway,
* because a vacuum might have removed the dead tuple in the chain before
* us.
*/
UnresolvedTup unresolved;
TidHashKey hashkey;
bool found;
TidHashKey hashkey;
bool found;

memset(&hashkey, 0, sizeof(hashkey));
hashkey.xmin = HeapTupleHeaderGetXmin(old_tuple->t_data);
@ -541,7 +540,7 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
}

/*
* Insert a tuple to the new relation. This has to track heap_insert
* Insert a tuple to the new relation. This has to track heap_insert
* and its subsidiary functions!
*
* t_self of the tuple is set to the new TID of the tuple. If t_ctid of the
@ -551,11 +550,12 @@ rewrite_heap_dead_tuple(RewriteState state, HeapTuple old_tuple)
static void
raw_heap_insert(RewriteState state, HeapTuple tup)
{
Page page = state->rs_buffer;
Size pageFreeSpace, saveFreeSpace;
Size len;
OffsetNumber newoff;
HeapTuple heaptup;
Page page = state->rs_buffer;
Size pageFreeSpace,
saveFreeSpace;
Size len;
OffsetNumber newoff;
HeapTuple heaptup;

/*
* If the new tuple is too big for storage or contains already toasted
@ -610,7 +610,8 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
/*
* Now write the page. We say isTemp = true even if it's not a
* temp table, because there's no need for smgr to schedule an
* fsync for this write; we'll do it ourselves in end_heap_rewrite.
* fsync for this write; we'll do it ourselves in
* end_heap_rewrite.
*/
RelationOpenSmgr(state->rs_new_rel);
smgrextend(state->rs_new_rel->rd_smgr, state->rs_blockno,
@ -638,12 +639,12 @@ raw_heap_insert(RewriteState state, HeapTuple tup)
ItemPointerSet(&(tup->t_self), state->rs_blockno, newoff);

/*
* Insert the correct position into CTID of the stored tuple, too,
* if the caller didn't supply a valid CTID.
* Insert the correct position into CTID of the stored tuple, too, if the
* caller didn't supply a valid CTID.
*/
if(!ItemPointerIsValid(&tup->t_data->t_ctid))
if (!ItemPointerIsValid(&tup->t_data->t_ctid))
{
ItemId newitemid;
ItemId newitemid;
HeapTupleHeader onpage_tup;

newitemid = PageGetItemId(page, newoff);

@ -4,7 +4,7 @@
* heap scan synchronization support
*
* When multiple backends run a sequential scan on the same table, we try
* to keep them synchronized to reduce the overall I/O needed. The goal is
* to keep them synchronized to reduce the overall I/O needed. The goal is
* to read each page into shared buffer cache only once, and let all backends
* that take part in the shared scan process the page before it falls out of
* the cache.
@ -26,7 +26,7 @@
* don't want such queries to slow down others.
*
* There can realistically only be a few large sequential scans on different
* tables in progress at any time. Therefore we just keep the scan positions
* tables in progress at any time. Therefore we just keep the scan positions
* in a small LRU list which we scan every time we need to look up or update a
* scan position. The whole mechanism is only applied for tables exceeding
* a threshold size (but that is not the concern of this module).
@ -40,7 +40,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/syncscan.c,v 1.1 2007/06/08 18:23:52 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/syncscan.c,v 1.2 2007/11/15 21:14:32 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@ -52,7 +52,7 @@

/* GUC variables */
#ifdef TRACE_SYNCSCAN
bool trace_syncscan = false;
bool trace_syncscan = false;
#endif


@ -89,21 +89,21 @@ typedef struct ss_scan_location_t
{
RelFileNode relfilenode; /* identity of a relation */
BlockNumber location; /* last-reported location in the relation */
} ss_scan_location_t;
} ss_scan_location_t;

typedef struct ss_lru_item_t
{
struct ss_lru_item_t *prev;
struct ss_lru_item_t *next;
ss_scan_location_t location;
} ss_lru_item_t;
struct ss_lru_item_t *prev;
struct ss_lru_item_t *next;
ss_scan_location_t location;
} ss_lru_item_t;

typedef struct ss_scan_locations_t
{
ss_lru_item_t *head;
ss_lru_item_t *tail;
ss_lru_item_t items[1]; /* SYNC_SCAN_NELEM items */
} ss_scan_locations_t;
ss_lru_item_t *head;
ss_lru_item_t *tail;
ss_lru_item_t items[1]; /* SYNC_SCAN_NELEM items */
} ss_scan_locations_t;

#define SizeOfScanLocations(N) offsetof(ss_scan_locations_t, items[N])

@ -112,7 +112,7 @@ static ss_scan_locations_t *scan_locations;

/* prototypes for internal functions */
static BlockNumber ss_search(RelFileNode relfilenode,
BlockNumber location, bool set);
BlockNumber location, bool set);


/*
@ -130,8 +130,8 @@ SyncScanShmemSize(void)
void
SyncScanShmemInit(void)
{
int i;
bool found;
int i;
bool found;

scan_locations = (ss_scan_locations_t *)
ShmemInitStruct("Sync Scan Locations List",
@ -186,20 +186,20 @@ SyncScanShmemInit(void)
static BlockNumber
ss_search(RelFileNode relfilenode, BlockNumber location, bool set)
{
ss_lru_item_t *item;
ss_lru_item_t *item;

item = scan_locations->head;
for (;;)
{
bool match;
bool match;

match = RelFileNodeEquals(item->location.relfilenode, relfilenode);

if (match || item->next == NULL)
{
/*
* If we reached the end of list and no match was found,
* take over the last entry
* If we reached the end of list and no match was found, take over
* the last entry
*/
if (!match)
{
@ -242,7 +242,7 @@ ss_search(RelFileNode relfilenode, BlockNumber location, bool set)
* relation, or 0 if no valid location is found.
*
* We expect the caller has just done RelationGetNumberOfBlocks(), and
* so that number is passed in rather than computing it again. The result
* so that number is passed in rather than computing it again. The result
* is guaranteed less than relnblocks (assuming that's > 0).
*/
BlockNumber
@ -257,8 +257,8 @@ ss_get_location(Relation rel, BlockNumber relnblocks)
/*
* If the location is not a valid block number for this scan, start at 0.
*
* This can happen if for instance a VACUUM truncated the table
* since the location was saved.
* This can happen if for instance a VACUUM truncated the table since the
* location was saved.
*/
if (startloc >= relnblocks)
startloc = 0;
@ -294,12 +294,12 @@ ss_report_location(Relation rel, BlockNumber location)
#endif

/*
* To reduce lock contention, only report scan progress every N pages.
* For the same reason, don't block if the lock isn't immediately
* available. Missing a few updates isn't critical, it just means that a
* new scan that wants to join the pack will start a little bit behind the
* head of the scan. Hopefully the pages are still in OS cache and the
* scan catches up quickly.
* To reduce lock contention, only report scan progress every N pages. For
* the same reason, don't block if the lock isn't immediately available.
* Missing a few updates isn't critical, it just means that a new scan
* that wants to join the pack will start a little bit behind the head of
* the scan. Hopefully the pages are still in OS cache and the scan
* catches up quickly.
*/
if ((location % SYNC_SCAN_REPORT_INTERVAL) == 0)
{

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.78 2007/10/11 18:19:58 tgl Exp $
* $PostgreSQL: pgsql/src/backend/access/heap/tuptoaster.c,v 1.79 2007/11/15 21:14:32 momjian Exp $
*
*
* INTERFACE ROUTINES
@ -72,9 +72,9 @@ do { \

static void toast_delete_datum(Relation rel, Datum value);
static Datum toast_save_datum(Relation rel, Datum value,
bool use_wal, bool use_fsm);
static struct varlena *toast_fetch_datum(struct varlena *attr);
static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
bool use_wal, bool use_fsm);
static struct varlena *toast_fetch_datum(struct varlena * attr);
static struct varlena *toast_fetch_datum_slice(struct varlena * attr,
int32 sliceoffset, int32 length);


@ -90,9 +90,9 @@ static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
----------
*/
struct varlena *
heap_tuple_fetch_attr(struct varlena *attr)
heap_tuple_fetch_attr(struct varlena * attr)
{
struct varlena *result;
struct varlena *result;

if (VARATT_IS_EXTERNAL(attr))
{
@ -121,7 +121,7 @@ heap_tuple_fetch_attr(struct varlena *attr)
* ----------
*/
struct varlena *
heap_tuple_untoast_attr(struct varlena *attr)
heap_tuple_untoast_attr(struct varlena * attr)
{
if (VARATT_IS_EXTERNAL(attr))
{
@ -156,8 +156,8 @@ heap_tuple_untoast_attr(struct varlena *attr)
/*
* This is a short-header varlena --- convert to 4-byte header format
*/
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
Size new_size = data_size + VARHDRSZ;
Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
Size new_size = data_size + VARHDRSZ;
struct varlena *new_attr;

new_attr = (struct varlena *) palloc(new_size);
@ -178,12 +178,12 @@ heap_tuple_untoast_attr(struct varlena *attr)
* ----------
*/
struct varlena *
heap_tuple_untoast_attr_slice(struct varlena *attr,
heap_tuple_untoast_attr_slice(struct varlena * attr,
int32 sliceoffset, int32 slicelength)
{
struct varlena *preslice;
struct varlena *result;
char *attrdata;
char *attrdata;
int32 attrsize;

if (VARATT_IS_EXTERNAL(attr))
@ -205,7 +205,7 @@ heap_tuple_untoast_attr_slice(struct varlena *attr,
if (VARATT_IS_COMPRESSED(preslice))
{
PGLZ_Header *tmp = (PGLZ_Header *) preslice;
Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;
Size size = PGLZ_RAW_SIZE(tmp) + VARHDRSZ;

preslice = (struct varlena *) palloc(size);
SET_VARSIZE(preslice, size);
@ -300,7 +300,7 @@ toast_raw_datum_size(Datum value)
Size
toast_datum_size(Datum value)
{
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
struct varlena *attr = (struct varlena *) DatumGetPointer(value);
Size result;

if (VARATT_IS_EXTERNAL(attr))
@ -469,8 +469,8 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,

for (i = 0; i < numAttrs; i++)
{
struct varlena *old_value;
struct varlena *new_value;
struct varlena *old_value;
struct varlena *new_value;

if (oldtup != NULL)
{
@ -488,7 +488,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
VARATT_IS_EXTERNAL(old_value))
{
if (toast_isnull[i] || !VARATT_IS_EXTERNAL(new_value) ||
memcmp((char *) old_value, (char *) new_value,
memcmp((char *) old_value, (char *) new_value,
VARSIZE_EXTERNAL(old_value)) != 0)
{
/*
@ -543,7 +543,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,
* We took care of UPDATE above, so any external value we find
* still in the tuple must be someone else's we cannot reuse.
* Fetch it back (without decompression, unless we are forcing
* PLAIN storage). If necessary, we'll push it out as a new
* PLAIN storage). If necessary, we'll push it out as a new
* external value below.
*/
if (VARATT_IS_EXTERNAL(new_value))
@ -656,7 +656,7 @@ toast_insert_or_update(Relation rel, HeapTuple newtup, HeapTuple oldtup,

/*
* Second we look for attributes of attstorage 'x' or 'e' that are still
* inline. But skip this if there's no toast table to push them to.
* inline. But skip this if there's no toast table to push them to.
*/
while (heap_compute_data_size(tupleDesc,
toast_values, toast_isnull) > maxDataLen &&
@ -956,7 +956,7 @@ toast_flatten_tuple_attribute(Datum value,
has_nulls = true;
else if (att[i]->attlen == -1)
{
struct varlena *new_value;
struct varlena *new_value;

new_value = (struct varlena *) DatumGetPointer(toast_values[i]);
if (VARATT_IS_EXTERNAL(new_value) ||
@ -1046,7 +1046,8 @@ toast_compress_datum(Datum value)
Assert(!VARATT_IS_COMPRESSED(value));

/*
* No point in wasting a palloc cycle if value is too short for compression
* No point in wasting a palloc cycle if value is too short for
* compression
*/
if (valsize < PGLZ_strategy_default->min_input_size)
return PointerGetDatum(NULL);
@ -1110,8 +1111,8 @@ toast_save_datum(Relation rel, Datum value,
/*
* Get the data pointer and length, and compute va_rawsize and va_extsize.
*
* va_rawsize is the size of the equivalent fully uncompressed datum,
* so we have to adjust for short headers.
* va_rawsize is the size of the equivalent fully uncompressed datum, so
* we have to adjust for short headers.
*
* va_extsize is the actual size of the data payload in the toast records.
*/
@ -1119,7 +1120,7 @@ toast_save_datum(Relation rel, Datum value,
{
data_p = VARDATA_SHORT(value);
data_todo = VARSIZE_SHORT(value) - VARHDRSZ_SHORT;
toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
toast_pointer.va_rawsize = data_todo + VARHDRSZ; /* as if not short */
toast_pointer.va_extsize = data_todo;
}
else if (VARATT_IS_COMPRESSED(value))
@ -1283,7 +1284,7 @@ toast_delete_datum(Relation rel, Datum value)
* ----------
*/
static struct varlena *
toast_fetch_datum(struct varlena *attr)
toast_fetch_datum(struct varlena * attr)
{
Relation toastrel;
Relation toastidx;
@ -1299,7 +1300,7 @@ toast_fetch_datum(struct varlena *attr)
int32 numchunks;
Pointer chunk;
bool isnull;
char *chunkdata;
char *chunkdata;
int32 chunksize;

/* Must copy to access aligned fields */
@ -1365,7 +1366,7 @@ toast_fetch_datum(struct varlena *attr)
{
/* should never happen */
elog(ERROR, "found toasted toast chunk");
chunksize = 0; /* keep compiler quiet */
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}

@ -1384,12 +1385,12 @@ toast_fetch_datum(struct varlena *attr)
residx, numchunks,
toast_pointer.va_valueid);
}
else if (residx == numchunks-1)
else if (residx == numchunks - 1)
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != ressize)
elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u",
chunksize,
(int) (ressize - residx*TOAST_MAX_CHUNK_SIZE),
(int) (ressize - residx * TOAST_MAX_CHUNK_SIZE),
residx,
toast_pointer.va_valueid);
}
@ -1397,7 +1398,7 @@ toast_fetch_datum(struct varlena *attr)
elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)",
residx,
toast_pointer.va_valueid,
0, numchunks-1);
0, numchunks - 1);

/*
* Copy the data into proper place in our result
@ -1435,7 +1436,7 @@ toast_fetch_datum(struct varlena *attr)
* ----------
*/
static struct varlena *
toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
toast_fetch_datum_slice(struct varlena * attr, int32 sliceoffset, int32 length)
{
Relation toastrel;
Relation toastidx;
@ -1457,7 +1458,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
int totalchunks;
Pointer chunk;
bool isnull;
char *chunkdata;
char *chunkdata;
int32 chunksize;
int32 chcpystrt;
int32 chcpyend;
@ -1574,7 +1575,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
{
/* should never happen */
elog(ERROR, "found toasted toast chunk");
chunksize = 0; /* keep compiler quiet */
chunksize = 0; /* keep compiler quiet */
chunkdata = NULL;
}

@ -1593,7 +1594,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
residx, totalchunks,
toast_pointer.va_valueid);
}
else if (residx == totalchunks-1)
else if (residx == totalchunks - 1)
{
if ((residx * TOAST_MAX_CHUNK_SIZE + chunksize) != attrsize)
elog(ERROR, "unexpected chunk size %d (expected %d) in final chunk %d for toast value %u when fetching slice",
@ -1606,7 +1607,7 @@ toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset, int32 length)
elog(ERROR, "unexpected chunk number %d for toast value %u (out of range %d..%d)",
residx,
toast_pointer.va_valueid,
0, totalchunks-1);
0, totalchunks - 1);

/*
* Copy the data into proper place in our result