Mirror of https://github.com/postgres/postgres.git (synced 2025-08-18 12:22:09 +03:00)
Post-PG 10 beta1 pgindent run
perltidy run not included.
@@ -364,7 +364,7 @@ bringetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
MemoryContext oldcxt;
MemoryContext perRangeCxt;
BrinMemTuple *dtup;
BrinTuple *btup = NULL;
BrinTuple *btup = NULL;
Size btupsz = 0;

opaque = (BrinOpaque *) scan->opaque;
@@ -920,13 +920,13 @@ brin_summarize_range(PG_FUNCTION_ARGS)
Datum
brin_desummarize_range(PG_FUNCTION_ARGS)
{
Oid indexoid = PG_GETARG_OID(0);
int64 heapBlk64 = PG_GETARG_INT64(1);
Oid indexoid = PG_GETARG_OID(0);
int64 heapBlk64 = PG_GETARG_INT64(1);
BlockNumber heapBlk;
Oid heapoid;
Relation heapRel;
Relation indexRel;
bool done;
Oid heapoid;
Relation heapRel;
Relation indexRel;
bool done;

if (heapBlk64 > MaxBlockNumber || heapBlk64 < 0)
{
@@ -977,7 +977,8 @@ brin_desummarize_range(PG_FUNCTION_ARGS)
RelationGetRelationName(indexRel))));

/* the revmap does the hard work */
do {
do
{
done = brinRevmapDesummarizeRange(indexRel, heapBlk);
}
while (!done);

@@ -318,11 +318,11 @@ bool
brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
{
BrinRevmap *revmap;
BlockNumber pagesPerRange;
BlockNumber pagesPerRange;
RevmapContents *contents;
ItemPointerData *iptr;
ItemPointerData invalidIptr;
BlockNumber revmapBlk;
ItemPointerData invalidIptr;
BlockNumber revmapBlk;
Buffer revmapBuf;
Buffer regBuf;
Page revmapPg;
@@ -415,7 +415,7 @@ brinRevmapDesummarizeRange(Relation idxrel, BlockNumber heapBlk)
if (RelationNeedsWAL(idxrel))
{
xl_brin_desummarize xlrec;
XLogRecPtr recptr;
XLogRecPtr recptr;

xlrec.pagesPerRange = revmap->rm_pagesPerRange;
xlrec.heapBlk = heapBlk;

@@ -268,7 +268,7 @@ brin_xlog_desummarize_page(XLogReaderState *record)
action = XLogReadBufferForRedo(record, 0, &buffer);
if (action == BLK_NEEDS_REDO)
{
ItemPointerData iptr;
ItemPointerData iptr;

ItemPointerSetInvalid(&iptr);
brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk, iptr);
@@ -283,7 +283,7 @@ brin_xlog_desummarize_page(XLogReaderState *record)
action = XLogReadBufferForRedo(record, 1, &buffer);
if (action == BLK_NEEDS_REDO)
{
Page regPg = BufferGetPage(buffer);
Page regPg = BufferGetPage(buffer);

PageIndexTupleDeleteNoCompact(regPg, xlrec->regOffset);
@@ -102,8 +102,8 @@ printsimple(TupleTableSlot *slot, DestReceiver *self)

case INT4OID:
{
int32 num = DatumGetInt32(value);
char str[12]; /* sign, 10 digits and '\0' */
int32 num = DatumGetInt32(value);
char str[12]; /* sign, 10 digits and '\0' */

pg_ltoa(num, str);
pq_sendcountedtext(&buf, str, strlen(str), false);
@@ -112,8 +112,8 @@ printsimple(TupleTableSlot *slot, DestReceiver *self)

case INT8OID:
{
int64 num = DatumGetInt64(value);
char str[23]; /* sign, 21 digits and '\0' */
int64 num = DatumGetInt64(value);
char str[23]; /* sign, 21 digits and '\0' */

pg_lltoa(num, str);
pq_sendcountedtext(&buf, str, strlen(str), false);
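
The "sign, 10 digits and '\0'" and "sign, 21 digits and '\0'" comments above size the buffers for the widest possible decimal texts: INT32_MIN prints as -2147483648 (11 characters plus the terminator), and the widest int64 value, -9223372036854775808, is 20 characters, so 23 bytes is comfortably enough. A minimal standalone sketch of the same sizing, using snprintf in place of pg_ltoa/pg_lltoa (which are PostgreSQL internals) and assuming plain C99:

#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>

int
main(void)
{
	char		str32[12];		/* sign, 10 digits and '\0' */
	char		str64[23];		/* sign, 21 digits and '\0' */

	/* snprintf stands in for pg_ltoa/pg_lltoa here */
	snprintf(str32, sizeof(str32), "%" PRId32, INT32_MIN);
	snprintf(str64, sizeof(str64), "%" PRId64, INT64_MIN);

	printf("%s fits in %zu bytes\n", str32, sizeof(str32));
	printf("%s fits in %zu bytes\n", str64, sizeof(str64));
	return 0;
}
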
@@ -140,9 +140,9 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
* exclusive cleanup lock. This guarantees that no insertions currently
* happen in this subtree. Caller also acquire Exclusive lock on deletable
* page and is acquiring and releasing exclusive lock on left page before.
* Left page was locked and released. Then parent and this page are locked.
* We acquire left page lock here only to mark page dirty after changing
* right pointer.
* Left page was locked and released. Then parent and this page are
* locked. We acquire left page lock here only to mark page dirty after
* changing right pointer.
*/
lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno,
RBM_NORMAL, gvs->strategy);
@@ -258,7 +258,7 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
RBM_NORMAL, gvs->strategy);

if(!isRoot)
if (!isRoot)
LockBuffer(buffer, GIN_EXCLUSIVE);

page = BufferGetPage(buffer);
@@ -295,8 +295,8 @@ ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot,
}
}

if(!isRoot)
LockBuffer(buffer, GIN_UNLOCK);
if (!isRoot)
LockBuffer(buffer, GIN_UNLOCK);

ReleaseBuffer(buffer);

@@ -326,7 +326,7 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot)
RBM_NORMAL, gvs->strategy);
page = BufferGetPage(buffer);

ginTraverseLock(buffer,false);
ginTraverseLock(buffer, false);

Assert(GinPageIsData(page));

@@ -347,15 +347,15 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot)
}
else
{
OffsetNumber i;
bool hasEmptyChild = FALSE;
bool hasNonEmptyChild = FALSE;
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
BlockNumber* children = palloc(sizeof(BlockNumber) * (maxoff + 1));
OffsetNumber i;
bool hasEmptyChild = FALSE;
bool hasNonEmptyChild = FALSE;
OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
BlockNumber *children = palloc(sizeof(BlockNumber) * (maxoff + 1));

/*
* Read all children BlockNumbers.
* Not sure it is safe if there are many concurrent vacuums.
* Read all children BlockNumbers. Not sure it is safe if there are
* many concurrent vacuums.
*/

for (i = FirstOffsetNumber; i <= maxoff; i++)
@@ -380,26 +380,26 @@ ginVacuumPostingTreeLeaves(GinVacuumState *gvs, BlockNumber blkno, bool isRoot)
vacuum_delay_point();

/*
* All subtree is empty - just return TRUE to indicate that parent must
* do a cleanup. Unless we are ROOT an there is way to go upper.
* All subtree is empty - just return TRUE to indicate that parent
* must do a cleanup. Unless we are ROOT an there is way to go upper.
*/

if(hasEmptyChild && !hasNonEmptyChild && !isRoot)
if (hasEmptyChild && !hasNonEmptyChild && !isRoot)
return TRUE;

if(hasEmptyChild)
if (hasEmptyChild)
{
DataPageDeleteStack root,
*ptr,
*tmp;

buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno,
RBM_NORMAL, gvs->strategy);
RBM_NORMAL, gvs->strategy);
LockBufferForCleanup(buffer);

memset(&root, 0, sizeof(DataPageDeleteStack));
root.leftBlkno = InvalidBlockNumber;
root.isRoot = TRUE;
root.leftBlkno = InvalidBlockNumber;
root.isRoot = TRUE;

ginScanToDelete(gvs, blkno, TRUE, &root, InvalidOffsetNumber);
@@ -333,12 +333,12 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
if (scan->kill_prior_tuple)
{
/*
* Yes, so remember it for later. (We'll deal with all such
* tuples at once right after leaving the index page or at
* end of scan.) In case if caller reverses the indexscan
* direction it is quite possible that the same item might
* get entered multiple times. But, we don't detect that;
* instead, we just forget any excess entries.
* Yes, so remember it for later. (We'll deal with all such tuples
* at once right after leaving the index page or at end of scan.)
* In case if caller reverses the indexscan direction it is quite
* possible that the same item might get entered multiple times.
* But, we don't detect that; instead, we just forget any excess
* entries.
*/
if (so->killedItems == NULL)
so->killedItems = palloc(MaxIndexTuplesPerPage *
@@ -348,7 +348,7 @@ hashgettuple(IndexScanDesc scan, ScanDirection dir)
{
so->killedItems[so->numKilled].heapTid = so->hashso_heappos;
so->killedItems[so->numKilled].indexOffset =
ItemPointerGetOffsetNumber(&(so->hashso_curpos));
ItemPointerGetOffsetNumber(&(so->hashso_curpos));
so->numKilled++;
}
}
@@ -477,9 +477,8 @@ hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
Relation rel = scan->indexRelation;

/*
* Before leaving current page, deal with any killed items.
* Also, ensure that we acquire lock on current page before
* calling _hash_kill_items.
* Before leaving current page, deal with any killed items. Also, ensure
* that we acquire lock on current page before calling _hash_kill_items.
*/
if (so->numKilled > 0)
{
@@ -516,9 +515,8 @@ hashendscan(IndexScanDesc scan)
Relation rel = scan->indexRelation;

/*
* Before leaving current page, deal with any killed items.
* Also, ensure that we acquire lock on current page before
* calling _hash_kill_items.
* Before leaving current page, deal with any killed items. Also, ensure
* that we acquire lock on current page before calling _hash_kill_items.
*/
if (so->numKilled > 0)
{
@@ -889,8 +887,8 @@ hashbucketcleanup(Relation rel, Bucket cur_bucket, Buffer bucket_buf,

/*
* Let us mark the page as clean if vacuum removes the DEAD tuples
* from an index page. We do this by clearing LH_PAGE_HAS_DEAD_TUPLES
* flag.
* from an index page. We do this by clearing
* LH_PAGE_HAS_DEAD_TUPLES flag.
*/
if (tuples_removed && *tuples_removed > 0 &&
H_HAS_DEAD_TUPLES(opaque))
@@ -950,22 +950,22 @@ hash_xlog_update_meta_page(XLogReaderState *record)
static TransactionId
hash_xlog_vacuum_get_latestRemovedXid(XLogReaderState *record)
{
xl_hash_vacuum_one_page *xlrec;
OffsetNumber *unused;
xl_hash_vacuum_one_page *xlrec;
OffsetNumber *unused;
Buffer ibuffer,
hbuffer;
Page ipage,
hpage;
RelFileNode rnode;
BlockNumber blkno;
RelFileNode rnode;
BlockNumber blkno;
ItemId iitemid,
hitemid;
IndexTuple itup;
HeapTupleHeader htuphdr;
BlockNumber hblkno;
OffsetNumber hoffnum;
TransactionId latestRemovedXid = InvalidTransactionId;
int i;
HeapTupleHeader htuphdr;
BlockNumber hblkno;
OffsetNumber hoffnum;
TransactionId latestRemovedXid = InvalidTransactionId;
int i;

xlrec = (xl_hash_vacuum_one_page *) XLogRecGetData(record);

@@ -984,9 +984,9 @@ hash_xlog_vacuum_get_latestRemovedXid(XLogReaderState *record)
return latestRemovedXid;

/*
* Check if WAL replay has reached a consistent database state. If not,
* we must PANIC. See the definition of btree_xlog_delete_get_latestRemovedXid
* for more details.
* Check if WAL replay has reached a consistent database state. If not, we
* must PANIC. See the definition of
* btree_xlog_delete_get_latestRemovedXid for more details.
*/
if (!reachedConsistency)
elog(PANIC, "hash_xlog_vacuum_get_latestRemovedXid: cannot operate with inconsistent data");
@@ -1098,11 +1098,11 @@ hash_xlog_vacuum_get_latestRemovedXid(XLogReaderState *record)
static void
hash_xlog_vacuum_one_page(XLogReaderState *record)
{
XLogRecPtr lsn = record->EndRecPtr;
XLogRecPtr lsn = record->EndRecPtr;
xl_hash_vacuum_one_page *xldata;
Buffer buffer;
Buffer metabuf;
Page page;
Buffer buffer;
Buffer metabuf;
Page page;
XLogRedoAction action;
HashPageOpaque pageopaque;

@@ -1123,7 +1123,7 @@ hash_xlog_vacuum_one_page(XLogReaderState *record)
if (InHotStandby)
{
TransactionId latestRemovedXid =
hash_xlog_vacuum_get_latestRemovedXid(record);
hash_xlog_vacuum_get_latestRemovedXid(record);
RelFileNode rnode;

XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
@@ -1146,8 +1146,8 @@ hash_xlog_vacuum_one_page(XLogReaderState *record)
}

/*
* Mark the page as not containing any LP_DEAD items. See comments
* in _hash_vacuum_one_page() for details.
* Mark the page as not containing any LP_DEAD items. See comments in
* _hash_vacuum_one_page() for details.
*/
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
pageopaque->hasho_flag &= ~LH_PAGE_HAS_DEAD_TUPLES;
@@ -1160,7 +1160,7 @@ hash_xlog_vacuum_one_page(XLogReaderState *record)

if (XLogReadBufferForRedo(record, 1, &metabuf) == BLK_NEEDS_REDO)
{
Page metapage;
Page metapage;
HashMetaPage metap;

metapage = BufferGetPage(metabuf);
@@ -24,7 +24,7 @@
#include "storage/buf_internals.h"

static void _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
RelFileNode hnode);
RelFileNode hnode);

/*
* _hash_doinsert() -- Handle insertion of a single index tuple.
@@ -63,8 +63,8 @@ restart_insert:

/*
* Read the metapage. We don't lock it yet; HashMaxItemSize() will
* examine pd_pagesize_version, but that can't change so we can examine
* it without a lock.
* examine pd_pagesize_version, but that can't change so we can examine it
* without a lock.
*/
metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_NOLOCK, LH_META_PAGE);
metapage = BufferGetPage(metabuf);
@@ -126,10 +126,9 @@ restart_insert:
BlockNumber nextblkno;

/*
* Check if current page has any DEAD tuples. If yes,
* delete these tuples and see if we can get a space for
* the new item to be inserted before moving to the next
* page in the bucket chain.
* Check if current page has any DEAD tuples. If yes, delete these
* tuples and see if we can get a space for the new item to be
* inserted before moving to the next page in the bucket chain.
*/
if (H_HAS_DEAD_TUPLES(pageopaque))
{
@@ -139,7 +138,7 @@ restart_insert:
_hash_vacuum_one_page(rel, metabuf, buf, heapRel->rd_node);

if (PageGetFreeSpace(page) >= itemsz)
break; /* OK, now we have enough space */
break; /* OK, now we have enough space */
}
}

@@ -337,13 +336,13 @@ static void
_hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
RelFileNode hnode)
{
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
OffsetNumber deletable[MaxOffsetNumber];
int ndeletable = 0;
OffsetNumber offnum,
maxoff;
Page page = BufferGetPage(buf);
HashPageOpaque pageopaque;
HashMetaPage metap;
maxoff;
Page page = BufferGetPage(buf);
HashPageOpaque pageopaque;
HashMetaPage metap;

/* Scan each tuple in page to see if it is marked as LP_DEAD */
maxoff = PageGetMaxOffsetNumber(page);
@@ -351,7 +350,7 @@ _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
ItemId itemId = PageGetItemId(page, offnum);
ItemId itemId = PageGetItemId(page, offnum);

if (ItemIdIsDead(itemId))
deletable[ndeletable++] = offnum;
@@ -360,8 +359,7 @@ _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
if (ndeletable > 0)
{
/*
* Write-lock the meta page so that we can decrement
* tuple count.
* Write-lock the meta page so that we can decrement tuple count.
*/
LockBuffer(metabuf, BUFFER_LOCK_EXCLUSIVE);

@@ -374,8 +372,8 @@ _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
* Mark the page as not containing any LP_DEAD items. This is not
* certainly true (there might be some that have recently been marked,
* but weren't included in our target-item list), but it will almost
* always be true and it doesn't seem worth an additional page scan
* to check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
* always be true and it doesn't seem worth an additional page scan to
* check it. Remember that LH_PAGE_HAS_DEAD_TUPLES is only a hint
* anyway.
*/
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
@@ -390,7 +388,7 @@ _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
/* XLOG stuff */
if (RelationNeedsWAL(rel))
{
xl_hash_vacuum_one_page xlrec;
xl_hash_vacuum_one_page xlrec;
XLogRecPtr recptr;

xlrec.hnode = hnode;
@@ -401,12 +399,12 @@ _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
XLogRegisterData((char *) &xlrec, SizeOfHashVacuumOnePage);

/*
* We need the target-offsets array whether or not we store the whole
* buffer, to allow us to find the latestRemovedXid on a standby
* server.
* We need the target-offsets array whether or not we store the
* whole buffer, to allow us to find the latestRemovedXid on a
* standby server.
*/
XLogRegisterData((char *) deletable,
ndeletable * sizeof(OffsetNumber));
ndeletable * sizeof(OffsetNumber));

XLogRegisterBuffer(1, metabuf, REGBUF_STANDARD);

@@ -417,9 +415,10 @@ _hash_vacuum_one_page(Relation rel, Buffer metabuf, Buffer buf,
}

END_CRIT_SECTION();

/*
* Releasing write lock on meta page as we have updated
* the tuple count.
* Releasing write lock on meta page as we have updated the tuple
* count.
*/
LockBuffer(metabuf, BUFFER_LOCK_UNLOCK);
}
@@ -177,8 +177,8 @@ _hash_initbuf(Buffer buf, uint32 max_bucket, uint32 num_bucket, uint32 flag,
pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);

/*
* Set hasho_prevblkno with current hashm_maxbucket. This value will
* be used to validate cached HashMetaPageData. See
* Set hasho_prevblkno with current hashm_maxbucket. This value will be
* used to validate cached HashMetaPageData. See
* _hash_getbucketbuf_from_hashkey().
*/
pageopaque->hasho_prevblkno = max_bucket;
@@ -509,8 +509,8 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
* Choose the number of initial bucket pages to match the fill factor
* given the estimated number of tuples. We round up the result to the
* total number of buckets which has to be allocated before using its
* _hashm_spare element. However always force at least 2 bucket pages.
* The upper limit is determined by considerations explained in
* _hashm_spare element. However always force at least 2 bucket pages. The
* upper limit is determined by considerations explained in
* _hash_expandtable().
*/
dnumbuckets = num_tuples / ffactor;
@@ -568,8 +568,8 @@ _hash_init_metabuffer(Buffer buf, double num_tuples, RegProcedure procid,
metap->hashm_maxbucket = num_buckets - 1;

/*
* Set highmask as next immediate ((2 ^ x) - 1), which should be sufficient
* to cover num_buckets.
* Set highmask as next immediate ((2 ^ x) - 1), which should be
* sufficient to cover num_buckets.
*/
metap->hashm_highmask = (1 << (_hash_log2(num_buckets + 1))) - 1;
metap->hashm_lowmask = (metap->hashm_highmask >> 1);
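
The highmask/lowmask assignment above picks the next ((2 ^ x) - 1) large enough to cover num_buckets, rounding up on num_buckets + 1; for example, num_buckets = 10 yields highmask = 15 and lowmask = 7. A standalone sketch of that arithmetic, with a local ceil_log2 helper standing in for PostgreSQL's _hash_log2 (assumed here to return the smallest i such that (1 << i) >= its argument):

#include <stdio.h>
#include <stdint.h>

/* Illustrative stand-in for _hash_log2: smallest i such that (1 << i) >= num */
static uint32_t
ceil_log2(uint32_t num)
{
	uint32_t	i = 0;
	uint32_t	limit = 1;

	while (limit < num)
	{
		i++;
		limit <<= 1;
	}
	return i;
}

int
main(void)
{
	uint32_t	num_buckets = 10;	/* hypothetical initial bucket count */
	uint32_t	highmask = (1 << ceil_log2(num_buckets + 1)) - 1;
	uint32_t	lowmask = highmask >> 1;

	/* prints highmask = 15, lowmask = 7 */
	printf("highmask = %u, lowmask = %u\n", highmask, lowmask);
	return 0;
}
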
@@ -748,8 +748,8 @@ restart_expand:
{
/*
* Copy bucket mapping info now; refer to the comment in code below
* where we copy this information before calling _hash_splitbucket
* to see why this is okay.
* where we copy this information before calling _hash_splitbucket to
* see why this is okay.
*/
maxbucket = metap->hashm_maxbucket;
highmask = metap->hashm_highmask;
@@ -792,8 +792,7 @@ restart_expand:
* We treat allocation of buckets as a separate WAL-logged action.
* Even if we fail after this operation, won't leak bucket pages;
* rather, the next split will consume this space. In any case, even
* without failure we don't use all the space in one split
* operation.
* without failure we don't use all the space in one split operation.
*/
buckets_to_add = _hash_get_totalbuckets(spare_ndx) - new_bucket;
if (!_hash_alloc_buckets(rel, start_nblkno, buckets_to_add))
@@ -870,10 +869,9 @@ restart_expand:

/*
* Mark the old bucket to indicate that split is in progress. (At
* operation end, we will clear the split-in-progress flag.) Also,
* for a primary bucket page, hasho_prevblkno stores the number of
* buckets that existed as of the last split, so we must update that
* value here.
* operation end, we will clear the split-in-progress flag.) Also, for a
* primary bucket page, hasho_prevblkno stores the number of buckets that
* existed as of the last split, so we must update that value here.
*/
oopaque->hasho_flag |= LH_BUCKET_BEING_SPLIT;
oopaque->hasho_prevblkno = maxbucket;
@@ -1008,8 +1006,8 @@ _hash_alloc_buckets(Relation rel, BlockNumber firstblock, uint32 nblocks)

/*
* Initialize the page. Just zeroing the page won't work; see
* _hash_freeovflpage for similar usage. We take care to make the
* special space valid for the benefit of tools such as pageinspect.
* _hash_freeovflpage for similar usage. We take care to make the special
* space valid for the benefit of tools such as pageinspect.
*/
_hash_pageinit(page, BLCKSZ);

@@ -1462,11 +1460,11 @@ log_split_page(Relation rel, Buffer buf)
* _hash_getcachedmetap() -- Returns cached metapage data.
*
* If metabuf is not InvalidBuffer, caller must hold a pin, but no lock, on
* the metapage. If not set, we'll set it before returning if we have to
* refresh the cache, and return with a pin but no lock on it; caller is
* responsible for releasing the pin.
* the metapage. If not set, we'll set it before returning if we have to
* refresh the cache, and return with a pin but no lock on it; caller is
* responsible for releasing the pin.
*
* We refresh the cache if it's not initialized yet or force_refresh is true.
* We refresh the cache if it's not initialized yet or force_refresh is true.
*/
HashMetaPage
_hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh)
@@ -1476,13 +1474,13 @@ _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh)
Assert(metabuf);
if (force_refresh || rel->rd_amcache == NULL)
{
char *cache = NULL;
char *cache = NULL;

/*
* It's important that we don't set rd_amcache to an invalid
* value. Either MemoryContextAlloc or _hash_getbuf could fail,
* so don't install a pointer to the newly-allocated storage in the
* actual relcache entry until both have succeeeded.
* It's important that we don't set rd_amcache to an invalid value.
* Either MemoryContextAlloc or _hash_getbuf could fail, so don't
* install a pointer to the newly-allocated storage in the actual
* relcache entry until both have succeeeded.
*/
if (rel->rd_amcache == NULL)
cache = MemoryContextAlloc(rel->rd_indexcxt,
@@ -1517,7 +1515,7 @@ _hash_getcachedmetap(Relation rel, Buffer *metabuf, bool force_refresh)
* us an opportunity to use the previously saved metapage contents to reach
* the target bucket buffer, instead of reading from the metapage every time.
* This saves one buffer access every time we want to reach the target bucket
* buffer, which is very helpful savings in bufmgr traffic and contention.
* buffer, which is very helpful savings in bufmgr traffic and contention.
*
* The access type parameter (HASH_READ or HASH_WRITE) indicates whether the
* bucket buffer has to be locked for reading or writing.
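
The _hash_getcachedmetap comment above describes a defensive ordering: allocate into a local variable, perform the remaining step that can fail, and only then publish the pointer into the long-lived structure, so a failure partway through cannot leave rd_amcache pointing at invalid data. A generic sketch of that pattern outside PostgreSQL (names hypothetical, error handling via return codes instead of elog/ereport):

#include <stdlib.h>
#include <string.h>
#include <stdio.h>

struct cache_entry
{
	void	   *amcache;		/* long-lived cached copy, or NULL */
};

/* Returns 0 on success; the entry is updated only after every step succeeded. */
static int
refresh_cache(struct cache_entry *entry, const void *metapage, size_t len)
{
	void	   *cache = malloc(len);	/* step 1: allocation may fail */

	if (cache == NULL)
		return -1;				/* entry->amcache still holds its old, valid value */

	/* step 2: reading the source could also fail in a real system */
	memcpy(cache, metapage, len);

	/* only now install the pointer into the long-lived entry */
	free(entry->amcache);
	entry->amcache = cache;
	return 0;
}

int
main(void)
{
	struct cache_entry entry = {NULL};
	const char	metapage[] = "metapage contents";

	if (refresh_cache(&entry, metapage, sizeof(metapage)) == 0)
		printf("cached: %s\n", (char *) entry.amcache);
	free(entry.amcache);
	return 0;
}
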
@@ -528,20 +528,21 @@ _hash_get_newbucket_from_oldbucket(Relation rel, Bucket old_bucket,
void
_hash_kill_items(IndexScanDesc scan)
{
HashScanOpaque so = (HashScanOpaque) scan->opaque;
Page page;
HashPageOpaque opaque;
OffsetNumber offnum, maxoff;
int numKilled = so->numKilled;
int i;
bool killedsomething = false;
HashScanOpaque so = (HashScanOpaque) scan->opaque;
Page page;
HashPageOpaque opaque;
OffsetNumber offnum,
maxoff;
int numKilled = so->numKilled;
int i;
bool killedsomething = false;

Assert(so->numKilled > 0);
Assert(so->killedItems != NULL);

/*
* Always reset the scan state, so we don't look for same
* items on other pages.
* Always reset the scan state, so we don't look for same items on other
* pages.
*/
so->numKilled = 0;

@@ -555,7 +556,7 @@ _hash_kill_items(IndexScanDesc scan)

while (offnum <= maxoff)
{
ItemId iid = PageGetItemId(page, offnum);
ItemId iid = PageGetItemId(page, offnum);
IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);

if (ItemPointerEquals(&ituple->t_tid, &so->killedItems[i].heapTid))
@@ -563,15 +564,15 @@ _hash_kill_items(IndexScanDesc scan)
/* found the item */
ItemIdMarkDead(iid);
killedsomething = true;
break; /* out of inner search loop */
break; /* out of inner search loop */
}
offnum = OffsetNumberNext(offnum);
}
}

/*
* Since this can be redone later if needed, mark as dirty hint.
* Whenever we mark anything LP_DEAD, we also set the page's
* Since this can be redone later if needed, mark as dirty hint. Whenever
* we mark anything LP_DEAD, we also set the page's
* LH_PAGE_HAS_DEAD_TUPLES flag, which is likewise just a hint.
*/
if (killedsomething)
@@ -3518,10 +3518,10 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
*
* For HOT considerations, this is wasted effort if we fail to update or
* have to put the new tuple on a different page. But we must compute the
* list before obtaining buffer lock --- in the worst case, if we are doing
* an update on one of the relevant system catalogs, we could deadlock if
* we try to fetch the list later. In any case, the relcache caches the
* data so this is usually pretty cheap.
* list before obtaining buffer lock --- in the worst case, if we are
* doing an update on one of the relevant system catalogs, we could
* deadlock if we try to fetch the list later. In any case, the relcache
* caches the data so this is usually pretty cheap.
*
* We also need columns used by the replica identity and columns that are
* considered the "key" of rows in the table.
@@ -3540,15 +3540,16 @@ heap_update(Relation relation, ItemPointer otid, HeapTuple newtup,
page = BufferGetPage(buffer);

interesting_attrs = NULL;

/*
* If the page is already full, there is hardly any chance of doing a HOT
* update on this page. It might be wasteful effort to look for index
* column updates only to later reject HOT updates for lack of space in the
* same page. So we be conservative and only fetch hot_attrs if the page is
* not already full. Since we are already holding a pin on the buffer,
* there is no chance that the buffer can get cleaned up concurrently and
* even if that was possible, in the worst case we lose a chance to do a
* HOT update.
* column updates only to later reject HOT updates for lack of space in
* the same page. So we be conservative and only fetch hot_attrs if the
* page is not already full. Since we are already holding a pin on the
* buffer, there is no chance that the buffer can get cleaned up
* concurrently and even if that was possible, in the worst case we lose a
* chance to do a HOT update.
*/
if (!PageIsFull(page))
{
@@ -4176,7 +4177,7 @@ l2:
* logged.
*/
old_key_tuple = ExtractReplicaIdentity(relation, &oldtup,
bms_overlap(modified_attrs, id_attrs),
bms_overlap(modified_attrs, id_attrs),
&old_key_copied);

/* NO EREPORT(ERROR) from here till changes are logged */
@@ -4422,17 +4423,17 @@ static Bitmapset *
HeapDetermineModifiedColumns(Relation relation, Bitmapset *interesting_cols,
HeapTuple oldtup, HeapTuple newtup)
{
int attnum;
Bitmapset *modified = NULL;
int attnum;
Bitmapset *modified = NULL;

while ((attnum = bms_first_member(interesting_cols)) >= 0)
{
attnum += FirstLowInvalidHeapAttributeNumber;

if (!heap_tuple_attr_equals(RelationGetDescr(relation),
attnum, oldtup, newtup))
attnum, oldtup, newtup))
modified = bms_add_member(modified,
attnum - FirstLowInvalidHeapAttributeNumber);
attnum - FirstLowInvalidHeapAttributeNumber);
}

return modified;
@@ -100,7 +100,7 @@ typedef struct BTParallelScanDescData
* scan */
slock_t btps_mutex; /* protects above variables */
ConditionVariable btps_cv; /* used to synchronize parallel scan */
} BTParallelScanDescData;
} BTParallelScanDescData;

typedef struct BTParallelScanDescData *BTParallelScanDesc;

@@ -289,11 +289,11 @@ btbuildempty(Relation index)
_bt_initmetapage(metapage, P_NONE, 0);

/*
* Write the page and log it. It might seem that an immediate sync
* would be sufficient to guarantee that the file exists on disk, but
* recovery itself might remove it while replaying, for example, an
* XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we
* need this even when wal_level=minimal.
* Write the page and log it. It might seem that an immediate sync would
* be sufficient to guarantee that the file exists on disk, but recovery
* itself might remove it while replaying, for example, an
* XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record. Therefore, we need
* this even when wal_level=minimal.
*/
PageSetChecksumInplace(metapage, BTREE_METAPAGE);
smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
@@ -66,7 +66,7 @@ brin_desc(StringInfo buf, XLogReaderState *record)
xl_brin_desummarize *xlrec = (xl_brin_desummarize *) rec;

appendStringInfo(buf, "pagesPerRange %u, heapBlk %u, page offset %u",
xlrec->pagesPerRange, xlrec->heapBlk, xlrec->regOffset);
xlrec->pagesPerRange, xlrec->heapBlk, xlrec->regOffset);
}
}

@@ -36,7 +36,7 @@ clog_desc(StringInfo buf, XLogReaderState *record)

memcpy(&xlrec, rec, sizeof(xl_clog_truncate));
appendStringInfo(buf, "page %d; oldestXact %u",
xlrec.pageno, xlrec.oldestXact);
xlrec.pageno, xlrec.oldestXact);
}
}

@@ -117,18 +117,18 @@ gin_desc(StringInfo buf, XLogReaderState *record)

if (!(xlrec->flags & GIN_INSERT_ISDATA))
appendStringInfo(buf, " isdelete: %c",
(((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
(((ginxlogInsertEntry *) payload)->isDelete) ? 'T' : 'F');
else if (xlrec->flags & GIN_INSERT_ISLEAF)
desc_recompress_leaf(buf, (ginxlogRecompressDataLeaf *) payload);
else
{
ginxlogInsertDataInternal *insertData =
(ginxlogInsertDataInternal *) payload;
(ginxlogInsertDataInternal *) payload;

appendStringInfo(buf, " pitem: %u-%u/%u",
PostingItemGetBlockNumber(&insertData->newitem),
ItemPointerGetBlockNumber(&insertData->newitem.key),
ItemPointerGetOffsetNumber(&insertData->newitem.key));
PostingItemGetBlockNumber(&insertData->newitem),
ItemPointerGetBlockNumber(&insertData->newitem.key),
ItemPointerGetOffsetNumber(&insertData->newitem.key));
}
}
}
@@ -159,7 +159,7 @@ gin_desc(StringInfo buf, XLogReaderState *record)
else
{
ginxlogVacuumDataLeafPage *xlrec =
(ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, NULL);
(ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, NULL);

desc_recompress_leaf(buf, &xlrec->data);
}

@@ -164,10 +164,10 @@ spgbuildempty(Relation index)

/*
* Write the page and log it unconditionally. This is important
* particularly for indexes created on tablespaces and databases
* whose creation happened after the last redo pointer as recovery
* removes any of their existing content when the corresponding
* create records are replayed.
* particularly for indexes created on tablespaces and databases whose
* creation happened after the last redo pointer as recovery removes any
* of their existing content when the corresponding create records are
* replayed.
*/
PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO);
smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO,
@@ -84,7 +84,7 @@ static int ZeroCLOGPage(int pageno, bool writeXlog);
static bool CLOGPagePrecedes(int page1, int page2);
static void WriteZeroPageXlogRec(int pageno);
static void WriteTruncateXlogRec(int pageno, TransactionId oldestXact,
Oid oldestXidDb);
Oid oldestXidDb);
static void TransactionIdSetPageStatus(TransactionId xid, int nsubxids,
TransactionId *subxids, XidStatus status,
XLogRecPtr lsn, int pageno);
@@ -680,13 +680,13 @@ TruncateCLOG(TransactionId oldestXact, Oid oldestxid_datoid)

/* vac_truncate_clog already advanced oldestXid */
Assert(TransactionIdPrecedesOrEquals(oldestXact,
ShmemVariableCache->oldestXid));
ShmemVariableCache->oldestXid));

/*
* Write XLOG record and flush XLOG to disk. We record the oldest xid we're
* keeping information about here so we can ensure that it's always ahead
* of clog truncation in case we crash, and so a standby finds out the new
* valid xid before the next checkpoint.
* Write XLOG record and flush XLOG to disk. We record the oldest xid
* we're keeping information about here so we can ensure that it's always
* ahead of clog truncation in case we crash, and so a standby finds out
* the new valid xid before the next checkpoint.
*/
WriteTruncateXlogRec(cutoffPage, oldestXact, oldestxid_datoid);

@@ -748,8 +748,8 @@ ShutdownCommitTs(void)
SimpleLruFlush(CommitTsCtl, false);

/*
* fsync pg_commit_ts to ensure that any files flushed previously are durably
* on disk.
* fsync pg_commit_ts to ensure that any files flushed previously are
* durably on disk.
*/
fsync_fname("pg_commit_ts", true);
}
@@ -764,8 +764,8 @@ CheckPointCommitTs(void)
SimpleLruFlush(CommitTsCtl, true);

/*
* fsync pg_commit_ts to ensure that any files flushed previously are durably
* on disk.
* fsync pg_commit_ts to ensure that any files flushed previously are
* durably on disk.
*/
fsync_fname("pg_commit_ts", true);
}
@@ -87,9 +87,9 @@ SubTransSetParent(TransactionId xid, TransactionId parent)
ptr += entryno;

/*
* It's possible we'll try to set the parent xid multiple times
* but we shouldn't ever be changing the xid from one valid xid
* to another valid xid, which would corrupt the data structure.
* It's possible we'll try to set the parent xid multiple times but we
* shouldn't ever be changing the xid from one valid xid to another valid
* xid, which would corrupt the data structure.
*/
if (*ptr != parent)
{
@@ -162,13 +162,13 @@ SubTransGetTopmostTransaction(TransactionId xid)
parentXid = SubTransGetParent(parentXid);

/*
* By convention the parent xid gets allocated first, so should
* always precede the child xid. Anything else points to a corrupted
* data structure that could lead to an infinite loop, so exit.
* By convention the parent xid gets allocated first, so should always
* precede the child xid. Anything else points to a corrupted data
* structure that could lead to an infinite loop, so exit.
*/
if (!TransactionIdPrecedes(parentXid, previousXid))
elog(ERROR, "pg_subtrans contains invalid entry: xid %u points to parent xid %u",
previousXid, parentXid);
previousXid, parentXid);
}

Assert(TransactionIdIsValid(previousXid));
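
The SubTransGetTopmostTransaction loop above follows parent links until it reaches a top-level xid, and the precedes check turns a corrupted chain into an error instead of an infinite loop, since a parent allocated after its child can only mean a cycle or garbage. A simplified sketch of that walk over a plain array of parent links, using direct integer comparison in place of TransactionIdPrecedes (which in PostgreSQL also handles xid wraparound):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define INVALID_XID 0
#define MAX_XID 16

/* parent_of[x] holds the parent of xid x, or INVALID_XID for a top-level xid */
static uint32_t parent_of[MAX_XID];

static uint32_t
topmost_transaction(uint32_t xid)
{
	uint32_t	previous = xid;

	while (parent_of[xid] != INVALID_XID)
	{
		uint32_t	p = parent_of[xid];

		/* parents are allocated first, so they must be smaller (no wraparound here) */
		if (p >= previous)
		{
			fprintf(stderr, "corrupt entry: xid %u points to parent xid %u\n",
					previous, p);
			exit(EXIT_FAILURE);
		}
		previous = p;
		xid = p;
	}
	return previous;
}

int
main(void)
{
	parent_of[7] = 5;			/* 7 is a subtransaction of 5 */
	parent_of[5] = 3;			/* 5 is a subtransaction of 3 */
	printf("topmost of 7 is %u\n", topmost_transaction(7));	/* prints 3 */
	return 0;
}
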
@@ -166,7 +166,7 @@ typedef struct GlobalTransactionData
*/
XLogRecPtr prepare_start_lsn; /* XLOG offset of prepare record start */
XLogRecPtr prepare_end_lsn; /* XLOG offset of prepare record end */
TransactionId xid; /* The GXACT id */
TransactionId xid; /* The GXACT id */

Oid owner; /* ID of user that executed the xact */
BackendId locking_backend; /* backend currently working on the xact */
@@ -220,11 +220,11 @@ static void RemoveGXact(GlobalTransaction gxact);

static void XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len);
static char *ProcessTwoPhaseBuffer(TransactionId xid,
XLogRecPtr prepare_start_lsn,
bool fromdisk, bool setParent, bool setNextXid);
XLogRecPtr prepare_start_lsn,
bool fromdisk, bool setParent, bool setNextXid);
static void MarkAsPreparingGuts(GlobalTransaction gxact, TransactionId xid,
const char *gid, TimestampTz prepared_at, Oid owner,
Oid databaseid);
const char *gid, TimestampTz prepared_at, Oid owner,
Oid databaseid);
static void RemoveTwoPhaseFile(TransactionId xid, bool giveWarning);
static void RecreateTwoPhaseFile(TransactionId xid, void *content, int len);

@@ -1304,7 +1304,7 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"),
errdetail("Failed while allocating a WAL reading processor.")));
errdetail("Failed while allocating a WAL reading processor.")));

record = XLogReadRecord(xlogreader, lsn, &errormsg);
if (record == NULL)
@@ -1318,9 +1318,9 @@ XlogReadTwoPhaseData(XLogRecPtr lsn, char **buf, int *len)
(XLogRecGetInfo(xlogreader) & XLOG_XACT_OPMASK) != XLOG_XACT_PREPARE)
ereport(ERROR,
(errcode_for_file_access(),
errmsg("expected two-phase state data is not present in WAL at %X/%X",
(uint32) (lsn >> 32),
(uint32) lsn)));
errmsg("expected two-phase state data is not present in WAL at %X/%X",
(uint32) (lsn >> 32),
(uint32) lsn)));

if (len != NULL)
*len = XLogRecGetDataLen(xlogreader);
@@ -1675,7 +1675,10 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
LWLockAcquire(TwoPhaseStateLock, LW_SHARED);
for (i = 0; i < TwoPhaseState->numPrepXacts; i++)
{
/* Note that we are using gxact not pgxact so this works in recovery also */
/*
* Note that we are using gxact not pgxact so this works in recovery
* also
*/
GlobalTransaction gxact = TwoPhaseState->prepXacts[i];

if ((gxact->valid || gxact->inredo) &&
@@ -1727,8 +1730,8 @@ CheckPointTwoPhase(XLogRecPtr redo_horizon)
void
restoreTwoPhaseData(void)
{
DIR *cldir;
struct dirent *clde;
DIR *cldir;
struct dirent *clde;

cldir = AllocateDir(TWOPHASE_DIR);
while ((clde = ReadDir(cldir, TWOPHASE_DIR)) != NULL)
@@ -1801,8 +1804,8 @@ PrescanPreparedTransactions(TransactionId **xids_p, int *nxids_p)
xid = gxact->xid;

buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn,
gxact->ondisk, false, true);
gxact->prepare_start_lsn,
gxact->ondisk, false, true);

if (buf == NULL)
continue;
@@ -1876,8 +1879,8 @@ StandbyRecoverPreparedTransactions(void)
xid = gxact->xid;

buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn,
gxact->ondisk, false, false);
gxact->prepare_start_lsn,
gxact->ondisk, false, false);
if (buf != NULL)
pfree(buf);
}
@@ -1920,17 +1923,17 @@ RecoverPreparedTransactions(void)
xid = gxact->xid;

/*
* Reconstruct subtrans state for the transaction --- needed
* because pg_subtrans is not preserved over a restart. Note that
* we are linking all the subtransactions directly to the
* top-level XID; there may originally have been a more complex
* hierarchy, but there's no need to restore that exactly.
* It's possible that SubTransSetParent has been set before, if
* the prepared transaction generated xid assignment records.
* Reconstruct subtrans state for the transaction --- needed because
* pg_subtrans is not preserved over a restart. Note that we are
* linking all the subtransactions directly to the top-level XID;
* there may originally have been a more complex hierarchy, but
* there's no need to restore that exactly. It's possible that
* SubTransSetParent has been set before, if the prepared transaction
* generated xid assignment records.
*/
buf = ProcessTwoPhaseBuffer(xid,
gxact->prepare_start_lsn,
gxact->ondisk, true, false);
gxact->prepare_start_lsn,
gxact->ondisk, true, false);
if (buf == NULL)
continue;

@@ -1949,9 +1952,8 @@ RecoverPreparedTransactions(void)
bufptr += MAXALIGN(hdr->ninvalmsgs * sizeof(SharedInvalidationMessage));

/*
* Recreate its GXACT and dummy PGPROC. But, check whether
* it was added in redo and already has a shmem entry for
* it.
* Recreate its GXACT and dummy PGPROC. But, check whether it was
* added in redo and already has a shmem entry for it.
*/
LWLockAcquire(TwoPhaseStateLock, LW_EXCLUSIVE);
MarkAsPreparingGuts(gxact, xid, gid,
@@ -1980,9 +1982,8 @@ RecoverPreparedTransactions(void)
StandbyReleaseLockTree(xid, hdr->nsubxacts, subxids);

/*
* We're done with recovering this transaction. Clear
* MyLockedGxact, like we do in PrepareTransaction() during normal
* operation.
* We're done with recovering this transaction. Clear MyLockedGxact,
* like we do in PrepareTransaction() during normal operation.
*/
PostPrepare_Twophase();

@@ -2049,8 +2050,8 @@ ProcessTwoPhaseBuffer(TransactionId xid,
else
{
ereport(WARNING,
(errmsg("removing future two-phase state from memory for \"%u\"",
xid)));
(errmsg("removing future two-phase state from memory for \"%u\"",
xid)));
PrepareRedoRemove(xid, true);
}
return NULL;
@@ -2063,8 +2064,8 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (buf == NULL)
{
ereport(WARNING,
(errmsg("removing corrupt two-phase state file for \"%u\"",
xid)));
(errmsg("removing corrupt two-phase state file for \"%u\"",
xid)));
RemoveTwoPhaseFile(xid, true);
return NULL;
}
@@ -2082,15 +2083,15 @@ ProcessTwoPhaseBuffer(TransactionId xid,
if (fromdisk)
{
ereport(WARNING,
(errmsg("removing corrupt two-phase state file for \"%u\"",
xid)));
(errmsg("removing corrupt two-phase state file for \"%u\"",
xid)));
RemoveTwoPhaseFile(xid, true);
}
else
{
ereport(WARNING,
(errmsg("removing corrupt two-phase state from memory for \"%u\"",
xid)));
(errmsg("removing corrupt two-phase state from memory for \"%u\"",
xid)));
PrepareRedoRemove(xid, true);
}
pfree(buf);
@@ -2098,8 +2099,8 @@ ProcessTwoPhaseBuffer(TransactionId xid,
}

/*
* Examine subtransaction XIDs ... they should all follow main
* XID, and they may force us to advance nextXid.
* Examine subtransaction XIDs ... they should all follow main XID, and
* they may force us to advance nextXid.
*/
subxids = (TransactionId *) (buf +
MAXALIGN(sizeof(TwoPhaseFileHeader)) +
@@ -2122,7 +2123,7 @@ ProcessTwoPhaseBuffer(TransactionId xid,
*/
LWLockAcquire(XidGenLock, LW_EXCLUSIVE);
if (TransactionIdFollowsOrEquals(subxid,
ShmemVariableCache->nextXid))
ShmemVariableCache->nextXid))
{
ShmemVariableCache->nextXid = subxid;
TransactionIdAdvance(ShmemVariableCache->nextXid);
@@ -2175,14 +2176,15 @@ RecordTransactionCommitPrepared(TransactionId xid,
MyPgXact->delayChkpt = true;

/*
* Emit the XLOG commit record. Note that we mark 2PC commits as potentially
* having AccessExclusiveLocks since we don't know whether or not they do.
* Emit the XLOG commit record. Note that we mark 2PC commits as
* potentially having AccessExclusiveLocks since we don't know whether or
* not they do.
*/
recptr = XactLogCommitRecord(committs,
nchildren, children, nrels, rels,
ninvalmsgs, invalmsgs,
initfileinval, false,
MyXactFlags | XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK,
MyXactFlags | XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK,
xid);

@@ -2260,13 +2262,14 @@ RecordTransactionAbortPrepared(TransactionId xid,
START_CRIT_SECTION();

/*
* Emit the XLOG commit record. Note that we mark 2PC aborts as potentially
* having AccessExclusiveLocks since we don't know whether or not they do.
* Emit the XLOG commit record. Note that we mark 2PC aborts as
* potentially having AccessExclusiveLocks since we don't know whether or
* not they do.
*/
recptr = XactLogAbortRecord(GetCurrentTimestamp(),
nchildren, children,
nrels, rels,
MyXactFlags | XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK,
MyXactFlags | XACT_FLAGS_ACQUIREDACCESSEXCLUSIVELOCK,
xid);

/* Always flush, since we're about to remove the 2PC state file */
@@ -2301,8 +2304,8 @@ void
PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
{
TwoPhaseFileHeader *hdr = (TwoPhaseFileHeader *) buf;
char *bufptr;
const char *gid;
char *bufptr;
const char *gid;
GlobalTransaction gxact;

Assert(RecoveryInProgress());
@@ -2315,8 +2318,8 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
*
* This creates a gxact struct and puts it into the active array.
*
* In redo, this struct is mainly used to track PREPARE/COMMIT entries
* in shared memory. Hence, we only fill up the bare minimum contents here.
* In redo, this struct is mainly used to track PREPARE/COMMIT entries in
* shared memory. Hence, we only fill up the bare minimum contents here.
* The gxact also gets marked with gxact->inredo set to true to indicate
* that it got added in the redo phase
*/
@@ -2340,7 +2343,7 @@ PrepareRedoAdd(char *buf, XLogRecPtr start_lsn, XLogRecPtr end_lsn)
gxact->locking_backend = InvalidBackendId;
gxact->valid = false;
gxact->ondisk = XLogRecPtrIsInvalid(start_lsn);
gxact->inredo = true; /* yes, added in redo */
gxact->inredo = true; /* yes, added in redo */
strcpy(gxact->gid, gid);

/* And insert it into the active array */
@@ -272,7 +272,7 @@ AdvanceOldestClogXid(TransactionId oldest_datfrozenxid)
{
LWLockAcquire(CLogTruncationLock, LW_EXCLUSIVE);
if (TransactionIdPrecedes(ShmemVariableCache->oldestClogXid,
oldest_datfrozenxid))
oldest_datfrozenxid))
{
ShmemVariableCache->oldestClogXid = oldest_datfrozenxid;
}
@@ -115,7 +115,7 @@ TransactionId *ParallelCurrentXids;
* globally accessible, so can be set from anywhere in the code that requires
* recording flags.
*/
int MyXactFlags;
int MyXactFlags;

/*
* transaction states - transaction state from server perspective
@@ -2641,7 +2641,8 @@ CleanupTransaction(void)
* do abort cleanup processing
*/
AtCleanup_Portals(); /* now safe to release portal memory */
AtEOXact_Snapshot(false, true); /* and release the transaction's snapshots */
AtEOXact_Snapshot(false, true); /* and release the transaction's
* snapshots */

CurrentResourceOwner = NULL; /* and resource owner */
if (TopTransactionResourceOwner)
@@ -5646,8 +5647,8 @@ xact_redo(XLogReaderState *record)
else if (info == XLOG_XACT_PREPARE)
{
/*
* Store xid and start/end pointers of the WAL record in
* TwoPhaseState gxact entry.
* Store xid and start/end pointers of the WAL record in TwoPhaseState
* gxact entry.
*/
PrepareRedoAdd(XLogRecGetData(record),
record->ReadRecPtr,
@@ -550,13 +550,12 @@ typedef struct XLogCtlInsert
|
||||
bool fullPageWrites;
|
||||
|
||||
/*
|
||||
* exclusiveBackupState indicates the state of an exclusive backup
|
||||
* (see comments of ExclusiveBackupState for more details).
|
||||
* nonExclusiveBackups is a counter indicating the number of streaming
|
||||
* base backups currently in progress. forcePageWrites is set to true
|
||||
* when either of these is non-zero. lastBackupStart is the latest
|
||||
* checkpoint redo location used as a starting point for an online
|
||||
* backup.
|
||||
* exclusiveBackupState indicates the state of an exclusive backup (see
|
||||
* comments of ExclusiveBackupState for more details). nonExclusiveBackups
|
||||
* is a counter indicating the number of streaming base backups currently
|
||||
* in progress. forcePageWrites is set to true when either of these is
|
||||
* non-zero. lastBackupStart is the latest checkpoint redo location used
|
||||
* as a starting point for an online backup.
|
||||
*/
|
||||
ExclusiveBackupState exclusiveBackupState;
|
||||
int nonExclusiveBackups;
|
||||
@@ -1082,7 +1081,7 @@ XLogInsertRecord(XLogRecData *rdata,
|
||||
*/
|
||||
if ((flags & XLOG_MARK_UNIMPORTANT) == 0)
|
||||
{
|
||||
int lockno = holdingAllLocks ? 0 : MyLockNo;
|
||||
int lockno = holdingAllLocks ? 0 : MyLockNo;
|
||||
|
||||
WALInsertLocks[lockno].l.lastImportantAt = StartPos;
|
||||
}
|
||||
@@ -1405,7 +1404,8 @@ checkXLogConsistency(XLogReaderState *record)
|
||||
|
||||
/*
|
||||
* If the block LSN is already ahead of this WAL record, we can't
|
||||
* expect contents to match. This can happen if recovery is restarted.
|
||||
* expect contents to match. This can happen if recovery is
|
||||
* restarted.
|
||||
*/
|
||||
if (PageGetLSN(replay_image_masked) > record->EndRecPtr)
|
||||
continue;
|
||||
@@ -4975,15 +4975,15 @@ BootStrapXLOG(void)
|
||||
sysidentifier |= getpid() & 0xFFF;
|
||||
|
||||
/*
|
||||
* Generate a random nonce. This is used for authentication requests
|
||||
* that will fail because the user does not exist. The nonce is used to
|
||||
* create a genuine-looking password challenge for the non-existent user,
|
||||
* in lieu of an actual stored password.
|
||||
* Generate a random nonce. This is used for authentication requests that
|
||||
* will fail because the user does not exist. The nonce is used to create
|
||||
* a genuine-looking password challenge for the non-existent user, in lieu
|
||||
* of an actual stored password.
|
||||
*/
|
||||
if (!pg_backend_random(mock_auth_nonce, MOCK_AUTH_NONCE_LEN))
|
||||
ereport(PANIC,
|
||||
(errcode(ERRCODE_INTERNAL_ERROR),
|
||||
errmsg("could not generate secret authorization token")));
|
||||
(errcode(ERRCODE_INTERNAL_ERROR),
|
||||
errmsg("could not generate secret authorization token")));
|
||||
|
||||
/* First timeline ID is always 1 */
|
||||
ThisTimeLineID = 1;
|
||||
@@ -5298,7 +5298,7 @@ readRecoveryCommandFile(void)
|
||||
DatumGetLSN(DirectFunctionCall3(pg_lsn_in,
|
||||
CStringGetDatum(item->value),
|
||||
ObjectIdGetDatum(InvalidOid),
|
||||
Int32GetDatum(-1)));
|
||||
Int32GetDatum(-1)));
|
||||
ereport(DEBUG2,
|
||||
(errmsg_internal("recovery_target_lsn = '%X/%X'",
|
||||
(uint32) (recoveryTargetLSN >> 32),
|
||||
@@ -5643,9 +5643,9 @@ recoveryStopsBefore(XLogReaderState *record)
|
||||
recoveryStopTime = 0;
|
||||
recoveryStopName[0] = '\0';
|
||||
ereport(LOG,
|
||||
(errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
|
||||
(uint32) (recoveryStopLSN >> 32),
|
||||
(uint32) recoveryStopLSN)));
|
||||
(errmsg("recovery stopping before WAL location (LSN) \"%X/%X\"",
|
||||
(uint32) (recoveryStopLSN >> 32),
|
||||
(uint32) recoveryStopLSN)));
|
||||
return true;
|
||||
}

@@ -5800,9 +5800,9 @@ recoveryStopsAfter(XLogReaderState *record)
recoveryStopTime = 0;
recoveryStopName[0] = '\0';
ereport(LOG,
(errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
(uint32) (recoveryStopLSN >> 32),
(uint32) recoveryStopLSN)));
(errmsg("recovery stopping after WAL location (LSN) \"%X/%X\"",
(uint32) (recoveryStopLSN >> 32),
(uint32) recoveryStopLSN)));
return true;
}

@@ -6348,12 +6348,12 @@ StartupXLOG(void)
ereport(ERROR,
(errcode(ERRCODE_OUT_OF_MEMORY),
errmsg("out of memory"),
errdetail("Failed while allocating a WAL reading processor.")));
errdetail("Failed while allocating a WAL reading processor.")));
xlogreader->system_identifier = ControlFile->system_identifier;

/*
* Allocate pages dedicated to WAL consistency checks, those had better
* be aligned.
* Allocate pages dedicated to WAL consistency checks, those had better be
* aligned.
*/
replay_image_masked = (char *) palloc(BLCKSZ);
master_image_masked = (char *) palloc(BLCKSZ);
@@ -6687,21 +6687,21 @@ StartupXLOG(void)

/*
* Copy any missing timeline history files between 'now' and the recovery
* target timeline from archive to pg_wal. While we don't need those
* files ourselves - the history file of the recovery target timeline
* covers all the previous timelines in the history too - a cascading
* standby server might be interested in them. Or, if you archive the WAL
* from this server to a different archive than the master, it'd be good
* for all the history files to get archived there after failover, so that
* you can use one of the old timelines as a PITR target. Timeline history
* files are small, so it's better to copy them unnecessarily than not
* copy them and regret later.
* target timeline from archive to pg_wal. While we don't need those files
* ourselves - the history file of the recovery target timeline covers all
* the previous timelines in the history too - a cascading standby server
* might be interested in them. Or, if you archive the WAL from this
* server to a different archive than the master, it'd be good for all the
* history files to get archived there after failover, so that you can use
* one of the old timelines as a PITR target. Timeline history files are
* small, so it's better to copy them unnecessarily than not copy them and
* regret later.
*/
restoreTimeLineHistoryFiles(ThisTimeLineID, recoveryTargetTLI);

/*
* Before running in recovery, scan pg_twophase and fill in its status
* to be able to work on entries generated by redo. Doing a scan before
* Before running in recovery, scan pg_twophase and fill in its status to
* be able to work on entries generated by redo. Doing a scan before
* taking any recovery action has the merit to discard any 2PC files that
* are newer than the first record to replay, saving from any conflicts at
* replay. This avoids as well any subsequent scans when doing recovery
@@ -7426,7 +7426,7 @@ StartupXLOG(void)
snprintf(reason, sizeof(reason),
"%s LSN %X/%X\n",
recoveryStopAfter ? "after" : "before",
(uint32 ) (recoveryStopLSN >> 32),
(uint32) (recoveryStopLSN >> 32),
(uint32) recoveryStopLSN);
else if (recoveryTarget == RECOVERY_TARGET_NAME)
snprintf(reason, sizeof(reason),
@@ -9645,6 +9645,7 @@ xlog_redo(XLogReaderState *record)

MultiXactAdvanceOldest(checkPoint.oldestMulti,
checkPoint.oldestMultiDB);

/*
* No need to set oldestClogXid here as well; it'll be set when we
* redo an xl_clog_truncate if it changed since initialization.
@@ -10238,8 +10239,8 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
if (exclusive)
{
/*
* At first, mark that we're now starting an exclusive backup,
* to ensure that there are no other sessions currently running
* At first, mark that we're now starting an exclusive backup, to
* ensure that there are no other sessions currently running
* pg_start_backup() or pg_stop_backup().
*/
if (XLogCtl->Insert.exclusiveBackupState != EXCLUSIVE_BACKUP_NONE)
@@ -10505,8 +10506,9 @@ do_pg_start_backup(const char *backupidstr, bool fast, TimeLineID *starttli_p,
{
/*
* Check for existing backup label --- implies a backup is already
* running. (XXX given that we checked exclusiveBackupState above,
* maybe it would be OK to just unlink any such label file?)
* running. (XXX given that we checked exclusiveBackupState
* above, maybe it would be OK to just unlink any such label
* file?)
*/
if (stat(BACKUP_LABEL_FILE, &stat_buf) != 0)
{
@@ -10727,8 +10729,8 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
if (exclusive)
{
/*
* At first, mark that we're now stopping an exclusive backup,
* to ensure that there are no other sessions currently running
* At first, mark that we're now stopping an exclusive backup, to
* ensure that there are no other sessions currently running
* pg_start_backup() or pg_stop_backup().
*/
WALInsertLockAcquireExclusive();
@@ -10790,8 +10792,8 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
durable_unlink(BACKUP_LABEL_FILE, ERROR);

/*
* Remove tablespace_map file if present, it is created only if there
* are tablespaces.
* Remove tablespace_map file if present, it is created only if
* there are tablespaces.
*/
durable_unlink(TABLESPACE_MAP, DEBUG1);
}
@@ -10978,9 +10980,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
* archived before returning. If archiving isn't enabled, the required WAL
* needs to be transported via streaming replication (hopefully with
* wal_keep_segments set high enough), or some more exotic mechanism like
* polling and copying files from pg_wal with script. We have no
* knowledge of those mechanisms, so it's up to the user to ensure that he
* gets all the required WAL.
* polling and copying files from pg_wal with script. We have no knowledge
* of those mechanisms, so it's up to the user to ensure that he gets all
* the required WAL.
*
* We wait until both the last WAL file filled during backup and the
* history file have been archived, and assume that the alphabetic sorting
@@ -10990,8 +10992,8 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
* We wait forever, since archive_command is supposed to work and we
* assume the admin wanted his backup to work completely. If you don't
* wish to wait, then either waitforarchive should be passed in as false,
* or you can set statement_timeout. Also, some notices are
* issued to clue in anyone who might be doing this interactively.
* or you can set statement_timeout. Also, some notices are issued to
* clue in anyone who might be doing this interactively.
*/
if (waitforarchive && XLogArchivingActive())
{
@@ -11717,8 +11719,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* little chance that the problem will just go away, but
* PANIC is not good for availability either, especially
* in hot standby mode. So, we treat that the same as
* disconnection, and retry from archive/pg_wal again.
* The WAL in the archive should be identical to what was
* disconnection, and retry from archive/pg_wal again. The
* WAL in the archive should be identical to what was
* streamed, so it's unlikely that it helps, but one can
* hope...
*/
@@ -11881,9 +11883,9 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* not open already. Also read the timeline history
* file if we haven't initialized timeline history
* yet; it should be streamed over and present in
* pg_wal by now. Use XLOG_FROM_STREAM so that
* source info is set correctly and XLogReceiptTime
* isn't changed.
* pg_wal by now. Use XLOG_FROM_STREAM so that source
* info is set correctly and XLogReceiptTime isn't
* changed.
*/
if (readFile < 0)
{

@@ -156,7 +156,8 @@ pg_stop_backup(PG_FUNCTION_ARGS)
* Exclusive backups were typically started in a different connection, so
* don't try to verify that status of backup is set to
* SESSION_BACKUP_EXCLUSIVE in this function. Actual verification that an
* exclusive backup is in fact running is handled inside do_pg_stop_backup.
* exclusive backup is in fact running is handled inside
* do_pg_stop_backup.
*/
stoppoint = do_pg_stop_backup(NULL, true, NULL);

@@ -527,7 +528,7 @@ pg_walfile_name(PG_FUNCTION_ARGS)
ereport(ERROR,
(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
errmsg("recovery is in progress"),
errhint("pg_walfile_name() cannot be executed during recovery.")));
errhint("pg_walfile_name() cannot be executed during recovery.")));

XLByteToPrevSeg(locationpoint, xlogsegno);
XLogFileName(xlogfilename, ThisTimeLineID, xlogsegno);
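For context on the two calls just above: XLByteToPrevSeg() maps the reported LSN to the segment ending at or before it, and XLogFileName() renders the timeline plus segment number as a 24-hex-digit file name. A hedged, standalone approximation of that naming, assuming the default 16 MB segment size; the sample LSN and the WAL_SEG_SIZE/SEGS_PER_XLOGID names are invented for illustration:

#include <stdint.h>
#include <stdio.h>

#define WAL_SEG_SIZE	(UINT64_C(16) * 1024 * 1024)	/* assumption: default 16 MB segments */
#define SEGS_PER_XLOGID	(UINT64_C(0x100000000) / WAL_SEG_SIZE)

int
main(void)
{
	unsigned int	tli = 1;							/* timeline ID */
	uint64_t		lsn = UINT64_C(0x16B374D848);		/* invented example LSN */
	uint64_t		segno = (lsn - 1) / WAL_SEG_SIZE;	/* previous segment, as XLByteToPrevSeg does */
	char			fname[32];

	/* 24 hex digits: timeline, then the high and low halves of the segment number */
	snprintf(fname, sizeof(fname), "%08X%08X%08X",
			 tli,
			 (unsigned int) (segno / SEGS_PER_XLOGID),
			 (unsigned int) (segno % SEGS_PER_XLOGID));
	puts(fname);		/* prints 0000000100000016000000B3 */
	return 0;
}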

@@ -388,10 +388,10 @@ XLogRegisterBufData(uint8 block_id, char *data, int len)
*
* The flags that can be used here are:
* - XLOG_INCLUDE_ORIGIN, to determine if the replication origin should be
* included in the record.
* included in the record.
* - XLOG_MARK_UNIMPORTANT, to signal that the record is not important for
* durability, which allows to avoid triggering WAL archiving and other
* background activity.
* durability, which allows to avoid triggering WAL archiving and other
* background activity.
*/
void
XLogSetRecordFlags(uint8 flags)
@@ -507,10 +507,10 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
hdr_rdt.data = hdr_scratch;

/*
* Enforce consistency checks for this record if user is looking for
* it. Do this before at the beginning of this routine to give the
* possibility for callers of XLogInsert() to pass XLR_CHECK_CONSISTENCY
* directly for a record.
* Enforce consistency checks for this record if user is looking for it.
* Do this before at the beginning of this routine to give the possibility
* for callers of XLogInsert() to pass XLR_CHECK_CONSISTENCY directly for
* a record.
*/
if (wal_consistency_checking[rmid])
info |= XLR_CHECK_CONSISTENCY;
@@ -576,9 +576,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
bkpb.fork_flags |= BKPBLOCK_WILL_INIT;

/*
* If needs_backup is true or WAL checking is enabled for
* current resource manager, log a full-page write for the current
* block.
* If needs_backup is true or WAL checking is enabled for current
* resource manager, log a full-page write for the current block.
*/
include_image = needs_backup || (info & XLR_CHECK_CONSISTENCY) != 0;

@@ -645,8 +644,8 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
bimg.bimg_info = (cbimg.hole_length == 0) ? 0 : BKPIMAGE_HAS_HOLE;

/*
* If WAL consistency checking is enabled for the resource manager of
* this WAL record, a full-page image is included in the record
* If WAL consistency checking is enabled for the resource manager
* of this WAL record, a full-page image is included in the record
* for the block modified. During redo, the full-page is replayed
* only if BKPIMAGE_APPLY is set.
*/

@@ -892,8 +892,8 @@ XLogFindNextRecord(XLogReaderState *state, XLogRecPtr RecPtr)
* that, except when caller has explicitly specified the offset that
* falls somewhere there or when we are skipping multi-page
* continuation record. It doesn't matter though because
* ReadPageInternal() is prepared to handle that and will read at least
* short page-header worth of data
* ReadPageInternal() is prepared to handle that and will read at
* least short page-header worth of data
*/
targetRecOff = tmpRecPtr % XLOG_BLCKSZ;

@@ -805,22 +805,23 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
Assert(state->readLen == 0 || state->readLen <= XLOG_BLCKSZ);

/*
* If the desired page is currently read in and valid, we have nothing to do.
* If the desired page is currently read in and valid, we have nothing to
* do.
*
* The caller should've ensured that it didn't previously advance readOff
* past the valid limit of this timeline, so it doesn't matter if the current
* TLI has since become historical.
* past the valid limit of this timeline, so it doesn't matter if the
* current TLI has since become historical.
*/
if (lastReadPage == wantPage &&
state->readLen != 0 &&
lastReadPage + state->readLen >= wantPage + Min(wantLength,XLOG_BLCKSZ-1))
lastReadPage + state->readLen >= wantPage + Min(wantLength, XLOG_BLCKSZ - 1))
return;

/*
* If we're reading from the current timeline, it hasn't become historical
* and the page we're reading is after the last page read, we can again
* just carry on. (Seeking backwards requires a check to make sure the older
* page isn't on a prior timeline).
* just carry on. (Seeking backwards requires a check to make sure the
* older page isn't on a prior timeline).
*
* ThisTimeLineID might've become historical since we last looked, but the
* caller is required not to read past the flush limit it saw at the time
@@ -835,8 +836,8 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa

/*
* If we're just reading pages from a previously validated historical
* timeline and the timeline we're reading from is valid until the
* end of the current segment we can just keep reading.
* timeline and the timeline we're reading from is valid until the end of
* the current segment we can just keep reading.
*/
if (state->currTLIValidUntil != InvalidXLogRecPtr &&
state->currTLI != ThisTimeLineID &&
@@ -845,10 +846,10 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
return;

/*
* If we reach this point we're either looking up a page for random access,
* the current timeline just became historical, or we're reading from a new
* segment containing a timeline switch. In all cases we need to determine
* the newest timeline on the segment.
* If we reach this point we're either looking up a page for random
* access, the current timeline just became historical, or we're reading
* from a new segment containing a timeline switch. In all cases we need
* to determine the newest timeline on the segment.
*
* If it's the current timeline we can just keep reading from here unless
* we detect a timeline switch that makes the current timeline historical.
@@ -861,26 +862,29 @@ XLogReadDetermineTimeline(XLogReaderState *state, XLogRecPtr wantPage, uint32 wa
* We need to re-read the timeline history in case it's been changed
* by a promotion or replay from a cascaded replica.
*/
List *timelineHistory = readTimeLineHistory(ThisTimeLineID);
List *timelineHistory = readTimeLineHistory(ThisTimeLineID);

XLogRecPtr endOfSegment = (((wantPage / XLogSegSize) + 1) * XLogSegSize) - 1;
XLogRecPtr endOfSegment = (((wantPage / XLogSegSize) + 1) * XLogSegSize) - 1;

Assert(wantPage / XLogSegSize == endOfSegment / XLogSegSize);

/* Find the timeline of the last LSN on the segment containing wantPage. */
/*
* Find the timeline of the last LSN on the segment containing
* wantPage.
*/
state->currTLI = tliOfPointInHistory(endOfSegment, timelineHistory);
state->currTLIValidUntil = tliSwitchPoint(state->currTLI, timelineHistory,
&state->nextTLI);
&state->nextTLI);

Assert(state->currTLIValidUntil == InvalidXLogRecPtr ||
wantPage + wantLength < state->currTLIValidUntil);
wantPage + wantLength < state->currTLIValidUntil);

list_free_deep(timelineHistory);

elog(DEBUG3, "switched to timeline %u valid until %X/%X",
state->currTLI,
(uint32)(state->currTLIValidUntil >> 32),
(uint32)(state->currTLIValidUntil));
state->currTLI,
(uint32) (state->currTLIValidUntil >> 32),
(uint32) (state->currTLIValidUntil));
}
}
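The endOfSegment expression in the hunk above computes the last byte position of the WAL segment that contains wantPage, which is then used to look up the newest timeline on that segment. A small standalone check of that arithmetic, assuming a hypothetical 16 MB XLogSegSize and an invented wantPage value:

#include <assert.h>
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define XLOG_SEG_SIZE	(UINT64_C(16) * 1024 * 1024)	/* assumption: 16 MB segments */

int
main(void)
{
	uint64_t	wantPage = 5 * XLOG_SEG_SIZE + 0x3000;	/* invented page address inside segment 5 */
	uint64_t	endOfSegment = (((wantPage / XLOG_SEG_SIZE) + 1) * XLOG_SEG_SIZE) - 1;

	/* both addresses fall in the same segment, as the Assert in the hunk requires */
	assert(wantPage / XLOG_SEG_SIZE == endOfSegment / XLOG_SEG_SIZE);
	printf("segment %" PRIu64 " ends at byte %" PRIu64 "\n",
		   wantPage / XLOG_SEG_SIZE, endOfSegment);
	return 0;
}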

@@ -929,21 +933,22 @@ read_local_xlog_page(XLogReaderState *state, XLogRecPtr targetPagePtr,
*
* We have to do it each time through the loop because if we're in
* recovery as a cascading standby, the current timeline might've
* become historical. We can't rely on RecoveryInProgress() because
* in a standby configuration like
* become historical. We can't rely on RecoveryInProgress() because in
* a standby configuration like
*
* A => B => C
* A => B => C
*
* if we're a logical decoding session on C, and B gets promoted, our
* timeline will change while we remain in recovery.
*
* We can't just keep reading from the old timeline as the last WAL
* archive in the timeline will get renamed to .partial by StartupXLOG().
* archive in the timeline will get renamed to .partial by
* StartupXLOG().
*
* If that happens after our caller updated ThisTimeLineID but before
* we actually read the xlog page, we might still try to read from the
* old (now renamed) segment and fail. There's not much we can do about
* this, but it can only happen when we're a leaf of a cascading
* old (now renamed) segment and fail. There's not much we can do
* about this, but it can only happen when we're a leaf of a cascading
* standby whose master gets promoted while we're decoding, so a
* one-off ERROR isn't too bad.
*/