mirror of
https://github.com/postgres/postgres.git
synced 2025-11-13 16:22:44 +03:00
Revamp the WAL record format.
Each WAL record now carries information about the modified relation and block(s) in a standardized format. That makes it easier to write tools that need that information, like pg_rewind, prefetching the blocks to speed up recovery, etc. There's a whole new API for building WAL records, replacing the XLogRecData chains used previously. The new API consists of XLogRegister* functions, which are called for each buffer and chunk of data that is added to the record. The new API also gives more control over when a full-page image is written, by passing flags to the XLogRegisterBuffer function. This also simplifies the XLogReadBufferForRedo() calls. The function can dig the relation and block number from the WAL record, so they no longer need to be passed as arguments. For the convenience of redo routines, XLogReader now dissects each WAL record after reading it, copying the main data part and the per-block data into MAXALIGNed buffers. The data chunks are not aligned within the WAL record, but the redo routines can assume that the pointers returned by XLogRecGet* functions are. Redo routines are now passed the XLogReaderState, which contains the record in the already-dissected format, instead of the plain XLogRecord. The new record format also makes the fixed size XLogRecord header smaller, by removing the xl_len field. The length of the "main data" portion is now stored at the end of the WAL record, and there's a separate header after XLogRecord for it. The alignment padding at the end of XLogRecord is also removed. This compensates for the fact that the new format would otherwise be more bulky than the old format. Reviewed by Andres Freund, Amit Kapila, Michael Paquier, Alvaro Herrera, Fujii Masao.
This commit is contained in:
@@ -666,19 +666,16 @@ brinbuild(PG_FUNCTION_ARGS)
|
||||
{
|
||||
xl_brin_createidx xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
Page page;
|
||||
|
||||
xlrec.node = index->rd_node;
|
||||
xlrec.version = BRIN_CURRENT_VERSION;
|
||||
xlrec.pagesPerRange = BrinGetPagesPerRange(index);
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = SizeOfBrinCreateIdx;
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBrinCreateIdx);
|
||||
XLogRegisterBuffer(0, meta, REGBUF_WILL_INIT);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX, &rdata);
|
||||
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_CREATE_INDEX);
|
||||
|
||||
page = BufferGetPage(meta);
|
||||
PageSetLSN(page, recptr);
|
||||
|
||||
@@ -140,27 +140,19 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
|
||||
/* XLOG stuff */
|
||||
if (RelationNeedsWAL(idxrel))
|
||||
{
|
||||
BlockNumber blk = BufferGetBlockNumber(oldbuf);
|
||||
xl_brin_samepage_update xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[2];
|
||||
uint8 info = XLOG_BRIN_SAMEPAGE_UPDATE;
|
||||
|
||||
xlrec.node = idxrel->rd_node;
|
||||
ItemPointerSetBlockNumber(&xlrec.tid, blk);
|
||||
ItemPointerSetOffsetNumber(&xlrec.tid, oldoff);
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBrinSamepageUpdate;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
xlrec.offnum = oldoff;
|
||||
|
||||
rdata[1].data = (char *) newtup;
|
||||
rdata[1].len = newsz;
|
||||
rdata[1].buffer = oldbuf;
|
||||
rdata[1].buffer_std = true;
|
||||
rdata[1].next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBrinSamepageUpdate);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, info, rdata);
|
||||
XLogRegisterBuffer(0, oldbuf, REGBUF_STANDARD);
|
||||
XLogRegisterBufData(0, (char *) newtup, newsz);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, info);
|
||||
|
||||
PageSetLSN(oldpage, recptr);
|
||||
}
|
||||
@@ -211,43 +203,30 @@ brin_doupdate(Relation idxrel, BlockNumber pagesPerRange,
|
||||
{
|
||||
xl_brin_update xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[4];
|
||||
uint8 info;
|
||||
|
||||
info = XLOG_BRIN_UPDATE | (extended ? XLOG_BRIN_INIT_PAGE : 0);
|
||||
|
||||
xlrec.insert.node = idxrel->rd_node;
|
||||
ItemPointerSet(&xlrec.insert.tid, BufferGetBlockNumber(newbuf), newoff);
|
||||
xlrec.insert.offnum = newoff;
|
||||
xlrec.insert.heapBlk = heapBlk;
|
||||
xlrec.insert.tuplen = newsz;
|
||||
xlrec.insert.revmapBlk = BufferGetBlockNumber(revmapbuf);
|
||||
xlrec.insert.pagesPerRange = pagesPerRange;
|
||||
ItemPointerSet(&xlrec.oldtid, BufferGetBlockNumber(oldbuf), oldoff);
|
||||
xlrec.oldOffnum = oldoff;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBrinUpdate;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
|
||||
rdata[1].data = (char *) newtup;
|
||||
rdata[1].len = newsz;
|
||||
rdata[1].buffer = extended ? InvalidBuffer : newbuf;
|
||||
rdata[1].buffer_std = true;
|
||||
rdata[1].next = &(rdata[2]);
|
||||
/* new page */
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBrinUpdate);
|
||||
|
||||
rdata[2].data = (char *) NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].buffer = revmapbuf;
|
||||
rdata[2].buffer_std = true;
|
||||
rdata[2].next = &(rdata[3]);
|
||||
XLogRegisterBuffer(0, newbuf, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
|
||||
XLogRegisterBufData(0, (char *) newtup, newsz);
|
||||
|
||||
rdata[3].data = (char *) NULL;
|
||||
rdata[3].len = 0;
|
||||
rdata[3].buffer = oldbuf;
|
||||
rdata[3].buffer_std = true;
|
||||
rdata[3].next = NULL;
|
||||
/* revmap page */
|
||||
XLogRegisterBuffer(1, revmapbuf, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, info, rdata);
|
||||
/* old page */
|
||||
XLogRegisterBuffer(2, oldbuf, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, info);
|
||||
|
||||
PageSetLSN(oldpage, recptr);
|
||||
PageSetLSN(newpage, recptr);
|
||||
@@ -354,36 +333,22 @@ brin_doinsert(Relation idxrel, BlockNumber pagesPerRange,
|
||||
{
|
||||
xl_brin_insert xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[3];
|
||||
uint8 info;
|
||||
|
||||
info = XLOG_BRIN_INSERT | (extended ? XLOG_BRIN_INIT_PAGE : 0);
|
||||
xlrec.node = idxrel->rd_node;
|
||||
xlrec.heapBlk = heapBlk;
|
||||
xlrec.pagesPerRange = pagesPerRange;
|
||||
xlrec.revmapBlk = BufferGetBlockNumber(revmapbuf);
|
||||
xlrec.tuplen = itemsz;
|
||||
ItemPointerSet(&xlrec.tid, blk, off);
|
||||
xlrec.offnum = off;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBrinInsert;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].buffer_std = false;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBrinInsert);
|
||||
|
||||
rdata[1].data = (char *) tup;
|
||||
rdata[1].len = itemsz;
|
||||
rdata[1].buffer = extended ? InvalidBuffer : *buffer;
|
||||
rdata[1].buffer_std = true;
|
||||
rdata[1].next = &(rdata[2]);
|
||||
XLogRegisterBuffer(0, *buffer, REGBUF_STANDARD | (extended ? REGBUF_WILL_INIT : 0));
|
||||
XLogRegisterBufData(0, (char *) tup, itemsz);
|
||||
|
||||
rdata[2].data = (char *) NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].buffer = revmapbuf;
|
||||
rdata[2].buffer_std = false;
|
||||
rdata[2].next = NULL;
|
||||
XLogRegisterBuffer(1, revmapbuf, 0);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, info, rdata);
|
||||
recptr = XLogInsert(RM_BRIN_ID, info);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
PageSetLSN(BufferGetPage(revmapbuf), recptr);
|
||||
|
||||
@@ -477,23 +477,16 @@ revmap_physical_extend(BrinRevmap *revmap)
|
||||
{
|
||||
xl_brin_revmap_extend xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[2];
|
||||
|
||||
xlrec.node = revmap->rm_irel->rd_node;
|
||||
xlrec.targetBlk = mapBlk;
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBrinRevmapExtend;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].buffer_std = false;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
|
||||
rdata[1].data = (char *) NULL;
|
||||
rdata[1].len = 0;
|
||||
rdata[1].buffer = revmap->rm_metaBuf;
|
||||
rdata[1].buffer_std = false;
|
||||
rdata[1].next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBrinRevmapExtend);
|
||||
XLogRegisterBuffer(0, revmap->rm_metaBuf, 0);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND, rdata);
|
||||
XLogRegisterBuffer(1, buf, REGBUF_WILL_INIT);
|
||||
|
||||
recptr = XLogInsert(RM_BRIN_ID, XLOG_BRIN_REVMAP_EXTEND);
|
||||
PageSetLSN(metapage, recptr);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
@@ -20,17 +20,15 @@
|
||||
* xlog replay routines
|
||||
*/
|
||||
static void
|
||||
brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
|
||||
brin_xlog_createidx(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_brin_createidx *xlrec = (xl_brin_createidx *) XLogRecGetData(record);
|
||||
Buffer buf;
|
||||
Page page;
|
||||
|
||||
/* Backup blocks are not used in create_index records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
/* create the index' metapage */
|
||||
buf = XLogReadBuffer(xlrec->node, BRIN_METAPAGE_BLKNO, true);
|
||||
buf = XLogInitBufferForRedo(record, 0);
|
||||
Assert(BufferIsValid(buf));
|
||||
page = (Page) BufferGetPage(buf);
|
||||
brin_metapage_init(page, xlrec->pagesPerRange, xlrec->version);
|
||||
@@ -44,51 +42,47 @@ brin_xlog_createidx(XLogRecPtr lsn, XLogRecord *record)
|
||||
* revmap.
|
||||
*/
|
||||
static void
|
||||
brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
|
||||
xl_brin_insert *xlrec, BrinTuple *tuple)
|
||||
brin_xlog_insert_update(XLogReaderState *record,
|
||||
xl_brin_insert *xlrec)
|
||||
{
|
||||
BlockNumber blkno;
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
XLogRedoAction action;
|
||||
|
||||
blkno = ItemPointerGetBlockNumber(&xlrec->tid);
|
||||
|
||||
/*
|
||||
* If we inserted the first and only tuple on the page, re-initialize the
|
||||
* page from scratch.
|
||||
*/
|
||||
if (record->xl_info & XLOG_BRIN_INIT_PAGE)
|
||||
if (XLogRecGetInfo(record) & XLOG_BRIN_INIT_PAGE)
|
||||
{
|
||||
/*
|
||||
* No full-page image here. Don't try to read it, because there
|
||||
* might be one for the revmap buffer, below.
|
||||
*/
|
||||
buffer = XLogReadBuffer(xlrec->node, blkno, true);
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = BufferGetPage(buffer);
|
||||
brin_page_init(page, BRIN_PAGETYPE_REGULAR);
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
{
|
||||
action = XLogReadBufferForRedo(lsn, record, 0,
|
||||
xlrec->node, blkno, &buffer);
|
||||
action = XLogReadBufferForRedo(record, 0, &buffer);
|
||||
}
|
||||
|
||||
/* insert the index item into the page */
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
OffsetNumber offnum;
|
||||
BrinTuple *tuple;
|
||||
Size tuplen;
|
||||
|
||||
tuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
|
||||
|
||||
Assert(tuple->bt_blkno == xlrec->heapBlk);
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
|
||||
offnum = xlrec->offnum;
|
||||
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
|
||||
elog(PANIC, "brin_xlog_insert_update: invalid max offset number");
|
||||
|
||||
offnum = PageAddItem(page, (Item) tuple, xlrec->tuplen, offnum, true,
|
||||
false);
|
||||
offnum = PageAddItem(page, (Item) tuple, tuplen, offnum, true, false);
|
||||
if (offnum == InvalidOffsetNumber)
|
||||
elog(PANIC, "brin_xlog_insert_update: failed to add tuple");
|
||||
|
||||
@@ -99,16 +93,17 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* update the revmap */
|
||||
action = XLogReadBufferForRedo(lsn, record,
|
||||
record->xl_info & XLOG_BRIN_INIT_PAGE ? 0 : 1,
|
||||
xlrec->node,
|
||||
xlrec->revmapBlk, &buffer);
|
||||
action = XLogReadBufferForRedo(record, 1, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
ItemPointerData tid;
|
||||
BlockNumber blkno = BufferGetBlockNumber(buffer);
|
||||
|
||||
ItemPointerSet(&tid, blkno, xlrec->offnum);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
brinSetHeapBlockItemptr(buffer, xlrec->pagesPerRange, xlrec->heapBlk,
|
||||
xlrec->tid);
|
||||
tid);
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
@@ -122,34 +117,26 @@ brin_xlog_insert_update(XLogRecPtr lsn, XLogRecord *record,
|
||||
* replay a BRIN index insertion
|
||||
*/
|
||||
static void
|
||||
brin_xlog_insert(XLogRecPtr lsn, XLogRecord *record)
|
||||
brin_xlog_insert(XLogReaderState *record)
|
||||
{
|
||||
xl_brin_insert *xlrec = (xl_brin_insert *) XLogRecGetData(record);
|
||||
BrinTuple *newtup;
|
||||
|
||||
newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinInsert);
|
||||
|
||||
brin_xlog_insert_update(lsn, record, xlrec, newtup);
|
||||
brin_xlog_insert_update(record, xlrec);
|
||||
}
|
||||
|
||||
/*
|
||||
* replay a BRIN index update
|
||||
*/
|
||||
static void
|
||||
brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
|
||||
brin_xlog_update(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_brin_update *xlrec = (xl_brin_update *) XLogRecGetData(record);
|
||||
BlockNumber blkno;
|
||||
Buffer buffer;
|
||||
BrinTuple *newtup;
|
||||
XLogRedoAction action;
|
||||
|
||||
newtup = (BrinTuple *) ((char *) xlrec + SizeOfBrinUpdate);
|
||||
|
||||
/* First remove the old tuple */
|
||||
blkno = ItemPointerGetBlockNumber(&(xlrec->oldtid));
|
||||
action = XLogReadBufferForRedo(lsn, record, 2, xlrec->insert.node,
|
||||
blkno, &buffer);
|
||||
action = XLogReadBufferForRedo(record, 2, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page;
|
||||
@@ -157,7 +144,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
offnum = ItemPointerGetOffsetNumber(&(xlrec->oldtid));
|
||||
offnum = xlrec->oldOffnum;
|
||||
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
|
||||
elog(PANIC, "brin_xlog_update: invalid max offset number");
|
||||
|
||||
@@ -168,7 +155,7 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/* Then insert the new tuple and update revmap, like in an insertion. */
|
||||
brin_xlog_insert_update(lsn, record, &xlrec->insert, newtup);
|
||||
brin_xlog_insert_update(record, &xlrec->insert);
|
||||
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
@@ -178,30 +165,27 @@ brin_xlog_update(XLogRecPtr lsn, XLogRecord *record)
|
||||
* Update a tuple on a single page.
|
||||
*/
|
||||
static void
|
||||
brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
|
||||
brin_xlog_samepage_update(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_brin_samepage_update *xlrec;
|
||||
BlockNumber blkno;
|
||||
Buffer buffer;
|
||||
XLogRedoAction action;
|
||||
|
||||
xlrec = (xl_brin_samepage_update *) XLogRecGetData(record);
|
||||
blkno = ItemPointerGetBlockNumber(&(xlrec->tid));
|
||||
action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node, blkno,
|
||||
&buffer);
|
||||
action = XLogReadBufferForRedo(record, 0, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
int tuplen;
|
||||
Size tuplen;
|
||||
BrinTuple *mmtuple;
|
||||
Page page;
|
||||
OffsetNumber offnum;
|
||||
|
||||
tuplen = record->xl_len - SizeOfBrinSamepageUpdate;
|
||||
mmtuple = (BrinTuple *) ((char *) xlrec + SizeOfBrinSamepageUpdate);
|
||||
mmtuple = (BrinTuple *) XLogRecGetBlockData(record, 0, &tuplen);
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
offnum = ItemPointerGetOffsetNumber(&(xlrec->tid));
|
||||
offnum = xlrec->offnum;
|
||||
if (PageGetMaxOffsetNumber(page) + 1 < offnum)
|
||||
elog(PANIC, "brin_xlog_samepage_update: invalid max offset number");
|
||||
|
||||
@@ -223,18 +207,23 @@ brin_xlog_samepage_update(XLogRecPtr lsn, XLogRecord *record)
|
||||
* Replay a revmap page extension
|
||||
*/
|
||||
static void
|
||||
brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
|
||||
brin_xlog_revmap_extend(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_brin_revmap_extend *xlrec;
|
||||
Buffer metabuf;
|
||||
Buffer buf;
|
||||
Page page;
|
||||
BlockNumber targetBlk;
|
||||
XLogRedoAction action;
|
||||
|
||||
xlrec = (xl_brin_revmap_extend *) XLogRecGetData(record);
|
||||
|
||||
XLogRecGetBlockTag(record, 1, NULL, NULL, &targetBlk);
|
||||
Assert(xlrec->targetBlk == targetBlk);
|
||||
|
||||
/* Update the metapage */
|
||||
action = XLogReadBufferForRedo(lsn, record, 0, xlrec->node,
|
||||
BRIN_METAPAGE_BLKNO, &metabuf);
|
||||
action = XLogReadBufferForRedo(record, 0, &metabuf);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page metapg;
|
||||
@@ -255,7 +244,7 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
|
||||
* image here.
|
||||
*/
|
||||
|
||||
buf = XLogReadBuffer(xlrec->node, xlrec->targetBlk, true);
|
||||
buf = XLogInitBufferForRedo(record, 1);
|
||||
page = (Page) BufferGetPage(buf);
|
||||
brin_page_init(page, BRIN_PAGETYPE_REVMAP);
|
||||
|
||||
@@ -268,26 +257,26 @@ brin_xlog_revmap_extend(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
void
|
||||
brin_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
brin_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info & XLOG_BRIN_OPMASK)
|
||||
{
|
||||
case XLOG_BRIN_CREATE_INDEX:
|
||||
brin_xlog_createidx(lsn, record);
|
||||
brin_xlog_createidx(record);
|
||||
break;
|
||||
case XLOG_BRIN_INSERT:
|
||||
brin_xlog_insert(lsn, record);
|
||||
brin_xlog_insert(record);
|
||||
break;
|
||||
case XLOG_BRIN_UPDATE:
|
||||
brin_xlog_update(lsn, record);
|
||||
brin_xlog_update(record);
|
||||
break;
|
||||
case XLOG_BRIN_SAMEPAGE_UPDATE:
|
||||
brin_xlog_samepage_update(lsn, record);
|
||||
brin_xlog_samepage_update(record);
|
||||
break;
|
||||
case XLOG_BRIN_REVMAP_EXTEND:
|
||||
brin_xlog_revmap_extend(lsn, record);
|
||||
brin_xlog_revmap_extend(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "brin_redo: unknown op code %u", info);
|
||||
|
||||
@@ -326,7 +326,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
Buffer childbuf, GinStatsData *buildStats)
|
||||
{
|
||||
Page page = BufferGetPage(stack->buffer);
|
||||
XLogRecData *payloadrdata;
|
||||
GinPlaceToPageRC rc;
|
||||
uint16 xlflags = 0;
|
||||
Page childpage = NULL;
|
||||
@@ -351,12 +350,36 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
/*
|
||||
* Try to put the incoming tuple on the page. placeToPage will decide if
|
||||
* the page needs to be split.
|
||||
*
|
||||
* WAL-logging this operation is a bit funny:
|
||||
*
|
||||
* We're responsible for calling XLogBeginInsert() and XLogInsert().
|
||||
* XLogBeginInsert() must be called before placeToPage, because
|
||||
* placeToPage can register some data to the WAL record.
|
||||
*
|
||||
* If placeToPage returns INSERTED, placeToPage has already called
|
||||
* START_CRIT_SECTION(), and we're responsible for calling
|
||||
* END_CRIT_SECTION. When it returns INSERTED, it is also responsible for
|
||||
* registering any data required to replay the operation with
|
||||
* XLogRegisterBufData(0, ...). It may only add data to block index 0; the
|
||||
* main data of the WAL record is reserved for this function.
|
||||
*
|
||||
* If placeToPage returns SPLIT, we're wholly responsible for WAL logging.
|
||||
* Splits happen infrequently, so we just make a full-page image of all
|
||||
* the pages involved.
|
||||
*/
|
||||
|
||||
if (RelationNeedsWAL(btree->index))
|
||||
XLogBeginInsert();
|
||||
|
||||
rc = btree->placeToPage(btree, stack->buffer, stack,
|
||||
insertdata, updateblkno,
|
||||
&payloadrdata, &newlpage, &newrpage);
|
||||
&newlpage, &newrpage);
|
||||
if (rc == UNMODIFIED)
|
||||
{
|
||||
XLogResetInsertion();
|
||||
return true;
|
||||
}
|
||||
else if (rc == INSERTED)
|
||||
{
|
||||
/* placeToPage did START_CRIT_SECTION() */
|
||||
@@ -372,17 +395,18 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
if (RelationNeedsWAL(btree->index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[3];
|
||||
ginxlogInsert xlrec;
|
||||
BlockIdData childblknos[2];
|
||||
|
||||
xlrec.node = btree->index->rd_node;
|
||||
xlrec.blkno = BufferGetBlockNumber(stack->buffer);
|
||||
/*
|
||||
* placetopage already registered stack->buffer as block 0.
|
||||
*/
|
||||
xlrec.flags = xlflags;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = sizeof(ginxlogInsert);
|
||||
if (childbuf != InvalidBuffer)
|
||||
XLogRegisterBuffer(1, childbuf, REGBUF_STANDARD);
|
||||
|
||||
XLogRegisterData((char *) &xlrec, sizeof(ginxlogInsert));
|
||||
|
||||
/*
|
||||
* Log information about child if this was an insertion of a
|
||||
@@ -390,26 +414,13 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
*/
|
||||
if (childbuf != InvalidBuffer)
|
||||
{
|
||||
rdata[0].next = &rdata[1];
|
||||
|
||||
BlockIdSet(&childblknos[0], BufferGetBlockNumber(childbuf));
|
||||
BlockIdSet(&childblknos[1], GinPageGetOpaque(childpage)->rightlink);
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = (char *) childblknos;
|
||||
rdata[1].len = sizeof(BlockIdData) * 2;
|
||||
rdata[1].next = &rdata[2];
|
||||
|
||||
rdata[2].buffer = childbuf;
|
||||
rdata[2].buffer_std = false;
|
||||
rdata[2].data = NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].next = payloadrdata;
|
||||
XLogRegisterData((char *) childblknos,
|
||||
sizeof(BlockIdData) * 2);
|
||||
}
|
||||
else
|
||||
rdata[0].next = payloadrdata;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT, rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT);
|
||||
PageSetLSN(page, recptr);
|
||||
if (childbuf != InvalidBuffer)
|
||||
PageSetLSN(childpage, recptr);
|
||||
@@ -421,10 +432,9 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
}
|
||||
else if (rc == SPLIT)
|
||||
{
|
||||
/* Didn't fit, have to split */
|
||||
/* Didn't fit, had to split */
|
||||
Buffer rbuffer;
|
||||
BlockNumber savedRightLink;
|
||||
XLogRecData rdata[2];
|
||||
ginxlogSplit data;
|
||||
Buffer lbuffer = InvalidBuffer;
|
||||
Page newrootpg = NULL;
|
||||
@@ -448,7 +458,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
*/
|
||||
|
||||
data.node = btree->index->rd_node;
|
||||
data.rblkno = BufferGetBlockNumber(rbuffer);
|
||||
data.flags = xlflags;
|
||||
if (childbuf != InvalidBuffer)
|
||||
{
|
||||
@@ -462,23 +471,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
else
|
||||
data.leftChildBlkno = data.rightChildBlkno = InvalidBlockNumber;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogSplit);
|
||||
|
||||
if (childbuf != InvalidBuffer)
|
||||
{
|
||||
rdata[0].next = &rdata[1];
|
||||
|
||||
rdata[1].buffer = childbuf;
|
||||
rdata[1].buffer_std = false;
|
||||
rdata[1].data = NULL;
|
||||
rdata[1].len = 0;
|
||||
rdata[1].next = payloadrdata;
|
||||
}
|
||||
else
|
||||
rdata[0].next = payloadrdata;
|
||||
|
||||
if (stack->parent == NULL)
|
||||
{
|
||||
/*
|
||||
@@ -496,12 +488,7 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
buildStats->nEntryPages++;
|
||||
}
|
||||
|
||||
/*
|
||||
* root never has a right-link, so we borrow the rrlink field to
|
||||
* store the root block number.
|
||||
*/
|
||||
data.rrlink = BufferGetBlockNumber(stack->buffer);
|
||||
data.lblkno = BufferGetBlockNumber(lbuffer);
|
||||
data.rrlink = InvalidBlockNumber;
|
||||
data.flags |= GIN_SPLIT_ROOT;
|
||||
|
||||
GinPageGetOpaque(newrpage)->rightlink = InvalidBlockNumber;
|
||||
@@ -524,7 +511,6 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
{
|
||||
/* split non-root page */
|
||||
data.rrlink = savedRightLink;
|
||||
data.lblkno = BufferGetBlockNumber(stack->buffer);
|
||||
|
||||
GinPageGetOpaque(newrpage)->rightlink = savedRightLink;
|
||||
GinPageGetOpaque(newlpage)->flags |= GIN_INCOMPLETE_SPLIT;
|
||||
@@ -572,7 +558,28 @@ ginPlaceToPage(GinBtree btree, GinBtreeStack *stack,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT, rdata);
|
||||
/*
|
||||
* We just take full page images of all the split pages. Splits
|
||||
* are uncommon enough that it's not worth complicating the code
|
||||
* to be more efficient.
|
||||
*/
|
||||
if (stack->parent == NULL)
|
||||
{
|
||||
XLogRegisterBuffer(0, lbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(2, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
}
|
||||
else
|
||||
{
|
||||
XLogRegisterBuffer(0, stack->buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(1, rbuffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
}
|
||||
if (BufferIsValid(childbuf))
|
||||
XLogRegisterBuffer(3, childbuf, 0);
|
||||
|
||||
XLogRegisterData((char *) &data, sizeof(ginxlogSplit));
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_SPLIT);
|
||||
PageSetLSN(BufferGetPage(stack->buffer), recptr);
|
||||
PageSetLSN(BufferGetPage(rbuffer), recptr);
|
||||
if (stack->parent == NULL)
|
||||
|
||||
@@ -98,20 +98,19 @@ static ItemPointer dataLeafPageGetUncompressed(Page page, int *nitems);
|
||||
static void dataSplitPageInternal(GinBtree btree, Buffer origbuf,
|
||||
GinBtreeStack *stack,
|
||||
void *insertdata, BlockNumber updateblkno,
|
||||
XLogRecData **prdata, Page *newlpage, Page *newrpage);
|
||||
Page *newlpage, Page *newrpage);
|
||||
|
||||
static disassembledLeaf *disassembleLeaf(Page page);
|
||||
static bool leafRepackItems(disassembledLeaf *leaf, ItemPointer remaining);
|
||||
static bool addItemsToLeaf(disassembledLeaf *leaf, ItemPointer newItems,
|
||||
int nNewItems);
|
||||
|
||||
static XLogRecData *constructLeafRecompressWALData(Buffer buf,
|
||||
disassembledLeaf *leaf);
|
||||
static void registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf);
|
||||
static void dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf);
|
||||
static void dataPlaceToPageLeafSplit(Buffer buf,
|
||||
disassembledLeaf *leaf,
|
||||
ItemPointerData lbound, ItemPointerData rbound,
|
||||
XLogRecData **prdata, Page lpage, Page rpage);
|
||||
Page lpage, Page rpage);
|
||||
|
||||
/*
|
||||
* Read TIDs from leaf data page to single uncompressed array. The TIDs are
|
||||
@@ -428,8 +427,7 @@ GinPageDeletePostingItem(Page page, OffsetNumber offset)
|
||||
*/
|
||||
static GinPlaceToPageRC
|
||||
dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
void *insertdata, XLogRecData **prdata,
|
||||
Page *newlpage, Page *newrpage)
|
||||
void *insertdata, Page *newlpage, Page *newrpage)
|
||||
{
|
||||
GinBtreeDataLeafInsertData *items = insertdata;
|
||||
ItemPointer newItems = &items->items[items->curitem];
|
||||
@@ -602,9 +600,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
*/
|
||||
MemoryContextSwitchTo(oldCxt);
|
||||
if (RelationNeedsWAL(btree->index))
|
||||
*prdata = constructLeafRecompressWALData(buf, leaf);
|
||||
else
|
||||
*prdata = NULL;
|
||||
registerLeafRecompressWALData(buf, leaf);
|
||||
START_CRIT_SECTION();
|
||||
dataPlaceToPageLeafRecompress(buf, leaf);
|
||||
|
||||
@@ -685,7 +681,7 @@ dataPlaceToPageLeaf(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
*newrpage = MemoryContextAlloc(oldCxt, BLCKSZ);
|
||||
|
||||
dataPlaceToPageLeafSplit(buf, leaf, lbound, rbound,
|
||||
prdata, *newlpage, *newrpage);
|
||||
*newlpage, *newrpage);
|
||||
|
||||
Assert(GinPageRightMost(page) ||
|
||||
ginCompareItemPointers(GinDataPageGetRightBound(*newlpage),
|
||||
@@ -791,7 +787,6 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
||||
*/
|
||||
if (removedsomething)
|
||||
{
|
||||
XLogRecData *payloadrdata = NULL;
|
||||
bool modified;
|
||||
|
||||
/*
|
||||
@@ -818,7 +813,10 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
||||
}
|
||||
|
||||
if (RelationNeedsWAL(indexrel))
|
||||
payloadrdata = constructLeafRecompressWALData(buffer, leaf);
|
||||
{
|
||||
XLogBeginInsert();
|
||||
registerLeafRecompressWALData(buffer, leaf);
|
||||
}
|
||||
START_CRIT_SECTION();
|
||||
dataPlaceToPageLeafRecompress(buffer, leaf);
|
||||
|
||||
@@ -827,18 +825,8 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
||||
if (RelationNeedsWAL(indexrel))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
ginxlogVacuumDataLeafPage xlrec;
|
||||
|
||||
xlrec.node = indexrel->rd_node;
|
||||
xlrec.blkno = BufferGetBlockNumber(buffer);
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = offsetof(ginxlogVacuumDataLeafPage, data);
|
||||
rdata.next = payloadrdata;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE, &rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_DATA_LEAF_PAGE);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
@@ -850,13 +838,12 @@ ginVacuumPostingTreeLeaf(Relation indexrel, Buffer buffer, GinVacuumState *gvs)
|
||||
* Construct a ginxlogRecompressDataLeaf record representing the changes
|
||||
* in *leaf.
|
||||
*/
|
||||
static XLogRecData *
|
||||
constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
|
||||
static void
|
||||
registerLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
|
||||
{
|
||||
int nmodified = 0;
|
||||
char *walbufbegin;
|
||||
char *walbufend;
|
||||
XLogRecData *rdata;
|
||||
dlist_iter iter;
|
||||
int segno;
|
||||
ginxlogRecompressDataLeaf *recompress_xlog;
|
||||
@@ -871,12 +858,11 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
|
||||
nmodified++;
|
||||
}
|
||||
|
||||
walbufbegin = palloc(
|
||||
sizeof(ginxlogRecompressDataLeaf) +
|
||||
BLCKSZ + /* max size needed to hold the segment
|
||||
* data */
|
||||
nmodified * 2 + /* (segno + action) per action */
|
||||
sizeof(XLogRecData));
|
||||
walbufbegin =
|
||||
palloc(sizeof(ginxlogRecompressDataLeaf) +
|
||||
BLCKSZ + /* max size needed to hold the segment data */
|
||||
nmodified * 2 /* (segno + action) per action */
|
||||
);
|
||||
walbufend = walbufbegin;
|
||||
|
||||
recompress_xlog = (ginxlogRecompressDataLeaf *) walbufend;
|
||||
@@ -944,14 +930,10 @@ constructLeafRecompressWALData(Buffer buf, disassembledLeaf *leaf)
|
||||
segno++;
|
||||
}
|
||||
|
||||
rdata = (XLogRecData *) MAXALIGN(walbufend);
|
||||
rdata->buffer = buf;
|
||||
rdata->buffer_std = TRUE;
|
||||
rdata->data = walbufbegin;
|
||||
rdata->len = walbufend - walbufbegin;
|
||||
rdata->next = NULL;
|
||||
|
||||
return rdata;
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
XLogRegisterBufData(0, walbufbegin, walbufend - walbufbegin);
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1024,7 +1006,7 @@ dataPlaceToPageLeafRecompress(Buffer buf, disassembledLeaf *leaf)
|
||||
static void
|
||||
dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
|
||||
ItemPointerData lbound, ItemPointerData rbound,
|
||||
XLogRecData **prdata, Page lpage, Page rpage)
|
||||
Page lpage, Page rpage)
|
||||
{
|
||||
char *ptr;
|
||||
int segsize;
|
||||
@@ -1034,10 +1016,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
|
||||
dlist_node *firstright;
|
||||
leafSegmentInfo *seginfo;
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static ginxlogSplitDataLeaf split_xlog;
|
||||
static XLogRecData rdata[3];
|
||||
|
||||
/* Initialize temporary pages to hold the new left and right pages */
|
||||
GinInitPage(lpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
|
||||
GinInitPage(rpage, GIN_DATA | GIN_LEAF | GIN_COMPRESSED, BLCKSZ);
|
||||
@@ -1092,29 +1070,6 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
|
||||
Assert(rsize == leaf->rsize);
|
||||
GinDataPageSetDataSize(rpage, rsize);
|
||||
*GinDataPageGetRightBound(rpage) = rbound;
|
||||
|
||||
/* Create WAL record */
|
||||
split_xlog.lsize = lsize;
|
||||
split_xlog.rsize = rsize;
|
||||
split_xlog.lrightbound = lbound;
|
||||
split_xlog.rrightbound = rbound;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &split_xlog;
|
||||
rdata[0].len = sizeof(ginxlogSplitDataLeaf);
|
||||
rdata[0].next = &rdata[1];
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = (char *) GinDataLeafPageGetPostingList(lpage);
|
||||
rdata[1].len = lsize;
|
||||
rdata[1].next = &rdata[2];
|
||||
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
rdata[2].data = (char *) GinDataLeafPageGetPostingList(rpage);
|
||||
rdata[2].len = rsize;
|
||||
rdata[2].next = NULL;
|
||||
|
||||
*prdata = rdata;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1124,29 +1079,30 @@ dataPlaceToPageLeafSplit(Buffer buf, disassembledLeaf *leaf,
|
||||
*
|
||||
* In addition to inserting the given item, the downlink of the existing item
|
||||
* at 'off' is updated to point to 'updateblkno'.
|
||||
*
|
||||
* On INSERTED, registers the buffer as buffer ID 0, with data.
|
||||
* On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
|
||||
*/
|
||||
static GinPlaceToPageRC
|
||||
dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
void *insertdata, BlockNumber updateblkno,
|
||||
XLogRecData **prdata, Page *newlpage, Page *newrpage)
|
||||
Page *newlpage, Page *newrpage)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
OffsetNumber off = stack->off;
|
||||
PostingItem *pitem;
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static XLogRecData rdata;
|
||||
/* this must be static so it can be returned to caller */
|
||||
static ginxlogInsertDataInternal data;
|
||||
|
||||
/* split if we have to */
|
||||
if (GinNonLeafDataPageGetFreeSpace(page) < sizeof(PostingItem))
|
||||
{
|
||||
dataSplitPageInternal(btree, buf, stack, insertdata, updateblkno,
|
||||
prdata, newlpage, newrpage);
|
||||
newlpage, newrpage);
|
||||
return SPLIT;
|
||||
}
|
||||
|
||||
*prdata = &rdata;
|
||||
Assert(GinPageIsData(page));
|
||||
|
||||
START_CRIT_SECTION();
|
||||
@@ -1159,14 +1115,15 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
pitem = (PostingItem *) insertdata;
|
||||
GinDataPageAddPostingItem(page, pitem, off);
|
||||
|
||||
data.offset = off;
|
||||
data.newitem = *pitem;
|
||||
if (RelationNeedsWAL(btree->index))
|
||||
{
|
||||
data.offset = off;
|
||||
data.newitem = *pitem;
|
||||
|
||||
rdata.buffer = buf;
|
||||
rdata.buffer_std = TRUE;
|
||||
rdata.data = (char *) &data;
|
||||
rdata.len = sizeof(ginxlogInsertDataInternal);
|
||||
rdata.next = NULL;
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
XLogRegisterBufData(0, (char *) &data,
|
||||
sizeof(ginxlogInsertDataInternal));
|
||||
}
|
||||
|
||||
return INSERTED;
|
||||
}
|
||||
@@ -1178,7 +1135,6 @@ dataPlaceToPageInternal(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
static GinPlaceToPageRC
|
||||
dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
void *insertdata, BlockNumber updateblkno,
|
||||
XLogRecData **prdata,
|
||||
Page *newlpage, Page *newrpage)
|
||||
{
|
||||
Page page = BufferGetPage(buf);
|
||||
@@ -1187,11 +1143,11 @@ dataPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
|
||||
if (GinPageIsLeaf(page))
|
||||
return dataPlaceToPageLeaf(btree, buf, stack, insertdata,
|
||||
prdata, newlpage, newrpage);
|
||||
newlpage, newrpage);
|
||||
else
|
||||
return dataPlaceToPageInternal(btree, buf, stack,
|
||||
insertdata, updateblkno,
|
||||
prdata, newlpage, newrpage);
|
||||
newlpage, newrpage);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1202,7 +1158,7 @@ static void
|
||||
dataSplitPageInternal(GinBtree btree, Buffer origbuf,
|
||||
GinBtreeStack *stack,
|
||||
void *insertdata, BlockNumber updateblkno,
|
||||
XLogRecData **prdata, Page *newlpage, Page *newrpage)
|
||||
Page *newlpage, Page *newrpage)
|
||||
{
|
||||
Page oldpage = BufferGetPage(origbuf);
|
||||
OffsetNumber off = stack->off;
|
||||
@@ -1215,19 +1171,13 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
|
||||
Page lpage;
|
||||
Page rpage;
|
||||
OffsetNumber separator;
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static ginxlogSplitDataInternal data;
|
||||
static XLogRecData rdata[4];
|
||||
static PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
|
||||
PostingItem allitems[(BLCKSZ / sizeof(PostingItem)) + 1];
|
||||
|
||||
lpage = PageGetTempPage(oldpage);
|
||||
rpage = PageGetTempPage(oldpage);
|
||||
GinInitPage(lpage, GinPageGetOpaque(oldpage)->flags, pageSize);
|
||||
GinInitPage(rpage, GinPageGetOpaque(oldpage)->flags, pageSize);
|
||||
|
||||
*prdata = rdata;
|
||||
|
||||
/*
|
||||
* First construct a new list of PostingItems, which includes all the old
|
||||
* items, and the new item.
|
||||
@@ -1277,20 +1227,6 @@ dataSplitPageInternal(GinBtree btree, Buffer origbuf,
|
||||
/* set up right bound for right page */
|
||||
*GinDataPageGetRightBound(rpage) = oldbound;
|
||||
|
||||
data.separator = separator;
|
||||
data.nitem = nitems;
|
||||
data.rightbound = oldbound;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogSplitDataInternal);
|
||||
rdata[0].next = &rdata[1];
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = (char *) allitems;
|
||||
rdata[1].len = nitems * sizeof(PostingItem);
|
||||
rdata[1].next = NULL;
|
||||
|
||||
*newlpage = lpage;
|
||||
*newrpage = rpage;
|
||||
}
|
||||
@@ -1797,24 +1733,18 @@ createPostingTree(Relation index, ItemPointerData *items, uint32 nitems,
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[2];
|
||||
ginxlogCreatePostingTree data;
|
||||
|
||||
data.node = index->rd_node;
|
||||
data.blkno = blkno;
|
||||
data.size = rootsize;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogCreatePostingTree);
|
||||
rdata[0].next = &rdata[1];
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &data, sizeof(ginxlogCreatePostingTree));
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = (char *) GinDataLeafPageGetPostingList(page);
|
||||
rdata[1].len = rootsize;
|
||||
rdata[1].next = NULL;
|
||||
XLogRegisterData((char *) GinDataLeafPageGetPostingList(page),
|
||||
rootsize);
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE, rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_PTREE);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
static void entrySplitPage(GinBtree btree, Buffer origbuf,
|
||||
GinBtreeStack *stack,
|
||||
void *insertPayload,
|
||||
BlockNumber updateblkno, XLogRecData **prdata,
|
||||
BlockNumber updateblkno,
|
||||
Page *newlpage, Page *newrpage);
|
||||
|
||||
/*
|
||||
@@ -515,33 +515,33 @@ entryPreparePage(GinBtree btree, Page page, OffsetNumber off,
|
||||
* On insertion to an internal node, in addition to inserting the given item,
|
||||
* the downlink of the existing item at 'off' is updated to point to
|
||||
* 'updateblkno'.
|
||||
*
|
||||
* On INSERTED, registers the buffer as buffer ID 0, with data.
|
||||
* On SPLIT, returns the new left and right pages in *newlpage and *newrpage.
|
||||
*/
|
||||
static GinPlaceToPageRC
|
||||
entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
void *insertPayload, BlockNumber updateblkno,
|
||||
XLogRecData **prdata, Page *newlpage, Page *newrpage)
|
||||
Page *newlpage, Page *newrpage)
|
||||
{
|
||||
GinBtreeEntryInsertData *insertData = insertPayload;
|
||||
Page page = BufferGetPage(buf);
|
||||
OffsetNumber off = stack->off;
|
||||
OffsetNumber placed;
|
||||
int cnt = 0;
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static XLogRecData rdata[3];
|
||||
/* this must be static so it can be returned to caller. */
|
||||
static ginxlogInsertEntry data;
|
||||
|
||||
/* quick exit if it doesn't fit */
|
||||
if (!entryIsEnoughSpace(btree, buf, off, insertData))
|
||||
{
|
||||
entrySplitPage(btree, buf, stack, insertPayload, updateblkno,
|
||||
prdata, newlpage, newrpage);
|
||||
newlpage, newrpage);
|
||||
return SPLIT;
|
||||
}
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
*prdata = rdata;
|
||||
entryPreparePage(btree, page, off, insertData, updateblkno);
|
||||
|
||||
placed = PageAddItem(page,
|
||||
@@ -552,21 +552,17 @@ entryPlaceToPage(GinBtree btree, Buffer buf, GinBtreeStack *stack,
|
||||
elog(ERROR, "failed to add item to index page in \"%s\"",
|
||||
RelationGetRelationName(btree->index));
|
||||
|
||||
data.isDelete = insertData->isDelete;
|
||||
data.offset = off;
|
||||
if (RelationNeedsWAL(btree->index))
|
||||
{
|
||||
data.isDelete = insertData->isDelete;
|
||||
data.offset = off;
|
||||
|
||||
rdata[cnt].buffer = buf;
|
||||
rdata[cnt].buffer_std = true;
|
||||
rdata[cnt].data = (char *) &data;
|
||||
rdata[cnt].len = offsetof(ginxlogInsertEntry, tuple);
|
||||
rdata[cnt].next = &rdata[cnt + 1];
|
||||
cnt++;
|
||||
|
||||
rdata[cnt].buffer = buf;
|
||||
rdata[cnt].buffer_std = true;
|
||||
rdata[cnt].data = (char *) insertData->entry;
|
||||
rdata[cnt].len = IndexTupleSize(insertData->entry);
|
||||
rdata[cnt].next = NULL;
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
XLogRegisterBufData(0, (char *) &data,
|
||||
offsetof(ginxlogInsertEntry, tuple));
|
||||
XLogRegisterBufData(0, (char *) insertData->entry,
|
||||
IndexTupleSize(insertData->entry));
|
||||
}
|
||||
|
||||
return INSERTED;
|
||||
}
|
||||
@@ -581,7 +577,7 @@ static void
|
||||
entrySplitPage(GinBtree btree, Buffer origbuf,
|
||||
GinBtreeStack *stack,
|
||||
void *insertPayload,
|
||||
BlockNumber updateblkno, XLogRecData **prdata,
|
||||
BlockNumber updateblkno,
|
||||
Page *newlpage, Page *newrpage)
|
||||
{
|
||||
GinBtreeEntryInsertData *insertData = insertPayload;
|
||||
@@ -590,7 +586,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
||||
maxoff,
|
||||
separator = InvalidOffsetNumber;
|
||||
Size totalsize = 0;
|
||||
Size tupstoresize;
|
||||
Size lsize = 0,
|
||||
size;
|
||||
char *ptr;
|
||||
@@ -599,13 +594,8 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
||||
Page lpage = PageGetTempPageCopy(BufferGetPage(origbuf));
|
||||
Page rpage = PageGetTempPageCopy(BufferGetPage(origbuf));
|
||||
Size pageSize = PageGetPageSize(lpage);
|
||||
char tupstore[2 * BLCKSZ];
|
||||
|
||||
/* these must be static so they can be returned to caller */
|
||||
static XLogRecData rdata[2];
|
||||
static ginxlogSplitEntry data;
|
||||
static char tupstore[2 * BLCKSZ];
|
||||
|
||||
*prdata = rdata;
|
||||
entryPreparePage(btree, lpage, off, insertData, updateblkno);
|
||||
|
||||
/*
|
||||
@@ -638,7 +628,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
||||
ptr += size;
|
||||
totalsize += size + sizeof(ItemIdData);
|
||||
}
|
||||
tupstoresize = ptr - tupstore;
|
||||
|
||||
/*
|
||||
* Initialize the left and right pages, and copy all the tuples back to
|
||||
@@ -673,19 +662,6 @@ entrySplitPage(GinBtree btree, Buffer origbuf,
|
||||
ptr += MAXALIGN(IndexTupleSize(itup));
|
||||
}
|
||||
|
||||
data.separator = separator;
|
||||
data.nitem = maxoff;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogSplitEntry);
|
||||
rdata[0].next = &rdata[1];
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = tupstore;
|
||||
rdata[1].len = tupstoresize;
|
||||
rdata[1].next = NULL;
|
||||
|
||||
*newlpage = lpage;
|
||||
*newrpage = rpage;
|
||||
}
|
||||
|
||||
@@ -108,26 +108,19 @@ writeListPage(Relation index, Buffer buffer,
|
||||
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
XLogRecData rdata[2];
|
||||
ginxlogInsertListPage data;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
data.node = index->rd_node;
|
||||
data.blkno = BufferGetBlockNumber(buffer);
|
||||
data.rightlink = rightlink;
|
||||
data.ntuples = ntuples;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogInsertListPage);
|
||||
rdata[0].next = rdata + 1;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &data, sizeof(ginxlogInsertListPage));
|
||||
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].data = workspace;
|
||||
rdata[1].len = size;
|
||||
rdata[1].next = NULL;
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
|
||||
XLogRegisterBufData(0, workspace, size);
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE, rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_INSERT_LISTPAGE);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
@@ -224,26 +217,23 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
Buffer metabuffer;
|
||||
Page metapage;
|
||||
GinMetaPageData *metadata = NULL;
|
||||
XLogRecData rdata[2];
|
||||
Buffer buffer = InvalidBuffer;
|
||||
Page page = NULL;
|
||||
ginxlogUpdateMeta data;
|
||||
bool separateList = false;
|
||||
bool needCleanup = false;
|
||||
int cleanupSize;
|
||||
bool needWal;
|
||||
|
||||
if (collector->ntuples == 0)
|
||||
return;
|
||||
|
||||
needWal = RelationNeedsWAL(index);
|
||||
|
||||
data.node = index->rd_node;
|
||||
data.ntuples = 0;
|
||||
data.newRightlink = data.prevTail = InvalidBlockNumber;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogUpdateMeta);
|
||||
rdata[0].next = NULL;
|
||||
|
||||
metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
|
||||
metapage = BufferGetPage(metabuffer);
|
||||
|
||||
@@ -283,6 +273,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
memset(&sublist, 0, sizeof(GinMetaPageData));
|
||||
makeSublist(index, collector->tuples, collector->ntuples, &sublist);
|
||||
|
||||
if (needWal)
|
||||
XLogBeginInsert();
|
||||
|
||||
/*
|
||||
* metapage was unlocked, see above
|
||||
*/
|
||||
@@ -315,14 +308,6 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
rdata[0].next = rdata + 1;
|
||||
|
||||
rdata[1].buffer = buffer;
|
||||
rdata[1].buffer_std = true;
|
||||
rdata[1].data = NULL;
|
||||
rdata[1].len = 0;
|
||||
rdata[1].next = NULL;
|
||||
|
||||
Assert(GinPageGetOpaque(page)->rightlink == InvalidBlockNumber);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
@@ -336,6 +321,9 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
|
||||
metadata->nPendingPages += sublist.nPendingPages;
|
||||
metadata->nPendingHeapTuples += sublist.nPendingHeapTuples;
|
||||
|
||||
if (needWal)
|
||||
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -348,6 +336,7 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
int i,
|
||||
tupsize;
|
||||
char *ptr;
|
||||
char *collectordata;
|
||||
|
||||
buffer = ReadBuffer(index, metadata->tail);
|
||||
LockBuffer(buffer, GIN_EXCLUSIVE);
|
||||
@@ -356,16 +345,13 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
off = (PageIsEmpty(page)) ? FirstOffsetNumber :
|
||||
OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
||||
|
||||
rdata[0].next = rdata + 1;
|
||||
|
||||
rdata[1].buffer = buffer;
|
||||
rdata[1].buffer_std = true;
|
||||
ptr = rdata[1].data = (char *) palloc(collector->sumsize);
|
||||
rdata[1].len = collector->sumsize;
|
||||
rdata[1].next = NULL;
|
||||
collectordata = ptr = (char *) palloc(collector->sumsize);
|
||||
|
||||
data.ntuples = collector->ntuples;
|
||||
|
||||
if (needWal)
|
||||
XLogBeginInsert();
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
@@ -390,7 +376,12 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
off++;
|
||||
}
|
||||
|
||||
Assert((ptr - rdata[1].data) <= collector->sumsize);
|
||||
Assert((ptr - collectordata) <= collector->sumsize);
|
||||
if (needWal)
|
||||
{
|
||||
XLogRegisterBuffer(1, buffer, REGBUF_STANDARD);
|
||||
XLogRegisterBufData(1, collectordata, collector->sumsize);
|
||||
}
|
||||
|
||||
metadata->tailFreeSize = PageGetExactFreeSpace(page);
|
||||
|
||||
@@ -402,13 +393,16 @@ ginHeapTupleFastInsert(GinState *ginstate, GinTupleCollector *collector)
|
||||
*/
|
||||
MarkBufferDirty(metabuffer);
|
||||
|
||||
if (RelationNeedsWAL(index))
|
||||
if (needWal)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, rdata);
|
||||
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
|
||||
XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
|
||||
PageSetLSN(metapage, recptr);
|
||||
|
||||
if (buffer != InvalidBuffer)
|
||||
@@ -526,20 +520,11 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
|
||||
int i;
|
||||
int64 nDeletedHeapTuples = 0;
|
||||
ginxlogDeleteListPages data;
|
||||
XLogRecData rdata[1];
|
||||
Buffer buffers[GIN_NDELETE_AT_ONCE];
|
||||
|
||||
data.node = index->rd_node;
|
||||
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].data = (char *) &data;
|
||||
rdata[0].len = sizeof(ginxlogDeleteListPages);
|
||||
rdata[0].next = NULL;
|
||||
|
||||
data.ndeleted = 0;
|
||||
while (data.ndeleted < GIN_NDELETE_AT_ONCE && blknoToDelete != newHead)
|
||||
{
|
||||
data.toDelete[data.ndeleted] = blknoToDelete;
|
||||
buffers[data.ndeleted] = ReadBuffer(index, blknoToDelete);
|
||||
LockBuffer(buffers[data.ndeleted], GIN_EXCLUSIVE);
|
||||
page = BufferGetPage(buffers[data.ndeleted]);
|
||||
@@ -562,6 +547,13 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
|
||||
if (stats)
|
||||
stats->pages_deleted += data.ndeleted;
|
||||
|
||||
/*
|
||||
* This operation touches an unusually large number of pages, so
|
||||
* prepare the XLogInsert machinery for that before entering the
|
||||
* critical section.
|
||||
*/
|
||||
XLogEnsureRecordSpace(data.ndeleted, 0);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
metadata->head = blknoToDelete;
|
||||
@@ -592,9 +584,17 @@ shiftList(Relation index, Buffer metabuffer, BlockNumber newHead,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
|
||||
for (i = 0; i < data.ndeleted; i++)
|
||||
XLogRegisterBuffer(i + 1, buffers[i], REGBUF_WILL_INIT);
|
||||
|
||||
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE, rdata);
|
||||
XLogRegisterData((char *) &data,
|
||||
sizeof(ginxlogDeleteListPages));
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_LISTPAGE);
|
||||
PageSetLSN(metapage, recptr);
|
||||
|
||||
for (i = 0; i < data.ndeleted; i++)
|
||||
|
||||
@@ -347,15 +347,13 @@ ginbuild(PG_FUNCTION_ARGS)
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
Page page;
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &(index->rd_node);
|
||||
rdata.len = sizeof(RelFileNode);
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, MetaBuffer, REGBUF_WILL_INIT);
|
||||
XLogRegisterBuffer(1, RootBuffer, REGBUF_WILL_INIT);
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX, &rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_CREATE_INDEX);
|
||||
|
||||
page = BufferGetPage(RootBuffer);
|
||||
PageSetLSN(page, recptr);
|
||||
|
||||
@@ -605,19 +605,17 @@ ginUpdateStats(Relation index, const GinStatsData *stats)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
ginxlogUpdateMeta data;
|
||||
XLogRecData rdata;
|
||||
|
||||
data.node = index->rd_node;
|
||||
data.ntuples = 0;
|
||||
data.newRightlink = data.prevTail = InvalidBlockNumber;
|
||||
memcpy(&data.metadata, metadata, sizeof(GinMetaPageData));
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &data;
|
||||
rdata.len = sizeof(ginxlogUpdateMeta);
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &data, sizeof(ginxlogUpdateMeta));
|
||||
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE, &rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_UPDATE_META_PAGE);
|
||||
PageSetLSN(metapage, recptr);
|
||||
}
|
||||
|
||||
|
||||
@@ -89,10 +89,6 @@ xlogVacuumPage(Relation index, Buffer buffer)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[3];
|
||||
ginxlogVacuumPage xlrec;
|
||||
uint16 lower;
|
||||
uint16 upper;
|
||||
|
||||
/* This is only used for entry tree leaf pages. */
|
||||
Assert(!GinPageIsData(page));
|
||||
@@ -101,57 +97,14 @@ xlogVacuumPage(Relation index, Buffer buffer)
|
||||
if (!RelationNeedsWAL(index))
|
||||
return;
|
||||
|
||||
xlrec.node = index->rd_node;
|
||||
xlrec.blkno = BufferGetBlockNumber(buffer);
|
||||
/*
|
||||
* Always create a full image, we don't track the changes on the page at
|
||||
* any more fine-grained level. This could obviously be improved...
|
||||
*/
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_FORCE_IMAGE | REGBUF_STANDARD);
|
||||
|
||||
/* Assume we can omit data between pd_lower and pd_upper */
|
||||
lower = ((PageHeader) page)->pd_lower;
|
||||
upper = ((PageHeader) page)->pd_upper;
|
||||
|
||||
Assert(lower < BLCKSZ);
|
||||
Assert(upper < BLCKSZ);
|
||||
|
||||
if (lower >= SizeOfPageHeaderData &&
|
||||
upper > lower &&
|
||||
upper <= BLCKSZ)
|
||||
{
|
||||
xlrec.hole_offset = lower;
|
||||
xlrec.hole_length = upper - lower;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No "hole" to compress out */
|
||||
xlrec.hole_offset = 0;
|
||||
xlrec.hole_length = 0;
|
||||
}
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = sizeof(ginxlogVacuumPage);
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &rdata[1];
|
||||
|
||||
if (xlrec.hole_length == 0)
|
||||
{
|
||||
rdata[1].data = (char *) page;
|
||||
rdata[1].len = BLCKSZ;
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* must skip the hole */
|
||||
rdata[1].data = (char *) page;
|
||||
rdata[1].len = xlrec.hole_offset;
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = &rdata[2];
|
||||
|
||||
rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
|
||||
rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
rdata[2].next = NULL;
|
||||
}
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE, rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_VACUUM_PAGE);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
@@ -292,48 +245,27 @@ ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkn
|
||||
if (RelationNeedsWAL(gvs->index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[4];
|
||||
ginxlogDeletePage data;
|
||||
|
||||
data.node = gvs->index->rd_node;
|
||||
data.blkno = deleteBlkno;
|
||||
data.parentBlkno = parentBlkno;
|
||||
/*
|
||||
* We can't pass REGBUF_STANDARD for the deleted page, because we
|
||||
* didn't set pd_lower on pre-9.4 versions. The page might've been
|
||||
* binary-upgraded from an older version, and hence not have pd_lower
|
||||
* set correctly. Ditto for the left page, but removing the item from
|
||||
* the parent updated its pd_lower, so we know that's OK at this
|
||||
* point.
|
||||
*/
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, dBuffer, 0);
|
||||
XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(2, lBuffer, 0);
|
||||
|
||||
data.parentOffset = myoff;
|
||||
data.leftBlkno = leftBlkno;
|
||||
data.rightLink = GinPageGetOpaque(page)->rightlink;
|
||||
|
||||
/*
|
||||
* We can't pass buffer_std = TRUE, because we didn't set pd_lower on
|
||||
* pre-9.4 versions. The page might've been binary-upgraded from an
|
||||
* older version, and hence not have pd_lower set correctly. Ditto for
|
||||
* the left page, but removing the item from the parent updated its
|
||||
* pd_lower, so we know that's OK at this point.
|
||||
*/
|
||||
rdata[0].buffer = dBuffer;
|
||||
rdata[0].buffer_std = FALSE;
|
||||
rdata[0].data = NULL;
|
||||
rdata[0].len = 0;
|
||||
rdata[0].next = rdata + 1;
|
||||
XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage));
|
||||
|
||||
rdata[1].buffer = pBuffer;
|
||||
rdata[1].buffer_std = TRUE;
|
||||
rdata[1].data = NULL;
|
||||
rdata[1].len = 0;
|
||||
rdata[1].next = rdata + 2;
|
||||
|
||||
rdata[2].buffer = lBuffer;
|
||||
rdata[2].buffer_std = FALSE;
|
||||
rdata[2].data = NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].next = rdata + 3;
|
||||
|
||||
rdata[3].buffer = InvalidBuffer;
|
||||
rdata[3].buffer_std = FALSE;
|
||||
rdata[3].len = sizeof(ginxlogDeletePage);
|
||||
rdata[3].data = (char *) &data;
|
||||
rdata[3].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE, rdata);
|
||||
recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE);
|
||||
PageSetLSN(page, recptr);
|
||||
PageSetLSN(parentPage, recptr);
|
||||
PageSetLSN(BufferGetPage(lBuffer), recptr);
|
||||
|
||||
@@ -20,18 +20,15 @@
|
||||
static MemoryContext opCtx; /* working memory for operations */
|
||||
|
||||
static void
|
||||
ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
|
||||
int block_index,
|
||||
RelFileNode node, BlockNumber blkno)
|
||||
ginRedoClearIncompleteSplit(XLogReaderState *record, uint8 block_id)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, block_index, node, blkno, &buffer)
|
||||
== BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, block_id, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
GinPageGetOpaque(page)->flags &= ~GIN_INCOMPLETE_SPLIT;
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -42,18 +39,15 @@ ginRedoClearIncompleteSplit(XLogRecPtr lsn, XLogRecord *record,
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoCreateIndex(XLogReaderState *record)
|
||||
{
|
||||
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer RootBuffer,
|
||||
MetaBuffer;
|
||||
Page page;
|
||||
|
||||
/* Backup blocks are not used in create_index records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
MetaBuffer = XLogReadBuffer(*node, GIN_METAPAGE_BLKNO, true);
|
||||
Assert(BufferIsValid(MetaBuffer));
|
||||
MetaBuffer = XLogInitBufferForRedo(record, 0);
|
||||
Assert(BufferGetBlockNumber(MetaBuffer) == GIN_METAPAGE_BLKNO);
|
||||
page = (Page) BufferGetPage(MetaBuffer);
|
||||
|
||||
GinInitMetabuffer(MetaBuffer);
|
||||
@@ -61,8 +55,8 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(MetaBuffer);
|
||||
|
||||
RootBuffer = XLogReadBuffer(*node, GIN_ROOT_BLKNO, true);
|
||||
Assert(BufferIsValid(RootBuffer));
|
||||
RootBuffer = XLogInitBufferForRedo(record, 1);
|
||||
Assert(BufferGetBlockNumber(RootBuffer) == GIN_ROOT_BLKNO);
|
||||
page = (Page) BufferGetPage(RootBuffer);
|
||||
|
||||
GinInitBuffer(RootBuffer, GIN_LEAF);
|
||||
@@ -75,18 +69,15 @@ ginRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoCreatePTree(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoCreatePTree(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
ginxlogCreatePostingTree *data = (ginxlogCreatePostingTree *) XLogRecGetData(record);
|
||||
char *ptr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
/* Backup blocks are not used in create_ptree records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
buffer = XLogReadBuffer(data->node, data->blkno, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
GinInitBuffer(buffer, GIN_DATA | GIN_LEAF | GIN_COMPRESSED);
|
||||
@@ -328,35 +319,40 @@ ginRedoInsertData(Buffer buffer, bool isLeaf, BlockNumber rightblkno, void *rdat
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoInsert(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
ginxlogInsert *data = (ginxlogInsert *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
char *payload;
|
||||
#ifdef NOT_USED
|
||||
BlockNumber leftChildBlkno = InvalidBlockNumber;
|
||||
#endif
|
||||
BlockNumber rightChildBlkno = InvalidBlockNumber;
|
||||
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
|
||||
|
||||
payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
|
||||
|
||||
/*
|
||||
* First clear incomplete-split flag on child page if this finishes a
|
||||
* split.
|
||||
*/
|
||||
if (!isLeaf)
|
||||
{
|
||||
char *payload = XLogRecGetData(record) + sizeof(ginxlogInsert);
|
||||
|
||||
#ifdef NOT_USED
|
||||
leftChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
|
||||
#endif
|
||||
payload += sizeof(BlockIdData);
|
||||
rightChildBlkno = BlockIdGetBlockNumber((BlockId) payload);
|
||||
payload += sizeof(BlockIdData);
|
||||
|
||||
ginRedoClearIncompleteSplit(lsn, record, 0, data->node, leftChildBlkno);
|
||||
ginRedoClearIncompleteSplit(record, 1);
|
||||
}
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, isLeaf ? 0 : 1, data->node,
|
||||
data->blkno, &buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
Size len;
|
||||
char *payload = XLogRecGetBlockData(record, 0, &len);
|
||||
|
||||
/* How to insert the payload is tree-type specific */
|
||||
if (data->flags & GIN_INSERT_ISDATA)
|
||||
@@ -378,161 +374,33 @@ ginRedoInsert(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/*
 * Redo helper for an entry-tree page split: distribute the index tuples
 * carried in the WAL payload between the two halves of the split.
 *
 * lpage, rpage - pages that receive the left and right halves
 * rdata - payload: a ginxlogSplitEntry header immediately followed by
 *         data->nitem index tuples, each one starting
 *         MAXALIGN(IndexTupleSize()) bytes after the previous one
 *
 * The first data->separator tuples are added to lpage and the remaining
 * ones, up to data->nitem, to rpage. Raises ERROR if PageAddItem cannot
 * place a tuple.
 */
static void
|
||||
ginRedoSplitEntry(Page lpage, Page rpage, void *rdata)
|
||||
{
|
||||
ginxlogSplitEntry *data = (ginxlogSplitEntry *) rdata;
|
||||
/* the tuple array starts right after the fixed-size header */
IndexTuple itup = (IndexTuple) ((char *) rdata + sizeof(ginxlogSplitEntry));
|
||||
OffsetNumber i;
|
||||
|
||||
/* left half: tuples [0, separator) */
for (i = 0; i < data->separator; i++)
|
||||
{
|
||||
if (PageAddItem(lpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to gin index page");
|
||||
/* step to the next tuple in the payload */
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
|
||||
}
|
||||
|
||||
/* right half: tuples [separator, nitem); itup continues where the left loop stopped */
for (i = data->separator; i < data->nitem; i++)
|
||||
{
|
||||
if (PageAddItem(rpage, (Item) itup, IndexTupleSize(itup), InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
|
||||
elog(ERROR, "failed to add item to gin index page");
|
||||
itup = (IndexTuple) (((char *) itup) + MAXALIGN(IndexTupleSize(itup)));
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoSplitData(Page lpage, Page rpage, void *rdata)
|
||||
{
|
||||
bool isleaf = GinPageIsLeaf(lpage);
|
||||
|
||||
if (isleaf)
|
||||
{
|
||||
ginxlogSplitDataLeaf *data = (ginxlogSplitDataLeaf *) rdata;
|
||||
Pointer lptr = (Pointer) rdata + sizeof(ginxlogSplitDataLeaf);
|
||||
Pointer rptr = lptr + data->lsize;
|
||||
|
||||
Assert(data->lsize > 0 && data->lsize <= GinDataPageMaxDataSize);
|
||||
Assert(data->rsize > 0 && data->rsize <= GinDataPageMaxDataSize);
|
||||
|
||||
memcpy(GinDataLeafPageGetPostingList(lpage), lptr, data->lsize);
|
||||
memcpy(GinDataLeafPageGetPostingList(rpage), rptr, data->rsize);
|
||||
|
||||
GinDataPageSetDataSize(lpage, data->lsize);
|
||||
GinDataPageSetDataSize(rpage, data->rsize);
|
||||
*GinDataPageGetRightBound(lpage) = data->lrightbound;
|
||||
*GinDataPageGetRightBound(rpage) = data->rrightbound;
|
||||
}
|
||||
else
|
||||
{
|
||||
ginxlogSplitDataInternal *data = (ginxlogSplitDataInternal *) rdata;
|
||||
PostingItem *items = (PostingItem *) ((char *) rdata + sizeof(ginxlogSplitDataInternal));
|
||||
OffsetNumber i;
|
||||
OffsetNumber maxoff;
|
||||
|
||||
for (i = 0; i < data->separator; i++)
|
||||
GinDataPageAddPostingItem(lpage, &items[i], InvalidOffsetNumber);
|
||||
for (i = data->separator; i < data->nitem; i++)
|
||||
GinDataPageAddPostingItem(rpage, &items[i], InvalidOffsetNumber);
|
||||
|
||||
/* set up right key */
|
||||
maxoff = GinPageGetOpaque(lpage)->maxoff;
|
||||
*GinDataPageGetRightBound(lpage) = GinDataPageGetPostingItem(lpage, maxoff)->key;
|
||||
*GinDataPageGetRightBound(rpage) = data->rightbound;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoSplit(XLogReaderState *record)
|
||||
{
|
||||
ginxlogSplit *data = (ginxlogSplit *) XLogRecGetData(record);
|
||||
Buffer lbuffer,
|
||||
rbuffer;
|
||||
Page lpage,
|
||||
rpage;
|
||||
uint32 flags;
|
||||
uint32 lflags,
|
||||
rflags;
|
||||
char *payload;
|
||||
rbuffer,
|
||||
rootbuf;
|
||||
bool isLeaf = (data->flags & GIN_INSERT_ISLEAF) != 0;
|
||||
bool isData = (data->flags & GIN_INSERT_ISDATA) != 0;
|
||||
bool isRoot = (data->flags & GIN_SPLIT_ROOT) != 0;
|
||||
|
||||
payload = XLogRecGetData(record) + sizeof(ginxlogSplit);
|
||||
|
||||
/*
|
||||
* First clear incomplete-split flag on child page if this finishes a
|
||||
* split
|
||||
*/
|
||||
if (!isLeaf)
|
||||
ginRedoClearIncompleteSplit(lsn, record, 0, data->node, data->leftChildBlkno);
|
||||
ginRedoClearIncompleteSplit(record, 3);
|
||||
|
||||
flags = 0;
|
||||
if (isLeaf)
|
||||
flags |= GIN_LEAF;
|
||||
if (isData)
|
||||
flags |= GIN_DATA;
|
||||
if (isLeaf && isData)
|
||||
flags |= GIN_COMPRESSED;
|
||||
if (XLogReadBufferForRedo(record, 0, &lbuffer) != BLK_RESTORED)
|
||||
elog(ERROR, "GIN split record did not contain a full-page image of left page");
|
||||
|
||||
lflags = rflags = flags;
|
||||
if (!isRoot)
|
||||
lflags |= GIN_INCOMPLETE_SPLIT;
|
||||
|
||||
lbuffer = XLogReadBuffer(data->node, data->lblkno, true);
|
||||
Assert(BufferIsValid(lbuffer));
|
||||
lpage = (Page) BufferGetPage(lbuffer);
|
||||
GinInitBuffer(lbuffer, lflags);
|
||||
|
||||
rbuffer = XLogReadBuffer(data->node, data->rblkno, true);
|
||||
Assert(BufferIsValid(rbuffer));
|
||||
rpage = (Page) BufferGetPage(rbuffer);
|
||||
GinInitBuffer(rbuffer, rflags);
|
||||
|
||||
GinPageGetOpaque(lpage)->rightlink = BufferGetBlockNumber(rbuffer);
|
||||
GinPageGetOpaque(rpage)->rightlink = isRoot ? InvalidBlockNumber : data->rrlink;
|
||||
|
||||
/* Do the tree-type specific portion to restore the page contents */
|
||||
if (isData)
|
||||
ginRedoSplitData(lpage, rpage, payload);
|
||||
else
|
||||
ginRedoSplitEntry(lpage, rpage, payload);
|
||||
|
||||
PageSetLSN(rpage, lsn);
|
||||
MarkBufferDirty(rbuffer);
|
||||
|
||||
PageSetLSN(lpage, lsn);
|
||||
MarkBufferDirty(lbuffer);
|
||||
if (XLogReadBufferForRedo(record, 1, &rbuffer) != BLK_RESTORED)
|
||||
elog(ERROR, "GIN split record did not contain a full-page image of right page");
|
||||
|
||||
if (isRoot)
|
||||
{
|
||||
BlockNumber rootBlkno = data->rrlink;
|
||||
Buffer rootBuf = XLogReadBuffer(data->node, rootBlkno, true);
|
||||
Page rootPage = BufferGetPage(rootBuf);
|
||||
|
||||
GinInitBuffer(rootBuf, flags & ~GIN_LEAF & ~GIN_COMPRESSED);
|
||||
|
||||
if (isData)
|
||||
{
|
||||
Assert(rootBlkno != GIN_ROOT_BLKNO);
|
||||
ginDataFillRoot(NULL, BufferGetPage(rootBuf),
|
||||
BufferGetBlockNumber(lbuffer),
|
||||
BufferGetPage(lbuffer),
|
||||
BufferGetBlockNumber(rbuffer),
|
||||
BufferGetPage(rbuffer));
|
||||
}
|
||||
else
|
||||
{
|
||||
Assert(rootBlkno == GIN_ROOT_BLKNO);
|
||||
ginEntryFillRoot(NULL, BufferGetPage(rootBuf),
|
||||
BufferGetBlockNumber(lbuffer),
|
||||
BufferGetPage(lbuffer),
|
||||
BufferGetBlockNumber(rbuffer),
|
||||
BufferGetPage(rbuffer));
|
||||
}
|
||||
|
||||
PageSetLSN(rootPage, lsn);
|
||||
|
||||
MarkBufferDirty(rootBuf);
|
||||
UnlockReleaseBuffer(rootBuf);
|
||||
if (XLogReadBufferForRedo(record, 2, &rootbuf) != BLK_RESTORED)
|
||||
elog(ERROR, "GIN split record did not contain a full-page image of root page");
|
||||
UnlockReleaseBuffer(rootbuf);
|
||||
}
|
||||
|
||||
UnlockReleaseBuffer(rbuffer);
|
||||
@@ -544,54 +412,30 @@ ginRedoSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
* a XLOG_FPI record.
|
||||
*/
|
||||
static void
|
||||
ginRedoVacuumPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoVacuumPage(XLogReaderState *record)
|
||||
{
|
||||
ginxlogVacuumPage *xlrec = (ginxlogVacuumPage *) XLogRecGetData(record);
|
||||
char *blk = ((char *) xlrec) + sizeof(ginxlogVacuumPage);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
Assert(xlrec->hole_offset < BLCKSZ);
|
||||
Assert(xlrec->hole_length < BLCKSZ);
|
||||
|
||||
/* Backup blocks are not used, we'll re-initialize the page always. */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
buffer = XLogReadBuffer(xlrec->node, xlrec->blkno, true);
|
||||
if (!BufferIsValid(buffer))
|
||||
return;
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (xlrec->hole_length == 0)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
|
||||
{
|
||||
memcpy((char *) page, blk, BLCKSZ);
|
||||
elog(ERROR, "replay of gin entry tree page vacuum did not restore the page");
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy((char *) page, blk, xlrec->hole_offset);
|
||||
/* must zero-fill the hole */
|
||||
MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
|
||||
memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
|
||||
blk + xlrec->hole_offset,
|
||||
BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
|
||||
}
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoVacuumDataLeafPage(XLogReaderState *record)
|
||||
{
|
||||
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetData(record);
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buffer;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
Size len;
|
||||
ginxlogVacuumDataLeafPage *xlrec;
|
||||
|
||||
xlrec = (ginxlogVacuumDataLeafPage *) XLogRecGetBlockData(record, 0, &len);
|
||||
|
||||
Assert(GinPageIsLeaf(page));
|
||||
Assert(GinPageIsData(page));
|
||||
@@ -605,30 +449,27 @@ ginRedoVacuumDataLeafPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoDeletePage(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
ginxlogDeletePage *data = (ginxlogDeletePage *) XLogRecGetData(record);
|
||||
Buffer dbuffer;
|
||||
Buffer pbuffer;
|
||||
Buffer lbuffer;
|
||||
Page page;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->blkno, &dbuffer)
|
||||
== BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &dbuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(dbuffer);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
GinPageGetOpaque(page)->flags = GIN_DELETED;
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(dbuffer);
|
||||
}
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 1, data->node, data->parentBlkno,
|
||||
&pbuffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 1, &pbuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(pbuffer);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
Assert(!GinPageIsLeaf(page));
|
||||
GinPageDeletePostingItem(page, data->parentOffset);
|
||||
@@ -636,11 +477,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
|
||||
MarkBufferDirty(pbuffer);
|
||||
}
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 2, data->node, data->leftBlkno,
|
||||
&lbuffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 2, &lbuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(lbuffer);
|
||||
|
||||
Assert(GinPageIsData(page));
|
||||
GinPageGetOpaque(page)->rightlink = data->rightLink;
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -656,8 +495,9 @@ ginRedoDeletePage(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoUpdateMetapage(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
ginxlogUpdateMeta *data = (ginxlogUpdateMeta *) XLogRecGetData(record);
|
||||
Buffer metabuffer;
|
||||
Page metapage;
|
||||
@@ -668,9 +508,8 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||
* image, so restore the metapage unconditionally without looking at the
|
||||
* LSN, to avoid torn page hazards.
|
||||
*/
|
||||
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
|
||||
if (!BufferIsValid(metabuffer))
|
||||
return; /* assume index was deleted, nothing to do */
|
||||
metabuffer = XLogInitBufferForRedo(record, 0);
|
||||
Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
|
||||
metapage = BufferGetPage(metabuffer);
|
||||
|
||||
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
|
||||
@@ -682,17 +521,18 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||
/*
|
||||
* insert into tail page
|
||||
*/
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, data->node,
|
||||
data->metadata.tail, &buffer)
|
||||
== BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
OffsetNumber off;
|
||||
int i;
|
||||
Size tupsize;
|
||||
char *payload;
|
||||
IndexTuple tuples;
|
||||
Size totaltupsize;
|
||||
|
||||
tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogUpdateMeta));
|
||||
payload = XLogRecGetBlockData(record, 1, &totaltupsize);
|
||||
tuples = (IndexTuple) payload;
|
||||
|
||||
if (PageIsEmpty(page))
|
||||
off = FirstOffsetNumber;
|
||||
@@ -711,6 +551,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
off++;
|
||||
}
|
||||
Assert(payload + totaltupsize == (char *) tuples);
|
||||
|
||||
/*
|
||||
* Increase counter of heap tuples
|
||||
@@ -728,8 +569,7 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||
/*
|
||||
* New tail
|
||||
*/
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, data->node, data->prevTail,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
|
||||
@@ -746,8 +586,9 @@ ginRedoUpdateMetapage(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoInsertListPage(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
ginxlogInsertListPage *data = (ginxlogInsertListPage *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
@@ -755,15 +596,12 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
off = FirstOffsetNumber;
|
||||
int i,
|
||||
tupsize;
|
||||
IndexTuple tuples = (IndexTuple) (XLogRecGetData(record) + sizeof(ginxlogInsertListPage));
|
||||
char *payload;
|
||||
IndexTuple tuples;
|
||||
Size totaltupsize;
|
||||
|
||||
/*
|
||||
* Backup blocks are not used, we always re-initialize the page.
|
||||
*/
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
buffer = XLogReadBuffer(data->node, data->blkno, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
/* We always re-initialize the page. */
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
GinInitBuffer(buffer, GIN_LIST);
|
||||
@@ -779,6 +617,9 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
GinPageGetOpaque(page)->maxoff = 0;
|
||||
}
|
||||
|
||||
payload = XLogRecGetBlockData(record, 0, &totaltupsize);
|
||||
|
||||
tuples = (IndexTuple) payload;
|
||||
for (i = 0; i < data->ntuples; i++)
|
||||
{
|
||||
tupsize = IndexTupleSize(tuples);
|
||||
@@ -791,6 +632,7 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
tuples = (IndexTuple) (((char *) tuples) + tupsize);
|
||||
off++;
|
||||
}
|
||||
Assert((char *) tuples == payload + totaltupsize);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
@@ -799,21 +641,20 @@ ginRedoInsertListPage(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
|
||||
ginRedoDeleteListPages(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
ginxlogDeleteListPages *data = (ginxlogDeleteListPages *) XLogRecGetData(record);
|
||||
Buffer metabuffer;
|
||||
Page metapage;
|
||||
int i;
|
||||
|
||||
/* Backup blocks are not used in delete_listpage records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
metabuffer = XLogReadBuffer(data->node, GIN_METAPAGE_BLKNO, false);
|
||||
if (!BufferIsValid(metabuffer))
|
||||
return; /* assume index was deleted, nothing to do */
|
||||
metabuffer = XLogInitBufferForRedo(record, 0);
|
||||
Assert(BufferGetBlockNumber(metabuffer) == GIN_METAPAGE_BLKNO);
|
||||
metapage = BufferGetPage(metabuffer);
|
||||
|
||||
GinInitPage(metapage, GIN_META, BufferGetPageSize(metabuffer));
|
||||
|
||||
memcpy(GinPageGetMeta(metapage), &data->metadata, sizeof(GinMetaPageData));
|
||||
PageSetLSN(metapage, lsn);
|
||||
MarkBufferDirty(metabuffer);
|
||||
@@ -838,7 +679,7 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
buffer = XLogReadBuffer(data->node, data->toDelete[i], true);
|
||||
buffer = XLogInitBufferForRedo(record, i + 1);
|
||||
page = BufferGetPage(buffer);
|
||||
GinInitBuffer(buffer, GIN_DELETED);
|
||||
|
||||
@@ -851,9 +692,9 @@ ginRedoDeleteListPages(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
void
|
||||
gin_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
gin_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
MemoryContext oldCtx;
|
||||
|
||||
/*
|
||||
@@ -866,34 +707,34 @@ gin_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_GIN_CREATE_INDEX:
|
||||
ginRedoCreateIndex(lsn, record);
|
||||
ginRedoCreateIndex(record);
|
||||
break;
|
||||
case XLOG_GIN_CREATE_PTREE:
|
||||
ginRedoCreatePTree(lsn, record);
|
||||
ginRedoCreatePTree(record);
|
||||
break;
|
||||
case XLOG_GIN_INSERT:
|
||||
ginRedoInsert(lsn, record);
|
||||
ginRedoInsert(record);
|
||||
break;
|
||||
case XLOG_GIN_SPLIT:
|
||||
ginRedoSplit(lsn, record);
|
||||
ginRedoSplit(record);
|
||||
break;
|
||||
case XLOG_GIN_VACUUM_PAGE:
|
||||
ginRedoVacuumPage(lsn, record);
|
||||
ginRedoVacuumPage(record);
|
||||
break;
|
||||
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
|
||||
ginRedoVacuumDataLeafPage(lsn, record);
|
||||
ginRedoVacuumDataLeafPage(record);
|
||||
break;
|
||||
case XLOG_GIN_DELETE_PAGE:
|
||||
ginRedoDeletePage(lsn, record);
|
||||
ginRedoDeletePage(record);
|
||||
break;
|
||||
case XLOG_GIN_UPDATE_META_PAGE:
|
||||
ginRedoUpdateMetapage(lsn, record);
|
||||
ginRedoUpdateMetapage(record);
|
||||
break;
|
||||
case XLOG_GIN_INSERT_LISTPAGE:
|
||||
ginRedoInsertListPage(lsn, record);
|
||||
ginRedoInsertListPage(record);
|
||||
break;
|
||||
case XLOG_GIN_DELETE_LISTPAGE:
|
||||
ginRedoDeleteListPages(lsn, record);
|
||||
ginRedoDeleteListPages(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "gin_redo: unknown op code %u", info);
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
|
||||
#include "access/genam.h"
|
||||
#include "access/gist_private.h"
|
||||
#include "access/xloginsert.h"
|
||||
#include "catalog/index.h"
|
||||
#include "catalog/pg_collation.h"
|
||||
#include "miscadmin.h"
|
||||
@@ -394,6 +395,14 @@ gistplacetopage(Relation rel, Size freespace, GISTSTATE *giststate,
|
||||
GistPageSetNSN(ptr->page, oldnsn);
|
||||
}
|
||||
|
||||
/*
|
||||
* gistXLogSplit() needs to WAL log a lot of pages, prepare WAL
|
||||
* insertion for that. NB: The number of pages and data segments
|
||||
* specified here must match the calculations in gistXLogSplit()!
|
||||
*/
|
||||
if (RelationNeedsWAL(rel))
|
||||
XLogEnsureRecordSpace(npage, 1 + npage * 2);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
/*
|
||||
|
||||
@@ -183,14 +183,11 @@ gistbuild(PG_FUNCTION_ARGS)
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
|
||||
rdata.data = (char *) &(index->rd_node);
|
||||
rdata.len = sizeof(RelFileNode);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_WILL_INIT);
|
||||
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX, &rdata);
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_CREATE_INDEX);
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
else
|
||||
|
||||
@@ -18,18 +18,6 @@
|
||||
#include "access/xlogutils.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
typedef struct
|
||||
{
|
||||
gistxlogPage *header;
|
||||
IndexTuple *itup;
|
||||
} NewPage;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
gistxlogPageSplit *data;
|
||||
NewPage *page;
|
||||
} PageSplitRecord;
|
||||
|
||||
static MemoryContext opCtx; /* working memory for operations */
|
||||
|
||||
/*
|
||||
@@ -44,9 +32,9 @@ static MemoryContext opCtx; /* working memory for operations */
|
||||
* action.)
|
||||
*/
|
||||
static void
|
||||
gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
RelFileNode node, BlockNumber childblkno)
|
||||
gistRedoClearFollowRight(XLogReaderState *record, uint8 block_id)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
XLogRedoAction action;
|
||||
@@ -55,8 +43,7 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
* Note that we still update the page even if it was restored from a full
|
||||
* page image, because the updated NSN is not included in the image.
|
||||
*/
|
||||
action = XLogReadBufferForRedo(lsn, record, block_index, node, childblkno,
|
||||
&buffer);
|
||||
action = XLogReadBufferForRedo(record, block_id, &buffer);
|
||||
if (action == BLK_NEEDS_REDO || action == BLK_RESTORED)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
@@ -75,20 +62,23 @@ gistRedoClearFollowRight(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
* redo any page update (except page split)
|
||||
*/
|
||||
static void
|
||||
gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
gistRedoPageUpdateRecord(XLogReaderState *record)
|
||||
{
|
||||
char *begin = XLogRecGetData(record);
|
||||
gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) begin;
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
gistxlogPageUpdate *xldata = (gistxlogPageUpdate *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
char *data;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
char *begin;
|
||||
char *data;
|
||||
Size datalen;
|
||||
int ninserted = 0;
|
||||
|
||||
data = begin + sizeof(gistxlogPageUpdate);
|
||||
data = begin = XLogRecGetBlockData(record, 0, &datalen);
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
/* Delete old tuples */
|
||||
if (xldata->ntodelete > 0)
|
||||
@@ -105,12 +95,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/* add tuples */
|
||||
if (data - begin < record->xl_len)
|
||||
if (data - begin < datalen)
|
||||
{
|
||||
OffsetNumber off = (PageIsEmpty(page)) ? FirstOffsetNumber :
|
||||
OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
||||
|
||||
while (data - begin < record->xl_len)
|
||||
while (data - begin < datalen)
|
||||
{
|
||||
IndexTuple itup = (IndexTuple) data;
|
||||
Size sz = IndexTupleSize(itup);
|
||||
@@ -123,9 +113,12 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
elog(ERROR, "failed to add item to GiST index page, size %d bytes",
|
||||
(int) sz);
|
||||
off++;
|
||||
ninserted++;
|
||||
}
|
||||
}
|
||||
|
||||
Assert(ninserted == xldata->ntoinsert);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
@@ -137,58 +130,51 @@ gistRedoPageUpdateRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
* that even if the target page no longer exists, we still attempt to
|
||||
* replay the change on the child page.
|
||||
*/
|
||||
if (BlockNumberIsValid(xldata->leftchild))
|
||||
gistRedoClearFollowRight(lsn, record, 1,
|
||||
xldata->node, xldata->leftchild);
|
||||
if (XLogRecHasBlockRef(record, 1))
|
||||
gistRedoClearFollowRight(record, 1);
|
||||
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
|
||||
static void
|
||||
decodePageSplitRecord(PageSplitRecord *decoded, XLogRecord *record)
|
||||
/*
|
||||
* Returns an array of index pointers.
|
||||
*/
|
||||
static IndexTuple *
|
||||
decodePageSplitRecord(char *begin, int len, int *n)
|
||||
{
|
||||
char *begin = XLogRecGetData(record),
|
||||
*ptr;
|
||||
int j,
|
||||
i = 0;
|
||||
char *ptr;
|
||||
int i = 0;
|
||||
IndexTuple *tuples;
|
||||
|
||||
decoded->data = (gistxlogPageSplit *) begin;
|
||||
decoded->page = (NewPage *) palloc(sizeof(NewPage) * decoded->data->npage);
|
||||
/* extract the number of tuples */
|
||||
memcpy(n, begin, sizeof(int));
|
||||
ptr = begin + sizeof(int);
|
||||
|
||||
ptr = begin + sizeof(gistxlogPageSplit);
|
||||
for (i = 0; i < decoded->data->npage; i++)
|
||||
tuples = palloc(*n * sizeof(IndexTuple));
|
||||
|
||||
for (i = 0; i < *n; i++)
|
||||
{
|
||||
Assert(ptr - begin < record->xl_len);
|
||||
decoded->page[i].header = (gistxlogPage *) ptr;
|
||||
ptr += sizeof(gistxlogPage);
|
||||
|
||||
decoded->page[i].itup = (IndexTuple *)
|
||||
palloc(sizeof(IndexTuple) * decoded->page[i].header->num);
|
||||
j = 0;
|
||||
while (j < decoded->page[i].header->num)
|
||||
{
|
||||
Assert(ptr - begin < record->xl_len);
|
||||
decoded->page[i].itup[j] = (IndexTuple) ptr;
|
||||
ptr += IndexTupleSize((IndexTuple) ptr);
|
||||
j++;
|
||||
}
|
||||
Assert(ptr - begin < len);
|
||||
tuples[i] = (IndexTuple) ptr;
|
||||
ptr += IndexTupleSize((IndexTuple) ptr);
|
||||
}
|
||||
Assert(ptr - begin == len);
|
||||
|
||||
return tuples;
|
||||
}
|
||||
|
||||
static void
|
||||
gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
gistRedoPageSplitRecord(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
gistxlogPageSplit *xldata = (gistxlogPageSplit *) XLogRecGetData(record);
|
||||
PageSplitRecord xlrec;
|
||||
Buffer firstbuffer = InvalidBuffer;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
int i;
|
||||
bool isrootsplit = false;
|
||||
|
||||
decodePageSplitRecord(&xlrec, record);
|
||||
|
||||
/*
|
||||
* We must hold lock on the first-listed page throughout the action,
|
||||
* including while updating the left child page (if any). We can unlock
|
||||
@@ -198,32 +184,39 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
|
||||
/* loop around all pages */
|
||||
for (i = 0; i < xlrec.data->npage; i++)
|
||||
for (i = 0; i < xldata->npage; i++)
|
||||
{
|
||||
NewPage *newpage = xlrec.page + i;
|
||||
int flags;
|
||||
char *data;
|
||||
Size datalen;
|
||||
int num;
|
||||
BlockNumber blkno;
|
||||
IndexTuple *tuples;
|
||||
|
||||
if (newpage->header->blkno == GIST_ROOT_BLKNO)
|
||||
XLogRecGetBlockTag(record, i + 1, NULL, NULL, &blkno);
|
||||
if (blkno == GIST_ROOT_BLKNO)
|
||||
{
|
||||
Assert(i == 0);
|
||||
isrootsplit = true;
|
||||
}
|
||||
|
||||
buffer = XLogReadBuffer(xlrec.data->node, newpage->header->blkno, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, i + 1);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
data = XLogRecGetBlockData(record, i + 1, &datalen);
|
||||
|
||||
tuples = decodePageSplitRecord(data, datalen, &num);
|
||||
|
||||
/* ok, clear buffer */
|
||||
if (xlrec.data->origleaf && newpage->header->blkno != GIST_ROOT_BLKNO)
|
||||
if (xldata->origleaf && blkno != GIST_ROOT_BLKNO)
|
||||
flags = F_LEAF;
|
||||
else
|
||||
flags = 0;
|
||||
GISTInitBuffer(buffer, flags);
|
||||
|
||||
/* and fill it */
|
||||
gistfillbuffer(page, newpage->itup, newpage->header->num, FirstOffsetNumber);
|
||||
gistfillbuffer(page, tuples, num, FirstOffsetNumber);
|
||||
|
||||
if (newpage->header->blkno == GIST_ROOT_BLKNO)
|
||||
if (blkno == GIST_ROOT_BLKNO)
|
||||
{
|
||||
GistPageGetOpaque(page)->rightlink = InvalidBlockNumber;
|
||||
GistPageSetNSN(page, xldata->orignsn);
|
||||
@@ -231,12 +224,17 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
else
|
||||
{
|
||||
if (i < xlrec.data->npage - 1)
|
||||
GistPageGetOpaque(page)->rightlink = xlrec.page[i + 1].header->blkno;
|
||||
if (i < xldata->npage - 1)
|
||||
{
|
||||
BlockNumber nextblkno;
|
||||
|
||||
XLogRecGetBlockTag(record, i + 2, NULL, NULL, &nextblkno);
|
||||
GistPageGetOpaque(page)->rightlink = nextblkno;
|
||||
}
|
||||
else
|
||||
GistPageGetOpaque(page)->rightlink = xldata->origrlink;
|
||||
GistPageSetNSN(page, xldata->orignsn);
|
||||
if (i < xlrec.data->npage - 1 && !isrootsplit &&
|
||||
if (i < xldata->npage - 1 && !isrootsplit &&
|
||||
xldata->markfollowright)
|
||||
GistMarkFollowRight(page);
|
||||
else
|
||||
@@ -253,26 +251,22 @@ gistRedoPageSplitRecord(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/* Fix follow-right data on left child page, if any */
|
||||
if (BlockNumberIsValid(xldata->leftchild))
|
||||
gistRedoClearFollowRight(lsn, record, 0,
|
||||
xldata->node, xldata->leftchild);
|
||||
if (XLogRecHasBlockRef(record, 0))
|
||||
gistRedoClearFollowRight(record, 0);
|
||||
|
||||
/* Finally, release lock on the first page */
|
||||
UnlockReleaseBuffer(firstbuffer);
|
||||
}
|
||||
|
||||
static void
|
||||
gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
gistRedoCreateIndex(XLogReaderState *record)
|
||||
{
|
||||
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
/* Backup blocks are not used in create_index records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
buffer = XLogReadBuffer(*node, GIST_ROOT_BLKNO, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
Assert(BufferGetBlockNumber(buffer) == GIST_ROOT_BLKNO);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
GISTInitBuffer(buffer, F_LEAF);
|
||||
@@ -284,9 +278,9 @@ gistRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
void
|
||||
gist_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
gist_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
MemoryContext oldCxt;
|
||||
|
||||
/*
|
||||
@@ -299,13 +293,13 @@ gist_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_GIST_PAGE_UPDATE:
|
||||
gistRedoPageUpdateRecord(lsn, record);
|
||||
gistRedoPageUpdateRecord(record);
|
||||
break;
|
||||
case XLOG_GIST_PAGE_SPLIT:
|
||||
gistRedoPageSplitRecord(lsn, record);
|
||||
gistRedoPageSplitRecord(record);
|
||||
break;
|
||||
case XLOG_GIST_CREATE_INDEX:
|
||||
gistRedoCreateIndex(lsn, record);
|
||||
gistRedoCreateIndex(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "gist_redo: unknown op code %u", info);
|
||||
@@ -336,70 +330,49 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
|
||||
BlockNumber origrlink, GistNSN orignsn,
|
||||
Buffer leftchildbuf, bool markfollowright)
|
||||
{
|
||||
XLogRecData rdata[GIST_MAX_SPLIT_PAGES * 2 + 2];
|
||||
gistxlogPageSplit xlrec;
|
||||
SplitedPageLayout *ptr;
|
||||
int npage = 0,
|
||||
cur;
|
||||
int npage = 0;
|
||||
XLogRecPtr recptr;
|
||||
int i;
|
||||
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
npage++;
|
||||
|
||||
/*
|
||||
* the caller should've checked this already, but doesn't hurt to check
|
||||
* again.
|
||||
*/
|
||||
if (npage > GIST_MAX_SPLIT_PAGES)
|
||||
elog(ERROR, "GiST page split into too many halves");
|
||||
|
||||
xlrec.node = node;
|
||||
xlrec.origblkno = blkno;
|
||||
xlrec.origrlink = origrlink;
|
||||
xlrec.orignsn = orignsn;
|
||||
xlrec.origleaf = page_is_leaf;
|
||||
xlrec.npage = (uint16) npage;
|
||||
xlrec.leftchild =
|
||||
BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
|
||||
xlrec.markfollowright = markfollowright;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = sizeof(gistxlogPageSplit);
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
|
||||
cur = 1;
|
||||
XLogBeginInsert();
|
||||
|
||||
/*
|
||||
* Include a full page image of the child buf. (only necessary if a
|
||||
* checkpoint happened since the child page was split)
|
||||
*/
|
||||
if (BufferIsValid(leftchildbuf))
|
||||
{
|
||||
rdata[cur - 1].next = &(rdata[cur]);
|
||||
rdata[cur].data = NULL;
|
||||
rdata[cur].len = 0;
|
||||
rdata[cur].buffer = leftchildbuf;
|
||||
rdata[cur].buffer_std = true;
|
||||
cur++;
|
||||
}
|
||||
XLogRegisterBuffer(0, leftchildbuf, REGBUF_STANDARD);
|
||||
|
||||
/*
|
||||
* NOTE: We register a lot of data. The caller must've called
|
||||
* XLogEnsureRecordSpace() to prepare for that. We cannot do it here,
|
||||
* because we're already in a critical section. If you change the number
|
||||
* of buffer or data registrations here, make sure you modify the
|
||||
* XLogEnsureRecordSpace() calls accordingly!
|
||||
*/
|
||||
XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageSplit));
|
||||
|
||||
i = 1;
|
||||
for (ptr = dist; ptr; ptr = ptr->next)
|
||||
{
|
||||
rdata[cur - 1].next = &(rdata[cur]);
|
||||
rdata[cur].buffer = InvalidBuffer;
|
||||
rdata[cur].data = (char *) &(ptr->block);
|
||||
rdata[cur].len = sizeof(gistxlogPage);
|
||||
cur++;
|
||||
|
||||
rdata[cur - 1].next = &(rdata[cur]);
|
||||
rdata[cur].buffer = InvalidBuffer;
|
||||
rdata[cur].data = (char *) (ptr->list);
|
||||
rdata[cur].len = ptr->lenlist;
|
||||
cur++;
|
||||
XLogRegisterBuffer(i, ptr->buffer, REGBUF_WILL_INIT);
|
||||
XLogRegisterBufData(i, (char *) &(ptr->block.num), sizeof(int));
|
||||
XLogRegisterBufData(i, (char *) ptr->list, ptr->lenlist);
|
||||
i++;
|
||||
}
|
||||
rdata[cur - 1].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT, rdata);
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_SPLIT);
|
||||
|
||||
return recptr;
|
||||
}
|
||||
@@ -413,9 +386,7 @@ gistXLogSplit(RelFileNode node, BlockNumber blkno, bool page_is_leaf,
|
||||
*
|
||||
* Note that both the todelete array and the tuples are marked as belonging
|
||||
* to the target buffer; they need not be stored in XLOG if XLogInsert decides
|
||||
* to log the whole buffer contents instead. Also, we take care that there's
|
||||
* at least one rdata item referencing the buffer, even when ntodelete and
|
||||
* ituplen are both zero; this ensures that XLogInsert knows about the buffer.
|
||||
* to log the whole buffer contents instead.
|
||||
*/
|
||||
XLogRecPtr
|
||||
gistXLogUpdate(RelFileNode node, Buffer buffer,
|
||||
@@ -423,57 +394,31 @@ gistXLogUpdate(RelFileNode node, Buffer buffer,
|
||||
IndexTuple *itup, int ituplen,
|
||||
Buffer leftchildbuf)
|
||||
{
|
||||
XLogRecData rdata[MaxIndexTuplesPerPage + 3];
|
||||
gistxlogPageUpdate xlrec;
|
||||
int cur,
|
||||
i;
|
||||
int i;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
xlrec.node = node;
|
||||
xlrec.blkno = BufferGetBlockNumber(buffer);
|
||||
xlrec.ntodelete = ntodelete;
|
||||
xlrec.leftchild =
|
||||
BufferIsValid(leftchildbuf) ? BufferGetBlockNumber(leftchildbuf) : InvalidBlockNumber;
|
||||
xlrec.ntoinsert = ituplen;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = sizeof(gistxlogPageUpdate);
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(gistxlogPageUpdate));
|
||||
|
||||
rdata[1].data = (char *) todelete;
|
||||
rdata[1].len = sizeof(OffsetNumber) * ntodelete;
|
||||
rdata[1].buffer = buffer;
|
||||
rdata[1].buffer_std = true;
|
||||
|
||||
cur = 2;
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
|
||||
XLogRegisterBufData(0, (char *) todelete, sizeof(OffsetNumber) * ntodelete);
|
||||
|
||||
/* new tuples */
|
||||
for (i = 0; i < ituplen; i++)
|
||||
{
|
||||
rdata[cur - 1].next = &(rdata[cur]);
|
||||
rdata[cur].data = (char *) (itup[i]);
|
||||
rdata[cur].len = IndexTupleSize(itup[i]);
|
||||
rdata[cur].buffer = buffer;
|
||||
rdata[cur].buffer_std = true;
|
||||
cur++;
|
||||
}
|
||||
XLogRegisterBufData(0, (char *) (itup[i]), IndexTupleSize(itup[i]));
|
||||
|
||||
/*
|
||||
* Include a full page image of the child buf. (only necessary if a
|
||||
* checkpoint happened since the child page was split)
|
||||
*/
|
||||
if (BufferIsValid(leftchildbuf))
|
||||
{
|
||||
rdata[cur - 1].next = &(rdata[cur]);
|
||||
rdata[cur].data = NULL;
|
||||
rdata[cur].len = 0;
|
||||
rdata[cur].buffer = leftchildbuf;
|
||||
rdata[cur].buffer_std = true;
|
||||
cur++;
|
||||
}
|
||||
rdata[cur - 1].next = NULL;
|
||||
XLogRegisterBuffer(1, leftchildbuf, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE, rdata);
|
||||
recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_UPDATE);
|
||||
|
||||
return recptr;
|
||||
}
|
||||
|
||||
@@ -700,7 +700,7 @@ hashvacuumcleanup(PG_FUNCTION_ARGS)
|
||||
|
||||
|
||||
void
|
||||
hash_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
hash_redo(XLogReaderState *record)
|
||||
{
|
||||
elog(PANIC, "hash_redo: unimplemented");
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -865,7 +865,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
|
||||
hash_seq_init(&seq_status, state->rs_logical_mappings);
|
||||
while ((src = (RewriteMappingFile *) hash_seq_search(&seq_status)) != NULL)
|
||||
{
|
||||
XLogRecData rdata[2];
|
||||
char *waldata;
|
||||
char *waldata_start;
|
||||
xl_heap_rewrite_mapping xlrec;
|
||||
@@ -889,11 +888,6 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
|
||||
xlrec.offset = src->off;
|
||||
xlrec.start_lsn = state->rs_begin_lsn;
|
||||
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = sizeof(xlrec);
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
|
||||
/* write all mappings consecutively */
|
||||
len = src->num_mappings * sizeof(LogicalRewriteMappingData);
|
||||
waldata_start = waldata = palloc(len);
|
||||
@@ -934,13 +928,12 @@ logical_heap_rewrite_flush_mappings(RewriteState state)
|
||||
written, len)));
|
||||
src->off += len;
|
||||
|
||||
rdata[1].data = waldata_start;
|
||||
rdata[1].len = len;
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), sizeof(xlrec));
|
||||
XLogRegisterData(waldata_start, len);
|
||||
|
||||
/* write xlog record */
|
||||
XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE, rdata);
|
||||
XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_REWRITE);
|
||||
|
||||
pfree(waldata_start);
|
||||
}
|
||||
@@ -1123,7 +1116,7 @@ logical_rewrite_heap_tuple(RewriteState state, ItemPointerData old_tid,
|
||||
* Replay XLOG_HEAP2_REWRITE records
|
||||
*/
|
||||
void
|
||||
heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
|
||||
heap_xlog_logical_rewrite(XLogReaderState *r)
|
||||
{
|
||||
char path[MAXPGPATH];
|
||||
int fd;
|
||||
@@ -1138,7 +1131,7 @@ heap_xlog_logical_rewrite(XLogRecPtr lsn, XLogRecord *r)
|
||||
xlrec->mapped_db, xlrec->mapped_rel,
|
||||
(uint32) (xlrec->start_lsn >> 32),
|
||||
(uint32) xlrec->start_lsn,
|
||||
xlrec->mapped_xid, r->xl_xid);
|
||||
xlrec->mapped_xid, XLogRecGetXid(r));
|
||||
|
||||
fd = OpenTransientFile(path,
|
||||
O_CREAT | O_WRONLY | PG_BINARY,
|
||||
|
||||
@@ -837,37 +837,25 @@ _bt_insertonpg(Relation rel,
|
||||
if (RelationNeedsWAL(rel))
|
||||
{
|
||||
xl_btree_insert xlrec;
|
||||
BlockNumber xlleftchild;
|
||||
xl_btree_metadata xlmeta;
|
||||
uint8 xlinfo;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[4];
|
||||
XLogRecData *nextrdata;
|
||||
IndexTupleData trunctuple;
|
||||
|
||||
xlrec.target.node = rel->rd_node;
|
||||
ItemPointerSet(&(xlrec.target.tid), itup_blkno, itup_off);
|
||||
xlrec.offnum = itup_off;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBtreeInsert;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = nextrdata = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
|
||||
|
||||
if (P_ISLEAF(lpageop))
|
||||
xlinfo = XLOG_BTREE_INSERT_LEAF;
|
||||
else
|
||||
{
|
||||
/*
|
||||
* Include the block number of the left child, whose
|
||||
* INCOMPLETE_SPLIT flag was cleared.
|
||||
* Register the left child whose INCOMPLETE_SPLIT flag was
|
||||
* cleared.
|
||||
*/
|
||||
xlleftchild = BufferGetBlockNumber(cbuf);
|
||||
nextrdata->data = (char *) &xlleftchild;
|
||||
nextrdata->len = sizeof(BlockNumber);
|
||||
nextrdata->buffer = cbuf;
|
||||
nextrdata->buffer_std = true;
|
||||
nextrdata->next = nextrdata + 1;
|
||||
nextrdata++;
|
||||
XLogRegisterBuffer(1, cbuf, REGBUF_STANDARD);
|
||||
|
||||
xlinfo = XLOG_BTREE_INSERT_UPPER;
|
||||
}
|
||||
@@ -879,33 +867,25 @@ _bt_insertonpg(Relation rel,
|
||||
xlmeta.fastroot = metad->btm_fastroot;
|
||||
xlmeta.fastlevel = metad->btm_fastlevel;
|
||||
|
||||
nextrdata->data = (char *) &xlmeta;
|
||||
nextrdata->len = sizeof(xl_btree_metadata);
|
||||
nextrdata->buffer = InvalidBuffer;
|
||||
nextrdata->next = nextrdata + 1;
|
||||
nextrdata++;
|
||||
XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
|
||||
XLogRegisterBufData(2, (char *) &xlmeta, sizeof(xl_btree_metadata));
|
||||
|
||||
xlinfo = XLOG_BTREE_INSERT_META;
|
||||
}
|
||||
|
||||
/* Read comments in _bt_pgaddtup */
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
if (!P_ISLEAF(lpageop) && newitemoff == P_FIRSTDATAKEY(lpageop))
|
||||
{
|
||||
trunctuple = *itup;
|
||||
trunctuple.t_info = sizeof(IndexTupleData);
|
||||
nextrdata->data = (char *) &trunctuple;
|
||||
nextrdata->len = sizeof(IndexTupleData);
|
||||
XLogRegisterBufData(0, (char *) &trunctuple,
|
||||
sizeof(IndexTupleData));
|
||||
}
|
||||
else
|
||||
{
|
||||
nextrdata->data = (char *) itup;
|
||||
nextrdata->len = IndexTupleDSize(*itup);
|
||||
}
|
||||
nextrdata->buffer = buf;
|
||||
nextrdata->buffer_std = true;
|
||||
nextrdata->next = NULL;
|
||||
XLogRegisterBufData(0, (char *) itup, IndexTupleDSize(*itup));
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, xlinfo);
|
||||
|
||||
if (BufferIsValid(metabuf))
|
||||
{
|
||||
@@ -1260,56 +1240,37 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
|
||||
xl_btree_split xlrec;
|
||||
uint8 xlinfo;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[7];
|
||||
XLogRecData *lastrdata;
|
||||
BlockNumber cblkno;
|
||||
|
||||
xlrec.node = rel->rd_node;
|
||||
xlrec.leftsib = origpagenumber;
|
||||
xlrec.rightsib = rightpagenumber;
|
||||
xlrec.rnext = ropaque->btpo_next;
|
||||
xlrec.level = ropaque->btpo.level;
|
||||
xlrec.firstright = firstright;
|
||||
xlrec.newitemoff = newitemoff;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBtreeSplit;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBtreeSplit);
|
||||
|
||||
lastrdata = &rdata[0];
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(1, rbuf, REGBUF_WILL_INIT);
|
||||
/* Log the right sibling, because we've changed its prev-pointer. */
|
||||
if (!P_RIGHTMOST(ropaque))
|
||||
XLogRegisterBuffer(2, sbuf, REGBUF_STANDARD);
|
||||
if (BufferIsValid(cbuf))
|
||||
XLogRegisterBuffer(3, cbuf, REGBUF_STANDARD);
|
||||
|
||||
/*
|
||||
* Log the new item and its offset, if it was inserted on the left
|
||||
* page. (If it was put on the right page, we don't need to explicitly
|
||||
* WAL log it because it's included with all the other items on the
|
||||
* right page.) Show the new item as belonging to the left page
|
||||
* buffer, so that it is not stored if XLogInsert decides it needs a
|
||||
* full-page image of the left page. We store the offset anyway,
|
||||
* though, to support archive compression of these records.
|
||||
* Log the new item, if it was inserted on the left page. (If it was
|
||||
* put on the right page, we don't need to explicitly WAL log it
|
||||
* because it's included with all the other items on the right page.)
|
||||
* Show the new item as belonging to the left page buffer, so that it
|
||||
* is not stored if XLogInsert decides it needs a full-page image of
|
||||
* the left page. We store the offset anyway, though, to support
|
||||
* archive compression of these records.
|
||||
*/
|
||||
if (newitemonleft)
|
||||
{
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
lastrdata->data = (char *) &newitemoff;
|
||||
lastrdata->len = sizeof(OffsetNumber);
|
||||
lastrdata->buffer = InvalidBuffer;
|
||||
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
lastrdata->data = (char *) newitem;
|
||||
lastrdata->len = MAXALIGN(newitemsz);
|
||||
lastrdata->buffer = buf; /* backup block 0 */
|
||||
lastrdata->buffer_std = true;
|
||||
}
|
||||
XLogRegisterBufData(0, (char *) newitem, MAXALIGN(newitemsz));
|
||||
|
||||
/* Log left page */
|
||||
if (!isleaf)
|
||||
{
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
/*
|
||||
* We must also log the left page's high key, because the right
|
||||
* page's leftmost key is suppressed on non-leaf levels. Show it
|
||||
@@ -1319,43 +1280,7 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
|
||||
*/
|
||||
itemid = PageGetItemId(origpage, P_HIKEY);
|
||||
item = (IndexTuple) PageGetItem(origpage, itemid);
|
||||
lastrdata->data = (char *) item;
|
||||
lastrdata->len = MAXALIGN(IndexTupleSize(item));
|
||||
lastrdata->buffer = buf; /* backup block 0 */
|
||||
lastrdata->buffer_std = true;
|
||||
}
|
||||
|
||||
if (isleaf && !newitemonleft)
|
||||
{
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
/*
|
||||
* Although we don't need to WAL-log anything on the left page, we
|
||||
* still need XLogInsert to consider storing a full-page image of
|
||||
* the left page, so make an empty entry referencing that buffer.
|
||||
* This also ensures that the left page is always backup block 0.
|
||||
*/
|
||||
lastrdata->data = NULL;
|
||||
lastrdata->len = 0;
|
||||
lastrdata->buffer = buf; /* backup block 0 */
|
||||
lastrdata->buffer_std = true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Log block number of left child, whose INCOMPLETE_SPLIT flag this
|
||||
* insertion clears.
|
||||
*/
|
||||
if (!isleaf)
|
||||
{
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
cblkno = BufferGetBlockNumber(cbuf);
|
||||
lastrdata->data = (char *) &cblkno;
|
||||
lastrdata->len = sizeof(BlockNumber);
|
||||
lastrdata->buffer = cbuf; /* backup block 1 */
|
||||
lastrdata->buffer_std = true;
|
||||
XLogRegisterBufData(0, (char *) item, MAXALIGN(IndexTupleSize(item)));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1370,35 +1295,16 @@ _bt_split(Relation rel, Buffer buf, Buffer cbuf, OffsetNumber firstright,
|
||||
* and so the item pointers can be reconstructed. See comments for
|
||||
* _bt_restore_page().
|
||||
*/
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
lastrdata->data = (char *) rightpage +
|
||||
((PageHeader) rightpage)->pd_upper;
|
||||
lastrdata->len = ((PageHeader) rightpage)->pd_special -
|
||||
((PageHeader) rightpage)->pd_upper;
|
||||
lastrdata->buffer = InvalidBuffer;
|
||||
|
||||
/* Log the right sibling, because we've changed its' prev-pointer. */
|
||||
if (!P_RIGHTMOST(ropaque))
|
||||
{
|
||||
lastrdata->next = lastrdata + 1;
|
||||
lastrdata++;
|
||||
|
||||
lastrdata->data = NULL;
|
||||
lastrdata->len = 0;
|
||||
lastrdata->buffer = sbuf; /* bkp block 1 (leaf) or 2 (non-leaf) */
|
||||
lastrdata->buffer_std = true;
|
||||
}
|
||||
|
||||
lastrdata->next = NULL;
|
||||
XLogRegisterBufData(1,
|
||||
(char *) rightpage + ((PageHeader) rightpage)->pd_upper,
|
||||
((PageHeader) rightpage)->pd_special - ((PageHeader) rightpage)->pd_upper);
|
||||
|
||||
if (isroot)
|
||||
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L_ROOT : XLOG_BTREE_SPLIT_R_ROOT;
|
||||
else
|
||||
xlinfo = newitemonleft ? XLOG_BTREE_SPLIT_L : XLOG_BTREE_SPLIT_R;
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, xlinfo);
|
||||
|
||||
PageSetLSN(origpage, recptr);
|
||||
PageSetLSN(rightpage, recptr);
|
||||
@@ -2090,34 +1996,35 @@ _bt_newroot(Relation rel, Buffer lbuf, Buffer rbuf)
|
||||
{
|
||||
xl_btree_newroot xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[3];
|
||||
xl_btree_metadata md;
|
||||
|
||||
xlrec.node = rel->rd_node;
|
||||
xlrec.rootblk = rootblknum;
|
||||
xlrec.level = metad->btm_level;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBtreeNewroot;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
|
||||
|
||||
XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
|
||||
XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
|
||||
|
||||
md.root = rootblknum;
|
||||
md.level = metad->btm_level;
|
||||
md.fastroot = rootblknum;
|
||||
md.fastlevel = metad->btm_level;
|
||||
|
||||
XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
|
||||
|
||||
/*
|
||||
* Direct access to page is not good but faster - we should implement
|
||||
* some new func in page API.
|
||||
*/
|
||||
rdata[1].data = (char *) rootpage + ((PageHeader) rootpage)->pd_upper;
|
||||
rdata[1].len = ((PageHeader) rootpage)->pd_special -
|
||||
((PageHeader) rootpage)->pd_upper;
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = &(rdata[2]);
|
||||
XLogRegisterBufData(0,
|
||||
(char *) rootpage + ((PageHeader) rootpage)->pd_upper,
|
||||
((PageHeader) rootpage)->pd_special -
|
||||
((PageHeader) rootpage)->pd_upper);
|
||||
|
||||
/* Make a full-page image of the left child if needed */
|
||||
rdata[2].data = NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].buffer = lbuf;
|
||||
rdata[2].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
|
||||
|
||||
PageSetLSN(lpage, recptr);
|
||||
PageSetLSN(rootpage, recptr);
|
||||
|
||||
@@ -236,18 +236,25 @@ _bt_getroot(Relation rel, int access)
|
||||
{
|
||||
xl_btree_newroot xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
xl_btree_metadata md;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, rootbuf, REGBUF_WILL_INIT);
|
||||
XLogRegisterBuffer(2, metabuf, REGBUF_WILL_INIT);
|
||||
|
||||
md.root = rootblkno;
|
||||
md.level = 0;
|
||||
md.fastroot = rootblkno;
|
||||
md.fastlevel = 0;
|
||||
|
||||
XLogRegisterBufData(2, (char *) &md, sizeof(xl_btree_metadata));
|
||||
|
||||
xlrec.node = rel->rd_node;
|
||||
xlrec.rootblk = rootblkno;
|
||||
xlrec.level = 0;
|
||||
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = SizeOfBtreeNewroot;
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBtreeNewroot);
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT, &rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWROOT);
|
||||
|
||||
PageSetLSN(rootpage, recptr);
|
||||
PageSetLSN(metapg, recptr);
|
||||
@@ -528,39 +535,23 @@ _bt_checkpage(Relation rel, Buffer buf)
|
||||
static void
|
||||
_bt_log_reuse_page(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
|
||||
{
|
||||
if (!RelationNeedsWAL(rel))
|
||||
return;
|
||||
|
||||
/* No ereport(ERROR) until changes are logged */
|
||||
START_CRIT_SECTION();
|
||||
xl_btree_reuse_page xlrec_reuse;
|
||||
|
||||
/*
|
||||
* We don't do MarkBufferDirty here because we're about to initialise the
|
||||
* page, and nobody else can see it yet.
|
||||
* Note that we don't register the buffer with the record, because this
|
||||
* operation doesn't modify the page. This record only exists to provide a
|
||||
* conflict point for Hot Standby.
|
||||
*/
|
||||
|
||||
/* XLOG stuff */
|
||||
{
|
||||
XLogRecData rdata[1];
|
||||
xl_btree_reuse_page xlrec_reuse;
|
||||
xlrec_reuse.node = rel->rd_node;
|
||||
xlrec_reuse.block = blkno;
|
||||
xlrec_reuse.latestRemovedXid = latestRemovedXid;
|
||||
|
||||
xlrec_reuse.node = rel->rd_node;
|
||||
xlrec_reuse.block = blkno;
|
||||
xlrec_reuse.latestRemovedXid = latestRemovedXid;
|
||||
rdata[0].data = (char *) &xlrec_reuse;
|
||||
rdata[0].len = SizeOfBtreeReusePage;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec_reuse, SizeOfBtreeReusePage);
|
||||
|
||||
XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE, rdata);
|
||||
|
||||
/*
|
||||
* We don't do PageSetLSN here because we're about to initialise the
|
||||
* page, so no need.
|
||||
*/
|
||||
}
|
||||
|
||||
END_CRIT_SECTION();
|
||||
XLogInsert(RM_BTREE_ID, XLOG_BTREE_REUSE_PAGE);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -633,7 +624,7 @@ _bt_getbuf(Relation rel, BlockNumber blkno, int access)
|
||||
* WAL record that will allow us to conflict with queries
|
||||
* running on standby.
|
||||
*/
|
||||
if (XLogStandbyInfoActive())
|
||||
if (XLogStandbyInfoActive() && RelationNeedsWAL(rel))
|
||||
{
|
||||
BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
@@ -830,17 +821,13 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
|
||||
if (RelationNeedsWAL(rel))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[2];
|
||||
xl_btree_vacuum xlrec_vacuum;
|
||||
|
||||
xlrec_vacuum.node = rel->rd_node;
|
||||
xlrec_vacuum.block = BufferGetBlockNumber(buf);
|
||||
|
||||
xlrec_vacuum.lastBlockVacuumed = lastBlockVacuumed;
|
||||
rdata[0].data = (char *) &xlrec_vacuum;
|
||||
rdata[0].len = SizeOfBtreeVacuum;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
XLogRegisterData((char *) &xlrec_vacuum, SizeOfBtreeVacuum);
|
||||
|
||||
/*
|
||||
* The target-offsets array is not in the buffer, but pretend that it
|
||||
@@ -848,20 +835,9 @@ _bt_delitems_vacuum(Relation rel, Buffer buf,
|
||||
* need not be stored too.
|
||||
*/
|
||||
if (nitems > 0)
|
||||
{
|
||||
rdata[1].data = (char *) itemnos;
|
||||
rdata[1].len = nitems * sizeof(OffsetNumber);
|
||||
}
|
||||
else
|
||||
{
|
||||
rdata[1].data = NULL;
|
||||
rdata[1].len = 0;
|
||||
}
|
||||
rdata[1].buffer = buf;
|
||||
rdata[1].buffer_std = true;
|
||||
rdata[1].next = NULL;
|
||||
XLogRegisterBufData(0, (char *) itemnos, nitems * sizeof(OffsetNumber));
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_VACUUM);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
@@ -919,36 +895,23 @@ _bt_delitems_delete(Relation rel, Buffer buf,
|
||||
if (RelationNeedsWAL(rel))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[3];
|
||||
xl_btree_delete xlrec_delete;
|
||||
|
||||
xlrec_delete.node = rel->rd_node;
|
||||
xlrec_delete.hnode = heapRel->rd_node;
|
||||
xlrec_delete.block = BufferGetBlockNumber(buf);
|
||||
xlrec_delete.nitems = nitems;
|
||||
|
||||
rdata[0].data = (char *) &xlrec_delete;
|
||||
rdata[0].len = SizeOfBtreeDelete;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, buf, REGBUF_STANDARD);
|
||||
XLogRegisterData((char *) &xlrec_delete, SizeOfBtreeDelete);
|
||||
|
||||
/*
|
||||
* We need the target-offsets array whether or not we store the whole
|
||||
* buffer, to allow us to find the latestRemovedXid on a standby
|
||||
* server.
|
||||
*/
|
||||
rdata[1].data = (char *) itemnos;
|
||||
rdata[1].len = nitems * sizeof(OffsetNumber);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = &(rdata[2]);
|
||||
XLogRegisterData((char *) itemnos, nitems * sizeof(OffsetNumber));
|
||||
|
||||
rdata[2].data = NULL;
|
||||
rdata[2].len = 0;
|
||||
rdata[2].buffer = buf;
|
||||
rdata[2].buffer_std = true;
|
||||
rdata[2].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_DELETE);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
@@ -1493,33 +1456,26 @@ _bt_mark_page_halfdead(Relation rel, Buffer leafbuf, BTStack stack)
|
||||
{
|
||||
xl_btree_mark_page_halfdead xlrec;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[2];
|
||||
|
||||
xlrec.target.node = rel->rd_node;
|
||||
ItemPointerSet(&(xlrec.target.tid), BufferGetBlockNumber(topparent), topoff);
|
||||
xlrec.poffset = topoff;
|
||||
xlrec.leafblk = leafblkno;
|
||||
if (target != leafblkno)
|
||||
xlrec.topparent = target;
|
||||
else
|
||||
xlrec.topparent = InvalidBlockNumber;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(0, leafbuf, REGBUF_WILL_INIT);
|
||||
XLogRegisterBuffer(1, topparent, REGBUF_STANDARD);
|
||||
|
||||
page = BufferGetPage(leafbuf);
|
||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
xlrec.leftblk = opaque->btpo_prev;
|
||||
xlrec.rightblk = opaque->btpo_next;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBtreeMarkPageHalfDead;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBtreeMarkPageHalfDead);
|
||||
|
||||
rdata[1].data = NULL;
|
||||
rdata[1].len = 0;
|
||||
rdata[1].buffer = topparent;
|
||||
rdata[1].buffer_std = true;
|
||||
rdata[1].next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, XLOG_BTREE_MARK_PAGE_HALFDEAD);
|
||||
|
||||
page = BufferGetPage(topparent);
|
||||
PageSetLSN(page, recptr);
|
||||
@@ -1826,63 +1782,44 @@ _bt_unlink_halfdead_page(Relation rel, Buffer leafbuf, bool *rightsib_empty)
|
||||
xl_btree_metadata xlmeta;
|
||||
uint8 xlinfo;
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata[4];
|
||||
XLogRecData *nextrdata;
|
||||
|
||||
xlrec.node = rel->rd_node;
|
||||
XLogBeginInsert();
|
||||
|
||||
XLogRegisterBuffer(0, buf, REGBUF_WILL_INIT);
|
||||
if (BufferIsValid(lbuf))
|
||||
XLogRegisterBuffer(1, lbuf, REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(2, rbuf, REGBUF_STANDARD);
|
||||
if (target != leafblkno)
|
||||
XLogRegisterBuffer(3, leafbuf, REGBUF_WILL_INIT);
|
||||
|
||||
/* information on the unlinked block */
|
||||
xlrec.deadblk = target;
|
||||
xlrec.leftsib = leftsib;
|
||||
xlrec.rightsib = rightsib;
|
||||
xlrec.btpo_xact = opaque->btpo.xact;
|
||||
|
||||
/* information needed to recreate the leaf block (if not the target) */
|
||||
xlrec.leafblk = leafblkno;
|
||||
xlrec.leafleftsib = leafleftsib;
|
||||
xlrec.leafrightsib = leafrightsib;
|
||||
xlrec.topparent = nextchild;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = SizeOfBtreeUnlinkPage;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = nextrdata = &(rdata[1]);
|
||||
XLogRegisterData((char *) &xlrec, SizeOfBtreeUnlinkPage);
|
||||
|
||||
if (BufferIsValid(metabuf))
|
||||
{
|
||||
XLogRegisterBuffer(4, metabuf, REGBUF_WILL_INIT);
|
||||
|
||||
xlmeta.root = metad->btm_root;
|
||||
xlmeta.level = metad->btm_level;
|
||||
xlmeta.fastroot = metad->btm_fastroot;
|
||||
xlmeta.fastlevel = metad->btm_fastlevel;
|
||||
|
||||
nextrdata->data = (char *) &xlmeta;
|
||||
nextrdata->len = sizeof(xl_btree_metadata);
|
||||
nextrdata->buffer = InvalidBuffer;
|
||||
nextrdata->next = nextrdata + 1;
|
||||
nextrdata++;
|
||||
XLogRegisterBufData(4, (char *) &xlmeta, sizeof(xl_btree_metadata));
|
||||
xlinfo = XLOG_BTREE_UNLINK_PAGE_META;
|
||||
}
|
||||
else
|
||||
xlinfo = XLOG_BTREE_UNLINK_PAGE;
|
||||
|
||||
nextrdata->data = NULL;
|
||||
nextrdata->len = 0;
|
||||
nextrdata->buffer = rbuf;
|
||||
nextrdata->buffer_std = true;
|
||||
nextrdata->next = NULL;
|
||||
|
||||
if (BufferIsValid(lbuf))
|
||||
{
|
||||
nextrdata->next = nextrdata + 1;
|
||||
nextrdata++;
|
||||
nextrdata->data = NULL;
|
||||
nextrdata->len = 0;
|
||||
nextrdata->buffer = lbuf;
|
||||
nextrdata->buffer_std = true;
|
||||
nextrdata->next = NULL;
|
||||
}
|
||||
|
||||
recptr = XLogInsert(RM_BTREE_ID, xlinfo, rdata);
|
||||
recptr = XLogInsert(RM_BTREE_ID, xlinfo);
|
||||
|
||||
if (BufferIsValid(metabuf))
|
||||
{
|
||||
|
||||
@@ -72,17 +72,23 @@ _bt_restore_page(Page page, char *from, int len)
|
||||
}
|
||||
|
||||
static void
|
||||
_bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
|
||||
BlockNumber root, uint32 level,
|
||||
BlockNumber fastroot, uint32 fastlevel)
|
||||
_bt_restore_meta(XLogReaderState *record, uint8 block_id)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer metabuf;
|
||||
Page metapg;
|
||||
BTMetaPageData *md;
|
||||
BTPageOpaque pageop;
|
||||
xl_btree_metadata *xlrec;
|
||||
char *ptr;
|
||||
Size len;
|
||||
|
||||
metabuf = XLogReadBuffer(rnode, BTREE_METAPAGE, true);
|
||||
Assert(BufferIsValid(metabuf));
|
||||
metabuf = XLogInitBufferForRedo(record, block_id);
|
||||
ptr = XLogRecGetBlockData(record, block_id, &len);
|
||||
|
||||
Assert(len == sizeof(xl_btree_metadata));
|
||||
Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
|
||||
xlrec = (xl_btree_metadata *) ptr;
|
||||
metapg = BufferGetPage(metabuf);
|
||||
|
||||
_bt_pageinit(metapg, BufferGetPageSize(metabuf));
|
||||
@@ -90,10 +96,10 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
|
||||
md = BTPageGetMeta(metapg);
|
||||
md->btm_magic = BTREE_MAGIC;
|
||||
md->btm_version = BTREE_VERSION;
|
||||
md->btm_root = root;
|
||||
md->btm_level = level;
|
||||
md->btm_fastroot = fastroot;
|
||||
md->btm_fastlevel = fastlevel;
|
||||
md->btm_root = xlrec->root;
|
||||
md->btm_level = xlrec->level;
|
||||
md->btm_fastroot = xlrec->fastroot;
|
||||
md->btm_fastlevel = xlrec->fastlevel;
|
||||
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
|
||||
pageop->btpo_flags = BTP_META;
|
||||
@@ -117,14 +123,12 @@ _bt_restore_meta(RelFileNode rnode, XLogRecPtr lsn,
|
||||
* types that can insert a downlink: insert, split, and newroot.
|
||||
*/
|
||||
static void
|
||||
_bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
|
||||
int block_index,
|
||||
RelFileNode rnode, BlockNumber cblock)
|
||||
_bt_clear_incomplete_split(XLogReaderState *record, uint8 block_id)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buf;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, block_index, rnode, cblock, &buf)
|
||||
== BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, block_id, &buf) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = (Page) BufferGetPage(buf);
|
||||
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -140,38 +144,12 @@ _bt_clear_incomplete_split(XLogRecPtr lsn, XLogRecord *record,
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_insert(bool isleaf, bool ismeta,
|
||||
XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_insert(bool isleaf, bool ismeta, XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
char *datapos;
|
||||
int datalen;
|
||||
xl_btree_metadata md;
|
||||
BlockNumber cblkno = 0;
|
||||
int main_blk_index;
|
||||
|
||||
datapos = (char *) xlrec + SizeOfBtreeInsert;
|
||||
datalen = record->xl_len - SizeOfBtreeInsert;
|
||||
|
||||
/*
|
||||
* if this insert finishes a split at lower level, extract the block
|
||||
* number of the (left) child.
|
||||
*/
|
||||
if (!isleaf && (record->xl_info & XLR_BKP_BLOCK(0)) == 0)
|
||||
{
|
||||
memcpy(&cblkno, datapos, sizeof(BlockNumber));
|
||||
Assert(cblkno != 0);
|
||||
datapos += sizeof(BlockNumber);
|
||||
datalen -= sizeof(BlockNumber);
|
||||
}
|
||||
if (ismeta)
|
||||
{
|
||||
memcpy(&md, datapos, sizeof(xl_btree_metadata));
|
||||
datapos += sizeof(xl_btree_metadata);
|
||||
datalen -= sizeof(xl_btree_metadata);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insertion to an internal page finishes an incomplete split at the child
|
||||
@@ -183,21 +161,15 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
||||
* cannot be updates happening.
|
||||
*/
|
||||
if (!isleaf)
|
||||
_bt_clear_incomplete_split(record, 1);
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
_bt_clear_incomplete_split(lsn, record, 0, xlrec->target.node, cblkno);
|
||||
main_blk_index = 1;
|
||||
}
|
||||
else
|
||||
main_blk_index = 0;
|
||||
Size datalen;
|
||||
char *datapos = XLogRecGetBlockData(record, 0, &datalen);
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, main_blk_index, xlrec->target.node,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target.tid)),
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
if (PageAddItem(page, (Item) datapos, datalen,
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target.tid)),
|
||||
if (PageAddItem(page, (Item) datapos, datalen, xlrec->offnum,
|
||||
false, false) == InvalidOffsetNumber)
|
||||
elog(PANIC, "btree_insert_redo: failed to add item");
|
||||
|
||||
@@ -215,15 +187,13 @@ btree_xlog_insert(bool isleaf, bool ismeta,
|
||||
* obsolete link from the metapage.
|
||||
*/
|
||||
if (ismeta)
|
||||
_bt_restore_meta(xlrec->target.node, lsn,
|
||||
md.root, md.level,
|
||||
md.fastroot, md.fastlevel);
|
||||
_bt_restore_meta(record, 2);
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_split(bool onleft, bool isroot,
|
||||
XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_split(bool onleft, bool isroot, XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_split *xlrec = (xl_btree_split *) XLogRecGetData(record);
|
||||
bool isleaf = (xlrec->level == 0);
|
||||
Buffer lbuf;
|
||||
@@ -231,56 +201,17 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
Page rpage;
|
||||
BTPageOpaque ropaque;
|
||||
char *datapos;
|
||||
int datalen;
|
||||
OffsetNumber newitemoff = 0;
|
||||
Item newitem = NULL;
|
||||
Size newitemsz = 0;
|
||||
Size datalen;
|
||||
Item left_hikey = NULL;
|
||||
Size left_hikeysz = 0;
|
||||
BlockNumber cblkno = InvalidBlockNumber;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
BlockNumber rnext;
|
||||
|
||||
datapos = (char *) xlrec + SizeOfBtreeSplit;
|
||||
datalen = record->xl_len - SizeOfBtreeSplit;
|
||||
|
||||
/* Extract newitemoff and newitem, if present */
|
||||
if (onleft)
|
||||
{
|
||||
memcpy(&newitemoff, datapos, sizeof(OffsetNumber));
|
||||
datapos += sizeof(OffsetNumber);
|
||||
datalen -= sizeof(OffsetNumber);
|
||||
}
|
||||
if (onleft && !(record->xl_info & XLR_BKP_BLOCK(0)))
|
||||
{
|
||||
/*
|
||||
* We assume that 16-bit alignment is enough to apply IndexTupleSize
|
||||
* (since it's fetching from a uint16 field) and also enough for
|
||||
* PageAddItem to insert the tuple.
|
||||
*/
|
||||
newitem = (Item) datapos;
|
||||
newitemsz = MAXALIGN(IndexTupleSize(newitem));
|
||||
datapos += newitemsz;
|
||||
datalen -= newitemsz;
|
||||
}
|
||||
|
||||
/* Extract left hikey and its size (still assuming 16-bit alignment) */
|
||||
if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(0)))
|
||||
{
|
||||
left_hikey = (Item) datapos;
|
||||
left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
|
||||
datapos += left_hikeysz;
|
||||
datalen -= left_hikeysz;
|
||||
}
|
||||
|
||||
/*
|
||||
* If this insertion finishes an incomplete split, get the block number of
|
||||
* the child.
|
||||
*/
|
||||
if (!isleaf && !(record->xl_info & XLR_BKP_BLOCK(1)))
|
||||
{
|
||||
memcpy(&cblkno, datapos, sizeof(BlockNumber));
|
||||
datapos += sizeof(BlockNumber);
|
||||
datalen -= sizeof(BlockNumber);
|
||||
}
|
||||
XLogRecGetBlockTag(record, 0, NULL, NULL, &leftsib);
|
||||
XLogRecGetBlockTag(record, 1, NULL, NULL, &rightsib);
|
||||
if (!XLogRecGetBlockTag(record, 2, NULL, NULL, &rnext))
|
||||
rnext = P_NONE;
|
||||
|
||||
/*
|
||||
* Clear the incomplete split flag on the left sibling of the child page
|
||||
@@ -288,18 +219,18 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
* before locking the other pages)
|
||||
*/
|
||||
if (!isleaf)
|
||||
_bt_clear_incomplete_split(lsn, record, 1, xlrec->node, cblkno);
|
||||
_bt_clear_incomplete_split(record, 3);
|
||||
|
||||
/* Reconstruct right (new) sibling page from scratch */
|
||||
rbuf = XLogReadBuffer(xlrec->node, xlrec->rightsib, true);
|
||||
Assert(BufferIsValid(rbuf));
|
||||
rbuf = XLogInitBufferForRedo(record, 1);
|
||||
datapos = XLogRecGetBlockData(record, 1, &datalen);
|
||||
rpage = (Page) BufferGetPage(rbuf);
|
||||
|
||||
_bt_pageinit(rpage, BufferGetPageSize(rbuf));
|
||||
ropaque = (BTPageOpaque) PageGetSpecialPointer(rpage);
|
||||
|
||||
ropaque->btpo_prev = xlrec->leftsib;
|
||||
ropaque->btpo_next = xlrec->rnext;
|
||||
ropaque->btpo_prev = leftsib;
|
||||
ropaque->btpo_next = rnext;
|
||||
ropaque->btpo.level = xlrec->level;
|
||||
ropaque->btpo_flags = isleaf ? BTP_LEAF : 0;
|
||||
ropaque->btpo_cycleid = 0;
|
||||
@@ -324,8 +255,7 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
/* don't release the buffer yet; we touch right page's first item below */
|
||||
|
||||
/* Now reconstruct left (original) sibling page */
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->leftsib,
|
||||
&lbuf) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &lbuf) == BLK_NEEDS_REDO)
|
||||
{
|
||||
/*
|
||||
* To retain the same physical order of the tuples that they had, we
|
||||
@@ -339,9 +269,31 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
Page lpage = (Page) BufferGetPage(lbuf);
|
||||
BTPageOpaque lopaque = (BTPageOpaque) PageGetSpecialPointer(lpage);
|
||||
OffsetNumber off;
|
||||
Item newitem;
|
||||
Size newitemsz = 0;
|
||||
Page newlpage;
|
||||
OffsetNumber leftoff;
|
||||
|
||||
datapos = XLogRecGetBlockData(record, 0, &datalen);
|
||||
|
||||
if (onleft)
|
||||
{
|
||||
newitem = (Item) datapos;
|
||||
newitemsz = MAXALIGN(IndexTupleSize(newitem));
|
||||
datapos += newitemsz;
|
||||
datalen -= newitemsz;
|
||||
}
|
||||
|
||||
/* Extract left hikey and its size (assuming 16-bit alignment) */
|
||||
if (!isleaf)
|
||||
{
|
||||
left_hikey = (Item) datapos;
|
||||
left_hikeysz = MAXALIGN(IndexTupleSize(left_hikey));
|
||||
datapos += left_hikeysz;
|
||||
datalen -= left_hikeysz;
|
||||
}
|
||||
Assert(datalen == 0);
|
||||
|
||||
newlpage = PageGetTempPageCopySpecial(lpage);
|
||||
|
||||
/* Set high key */
|
||||
@@ -358,7 +310,7 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
Item item;
|
||||
|
||||
/* add the new item if it was inserted on left page */
|
||||
if (onleft && off == newitemoff)
|
||||
if (onleft && off == xlrec->newitemoff)
|
||||
{
|
||||
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
|
||||
false, false) == InvalidOffsetNumber)
|
||||
@@ -376,7 +328,7 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
}
|
||||
|
||||
/* cope with possibility that newitem goes at the end */
|
||||
if (onleft && off == newitemoff)
|
||||
if (onleft && off == xlrec->newitemoff)
|
||||
{
|
||||
if (PageAddItem(newlpage, newitem, newitemsz, leftoff,
|
||||
false, false) == InvalidOffsetNumber)
|
||||
@@ -390,7 +342,7 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
lopaque->btpo_flags = BTP_INCOMPLETE_SPLIT;
|
||||
if (isleaf)
|
||||
lopaque->btpo_flags |= BTP_LEAF;
|
||||
lopaque->btpo_next = xlrec->rightsib;
|
||||
lopaque->btpo_next = rightsib;
|
||||
lopaque->btpo_cycleid = 0;
|
||||
|
||||
PageSetLSN(lpage, lsn);
|
||||
@@ -410,22 +362,16 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
* replay, because no other index update can be in progress, and readers
|
||||
* will cope properly when following an obsolete left-link.
|
||||
*/
|
||||
if (xlrec->rnext != P_NONE)
|
||||
if (rnext != P_NONE)
|
||||
{
|
||||
/*
|
||||
* the backup block containing right sibling is 1 or 2, depending
|
||||
* whether this was a leaf or internal page.
|
||||
*/
|
||||
int rnext_index = isleaf ? 1 : 2;
|
||||
Buffer buffer;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, rnext_index, xlrec->node,
|
||||
xlrec->rnext, &buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = (Page) BufferGetPage(buffer);
|
||||
BTPageOpaque pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
pageop->btpo_prev = xlrec->rightsib;
|
||||
pageop->btpo_prev = rightsib;
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
@@ -436,8 +382,9 @@ btree_xlog_split(bool onleft, bool isroot,
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_vacuum(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
@@ -466,9 +413,13 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
if (HotStandbyActiveInReplay())
|
||||
{
|
||||
RelFileNode thisrnode;
|
||||
BlockNumber thisblkno;
|
||||
BlockNumber blkno;
|
||||
|
||||
for (blkno = xlrec->lastBlockVacuumed + 1; blkno < xlrec->block; blkno++)
|
||||
XLogRecGetBlockTag(record, 0, &thisrnode, NULL, &thisblkno);
|
||||
|
||||
for (blkno = xlrec->lastBlockVacuumed + 1; blkno < thisblkno; blkno++)
|
||||
{
|
||||
/*
|
||||
* We use RBM_NORMAL_NO_LOG mode because it's not an error
|
||||
@@ -483,7 +434,7 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
|
||||
* buffer manager we could optimise this so that if the block is
|
||||
* not in shared_buffers we confirm it as unpinned.
|
||||
*/
|
||||
buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno,
|
||||
buffer = XLogReadBufferExtended(thisrnode, MAIN_FORKNUM, blkno,
|
||||
RBM_NORMAL_NO_LOG);
|
||||
if (BufferIsValid(buffer))
|
||||
{
|
||||
@@ -497,20 +448,23 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
|
||||
* Like in btvacuumpage(), we need to take a cleanup lock on every leaf
|
||||
* page. See nbtree/README for details.
|
||||
*/
|
||||
if (XLogReadBufferForRedoExtended(lsn, record, 0,
|
||||
xlrec->node, MAIN_FORKNUM, xlrec->block,
|
||||
RBM_NORMAL, true, &buffer)
|
||||
if (XLogReadBufferForRedoExtended(record, 0, RBM_NORMAL, true, &buffer)
|
||||
== BLK_NEEDS_REDO)
|
||||
{
|
||||
char *ptr;
|
||||
Size len;
|
||||
|
||||
ptr = XLogRecGetBlockData(record, 0, &len);
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (record->xl_len > SizeOfBtreeVacuum)
|
||||
if (len > 0)
|
||||
{
|
||||
OffsetNumber *unused;
|
||||
OffsetNumber *unend;
|
||||
|
||||
unused = (OffsetNumber *) ((char *) xlrec + SizeOfBtreeVacuum);
|
||||
unend = (OffsetNumber *) ((char *) xlrec + record->xl_len);
|
||||
unused = (OffsetNumber *) ptr;
|
||||
unend = (OffsetNumber *) ((char *) ptr + len);
|
||||
|
||||
if ((unend - unused) > 0)
|
||||
PageIndexMultiDelete(page, unused, unend - unused);
|
||||
@@ -542,13 +496,16 @@ btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
|
||||
* XXX optimise later with something like XLogPrefetchBuffer()
|
||||
*/
|
||||
static TransactionId
|
||||
btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
|
||||
btree_xlog_delete_get_latestRemovedXid(XLogReaderState *record)
|
||||
{
|
||||
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
||||
OffsetNumber *unused;
|
||||
Buffer ibuffer,
|
||||
hbuffer;
|
||||
Page ipage,
|
||||
hpage;
|
||||
RelFileNode rnode;
|
||||
BlockNumber blkno;
|
||||
ItemId iitemid,
|
||||
hitemid;
|
||||
IndexTuple itup;
|
||||
@@ -588,9 +545,11 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
|
||||
* InvalidTransactionId to cancel all HS transactions. That's probably
|
||||
* overkill, but it's safe, and certainly better than panicking here.
|
||||
*/
|
||||
ibuffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
|
||||
XLogRecGetBlockTag(record, 0, &rnode, NULL, &blkno);
|
||||
ibuffer = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno, RBM_NORMAL);
|
||||
if (!BufferIsValid(ibuffer))
|
||||
return InvalidTransactionId;
|
||||
LockBuffer(ibuffer, BT_READ);
|
||||
ipage = (Page) BufferGetPage(ibuffer);
|
||||
|
||||
/*
|
||||
@@ -611,12 +570,13 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
|
||||
* Locate the heap page that the index tuple points at
|
||||
*/
|
||||
hblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
|
||||
hbuffer = XLogReadBuffer(xlrec->hnode, hblkno, false);
|
||||
hbuffer = XLogReadBufferExtended(xlrec->hnode, MAIN_FORKNUM, hblkno, RBM_NORMAL);
|
||||
if (!BufferIsValid(hbuffer))
|
||||
{
|
||||
UnlockReleaseBuffer(ibuffer);
|
||||
return InvalidTransactionId;
|
||||
}
|
||||
LockBuffer(hbuffer, BUFFER_LOCK_SHARE);
|
||||
hpage = (Page) BufferGetPage(hbuffer);
|
||||
|
||||
/*
|
||||
@@ -678,8 +638,9 @@ btree_xlog_delete_get_latestRemovedXid(xl_btree_delete *xlrec)
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_delete(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_delete *xlrec = (xl_btree_delete *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
@@ -698,21 +659,23 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
if (InHotStandby)
|
||||
{
|
||||
TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(xlrec);
|
||||
TransactionId latestRemovedXid = btree_xlog_delete_get_latestRemovedXid(record);
|
||||
RelFileNode rnode;
|
||||
|
||||
ResolveRecoveryConflictWithSnapshot(latestRemovedXid, xlrec->node);
|
||||
XLogRecGetBlockTag(record, 0, &rnode, NULL, NULL);
|
||||
|
||||
ResolveRecoveryConflictWithSnapshot(latestRemovedXid, rnode);
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't need to take a cleanup lock to apply these changes. See
|
||||
* nbtree/README for details.
|
||||
*/
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, xlrec->block,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (record->xl_len > SizeOfBtreeDelete)
|
||||
if (XLogRecGetDataLen(record) > SizeOfBtreeDelete)
|
||||
{
|
||||
OffsetNumber *unused;
|
||||
|
||||
@@ -736,17 +699,15 @@ btree_xlog_delete(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_mark_page_halfdead(uint8 info, XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) XLogRecGetData(record);
|
||||
BlockNumber parent;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
IndexTupleData trunctuple;
|
||||
|
||||
parent = ItemPointerGetBlockNumber(&(xlrec->target.tid));
|
||||
|
||||
/*
|
||||
* In normal operation, we would lock all the pages this WAL record
|
||||
* touches before changing any of them. In WAL replay, it should be okay
|
||||
@@ -756,8 +717,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
|
||||
/* parent page */
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->target.node, parent,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
OffsetNumber poffset;
|
||||
ItemId itemid;
|
||||
@@ -768,7 +728,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
poffset = ItemPointerGetOffsetNumber(&(xlrec->target.tid));
|
||||
poffset = xlrec->poffset;
|
||||
|
||||
nextoffset = OffsetNumberNext(poffset);
|
||||
itemid = PageGetItemId(page, nextoffset);
|
||||
@@ -788,8 +748,7 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* Rewrite the leaf page as a halfdead page */
|
||||
buffer = XLogReadBuffer(xlrec->target.node, xlrec->leafblk, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
_bt_pageinit(page, BufferGetPageSize(buffer));
|
||||
@@ -822,17 +781,16 @@ btree_xlog_mark_page_halfdead(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
|
||||
static void
|
||||
btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_unlink_page(uint8 info, XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) XLogRecGetData(record);
|
||||
BlockNumber target;
|
||||
BlockNumber leftsib;
|
||||
BlockNumber rightsib;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
|
||||
target = xlrec->deadblk;
|
||||
leftsib = xlrec->leftsib;
|
||||
rightsib = xlrec->rightsib;
|
||||
|
||||
@@ -845,8 +803,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
|
||||
/* Fix left-link of right sibling */
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xlrec->node, rightsib, &buffer)
|
||||
== BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -861,8 +818,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
/* Fix right-link of left sibling, if any */
|
||||
if (leftsib != P_NONE)
|
||||
{
|
||||
if (XLogReadBufferForRedo(lsn, record, 1, xlrec->node, leftsib, &buffer)
|
||||
== BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
@@ -876,8 +832,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/* Rewrite target page as empty deleted page */
|
||||
buffer = XLogReadBuffer(xlrec->node, target, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
_bt_pageinit(page, BufferGetPageSize(buffer));
|
||||
@@ -898,7 +853,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
* itself, update the leaf to point to the next remaining child in the
|
||||
* branch.
|
||||
*/
|
||||
if (target != xlrec->leafblk)
|
||||
if (XLogRecHasBlockRef(record, 3))
|
||||
{
|
||||
/*
|
||||
* There is no real data on the page, so we just re-create it from
|
||||
@@ -906,8 +861,7 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
IndexTupleData trunctuple;
|
||||
|
||||
buffer = XLogReadBuffer(xlrec->node, xlrec->leafblk, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 3);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
pageop = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||
|
||||
@@ -936,27 +890,21 @@ btree_xlog_unlink_page(uint8 info, XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
/* Update metapage if needed */
|
||||
if (info == XLOG_BTREE_UNLINK_PAGE_META)
|
||||
{
|
||||
xl_btree_metadata md;
|
||||
|
||||
memcpy(&md, (char *) xlrec + SizeOfBtreeUnlinkPage,
|
||||
sizeof(xl_btree_metadata));
|
||||
_bt_restore_meta(xlrec->node, lsn,
|
||||
md.root, md.level,
|
||||
md.fastroot, md.fastlevel);
|
||||
}
|
||||
_bt_restore_meta(record, 4);
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_newroot(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
xl_btree_newroot *xlrec = (xl_btree_newroot *) XLogRecGetData(record);
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
BTPageOpaque pageop;
|
||||
char *ptr;
|
||||
Size len;
|
||||
|
||||
buffer = XLogReadBuffer(xlrec->node, xlrec->rootblk, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
_bt_pageinit(page, BufferGetPageSize(buffer));
|
||||
@@ -969,34 +917,24 @@ btree_xlog_newroot(XLogRecPtr lsn, XLogRecord *record)
|
||||
pageop->btpo_flags |= BTP_LEAF;
|
||||
pageop->btpo_cycleid = 0;
|
||||
|
||||
if (record->xl_len > SizeOfBtreeNewroot)
|
||||
if (xlrec->level > 0)
|
||||
{
|
||||
IndexTuple itup;
|
||||
BlockNumber cblkno;
|
||||
|
||||
_bt_restore_page(page,
|
||||
(char *) xlrec + SizeOfBtreeNewroot,
|
||||
record->xl_len - SizeOfBtreeNewroot);
|
||||
/* extract block number of the left-hand split page */
|
||||
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, P_HIKEY));
|
||||
cblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
|
||||
Assert(ItemPointerGetOffsetNumber(&(itup->t_tid)) == P_HIKEY);
|
||||
ptr = XLogRecGetBlockData(record, 0, &len);
|
||||
_bt_restore_page(page, ptr, len);
|
||||
|
||||
/* Clear the incomplete-split flag in left child */
|
||||
_bt_clear_incomplete_split(lsn, record, 0, xlrec->node, cblkno);
|
||||
_bt_clear_incomplete_split(record, 1);
|
||||
}
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
_bt_restore_meta(xlrec->node, lsn,
|
||||
xlrec->rootblk, xlrec->level,
|
||||
xlrec->rootblk, xlrec->level);
|
||||
_bt_restore_meta(record, 2);
|
||||
}
|
||||
|
||||
static void
|
||||
btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_xlog_reuse_page(XLogReaderState *record)
|
||||
{
|
||||
xl_btree_reuse_page *xlrec = (xl_btree_reuse_page *) XLogRecGetData(record);
|
||||
|
||||
@@ -1015,58 +953,55 @@ btree_xlog_reuse_page(XLogRecPtr lsn, XLogRecord *record)
|
||||
ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
|
||||
xlrec->node);
|
||||
}
|
||||
|
||||
/* Backup blocks are not used in reuse_page records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
btree_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
btree_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_BTREE_INSERT_LEAF:
|
||||
btree_xlog_insert(true, false, lsn, record);
|
||||
btree_xlog_insert(true, false, record);
|
||||
break;
|
||||
case XLOG_BTREE_INSERT_UPPER:
|
||||
btree_xlog_insert(false, false, lsn, record);
|
||||
btree_xlog_insert(false, false, record);
|
||||
break;
|
||||
case XLOG_BTREE_INSERT_META:
|
||||
btree_xlog_insert(false, true, lsn, record);
|
||||
btree_xlog_insert(false, true, record);
|
||||
break;
|
||||
case XLOG_BTREE_SPLIT_L:
|
||||
btree_xlog_split(true, false, lsn, record);
|
||||
btree_xlog_split(true, false, record);
|
||||
break;
|
||||
case XLOG_BTREE_SPLIT_R:
|
||||
btree_xlog_split(false, false, lsn, record);
|
||||
btree_xlog_split(false, false, record);
|
||||
break;
|
||||
case XLOG_BTREE_SPLIT_L_ROOT:
|
||||
btree_xlog_split(true, true, lsn, record);
|
||||
btree_xlog_split(true, true, record);
|
||||
break;
|
||||
case XLOG_BTREE_SPLIT_R_ROOT:
|
||||
btree_xlog_split(false, true, lsn, record);
|
||||
btree_xlog_split(false, true, record);
|
||||
break;
|
||||
case XLOG_BTREE_VACUUM:
|
||||
btree_xlog_vacuum(lsn, record);
|
||||
btree_xlog_vacuum(record);
|
||||
break;
|
||||
case XLOG_BTREE_DELETE:
|
||||
btree_xlog_delete(lsn, record);
|
||||
btree_xlog_delete(record);
|
||||
break;
|
||||
case XLOG_BTREE_MARK_PAGE_HALFDEAD:
|
||||
btree_xlog_mark_page_halfdead(info, lsn, record);
|
||||
btree_xlog_mark_page_halfdead(info, record);
|
||||
break;
|
||||
case XLOG_BTREE_UNLINK_PAGE:
|
||||
case XLOG_BTREE_UNLINK_PAGE_META:
|
||||
btree_xlog_unlink_page(info, lsn, record);
|
||||
btree_xlog_unlink_page(info, record);
|
||||
break;
|
||||
case XLOG_BTREE_NEWROOT:
|
||||
btree_xlog_newroot(lsn, record);
|
||||
btree_xlog_newroot(record);
|
||||
break;
|
||||
case XLOG_BTREE_REUSE_PAGE:
|
||||
btree_xlog_reuse_page(lsn, record);
|
||||
btree_xlog_reuse_page(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "btree_redo: unknown op code %u", info);
|
||||
|
||||
@@ -17,64 +17,49 @@
|
||||
#include "access/brin_xlog.h"
|
||||
|
||||
void
|
||||
brin_desc(StringInfo buf, XLogRecord *record)
|
||||
brin_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
info &= XLOG_BRIN_OPMASK;
|
||||
if (info == XLOG_BRIN_CREATE_INDEX)
|
||||
{
|
||||
xl_brin_createidx *xlrec = (xl_brin_createidx *) rec;
|
||||
|
||||
appendStringInfo(buf, "v%d pagesPerRange %u rel %u/%u/%u",
|
||||
xlrec->version, xlrec->pagesPerRange,
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode);
|
||||
appendStringInfo(buf, "v%d pagesPerRange %u",
|
||||
xlrec->version, xlrec->pagesPerRange);
|
||||
}
|
||||
else if (info == XLOG_BRIN_INSERT)
|
||||
{
|
||||
xl_brin_insert *xlrec = (xl_brin_insert *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u TID (%u,%u)",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode,
|
||||
xlrec->heapBlk, xlrec->revmapBlk,
|
||||
appendStringInfo(buf, "heapBlk %u pagesPerRange %u offnum %u",
|
||||
xlrec->heapBlk,
|
||||
xlrec->pagesPerRange,
|
||||
ItemPointerGetBlockNumber(&xlrec->tid),
|
||||
ItemPointerGetOffsetNumber(&xlrec->tid));
|
||||
xlrec->offnum);
|
||||
}
|
||||
else if (info == XLOG_BRIN_UPDATE)
|
||||
{
|
||||
xl_brin_update *xlrec = (xl_brin_update *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u heapBlk %u revmapBlk %u pagesPerRange %u old TID (%u,%u) TID (%u,%u)",
|
||||
xlrec->insert.node.spcNode, xlrec->insert.node.dbNode,
|
||||
xlrec->insert.node.relNode,
|
||||
xlrec->insert.heapBlk, xlrec->insert.revmapBlk,
|
||||
appendStringInfo(buf, "heapBlk %u pagesPerRange %u old offnum %u, new offnum %u",
|
||||
xlrec->insert.heapBlk,
|
||||
xlrec->insert.pagesPerRange,
|
||||
ItemPointerGetBlockNumber(&xlrec->oldtid),
|
||||
ItemPointerGetOffsetNumber(&xlrec->oldtid),
|
||||
ItemPointerGetBlockNumber(&xlrec->insert.tid),
|
||||
ItemPointerGetOffsetNumber(&xlrec->insert.tid));
|
||||
xlrec->oldOffnum,
|
||||
xlrec->insert.offnum);
|
||||
}
|
||||
else if (info == XLOG_BRIN_SAMEPAGE_UPDATE)
|
||||
{
|
||||
xl_brin_samepage_update *xlrec = (xl_brin_samepage_update *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u TID (%u,%u)",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode,
|
||||
ItemPointerGetBlockNumber(&xlrec->tid),
|
||||
ItemPointerGetOffsetNumber(&xlrec->tid));
|
||||
appendStringInfo(buf, "offnum %u", xlrec->offnum);
|
||||
}
|
||||
else if (info == XLOG_BRIN_REVMAP_EXTEND)
|
||||
{
|
||||
xl_brin_revmap_extend *xlrec = (xl_brin_revmap_extend *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u targetBlk %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->targetBlk);
|
||||
appendStringInfo(buf, "targetBlk %u", xlrec->targetBlk);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
|
||||
|
||||
void
|
||||
clog_desc(StringInfo buf, XLogRecord *record)
|
||||
clog_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == CLOG_ZEROPAGE || info == CLOG_TRUNCATE)
|
||||
{
|
||||
|
||||
@@ -19,10 +19,10 @@
|
||||
|
||||
|
||||
void
|
||||
dbase_desc(StringInfo buf, XLogRecord *record)
|
||||
dbase_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_DBASE_CREATE)
|
||||
{
|
||||
|
||||
@@ -15,16 +15,10 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/gin_private.h"
|
||||
#include "access/xlogutils.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
static void
|
||||
desc_node(StringInfo buf, RelFileNode node, BlockNumber blkno)
|
||||
{
|
||||
appendStringInfo(buf, "node: %u/%u/%u blkno: %u",
|
||||
node.spcNode, node.dbNode, node.relNode, blkno);
|
||||
}
|
||||
|
||||
static void
|
||||
desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
|
||||
{
|
||||
@@ -77,26 +71,25 @@ desc_recompress_leaf(StringInfo buf, ginxlogRecompressDataLeaf *insertData)
|
||||
}
|
||||
|
||||
void
|
||||
gin_desc(StringInfo buf, XLogRecord *record)
|
||||
gin_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_GIN_CREATE_INDEX:
|
||||
desc_node(buf, *(RelFileNode *) rec, GIN_ROOT_BLKNO);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_GIN_CREATE_PTREE:
|
||||
desc_node(buf, ((ginxlogCreatePostingTree *) rec)->node, ((ginxlogCreatePostingTree *) rec)->blkno);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_GIN_INSERT:
|
||||
{
|
||||
ginxlogInsert *xlrec = (ginxlogInsert *) rec;
|
||||
char *payload = rec + sizeof(ginxlogInsert);
|
||||
|
||||
desc_node(buf, xlrec->node, xlrec->blkno);
|
||||
appendStringInfo(buf, " isdata: %c isleaf: %c",
|
||||
appendStringInfo(buf, "isdata: %c isleaf: %c",
|
||||
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
|
||||
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
|
||||
if (!(xlrec->flags & GIN_INSERT_ISLEAF))
|
||||
@@ -119,7 +112,7 @@ gin_desc(StringInfo buf, XLogRecord *record)
|
||||
ginxlogRecompressDataLeaf *insertData =
|
||||
(ginxlogRecompressDataLeaf *) payload;
|
||||
|
||||
if (record->xl_info & XLR_BKP_BLOCK(0))
|
||||
if (XLogRecHasBlockImage(record, 0))
|
||||
appendStringInfo(buf, " (full page image)");
|
||||
else
|
||||
desc_recompress_leaf(buf, insertData);
|
||||
@@ -139,39 +132,38 @@ gin_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
ginxlogSplit *xlrec = (ginxlogSplit *) rec;
|
||||
|
||||
desc_node(buf, ((ginxlogSplit *) rec)->node, ((ginxlogSplit *) rec)->lblkno);
|
||||
appendStringInfo(buf, " isrootsplit: %c", (((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
|
||||
appendStringInfo(buf, "isrootsplit: %c",
|
||||
(((ginxlogSplit *) rec)->flags & GIN_SPLIT_ROOT) ? 'T' : 'F');
|
||||
appendStringInfo(buf, " isdata: %c isleaf: %c",
|
||||
(xlrec->flags & GIN_INSERT_ISDATA) ? 'T' : 'F',
|
||||
(xlrec->flags & GIN_INSERT_ISLEAF) ? 'T' : 'F');
|
||||
}
|
||||
break;
|
||||
case XLOG_GIN_VACUUM_PAGE:
|
||||
desc_node(buf, ((ginxlogVacuumPage *) rec)->node, ((ginxlogVacuumPage *) rec)->blkno);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_GIN_VACUUM_DATA_LEAF_PAGE:
|
||||
{
|
||||
ginxlogVacuumDataLeafPage *xlrec = (ginxlogVacuumDataLeafPage *) rec;
|
||||
|
||||
desc_node(buf, xlrec->node, xlrec->blkno);
|
||||
if (record->xl_info & XLR_BKP_BLOCK(0))
|
||||
if (XLogRecHasBlockImage(record, 0))
|
||||
appendStringInfo(buf, " (full page image)");
|
||||
else
|
||||
desc_recompress_leaf(buf, &xlrec->data);
|
||||
}
|
||||
break;
|
||||
case XLOG_GIN_DELETE_PAGE:
|
||||
desc_node(buf, ((ginxlogDeletePage *) rec)->node, ((ginxlogDeletePage *) rec)->blkno);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_GIN_UPDATE_META_PAGE:
|
||||
desc_node(buf, ((ginxlogUpdateMeta *) rec)->node, GIN_METAPAGE_BLKNO);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_GIN_INSERT_LISTPAGE:
|
||||
desc_node(buf, ((ginxlogInsertListPage *) rec)->node, ((ginxlogInsertListPage *) rec)->blkno);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_GIN_DELETE_LISTPAGE:
|
||||
appendStringInfo(buf, "%d pages, ", ((ginxlogDeleteListPages *) rec)->ndeleted);
|
||||
desc_node(buf, ((ginxlogDeleteListPages *) rec)->node, GIN_METAPAGE_BLKNO);
|
||||
appendStringInfo(buf, "ndeleted: %d",
|
||||
((ginxlogDeleteListPages *) rec)->ndeleted);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,34 +18,23 @@
|
||||
#include "lib/stringinfo.h"
|
||||
#include "storage/relfilenode.h"
|
||||
|
||||
static void
|
||||
out_target(StringInfo buf, RelFileNode node)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u",
|
||||
node.spcNode, node.dbNode, node.relNode);
|
||||
}
|
||||
|
||||
static void
|
||||
out_gistxlogPageUpdate(StringInfo buf, gistxlogPageUpdate *xlrec)
|
||||
{
|
||||
out_target(buf, xlrec->node);
|
||||
appendStringInfo(buf, "; block number %u", xlrec->blkno);
|
||||
}
|
||||
|
||||
static void
|
||||
out_gistxlogPageSplit(StringInfo buf, gistxlogPageSplit *xlrec)
|
||||
{
|
||||
appendStringInfoString(buf, "page_split: ");
|
||||
out_target(buf, xlrec->node);
|
||||
appendStringInfo(buf, "; block number %u splits to %d pages",
|
||||
xlrec->origblkno, xlrec->npage);
|
||||
appendStringInfo(buf, "page_split: splits to %d pages",
|
||||
xlrec->npage);
|
||||
}
|
||||
|
||||
void
|
||||
gist_desc(StringInfo buf, XLogRecord *record)
|
||||
gist_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info)
|
||||
{
|
||||
@@ -56,10 +45,6 @@ gist_desc(StringInfo buf, XLogRecord *record)
|
||||
out_gistxlogPageSplit(buf, (gistxlogPageSplit *) rec);
|
||||
break;
|
||||
case XLOG_GIST_CREATE_INDEX:
|
||||
appendStringInfo(buf, "rel %u/%u/%u",
|
||||
((RelFileNode *) rec)->spcNode,
|
||||
((RelFileNode *) rec)->dbNode,
|
||||
((RelFileNode *) rec)->relNode);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
#include "access/hash.h"
|
||||
|
||||
void
|
||||
hash_desc(StringInfo buf, XLogRecord *record)
|
||||
hash_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -16,15 +16,6 @@
|
||||
|
||||
#include "access/heapam_xlog.h"
|
||||
|
||||
static void
|
||||
out_target(StringInfo buf, xl_heaptid *target)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
|
||||
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
||||
ItemPointerGetBlockNumber(&(target->tid)),
|
||||
ItemPointerGetOffsetNumber(&(target->tid)));
|
||||
}
|
||||
|
||||
static void
|
||||
out_infobits(StringInfo buf, uint8 infobits)
|
||||
{
|
||||
@@ -41,23 +32,23 @@ out_infobits(StringInfo buf, uint8 infobits)
|
||||
}
|
||||
|
||||
void
|
||||
heap_desc(StringInfo buf, XLogRecord *record)
|
||||
heap_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
info &= XLOG_HEAP_OPMASK;
|
||||
if (info == XLOG_HEAP_INSERT)
|
||||
{
|
||||
xl_heap_insert *xlrec = (xl_heap_insert *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "off %u", xlrec->offnum);
|
||||
}
|
||||
else if (info == XLOG_HEAP_DELETE)
|
||||
{
|
||||
xl_heap_delete *xlrec = (xl_heap_delete *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "off %u", xlrec->offnum);
|
||||
appendStringInfoChar(buf, ' ');
|
||||
out_infobits(buf, xlrec->infobits_set);
|
||||
}
|
||||
@@ -65,24 +56,24 @@ heap_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_heap_update *xlrec = (xl_heap_update *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
|
||||
appendStringInfo(buf, "off %u xmax %u",
|
||||
xlrec->old_offnum,
|
||||
xlrec->old_xmax);
|
||||
out_infobits(buf, xlrec->old_infobits_set);
|
||||
appendStringInfo(buf, "; new tid %u/%u xmax %u",
|
||||
ItemPointerGetBlockNumber(&(xlrec->newtid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->newtid)),
|
||||
appendStringInfo(buf, "; new off %u xmax %u",
|
||||
xlrec->new_offnum,
|
||||
xlrec->new_xmax);
|
||||
}
|
||||
else if (info == XLOG_HEAP_HOT_UPDATE)
|
||||
{
|
||||
xl_heap_update *xlrec = (xl_heap_update *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, " xmax %u ", xlrec->old_xmax);
|
||||
appendStringInfo(buf, "off %u xmax %u",
|
||||
xlrec->old_offnum,
|
||||
xlrec->old_xmax);
|
||||
out_infobits(buf, xlrec->old_infobits_set);
|
||||
appendStringInfo(buf, "; new tid %u/%u xmax %u",
|
||||
ItemPointerGetBlockNumber(&(xlrec->newtid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->newtid)),
|
||||
appendStringInfo(buf, "; new off %u xmax %u",
|
||||
xlrec->new_offnum,
|
||||
xlrec->new_xmax);
|
||||
}
|
||||
else if (info == XLOG_HEAP_LOCK)
|
||||
@@ -90,40 +81,34 @@ heap_desc(StringInfo buf, XLogRecord *record)
|
||||
xl_heap_lock *xlrec = (xl_heap_lock *) rec;
|
||||
|
||||
appendStringInfo(buf, "xid %u: ", xlrec->locking_xid);
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfoChar(buf, ' ');
|
||||
appendStringInfo(buf, "off %u ", xlrec->offnum);
|
||||
out_infobits(buf, xlrec->infobits_set);
|
||||
}
|
||||
else if (info == XLOG_HEAP_INPLACE)
|
||||
{
|
||||
xl_heap_inplace *xlrec = (xl_heap_inplace *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "off %u", xlrec->offnum);
|
||||
}
|
||||
}
|
||||
void
|
||||
heap2_desc(StringInfo buf, XLogRecord *record)
|
||||
heap2_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
info &= XLOG_HEAP_OPMASK;
|
||||
if (info == XLOG_HEAP2_CLEAN)
|
||||
{
|
||||
xl_heap_clean *xlrec = (xl_heap_clean *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; blk %u remxid %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block,
|
||||
xlrec->latestRemovedXid);
|
||||
appendStringInfo(buf, "remxid %u", xlrec->latestRemovedXid);
|
||||
}
|
||||
else if (info == XLOG_HEAP2_FREEZE_PAGE)
|
||||
{
|
||||
xl_heap_freeze_page *xlrec = (xl_heap_freeze_page *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; blk %u; cutoff xid %u ntuples %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block,
|
||||
appendStringInfo(buf, "cutoff xid %u ntuples %u",
|
||||
xlrec->cutoff_xid, xlrec->ntuples);
|
||||
}
|
||||
else if (info == XLOG_HEAP2_CLEANUP_INFO)
|
||||
@@ -136,17 +121,13 @@ heap2_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_heap_visible *xlrec = (xl_heap_visible *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; blk %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block);
|
||||
appendStringInfo(buf, "cutoff xid %u", xlrec->cutoff_xid);
|
||||
}
|
||||
else if (info == XLOG_HEAP2_MULTI_INSERT)
|
||||
{
|
||||
xl_heap_multi_insert *xlrec = (xl_heap_multi_insert *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; blk %u; %d tuples",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||
xlrec->blkno, xlrec->ntuples);
|
||||
appendStringInfo(buf, "%d tuples", xlrec->ntuples);
|
||||
}
|
||||
else if (info == XLOG_HEAP2_LOCK_UPDATED)
|
||||
{
|
||||
@@ -154,13 +135,18 @@ heap2_desc(StringInfo buf, XLogRecord *record)
|
||||
|
||||
appendStringInfo(buf, "xmax %u msk %04x; ", xlrec->xmax,
|
||||
xlrec->infobits_set);
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "off %u", xlrec->offnum);
|
||||
}
|
||||
else if (info == XLOG_HEAP2_NEW_CID)
|
||||
{
|
||||
xl_heap_new_cid *xlrec = (xl_heap_new_cid *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
|
||||
xlrec->target_node.spcNode,
|
||||
xlrec->target_node.dbNode,
|
||||
xlrec->target_node.relNode,
|
||||
ItemPointerGetBlockNumber(&(xlrec->target_tid)),
|
||||
ItemPointerGetOffsetNumber(&(xlrec->target_tid)));
|
||||
appendStringInfo(buf, "; cmin: %u, cmax: %u, combo: %u",
|
||||
xlrec->cmin, xlrec->cmax, xlrec->combocid);
|
||||
}
|
||||
|
||||
@@ -47,10 +47,10 @@ out_member(StringInfo buf, MultiXactMember *member)
|
||||
}
|
||||
|
||||
void
|
||||
multixact_desc(StringInfo buf, XLogRecord *record)
|
||||
multixact_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE ||
|
||||
info == XLOG_MULTIXACT_ZERO_MEM_PAGE)
|
||||
|
||||
@@ -16,20 +16,11 @@
|
||||
|
||||
#include "access/nbtree.h"
|
||||
|
||||
static void
|
||||
out_target(StringInfo buf, xl_btreetid *target)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u; tid %u/%u",
|
||||
target->node.spcNode, target->node.dbNode, target->node.relNode,
|
||||
ItemPointerGetBlockNumber(&(target->tid)),
|
||||
ItemPointerGetOffsetNumber(&(target->tid)));
|
||||
}
|
||||
|
||||
void
|
||||
btree_desc(StringInfo buf, XLogRecord *record)
|
||||
btree_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info)
|
||||
{
|
||||
@@ -39,7 +30,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_btree_insert *xlrec = (xl_btree_insert *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "off %u", xlrec->offnum);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_SPLIT_L:
|
||||
@@ -49,11 +40,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_btree_split *xlrec = (xl_btree_split *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u ",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode);
|
||||
appendStringInfo(buf, "left %u, right %u, next %u, level %u, firstright %d",
|
||||
xlrec->leftsib, xlrec->rightsib, xlrec->rnext,
|
||||
appendStringInfo(buf, "level %u, firstright %d",
|
||||
xlrec->level, xlrec->firstright);
|
||||
break;
|
||||
}
|
||||
@@ -61,9 +48,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_btree_vacuum *xlrec = (xl_btree_vacuum *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; blk %u, lastBlockVacuumed %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->block,
|
||||
appendStringInfo(buf, "lastBlockVacuumed %u",
|
||||
xlrec->lastBlockVacuumed);
|
||||
break;
|
||||
}
|
||||
@@ -71,18 +56,14 @@ btree_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_btree_delete *xlrec = (xl_btree_delete *) rec;
|
||||
|
||||
appendStringInfo(buf, "index %u/%u/%u; iblk %u, heap %u/%u/%u;",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode,
|
||||
xlrec->block,
|
||||
xlrec->hnode.spcNode, xlrec->hnode.dbNode, xlrec->hnode.relNode);
|
||||
appendStringInfo(buf, "%d items", xlrec->nitems);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_MARK_PAGE_HALFDEAD:
|
||||
{
|
||||
xl_btree_mark_page_halfdead *xlrec = (xl_btree_mark_page_halfdead *) rec;
|
||||
|
||||
out_target(buf, &(xlrec->target));
|
||||
appendStringInfo(buf, "; topparent %u; leaf %u; left %u; right %u",
|
||||
appendStringInfo(buf, "topparent %u; leaf %u; left %u; right %u",
|
||||
xlrec->topparent, xlrec->leafblk, xlrec->leftblk, xlrec->rightblk);
|
||||
break;
|
||||
}
|
||||
@@ -91,22 +72,19 @@ btree_desc(StringInfo buf, XLogRecord *record)
|
||||
{
|
||||
xl_btree_unlink_page *xlrec = (xl_btree_unlink_page *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; ",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode, xlrec->node.relNode);
|
||||
appendStringInfo(buf, "dead %u; left %u; right %u; btpo_xact %u; ",
|
||||
xlrec->deadblk, xlrec->leftsib, xlrec->rightsib, xlrec->btpo_xact);
|
||||
appendStringInfo(buf, "leaf %u; leafleft %u; leafright %u; topparent %u",
|
||||
xlrec->leafblk, xlrec->leafleftsib, xlrec->leafrightsib, xlrec->topparent);
|
||||
appendStringInfo(buf, "left %u; right %u; btpo_xact %u; ",
|
||||
xlrec->leftsib, xlrec->rightsib,
|
||||
xlrec->btpo_xact);
|
||||
appendStringInfo(buf, "leafleft %u; leafright %u; topparent %u",
|
||||
xlrec->leafleftsib, xlrec->leafrightsib,
|
||||
xlrec->topparent);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_NEWROOT:
|
||||
{
|
||||
xl_btree_newroot *xlrec = (xl_btree_newroot *) rec;
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; root %u lev %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode,
|
||||
xlrec->rootblk, xlrec->level);
|
||||
appendStringInfo(buf, "lev %u", xlrec->level);
|
||||
break;
|
||||
}
|
||||
case XLOG_BTREE_REUSE_PAGE:
|
||||
@@ -115,7 +93,7 @@ btree_desc(StringInfo buf, XLogRecord *record)
|
||||
|
||||
appendStringInfo(buf, "rel %u/%u/%u; latestRemovedXid %u",
|
||||
xlrec->node.spcNode, xlrec->node.dbNode,
|
||||
xlrec->node.relNode, xlrec->latestRemovedXid);
|
||||
xlrec->node.relNode, xlrec->latestRemovedXid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,10 +17,10 @@
|
||||
#include "utils/relmapper.h"
|
||||
|
||||
void
|
||||
relmap_desc(StringInfo buf, XLogRecord *record)
|
||||
relmap_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_RELMAP_UPDATE)
|
||||
{
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
|
||||
|
||||
void
|
||||
seq_desc(StringInfo buf, XLogRecord *record)
|
||||
seq_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
xl_seq_rec *xlrec = (xl_seq_rec *) rec;
|
||||
|
||||
if (info == XLOG_SEQ_LOG)
|
||||
|
||||
@@ -19,10 +19,10 @@
|
||||
|
||||
|
||||
void
|
||||
smgr_desc(StringInfo buf, XLogRecord *record)
|
||||
smgr_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_SMGR_CREATE)
|
||||
{
|
||||
|
||||
@@ -16,70 +16,66 @@
|
||||
|
||||
#include "access/spgist_private.h"
|
||||
|
||||
static void
|
||||
out_target(StringInfo buf, RelFileNode node)
|
||||
{
|
||||
appendStringInfo(buf, "rel %u/%u/%u ",
|
||||
node.spcNode, node.dbNode, node.relNode);
|
||||
}
|
||||
|
||||
void
|
||||
spg_desc(StringInfo buf, XLogRecord *record)
|
||||
spg_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_SPGIST_CREATE_INDEX:
|
||||
appendStringInfo(buf, "rel %u/%u/%u",
|
||||
((RelFileNode *) rec)->spcNode,
|
||||
((RelFileNode *) rec)->dbNode,
|
||||
((RelFileNode *) rec)->relNode);
|
||||
break;
|
||||
case XLOG_SPGIST_ADD_LEAF:
|
||||
out_target(buf, ((spgxlogAddLeaf *) rec)->node);
|
||||
appendStringInfo(buf, "%u",
|
||||
((spgxlogAddLeaf *) rec)->blknoLeaf);
|
||||
{
|
||||
spgxlogAddLeaf *xlrec = (spgxlogAddLeaf *) rec;
|
||||
|
||||
appendStringInfo(buf, "add leaf to page");
|
||||
appendStringInfo(buf, "; off %u; headoff %u; parentoff %u",
|
||||
xlrec->offnumLeaf, xlrec->offnumHeadLeaf,
|
||||
xlrec->offnumParent);
|
||||
if (xlrec->newPage)
|
||||
appendStringInfo(buf, " (newpage)");
|
||||
if (xlrec->storesNulls)
|
||||
appendStringInfo(buf, " (nulls)");
|
||||
}
|
||||
break;
|
||||
case XLOG_SPGIST_MOVE_LEAFS:
|
||||
out_target(buf, ((spgxlogMoveLeafs *) rec)->node);
|
||||
appendStringInfo(buf, "%u leafs from page %u to page %u",
|
||||
((spgxlogMoveLeafs *) rec)->nMoves,
|
||||
((spgxlogMoveLeafs *) rec)->blknoSrc,
|
||||
((spgxlogMoveLeafs *) rec)->blknoDst);
|
||||
appendStringInfo(buf, "%u leafs",
|
||||
((spgxlogMoveLeafs *) rec)->nMoves);
|
||||
break;
|
||||
case XLOG_SPGIST_ADD_NODE:
|
||||
out_target(buf, ((spgxlogAddNode *) rec)->node);
|
||||
appendStringInfo(buf, "%u:%u",
|
||||
((spgxlogAddNode *) rec)->blkno,
|
||||
appendStringInfo(buf, "off %u",
|
||||
((spgxlogAddNode *) rec)->offnum);
|
||||
break;
|
||||
case XLOG_SPGIST_SPLIT_TUPLE:
|
||||
out_target(buf, ((spgxlogSplitTuple *) rec)->node);
|
||||
appendStringInfo(buf, "%u:%u to %u:%u",
|
||||
((spgxlogSplitTuple *) rec)->blknoPrefix,
|
||||
appendStringInfo(buf, "prefix off: %u, postfix off: %u (same %d, new %d)",
|
||||
((spgxlogSplitTuple *) rec)->offnumPrefix,
|
||||
((spgxlogSplitTuple *) rec)->blknoPostfix,
|
||||
((spgxlogSplitTuple *) rec)->offnumPostfix);
|
||||
((spgxlogSplitTuple *) rec)->offnumPostfix,
|
||||
((spgxlogSplitTuple *) rec)->postfixBlkSame,
|
||||
((spgxlogSplitTuple *) rec)->newPage
|
||||
);
|
||||
break;
|
||||
case XLOG_SPGIST_PICKSPLIT:
|
||||
out_target(buf, ((spgxlogPickSplit *) rec)->node);
|
||||
{
|
||||
spgxlogPickSplit *xlrec = (spgxlogPickSplit *) rec;
|
||||
|
||||
appendStringInfo(buf, "ndel %u; nins %u",
|
||||
xlrec->nDelete, xlrec->nInsert);
|
||||
if (xlrec->innerIsParent)
|
||||
appendStringInfo(buf, " (innerIsParent)");
|
||||
if (xlrec->isRootSplit)
|
||||
appendStringInfo(buf, " (isRootSplit)");
|
||||
}
|
||||
break;
|
||||
case XLOG_SPGIST_VACUUM_LEAF:
|
||||
out_target(buf, ((spgxlogVacuumLeaf *) rec)->node);
|
||||
appendStringInfo(buf, "page %u",
|
||||
((spgxlogVacuumLeaf *) rec)->blkno);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_SPGIST_VACUUM_ROOT:
|
||||
out_target(buf, ((spgxlogVacuumRoot *) rec)->node);
|
||||
appendStringInfo(buf, "page %u",
|
||||
((spgxlogVacuumRoot *) rec)->blkno);
|
||||
/* no further information */
|
||||
break;
|
||||
case XLOG_SPGIST_VACUUM_REDIRECT:
|
||||
out_target(buf, ((spgxlogVacuumRedirect *) rec)->node);
|
||||
appendStringInfo(buf, "page %u, newest XID %u",
|
||||
((spgxlogVacuumRedirect *) rec)->blkno,
|
||||
appendStringInfo(buf, "newest XID %u",
|
||||
((spgxlogVacuumRedirect *) rec)->newestRedirectXid);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -37,10 +37,10 @@ standby_desc_running_xacts(StringInfo buf, xl_running_xacts *xlrec)
|
||||
}
|
||||
|
||||
void
|
||||
standby_desc(StringInfo buf, XLogRecord *record)
|
||||
standby_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_STANDBY_LOCK)
|
||||
{
|
||||
|
||||
@@ -18,10 +18,10 @@
|
||||
|
||||
|
||||
void
|
||||
tblspc_desc(StringInfo buf, XLogRecord *record)
|
||||
tblspc_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_TBLSPC_CREATE)
|
||||
{
|
||||
|
||||
@@ -137,10 +137,10 @@ xact_desc_assignment(StringInfo buf, xl_xact_assignment *xlrec)
|
||||
}
|
||||
|
||||
void
|
||||
xact_desc(StringInfo buf, XLogRecord *record)
|
||||
xact_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_XACT_COMMIT_COMPACT)
|
||||
{
|
||||
|
||||
@@ -32,10 +32,10 @@ const struct config_enum_entry wal_level_options[] = {
|
||||
};
|
||||
|
||||
void
|
||||
xlog_desc(StringInfo buf, XLogRecord *record)
|
||||
xlog_desc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
char *rec = XLogRecGetData(record);
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (info == XLOG_CHECKPOINT_SHUTDOWN ||
|
||||
info == XLOG_CHECKPOINT_ONLINE)
|
||||
@@ -76,11 +76,7 @@ xlog_desc(StringInfo buf, XLogRecord *record)
|
||||
}
|
||||
else if (info == XLOG_FPI)
|
||||
{
|
||||
BkpBlock *bkp = (BkpBlock *) rec;
|
||||
|
||||
appendStringInfo(buf, "%s block %u",
|
||||
relpathperm(bkp->node, bkp->fork),
|
||||
bkp->block);
|
||||
/* no further information to print */
|
||||
}
|
||||
else if (info == XLOG_BACKUP_END)
|
||||
{
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include "access/genam.h"
|
||||
#include "access/xloginsert.h"
|
||||
#include "access/spgist_private.h"
|
||||
#include "access/xloginsert.h"
|
||||
#include "miscadmin.h"
|
||||
#include "storage/bufmgr.h"
|
||||
#include "utils/rel.h"
|
||||
@@ -202,25 +202,17 @@ static void
|
||||
addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
|
||||
SPPageDesc *current, SPPageDesc *parent, bool isNulls, bool isNew)
|
||||
{
|
||||
XLogRecData rdata[4];
|
||||
spgxlogAddLeaf xlrec;
|
||||
|
||||
xlrec.node = index->rd_node;
|
||||
xlrec.blknoLeaf = current->blkno;
|
||||
xlrec.newPage = isNew;
|
||||
xlrec.storesNulls = isNulls;
|
||||
|
||||
/* these will be filled below as needed */
|
||||
xlrec.offnumLeaf = InvalidOffsetNumber;
|
||||
xlrec.offnumHeadLeaf = InvalidOffsetNumber;
|
||||
xlrec.blknoParent = InvalidBlockNumber;
|
||||
xlrec.offnumParent = InvalidOffsetNumber;
|
||||
xlrec.nodeI = 0;
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
|
||||
ACCEPT_RDATA_DATA(leafTuple, leafTuple->size, 1);
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, 2);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
if (current->offnum == InvalidOffsetNumber ||
|
||||
@@ -237,13 +229,10 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
|
||||
/* Must update parent's downlink if any */
|
||||
if (parent->buffer != InvalidBuffer)
|
||||
{
|
||||
xlrec.blknoParent = parent->blkno;
|
||||
xlrec.offnumParent = parent->offnum;
|
||||
xlrec.nodeI = parent->node;
|
||||
|
||||
saveNodeLink(index, parent, current->blkno, current->offnum);
|
||||
|
||||
ACCEPT_RDATA_BUFFER(parent->buffer, 3);
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -303,12 +292,20 @@ addLeafTuple(Relation index, SpGistState *state, SpGistLeafTuple leafTuple,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF, rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
|
||||
XLogRegisterData((char *) leafTuple, leafTuple->size);
|
||||
|
||||
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
|
||||
if (xlrec.offnumParent != InvalidOffsetNumber)
|
||||
XLogRegisterBuffer(1, parent->buffer, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_LEAF);
|
||||
|
||||
PageSetLSN(current->page, recptr);
|
||||
|
||||
/* update parent only if we actually changed it */
|
||||
if (xlrec.blknoParent != InvalidBlockNumber)
|
||||
if (xlrec.offnumParent != InvalidOffsetNumber)
|
||||
{
|
||||
PageSetLSN(parent->page, recptr);
|
||||
}
|
||||
@@ -399,7 +396,6 @@ moveLeafs(Relation index, SpGistState *state,
|
||||
OffsetNumber *toDelete;
|
||||
OffsetNumber *toInsert;
|
||||
BlockNumber nblkno;
|
||||
XLogRecData rdata[7];
|
||||
spgxlogMoveLeafs xlrec;
|
||||
char *leafdata,
|
||||
*leafptr;
|
||||
@@ -455,20 +451,6 @@ moveLeafs(Relation index, SpGistState *state,
|
||||
nblkno = BufferGetBlockNumber(nbuf);
|
||||
Assert(nblkno != current->blkno);
|
||||
|
||||
/* prepare WAL info */
|
||||
xlrec.node = index->rd_node;
|
||||
STORE_STATE(state, xlrec.stateSrc);
|
||||
|
||||
xlrec.blknoSrc = current->blkno;
|
||||
xlrec.blknoDst = nblkno;
|
||||
xlrec.nMoves = nDelete;
|
||||
xlrec.replaceDead = replaceDead;
|
||||
xlrec.storesNulls = isNulls;
|
||||
|
||||
xlrec.blknoParent = parent->blkno;
|
||||
xlrec.offnumParent = parent->offnum;
|
||||
xlrec.nodeI = parent->node;
|
||||
|
||||
leafdata = leafptr = palloc(size);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
@@ -533,15 +515,29 @@ moveLeafs(Relation index, SpGistState *state,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogMoveLeafs, 0);
|
||||
ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * nDelete, 1);
|
||||
ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nInsert, 2);
|
||||
ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, 3);
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, 4);
|
||||
ACCEPT_RDATA_BUFFER(nbuf, 5);
|
||||
ACCEPT_RDATA_BUFFER(parent->buffer, 6);
|
||||
/* prepare WAL info */
|
||||
STORE_STATE(state, xlrec.stateSrc);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS, rdata);
|
||||
xlrec.nMoves = nDelete;
|
||||
xlrec.replaceDead = replaceDead;
|
||||
xlrec.storesNulls = isNulls;
|
||||
|
||||
xlrec.offnumParent = parent->offnum;
|
||||
xlrec.nodeI = parent->node;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogMoveLeafs);
|
||||
XLogRegisterData((char *) toDelete,
|
||||
sizeof(OffsetNumber) * nDelete);
|
||||
XLogRegisterData((char *) toInsert,
|
||||
sizeof(OffsetNumber) * nInsert);
|
||||
XLogRegisterData((char *) leafdata, leafptr - leafdata);
|
||||
|
||||
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(1, nbuf, REGBUF_STANDARD | (xlrec.newPage ? REGBUF_WILL_INIT : 0));
|
||||
XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_MOVE_LEAFS);
|
||||
|
||||
PageSetLSN(current->page, recptr);
|
||||
PageSetLSN(npage, recptr);
|
||||
@@ -701,8 +697,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
int currentFreeSpace;
|
||||
int totalLeafSizes;
|
||||
bool allTheSame;
|
||||
XLogRecData rdata[10];
|
||||
int nRdata;
|
||||
spgxlogPickSplit xlrec;
|
||||
char *leafdata,
|
||||
*leafptr;
|
||||
@@ -725,7 +719,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
newLeafs = (SpGistLeafTuple *) palloc(sizeof(SpGistLeafTuple) * n);
|
||||
leafPageSelect = (uint8 *) palloc(sizeof(uint8) * n);
|
||||
|
||||
xlrec.node = index->rd_node;
|
||||
STORE_STATE(state, xlrec.stateSrc);
|
||||
|
||||
/*
|
||||
@@ -971,10 +964,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
}
|
||||
|
||||
/*
|
||||
* Because a WAL record can't involve more than four buffers, we can only
|
||||
* afford to deal with two leaf pages in each picksplit action, ie the
|
||||
* current page and at most one other.
|
||||
*
|
||||
* The new leaf tuples converted from the existing ones should require the
|
||||
* same or less space, and therefore should all fit onto one page
|
||||
* (although that's not necessarily the current page, since we can't
|
||||
@@ -1108,17 +1097,13 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
}
|
||||
|
||||
/* Start preparing WAL record */
|
||||
xlrec.blknoSrc = current->blkno;
|
||||
xlrec.blknoDest = InvalidBlockNumber;
|
||||
xlrec.nDelete = 0;
|
||||
xlrec.initSrc = isNew;
|
||||
xlrec.storesNulls = isNulls;
|
||||
xlrec.isRootSplit = SpGistBlockIsRoot(current->blkno);
|
||||
|
||||
leafdata = leafptr = (char *) palloc(totalLeafSizes);
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogPickSplit, 0);
|
||||
nRdata = 1;
|
||||
|
||||
/* Here we begin making the changes to the target pages */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
@@ -1150,12 +1135,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
else
|
||||
{
|
||||
xlrec.nDelete = nToDelete;
|
||||
ACCEPT_RDATA_DATA(toDelete,
|
||||
sizeof(OffsetNumber) * nToDelete,
|
||||
nRdata);
|
||||
nRdata++;
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
|
||||
nRdata++;
|
||||
|
||||
if (!state->isBuild)
|
||||
{
|
||||
@@ -1240,25 +1219,8 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
if (newLeafBuffer != InvalidBuffer)
|
||||
{
|
||||
MarkBufferDirty(newLeafBuffer);
|
||||
/* also save block number for WAL */
|
||||
xlrec.blknoDest = BufferGetBlockNumber(newLeafBuffer);
|
||||
if (!xlrec.initDest)
|
||||
{
|
||||
ACCEPT_RDATA_BUFFER(newLeafBuffer, nRdata);
|
||||
nRdata++;
|
||||
}
|
||||
}
|
||||
|
||||
xlrec.nInsert = nToInsert;
|
||||
ACCEPT_RDATA_DATA(toInsert, sizeof(OffsetNumber) * nToInsert, nRdata);
|
||||
nRdata++;
|
||||
ACCEPT_RDATA_DATA(leafPageSelect, sizeof(uint8) * nToInsert, nRdata);
|
||||
nRdata++;
|
||||
ACCEPT_RDATA_DATA(innerTuple, innerTuple->size, nRdata);
|
||||
nRdata++;
|
||||
ACCEPT_RDATA_DATA(leafdata, leafptr - leafdata, nRdata);
|
||||
nRdata++;
|
||||
|
||||
/* Remember current buffer, since we're about to change "current" */
|
||||
saveCurrent = *current;
|
||||
|
||||
@@ -1276,7 +1238,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
current->blkno = parent->blkno;
|
||||
current->buffer = parent->buffer;
|
||||
current->page = parent->page;
|
||||
xlrec.blknoInner = current->blkno;
|
||||
xlrec.offnumInner = current->offnum =
|
||||
SpGistPageAddNewItem(state, current->page,
|
||||
(Item) innerTuple, innerTuple->size,
|
||||
@@ -1285,14 +1246,11 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
/*
|
||||
* Update parent node link and mark parent page dirty
|
||||
*/
|
||||
xlrec.blknoParent = parent->blkno;
|
||||
xlrec.innerIsParent = true;
|
||||
xlrec.offnumParent = parent->offnum;
|
||||
xlrec.nodeI = parent->node;
|
||||
saveNodeLink(index, parent, current->blkno, current->offnum);
|
||||
|
||||
ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
|
||||
nRdata++;
|
||||
|
||||
/*
|
||||
* Update redirection link (in old current buffer)
|
||||
*/
|
||||
@@ -1314,7 +1272,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
current->buffer = newInnerBuffer;
|
||||
current->blkno = BufferGetBlockNumber(current->buffer);
|
||||
current->page = BufferGetPage(current->buffer);
|
||||
xlrec.blknoInner = current->blkno;
|
||||
xlrec.offnumInner = current->offnum =
|
||||
SpGistPageAddNewItem(state, current->page,
|
||||
(Item) innerTuple, innerTuple->size,
|
||||
@@ -1326,16 +1283,11 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
/*
|
||||
* Update parent node link and mark parent page dirty
|
||||
*/
|
||||
xlrec.blknoParent = parent->blkno;
|
||||
xlrec.innerIsParent = (parent->buffer == current->buffer);
|
||||
xlrec.offnumParent = parent->offnum;
|
||||
xlrec.nodeI = parent->node;
|
||||
saveNodeLink(index, parent, current->blkno, current->offnum);
|
||||
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, nRdata);
|
||||
nRdata++;
|
||||
ACCEPT_RDATA_BUFFER(parent->buffer, nRdata);
|
||||
nRdata++;
|
||||
|
||||
/*
|
||||
* Update redirection link (in old current buffer)
|
||||
*/
|
||||
@@ -1357,8 +1309,8 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
|
||||
SpGistInitBuffer(current->buffer, (isNulls ? SPGIST_NULLS : 0));
|
||||
xlrec.initInner = true;
|
||||
xlrec.innerIsParent = false;
|
||||
|
||||
xlrec.blknoInner = current->blkno;
|
||||
xlrec.offnumInner = current->offnum =
|
||||
PageAddItem(current->page, (Item) innerTuple, innerTuple->size,
|
||||
InvalidOffsetNumber, false, false);
|
||||
@@ -1367,7 +1319,6 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
innerTuple->size);
|
||||
|
||||
/* No parent link to update, nor redirection to do */
|
||||
xlrec.blknoParent = InvalidBlockNumber;
|
||||
xlrec.offnumParent = InvalidOffsetNumber;
|
||||
xlrec.nodeI = 0;
|
||||
|
||||
@@ -1381,9 +1332,46 @@ doPickSplit(Relation index, SpGistState *state,
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
int flags;
|
||||
|
||||
XLogBeginInsert();
|
||||
|
||||
xlrec.nInsert = nToInsert;
|
||||
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogPickSplit);
|
||||
|
||||
XLogRegisterData((char *) toDelete,
|
||||
sizeof(OffsetNumber) * xlrec.nDelete);
|
||||
XLogRegisterData((char *) toInsert,
|
||||
sizeof(OffsetNumber) * xlrec.nInsert);
|
||||
XLogRegisterData((char *) leafPageSelect,
|
||||
sizeof(uint8) * xlrec.nInsert);
|
||||
XLogRegisterData((char *) innerTuple, innerTuple->size);
|
||||
XLogRegisterData(leafdata, leafptr - leafdata);
|
||||
|
||||
flags = REGBUF_STANDARD;
|
||||
if (xlrec.initSrc)
|
||||
flags |= REGBUF_WILL_INIT;
|
||||
if (BufferIsValid(saveCurrent.buffer))
|
||||
XLogRegisterBuffer(0, saveCurrent.buffer, flags);
|
||||
|
||||
if (BufferIsValid(newLeafBuffer))
|
||||
{
|
||||
flags = REGBUF_STANDARD;
|
||||
if (xlrec.initDest)
|
||||
flags |= REGBUF_WILL_INIT;
|
||||
XLogRegisterBuffer(1, newLeafBuffer, flags);
|
||||
}
|
||||
XLogRegisterBuffer(2, current->buffer, REGBUF_STANDARD);
|
||||
if (parent->buffer != InvalidBuffer)
|
||||
{
|
||||
if (parent->buffer != current->buffer)
|
||||
XLogRegisterBuffer(3, parent->buffer, REGBUF_STANDARD);
|
||||
else
|
||||
Assert(xlrec.innerIsParent);
|
||||
}
|
||||
|
||||
/* Issue the WAL record */
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT, rdata);
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_PICKSPLIT);
|
||||
|
||||
/* Update page LSNs on all affected pages */
|
||||
if (newLeafBuffer != InvalidBuffer)
|
||||
@@ -1489,7 +1477,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
int nodeN, Datum nodeLabel)
|
||||
{
|
||||
SpGistInnerTuple newInnerTuple;
|
||||
XLogRecData rdata[5];
|
||||
spgxlogAddNode xlrec;
|
||||
|
||||
/* Should not be applied to nulls */
|
||||
@@ -1499,25 +1486,18 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
newInnerTuple = addNode(state, innerTuple, nodeLabel, nodeN);
|
||||
|
||||
/* Prepare WAL record */
|
||||
xlrec.node = index->rd_node;
|
||||
STORE_STATE(state, xlrec.stateSrc);
|
||||
xlrec.blkno = current->blkno;
|
||||
xlrec.offnum = current->offnum;
|
||||
|
||||
/* we don't fill these unless we need to change the parent downlink */
|
||||
xlrec.blknoParent = InvalidBlockNumber;
|
||||
xlrec.parentBlk = -1;
|
||||
xlrec.offnumParent = InvalidOffsetNumber;
|
||||
xlrec.nodeI = 0;
|
||||
|
||||
/* we don't fill these unless tuple has to be moved */
|
||||
xlrec.blknoNew = InvalidBlockNumber;
|
||||
xlrec.offnumNew = InvalidOffsetNumber;
|
||||
xlrec.newPage = false;
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
|
||||
ACCEPT_RDATA_DATA(newInnerTuple, newInnerTuple->size, 1);
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, 2);
|
||||
|
||||
if (PageGetExactFreeSpace(current->page) >=
|
||||
newInnerTuple->size - innerTuple->size)
|
||||
{
|
||||
@@ -1539,7 +1519,13 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
|
||||
XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
|
||||
|
||||
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
|
||||
|
||||
PageSetLSN(current->page, recptr);
|
||||
}
|
||||
@@ -1565,7 +1551,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
|
||||
saveCurrent = *current;
|
||||
|
||||
xlrec.blknoParent = parent->blkno;
|
||||
xlrec.offnumParent = parent->offnum;
|
||||
xlrec.nodeI = parent->node;
|
||||
|
||||
@@ -1580,8 +1565,6 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
current->blkno = BufferGetBlockNumber(current->buffer);
|
||||
current->page = BufferGetPage(current->buffer);
|
||||
|
||||
xlrec.blknoNew = current->blkno;
|
||||
|
||||
/*
|
||||
* Let's just make real sure new current isn't same as old. Right now
|
||||
* that's impossible, but if SpGistGetBuffer ever got smart enough to
|
||||
@@ -1590,17 +1573,19 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
* replay would be subtly wrong, so I think a mere assert isn't enough
|
||||
* here.
|
||||
*/
|
||||
if (xlrec.blknoNew == xlrec.blkno)
|
||||
if (current->blkno == saveCurrent.blkno)
|
||||
elog(ERROR, "SPGiST new buffer shouldn't be same as old buffer");
|
||||
|
||||
/*
|
||||
* New current and parent buffer will both be modified; but note that
|
||||
* parent buffer could be same as either new or old current.
|
||||
*/
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, 3);
|
||||
if (parent->buffer != current->buffer &&
|
||||
parent->buffer != saveCurrent.buffer)
|
||||
ACCEPT_RDATA_BUFFER(parent->buffer, 4);
|
||||
if (parent->buffer == saveCurrent.buffer)
|
||||
xlrec.parentBlk = 0;
|
||||
else if (parent->buffer == current->buffer)
|
||||
xlrec.parentBlk = 1;
|
||||
else
|
||||
xlrec.parentBlk = 2;
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
@@ -1647,7 +1632,20 @@ spgAddNodeAction(Relation index, SpGistState *state,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE, rdata);
|
||||
XLogBeginInsert();
|
||||
|
||||
/* orig page */
|
||||
XLogRegisterBuffer(0, saveCurrent.buffer, REGBUF_STANDARD);
|
||||
/* new page */
|
||||
XLogRegisterBuffer(1, current->buffer, REGBUF_STANDARD);
|
||||
/* parent page (if different from orig and new) */
|
||||
if (xlrec.parentBlk == 2)
|
||||
XLogRegisterBuffer(2, parent->buffer, REGBUF_STANDARD);
|
||||
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
|
||||
XLogRegisterData((char *) newInnerTuple, newInnerTuple->size);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_ADD_NODE);
|
||||
|
||||
/* we don't bother to check if any of these are redundant */
|
||||
PageSetLSN(current->page, recptr);
|
||||
@@ -1682,7 +1680,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
|
||||
BlockNumber postfixBlkno;
|
||||
OffsetNumber postfixOffset;
|
||||
int i;
|
||||
XLogRecData rdata[5];
|
||||
spgxlogSplitTuple xlrec;
|
||||
Buffer newBuffer = InvalidBuffer;
|
||||
|
||||
@@ -1725,14 +1722,8 @@ spgSplitNodeAction(Relation index, SpGistState *state,
|
||||
postfixTuple->allTheSame = innerTuple->allTheSame;
|
||||
|
||||
/* prep data for WAL record */
|
||||
xlrec.node = index->rd_node;
|
||||
xlrec.newPage = false;
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, sizeof(xlrec), 0);
|
||||
ACCEPT_RDATA_DATA(prefixTuple, prefixTuple->size, 1);
|
||||
ACCEPT_RDATA_DATA(postfixTuple, postfixTuple->size, 2);
|
||||
ACCEPT_RDATA_BUFFER(current->buffer, 3);
|
||||
|
||||
/*
|
||||
* If we can't fit both tuples on the current page, get a new page for the
|
||||
* postfix tuple. In particular, can't split to the root page.
|
||||
@@ -1752,7 +1743,6 @@ spgSplitNodeAction(Relation index, SpGistState *state,
|
||||
GBUF_INNER_PARITY(current->blkno + 1),
|
||||
postfixTuple->size + sizeof(ItemIdData),
|
||||
&xlrec.newPage);
|
||||
ACCEPT_RDATA_BUFFER(newBuffer, 4);
|
||||
}
|
||||
|
||||
START_CRIT_SECTION();
|
||||
@@ -1767,27 +1757,28 @@ spgSplitNodeAction(Relation index, SpGistState *state,
|
||||
if (xlrec.offnumPrefix != current->offnum)
|
||||
elog(ERROR, "failed to add item of size %u to SPGiST index page",
|
||||
prefixTuple->size);
|
||||
xlrec.blknoPrefix = current->blkno;
|
||||
|
||||
/*
|
||||
* put postfix tuple into appropriate page
|
||||
*/
|
||||
if (newBuffer == InvalidBuffer)
|
||||
{
|
||||
xlrec.blknoPostfix = postfixBlkno = current->blkno;
|
||||
postfixBlkno = current->blkno;
|
||||
xlrec.offnumPostfix = postfixOffset =
|
||||
SpGistPageAddNewItem(state, current->page,
|
||||
(Item) postfixTuple, postfixTuple->size,
|
||||
NULL, false);
|
||||
xlrec.postfixBlkSame = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
xlrec.blknoPostfix = postfixBlkno = BufferGetBlockNumber(newBuffer);
|
||||
postfixBlkno = BufferGetBlockNumber(newBuffer);
|
||||
xlrec.offnumPostfix = postfixOffset =
|
||||
SpGistPageAddNewItem(state, BufferGetPage(newBuffer),
|
||||
(Item) postfixTuple, postfixTuple->size,
|
||||
NULL, false);
|
||||
MarkBufferDirty(newBuffer);
|
||||
xlrec.postfixBlkSame = false;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1808,7 +1799,23 @@ spgSplitNodeAction(Relation index, SpGistState *state,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE, rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
|
||||
XLogRegisterData((char *) prefixTuple, prefixTuple->size);
|
||||
XLogRegisterData((char *) postfixTuple, postfixTuple->size);
|
||||
|
||||
XLogRegisterBuffer(0, current->buffer, REGBUF_STANDARD);
|
||||
if (newBuffer != InvalidBuffer)
|
||||
{
|
||||
int flags;
|
||||
|
||||
flags = REGBUF_STANDARD;
|
||||
if (xlrec.newPage)
|
||||
flags |= REGBUF_WILL_INIT;
|
||||
XLogRegisterBuffer(1, newBuffer, flags);
|
||||
}
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_SPLIT_TUPLE);
|
||||
|
||||
PageSetLSN(current->page, recptr);
|
||||
|
||||
|
||||
@@ -105,15 +105,18 @@ spgbuild(PG_FUNCTION_ARGS)
|
||||
if (RelationNeedsWAL(index))
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
XLogRecData rdata;
|
||||
|
||||
/* WAL data is just the relfilenode */
|
||||
rdata.data = (char *) &(index->rd_node);
|
||||
rdata.len = sizeof(RelFileNode);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX, &rdata);
|
||||
/*
|
||||
* Replay will re-initialize the pages, so don't take full pages
|
||||
* images. No other data to log.
|
||||
*/
|
||||
XLogRegisterBuffer(0, metabuffer, REGBUF_WILL_INIT);
|
||||
XLogRegisterBuffer(1, rootbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(2, nullbuffer, REGBUF_WILL_INIT | REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_CREATE_INDEX);
|
||||
|
||||
PageSetLSN(BufferGetPage(metabuffer), recptr);
|
||||
PageSetLSN(BufferGetPage(rootbuffer), recptr);
|
||||
|
||||
@@ -127,7 +127,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
spgxlogVacuumLeaf xlrec;
|
||||
XLogRecData rdata[8];
|
||||
OffsetNumber toDead[MaxIndexTuplesPerPage];
|
||||
OffsetNumber toPlaceholder[MaxIndexTuplesPerPage];
|
||||
OffsetNumber moveSrc[MaxIndexTuplesPerPage];
|
||||
@@ -323,20 +322,6 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
|
||||
if (nDeletable != xlrec.nDead + xlrec.nPlaceholder + xlrec.nMove)
|
||||
elog(ERROR, "inconsistent counts of deletable tuples");
|
||||
|
||||
/* Prepare WAL record */
|
||||
xlrec.node = index->rd_node;
|
||||
xlrec.blkno = BufferGetBlockNumber(buffer);
|
||||
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumLeaf, 0);
|
||||
ACCEPT_RDATA_DATA(toDead, sizeof(OffsetNumber) * xlrec.nDead, 1);
|
||||
ACCEPT_RDATA_DATA(toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder, 2);
|
||||
ACCEPT_RDATA_DATA(moveSrc, sizeof(OffsetNumber) * xlrec.nMove, 3);
|
||||
ACCEPT_RDATA_DATA(moveDest, sizeof(OffsetNumber) * xlrec.nMove, 4);
|
||||
ACCEPT_RDATA_DATA(chainSrc, sizeof(OffsetNumber) * xlrec.nChain, 5);
|
||||
ACCEPT_RDATA_DATA(chainDest, sizeof(OffsetNumber) * xlrec.nChain, 6);
|
||||
ACCEPT_RDATA_BUFFER(buffer, 7);
|
||||
|
||||
/* Do the updates */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
@@ -389,7 +374,22 @@ vacuumLeafPage(spgBulkDeleteState *bds, Relation index, Buffer buffer,
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF, rdata);
|
||||
XLogBeginInsert();
|
||||
|
||||
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
|
||||
|
||||
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumLeaf);
|
||||
/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
|
||||
XLogRegisterData((char *) toDead, sizeof(OffsetNumber) * xlrec.nDead);
|
||||
XLogRegisterData((char *) toPlaceholder, sizeof(OffsetNumber) * xlrec.nPlaceholder);
|
||||
XLogRegisterData((char *) moveSrc, sizeof(OffsetNumber) * xlrec.nMove);
|
||||
XLogRegisterData((char *) moveDest, sizeof(OffsetNumber) * xlrec.nMove);
|
||||
XLogRegisterData((char *) chainSrc, sizeof(OffsetNumber) * xlrec.nChain);
|
||||
XLogRegisterData((char *) chainDest, sizeof(OffsetNumber) * xlrec.nChain);
|
||||
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_LEAF);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
@@ -407,12 +407,10 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
spgxlogVacuumRoot xlrec;
|
||||
XLogRecData rdata[3];
|
||||
OffsetNumber toDelete[MaxIndexTuplesPerPage];
|
||||
OffsetNumber i,
|
||||
max = PageGetMaxOffsetNumber(page);
|
||||
|
||||
xlrec.blkno = BufferGetBlockNumber(buffer);
|
||||
xlrec.nDelete = 0;
|
||||
|
||||
/* Scan page, identify tuples to delete, accumulate stats */
|
||||
@@ -448,15 +446,6 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
|
||||
if (xlrec.nDelete == 0)
|
||||
return; /* nothing more to do */
|
||||
|
||||
/* Prepare WAL record */
|
||||
xlrec.node = index->rd_node;
|
||||
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRoot, 0);
|
||||
/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
|
||||
ACCEPT_RDATA_DATA(toDelete, sizeof(OffsetNumber) * xlrec.nDelete, 1);
|
||||
ACCEPT_RDATA_BUFFER(buffer, 2);
|
||||
|
||||
/* Do the update */
|
||||
START_CRIT_SECTION();
|
||||
|
||||
@@ -469,7 +458,19 @@ vacuumLeafRoot(spgBulkDeleteState *bds, Relation index, Buffer buffer)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT, rdata);
|
||||
XLogBeginInsert();
|
||||
|
||||
/* Prepare WAL record */
|
||||
STORE_STATE(&bds->spgstate, xlrec.stateSrc);
|
||||
|
||||
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRoot);
|
||||
/* sizeof(xlrec) should be a multiple of sizeof(OffsetNumber) */
|
||||
XLogRegisterData((char *) toDelete,
|
||||
sizeof(OffsetNumber) * xlrec.nDelete);
|
||||
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_ROOT);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
@@ -499,10 +500,7 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
|
||||
OffsetNumber itemToPlaceholder[MaxIndexTuplesPerPage];
|
||||
OffsetNumber itemnos[MaxIndexTuplesPerPage];
|
||||
spgxlogVacuumRedirect xlrec;
|
||||
XLogRecData rdata[3];
|
||||
|
||||
xlrec.node = index->rd_node;
|
||||
xlrec.blkno = BufferGetBlockNumber(buffer);
|
||||
xlrec.nToPlaceholder = 0;
|
||||
xlrec.newestRedirectXid = InvalidTransactionId;
|
||||
|
||||
@@ -585,11 +583,15 @@ vacuumRedirectAndPlaceholder(Relation index, Buffer buffer)
|
||||
{
|
||||
XLogRecPtr recptr;
|
||||
|
||||
ACCEPT_RDATA_DATA(&xlrec, SizeOfSpgxlogVacuumRedirect, 0);
|
||||
ACCEPT_RDATA_DATA(itemToPlaceholder, sizeof(OffsetNumber) * xlrec.nToPlaceholder, 1);
|
||||
ACCEPT_RDATA_BUFFER(buffer, 2);
|
||||
XLogBeginInsert();
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT, rdata);
|
||||
XLogRegisterData((char *) &xlrec, SizeOfSpgxlogVacuumRedirect);
|
||||
XLogRegisterData((char *) itemToPlaceholder,
|
||||
sizeof(OffsetNumber) * xlrec.nToPlaceholder);
|
||||
|
||||
XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
|
||||
|
||||
recptr = XLogInsert(RM_SPGIST_ID, XLOG_SPGIST_VACUUM_REDIRECT);
|
||||
|
||||
PageSetLSN(page, recptr);
|
||||
}
|
||||
|
||||
@@ -71,33 +71,30 @@ addOrReplaceTuple(Page page, Item tuple, int size, OffsetNumber offset)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoCreateIndex(XLogReaderState *record)
|
||||
{
|
||||
RelFileNode *node = (RelFileNode *) XLogRecGetData(record);
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
/* Backup blocks are not used in create_index records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
|
||||
buffer = XLogReadBuffer(*node, SPGIST_METAPAGE_BLKNO, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
Assert(BufferGetBlockNumber(buffer) == SPGIST_METAPAGE_BLKNO);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
SpGistInitMetapage(page);
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
buffer = XLogReadBuffer(*node, SPGIST_ROOT_BLKNO, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 1);
|
||||
Assert(BufferGetBlockNumber(buffer) == SPGIST_ROOT_BLKNO);
|
||||
SpGistInitBuffer(buffer, SPGIST_LEAF);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
buffer = XLogReadBuffer(*node, SPGIST_NULL_BLKNO, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
buffer = XLogInitBufferForRedo(record, 2);
|
||||
Assert(BufferGetBlockNumber(buffer) == SPGIST_NULL_BLKNO);
|
||||
SpGistInitBuffer(buffer, SPGIST_LEAF | SPGIST_NULLS);
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -106,8 +103,9 @@ spgRedoCreateIndex(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoAddLeaf(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogAddLeaf *xldata = (spgxlogAddLeaf *) ptr;
|
||||
char *leafTuple;
|
||||
@@ -128,15 +126,13 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
if (xldata->newPage)
|
||||
{
|
||||
buffer = XLogReadBuffer(xldata->node, xldata->blknoLeaf, true);
|
||||
buffer = XLogInitBufferForRedo(record, 0);
|
||||
SpGistInitBuffer(buffer,
|
||||
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
action = XLogReadBufferForRedo(lsn, record, 0,
|
||||
xldata->node, xldata->blknoLeaf,
|
||||
&buffer);
|
||||
action = XLogReadBufferForRedo(record, 0, &buffer);
|
||||
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
@@ -164,7 +160,8 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
{
|
||||
/* replacing a DEAD tuple */
|
||||
PageIndexTupleDelete(page, xldata->offnumLeaf);
|
||||
if (PageAddItem(page, (Item) leafTuple, leafTupleHdr.size,
|
||||
if (PageAddItem(page,
|
||||
(Item) leafTuple, leafTupleHdr.size,
|
||||
xldata->offnumLeaf, false, false) != xldata->offnumLeaf)
|
||||
elog(ERROR, "failed to add item of size %u to SPGiST index page",
|
||||
leafTupleHdr.size);
|
||||
@@ -177,13 +174,14 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* update parent downlink if necessary */
|
||||
if (xldata->blknoParent != InvalidBlockNumber)
|
||||
if (xldata->offnumParent != InvalidOffsetNumber)
|
||||
{
|
||||
if (XLogReadBufferForRedo(lsn, record, 1,
|
||||
xldata->node, xldata->blknoParent,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 1, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
SpGistInnerTuple tuple;
|
||||
BlockNumber blknoLeaf;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, NULL, NULL, &blknoLeaf);
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
@@ -191,7 +189,7 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
|
||||
spgUpdateNodeLink(tuple, xldata->nodeI,
|
||||
xldata->blknoLeaf, xldata->offnumLeaf);
|
||||
blknoLeaf, xldata->offnumLeaf);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
@@ -202,8 +200,9 @@ spgRedoAddLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoMoveLeafs(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogMoveLeafs *xldata = (spgxlogMoveLeafs *) ptr;
|
||||
SpGistState state;
|
||||
@@ -213,6 +212,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
XLogRedoAction action;
|
||||
BlockNumber blknoDst;
|
||||
|
||||
XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoDst);
|
||||
|
||||
fillFakeState(&state, xldata->stateSrc);
|
||||
|
||||
@@ -235,15 +237,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
/* Insert tuples on the dest page (do first, so redirect is valid) */
|
||||
if (xldata->newPage)
|
||||
{
|
||||
buffer = XLogReadBuffer(xldata->node, xldata->blknoDst, true);
|
||||
buffer = XLogInitBufferForRedo(record, 1);
|
||||
SpGistInitBuffer(buffer,
|
||||
SPGIST_LEAF | (xldata->storesNulls ? SPGIST_NULLS : 0));
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
action = XLogReadBufferForRedo(lsn, record, 1,
|
||||
xldata->node, xldata->blknoDst,
|
||||
&buffer);
|
||||
action = XLogReadBufferForRedo(record, 1, &buffer);
|
||||
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
int i;
|
||||
@@ -260,7 +261,8 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
* field.
|
||||
*/
|
||||
leafTuple = ptr;
|
||||
memcpy(&leafTupleHdr, leafTuple, sizeof(SpGistLeafTupleData));
|
||||
memcpy(&leafTupleHdr, leafTuple,
|
||||
sizeof(SpGistLeafTupleData));
|
||||
|
||||
addOrReplaceTuple(page, (Item) leafTuple,
|
||||
leafTupleHdr.size, toInsert[i]);
|
||||
@@ -274,14 +276,14 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* Delete tuples from the source page, inserting a redirection pointer */
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoSrc,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
spgPageIndexMultiDelete(&state, page, toDelete, xldata->nMoves,
|
||||
state.isBuild ? SPGIST_PLACEHOLDER : SPGIST_REDIRECT,
|
||||
SPGIST_PLACEHOLDER,
|
||||
xldata->blknoDst,
|
||||
blknoDst,
|
||||
toInsert[nInsert - 1]);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -291,8 +293,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* And update the parent downlink */
|
||||
if (XLogReadBufferForRedo(lsn, record, 2, xldata->node, xldata->blknoParent,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
SpGistInnerTuple tuple;
|
||||
|
||||
@@ -302,7 +303,7 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
|
||||
spgUpdateNodeLink(tuple, xldata->nodeI,
|
||||
xldata->blknoDst, toInsert[nInsert - 1]);
|
||||
blknoDst, toInsert[nInsert - 1]);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
@@ -312,8 +313,9 @@ spgRedoMoveLeafs(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoAddNode(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogAddNode *xldata = (spgxlogAddNode *) ptr;
|
||||
char *innerTuple;
|
||||
@@ -321,7 +323,6 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
SpGistState state;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
int bbi;
|
||||
XLogRedoAction action;
|
||||
|
||||
ptr += sizeof(spgxlogAddNode);
|
||||
@@ -331,17 +332,18 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
fillFakeState(&state, xldata->stateSrc);
|
||||
|
||||
if (xldata->blknoNew == InvalidBlockNumber)
|
||||
if (!XLogRecHasBlockRef(record, 1))
|
||||
{
|
||||
/* update in place */
|
||||
Assert(xldata->blknoParent == InvalidBlockNumber);
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
Assert(xldata->parentBlk == -1);
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
PageIndexTupleDelete(page, xldata->offnum);
|
||||
if (PageAddItem(page, (Item) innerTuple, innerTupleHdr.size,
|
||||
xldata->offnum, false, false) != xldata->offnum)
|
||||
xldata->offnum,
|
||||
false, false) != xldata->offnum)
|
||||
elog(ERROR, "failed to add item of size %u to SPGiST index page",
|
||||
innerTupleHdr.size);
|
||||
|
||||
@@ -353,30 +355,30 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
else
|
||||
{
|
||||
BlockNumber blkno;
|
||||
BlockNumber blknoNew;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, NULL, NULL, &blkno);
|
||||
XLogRecGetBlockTag(record, 1, NULL, NULL, &blknoNew);
|
||||
|
||||
/*
|
||||
* In normal operation we would have all three pages (source, dest,
|
||||
* and parent) locked simultaneously; but in WAL replay it should be
|
||||
* safe to update them one at a time, as long as we do it in the right
|
||||
* order.
|
||||
*
|
||||
* The logic here depends on the assumption that blkno != blknoNew,
|
||||
* else we can't tell which BKP bit goes with which page, and the LSN
|
||||
* checks could go wrong too.
|
||||
* order. We must insert the new tuple before replacing the old tuple
|
||||
* with the redirect tuple.
|
||||
*/
|
||||
Assert(xldata->blkno != xldata->blknoNew);
|
||||
|
||||
/* Install new tuple first so redirect is valid */
|
||||
if (xldata->newPage)
|
||||
{
|
||||
buffer = XLogReadBuffer(xldata->node, xldata->blknoNew, true);
|
||||
/* AddNode is not used for nulls pages */
|
||||
buffer = XLogInitBufferForRedo(record, 1);
|
||||
SpGistInitBuffer(buffer, 0);
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
action = XLogReadBufferForRedo(lsn, record, 1,
|
||||
xldata->node, xldata->blknoNew,
|
||||
&buffer);
|
||||
action = XLogReadBufferForRedo(record, 1, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
@@ -385,22 +387,26 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
innerTupleHdr.size, xldata->offnumNew);
|
||||
|
||||
/*
|
||||
* If parent is in this same page, don't advance LSN; doing so
|
||||
* would fool us into not applying the parent downlink update
|
||||
* below. We'll update the LSN when we fix the parent downlink.
|
||||
* If parent is in this same page, update it now.
|
||||
*/
|
||||
if (xldata->blknoParent != xldata->blknoNew)
|
||||
if (xldata->parentBlk == 1)
|
||||
{
|
||||
PageSetLSN(page, lsn);
|
||||
SpGistInnerTuple parentTuple;
|
||||
|
||||
parentTuple = (SpGistInnerTuple) PageGetItem(page,
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
|
||||
spgUpdateNodeLink(parentTuple, xldata->nodeI,
|
||||
blknoNew, xldata->offnumNew);
|
||||
}
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/* Delete old tuple, replacing it with redirect or placeholder tuple */
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
SpGistDeadTuple dt;
|
||||
|
||||
@@ -412,11 +418,12 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
InvalidOffsetNumber);
|
||||
else
|
||||
dt = spgFormDeadTuple(&state, SPGIST_REDIRECT,
|
||||
xldata->blknoNew,
|
||||
blknoNew,
|
||||
xldata->offnumNew);
|
||||
|
||||
PageIndexTupleDelete(page, xldata->offnum);
|
||||
if (PageAddItem(page, (Item) dt, dt->size, xldata->offnum,
|
||||
if (PageAddItem(page, (Item) dt, dt->size,
|
||||
xldata->offnum,
|
||||
false, false) != xldata->offnum)
|
||||
elog(ERROR, "failed to add item of size %u to SPGiST index page",
|
||||
dt->size);
|
||||
@@ -427,67 +434,55 @@ spgRedoAddNode(XLogRecPtr lsn, XLogRecord *record)
|
||||
SpGistPageGetOpaque(page)->nRedirection++;
|
||||
|
||||
/*
|
||||
* If parent is in this same page, don't advance LSN; doing so
|
||||
* would fool us into not applying the parent downlink update
|
||||
* below. We'll update the LSN when we fix the parent downlink.
|
||||
* If parent is in this same page, update it now.
|
||||
*/
|
||||
if (xldata->blknoParent != xldata->blkno)
|
||||
if (xldata->parentBlk == 0)
|
||||
{
|
||||
PageSetLSN(page, lsn);
|
||||
SpGistInnerTuple parentTuple;
|
||||
|
||||
parentTuple = (SpGistInnerTuple) PageGetItem(page,
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
|
||||
spgUpdateNodeLink(parentTuple, xldata->nodeI,
|
||||
blknoNew, xldata->offnumNew);
|
||||
}
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
/*
|
||||
* Update parent downlink. Since parent could be in either of the
|
||||
* previous two buffers, it's a bit tricky to determine which BKP bit
|
||||
* applies.
|
||||
* Update parent downlink (if we didn't do it as part of the source or
|
||||
* destination page update already).
|
||||
*/
|
||||
if (xldata->blknoParent == xldata->blkno)
|
||||
bbi = 0;
|
||||
else if (xldata->blknoParent == xldata->blknoNew)
|
||||
bbi = 1;
|
||||
else
|
||||
bbi = 2;
|
||||
|
||||
if (record->xl_info & XLR_BKP_BLOCK(bbi))
|
||||
if (xldata->parentBlk == 2)
|
||||
{
|
||||
if (bbi == 2) /* else we already did it */
|
||||
(void) RestoreBackupBlock(lsn, record, bbi, false, false);
|
||||
action = BLK_RESTORED;
|
||||
buffer = InvalidBuffer;
|
||||
}
|
||||
else
|
||||
{
|
||||
action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
|
||||
xldata->blknoParent, &buffer);
|
||||
Assert(action != BLK_RESTORED);
|
||||
}
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
SpGistInnerTuple innerTuple;
|
||||
if (XLogReadBufferForRedo(record, 2, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
SpGistInnerTuple parentTuple;
|
||||
|
||||
page = BufferGetPage(buffer);
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
innerTuple = (SpGistInnerTuple) PageGetItem(page,
|
||||
parentTuple = (SpGistInnerTuple) PageGetItem(page,
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
|
||||
spgUpdateNodeLink(innerTuple, xldata->nodeI,
|
||||
xldata->blknoNew, xldata->offnumNew);
|
||||
spgUpdateNodeLink(parentTuple, xldata->nodeI,
|
||||
blknoNew, xldata->offnumNew);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoSplitTuple(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogSplitTuple *xldata = (spgxlogSplitTuple *) ptr;
|
||||
char *prefixTuple;
|
||||
@@ -496,6 +491,7 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
|
||||
SpGistInnerTupleData postfixTupleHdr;
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
XLogRedoAction action;
|
||||
|
||||
ptr += sizeof(spgxlogSplitTuple);
|
||||
prefixTuple = ptr;
|
||||
@@ -513,22 +509,17 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
|
||||
*/
|
||||
|
||||
/* insert postfix tuple first to avoid dangling link */
|
||||
if (xldata->blknoPostfix != xldata->blknoPrefix)
|
||||
if (!xldata->postfixBlkSame)
|
||||
{
|
||||
XLogRedoAction action;
|
||||
|
||||
if (xldata->newPage)
|
||||
{
|
||||
buffer = XLogReadBuffer(xldata->node, xldata->blknoPostfix, true);
|
||||
buffer = XLogInitBufferForRedo(record, 1);
|
||||
/* SplitTuple is not used for nulls pages */
|
||||
SpGistInitBuffer(buffer, 0);
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
action = XLogReadBufferForRedo(lsn, record, 1,
|
||||
xldata->node, xldata->blknoPostfix,
|
||||
&buffer);
|
||||
|
||||
action = XLogReadBufferForRedo(record, 1, &buffer);
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
@@ -544,18 +535,19 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
/* now handle the original page */
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blknoPrefix,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
PageIndexTupleDelete(page, xldata->offnumPrefix);
|
||||
if (PageAddItem(page, (Item) prefixTuple, prefixTupleHdr.size,
|
||||
xldata->offnumPrefix, false, false) != xldata->offnumPrefix)
|
||||
elog(ERROR, "failed to add item of size %u to SPGiST index page",
|
||||
prefixTupleHdr.size);
|
||||
|
||||
if (xldata->blknoPostfix == xldata->blknoPrefix)
|
||||
addOrReplaceTuple(page, (Item) postfixTuple, postfixTupleHdr.size,
|
||||
if (xldata->postfixBlkSame)
|
||||
addOrReplaceTuple(page, (Item) postfixTuple,
|
||||
postfixTupleHdr.size,
|
||||
xldata->offnumPostfix);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -566,8 +558,9 @@ spgRedoSplitTuple(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoPickSplit(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogPickSplit *xldata = (spgxlogPickSplit *) ptr;
|
||||
char *innerTuple;
|
||||
@@ -578,14 +571,16 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
uint8 *leafPageSelect;
|
||||
Buffer srcBuffer;
|
||||
Buffer destBuffer;
|
||||
Buffer innerBuffer;
|
||||
Page srcPage;
|
||||
Page destPage;
|
||||
Buffer innerBuffer;
|
||||
Page page;
|
||||
int bbi;
|
||||
int i;
|
||||
BlockNumber blknoInner;
|
||||
XLogRedoAction action;
|
||||
|
||||
XLogRecGetBlockTag(record, 2, NULL, NULL, &blknoInner);
|
||||
|
||||
fillFakeState(&state, xldata->stateSrc);
|
||||
|
||||
ptr += SizeOfSpgxlogPickSplit;
|
||||
@@ -603,13 +598,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
/* now ptr points to the list of leaf tuples */
|
||||
|
||||
/*
|
||||
* It's a bit tricky to identify which pages have been handled as
|
||||
* full-page images, so we explicitly count each referenced buffer.
|
||||
*/
|
||||
bbi = 0;
|
||||
|
||||
if (SpGistBlockIsRoot(xldata->blknoSrc))
|
||||
if (xldata->isRootSplit)
|
||||
{
|
||||
/* when splitting root, we touch it only in the guise of new inner */
|
||||
srcBuffer = InvalidBuffer;
|
||||
@@ -618,8 +607,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
else if (xldata->initSrc)
|
||||
{
|
||||
/* just re-init the source page */
|
||||
srcBuffer = XLogReadBuffer(xldata->node, xldata->blknoSrc, true);
|
||||
Assert(BufferIsValid(srcBuffer));
|
||||
srcBuffer = XLogInitBufferForRedo(record, 0);
|
||||
srcPage = (Page) BufferGetPage(srcBuffer);
|
||||
|
||||
SpGistInitBuffer(srcBuffer,
|
||||
@@ -634,9 +622,8 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
* inserting leaf tuples and the new inner tuple, else the added
|
||||
* redirect tuple will be a dangling link.)
|
||||
*/
|
||||
if (XLogReadBufferForRedo(lsn, record, bbi,
|
||||
xldata->node, xldata->blknoSrc,
|
||||
&srcBuffer) == BLK_NEEDS_REDO)
|
||||
srcPage = NULL;
|
||||
if (XLogReadBufferForRedo(record, 0, &srcBuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
srcPage = BufferGetPage(srcBuffer);
|
||||
|
||||
@@ -650,7 +637,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
toDelete, xldata->nDelete,
|
||||
SPGIST_REDIRECT,
|
||||
SPGIST_PLACEHOLDER,
|
||||
xldata->blknoInner,
|
||||
blknoInner,
|
||||
xldata->offnumInner);
|
||||
else
|
||||
spgPageIndexMultiDelete(&state, srcPage,
|
||||
@@ -662,15 +649,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
/* don't update LSN etc till we're done with it */
|
||||
}
|
||||
else
|
||||
{
|
||||
srcPage = NULL; /* don't do any page updates */
|
||||
}
|
||||
bbi++;
|
||||
}
|
||||
|
||||
/* try to access dest page if any */
|
||||
if (xldata->blknoDest == InvalidBlockNumber)
|
||||
if (!XLogRecHasBlockRef(record, 1))
|
||||
{
|
||||
destBuffer = InvalidBuffer;
|
||||
destPage = NULL;
|
||||
@@ -678,8 +660,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
else if (xldata->initDest)
|
||||
{
|
||||
/* just re-init the dest page */
|
||||
destBuffer = XLogReadBuffer(xldata->node, xldata->blknoDest, true);
|
||||
Assert(BufferIsValid(destBuffer));
|
||||
destBuffer = XLogInitBufferForRedo(record, 1);
|
||||
destPage = (Page) BufferGetPage(destBuffer);
|
||||
|
||||
SpGistInitBuffer(destBuffer,
|
||||
@@ -692,17 +673,10 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
* We could probably release the page lock immediately in the
|
||||
* full-page-image case, but for safety let's hold it till later.
|
||||
*/
|
||||
if (XLogReadBufferForRedo(lsn, record, bbi,
|
||||
xldata->node, xldata->blknoDest,
|
||||
&destBuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
if (XLogReadBufferForRedo(record, 1, &destBuffer) == BLK_NEEDS_REDO)
|
||||
destPage = (Page) BufferGetPage(destBuffer);
|
||||
}
|
||||
else
|
||||
{
|
||||
destPage = NULL; /* don't do any page updates */
|
||||
}
|
||||
bbi++;
|
||||
}
|
||||
|
||||
/* restore leaf tuples to src and/or dest page */
|
||||
@@ -739,14 +713,12 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
/* restore new inner tuple */
|
||||
if (xldata->initInner)
|
||||
{
|
||||
innerBuffer = XLogReadBuffer(xldata->node, xldata->blknoInner, true);
|
||||
SpGistInitBuffer(innerBuffer,
|
||||
(xldata->storesNulls ? SPGIST_NULLS : 0));
|
||||
innerBuffer = XLogInitBufferForRedo(record, 2);
|
||||
SpGistInitBuffer(innerBuffer, (xldata->storesNulls ? SPGIST_NULLS : 0));
|
||||
action = BLK_NEEDS_REDO;
|
||||
}
|
||||
else
|
||||
action = XLogReadBufferForRedo(lsn, record, bbi, xldata->node,
|
||||
xldata->blknoInner, &innerBuffer);
|
||||
action = XLogReadBufferForRedo(record, 2, &innerBuffer);
|
||||
|
||||
if (action == BLK_NEEDS_REDO)
|
||||
{
|
||||
@@ -756,14 +728,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
xldata->offnumInner);
|
||||
|
||||
/* if inner is also parent, update link while we're here */
|
||||
if (xldata->blknoInner == xldata->blknoParent)
|
||||
if (xldata->innerIsParent)
|
||||
{
|
||||
SpGistInnerTuple parent;
|
||||
|
||||
parent = (SpGistInnerTuple) PageGetItem(page,
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
spgUpdateNodeLink(parent, xldata->nodeI,
|
||||
xldata->blknoInner, xldata->offnumInner);
|
||||
blknoInner, xldata->offnumInner);
|
||||
}
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
@@ -771,7 +743,6 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
if (BufferIsValid(innerBuffer))
|
||||
UnlockReleaseBuffer(innerBuffer);
|
||||
bbi++;
|
||||
|
||||
/*
|
||||
* Now we can release the leaf-page locks. It's okay to do this before
|
||||
@@ -783,18 +754,11 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
UnlockReleaseBuffer(destBuffer);
|
||||
|
||||
/* update parent downlink, unless we did it above */
|
||||
if (xldata->blknoParent == InvalidBlockNumber)
|
||||
{
|
||||
/* no parent cause we split the root */
|
||||
Assert(SpGistBlockIsRoot(xldata->blknoInner));
|
||||
}
|
||||
else if (xldata->blknoInner != xldata->blknoParent)
|
||||
if (XLogRecHasBlockRef(record, 3))
|
||||
{
|
||||
Buffer parentBuffer;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, bbi,
|
||||
xldata->node, xldata->blknoParent,
|
||||
&parentBuffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 3, &parentBuffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
SpGistInnerTuple parent;
|
||||
|
||||
@@ -803,7 +767,7 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
parent = (SpGistInnerTuple) PageGetItem(page,
|
||||
PageGetItemId(page, xldata->offnumParent));
|
||||
spgUpdateNodeLink(parent, xldata->nodeI,
|
||||
xldata->blknoInner, xldata->offnumInner);
|
||||
blknoInner, xldata->offnumInner);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(parentBuffer);
|
||||
@@ -811,11 +775,14 @@ spgRedoPickSplit(XLogRecPtr lsn, XLogRecord *record)
|
||||
if (BufferIsValid(parentBuffer))
|
||||
UnlockReleaseBuffer(parentBuffer);
|
||||
}
|
||||
else
|
||||
Assert(xldata->innerIsParent || xldata->isRootSplit);
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoVacuumLeaf(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogVacuumLeaf *xldata = (spgxlogVacuumLeaf *) ptr;
|
||||
OffsetNumber *toDead;
|
||||
@@ -844,8 +811,7 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
ptr += sizeof(OffsetNumber) * xldata->nChain;
|
||||
chainDest = (OffsetNumber *) ptr;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
@@ -897,8 +863,9 @@ spgRedoVacuumLeaf(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoVacuumRoot(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogVacuumRoot *xldata = (spgxlogVacuumRoot *) ptr;
|
||||
OffsetNumber *toDelete;
|
||||
@@ -907,8 +874,7 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
|
||||
|
||||
toDelete = xldata->offsets;
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = BufferGetPage(buffer);
|
||||
|
||||
@@ -923,8 +889,9 @@ spgRedoVacuumRoot(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
static void
|
||||
spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
|
||||
spgRedoVacuumRedirect(XLogReaderState *record)
|
||||
{
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
char *ptr = XLogRecGetData(record);
|
||||
spgxlogVacuumRedirect *xldata = (spgxlogVacuumRedirect *) ptr;
|
||||
OffsetNumber *itemToPlaceholder;
|
||||
@@ -939,12 +906,16 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
|
||||
if (InHotStandby)
|
||||
{
|
||||
if (TransactionIdIsValid(xldata->newestRedirectXid))
|
||||
{
|
||||
RelFileNode node;
|
||||
|
||||
XLogRecGetBlockTag(record, 0, &node, NULL, NULL);
|
||||
ResolveRecoveryConflictWithSnapshot(xldata->newestRedirectXid,
|
||||
xldata->node);
|
||||
node);
|
||||
}
|
||||
}
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, 0, xldata->node, xldata->blkno,
|
||||
&buffer) == BLK_NEEDS_REDO)
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
Page page = BufferGetPage(buffer);
|
||||
SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
|
||||
@@ -995,40 +966,40 @@ spgRedoVacuumRedirect(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
|
||||
void
|
||||
spg_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
spg_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
MemoryContext oldCxt;
|
||||
|
||||
oldCxt = MemoryContextSwitchTo(opCtx);
|
||||
switch (info)
|
||||
{
|
||||
case XLOG_SPGIST_CREATE_INDEX:
|
||||
spgRedoCreateIndex(lsn, record);
|
||||
spgRedoCreateIndex(record);
|
||||
break;
|
||||
case XLOG_SPGIST_ADD_LEAF:
|
||||
spgRedoAddLeaf(lsn, record);
|
||||
spgRedoAddLeaf(record);
|
||||
break;
|
||||
case XLOG_SPGIST_MOVE_LEAFS:
|
||||
spgRedoMoveLeafs(lsn, record);
|
||||
spgRedoMoveLeafs(record);
|
||||
break;
|
||||
case XLOG_SPGIST_ADD_NODE:
|
||||
spgRedoAddNode(lsn, record);
|
||||
spgRedoAddNode(record);
|
||||
break;
|
||||
case XLOG_SPGIST_SPLIT_TUPLE:
|
||||
spgRedoSplitTuple(lsn, record);
|
||||
spgRedoSplitTuple(record);
|
||||
break;
|
||||
case XLOG_SPGIST_PICKSPLIT:
|
||||
spgRedoPickSplit(lsn, record);
|
||||
spgRedoPickSplit(record);
|
||||
break;
|
||||
case XLOG_SPGIST_VACUUM_LEAF:
|
||||
spgRedoVacuumLeaf(lsn, record);
|
||||
spgRedoVacuumLeaf(record);
|
||||
break;
|
||||
case XLOG_SPGIST_VACUUM_ROOT:
|
||||
spgRedoVacuumRoot(lsn, record);
|
||||
spgRedoVacuumRoot(record);
|
||||
break;
|
||||
case XLOG_SPGIST_VACUUM_REDIRECT:
|
||||
spgRedoVacuumRedirect(lsn, record);
|
||||
spgRedoVacuumRedirect(record);
|
||||
break;
|
||||
default:
|
||||
elog(PANIC, "spg_redo: unknown op code %u", info);
|
||||
|
||||
@@ -440,96 +440,164 @@ happen before the WAL record is inserted; see notes in SyncOneBuffer().)
|
||||
Note that marking a buffer dirty with MarkBufferDirty() should only
|
||||
happen iff you write a WAL record; see Writing Hints below.
|
||||
|
||||
5. If the relation requires WAL-logging, build a WAL log record and pass it
|
||||
to XLogInsert(); then update the page's LSN using the returned XLOG
|
||||
location. For instance,
|
||||
5. If the relation requires WAL-logging, build a WAL record using
|
||||
XLogBeginInsert and XLogRegister* functions, and insert it. (See
|
||||
"Constructing a WAL record" below.) Then update the page's LSN using the
|
||||
returned XLOG location. For instance,
|
||||
|
||||
recptr = XLogInsert(rmgr_id, info, rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterBuffer(...);
|
||||
XLogRegisterData(...);
|
||||
recptr = XLogInsert(rmgr_id, info);
|
||||
|
||||
PageSetLSN(dp, recptr);
|
||||
// Note that we no longer do PageSetTLI() from 9.3 onwards
|
||||
// since that field on a page has now changed its meaning.
|
||||
|
||||
6. END_CRIT_SECTION()
|
||||
|
||||
7. Unlock and unpin the buffer(s).
|
||||
|
||||
XLogInsert's "rdata" argument is an array of pointer/size items identifying
|
||||
chunks of data to be written in the XLOG record, plus optional shared-buffer
|
||||
IDs for chunks that are in shared buffers rather than temporary variables.
|
||||
The "rdata" array must mention (at least once) each of the shared buffers
|
||||
being modified, unless the action is such that the WAL replay routine can
|
||||
reconstruct the entire page contents. XLogInsert includes the logic that
|
||||
tests to see whether a shared buffer has been modified since the last
|
||||
checkpoint. If not, the entire page contents are logged rather than just the
|
||||
portion(s) pointed to by "rdata".
|
||||
Complex changes (such as a multilevel index insertion) normally need to be
|
||||
described by a series of atomic-action WAL records. The intermediate states
|
||||
must be self-consistent, so that if the replay is interrupted between any
|
||||
two actions, the system is fully functional. In btree indexes, for example,
|
||||
a page split requires a new page to be allocated, and an insertion of a new
|
||||
key in the parent btree level, but for locking reasons this has to be
|
||||
reflected by two separate WAL records. Replaying the first record, to
|
||||
allocate the new page and move tuples to it, sets a flag on the page to
|
||||
indicate that the key has not been inserted to the parent yet. Replaying the
|
||||
second record clears the flag. This intermediate state is never seen by
|
||||
other backends during normal operation, because the lock on the child page
|
||||
is held across the two actions, but will be seen if the operation is
|
||||
interrupted before writing the second WAL record. The search algorithm works
|
||||
with the intermediate state as normal, but if an insertion encounters a page
|
||||
with the incomplete-split flag set, it will finish the interrupted split by
|
||||
inserting the key to the parent, before proceeding.
|
||||
|
||||
Because XLogInsert drops the rdata components associated with buffers it
|
||||
chooses to log in full, the WAL replay routines normally need to test to see
|
||||
which buffers were handled that way --- otherwise they may be misled about
|
||||
what the XLOG record actually contains. XLOG records that describe multi-page
|
||||
changes therefore require some care to design: you must be certain that you
|
||||
know what data is indicated by each "BKP" bit. An example of the trickiness
|
||||
is that in a HEAP_UPDATE record, BKP(0) normally is associated with the source
|
||||
page and BKP(1) is associated with the destination page --- but if these are
|
||||
the same page, only BKP(0) would have been set.
|
||||
|
||||
For this reason as well as the risk of deadlocking on buffer locks, it's best
|
||||
to design WAL records so that they reflect small atomic actions involving just
|
||||
one or a few pages. The current XLOG infrastructure cannot handle WAL records
|
||||
involving references to more than four shared buffers, anyway.
|
||||
Constructing a WAL record
|
||||
-------------------------
|
||||
|
||||
In the case where the WAL record contains enough information to re-generate
|
||||
the entire contents of a page, do *not* show that page's buffer ID in the
|
||||
rdata array, even if some of the rdata items point into the buffer. This is
|
||||
because you don't want XLogInsert to log the whole page contents. The
|
||||
standard replay-routine pattern for this case is
|
||||
A WAL record consists of a header common to all WAL record types,
|
||||
record-specific data, and information about the data blocks modified. Each
|
||||
modified data block is identified by an ID number, and can optionally have
|
||||
more record-specific data associated with the block. If XLogInsert decides
|
||||
that a full-page image of a block needs to be taken, the data associated
|
||||
with that block is not included.
|
||||
|
||||
buffer = XLogReadBuffer(rnode, blkno, true);
|
||||
Assert(BufferIsValid(buffer));
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
The API for constructing a WAL record consists of five functions:
|
||||
XLogBeginInsert, XLogRegisterBuffer, XLogRegisterData, XLogRegisterBufData,
|
||||
and XLogInsert. First, call XLogBeginInsert(). Then register all the buffers
|
||||
modified, and data needed to replay the changes, using XLogRegister*
|
||||
functions. Finally, insert the constructed record into the WAL by calling
|
||||
XLogInsert().
|
||||
|
||||
... initialize the page ...
|
||||
XLogBeginInsert();
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
UnlockReleaseBuffer(buffer);
|
||||
/* register buffers modified as part of this WAL-logged action */
|
||||
XLogRegisterBuffer(0, lbuffer, REGBUF_STANDARD);
|
||||
XLogRegisterBuffer(1, rbuffer, REGBUF_STANDARD);
|
||||
|
||||
In the case where the WAL record provides only enough information to
|
||||
incrementally update the page, the rdata array *must* mention the buffer
|
||||
ID at least once; otherwise there is no defense against torn-page problems.
|
||||
The standard replay-routine pattern for this case is
|
||||
/* register data that is always included in the WAL record */
|
||||
XLogRegisterData(&xlrec, SizeOfFictionalAction);
|
||||
|
||||
if (XLogReadBufferForRedo(lsn, record, N, rnode, blkno, &buffer) == BLK_NEEDS_REDO)
|
||||
{
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
/*
|
||||
* register data associated with a buffer. This will not be included
|
||||
* in the record if a full-page image is taken.
|
||||
*/
|
||||
XLogRegisterBufData(0, tuple->data, tuple->len);
|
||||
|
||||
... apply the change ...
|
||||
/* more data associated with the buffer */
|
||||
XLogRegisterBufData(0, data2, len2);
|
||||
|
||||
PageSetLSN(page, lsn);
|
||||
MarkBufferDirty(buffer);
|
||||
}
|
||||
if (BufferIsValid(buffer))
|
||||
UnlockReleaseBuffer(buffer);
|
||||
/*
|
||||
* Ok, all the data and buffers to include in the WAL record have
|
||||
* been registered. Insert the record.
|
||||
*/
|
||||
recptr = XLogInsert(RM_FOO_ID, XLOG_FOOBAR_DO_STUFF);
|
||||
|
||||
XLogReadBufferForRedo reads the page from disk, and checks what action needs to
|
||||
be taken to the page. If the XLR_BKP_BLOCK(N) flag is set, it restores the
|
||||
full page image and returns BLK_RESTORED. If there is no full page image, but
|
||||
page cannot be found or if the change has already been replayed (i.e. the
|
||||
page's LSN >= the record we're replaying), it returns BLK_NOTFOUND or BLK_DONE,
|
||||
respectively. Usually, the redo routine only needs to pay attention to the
|
||||
BLK_NEEDS_REDO return code, which means that the routine should apply the
|
||||
incremental change. In any case, the caller is responsible for unlocking and
|
||||
releasing the buffer. Note that XLogReadBufferForRedo returns the buffer
|
||||
locked even if no redo is required, unless the page does not exist.
|
||||
Details of the API functions:
|
||||
|
||||
As noted above, for a multi-page update you need to be able to determine
|
||||
which XLR_BKP_BLOCK(N) flag applies to each page. If a WAL record reflects
|
||||
a combination of fully-rewritable and incremental updates, then the rewritable
|
||||
pages don't count for the XLR_BKP_BLOCK(N) numbering. (XLR_BKP_BLOCK(N) is
|
||||
associated with the N'th distinct buffer ID seen in the "rdata" array, and
|
||||
per the above discussion, fully-rewritable buffers shouldn't be mentioned in
|
||||
"rdata".)
|
||||
void XLogBeginInsert(void)
|
||||
|
||||
Must be called before XLogRegisterBuffer and XLogRegisterData.
|
||||
|
||||
void XLogResetInsertion(void)
|
||||
|
||||
Clear any currently registered data and buffers from the WAL record
|
||||
construction workspace. This is only needed if you have already called
|
||||
XLogBeginInsert(), but decide to not insert the record after all.
|
||||
|
||||
void XLogEnsureRecordSpace(int max_block_id, int nrdatas)
|
||||
|
||||
Normally, the WAL record construction buffers have the following limits:
|
||||
|
||||
* highest block ID that can be used is 4 (allowing five block references)
|
||||
* Max 20 chunks of registered data
|
||||
|
||||
These default limits are enough for most record types that change some
|
||||
on-disk structures. For the odd case that requires more data, or needs to
|
||||
modify more buffers, these limits can be raised by calling
|
||||
XLogEnsureRecordSpace(). XLogEnsureRecordSpace() must be called before
|
||||
XLogBeginInsert(), and outside a critical section.
|
||||
|
||||
void XLogRegisterBuffer(uint8 block_id, Buffer buf, uint8 flags);
|
||||
|
||||
XLogRegisterBuffer adds information about a data block to the WAL record.
|
||||
block_id is an arbitrary number used to identify this page reference in
|
||||
the redo routine. The information needed to re-find the page at redo -
|
||||
relfilenode, fork, and block number - is included in the WAL record.
|
||||
|
||||
XLogInsert will automatically include a full copy of the page contents, if
|
||||
this is the first modification of the buffer since the last checkpoint.
|
||||
It is important to register every buffer modified by the action with
|
||||
XLogRegisterBuffer, to avoid torn-page hazards.
|
||||
|
||||
The flags control when and how the buffer contents are included in the
|
||||
WAL record. Normally, a full-page image is taken only if the page has not
|
||||
been modified since the last checkpoint, and only if full_page_writes=on
|
||||
or an online backup is in progress. The REGBUF_FORCE_IMAGE flag can be
|
||||
used to force a full-page image to always be included; that is useful
|
||||
e.g. for an operation that rewrites most of the page, so that tracking the
|
||||
details is not worth it. For the rare case where it is not necessary to
|
||||
protect from torn pages, the REGBUF_NO_IMAGE flag can be used to suppress
|
||||
a full-page image from being taken. REGBUF_WILL_INIT also suppresses a full
|
||||
page image, but the redo routine must re-generate the page from scratch,
|
||||
without looking at the old page contents. Re-initializing the page
|
||||
protects from torn page hazards like a full page image does.
|
||||
|
||||
The REGBUF_STANDARD flag can be specified together with the other flags to
|
||||
indicate that the page follows the standard page layout. It causes the
|
||||
area between pd_lower and pd_upper to be left out from the image, reducing
|
||||
WAL volume.
|
||||
|
||||
If the REGBUF_KEEP_DATA flag is given, any per-buffer data registered with
|
||||
XLogRegisterBufData() is included in the WAL record even if a full-page
|
||||
image is taken.
|
||||
|
||||
void XLogRegisterData(char *data, int len);
|
||||
|
||||
XLogRegisterData is used to include arbitrary data in the WAL record. If
|
||||
XLogRegisterData() is called multiple times, the data are appended, and
|
||||
will be made available to the redo routine as one contiguous chunk.
|
||||
|
||||
void XLogRegisterBufData(uint8 block_id, char *data, int len);
|
||||
|
||||
XLogRegisterBufData is used to include data associated with a particular
|
||||
buffer that was registered earlier with XLogRegisterBuffer(). If
|
||||
XLogRegisterBufData() is called multiple times with the same block ID, the
|
||||
data are appended, and will be made available to the redo routine as one
|
||||
contiguous chunk.
|
||||
|
||||
If a full-page image of the buffer is taken at insertion, the data is not
|
||||
included in the WAL record, unless the REGBUF_KEEP_DATA flag is used.
|
||||
|
||||
|
||||
Writing a REDO routine
|
||||
----------------------
|
||||
|
||||
A REDO routine uses the data and page references included in the WAL record
|
||||
to reconstruct the new state of the page. The record decoding functions
|
||||
and macros in xlogreader.c/h can be used to extract the data from the record.
|
||||
|
||||
When replaying a WAL record that describes changes on multiple pages, you
|
||||
must be careful to lock the pages properly to prevent concurrent Hot Standby
|
||||
@@ -545,23 +613,6 @@ either an exclusive buffer lock or a shared lock plus buffer header lock,
|
||||
or be writing the data block directly rather than through shared buffers
|
||||
while holding AccessExclusiveLock on the relation.
|
||||
|
||||
Due to all these constraints, complex changes (such as a multilevel index
|
||||
insertion) normally need to be described by a series of atomic-action WAL
|
||||
records. The intermediate states must be self-consistent, so that if the
|
||||
replay is interrupted between any two actions, the system is fully
|
||||
functional. In btree indexes, for example, a page split requires a new page
|
||||
to be allocated, and an insertion of a new key in the parent btree level,
|
||||
but for locking reasons this has to be reflected by two separate WAL
|
||||
records. Replaying the first record, to allocate the new page and move
|
||||
tuples to it, sets a flag on the page to indicate that the key has not been
|
||||
inserted to the parent yet. Replaying the second record clears the flag.
|
||||
This intermediate state is never seen by other backends during normal
|
||||
operation, because the lock on the child page is held across the two
|
||||
actions, but will be seen if the operation is interrupted before writing
|
||||
the second WAL record. The search algorithm works with the intermediate
|
||||
state as normal, but if an insertion encounters a page with the
|
||||
incomplete-split flag set, it will finish the interrupted split by
|
||||
inserting the key to the parent, before proceeding.
|
||||
|
||||
Writing Hints
|
||||
-------------
|
||||
|
||||
@@ -699,13 +699,9 @@ CLOGPagePrecedes(int page1, int page2)
|
||||
static void
|
||||
WriteZeroPageXlogRec(int pageno)
|
||||
{
|
||||
XLogRecData rdata;
|
||||
|
||||
rdata.data = (char *) (&pageno);
|
||||
rdata.len = sizeof(int);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE, &rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&pageno), sizeof(int));
|
||||
(void) XLogInsert(RM_CLOG_ID, CLOG_ZEROPAGE);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -717,14 +713,11 @@ WriteZeroPageXlogRec(int pageno)
|
||||
static void
|
||||
WriteTruncateXlogRec(int pageno)
|
||||
{
|
||||
XLogRecData rdata;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
rdata.data = (char *) (&pageno);
|
||||
rdata.len = sizeof(int);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE, &rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&pageno), sizeof(int));
|
||||
recptr = XLogInsert(RM_CLOG_ID, CLOG_TRUNCATE);
|
||||
XLogFlush(recptr);
|
||||
}
|
||||
|
||||
@@ -732,12 +725,12 @@ WriteTruncateXlogRec(int pageno)
|
||||
* CLOG resource manager's routines
|
||||
*/
|
||||
void
|
||||
clog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
clog_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
/* Backup blocks are not used in clog records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
Assert(!XLogRecHasAnyBlockRefs(record));
|
||||
|
||||
if (info == CLOG_ZEROPAGE)
|
||||
{
|
||||
|
||||
@@ -720,7 +720,6 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
|
||||
{
|
||||
MultiXactId multi;
|
||||
MultiXactOffset offset;
|
||||
XLogRecData rdata[2];
|
||||
xl_multixact_create xlrec;
|
||||
|
||||
debug_elog3(DEBUG2, "Create: %s",
|
||||
@@ -796,17 +795,11 @@ MultiXactIdCreateFromMembers(int nmembers, MultiXactMember *members)
|
||||
* the status flags in one XLogRecData, then all the xids in another one?
|
||||
* Not clear that it's worth the trouble though.
|
||||
*/
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = SizeOfMultiXactCreate;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &(rdata[1]);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), SizeOfMultiXactCreate);
|
||||
XLogRegisterData((char *) members, nmembers * sizeof(MultiXactMember));
|
||||
|
||||
rdata[1].data = (char *) members;
|
||||
rdata[1].len = nmembers * sizeof(MultiXactMember);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = NULL;
|
||||
|
||||
(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID, rdata);
|
||||
(void) XLogInsert(RM_MULTIXACT_ID, XLOG_MULTIXACT_CREATE_ID);
|
||||
|
||||
/* Now enter the information into the OFFSETs and MEMBERs logs */
|
||||
RecordNewMultiXact(multi, offset, nmembers, members);
|
||||
@@ -2705,25 +2698,21 @@ MultiXactOffsetPrecedes(MultiXactOffset offset1, MultiXactOffset offset2)
|
||||
static void
|
||||
WriteMZeroPageXlogRec(int pageno, uint8 info)
|
||||
{
|
||||
XLogRecData rdata;
|
||||
|
||||
rdata.data = (char *) (&pageno);
|
||||
rdata.len = sizeof(int);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
(void) XLogInsert(RM_MULTIXACT_ID, info, &rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&pageno), sizeof(int));
|
||||
(void) XLogInsert(RM_MULTIXACT_ID, info);
|
||||
}
|
||||
|
||||
/*
|
||||
* MULTIXACT resource manager's routines
|
||||
*/
|
||||
void
|
||||
multixact_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
multixact_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
/* Backup blocks are not used in multixact records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
Assert(!XLogRecHasAnyBlockRefs(record));
|
||||
|
||||
if (info == XLOG_MULTIXACT_ZERO_OFF_PAGE)
|
||||
{
|
||||
@@ -2775,7 +2764,7 @@ multixact_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
* should be unnecessary, since any XID found here ought to have other
|
||||
* evidence in the XLOG, but let's be safe.
|
||||
*/
|
||||
max_xid = record->xl_xid;
|
||||
max_xid = XLogRecGetXid(record);
|
||||
for (i = 0; i < xlrec->nmembers; i++)
|
||||
{
|
||||
if (TransactionIdPrecedes(max_xid, xlrec->members[i].xid))
|
||||
|
||||
@@ -889,14 +889,21 @@ typedef struct TwoPhaseRecordOnDisk
|
||||
|
||||
/*
|
||||
* During prepare, the state file is assembled in memory before writing it
|
||||
* to WAL and the actual state file. We use a chain of XLogRecData blocks
|
||||
* so that we will be able to pass the state file contents directly to
|
||||
* XLogInsert.
|
||||
* to WAL and the actual state file. We use a chain of StateFileChunk blocks
|
||||
* for that.
|
||||
*/
|
||||
typedef struct StateFileChunk
|
||||
{
|
||||
char *data;
|
||||
uint32 len;
|
||||
struct StateFileChunk *next;
|
||||
} StateFileChunk;
|
||||
|
||||
static struct xllist
|
||||
{
|
||||
XLogRecData *head; /* first data block in the chain */
|
||||
XLogRecData *tail; /* last block in chain */
|
||||
StateFileChunk *head; /* first data block in the chain */
|
||||
StateFileChunk *tail; /* last block in chain */
|
||||
uint32 num_chunks;
|
||||
uint32 bytes_free; /* free bytes left in tail block */
|
||||
uint32 total_len; /* total data bytes in chain */
|
||||
} records;
|
||||
@@ -917,11 +924,11 @@ save_state_data(const void *data, uint32 len)
|
||||
|
||||
if (padlen > records.bytes_free)
|
||||
{
|
||||
records.tail->next = palloc0(sizeof(XLogRecData));
|
||||
records.tail->next = palloc0(sizeof(StateFileChunk));
|
||||
records.tail = records.tail->next;
|
||||
records.tail->buffer = InvalidBuffer;
|
||||
records.tail->len = 0;
|
||||
records.tail->next = NULL;
|
||||
records.num_chunks++;
|
||||
|
||||
records.bytes_free = Max(padlen, 512);
|
||||
records.tail->data = palloc(records.bytes_free);
|
||||
@@ -951,8 +958,7 @@ StartPrepare(GlobalTransaction gxact)
|
||||
SharedInvalidationMessage *invalmsgs;
|
||||
|
||||
/* Initialize linked list */
|
||||
records.head = palloc0(sizeof(XLogRecData));
|
||||
records.head->buffer = InvalidBuffer;
|
||||
records.head = palloc0(sizeof(StateFileChunk));
|
||||
records.head->len = 0;
|
||||
records.head->next = NULL;
|
||||
|
||||
@@ -960,6 +966,7 @@ StartPrepare(GlobalTransaction gxact)
|
||||
records.head->data = palloc(records.bytes_free);
|
||||
|
||||
records.tail = records.head;
|
||||
records.num_chunks = 1;
|
||||
|
||||
records.total_len = 0;
|
||||
|
||||
@@ -1019,7 +1026,7 @@ EndPrepare(GlobalTransaction gxact)
|
||||
TransactionId xid = pgxact->xid;
|
||||
TwoPhaseFileHeader *hdr;
|
||||
char path[MAXPGPATH];
|
||||
XLogRecData *record;
|
||||
StateFileChunk *record;
|
||||
pg_crc32 statefile_crc;
|
||||
pg_crc32 bogus_crc;
|
||||
int fd;
|
||||
@@ -1117,12 +1124,16 @@ EndPrepare(GlobalTransaction gxact)
|
||||
* We save the PREPARE record's location in the gxact for later use by
|
||||
* CheckPointTwoPhase.
|
||||
*/
|
||||
XLogEnsureRecordSpace(0, records.num_chunks);
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
MyPgXact->delayChkpt = true;
|
||||
|
||||
gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE,
|
||||
records.head);
|
||||
XLogBeginInsert();
|
||||
for (record = records.head; record != NULL; record = record->next)
|
||||
XLogRegisterData(record->data, record->len);
|
||||
gxact->prepare_lsn = XLogInsert(RM_XACT_ID, XLOG_XACT_PREPARE);
|
||||
XLogFlush(gxact->prepare_lsn);
|
||||
|
||||
/* If we crash now, we have prepared: WAL replay will fix things */
|
||||
@@ -1180,6 +1191,7 @@ EndPrepare(GlobalTransaction gxact)
|
||||
SyncRepWaitForLSN(gxact->prepare_lsn);
|
||||
|
||||
records.tail = records.head = NULL;
|
||||
records.num_chunks = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2071,8 +2083,6 @@ RecordTransactionCommitPrepared(TransactionId xid,
|
||||
SharedInvalidationMessage *invalmsgs,
|
||||
bool initfileinval)
|
||||
{
|
||||
XLogRecData rdata[4];
|
||||
int lastrdata = 0;
|
||||
xl_xact_commit_prepared xlrec;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
@@ -2094,39 +2104,24 @@ RecordTransactionCommitPrepared(TransactionId xid,
|
||||
xlrec.crec.nsubxacts = nchildren;
|
||||
xlrec.crec.nmsgs = ninvalmsgs;
|
||||
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = MinSizeOfXactCommitPrepared;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitPrepared);
|
||||
|
||||
/* dump rels to delete */
|
||||
if (nrels > 0)
|
||||
{
|
||||
rdata[0].next = &(rdata[1]);
|
||||
rdata[1].data = (char *) rels;
|
||||
rdata[1].len = nrels * sizeof(RelFileNode);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
lastrdata = 1;
|
||||
}
|
||||
XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
|
||||
|
||||
/* dump committed child Xids */
|
||||
if (nchildren > 0)
|
||||
{
|
||||
rdata[lastrdata].next = &(rdata[2]);
|
||||
rdata[2].data = (char *) children;
|
||||
rdata[2].len = nchildren * sizeof(TransactionId);
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
lastrdata = 2;
|
||||
}
|
||||
XLogRegisterData((char *) children,
|
||||
nchildren * sizeof(TransactionId));
|
||||
|
||||
/* dump cache invalidation messages */
|
||||
if (ninvalmsgs > 0)
|
||||
{
|
||||
rdata[lastrdata].next = &(rdata[3]);
|
||||
rdata[3].data = (char *) invalmsgs;
|
||||
rdata[3].len = ninvalmsgs * sizeof(SharedInvalidationMessage);
|
||||
rdata[3].buffer = InvalidBuffer;
|
||||
lastrdata = 3;
|
||||
}
|
||||
rdata[lastrdata].next = NULL;
|
||||
XLogRegisterData((char *) invalmsgs,
|
||||
ninvalmsgs * sizeof(SharedInvalidationMessage));
|
||||
|
||||
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED, rdata);
|
||||
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_PREPARED);
|
||||
|
||||
/*
|
||||
* We don't currently try to sleep before flush here ... nor is there any
|
||||
@@ -2169,8 +2164,6 @@ RecordTransactionAbortPrepared(TransactionId xid,
|
||||
int nrels,
|
||||
RelFileNode *rels)
|
||||
{
|
||||
XLogRecData rdata[3];
|
||||
int lastrdata = 0;
|
||||
xl_xact_abort_prepared xlrec;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
@@ -2189,30 +2182,20 @@ RecordTransactionAbortPrepared(TransactionId xid,
|
||||
xlrec.arec.xact_time = GetCurrentTimestamp();
|
||||
xlrec.arec.nrels = nrels;
|
||||
xlrec.arec.nsubxacts = nchildren;
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = MinSizeOfXactAbortPrepared;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbortPrepared);
|
||||
|
||||
/* dump rels to delete */
|
||||
if (nrels > 0)
|
||||
{
|
||||
rdata[0].next = &(rdata[1]);
|
||||
rdata[1].data = (char *) rels;
|
||||
rdata[1].len = nrels * sizeof(RelFileNode);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
lastrdata = 1;
|
||||
}
|
||||
XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
|
||||
|
||||
/* dump committed child Xids */
|
||||
if (nchildren > 0)
|
||||
{
|
||||
rdata[lastrdata].next = &(rdata[2]);
|
||||
rdata[2].data = (char *) children;
|
||||
rdata[2].len = nchildren * sizeof(TransactionId);
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
lastrdata = 2;
|
||||
}
|
||||
rdata[lastrdata].next = NULL;
|
||||
XLogRegisterData((char *) children,
|
||||
nchildren * sizeof(TransactionId));
|
||||
|
||||
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED, rdata);
|
||||
recptr = XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT_PREPARED);
|
||||
|
||||
/* Always flush, since we're about to remove the 2PC state file */
|
||||
XLogFlush(recptr);
|
||||
|
||||
@@ -571,7 +571,6 @@ AssignTransactionId(TransactionState s)
|
||||
if (nUnreportedXids >= PGPROC_MAX_CACHED_SUBXIDS ||
|
||||
log_unknown_top)
|
||||
{
|
||||
XLogRecData rdata[2];
|
||||
xl_xact_assignment xlrec;
|
||||
|
||||
/*
|
||||
@@ -582,17 +581,12 @@ AssignTransactionId(TransactionState s)
|
||||
Assert(TransactionIdIsValid(xlrec.xtop));
|
||||
xlrec.nsubxacts = nUnreportedXids;
|
||||
|
||||
rdata[0].data = (char *) &xlrec;
|
||||
rdata[0].len = MinSizeOfXactAssignment;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
rdata[0].next = &rdata[1];
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, MinSizeOfXactAssignment);
|
||||
XLogRegisterData((char *) unreportedXids,
|
||||
nUnreportedXids * sizeof(TransactionId));
|
||||
|
||||
rdata[1].data = (char *) unreportedXids;
|
||||
rdata[1].len = nUnreportedXids * sizeof(TransactionId);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
rdata[1].next = NULL;
|
||||
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT, rdata);
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ASSIGNMENT);
|
||||
|
||||
nUnreportedXids = 0;
|
||||
/* mark top, not current xact as having been logged */
|
||||
@@ -1087,8 +1081,6 @@ RecordTransactionCommit(void)
|
||||
if (nrels > 0 || nmsgs > 0 || RelcacheInitFileInval || forceSyncCommit ||
|
||||
XLogLogicalInfoActive())
|
||||
{
|
||||
XLogRecData rdata[4];
|
||||
int lastrdata = 0;
|
||||
xl_xact_commit xlrec;
|
||||
|
||||
/*
|
||||
@@ -1107,63 +1099,38 @@ RecordTransactionCommit(void)
|
||||
xlrec.nrels = nrels;
|
||||
xlrec.nsubxacts = nchildren;
|
||||
xlrec.nmsgs = nmsgs;
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = MinSizeOfXactCommit;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommit);
|
||||
/* dump rels to delete */
|
||||
if (nrels > 0)
|
||||
{
|
||||
rdata[0].next = &(rdata[1]);
|
||||
rdata[1].data = (char *) rels;
|
||||
rdata[1].len = nrels * sizeof(RelFileNode);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
lastrdata = 1;
|
||||
}
|
||||
XLogRegisterData((char *) rels,
|
||||
nrels * sizeof(RelFileNode));
|
||||
/* dump committed child Xids */
|
||||
if (nchildren > 0)
|
||||
{
|
||||
rdata[lastrdata].next = &(rdata[2]);
|
||||
rdata[2].data = (char *) children;
|
||||
rdata[2].len = nchildren * sizeof(TransactionId);
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
lastrdata = 2;
|
||||
}
|
||||
XLogRegisterData((char *) children,
|
||||
nchildren * sizeof(TransactionId));
|
||||
/* dump shared cache invalidation messages */
|
||||
if (nmsgs > 0)
|
||||
{
|
||||
rdata[lastrdata].next = &(rdata[3]);
|
||||
rdata[3].data = (char *) invalMessages;
|
||||
rdata[3].len = nmsgs * sizeof(SharedInvalidationMessage);
|
||||
rdata[3].buffer = InvalidBuffer;
|
||||
lastrdata = 3;
|
||||
}
|
||||
rdata[lastrdata].next = NULL;
|
||||
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT, rdata);
|
||||
XLogRegisterData((char *) invalMessages,
|
||||
nmsgs * sizeof(SharedInvalidationMessage));
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT);
|
||||
}
|
||||
else
|
||||
{
|
||||
XLogRecData rdata[2];
|
||||
int lastrdata = 0;
|
||||
xl_xact_commit_compact xlrec;
|
||||
|
||||
xlrec.xact_time = xactStopTimestamp;
|
||||
xlrec.nsubxacts = nchildren;
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = MinSizeOfXactCommitCompact;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), MinSizeOfXactCommitCompact);
|
||||
/* dump committed child Xids */
|
||||
if (nchildren > 0)
|
||||
{
|
||||
rdata[0].next = &(rdata[1]);
|
||||
rdata[1].data = (char *) children;
|
||||
rdata[1].len = nchildren * sizeof(TransactionId);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
lastrdata = 1;
|
||||
}
|
||||
rdata[lastrdata].next = NULL;
|
||||
XLogRegisterData((char *) children,
|
||||
nchildren * sizeof(TransactionId));
|
||||
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT, rdata);
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_COMMIT_COMPACT);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1436,8 +1403,6 @@ RecordTransactionAbort(bool isSubXact)
|
||||
RelFileNode *rels;
|
||||
int nchildren;
|
||||
TransactionId *children;
|
||||
XLogRecData rdata[3];
|
||||
int lastrdata = 0;
|
||||
xl_xact_abort xlrec;
|
||||
|
||||
/*
|
||||
@@ -1486,30 +1451,20 @@ RecordTransactionAbort(bool isSubXact)
|
||||
}
|
||||
xlrec.nrels = nrels;
|
||||
xlrec.nsubxacts = nchildren;
|
||||
rdata[0].data = (char *) (&xlrec);
|
||||
rdata[0].len = MinSizeOfXactAbort;
|
||||
rdata[0].buffer = InvalidBuffer;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&xlrec), MinSizeOfXactAbort);
|
||||
|
||||
/* dump rels to delete */
|
||||
if (nrels > 0)
|
||||
{
|
||||
rdata[0].next = &(rdata[1]);
|
||||
rdata[1].data = (char *) rels;
|
||||
rdata[1].len = nrels * sizeof(RelFileNode);
|
||||
rdata[1].buffer = InvalidBuffer;
|
||||
lastrdata = 1;
|
||||
}
|
||||
XLogRegisterData((char *) rels, nrels * sizeof(RelFileNode));
|
||||
|
||||
/* dump committed child Xids */
|
||||
if (nchildren > 0)
|
||||
{
|
||||
rdata[lastrdata].next = &(rdata[2]);
|
||||
rdata[2].data = (char *) children;
|
||||
rdata[2].len = nchildren * sizeof(TransactionId);
|
||||
rdata[2].buffer = InvalidBuffer;
|
||||
lastrdata = 2;
|
||||
}
|
||||
rdata[lastrdata].next = NULL;
|
||||
XLogRegisterData((char *) children,
|
||||
nchildren * sizeof(TransactionId));
|
||||
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT, rdata);
|
||||
(void) XLogInsert(RM_XACT_ID, XLOG_XACT_ABORT);
|
||||
|
||||
/*
|
||||
* Report the latest async abort LSN, so that the WAL writer knows to
|
||||
@@ -2351,6 +2306,9 @@ AbortTransaction(void)
|
||||
AbortBufferIO();
|
||||
UnlockBuffers();
|
||||
|
||||
/* Reset WAL record construction state */
|
||||
XLogResetInsertion();
|
||||
|
||||
/*
|
||||
* Also clean up any open wait for lock, since the lock manager will choke
|
||||
* if we try to wait for another lock before doing this.
|
||||
@@ -4299,6 +4257,9 @@ AbortSubTransaction(void)
|
||||
AbortBufferIO();
|
||||
UnlockBuffers();
|
||||
|
||||
/* Reset WAL record construction state */
|
||||
XLogResetInsertion();
|
||||
|
||||
/*
|
||||
* Also clean up any open wait for lock, since the lock manager will choke
|
||||
* if we try to wait for another lock before doing this.
|
||||
@@ -4938,42 +4899,42 @@ xact_redo_abort(xl_xact_abort *xlrec, TransactionId xid)
|
||||
}
|
||||
|
||||
void
|
||||
xact_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
xact_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
/* Backup blocks are not used in xact records */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
Assert(!XLogRecHasAnyBlockRefs(record));
|
||||
|
||||
if (info == XLOG_XACT_COMMIT_COMPACT)
|
||||
{
|
||||
xl_xact_commit_compact *xlrec = (xl_xact_commit_compact *) XLogRecGetData(record);
|
||||
|
||||
xact_redo_commit_compact(xlrec, record->xl_xid, lsn);
|
||||
xact_redo_commit_compact(xlrec, XLogRecGetXid(record), record->EndRecPtr);
|
||||
}
|
||||
else if (info == XLOG_XACT_COMMIT)
|
||||
{
|
||||
xl_xact_commit *xlrec = (xl_xact_commit *) XLogRecGetData(record);
|
||||
|
||||
xact_redo_commit(xlrec, record->xl_xid, lsn);
|
||||
xact_redo_commit(xlrec, XLogRecGetXid(record), record->EndRecPtr);
|
||||
}
|
||||
else if (info == XLOG_XACT_ABORT)
|
||||
{
|
||||
xl_xact_abort *xlrec = (xl_xact_abort *) XLogRecGetData(record);
|
||||
|
||||
xact_redo_abort(xlrec, record->xl_xid);
|
||||
xact_redo_abort(xlrec, XLogRecGetXid(record));
|
||||
}
|
||||
else if (info == XLOG_XACT_PREPARE)
|
||||
{
|
||||
/* the record contents are exactly the 2PC file */
|
||||
RecreateTwoPhaseFile(record->xl_xid,
|
||||
XLogRecGetData(record), record->xl_len);
|
||||
RecreateTwoPhaseFile(XLogRecGetXid(record),
|
||||
XLogRecGetData(record), XLogRecGetDataLen(record));
|
||||
}
|
||||
else if (info == XLOG_XACT_COMMIT_PREPARED)
|
||||
{
|
||||
xl_xact_commit_prepared *xlrec = (xl_xact_commit_prepared *) XLogRecGetData(record);
|
||||
|
||||
xact_redo_commit(&xlrec->crec, xlrec->xid, lsn);
|
||||
xact_redo_commit(&xlrec->crec, xlrec->xid, record->EndRecPtr);
|
||||
RemoveTwoPhaseFile(xlrec->xid, false);
|
||||
}
|
||||
else if (info == XLOG_XACT_ABORT_PREPARED)
|
||||
|
||||
@@ -757,10 +757,10 @@ static MemoryContext walDebugCxt = NULL;
|
||||
|
||||
static void readRecoveryCommandFile(void);
|
||||
static void exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo);
|
||||
static bool recoveryStopsBefore(XLogRecord *record);
|
||||
static bool recoveryStopsAfter(XLogRecord *record);
|
||||
static bool recoveryStopsBefore(XLogReaderState *record);
|
||||
static bool recoveryStopsAfter(XLogReaderState *record);
|
||||
static void recoveryPausesHere(void);
|
||||
static bool recoveryApplyDelay(XLogRecord *record);
|
||||
static bool recoveryApplyDelay(XLogReaderState *record);
|
||||
static void SetLatestXTime(TimestampTz xtime);
|
||||
static void SetCurrentChunkStartTime(TimestampTz xtime);
|
||||
static void CheckRequiredParameterValues(void);
|
||||
@@ -807,9 +807,9 @@ static char *str_time(pg_time_t tnow);
|
||||
static bool CheckForStandbyTrigger(void);
|
||||
|
||||
#ifdef WAL_DEBUG
|
||||
static void xlog_outrec(StringInfo buf, XLogRecord *record);
|
||||
static void xlog_outrec(StringInfo buf, XLogReaderState *record);
|
||||
#endif
|
||||
static void xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record);
|
||||
static void xlog_outdesc(StringInfo buf, XLogReaderState *record);
|
||||
static void pg_start_backup_callback(int code, Datum arg);
|
||||
static bool read_backup_label(XLogRecPtr *checkPointLoc,
|
||||
bool *backupEndRequired, bool *backupFromStandby);
|
||||
@@ -861,7 +861,6 @@ XLogRecPtr
|
||||
XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
|
||||
{
|
||||
XLogCtlInsert *Insert = &XLogCtl->Insert;
|
||||
XLogRecData *rdt;
|
||||
pg_crc32 rdata_crc;
|
||||
bool inserted;
|
||||
XLogRecord *rechdr = (XLogRecord *) rdata->data;
|
||||
@@ -870,28 +869,13 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
|
||||
XLogRecPtr StartPos;
|
||||
XLogRecPtr EndPos;
|
||||
|
||||
/* we assume that all of the record header is in the first chunk */
|
||||
Assert(rdata->len >= SizeOfXLogRecord);
|
||||
|
||||
/* cross-check on whether we should be here or not */
|
||||
if (!XLogInsertAllowed())
|
||||
elog(ERROR, "cannot make new WAL entries during recovery");
|
||||
|
||||
/*
|
||||
* Calculate CRC of the data, including all the backup blocks
|
||||
*
|
||||
* Note that the record header isn't added into the CRC initially since we
|
||||
* don't know the prev-link yet. Thus, the CRC will represent the CRC of
|
||||
* the whole record in the order: rdata, then backup blocks, then record
|
||||
* header.
|
||||
*/
|
||||
INIT_CRC32C(rdata_crc);
|
||||
for (rdt = rdata->next; rdt != NULL; rdt = rdt->next)
|
||||
COMP_CRC32C(rdata_crc, rdt->data, rdt->len);
|
||||
|
||||
/*
|
||||
* Calculate CRC of the header, except for prev-link, because we don't
|
||||
* know it yet. It will be added later.
|
||||
*/
|
||||
COMP_CRC32C(rdata_crc, ((char *) rechdr), offsetof(XLogRecord, xl_prev));
|
||||
|
||||
/*----------
|
||||
*
|
||||
* We have now done all the preparatory work we can without holding a
|
||||
@@ -976,10 +960,11 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
|
||||
if (inserted)
|
||||
{
|
||||
/*
|
||||
* Now that xl_prev has been filled in, finish CRC calculation of the
|
||||
* record header.
|
||||
* Now that xl_prev has been filled in, calculate CRC of the record
|
||||
* header.
|
||||
*/
|
||||
COMP_CRC32C(rdata_crc, ((char *) &rechdr->xl_prev), sizeof(XLogRecPtr));
|
||||
rdata_crc = rechdr->xl_crc;
|
||||
COMP_CRC32C(rdata_crc, rechdr, offsetof(XLogRecord, xl_crc));
|
||||
FIN_CRC32C(rdata_crc);
|
||||
rechdr->xl_crc = rdata_crc;
|
||||
|
||||
@@ -1053,34 +1038,47 @@ XLogInsertRecord(XLogRecData *rdata, XLogRecPtr fpw_lsn)
|
||||
#ifdef WAL_DEBUG
|
||||
if (XLOG_DEBUG)
|
||||
{
|
||||
static XLogReaderState *debug_reader = NULL;
|
||||
StringInfoData buf;
|
||||
MemoryContext oldCxt = MemoryContextSwitchTo(walDebugCxt);
|
||||
StringInfoData recordBuf;
|
||||
char *errormsg = NULL;
|
||||
MemoryContext oldCxt;
|
||||
|
||||
oldCxt = MemoryContextSwitchTo(walDebugCxt);
|
||||
|
||||
initStringInfo(&buf);
|
||||
appendStringInfo(&buf, "INSERT @ %X/%X: ",
|
||||
(uint32) (EndPos >> 32), (uint32) EndPos);
|
||||
xlog_outrec(&buf, rechdr);
|
||||
if (rdata->data != NULL)
|
||||
|
||||
/*
|
||||
* We have to piece together the WAL record data from the XLogRecData
|
||||
* entries, so that we can pass it to the rm_desc function as one
|
||||
* contiguous chunk.
|
||||
*/
|
||||
initStringInfo(&recordBuf);
|
||||
for (; rdata != NULL; rdata = rdata->next)
|
||||
appendBinaryStringInfo(&recordBuf, rdata->data, rdata->len);
|
||||
|
||||
if (!debug_reader)
|
||||
debug_reader = XLogReaderAllocate(NULL, NULL);
|
||||
|
||||
if (!debug_reader ||
|
||||
!DecodeXLogRecord(debug_reader, (XLogRecord *) recordBuf.data,
|
||||
&errormsg))
|
||||
{
|
||||
appendStringInfo(&buf, "error decoding record: %s",
|
||||
errormsg ? errormsg : "no error message");
|
||||
}
|
||||
else
|
||||
{
|
||||
StringInfoData recordbuf;
|
||||
|
||||
/*
|
||||
* We have to piece together the WAL record data from the
|
||||
* XLogRecData entries, so that we can pass it to the rm_desc
|
||||
* function as one contiguous chunk.
|
||||
*/
|
||||
initStringInfo(&recordbuf);
|
||||
appendBinaryStringInfo(&recordbuf, (char *) rechdr, sizeof(XLogRecord));
|
||||
for (; rdata != NULL; rdata = rdata->next)
|
||||
appendBinaryStringInfo(&recordbuf, rdata->data, rdata->len);
|
||||
|
||||
appendStringInfoString(&buf, " - ");
|
||||
xlog_outdesc(&buf, rechdr->xl_rmid, (XLogRecord *) recordbuf.data);
|
||||
xlog_outdesc(&buf, debug_reader);
|
||||
}
|
||||
elog(LOG, "%s", buf.data);
|
||||
|
||||
pfree(buf.data);
|
||||
pfree(recordBuf.data);
|
||||
MemoryContextSwitchTo(oldCxt);
|
||||
MemoryContextReset(walDebugCxt);
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -1170,7 +1168,7 @@ ReserveXLogSwitch(XLogRecPtr *StartPos, XLogRecPtr *EndPos, XLogRecPtr *PrevPtr)
|
||||
uint64 startbytepos;
|
||||
uint64 endbytepos;
|
||||
uint64 prevbytepos;
|
||||
uint32 size = SizeOfXLogRecord;
|
||||
uint32 size = MAXALIGN(SizeOfXLogRecord);
|
||||
XLogRecPtr ptr;
|
||||
uint32 segleft;
|
||||
|
||||
@@ -1234,9 +1232,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
|
||||
XLogRecPtr CurrPos;
|
||||
XLogPageHeader pagehdr;
|
||||
|
||||
/* The first chunk is the record header */
|
||||
Assert(rdata->len == SizeOfXLogRecord);
|
||||
|
||||
/*
|
||||
* Get a pointer to the right place in the right WAL buffer to start
|
||||
* inserting to.
|
||||
@@ -1309,9 +1304,6 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
|
||||
}
|
||||
Assert(written == write_len);
|
||||
|
||||
/* Align the end position, so that the next record starts aligned */
|
||||
CurrPos = MAXALIGN64(CurrPos);
|
||||
|
||||
/*
|
||||
* If this was an xlog-switch, it's not enough to write the switch record,
|
||||
* we also have to consume all the remaining space in the WAL segment. We
|
||||
@@ -1341,6 +1333,11 @@ CopyXLogRecordToWAL(int write_len, bool isLogSwitch, XLogRecData *rdata,
|
||||
CurrPos += XLOG_BLCKSZ;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Align the end position, so that the next record starts aligned */
|
||||
CurrPos = MAXALIGN64(CurrPos);
|
||||
}
|
||||
|
||||
if (CurrPos != EndPos)
|
||||
elog(PANIC, "space reserved for WAL record does not match what was written");
|
||||
@@ -4470,6 +4467,7 @@ BootStrapXLOG(void)
|
||||
XLogPageHeader page;
|
||||
XLogLongPageHeader longpage;
|
||||
XLogRecord *record;
|
||||
char *recptr;
|
||||
bool use_existent;
|
||||
uint64 sysidentifier;
|
||||
struct timeval tv;
|
||||
@@ -4541,17 +4539,23 @@ BootStrapXLOG(void)
|
||||
longpage->xlp_xlog_blcksz = XLOG_BLCKSZ;
|
||||
|
||||
/* Insert the initial checkpoint record */
|
||||
record = (XLogRecord *) ((char *) page + SizeOfXLogLongPHD);
|
||||
recptr = ((char *) page + SizeOfXLogLongPHD);
|
||||
record = (XLogRecord *) recptr;
|
||||
record->xl_prev = 0;
|
||||
record->xl_xid = InvalidTransactionId;
|
||||
record->xl_tot_len = SizeOfXLogRecord + sizeof(checkPoint);
|
||||
record->xl_len = sizeof(checkPoint);
|
||||
record->xl_tot_len = SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(checkPoint);
|
||||
record->xl_info = XLOG_CHECKPOINT_SHUTDOWN;
|
||||
record->xl_rmid = RM_XLOG_ID;
|
||||
memcpy(XLogRecGetData(record), &checkPoint, sizeof(checkPoint));
|
||||
recptr += SizeOfXLogRecord;
|
||||
/* fill the XLogRecordDataHeaderShort struct */
|
||||
*(recptr++) = XLR_BLOCK_ID_DATA_SHORT;
|
||||
*(recptr++) = sizeof(checkPoint);
|
||||
memcpy(recptr, &checkPoint, sizeof(checkPoint));
|
||||
recptr += sizeof(checkPoint);
|
||||
Assert(recptr - (char *) record == record->xl_tot_len);
|
||||
|
||||
INIT_CRC32C(crc);
|
||||
COMP_CRC32C(crc, &checkPoint, sizeof(checkPoint));
|
||||
COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
|
||||
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
|
||||
FIN_CRC32C(crc);
|
||||
record->xl_crc = crc;
|
||||
@@ -4984,36 +4988,37 @@ exitArchiveRecovery(TimeLineID endTLI, XLogSegNo endLogSegNo)
|
||||
* timestamps.
|
||||
*/
|
||||
static bool
|
||||
getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
|
||||
getRecordTimestamp(XLogReaderState *record, TimestampTz *recordXtime)
|
||||
{
|
||||
uint8 record_info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
uint8 rmid = XLogRecGetRmid(record);
|
||||
|
||||
if (record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
|
||||
if (rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
|
||||
{
|
||||
*recordXtime = ((xl_restore_point *) XLogRecGetData(record))->rp_time;
|
||||
return true;
|
||||
}
|
||||
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
|
||||
if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_COMPACT)
|
||||
{
|
||||
*recordXtime = ((xl_xact_commit_compact *) XLogRecGetData(record))->xact_time;
|
||||
return true;
|
||||
}
|
||||
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
|
||||
if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT)
|
||||
{
|
||||
*recordXtime = ((xl_xact_commit *) XLogRecGetData(record))->xact_time;
|
||||
return true;
|
||||
}
|
||||
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
|
||||
if (rmid == RM_XACT_ID && record_info == XLOG_XACT_COMMIT_PREPARED)
|
||||
{
|
||||
*recordXtime = ((xl_xact_commit_prepared *) XLogRecGetData(record))->crec.xact_time;
|
||||
return true;
|
||||
}
|
||||
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
|
||||
if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT)
|
||||
{
|
||||
*recordXtime = ((xl_xact_abort *) XLogRecGetData(record))->xact_time;
|
||||
return true;
|
||||
}
|
||||
if (record->xl_rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
|
||||
if (rmid == RM_XACT_ID && record_info == XLOG_XACT_ABORT_PREPARED)
|
||||
{
|
||||
*recordXtime = ((xl_xact_abort_prepared *) XLogRecGetData(record))->arec.xact_time;
|
||||
return true;
|
||||
@@ -5030,7 +5035,7 @@ getRecordTimestamp(XLogRecord *record, TimestampTz *recordXtime)
|
||||
* new timeline's history file.
|
||||
*/
|
||||
static bool
|
||||
recoveryStopsBefore(XLogRecord *record)
|
||||
recoveryStopsBefore(XLogReaderState *record)
|
||||
{
|
||||
bool stopsHere = false;
|
||||
uint8 record_info;
|
||||
@@ -5052,14 +5057,14 @@ recoveryStopsBefore(XLogRecord *record)
|
||||
}
|
||||
|
||||
/* Otherwise we only consider stopping before COMMIT or ABORT records. */
|
||||
if (record->xl_rmid != RM_XACT_ID)
|
||||
if (XLogRecGetRmid(record) != RM_XACT_ID)
|
||||
return false;
|
||||
record_info = record->xl_info & ~XLR_INFO_MASK;
|
||||
record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
|
||||
if (record_info == XLOG_XACT_COMMIT_COMPACT || record_info == XLOG_XACT_COMMIT)
|
||||
{
|
||||
isCommit = true;
|
||||
recordXid = record->xl_xid;
|
||||
recordXid = XLogRecGetXid(record);
|
||||
}
|
||||
else if (record_info == XLOG_XACT_COMMIT_PREPARED)
|
||||
{
|
||||
@@ -5069,7 +5074,7 @@ recoveryStopsBefore(XLogRecord *record)
|
||||
else if (record_info == XLOG_XACT_ABORT)
|
||||
{
|
||||
isCommit = false;
|
||||
recordXid = record->xl_xid;
|
||||
recordXid = XLogRecGetXid(record);
|
||||
}
|
||||
else if (record_info == XLOG_XACT_ABORT_PREPARED)
|
||||
{
|
||||
@@ -5140,19 +5145,21 @@ recoveryStopsBefore(XLogRecord *record)
|
||||
* record in XLogCtl->recoveryLastXTime.
|
||||
*/
|
||||
static bool
|
||||
recoveryStopsAfter(XLogRecord *record)
|
||||
recoveryStopsAfter(XLogReaderState *record)
|
||||
{
|
||||
uint8 record_info;
|
||||
uint8 rmid;
|
||||
TimestampTz recordXtime;
|
||||
|
||||
record_info = record->xl_info & ~XLR_INFO_MASK;
|
||||
record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
rmid = XLogRecGetRmid(record);
|
||||
|
||||
/*
|
||||
* There can be many restore points that share the same name; we stop at
|
||||
* the first one.
|
||||
*/
|
||||
if (recoveryTarget == RECOVERY_TARGET_NAME &&
|
||||
record->xl_rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
|
||||
rmid == RM_XLOG_ID && record_info == XLOG_RESTORE_POINT)
|
||||
{
|
||||
xl_restore_point *recordRestorePointData;
|
||||
|
||||
@@ -5173,7 +5180,7 @@ recoveryStopsAfter(XLogRecord *record)
|
||||
}
|
||||
}
|
||||
|
||||
if (record->xl_rmid == RM_XACT_ID &&
|
||||
if (rmid == RM_XACT_ID &&
|
||||
(record_info == XLOG_XACT_COMMIT_COMPACT ||
|
||||
record_info == XLOG_XACT_COMMIT ||
|
||||
record_info == XLOG_XACT_COMMIT_PREPARED ||
|
||||
@@ -5192,7 +5199,7 @@ recoveryStopsAfter(XLogRecord *record)
|
||||
else if (record_info == XLOG_XACT_ABORT_PREPARED)
|
||||
recordXid = ((xl_xact_abort_prepared *) XLogRecGetData(record))->xid;
|
||||
else
|
||||
recordXid = record->xl_xid;
|
||||
recordXid = XLogRecGetXid(record);
|
||||
|
||||
/*
|
||||
* There can be only one transaction end record with this exact
|
||||
@@ -5307,7 +5314,7 @@ SetRecoveryPause(bool recoveryPause)
|
||||
* usability.
|
||||
*/
|
||||
static bool
|
||||
recoveryApplyDelay(XLogRecord *record)
|
||||
recoveryApplyDelay(XLogReaderState *record)
|
||||
{
|
||||
uint8 record_info;
|
||||
TimestampTz xtime;
|
||||
@@ -5326,8 +5333,8 @@ recoveryApplyDelay(XLogRecord *record)
|
||||
* so there is already opportunity for issues caused by early conflicts on
|
||||
* standbys.
|
||||
*/
|
||||
record_info = record->xl_info & ~XLR_INFO_MASK;
|
||||
if (!(record->xl_rmid == RM_XACT_ID &&
|
||||
record_info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
if (!(XLogRecGetRmid(record) == RM_XACT_ID &&
|
||||
(record_info == XLOG_XACT_COMMIT_COMPACT ||
|
||||
record_info == XLOG_XACT_COMMIT ||
|
||||
record_info == XLOG_XACT_COMMIT_PREPARED)))
|
||||
@@ -5696,7 +5703,7 @@ StartupXLOG(void)
|
||||
record = ReadCheckpointRecord(xlogreader, checkPointLoc, 0, true);
|
||||
if (record != NULL)
|
||||
{
|
||||
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
|
||||
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
|
||||
wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
|
||||
ereport(DEBUG1,
|
||||
(errmsg("checkpoint record is at %X/%X",
|
||||
@@ -5793,7 +5800,7 @@ StartupXLOG(void)
|
||||
ereport(PANIC,
|
||||
(errmsg("could not locate a valid checkpoint record")));
|
||||
}
|
||||
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
|
||||
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
|
||||
wasShutdown = (record->xl_info == XLOG_CHECKPOINT_SHUTDOWN);
|
||||
}
|
||||
|
||||
@@ -6230,9 +6237,9 @@ StartupXLOG(void)
|
||||
appendStringInfo(&buf, "REDO @ %X/%X; LSN %X/%X: ",
|
||||
(uint32) (ReadRecPtr >> 32), (uint32) ReadRecPtr,
|
||||
(uint32) (EndRecPtr >> 32), (uint32) EndRecPtr);
|
||||
xlog_outrec(&buf, record);
|
||||
xlog_outrec(&buf, xlogreader);
|
||||
appendStringInfoString(&buf, " - ");
|
||||
xlog_outdesc(&buf, record->xl_rmid, record);
|
||||
xlog_outdesc(&buf, xlogreader);
|
||||
elog(LOG, "%s", buf.data);
|
||||
pfree(buf.data);
|
||||
}
|
||||
@@ -6260,7 +6267,7 @@ StartupXLOG(void)
|
||||
/*
|
||||
* Have we reached our recovery target?
|
||||
*/
|
||||
if (recoveryStopsBefore(record))
|
||||
if (recoveryStopsBefore(xlogreader))
|
||||
{
|
||||
reachedStopPoint = true; /* see below */
|
||||
break;
|
||||
@@ -6270,7 +6277,7 @@ StartupXLOG(void)
|
||||
* If we've been asked to lag the master, wait on latch until
|
||||
* enough time has passed.
|
||||
*/
|
||||
if (recoveryApplyDelay(record))
|
||||
if (recoveryApplyDelay(xlogreader))
|
||||
{
|
||||
/*
|
||||
* We test for paused recovery again here. If user sets
|
||||
@@ -6285,7 +6292,7 @@ StartupXLOG(void)
|
||||
|
||||
/* Setup error traceback support for ereport() */
|
||||
errcallback.callback = rm_redo_error_callback;
|
||||
errcallback.arg = (void *) record;
|
||||
errcallback.arg = (void *) xlogreader;
|
||||
errcallback.previous = error_context_stack;
|
||||
error_context_stack = &errcallback;
|
||||
|
||||
@@ -6324,7 +6331,7 @@ StartupXLOG(void)
|
||||
{
|
||||
CheckPoint checkPoint;
|
||||
|
||||
memcpy(&checkPoint, XLogRecGetData(record), sizeof(CheckPoint));
|
||||
memcpy(&checkPoint, XLogRecGetData(xlogreader), sizeof(CheckPoint));
|
||||
newTLI = checkPoint.ThisTimeLineID;
|
||||
prevTLI = checkPoint.PrevTimeLineID;
|
||||
}
|
||||
@@ -6332,7 +6339,7 @@ StartupXLOG(void)
|
||||
{
|
||||
xl_end_of_recovery xlrec;
|
||||
|
||||
memcpy(&xlrec, XLogRecGetData(record), sizeof(xl_end_of_recovery));
|
||||
memcpy(&xlrec, XLogRecGetData(xlogreader), sizeof(xl_end_of_recovery));
|
||||
newTLI = xlrec.ThisTimeLineID;
|
||||
prevTLI = xlrec.PrevTimeLineID;
|
||||
}
|
||||
@@ -6366,7 +6373,7 @@ StartupXLOG(void)
|
||||
RecordKnownAssignedTransactionIds(record->xl_xid);
|
||||
|
||||
/* Now apply the WAL record itself */
|
||||
RmgrTable[record->xl_rmid].rm_redo(EndRecPtr, record);
|
||||
RmgrTable[record->xl_rmid].rm_redo(xlogreader);
|
||||
|
||||
/* Pop the error context stack */
|
||||
error_context_stack = errcallback.previous;
|
||||
@@ -6394,7 +6401,7 @@ StartupXLOG(void)
|
||||
WalSndWakeup();
|
||||
|
||||
/* Exit loop if we reached inclusive recovery target */
|
||||
if (recoveryStopsAfter(record))
|
||||
if (recoveryStopsAfter(xlogreader))
|
||||
{
|
||||
reachedStopPoint = true;
|
||||
break;
|
||||
@@ -7148,8 +7155,7 @@ ReadCheckpointRecord(XLogReaderState *xlogreader, XLogRecPtr RecPtr,
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
if (record->xl_len != sizeof(CheckPoint) ||
|
||||
record->xl_tot_len != SizeOfXLogRecord + sizeof(CheckPoint))
|
||||
if (record->xl_tot_len != SizeOfXLogRecord + SizeOfXLogRecordDataHeaderShort + sizeof(CheckPoint))
|
||||
{
|
||||
switch (whichChkpt)
|
||||
{
|
||||
@@ -7194,6 +7200,9 @@ InitXLOGAccess(void)
|
||||
(void) GetRedoRecPtr();
|
||||
/* Also update our copy of doPageWrites. */
|
||||
doPageWrites = (Insert->fullPageWrites || Insert->forcePageWrites);
|
||||
|
||||
/* Also initialize the working areas for constructing WAL records */
|
||||
InitXLogInsert();
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -7490,7 +7499,6 @@ CreateCheckPoint(int flags)
|
||||
CheckPoint checkPoint;
|
||||
XLogRecPtr recptr;
|
||||
XLogCtlInsert *Insert = &XLogCtl->Insert;
|
||||
XLogRecData rdata;
|
||||
uint32 freespace;
|
||||
XLogSegNo _logSegNo;
|
||||
XLogRecPtr curInsert;
|
||||
@@ -7760,15 +7768,11 @@ CreateCheckPoint(int flags)
|
||||
/*
|
||||
* Now insert the checkpoint record into XLOG.
|
||||
*/
|
||||
rdata.data = (char *) (&checkPoint);
|
||||
rdata.len = sizeof(checkPoint);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&checkPoint), sizeof(checkPoint));
|
||||
recptr = XLogInsert(RM_XLOG_ID,
|
||||
shutdown ? XLOG_CHECKPOINT_SHUTDOWN :
|
||||
XLOG_CHECKPOINT_ONLINE,
|
||||
&rdata);
|
||||
XLOG_CHECKPOINT_ONLINE);
|
||||
|
||||
XLogFlush(recptr);
|
||||
|
||||
@@ -7908,7 +7912,6 @@ static void
|
||||
CreateEndOfRecoveryRecord(void)
|
||||
{
|
||||
xl_end_of_recovery xlrec;
|
||||
XLogRecData rdata;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
/* sanity check */
|
||||
@@ -7926,12 +7929,9 @@ CreateEndOfRecoveryRecord(void)
|
||||
|
||||
START_CRIT_SECTION();
|
||||
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = sizeof(xl_end_of_recovery);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
|
||||
recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY, &rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xl_end_of_recovery));
|
||||
recptr = XLogInsert(RM_XLOG_ID, XLOG_END_OF_RECOVERY);
|
||||
|
||||
XLogFlush(recptr);
|
||||
|
||||
@@ -8307,13 +8307,9 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo)
|
||||
void
|
||||
XLogPutNextOid(Oid nextOid)
|
||||
{
|
||||
XLogRecData rdata;
|
||||
|
||||
rdata.data = (char *) (&nextOid);
|
||||
rdata.len = sizeof(Oid);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
(void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID, &rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&nextOid), sizeof(Oid));
|
||||
(void) XLogInsert(RM_XLOG_ID, XLOG_NEXTOID);
|
||||
|
||||
/*
|
||||
* We need not flush the NEXTOID record immediately, because any of the
|
||||
@@ -8349,15 +8345,10 @@ XLogRecPtr
|
||||
RequestXLogSwitch(void)
|
||||
{
|
||||
XLogRecPtr RecPtr;
|
||||
XLogRecData rdata;
|
||||
|
||||
/* XLOG SWITCH, alone among xlog record types, has no data */
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = NULL;
|
||||
rdata.len = 0;
|
||||
rdata.next = NULL;
|
||||
|
||||
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH, &rdata);
|
||||
/* XLOG SWITCH has no data */
|
||||
XLogBeginInsert();
|
||||
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_SWITCH);
|
||||
|
||||
return RecPtr;
|
||||
}
|
||||
@@ -8369,18 +8360,15 @@ XLogRecPtr
|
||||
XLogRestorePoint(const char *rpName)
|
||||
{
|
||||
XLogRecPtr RecPtr;
|
||||
XLogRecData rdata;
|
||||
xl_restore_point xlrec;
|
||||
|
||||
xlrec.rp_time = GetCurrentTimestamp();
|
||||
strlcpy(xlrec.rp_name, rpName, MAXFNAMELEN);
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = sizeof(xl_restore_point);
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xl_restore_point));
|
||||
|
||||
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT, &rdata);
|
||||
RecPtr = XLogInsert(RM_XLOG_ID, XLOG_RESTORE_POINT);
|
||||
|
||||
ereport(LOG,
|
||||
(errmsg("restore point \"%s\" created at %X/%X",
|
||||
@@ -8412,7 +8400,6 @@ XLogReportParameters(void)
|
||||
*/
|
||||
if (wal_level != ControlFile->wal_level || XLogIsNeeded())
|
||||
{
|
||||
XLogRecData rdata;
|
||||
xl_parameter_change xlrec;
|
||||
XLogRecPtr recptr;
|
||||
|
||||
@@ -8423,12 +8410,10 @@ XLogReportParameters(void)
|
||||
xlrec.wal_level = wal_level;
|
||||
xlrec.wal_log_hints = wal_log_hints;
|
||||
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.data = (char *) &xlrec;
|
||||
rdata.len = sizeof(xlrec);
|
||||
rdata.next = NULL;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) &xlrec, sizeof(xlrec));
|
||||
|
||||
recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE, &rdata);
|
||||
recptr = XLogInsert(RM_XLOG_ID, XLOG_PARAMETER_CHANGE);
|
||||
XLogFlush(recptr);
|
||||
}
|
||||
|
||||
@@ -8486,14 +8471,10 @@ UpdateFullPageWrites(void)
|
||||
*/
|
||||
if (XLogStandbyInfoActive() && !RecoveryInProgress())
|
||||
{
|
||||
XLogRecData rdata;
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&fullPageWrites), sizeof(bool));
|
||||
|
||||
rdata.data = (char *) (&fullPageWrites);
|
||||
rdata.len = sizeof(bool);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
|
||||
XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE, &rdata);
|
||||
XLogInsert(RM_XLOG_ID, XLOG_FPW_CHANGE);
|
||||
}
|
||||
|
||||
if (!fullPageWrites)
|
||||
@@ -8558,12 +8539,13 @@ checkTimeLineSwitch(XLogRecPtr lsn, TimeLineID newTLI, TimeLineID prevTLI)
|
||||
* not all record types are related to control file updates.
|
||||
*/
|
||||
void
|
||||
xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
xlog_redo(XLogReaderState *record)
|
||||
{
|
||||
uint8 info = record->xl_info & ~XLR_INFO_MASK;
|
||||
uint8 info = XLogRecGetInfo(record) & ~XLR_INFO_MASK;
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
|
||||
/* Backup blocks are not used by XLOG rmgr */
|
||||
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
|
||||
/* in XLOG rmgr, backup blocks are only used by XLOG_FPI records */
|
||||
Assert(!XLogRecHasAnyBlockRefs(record) || info == XLOG_FPI);
|
||||
|
||||
if (info == XLOG_NEXTOID)
|
||||
{
|
||||
@@ -8750,14 +8732,12 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
}
|
||||
else if (info == XLOG_FPI)
|
||||
{
|
||||
char *data;
|
||||
BkpBlock bkpb;
|
||||
Buffer buffer;
|
||||
|
||||
/*
|
||||
* Full-page image (FPI) records contain a backup block stored
|
||||
* "inline" in the normal data since the locking when writing hint
|
||||
* records isn't sufficient to use the normal backup block mechanism,
|
||||
* which assumes exclusive lock on the buffer supplied.
|
||||
* Full-page image (FPI) records contain nothing else but a backup
|
||||
* block. The block reference must include a full-page image -
|
||||
* otherwise there would be no point in this record.
|
||||
*
|
||||
* Since the only change in these backup block are hint bits, there
|
||||
* are no recovery conflicts generated.
|
||||
@@ -8766,11 +8746,9 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
* smgr implementation has no need to implement anything. Which means
|
||||
* nothing is needed in md.c etc
|
||||
*/
|
||||
data = XLogRecGetData(record);
|
||||
memcpy(&bkpb, data, sizeof(BkpBlock));
|
||||
data += sizeof(BkpBlock);
|
||||
|
||||
RestoreBackupBlockContents(lsn, bkpb, data, false, false);
|
||||
if (XLogReadBufferForRedo(record, 0, &buffer) != BLK_RESTORED)
|
||||
elog(ERROR, "unexpected XLogReadBufferForRedo result when restoring backup block");
|
||||
UnlockReleaseBuffer(buffer);
|
||||
}
|
||||
else if (info == XLOG_BACKUP_END)
|
||||
{
|
||||
@@ -8867,22 +8845,42 @@ xlog_redo(XLogRecPtr lsn, XLogRecord *record)
|
||||
#ifdef WAL_DEBUG
|
||||
|
||||
static void
|
||||
xlog_outrec(StringInfo buf, XLogRecord *record)
|
||||
xlog_outrec(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
int i;
|
||||
int block_id;
|
||||
|
||||
appendStringInfo(buf, "prev %X/%X; xid %u",
|
||||
(uint32) (record->xl_prev >> 32),
|
||||
(uint32) record->xl_prev,
|
||||
record->xl_xid);
|
||||
(uint32) (XLogRecGetPrev(record) >> 32),
|
||||
(uint32) XLogRecGetPrev(record),
|
||||
XLogRecGetXid(record));
|
||||
|
||||
appendStringInfo(buf, "; len %u",
|
||||
record->xl_len);
|
||||
XLogRecGetDataLen(record));
|
||||
|
||||
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
|
||||
/* decode block references */
|
||||
for (block_id = 0; block_id <= record->max_block_id; block_id++)
|
||||
{
|
||||
if (record->xl_info & XLR_BKP_BLOCK(i))
|
||||
appendStringInfo(buf, "; bkpb%d", i);
|
||||
RelFileNode rnode;
|
||||
ForkNumber forknum;
|
||||
BlockNumber blk;
|
||||
|
||||
if (!XLogRecHasBlockRef(record, block_id))
|
||||
continue;
|
||||
|
||||
XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blk);
|
||||
if (forknum != MAIN_FORKNUM)
|
||||
appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, fork %u, blk %u",
|
||||
block_id,
|
||||
rnode.spcNode, rnode.dbNode, rnode.relNode,
|
||||
forknum,
|
||||
blk);
|
||||
else
|
||||
appendStringInfo(buf, "; blkref #%u: rel %u/%u/%u, blk %u",
|
||||
block_id,
|
||||
rnode.spcNode, rnode.dbNode, rnode.relNode,
|
||||
blk);
|
||||
if (XLogRecHasBlockImage(record, block_id))
|
||||
appendStringInfo(buf, " FPW");
|
||||
}
|
||||
}
|
||||
#endif /* WAL_DEBUG */
|
||||
@@ -8892,17 +8890,18 @@ xlog_outrec(StringInfo buf, XLogRecord *record)
|
||||
* optionally followed by a colon, a space, and a further description.
|
||||
*/
|
||||
static void
|
||||
xlog_outdesc(StringInfo buf, RmgrId rmid, XLogRecord *record)
|
||||
xlog_outdesc(StringInfo buf, XLogReaderState *record)
|
||||
{
|
||||
RmgrId rmid = XLogRecGetRmid(record);
|
||||
uint8 info = XLogRecGetInfo(record);
|
||||
const char *id;
|
||||
|
||||
appendStringInfoString(buf, RmgrTable[rmid].rm_name);
|
||||
appendStringInfoChar(buf, '/');
|
||||
|
||||
id = RmgrTable[rmid].rm_identify(record->xl_info);
|
||||
id = RmgrTable[rmid].rm_identify(info);
|
||||
if (id == NULL)
|
||||
appendStringInfo(buf, "UNKNOWN (%X): ",
|
||||
record->xl_info & ~XLR_INFO_MASK);
|
||||
appendStringInfo(buf, "UNKNOWN (%X): ", info & ~XLR_INFO_MASK);
|
||||
else
|
||||
appendStringInfo(buf, "%s: ", id);
|
||||
|
||||
@@ -9411,7 +9410,6 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
|
||||
XLogRecPtr startpoint;
|
||||
XLogRecPtr stoppoint;
|
||||
TimeLineID stoptli;
|
||||
XLogRecData rdata;
|
||||
pg_time_t stamp_time;
|
||||
char strfbuf[128];
|
||||
char histfilepath[MAXPGPATH];
|
||||
@@ -9618,11 +9616,9 @@ do_pg_stop_backup(char *labelfile, bool waitforarchive, TimeLineID *stoptli_p)
|
||||
/*
|
||||
* Write the backup-end xlog record
|
||||
*/
|
||||
rdata.data = (char *) (&startpoint);
|
||||
rdata.len = sizeof(startpoint);
|
||||
rdata.buffer = InvalidBuffer;
|
||||
rdata.next = NULL;
|
||||
stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END, &rdata);
|
||||
XLogBeginInsert();
|
||||
XLogRegisterData((char *) (&startpoint), sizeof(startpoint));
|
||||
stoppoint = XLogInsert(RM_XLOG_ID, XLOG_BACKUP_END);
|
||||
stoptli = ThisTimeLineID;
|
||||
|
||||
/*
|
||||
@@ -9930,15 +9926,13 @@ read_backup_label(XLogRecPtr *checkPointLoc, bool *backupEndRequired,
|
||||
static void
|
||||
rm_redo_error_callback(void *arg)
|
||||
{
|
||||
XLogRecord *record = (XLogRecord *) arg;
|
||||
XLogReaderState *record = (XLogReaderState *) arg;
|
||||
StringInfoData buf;
|
||||
|
||||
initStringInfo(&buf);
|
||||
xlog_outdesc(&buf, record->xl_rmid, record);
|
||||
xlog_outdesc(&buf, record);
|
||||
|
||||
/* don't bother emitting empty description */
|
||||
if (buf.len > 0)
|
||||
errcontext("xlog redo %s", buf.data);
|
||||
errcontext("xlog redo %s", buf.data);
|
||||
|
||||
pfree(buf.data);
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -37,6 +37,8 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
|
||||
the supplied arguments. */
|
||||
__attribute__((format(PG_PRINTF_ATTRIBUTE, 2, 3)));
|
||||
|
||||
static void ResetDecoder(XLogReaderState *state);
|
||||
|
||||
/* size of the buffer allocated for error message. */
|
||||
#define MAX_ERRORMSG_LEN 1000
|
||||
|
||||
@@ -59,46 +61,33 @@ report_invalid_record(XLogReaderState *state, const char *fmt,...)
|
||||
/*
|
||||
* Allocate and initialize a new XLogReader.
|
||||
*
|
||||
* Returns NULL if the xlogreader couldn't be allocated.
|
||||
* The returned XLogReader is palloc'd. (In FRONTEND code, that means that
|
||||
* running out-of-memory causes an immediate exit(1).)
|
||||
*/
|
||||
XLogReaderState *
|
||||
XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
|
||||
{
|
||||
XLogReaderState *state;
|
||||
|
||||
AssertArg(pagereadfunc != NULL);
|
||||
state = (XLogReaderState *) palloc0(sizeof(XLogReaderState));
|
||||
|
||||
state = (XLogReaderState *) malloc(sizeof(XLogReaderState));
|
||||
if (!state)
|
||||
return NULL;
|
||||
MemSet(state, 0, sizeof(XLogReaderState));
|
||||
state->max_block_id = -1;
|
||||
|
||||
/*
|
||||
* Permanently allocate readBuf. We do it this way, rather than just
|
||||
* making a static array, for two reasons: (1) no need to waste the
|
||||
* storage in most instantiations of the backend; (2) a static char array
|
||||
* isn't guaranteed to have any particular alignment, whereas malloc()
|
||||
* isn't guaranteed to have any particular alignment, whereas palloc()
|
||||
* will provide MAXALIGN'd storage.
|
||||
*/
|
||||
state->readBuf = (char *) malloc(XLOG_BLCKSZ);
|
||||
if (!state->readBuf)
|
||||
{
|
||||
free(state);
|
||||
return NULL;
|
||||
}
|
||||
state->readBuf = (char *) palloc(XLOG_BLCKSZ);
|
||||
|
||||
state->read_page = pagereadfunc;
|
||||
/* system_identifier initialized to zeroes above */
|
||||
state->private_data = private_data;
|
||||
/* ReadRecPtr and EndRecPtr initialized to zeroes above */
|
||||
/* readSegNo, readOff, readLen, readPageTLI initialized to zeroes above */
|
||||
state->errormsg_buf = malloc(MAX_ERRORMSG_LEN + 1);
|
||||
if (!state->errormsg_buf)
|
||||
{
|
||||
free(state->readBuf);
|
||||
free(state);
|
||||
return NULL;
|
||||
}
|
||||
state->errormsg_buf = palloc(MAX_ERRORMSG_LEN + 1);
|
||||
state->errormsg_buf[0] = '\0';
|
||||
|
||||
/*
|
||||
@@ -107,9 +96,9 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
|
||||
*/
|
||||
if (!allocate_recordbuf(state, 0))
|
||||
{
|
||||
free(state->errormsg_buf);
|
||||
free(state->readBuf);
|
||||
free(state);
|
||||
pfree(state->errormsg_buf);
|
||||
pfree(state->readBuf);
|
||||
pfree(state);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -119,11 +108,24 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
|
||||
void
|
||||
XLogReaderFree(XLogReaderState *state)
|
||||
{
|
||||
free(state->errormsg_buf);
|
||||
int block_id;
|
||||
|
||||
for (block_id = 0; block_id <= state->max_block_id; block_id++)
|
||||
{
|
||||
if (state->blocks[block_id].in_use)
|
||||
{
|
||||
if (state->blocks[block_id].data)
|
||||
pfree(state->blocks[block_id].data);
|
||||
}
|
||||
}
|
||||
if (state->main_data)
|
||||
pfree(state->main_data);
|
||||
|
||||
pfree(state->errormsg_buf);
|
||||
if (state->readRecordBuf)
|
||||
free(state->readRecordBuf);
|
||||
free(state->readBuf);
|
||||
free(state);
|
||||
pfree(state->readRecordBuf);
|
||||
pfree(state->readBuf);
|
||||
pfree(state);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -146,14 +148,8 @@ allocate_recordbuf(XLogReaderState *state, uint32 reclength)
|
||||
newSize = Max(newSize, 5 * Max(BLCKSZ, XLOG_BLCKSZ));
|
||||
|
||||
if (state->readRecordBuf)
|
||||
free(state->readRecordBuf);
|
||||
state->readRecordBuf = (char *) malloc(newSize);
|
||||
if (!state->readRecordBuf)
|
||||
{
|
||||
state->readRecordBufSize = 0;
|
||||
return false;
|
||||
}
|
||||
|
||||
pfree(state->readRecordBuf);
|
||||
state->readRecordBuf = (char *) palloc(newSize);
|
||||
state->readRecordBufSize = newSize;
|
||||
return true;
|
||||
}
|
||||
@@ -191,6 +187,8 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
|
||||
*errormsg = NULL;
|
||||
state->errormsg_buf[0] = '\0';
|
||||
|
||||
ResetDecoder(state);
|
||||
|
||||
if (RecPtr == InvalidXLogRecPtr)
|
||||
{
|
||||
RecPtr = state->EndRecPtr;
|
||||
@@ -440,7 +438,10 @@ XLogReadRecord(XLogReaderState *state, XLogRecPtr RecPtr, char **errormsg)
|
||||
state->EndRecPtr -= state->EndRecPtr % XLogSegSize;
|
||||
}
|
||||
|
||||
return record;
|
||||
if (DecodeXLogRecord(state, record, errormsg))
|
||||
return record;
|
||||
else
|
||||
return NULL;
|
||||
|
||||
err:
|
||||
|
||||
@@ -579,30 +580,7 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
|
||||
XLogRecPtr PrevRecPtr, XLogRecord *record,
|
||||
bool randAccess)
|
||||
{
|
||||
/*
|
||||
* xl_len == 0 is bad data for everything except XLOG SWITCH, where it is
|
||||
* required.
|
||||
*/
|
||||
if (record->xl_rmid == RM_XLOG_ID && record->xl_info == XLOG_SWITCH)
|
||||
{
|
||||
if (record->xl_len != 0)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"invalid xlog switch record at %X/%X",
|
||||
(uint32) (RecPtr >> 32), (uint32) RecPtr);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else if (record->xl_len == 0)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"record with zero length at %X/%X",
|
||||
(uint32) (RecPtr >> 32), (uint32) RecPtr);
|
||||
return false;
|
||||
}
|
||||
if (record->xl_tot_len < SizeOfXLogRecord + record->xl_len ||
|
||||
record->xl_tot_len > SizeOfXLogRecord + record->xl_len +
|
||||
XLR_MAX_BKP_BLOCKS * (sizeof(BkpBlock) + BLCKSZ))
|
||||
if (record->xl_tot_len < SizeOfXLogRecord)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"invalid record length at %X/%X",
|
||||
@@ -663,79 +641,17 @@ ValidXLogRecordHeader(XLogReaderState *state, XLogRecPtr RecPtr,
|
||||
* We assume all of the record (that is, xl_tot_len bytes) has been read
|
||||
* into memory at *record. Also, ValidXLogRecordHeader() has accepted the
|
||||
* record's header, which means in particular that xl_tot_len is at least
|
||||
* SizeOfXlogRecord, so it is safe to fetch xl_len.
|
||||
* SizeOfXlogRecord.
|
||||
*/
|
||||
static bool
|
||||
ValidXLogRecord(XLogReaderState *state, XLogRecord *record, XLogRecPtr recptr)
|
||||
{
|
||||
pg_crc32 crc;
|
||||
int i;
|
||||
uint32 len = record->xl_len;
|
||||
BkpBlock bkpb;
|
||||
char *blk;
|
||||
size_t remaining = record->xl_tot_len;
|
||||
|
||||
/* First the rmgr data */
|
||||
if (remaining < SizeOfXLogRecord + len)
|
||||
{
|
||||
/* ValidXLogRecordHeader() should've caught this already... */
|
||||
report_invalid_record(state, "invalid record length at %X/%X",
|
||||
(uint32) (recptr >> 32), (uint32) recptr);
|
||||
return false;
|
||||
}
|
||||
remaining -= SizeOfXLogRecord + len;
|
||||
/* Calculate the CRC */
|
||||
INIT_CRC32C(crc);
|
||||
COMP_CRC32C(crc, XLogRecGetData(record), len);
|
||||
|
||||
/* Add in the backup blocks, if any */
|
||||
blk = (char *) XLogRecGetData(record) + len;
|
||||
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
|
||||
{
|
||||
uint32 blen;
|
||||
|
||||
if (!(record->xl_info & XLR_BKP_BLOCK(i)))
|
||||
continue;
|
||||
|
||||
if (remaining < sizeof(BkpBlock))
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"invalid backup block size in record at %X/%X",
|
||||
(uint32) (recptr >> 32), (uint32) recptr);
|
||||
return false;
|
||||
}
|
||||
memcpy(&bkpb, blk, sizeof(BkpBlock));
|
||||
|
||||
if (bkpb.hole_offset + bkpb.hole_length > BLCKSZ)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"incorrect hole size in record at %X/%X",
|
||||
(uint32) (recptr >> 32), (uint32) recptr);
|
||||
return false;
|
||||
}
|
||||
blen = sizeof(BkpBlock) + BLCKSZ - bkpb.hole_length;
|
||||
|
||||
if (remaining < blen)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"invalid backup block size in record at %X/%X",
|
||||
(uint32) (recptr >> 32), (uint32) recptr);
|
||||
return false;
|
||||
}
|
||||
remaining -= blen;
|
||||
COMP_CRC32C(crc, blk, blen);
|
||||
blk += blen;
|
||||
}
|
||||
|
||||
/* Check that xl_tot_len agrees with our calculation */
|
||||
if (remaining != 0)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"incorrect total length in record at %X/%X",
|
||||
(uint32) (recptr >> 32), (uint32) recptr);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Finally include the record header */
|
||||
COMP_CRC32C(crc, ((char *) record) + SizeOfXLogRecord, record->xl_tot_len - SizeOfXLogRecord);
|
||||
/* include the record header last */
|
||||
COMP_CRC32C(crc, (char *) record, offsetof(XLogRecord, xl_crc));
|
||||
FIN_CRC32C(crc);
|
||||
|
||||
@@ -985,3 +901,321 @@ out:
|
||||
}
|
||||
|
||||
#endif /* FRONTEND */
|
||||
|
||||
|
||||
/* ----------------------------------------
|
||||
* Functions for decoding the data and block references in a record.
|
||||
* ----------------------------------------
|
||||
*/
|
||||
|
||||
/* private function to reset the state between records */
|
||||
static void
|
||||
ResetDecoder(XLogReaderState *state)
|
||||
{
|
||||
int block_id;
|
||||
|
||||
state->decoded_record = NULL;
|
||||
|
||||
state->main_data_len = 0;
|
||||
|
||||
for (block_id = 0; block_id <= state->max_block_id; block_id++)
|
||||
{
|
||||
state->blocks[block_id].in_use = false;
|
||||
state->blocks[block_id].has_image = false;
|
||||
state->blocks[block_id].has_data = false;
|
||||
}
|
||||
state->max_block_id = -1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Decode the previously read record.
|
||||
*
|
||||
* On error, a human-readable error message is returned in *errormsg, and
|
||||
* the return value is false.
|
||||
*/
|
||||
bool
|
||||
DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
|
||||
{
|
||||
/*
|
||||
* read next _size bytes from record buffer, but check for overrun first.
|
||||
*/
|
||||
#define COPY_HEADER_FIELD(_dst, _size) \
|
||||
do { \
|
||||
if (remaining < _size) \
|
||||
goto shortdata_err; \
|
||||
memcpy(_dst, ptr, _size); \
|
||||
ptr += _size; \
|
||||
remaining -= _size; \
|
||||
} while(0)
|
||||
|
||||
char *ptr;
|
||||
uint32 remaining;
|
||||
uint32 datatotal;
|
||||
RelFileNode *rnode = NULL;
|
||||
uint8 block_id;
|
||||
|
||||
ResetDecoder(state);
|
||||
|
||||
state->decoded_record = record;
|
||||
|
||||
ptr = (char *) record;
|
||||
ptr += SizeOfXLogRecord;
|
||||
remaining = record->xl_tot_len - SizeOfXLogRecord;
|
||||
|
||||
/* Decode the headers */
|
||||
datatotal = 0;
|
||||
while (remaining > datatotal)
|
||||
{
|
||||
COPY_HEADER_FIELD(&block_id, sizeof(uint8));
|
||||
|
||||
if (block_id == XLR_BLOCK_ID_DATA_SHORT)
|
||||
{
|
||||
/* XLogRecordDataHeaderShort */
|
||||
uint8 main_data_len;
|
||||
|
||||
COPY_HEADER_FIELD(&main_data_len, sizeof(uint8));
|
||||
|
||||
state->main_data_len = main_data_len;
|
||||
datatotal += main_data_len;
|
||||
break; /* by convention, the main data fragment is
|
||||
* always last */
|
||||
}
|
||||
else if (block_id == XLR_BLOCK_ID_DATA_LONG)
|
||||
{
|
||||
/* XLogRecordDataHeaderLong */
|
||||
uint32 main_data_len;
|
||||
|
||||
COPY_HEADER_FIELD(&main_data_len, sizeof(uint32));
|
||||
state->main_data_len = main_data_len;
|
||||
datatotal += main_data_len;
|
||||
break; /* by convention, the main data fragment is
|
||||
* always last */
|
||||
}
|
||||
else if (block_id <= XLR_MAX_BLOCK_ID)
|
||||
{
|
||||
/* XLogRecordBlockHeader */
|
||||
DecodedBkpBlock *blk;
|
||||
uint8 fork_flags;
|
||||
|
||||
if (block_id <= state->max_block_id)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"out-of-order block_id %u at %X/%X",
|
||||
block_id,
|
||||
(uint32) (state->ReadRecPtr >> 32),
|
||||
(uint32) state->ReadRecPtr);
|
||||
goto err;
|
||||
}
|
||||
state->max_block_id = block_id;
|
||||
|
||||
blk = &state->blocks[block_id];
|
||||
blk->in_use = true;
|
||||
|
||||
COPY_HEADER_FIELD(&fork_flags, sizeof(uint8));
|
||||
blk->forknum = fork_flags & BKPBLOCK_FORK_MASK;
|
||||
blk->flags = fork_flags;
|
||||
blk->has_image = ((fork_flags & BKPBLOCK_HAS_IMAGE) != 0);
|
||||
blk->has_data = ((fork_flags & BKPBLOCK_HAS_DATA) != 0);
|
||||
|
||||
COPY_HEADER_FIELD(&blk->data_len, sizeof(uint16));
|
||||
/* cross-check that the HAS_DATA flag is set iff data_length > 0 */
|
||||
if (blk->has_data && blk->data_len == 0)
|
||||
report_invalid_record(state,
|
||||
"BKPBLOCK_HAS_DATA set, but no data included at %X/%X",
|
||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
||||
if (!blk->has_data && blk->data_len != 0)
|
||||
report_invalid_record(state,
|
||||
"BKPBLOCK_HAS_DATA not set, but data length is %u at %X/%X",
|
||||
(unsigned int) blk->data_len,
|
||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
||||
datatotal += blk->data_len;
|
||||
|
||||
if (blk->has_image)
|
||||
{
|
||||
COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
|
||||
COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
|
||||
datatotal += BLCKSZ - blk->hole_length;
|
||||
}
|
||||
if (!(fork_flags & BKPBLOCK_SAME_REL))
|
||||
{
|
||||
COPY_HEADER_FIELD(&blk->rnode, sizeof(RelFileNode));
|
||||
rnode = &blk->rnode;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rnode == NULL)
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"BKPBLOCK_SAME_REL set but no previous rel at %X/%X",
|
||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
||||
goto err;
|
||||
}
|
||||
|
||||
blk->rnode = *rnode;
|
||||
}
|
||||
COPY_HEADER_FIELD(&blk->blkno, sizeof(BlockNumber));
|
||||
}
|
||||
else
|
||||
{
|
||||
report_invalid_record(state,
|
||||
"invalid block_id %u at %X/%X",
|
||||
block_id,
|
||||
(uint32) (state->ReadRecPtr >> 32),
|
||||
(uint32) state->ReadRecPtr);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
|
||||
if (remaining != datatotal)
|
||||
goto shortdata_err;
|
||||
|
||||
/*
|
||||
* Ok, we've parsed the fragment headers, and verified that the total
|
||||
* length of the payload in the fragments is equal to the amount of data
|
||||
* left. Copy the data of each fragment to a separate buffer.
|
||||
*
|
||||
* We could just set up pointers into readRecordBuf, but we want to align
|
||||
* the data for the convenience of the callers. Backup images are not
|
||||
* copied, however; they don't need alignment.
|
||||
*/
|
||||
|
||||
/* block data first */
|
||||
for (block_id = 0; block_id <= state->max_block_id; block_id++)
|
||||
{
|
||||
DecodedBkpBlock *blk = &state->blocks[block_id];
|
||||
|
||||
if (!blk->in_use)
|
||||
continue;
|
||||
if (blk->has_image)
|
||||
{
|
||||
blk->bkp_image = ptr;
|
||||
ptr += BLCKSZ - blk->hole_length;
|
||||
}
|
||||
if (blk->has_data)
|
||||
{
|
||||
if (!blk->data || blk->data_len > blk->data_bufsz)
|
||||
{
|
||||
if (blk->data)
|
||||
pfree(blk->data);
|
||||
blk->data_bufsz = blk->data_len;
|
||||
blk->data = palloc(blk->data_bufsz);
|
||||
}
|
||||
memcpy(blk->data, ptr, blk->data_len);
|
||||
ptr += blk->data_len;
|
||||
}
|
||||
}
|
||||
|
||||
/* and finally, the main data */
|
||||
if (state->main_data_len > 0)
|
||||
{
|
||||
if (!state->main_data || state->main_data_len > state->main_data_bufsz)
|
||||
{
|
||||
if (state->main_data)
|
||||
pfree(state->main_data);
|
||||
state->main_data_bufsz = state->main_data_len;
|
||||
state->main_data = palloc(state->main_data_bufsz);
|
||||
}
|
||||
memcpy(state->main_data, ptr, state->main_data_len);
|
||||
ptr += state->main_data_len;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
shortdata_err:
|
||||
report_invalid_record(state,
|
||||
"record with invalid length at %X/%X",
|
||||
(uint32) (state->ReadRecPtr >> 32), (uint32) state->ReadRecPtr);
|
||||
err:
|
||||
*errormsg = state->errormsg_buf;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns information about the block that a block reference refers to.
|
||||
*
|
||||
* If the WAL record contains a block reference with the given ID, *rnode,
|
||||
* *forknum, and *blknum are filled in (if not NULL), and returns TRUE.
|
||||
* Otherwise returns FALSE.
|
||||
*/
|
||||
bool
|
||||
XLogRecGetBlockTag(XLogReaderState *record, uint8 block_id,
|
||||
RelFileNode *rnode, ForkNumber *forknum, BlockNumber *blknum)
|
||||
{
|
||||
DecodedBkpBlock *bkpb;
|
||||
|
||||
if (!record->blocks[block_id].in_use)
|
||||
return false;
|
||||
|
||||
bkpb = &record->blocks[block_id];
|
||||
if (rnode)
|
||||
*rnode = bkpb->rnode;
|
||||
if (forknum)
|
||||
*forknum = bkpb->forknum;
|
||||
if (blknum)
|
||||
*blknum = bkpb->blkno;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the data associated with a block reference, or NULL if there is
|
||||
* no data (e.g. because a full-page image was taken instead). The returned
|
||||
* pointer points to a MAXALIGNed buffer.
|
||||
*/
|
||||
char *
|
||||
XLogRecGetBlockData(XLogReaderState *record, uint8 block_id, Size *len)
|
||||
{
|
||||
DecodedBkpBlock *bkpb;
|
||||
|
||||
if (!record->blocks[block_id].in_use)
|
||||
return NULL;
|
||||
|
||||
bkpb = &record->blocks[block_id];
|
||||
|
||||
if (!bkpb->has_data)
|
||||
{
|
||||
if (len)
|
||||
*len = 0;
|
||||
return NULL;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (len)
|
||||
*len = bkpb->data_len;
|
||||
return bkpb->data;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore a full-page image from a backup block attached to an XLOG record.
|
||||
*
|
||||
* Returns the buffer number containing the page.
|
||||
*/
|
||||
bool
|
||||
RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
|
||||
{
|
||||
DecodedBkpBlock *bkpb;
|
||||
|
||||
if (!record->blocks[block_id].in_use)
|
||||
return false;
|
||||
if (!record->blocks[block_id].has_image)
|
||||
return false;
|
||||
|
||||
bkpb = &record->blocks[block_id];
|
||||
|
||||
if (bkpb->hole_length == 0)
|
||||
{
|
||||
memcpy(page, bkpb->bkp_image, BLCKSZ);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy(page, bkpb->bkp_image, bkpb->hole_offset);
|
||||
/* must zero-fill the hole */
|
||||
MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
|
||||
memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
|
||||
bkpb->bkp_image + bkpb->hole_offset,
|
||||
BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -253,9 +253,8 @@ XLogCheckInvalidPages(void)
|
||||
*
|
||||
* 'lsn' is the LSN of the record being replayed. It is compared with the
|
||||
* page's LSN to determine if the record has already been replayed.
|
||||
* 'rnode' and 'blkno' point to the block being replayed (main fork number
|
||||
* is implied, use XLogReadBufferForRedoExtended for other forks).
|
||||
* 'block_index' identifies the backup block in the record for the page.
|
||||
* 'block_id' is the ID number the block was registered with, when the WAL
|
||||
* record was created.
|
||||
*
|
||||
* Returns one of the following:
|
||||
*
|
||||
@@ -272,15 +271,36 @@ XLogCheckInvalidPages(void)
|
||||
* single-process crash recovery, but some subroutines such as MarkBufferDirty
|
||||
* will complain if we don't have the lock. In hot standby mode it's
|
||||
* definitely necessary.)
|
||||
*
|
||||
* Note: when a backup block is available in XLOG, we restore it
|
||||
* unconditionally, even if the page in the database appears newer. This is
|
||||
* to protect ourselves against database pages that were partially or
|
||||
* incorrectly written during a crash. We assume that the XLOG data must be
|
||||
* good because it has passed a CRC check, while the database page might not
|
||||
* be. This will force us to replay all subsequent modifications of the page
|
||||
* that appear in XLOG, rather than possibly ignoring them as already
|
||||
* applied, but that's not a huge drawback.
|
||||
*/
|
||||
XLogRedoAction
|
||||
XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
RelFileNode rnode, BlockNumber blkno,
|
||||
XLogReadBufferForRedo(XLogReaderState *record, uint8 block_id,
|
||||
Buffer *buf)
|
||||
{
|
||||
return XLogReadBufferForRedoExtended(lsn, record, block_index,
|
||||
rnode, MAIN_FORKNUM, blkno,
|
||||
RBM_NORMAL, false, buf);
|
||||
return XLogReadBufferForRedoExtended(record, block_id, RBM_NORMAL,
|
||||
false, buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Pin and lock a buffer referenced by a WAL record, for the purpose of
|
||||
* re-initializing it.
|
||||
*/
|
||||
Buffer
|
||||
XLogInitBufferForRedo(XLogReaderState *record, uint8 block_id)
|
||||
{
|
||||
Buffer buf;
|
||||
|
||||
XLogReadBufferForRedoExtended(record, block_id, RBM_ZERO_AND_LOCK, false,
|
||||
&buf);
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -299,21 +319,54 @@ XLogReadBufferForRedo(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
* using LockBufferForCleanup(), instead of a regular exclusive lock.
|
||||
*/
|
||||
XLogRedoAction
|
||||
XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
|
||||
int block_index, RelFileNode rnode,
|
||||
ForkNumber forkno, BlockNumber blkno,
|
||||
XLogReadBufferForRedoExtended(XLogReaderState *record,
|
||||
uint8 block_id,
|
||||
ReadBufferMode mode, bool get_cleanup_lock,
|
||||
Buffer *buf)
|
||||
{
|
||||
if (record->xl_info & XLR_BKP_BLOCK(block_index))
|
||||
XLogRecPtr lsn = record->EndRecPtr;
|
||||
RelFileNode rnode;
|
||||
ForkNumber forknum;
|
||||
BlockNumber blkno;
|
||||
Page page;
|
||||
|
||||
if (!XLogRecGetBlockTag(record, block_id, &rnode, &forknum, &blkno))
|
||||
{
|
||||
*buf = RestoreBackupBlock(lsn, record, block_index,
|
||||
get_cleanup_lock, true);
|
||||
/* Caller specified a bogus block_id */
|
||||
elog(PANIC, "failed to locate backup block with ID %d", block_id);
|
||||
}
|
||||
|
||||
/* If it's a full-page image, restore it. */
|
||||
if (XLogRecHasBlockImage(record, block_id))
|
||||
{
|
||||
*buf = XLogReadBufferExtended(rnode, forknum, blkno,
|
||||
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
|
||||
page = BufferGetPage(*buf);
|
||||
if (!RestoreBlockImage(record, block_id, page))
|
||||
elog(ERROR, "failed to restore block image");
|
||||
|
||||
/*
|
||||
* The page may be uninitialized. If so, we can't set the LSN because
|
||||
* that would corrupt the page.
|
||||
*/
|
||||
if (!PageIsNew(page))
|
||||
{
|
||||
PageSetLSN(page, lsn);
|
||||
}
|
||||
|
||||
MarkBufferDirty(*buf);
|
||||
|
||||
return BLK_RESTORED;
|
||||
}
|
||||
else
|
||||
{
|
||||
*buf = XLogReadBufferExtended(rnode, forkno, blkno, mode);
|
||||
if ((record->blocks[block_id].flags & BKPBLOCK_WILL_INIT) != 0 &&
|
||||
mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
|
||||
{
|
||||
elog(PANIC, "block with WILL_INIT flag in WAL record must be zeroed by redo routine");
|
||||
}
|
||||
|
||||
*buf = XLogReadBufferExtended(rnode, forknum, blkno, mode);
|
||||
if (BufferIsValid(*buf))
|
||||
{
|
||||
if (mode != RBM_ZERO_AND_LOCK && mode != RBM_ZERO_AND_CLEANUP_LOCK)
|
||||
@@ -333,37 +386,6 @@ XLogReadBufferForRedoExtended(XLogRecPtr lsn, XLogRecord *record,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* XLogReadBuffer
|
||||
* Read a page during XLOG replay.
|
||||
*
|
||||
* This is a shorthand of XLogReadBufferExtended() followed by
|
||||
* LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE), for reading from the main
|
||||
* fork.
|
||||
*
|
||||
* (Getting the buffer lock is not really necessary during single-process
|
||||
* crash recovery, but some subroutines such as MarkBufferDirty will complain
|
||||
* if we don't have the lock. In hot standby mode it's definitely necessary.)
|
||||
*
|
||||
* The returned buffer is exclusively-locked.
|
||||
*
|
||||
* For historical reasons, instead of a ReadBufferMode argument, this only
|
||||
* supports RBM_ZERO_AND_LOCK (init == true) and RBM_NORMAL (init == false)
|
||||
* modes.
|
||||
*/
|
||||
Buffer
|
||||
XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
|
||||
{
|
||||
Buffer buf;
|
||||
|
||||
buf = XLogReadBufferExtended(rnode, MAIN_FORKNUM, blkno,
|
||||
init ? RBM_ZERO_AND_LOCK : RBM_NORMAL);
|
||||
if (BufferIsValid(buf) && !init)
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
|
||||
return buf;
|
||||
}
|
||||
|
||||
/*
|
||||
* XLogReadBufferExtended
|
||||
* Read a page during XLOG replay
|
||||
@@ -383,6 +405,11 @@ XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init)
|
||||
* In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't
|
||||
* exist, and we don't check for all-zeroes. Thus, no log entry is made
|
||||
* to imply that the page should be dropped or truncated later.
|
||||
*
|
||||
* NB: A redo function should normally not call this directly. To get a page
|
||||
* to modify, use XLogReadBufferForRedo instead. It is important that all pages
|
||||
* modified by a WAL record are registered in the WAL records, or they will be
|
||||
* invisible to tools that need to know which pages are modified.
|
||||
*/
|
||||
Buffer
|
||||
XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
@@ -473,124 +500,6 @@ XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/*
|
||||
* Restore a full-page image from a backup block attached to an XLOG record.
|
||||
*
|
||||
* lsn: LSN of the XLOG record being replayed
|
||||
* record: the complete XLOG record
|
||||
* block_index: which backup block to restore (0 .. XLR_MAX_BKP_BLOCKS - 1)
|
||||
* get_cleanup_lock: TRUE to get a cleanup rather than plain exclusive lock
|
||||
* keep_buffer: TRUE to return the buffer still locked and pinned
|
||||
*
|
||||
* Returns the buffer number containing the page. Note this is not terribly
|
||||
* useful unless keep_buffer is specified as TRUE.
|
||||
*
|
||||
* Note: when a backup block is available in XLOG, we restore it
|
||||
* unconditionally, even if the page in the database appears newer.
|
||||
* This is to protect ourselves against database pages that were partially
|
||||
* or incorrectly written during a crash. We assume that the XLOG data
|
||||
* must be good because it has passed a CRC check, while the database
|
||||
* page might not be. This will force us to replay all subsequent
|
||||
* modifications of the page that appear in XLOG, rather than possibly
|
||||
* ignoring them as already applied, but that's not a huge drawback.
|
||||
*
|
||||
* If 'get_cleanup_lock' is true, a cleanup lock is obtained on the buffer,
|
||||
* else a normal exclusive lock is used. During crash recovery, that's just
|
||||
* pro forma because there can't be any regular backends in the system, but
|
||||
* in hot standby mode the distinction is important.
|
||||
*
|
||||
* If 'keep_buffer' is true, return without releasing the buffer lock and pin;
|
||||
* then caller is responsible for doing UnlockReleaseBuffer() later. This
|
||||
* is needed in some cases when replaying XLOG records that touch multiple
|
||||
* pages, to prevent inconsistent states from being visible to other backends.
|
||||
* (Again, that's only important in hot standby mode.)
|
||||
*/
|
||||
Buffer
|
||||
RestoreBackupBlock(XLogRecPtr lsn, XLogRecord *record, int block_index,
|
||||
bool get_cleanup_lock, bool keep_buffer)
|
||||
{
|
||||
BkpBlock bkpb;
|
||||
char *blk;
|
||||
int i;
|
||||
|
||||
/* Locate requested BkpBlock in the record */
|
||||
blk = (char *) XLogRecGetData(record) + record->xl_len;
|
||||
for (i = 0; i < XLR_MAX_BKP_BLOCKS; i++)
|
||||
{
|
||||
if (!(record->xl_info & XLR_BKP_BLOCK(i)))
|
||||
continue;
|
||||
|
||||
memcpy(&bkpb, blk, sizeof(BkpBlock));
|
||||
blk += sizeof(BkpBlock);
|
||||
|
||||
if (i == block_index)
|
||||
{
|
||||
/* Found it, apply the update */
|
||||
return RestoreBackupBlockContents(lsn, bkpb, blk, get_cleanup_lock,
|
||||
keep_buffer);
|
||||
}
|
||||
|
||||
blk += BLCKSZ - bkpb.hole_length;
|
||||
}
|
||||
|
||||
/* Caller specified a bogus block_index */
|
||||
elog(ERROR, "failed to restore block_index %d", block_index);
|
||||
return InvalidBuffer; /* keep compiler quiet */
|
||||
}
|
||||
|
||||
/*
|
||||
* Workhorse for RestoreBackupBlock usable without an xlog record
|
||||
*
|
||||
* Restores a full-page image from BkpBlock and a data pointer.
|
||||
*/
|
||||
Buffer
|
||||
RestoreBackupBlockContents(XLogRecPtr lsn, BkpBlock bkpb, char *blk,
|
||||
bool get_cleanup_lock, bool keep_buffer)
|
||||
{
|
||||
Buffer buffer;
|
||||
Page page;
|
||||
|
||||
buffer = XLogReadBufferExtended(bkpb.node, bkpb.fork, bkpb.block,
|
||||
get_cleanup_lock ? RBM_ZERO_AND_CLEANUP_LOCK : RBM_ZERO_AND_LOCK);
|
||||
Assert(BufferIsValid(buffer));
|
||||
|
||||
page = (Page) BufferGetPage(buffer);
|
||||
|
||||
if (bkpb.hole_length == 0)
|
||||
{
|
||||
memcpy((char *) page, blk, BLCKSZ);
|
||||
}
|
||||
else
|
||||
{
|
||||
memcpy((char *) page, blk, bkpb.hole_offset);
|
||||
/* must zero-fill the hole */
|
||||
MemSet((char *) page + bkpb.hole_offset, 0, bkpb.hole_length);
|
||||
memcpy((char *) page + (bkpb.hole_offset + bkpb.hole_length),
|
||||
blk + bkpb.hole_offset,
|
||||
BLCKSZ - (bkpb.hole_offset + bkpb.hole_length));
|
||||
}
|
||||
|
||||
/*
|
||||
* The checksum value on this page is currently invalid. We don't need to
|
||||
* reset it here since it will be set before being written.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The page may be uninitialized. If so, we can't set the LSN because that
|
||||
* would corrupt the page.
|
||||
*/
|
||||
if (!PageIsNew(page))
|
||||
{
|
||||
PageSetLSN(page, lsn);
|
||||
}
|
||||
MarkBufferDirty(buffer);
|
||||
|
||||
if (!keep_buffer)
|
||||
UnlockReleaseBuffer(buffer);
|
||||
|
||||
return buffer;
|
||||
}
|
||||
|
||||
/*
|
||||
* Struct actually returned by XLogFakeRelcacheEntry, though the declared
|
||||
* return type is Relation.
|
||||
|
||||
Reference in New Issue
Block a user