
Delete empty pages during GiST VACUUM.

To do this, we scan the GiST index twice. In the first pass we make note
of empty leaf pages and of internal pages. In the second pass we scan
through the internal pages, looking for downlinks to the empty pages.
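
As a rough sketch of that two-pass structure (illustrative only: the function
name, the use of Bitmapset to remember block numbers, and the loop bounds are
assumptions for the example, not the code added by this commit):

#include "postgres.h"

#include "access/gist_private.h"
#include "nodes/bitmapset.h"
#include "storage/bufmgr.h"

/* Hypothetical sketch of the two-pass scan; not the committed gistvacuum.c code. */
static void
gist_vacuum_two_pass_sketch(Relation rel)
{
	BlockNumber npages = RelationGetNumberOfBlocks(rel);
	Bitmapset  *empty_leaf_pages = NULL;
	Bitmapset  *internal_pages = NULL;
	BlockNumber blkno;

	/* Pass 1: physical scan; note empty leaf pages and all internal pages. */
	for (blkno = GIST_ROOT_BLKNO + 1; blkno < npages; blkno++)
	{
		Buffer		buf = ReadBuffer(rel, blkno);
		Page		page;

		LockBuffer(buf, GIST_SHARE);
		page = BufferGetPage(buf);

		if (GistPageIsLeaf(page))
		{
			if (PageGetMaxOffsetNumber(page) == 0)
				empty_leaf_pages = bms_add_member(empty_leaf_pages, (int) blkno);
		}
		else
			internal_pages = bms_add_member(internal_pages, (int) blkno);

		UnlockReleaseBuffer(buf);
	}

	/*
	 * Pass 2: revisit the remembered internal pages; any downlink that points
	 * to a page in empty_leaf_pages is removed and the leaf is marked
	 * deleted (WAL-logged via gistXLogPageDelete(), below).
	 */
}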

Deleting internal pages is still not supported: as in nbtree, the last
child of an internal page is never deleted. That means that if you have a
workload where new keys are always inserted into a different area than the
one old keys are removed from, the index will still grow without bound. But
the rate of growth will be an order of magnitude slower than before.

Author: Andrey Borodin
Discussion: https://www.postgresql.org/message-id/B1E4DF12-6CD3-4706-BDBD-BF3283328F60@yandex-team.ru
Committed by Heikki Linnakangas on 2019-03-22 13:21:20 +02:00
parent df816f6ad5
commit 7df159a620
11 changed files with 626 additions and 49 deletions


@@ -23,6 +23,7 @@
#include "miscadmin.h"
#include "storage/procarray.h"
#include "utils/memutils.h"
#include "utils/rel.h"
static MemoryContext opCtx; /* working memory for operations */
@@ -508,6 +509,64 @@ gistRedoCreateIndex(XLogReaderState *record)
	UnlockReleaseBuffer(buffer);
}
/* redo page deletion */
static void
gistRedoPageDelete(XLogReaderState *record)
{
	XLogRecPtr	lsn = record->EndRecPtr;
	gistxlogPageDelete *xldata = (gistxlogPageDelete *) XLogRecGetData(record);
	Buffer		parentBuffer;
	Buffer		leafBuffer;

	/* Mark the leaf page deleted, stamping it with the deletion XID */
	if (XLogReadBufferForRedo(record, 0, &leafBuffer) == BLK_NEEDS_REDO)
	{
		Page		page = (Page) BufferGetPage(leafBuffer);

		GistPageSetDeleteXid(page, xldata->deleteXid);
		GistPageSetDeleted(page);

		PageSetLSN(page, lsn);
		MarkBufferDirty(leafBuffer);
	}

	/* Remove the downlink from the parent page */
	if (XLogReadBufferForRedo(record, 1, &parentBuffer) == BLK_NEEDS_REDO)
	{
		Page		page = (Page) BufferGetPage(parentBuffer);

		PageIndexTupleDelete(page, xldata->downlinkOffset);

		PageSetLSN(page, lsn);
		MarkBufferDirty(parentBuffer);
	}

	if (BufferIsValid(parentBuffer))
		UnlockReleaseBuffer(parentBuffer);
	if (BufferIsValid(leafBuffer))
		UnlockReleaseBuffer(leafBuffer);
}
static void
gistRedoPageReuse(XLogReaderState *record)
{
	gistxlogPageReuse *xlrec = (gistxlogPageReuse *) XLogRecGetData(record);

	/*
	 * PAGE_REUSE records exist to provide a conflict point when we reuse
	 * pages in the index via the FSM. That's all they do though.
	 *
	 * latestRemovedXid was the page's deleteXid. The deleteXid <
	 * RecentGlobalXmin test in gistPageRecyclable() conceptually mirrors the
	 * pgxact->xmin > limitXmin test in GetConflictingVirtualXIDs().
	 * Consequently, one XID value achieves the same exclusion effect on
	 * master and standby.
	 */
	if (InHotStandby)
	{
		ResolveRecoveryConflictWithSnapshot(xlrec->latestRemovedXid,
											xlrec->node);
	}
}
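
For reference, the gistPageRecyclable() test that the comment above refers to
lives on the VACUUM side of the patch rather than in this file. Based on that
description it amounts to roughly the following sketch (the
GistPageGetDeleteXid() accessor is assumed here as the counterpart of the
GistPageSetDeleteXid() call seen in the redo routine above):

/* Sketch of the recyclability check described above; not part of this hunk. */
static bool
gistPageRecyclable(Page page)
{
	/*
	 * A deleted page may be recycled once its deleteXid is older than
	 * RecentGlobalXmin: at that point no running transaction can still be
	 * following a stale downlink to it.
	 */
	return PageIsNew(page) ||
		(GistPageIsDeleted(page) &&
		 TransactionIdPrecedes(GistPageGetDeleteXid(page), RecentGlobalXmin));
}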
void
gist_redo(XLogReaderState *record)
{
@@ -529,12 +588,18 @@ gist_redo(XLogReaderState *record)
		case XLOG_GIST_DELETE:
			gistRedoDeleteRecord(record);
			break;
		case XLOG_GIST_PAGE_REUSE:
			gistRedoPageReuse(record);
			break;
		case XLOG_GIST_PAGE_SPLIT:
			gistRedoPageSplitRecord(record);
			break;
		case XLOG_GIST_CREATE_INDEX:
			gistRedoCreateIndex(record);
			break;
		case XLOG_GIST_PAGE_DELETE:
			gistRedoPageDelete(record);
			break;
		default:
			elog(PANIC, "gist_redo: unknown op code %u", info);
	}
@@ -653,6 +718,56 @@ gistXLogSplit(bool page_is_leaf,
	return recptr;
}
/*
 * Write XLOG record describing a page deletion. This also includes removal
 * of the downlink from the parent page.
 */
XLogRecPtr
gistXLogPageDelete(Buffer buffer, TransactionId xid,
				   Buffer parentBuffer, OffsetNumber downlinkOffset)
{
	gistxlogPageDelete xlrec;
	XLogRecPtr	recptr;

	xlrec.deleteXid = xid;
	xlrec.downlinkOffset = downlinkOffset;

	XLogBeginInsert();
	XLogRegisterData((char *) &xlrec, SizeOfGistxlogPageDelete);

	XLogRegisterBuffer(0, buffer, REGBUF_STANDARD);
	XLogRegisterBuffer(1, parentBuffer, REGBUF_STANDARD);

	recptr = XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_DELETE);

	return recptr;
}
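
For context, the caller on the VACUUM side is expected to follow the usual WAL
convention around this function: perform both page modifications and emit the
record inside a single critical section, then stamp both pages with the
returned LSN. A sketch of that pattern, where rel, leafBuffer/leafPage,
parentBuffer/parentPage, txid and downlinkOffset are assumed locals rather
than code from this commit:

	START_CRIT_SECTION();

	/* mark the empty leaf page as deleted, remembering the deletion XID */
	GistPageSetDeleteXid(leafPage, txid);
	GistPageSetDeleted(leafPage);
	MarkBufferDirty(leafBuffer);

	/* remove the downlink from the parent page */
	PageIndexTupleDelete(parentPage, downlinkOffset);
	MarkBufferDirty(parentBuffer);

	if (RelationNeedsWAL(rel))
	{
		XLogRecPtr	recptr;

		recptr = gistXLogPageDelete(leafBuffer, txid, parentBuffer,
									downlinkOffset);
		PageSetLSN(leafPage, recptr);
		PageSetLSN(parentPage, recptr);
	}

	END_CRIT_SECTION();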
/*
 * Write XLOG record about reuse of a deleted page.
 */
void
gistXLogPageReuse(Relation rel, BlockNumber blkno, TransactionId latestRemovedXid)
{
	gistxlogPageReuse xlrec_reuse;

	/*
	 * Note that we don't register the buffer with the record, because this
	 * operation doesn't modify the page. This record only exists to provide a
	 * conflict point for Hot Standby.
	 */

	/* XLOG stuff */
	xlrec_reuse.node = rel->rd_node;
	xlrec_reuse.block = blkno;
	xlrec_reuse.latestRemovedXid = latestRemovedXid;

	XLogBeginInsert();
	XLogRegisterData((char *) &xlrec_reuse, SizeOfGistxlogPageReuse);

	XLogInsert(RM_GIST_ID, XLOG_GIST_PAGE_REUSE);
}
/*
 * Write XLOG record describing a page update. The update can include any
 * number of deletions and/or insertions of tuples on a single index page.