Adjustments to the btree fastpath optimization.
This optimization was introduced in commit 2b272734. The changes include
some additional comments and documentation, and also these more
substantive changes:
. ensure the optimization is only applied on the leaf node of a tree
  whose root is on level 2 or more. It's of little value on small trees.
. Delay calling RelationSetTargetBlock() until after the critical
  section of _bt_insertonpg
. ensure the optimization is also applied to unlogged tables.

Pavan Deolasee and Peter Geoghegan with some very light editing from me.

Discussion: https://postgr.es/m/CABOikdO8jhRarNC60nZLktZYhxt+TK8z_V97+Ny499YQdyAfug@mail.gmail.com
commit 074251db67
parent 31f1f0bb4f
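Everything in this patch revolves around one per-relation hint: the block number of the rightmost leaf this backend last inserted into, kept in the backend-local relation handle and read and written through RelationGetTargetBlock()/RelationSetTargetBlock(). A minimal standalone model of that hint, with hypothetical names (RelHandle, cache_rightmost_leaf and friends are illustrative, not PostgreSQL APIs):

#include <stdint.h>

typedef uint32_t BlockNum;
#define INVALID_BLOCK ((BlockNum) 0xFFFFFFFF)

/*
 * Stand-in for the backend-local relation handle; the real hint lives in the
 * relation's smgr entry and is accessed with RelationGetTargetBlock() /
 * RelationSetTargetBlock().
 */
typedef struct RelHandle
{
	BlockNum	cached_rightmost_leaf;	/* a hint only; may go stale at any time */
} RelHandle;

/* Remember the rightmost leaf we just inserted into. */
static inline void
cache_rightmost_leaf(RelHandle *rel, BlockNum blk)
{
	rel->cached_rightmost_leaf = blk;
}

/* Fetch the hint; INVALID_BLOCK means "no usable hint, do a full descent". */
static inline BlockNum
cached_rightmost_leaf_of(const RelHandle *rel)
{
	return rel->cached_rightmost_leaf;
}

/*
 * Cleared whenever an insert declines the fastpath, so a still-valid hint
 * later in the insert path implies the fastpath was actually taken.
 */
static inline void
forget_rightmost_leaf(RelHandle *rel)
{
	rel->cached_rightmost_leaf = INVALID_BLOCK;
}

The clearing helper captures the invariant the new assertion in _bt_insertonpg relies on: any insert that declines the fastpath drops the hint, so a still-valid hint later in the insert path implies the fastpath was taken.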
--- a/src/backend/access/nbtree/README
+++ b/src/backend/access/nbtree/README
@@ -375,6 +375,25 @@ positives, so long as it never gives a false negative. This makes it
 possible to implement the test with a small counter value stored on each
 index page.
 
+Fastpath For Index Insertion
+----------------------------
+
+We optimize for a common case of insertion of increasing index key
+values by caching the last page to which this backend inserted the last
+value, if this page was the rightmost leaf page. For the next insert, we
+can then quickly check if the cached page is still the rightmost leaf
+page and also the correct place to hold the current value. We can avoid
+the cost of walking down the tree in such common cases.
+
+The optimization works on the assumption that there can only be one
+non-ignorable leaf rightmost page, and so even a RecentGlobalXmin style
+interlock isn't required. We cannot fail to detect that our hint was
+invalidated, because there can only be one such page in the B-Tree at
+any time. It's possible that the page will be deleted and recycled
+without a backend's cached page also being detected as invalidated, but
+only when we happen to recycle a block that once again gets recycled as the
+rightmost leaf page.
+
 On-the-Fly Deletion Of Index Tuples
 -----------------------------------
 
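To make the new README section concrete, here is a compact standalone sketch of the insert path it describes, under assumed helpers (read_and_lock, descend_to_rightful_leaf, insert_into and the LeafPage fields are hypothetical, not PostgreSQL code): the backend revalidates its cached page first and falls back to a full root-to-leaf descent only when the hint no longer applies.

#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

typedef uint32_t BlockNum;
#define INVALID_BLOCK ((BlockNum) 0xFFFFFFFF)

typedef struct LeafPage
{
	bool		is_rightmost;	/* still the rightmost leaf? */
	bool		is_ignorable;	/* deleted/half-dead pages must not be used */
	size_t		free_space;		/* bytes free on the page */
	int64_t		first_key;		/* lowest key currently on the page */
	int			nitems;			/* number of items currently on the page */
} LeafPage;

/* Assumed environment-provided helpers (declarations only). */
extern LeafPage *read_and_lock(BlockNum blk);
extern void		unlock(LeafPage *page);
extern BlockNum descend_to_rightful_leaf(int64_t key);	/* full tree walk */
extern void		insert_into(LeafPage *page, int64_t key);

static BlockNum cached_block = INVALID_BLOCK;	/* per-backend hint */

void
btree_insert(int64_t key, size_t item_size)
{
	if (cached_block != INVALID_BLOCK)
	{
		LeafPage   *page = read_and_lock(cached_block);

		/*
		 * The hint is usable only if the page is still the rightmost,
		 * non-ignorable leaf, has room for the new item, and the key is
		 * strictly greater than the first key on the page -- which, on the
		 * rightmost leaf, means the key belongs on this page.
		 */
		if (page->is_rightmost && !page->is_ignorable &&
			page->nitems > 0 &&
			page->free_space > item_size &&
			key > page->first_key)
		{
			insert_into(page, key);
			unlock(page);
			return;				/* fastpath: no descent needed */
		}

		/* Hint went stale; drop it and take the normal path. */
		unlock(page);
		cached_block = INVALID_BLOCK;
	}

	/* Normal path: walk down from the root, then insert. */
	{
		BlockNum	target = descend_to_rightful_leaf(key);
		LeafPage   *page = read_and_lock(target);

		insert_into(page, key);
		if (page->is_rightmost)
			cached_block = target;	/* remember for the next insert */
		unlock(page);
	}
}

The real code below is stricter about when it sets the hint: it skips the rightmost leaf when that leaf is also the root, and it keeps the hint only when the tree height is at least BTREE_FASTPATH_MIN_LEVEL.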
--- a/src/backend/access/nbtree/nbtinsert.c
+++ b/src/backend/access/nbtree/nbtinsert.c
@@ -26,6 +26,8 @@
 #include "storage/smgr.h"
 #include "utils/tqual.h"
 
+/* Minimum tree height for application of fastpath optimization */
+#define BTREE_FASTPATH_MIN_LEVEL	2
 
 typedef struct
 {
@@ -125,7 +127,7 @@ _bt_doinsert(Relation rel, IndexTuple itup,
 	/*
 	 * It's very common to have an index on an auto-incremented or
 	 * monotonically increasing value. In such cases, every insertion happens
-	 * towards the end of the index. We try to optimise that case by caching
+	 * towards the end of the index. We try to optimize that case by caching
 	 * the right-most leaf of the index. If our cached block is still the
 	 * rightmost leaf, has enough free space to accommodate a new entry and
 	 * the insertion key is strictly greater than the first key in this page,
@@ -176,13 +178,17 @@ top:
 	 * the first key on the page.
 	 */
 	if (P_ISLEAF(lpageop) && P_RIGHTMOST(lpageop) &&
-		!P_INCOMPLETE_SPLIT(lpageop) &&
 		!P_IGNORE(lpageop) &&
 		(PageGetFreeSpace(page) > itemsz) &&
 		PageGetMaxOffsetNumber(page) >= P_FIRSTDATAKEY(lpageop) &&
 		_bt_compare(rel, indnkeyatts, itup_scankey, page,
 					P_FIRSTDATAKEY(lpageop)) > 0)
 	{
+		/*
+		 * The right-most block should never have an incomplete split. But
+		 * be paranoid and check for it anyway.
+		 */
+		Assert(!P_INCOMPLETE_SPLIT(lpageop));
 		fastpath = true;
 	}
 	else
@@ -868,6 +874,24 @@ _bt_insertonpg(Relation rel,
 		bool		newitemonleft;
 		Buffer		rbuf;
 
+		/*
+		 * If we're here then a pagesplit is needed. We should never reach here
+		 * if we're using the fastpath since we should have checked for all the
+		 * required conditions, including the fact that this page has enough
+		 * freespace. Note that this routine can in theory deal with the
+		 * situation where a NULL stack pointer is passed (that's what would
+		 * happen if the fastpath is taken), like it does during crash
+		 * recovery. But that path is much slower, defeating the very purpose
+		 * of the optimization. The following assertion should protect us from
+		 * any future code changes that invalidate those assumptions.
+		 *
+		 * Note that whenever we fail to take the fastpath, we clear the
+		 * cached block. Checking for a valid cached block at this point is
+		 * enough to decide whether we're in a fastpath or not.
+		 */
+		Assert(!(P_ISLEAF(lpageop) &&
+				 BlockNumberIsValid(RelationGetTargetBlock(rel))));
+
 		/* Choose the split point */
 		firstright = _bt_findsplitloc(rel, page,
 									  newitemoff, itemsz,
@@ -905,6 +929,7 @@ _bt_insertonpg(Relation rel,
 		BTMetaPageData *metad = NULL;
 		OffsetNumber itup_off;
 		BlockNumber itup_blkno;
+		BlockNumber cachedBlock = InvalidBlockNumber;
 
 		itup_off = newitemoff;
 		itup_blkno = BufferGetBlockNumber(buf);
@@ -962,6 +987,15 @@ _bt_insertonpg(Relation rel,
 			MarkBufferDirty(cbuf);
 		}
 
+		/*
+		 * Cache the block information if we just inserted into the rightmost
+		 * leaf page of the index and it's not the root page. For very small
+		 * index where root is also the leaf, there is no point trying for any
+		 * optimization.
+		 */
+		if (P_RIGHTMOST(lpageop) && P_ISLEAF(lpageop) && !P_ISROOT(lpageop))
+			cachedBlock = BufferGetBlockNumber(buf);
+
 		/* XLOG stuff */
 		if (RelationNeedsWAL(rel))
 		{
@@ -977,16 +1011,7 @@ _bt_insertonpg(Relation rel,
 			XLogRegisterData((char *) &xlrec, SizeOfBtreeInsert);
 
 			if (P_ISLEAF(lpageop))
-			{
 				xlinfo = XLOG_BTREE_INSERT_LEAF;
-
-				/*
-				 * Cache the block information if we just inserted into the
-				 * rightmost leaf page of the index.
-				 */
-				if (P_RIGHTMOST(lpageop))
-					RelationSetTargetBlock(rel, BufferGetBlockNumber(buf));
-			}
 			else
 			{
 				/*
@@ -1048,6 +1073,22 @@ _bt_insertonpg(Relation rel,
 		if (BufferIsValid(cbuf))
 			_bt_relbuf(rel, cbuf);
 		_bt_relbuf(rel, buf);
+
+		/*
+		 * If we decided to cache the insertion target block, then set it now.
+		 * But before that, check for the height of the tree and don't go for
+		 * the optimization for small indexes. We defer that check to this
+		 * point to ensure that we don't call _bt_getrootheight while holding
+		 * lock on any other block.
+		 *
+		 * We do this after dropping locks on all buffers. So the information
+		 * about whether the insertion block is still the rightmost block or
+		 * not may have changed in between. But we will deal with that during
+		 * next insert operation. No special care is required while setting it.
+		 */
+		if (BlockNumberIsValid(cachedBlock) &&
+			_bt_getrootheight(rel) >= BTREE_FASTPATH_MIN_LEVEL)
+			RelationSetTargetBlock(rel, cachedBlock);
 	}
 }
 
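The final hunk is the deferral called out in the commit message. A small sketch of the pattern, with hypothetical names (page_is_rightmost_leaf_nonroot, tree_root_level, release_all_locks, set_relation_hint are assumptions, not PostgreSQL functions): the candidate block is noted while the buffer is still locked, but the relation-level hint is set only after every lock has been dropped, and only when the tree is tall enough for the fastpath to pay off.

#include <stdbool.h>
#include <stdint.h>

typedef uint32_t BlockNum;
#define INVALID_BLOCK ((BlockNum) 0xFFFFFFFF)
#define FASTPATH_MIN_LEVEL 2			/* mirrors BTREE_FASTPATH_MIN_LEVEL */

/* Assumed environment-provided helpers (declarations only). */
extern bool page_is_rightmost_leaf_nonroot(BlockNum blk);
extern int	tree_root_level(void);		/* may read the metapage */
extern void release_all_locks(void);
extern void set_relation_hint(BlockNum blk);

void
finish_insert(BlockNum just_inserted_into)
{
	BlockNum	candidate = INVALID_BLOCK;

	/*
	 * Decide whether the page is worth remembering while we still hold it
	 * locked: it must be the rightmost leaf and not also the root.
	 */
	if (page_is_rightmost_leaf_nonroot(just_inserted_into))
		candidate = just_inserted_into;

	/* ... WAL logging and the rest of the critical section happen here ... */

	release_all_locks();

	/*
	 * Only now consult the tree height, so the metapage is never touched
	 * while another buffer is locked.  A stale candidate is harmless: the
	 * next insert revalidates the page before trusting it.
	 */
	if (candidate != INVALID_BLOCK &&
		tree_root_level() >= FASTPATH_MIN_LEVEL)
		set_relation_hint(candidate);
}

Setting the hint this late is safe because it is only ever a hint: if the page stops being the rightmost leaf in the meantime, the revalidation at the start of the next insert simply rejects it and falls back to the normal descent.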