mirror of
https://github.com/postgres/postgres.git
synced 2025-05-05 09:19:17 +03:00
Fix sloppiness about alignment requirements in findsplitloc() space
calculation, also make it stop when it has a 'good enough' split instead of exhaustively trying all split points.
This commit is contained in:
parent
2f011a9c72
commit
1ea912e16d
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.60 2000/07/21 06:42:32 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.61 2000/07/21 19:21:00 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -377,6 +377,10 @@ _bt_insertonpg(Relation rel,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Do we need to split the page to fit the item on it?
|
* Do we need to split the page to fit the item on it?
|
||||||
|
*
|
||||||
|
* Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its
|
||||||
|
* result, so this comparison is correct even though we appear to
|
||||||
|
* be accounting only for the item and not for its line pointer.
|
||||||
*/
|
*/
|
||||||
if (PageGetFreeSpace(page) < itemsz)
|
if (PageGetFreeSpace(page) < itemsz)
|
||||||
{
|
{
|
||||||
@ -743,11 +747,14 @@ _bt_findsplitloc(Relation rel,
|
|||||||
FindSplitData state;
|
FindSplitData state;
|
||||||
int leftspace,
|
int leftspace,
|
||||||
rightspace,
|
rightspace,
|
||||||
|
goodenough,
|
||||||
dataitemtotal,
|
dataitemtotal,
|
||||||
dataitemstoleft;
|
dataitemstoleft;
|
||||||
|
|
||||||
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
opaque = (BTPageOpaque) PageGetSpecialPointer(page);
|
||||||
|
|
||||||
|
/* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
|
||||||
|
newitemsz += sizeof(ItemIdData);
|
||||||
state.newitemsz = newitemsz;
|
state.newitemsz = newitemsz;
|
||||||
state.non_leaf = ! P_ISLEAF(opaque);
|
state.non_leaf = ! P_ISLEAF(opaque);
|
||||||
state.have_split = false;
|
state.have_split = false;
|
||||||
@ -758,11 +765,23 @@ _bt_findsplitloc(Relation rel,
|
|||||||
MAXALIGN(sizeof(BTPageOpaqueData))
|
MAXALIGN(sizeof(BTPageOpaqueData))
|
||||||
+ sizeof(ItemIdData);
|
+ sizeof(ItemIdData);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Finding the best possible split would require checking all the possible
|
||||||
|
* split points, because of the high-key and left-key special cases.
|
||||||
|
* That's probably more work than it's worth; instead, stop as soon as
|
||||||
|
* we find a "good-enough" split, where good-enough is defined as an
|
||||||
|
* imbalance in free space of no more than pagesize/16 (arbitrary...)
|
||||||
|
* This should let us stop near the middle on most pages, instead of
|
||||||
|
* plowing to the end.
|
||||||
|
*/
|
||||||
|
goodenough = leftspace / 16;
|
||||||
|
|
||||||
/* The right page will have the same high key as the old page */
|
/* The right page will have the same high key as the old page */
|
||||||
if (!P_RIGHTMOST(opaque))
|
if (!P_RIGHTMOST(opaque))
|
||||||
{
|
{
|
||||||
itemid = PageGetItemId(page, P_HIKEY);
|
itemid = PageGetItemId(page, P_HIKEY);
|
||||||
rightspace -= (int) (ItemIdGetLength(itemid) + sizeof(ItemIdData));
|
rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
|
||||||
|
sizeof(ItemIdData));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Count up total space in data items without actually scanning 'em */
|
/* Count up total space in data items without actually scanning 'em */
|
||||||
@ -770,8 +789,7 @@ _bt_findsplitloc(Relation rel,
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan through the data items and calculate space usage for a split
|
* Scan through the data items and calculate space usage for a split
|
||||||
* at each possible position. XXX we could probably stop somewhere
|
* at each possible position.
|
||||||
* near the middle...
|
|
||||||
*/
|
*/
|
||||||
dataitemstoleft = 0;
|
dataitemstoleft = 0;
|
||||||
maxoff = PageGetMaxOffsetNumber(page);
|
maxoff = PageGetMaxOffsetNumber(page);
|
||||||
@ -785,44 +803,65 @@ _bt_findsplitloc(Relation rel,
|
|||||||
rightfree;
|
rightfree;
|
||||||
|
|
||||||
itemid = PageGetItemId(page, offnum);
|
itemid = PageGetItemId(page, offnum);
|
||||||
itemsz = ItemIdGetLength(itemid) + sizeof(ItemIdData);
|
itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We have to allow for the current item becoming the high key of
|
* We have to allow for the current item becoming the high key of
|
||||||
* the left page; therefore it counts against left space.
|
* the left page; therefore it counts against left space as well
|
||||||
|
* as right space.
|
||||||
*/
|
*/
|
||||||
leftfree = leftspace - dataitemstoleft - (int) itemsz;
|
leftfree = leftspace - dataitemstoleft - (int) itemsz;
|
||||||
rightfree = rightspace - (dataitemtotal - dataitemstoleft);
|
rightfree = rightspace - (dataitemtotal - dataitemstoleft);
|
||||||
if (offnum < newitemoff)
|
/*
|
||||||
|
* Will the new item go to left or right of split?
|
||||||
|
*/
|
||||||
|
if (offnum > newitemoff)
|
||||||
|
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
||||||
|
true, itemsz);
|
||||||
|
else if (offnum < newitemoff)
|
||||||
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
||||||
false, itemsz);
|
false, itemsz);
|
||||||
else if (offnum > newitemoff)
|
|
||||||
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
|
||||||
true, itemsz);
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/* need to try it both ways!! */
|
/* need to try it both ways! */
|
||||||
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
|
||||||
false, newitemsz);
|
|
||||||
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
||||||
true, itemsz);
|
true, itemsz);
|
||||||
|
/* here we are contemplating newitem as first on right */
|
||||||
|
_bt_checksplitloc(&state, offnum, leftfree, rightfree,
|
||||||
|
false, newitemsz);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Abort scan once we find a good-enough choice */
|
||||||
|
if (state.have_split && state.best_delta <= goodenough)
|
||||||
|
break;
|
||||||
|
|
||||||
dataitemstoleft += itemsz;
|
dataitemstoleft += itemsz;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* I believe it is not possible to fail to find a feasible split,
|
||||||
|
* but just in case ...
|
||||||
|
*/
|
||||||
if (! state.have_split)
|
if (! state.have_split)
|
||||||
elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s",
|
elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s",
|
||||||
RelationGetRelationName(rel));
|
RelationGetRelationName(rel));
|
||||||
|
|
||||||
*newitemonleft = state.newitemonleft;
|
*newitemonleft = state.newitemonleft;
|
||||||
return state.firstright;
|
return state.firstright;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Subroutine to analyze a particular possible split choice (ie, firstright
|
||||||
|
* and newitemonleft settings), and record the best split so far in *state.
|
||||||
|
*/
|
||||||
static void
|
static void
|
||||||
_bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
|
_bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
|
||||||
int leftfree, int rightfree,
|
int leftfree, int rightfree,
|
||||||
bool newitemonleft, Size firstrightitemsz)
|
bool newitemonleft, Size firstrightitemsz)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* Account for the new item on whichever side it is to be put.
|
||||||
|
*/
|
||||||
if (newitemonleft)
|
if (newitemonleft)
|
||||||
leftfree -= (int) state->newitemsz;
|
leftfree -= (int) state->newitemsz;
|
||||||
else
|
else
|
||||||
@ -833,7 +872,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
|
|||||||
*/
|
*/
|
||||||
if (state->non_leaf)
|
if (state->non_leaf)
|
||||||
rightfree += (int) firstrightitemsz -
|
rightfree += (int) firstrightitemsz -
|
||||||
(int) (sizeof(BTItemData) + sizeof(ItemIdData));
|
(int) (MAXALIGN(sizeof(BTItemData)) + sizeof(ItemIdData));
|
||||||
/*
|
/*
|
||||||
* If feasible split point, remember best delta.
|
* If feasible split point, remember best delta.
|
||||||
*/
|
*/
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.38 2000/07/21 06:42:33 tgl Exp $
|
* $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.39 2000/07/21 19:21:00 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -103,6 +103,9 @@ _bt_freeskey(ScanKey skey)
|
|||||||
pfree(skey);
|
pfree(skey);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* free a retracement stack made by _bt_search.
|
||||||
|
*/
|
||||||
void
|
void
|
||||||
_bt_freestack(BTStack stack)
|
_bt_freestack(BTStack stack)
|
||||||
{
|
{
|
||||||
@ -116,12 +119,38 @@ _bt_freestack(BTStack stack)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Construct a BTItem from a plain IndexTuple.
|
||||||
|
*
|
||||||
|
* This is now useless code, since a BTItem *is* an index tuple with
|
||||||
|
* no extra stuff. We hang onto it for the moment to preserve the
|
||||||
|
* notational distinction, in case we want to add some extra stuff
|
||||||
|
* again someday.
|
||||||
|
*/
|
||||||
|
BTItem
|
||||||
|
_bt_formitem(IndexTuple itup)
|
||||||
|
{
|
||||||
|
int nbytes_btitem;
|
||||||
|
BTItem btitem;
|
||||||
|
Size tuplen;
|
||||||
|
extern Oid newoid();
|
||||||
|
|
||||||
|
/* make a copy of the index tuple with room for extra stuff */
|
||||||
|
tuplen = IndexTupleSize(itup);
|
||||||
|
nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
|
||||||
|
|
||||||
|
btitem = (BTItem) palloc(nbytes_btitem);
|
||||||
|
memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
|
||||||
|
|
||||||
|
return btitem;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
|
* _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
|
||||||
*
|
*
|
||||||
* The order of the keys in the qual match the ordering imposed by
|
* The order of the keys in the qual match the ordering imposed by
|
||||||
* the index. This routine only needs to be called if there are
|
* the index. This routine only needs to be called if there is
|
||||||
* more than one qual clauses using this index.
|
* more than one qual clause using this index.
|
||||||
*/
|
*/
|
||||||
void
|
void
|
||||||
_bt_orderkeys(Relation relation, BTScanOpaque so)
|
_bt_orderkeys(Relation relation, BTScanOpaque so)
|
||||||
@ -189,7 +218,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
|
|||||||
if (i == numberOfKeys || cur->sk_attno != attno)
|
if (i == numberOfKeys || cur->sk_attno != attno)
|
||||||
{
|
{
|
||||||
if (cur->sk_attno != attno + 1 && i < numberOfKeys)
|
if (cur->sk_attno != attno + 1 && i < numberOfKeys)
|
||||||
elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1);
|
elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed",
|
||||||
|
attno + 1);
|
||||||
|
|
||||||
underEqualStrategy = (!equalStrategyEnd);
|
underEqualStrategy = (!equalStrategyEnd);
|
||||||
|
|
||||||
@ -320,24 +350,18 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
|
|||||||
pfree(xform);
|
pfree(xform);
|
||||||
}
|
}
|
||||||
|
|
||||||
BTItem
|
/*
|
||||||
_bt_formitem(IndexTuple itup)
|
* Test whether an indextuple satisfies all the scankey conditions
|
||||||
{
|
*
|
||||||
int nbytes_btitem;
|
* If not ("false" return), the number of conditions satisfied is
|
||||||
BTItem btitem;
|
* returned in *keysok. Given proper ordering of the scankey conditions,
|
||||||
Size tuplen;
|
* we can use this to determine whether it's worth continuing the scan.
|
||||||
extern Oid newoid();
|
* See _bt_orderkeys().
|
||||||
|
*
|
||||||
/* make a copy of the index tuple with room for the sequence number */
|
* HACK: *keysok == (Size) -1 means we stopped evaluating because we found
|
||||||
tuplen = IndexTupleSize(itup);
|
* a NULL value in the index tuple. It's not quite clear to me why this
|
||||||
nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
|
* case has to be treated specially --- tgl 7/00.
|
||||||
|
*/
|
||||||
btitem = (BTItem) palloc(nbytes_btitem);
|
|
||||||
memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
|
|
||||||
|
|
||||||
return btitem;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool
|
bool
|
||||||
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
|
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
|
||||||
{
|
{
|
||||||
@ -389,9 +413,9 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
|
|||||||
if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
|
if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
keysz -= 1;
|
|
||||||
key++;
|
|
||||||
(*keysok)++;
|
(*keysok)++;
|
||||||
|
key++;
|
||||||
|
keysz--;
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user