From 1ea912e16d24487d53ae633b05dad82e1b7c285f Mon Sep 17 00:00:00 2001 From: Tom Lane Date: Fri, 21 Jul 2000 19:21:00 +0000 Subject: [PATCH] Fix sloppiness about alignment requirements in findsplitloc() space calculation, also make it stop when it has a 'good enough' split instead of exhaustively trying all split points. --- src/backend/access/nbtree/nbtinsert.c | 67 +++++++++++++++++++------ src/backend/access/nbtree/nbtutils.c | 72 ++++++++++++++++++--------- 2 files changed, 101 insertions(+), 38 deletions(-) diff --git a/src/backend/access/nbtree/nbtinsert.c b/src/backend/access/nbtree/nbtinsert.c index 6be8e97b508..f16168eadd2 100644 --- a/src/backend/access/nbtree/nbtinsert.c +++ b/src/backend/access/nbtree/nbtinsert.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.60 2000/07/21 06:42:32 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.61 2000/07/21 19:21:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -377,6 +377,10 @@ _bt_insertonpg(Relation rel, /* * Do we need to split the page to fit the item on it? + * + * Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its + * result, so this comparison is correct even though we appear to + * be accounting only for the item and not for its line pointer. */ if (PageGetFreeSpace(page) < itemsz) { @@ -743,11 +747,14 @@ _bt_findsplitloc(Relation rel, FindSplitData state; int leftspace, rightspace, + goodenough, dataitemtotal, dataitemstoleft; opaque = (BTPageOpaque) PageGetSpecialPointer(page); + /* Passed-in newitemsz is MAXALIGNED but does not include line pointer */ + newitemsz += sizeof(ItemIdData); state.newitemsz = newitemsz; state.non_leaf = ! P_ISLEAF(opaque); state.have_split = false; @@ -758,11 +765,23 @@ _bt_findsplitloc(Relation rel, MAXALIGN(sizeof(BTPageOpaqueData)) + sizeof(ItemIdData); + /* + * Finding the best possible split would require checking all the possible + * split points, because of the high-key and left-key special cases. + * That's probably more work than it's worth; instead, stop as soon as + * we find a "good-enough" split, where good-enough is defined as an + * imbalance in free space of no more than pagesize/16 (arbitrary...) + * This should let us stop near the middle on most pages, instead of + * plowing to the end. + */ + goodenough = leftspace / 16; + /* The right page will have the same high key as the old page */ if (!P_RIGHTMOST(opaque)) { itemid = PageGetItemId(page, P_HIKEY); - rightspace -= (int) (ItemIdGetLength(itemid) + sizeof(ItemIdData)); + rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) + + sizeof(ItemIdData)); } /* Count up total space in data items without actually scanning 'em */ @@ -770,8 +789,7 @@ _bt_findsplitloc(Relation rel, /* * Scan through the data items and calculate space usage for a split - * at each possible position. XXX we could probably stop somewhere - * near the middle... + * at each possible position. */ dataitemstoleft = 0; maxoff = PageGetMaxOffsetNumber(page); @@ -785,44 +803,65 @@ _bt_findsplitloc(Relation rel, rightfree; itemid = PageGetItemId(page, offnum); - itemsz = ItemIdGetLength(itemid) + sizeof(ItemIdData); + itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData); /* * We have to allow for the current item becoming the high key of - * the left page; therefore it counts against left space. + * the left page; therefore it counts against left space as well + * as right space. */ leftfree = leftspace - dataitemstoleft - (int) itemsz; rightfree = rightspace - (dataitemtotal - dataitemstoleft); - if (offnum < newitemoff) + /* + * Will the new item go to left or right of split? + */ + if (offnum > newitemoff) + _bt_checksplitloc(&state, offnum, leftfree, rightfree, + true, itemsz); + else if (offnum < newitemoff) _bt_checksplitloc(&state, offnum, leftfree, rightfree, false, itemsz); - else if (offnum > newitemoff) - _bt_checksplitloc(&state, offnum, leftfree, rightfree, - true, itemsz); else { - /* need to try it both ways!! */ - _bt_checksplitloc(&state, offnum, leftfree, rightfree, - false, newitemsz); + /* need to try it both ways! */ _bt_checksplitloc(&state, offnum, leftfree, rightfree, true, itemsz); + /* here we are contemplating newitem as first on right */ + _bt_checksplitloc(&state, offnum, leftfree, rightfree, + false, newitemsz); } + /* Abort scan once we find a good-enough choice */ + if (state.have_split && state.best_delta <= goodenough) + break; + dataitemstoleft += itemsz; } + /* + * I believe it is not possible to fail to find a feasible split, + * but just in case ... + */ if (! state.have_split) elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s", RelationGetRelationName(rel)); + *newitemonleft = state.newitemonleft; return state.firstright; } +/* + * Subroutine to analyze a particular possible split choice (ie, firstright + * and newitemonleft settings), and record the best split so far in *state. + */ static void _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright, int leftfree, int rightfree, bool newitemonleft, Size firstrightitemsz) { + /* + * Account for the new item on whichever side it is to be put. + */ if (newitemonleft) leftfree -= (int) state->newitemsz; else @@ -833,7 +872,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright, */ if (state->non_leaf) rightfree += (int) firstrightitemsz - - (int) (sizeof(BTItemData) + sizeof(ItemIdData)); + (int) (MAXALIGN(sizeof(BTItemData)) + sizeof(ItemIdData)); /* * If feasible split point, remember best delta. */ diff --git a/src/backend/access/nbtree/nbtutils.c b/src/backend/access/nbtree/nbtutils.c index aabdf809007..eb77bfd8dae 100644 --- a/src/backend/access/nbtree/nbtutils.c +++ b/src/backend/access/nbtree/nbtutils.c @@ -8,7 +8,7 @@ * * * IDENTIFICATION - * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.38 2000/07/21 06:42:33 tgl Exp $ + * $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.39 2000/07/21 19:21:00 tgl Exp $ * *------------------------------------------------------------------------- */ @@ -103,6 +103,9 @@ _bt_freeskey(ScanKey skey) pfree(skey); } +/* + * free a retracement stack made by _bt_search. + */ void _bt_freestack(BTStack stack) { @@ -116,12 +119,38 @@ _bt_freestack(BTStack stack) } } +/* + * Construct a BTItem from a plain IndexTuple. + * + * This is now useless code, since a BTItem *is* an index tuple with + * no extra stuff. We hang onto it for the moment to preserve the + * notational distinction, in case we want to add some extra stuff + * again someday. + */ +BTItem +_bt_formitem(IndexTuple itup) +{ + int nbytes_btitem; + BTItem btitem; + Size tuplen; + extern Oid newoid(); + + /* make a copy of the index tuple with room for extra stuff */ + tuplen = IndexTupleSize(itup); + nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData)); + + btitem = (BTItem) palloc(nbytes_btitem); + memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen); + + return btitem; +} + /* * _bt_orderkeys() -- Put keys in a sensible order for conjunctive quals. * * The order of the keys in the qual match the ordering imposed by - * the index. This routine only needs to be called if there are - * more than one qual clauses using this index. + * the index. This routine only needs to be called if there is + * more than one qual clause using this index. */ void _bt_orderkeys(Relation relation, BTScanOpaque so) @@ -189,7 +218,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) if (i == numberOfKeys || cur->sk_attno != attno) { if (cur->sk_attno != attno + 1 && i < numberOfKeys) - elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1); + elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed", + attno + 1); underEqualStrategy = (!equalStrategyEnd); @@ -320,24 +350,18 @@ _bt_orderkeys(Relation relation, BTScanOpaque so) pfree(xform); } -BTItem -_bt_formitem(IndexTuple itup) -{ - int nbytes_btitem; - BTItem btitem; - Size tuplen; - extern Oid newoid(); - - /* make a copy of the index tuple with room for the sequence number */ - tuplen = IndexTupleSize(itup); - nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData)); - - btitem = (BTItem) palloc(nbytes_btitem); - memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen); - - return btitem; -} - +/* + * Test whether an indextuple satisfies all the scankey conditions + * + * If not ("false" return), the number of conditions satisfied is + * returned in *keysok. Given proper ordering of the scankey conditions, + * we can use this to determine whether it's worth continuing the scan. + * See _bt_orderkeys(). + * + * HACK: *keysok == (Size) -1 means we stopped evaluating because we found + * a NULL value in the index tuple. It's not quite clear to me why this + * case has to be treated specially --- tgl 7/00. + */ bool _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok) { @@ -389,9 +413,9 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok) if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE)) return false; - keysz -= 1; - key++; (*keysok)++; + key++; + keysz--; } return true;