mirror of
				https://github.com/postgres/postgres.git
				synced 2025-11-03 09:13:20 +03:00 
			
		
		
		
	Fix sloppiness about alignment requirements in findsplitloc() space
calculation, also make it stop when it has a 'good enough' split instead of exhaustively trying all split points.
This commit is contained in:
		@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *
 | 
					 *
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * IDENTIFICATION
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.60 2000/07/21 06:42:32 tgl Exp $
 | 
					 *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtinsert.c,v 1.61 2000/07/21 19:21:00 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -377,6 +377,10 @@ _bt_insertonpg(Relation rel,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Do we need to split the page to fit the item on it?
 | 
						 * Do we need to split the page to fit the item on it?
 | 
				
			||||||
 | 
						 *
 | 
				
			||||||
 | 
						 * Note: PageGetFreeSpace() subtracts sizeof(ItemIdData) from its
 | 
				
			||||||
 | 
						 * result, so this comparison is correct even though we appear to
 | 
				
			||||||
 | 
						 * be accounting only for the item and not for its line pointer.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (PageGetFreeSpace(page) < itemsz)
 | 
						if (PageGetFreeSpace(page) < itemsz)
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
@@ -743,11 +747,14 @@ _bt_findsplitloc(Relation rel,
 | 
				
			|||||||
	FindSplitData state;
 | 
						FindSplitData state;
 | 
				
			||||||
	int			leftspace,
 | 
						int			leftspace,
 | 
				
			||||||
				rightspace,
 | 
									rightspace,
 | 
				
			||||||
 | 
									goodenough,
 | 
				
			||||||
				dataitemtotal,
 | 
									dataitemtotal,
 | 
				
			||||||
				dataitemstoleft;
 | 
									dataitemstoleft;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 | 
						opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* Passed-in newitemsz is MAXALIGNED but does not include line pointer */
 | 
				
			||||||
 | 
						newitemsz += sizeof(ItemIdData);
 | 
				
			||||||
	state.newitemsz = newitemsz;
 | 
						state.newitemsz = newitemsz;
 | 
				
			||||||
	state.non_leaf = ! P_ISLEAF(opaque);
 | 
						state.non_leaf = ! P_ISLEAF(opaque);
 | 
				
			||||||
	state.have_split = false;
 | 
						state.have_split = false;
 | 
				
			||||||
@@ -758,11 +765,23 @@ _bt_findsplitloc(Relation rel,
 | 
				
			|||||||
		MAXALIGN(sizeof(BTPageOpaqueData))
 | 
							MAXALIGN(sizeof(BTPageOpaqueData))
 | 
				
			||||||
		+ sizeof(ItemIdData);
 | 
							+ sizeof(ItemIdData);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Finding the best possible split would require checking all the possible
 | 
				
			||||||
 | 
						 * split points, because of the high-key and left-key special cases.
 | 
				
			||||||
 | 
						 * That's probably more work than it's worth; instead, stop as soon as
 | 
				
			||||||
 | 
						 * we find a "good-enough" split, where good-enough is defined as an
 | 
				
			||||||
 | 
						 * imbalance in free space of no more than pagesize/16 (arbitrary...)
 | 
				
			||||||
 | 
						 * This should let us stop near the middle on most pages, instead of
 | 
				
			||||||
 | 
						 * plowing to the end.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
 | 
						goodenough = leftspace / 16;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* The right page will have the same high key as the old page */
 | 
						/* The right page will have the same high key as the old page */
 | 
				
			||||||
	if (!P_RIGHTMOST(opaque))
 | 
						if (!P_RIGHTMOST(opaque))
 | 
				
			||||||
	{
 | 
						{
 | 
				
			||||||
		itemid = PageGetItemId(page, P_HIKEY);
 | 
							itemid = PageGetItemId(page, P_HIKEY);
 | 
				
			||||||
		rightspace -= (int) (ItemIdGetLength(itemid) + sizeof(ItemIdData));
 | 
							rightspace -= (int) (MAXALIGN(ItemIdGetLength(itemid)) +
 | 
				
			||||||
 | 
												 sizeof(ItemIdData));
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	/* Count up total space in data items without actually scanning 'em */
 | 
						/* Count up total space in data items without actually scanning 'em */
 | 
				
			||||||
@@ -770,8 +789,7 @@ _bt_findsplitloc(Relation rel,
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * Scan through the data items and calculate space usage for a split
 | 
						 * Scan through the data items and calculate space usage for a split
 | 
				
			||||||
	 * at each possible position.  XXX we could probably stop somewhere
 | 
						 * at each possible position.
 | 
				
			||||||
	 * near the middle...
 | 
					 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
	dataitemstoleft = 0;
 | 
						dataitemstoleft = 0;
 | 
				
			||||||
	maxoff = PageGetMaxOffsetNumber(page);
 | 
						maxoff = PageGetMaxOffsetNumber(page);
 | 
				
			||||||
@@ -785,44 +803,65 @@ _bt_findsplitloc(Relation rel,
 | 
				
			|||||||
					rightfree;
 | 
										rightfree;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		itemid = PageGetItemId(page, offnum);
 | 
							itemid = PageGetItemId(page, offnum);
 | 
				
			||||||
		itemsz = ItemIdGetLength(itemid) + sizeof(ItemIdData);
 | 
							itemsz = MAXALIGN(ItemIdGetLength(itemid)) + sizeof(ItemIdData);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		/*
 | 
							/*
 | 
				
			||||||
		 * We have to allow for the current item becoming the high key of
 | 
							 * We have to allow for the current item becoming the high key of
 | 
				
			||||||
		 * the left page; therefore it counts against left space.
 | 
							 * the left page; therefore it counts against left space as well
 | 
				
			||||||
 | 
							 * as right space.
 | 
				
			||||||
		 */
 | 
							 */
 | 
				
			||||||
		leftfree = leftspace - dataitemstoleft - (int) itemsz;
 | 
							leftfree = leftspace - dataitemstoleft - (int) itemsz;
 | 
				
			||||||
		rightfree = rightspace - (dataitemtotal - dataitemstoleft);
 | 
							rightfree = rightspace - (dataitemtotal - dataitemstoleft);
 | 
				
			||||||
		if (offnum < newitemoff)
 | 
							/*
 | 
				
			||||||
 | 
							 * Will the new item go to left or right of split?
 | 
				
			||||||
 | 
							 */
 | 
				
			||||||
 | 
							if (offnum > newitemoff)
 | 
				
			||||||
 | 
								_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
				
			||||||
 | 
												  true, itemsz);
 | 
				
			||||||
 | 
							else if (offnum < newitemoff)
 | 
				
			||||||
			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
								_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
				
			||||||
							  false, itemsz);
 | 
												  false, itemsz);
 | 
				
			||||||
		else if (offnum > newitemoff)
 | 
					 | 
				
			||||||
			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
					 | 
				
			||||||
							  true, itemsz);
 | 
					 | 
				
			||||||
		else
 | 
							else
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			/* need to try it both ways!! */
 | 
								/* need to try it both ways! */
 | 
				
			||||||
			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
					 | 
				
			||||||
							  false, newitemsz);
 | 
					 | 
				
			||||||
			_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
								_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
				
			||||||
							  true, itemsz);
 | 
												  true, itemsz);
 | 
				
			||||||
 | 
								/* here we are contemplating newitem as first on right */
 | 
				
			||||||
 | 
								_bt_checksplitloc(&state, offnum, leftfree, rightfree,
 | 
				
			||||||
 | 
												  false, newitemsz);
 | 
				
			||||||
		}
 | 
							}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
							/* Abort scan once we find a good-enough choice */
 | 
				
			||||||
 | 
							if (state.have_split && state.best_delta <= goodenough)
 | 
				
			||||||
 | 
								break;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		dataitemstoleft += itemsz;
 | 
							dataitemstoleft += itemsz;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * I believe it is not possible to fail to find a feasible split,
 | 
				
			||||||
 | 
						 * but just in case ...
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
	if (! state.have_split)
 | 
						if (! state.have_split)
 | 
				
			||||||
		elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s",
 | 
							elog(FATAL, "_bt_findsplitloc: can't find a feasible split point for %s",
 | 
				
			||||||
			 RelationGetRelationName(rel));
 | 
								 RelationGetRelationName(rel));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	*newitemonleft = state.newitemonleft;
 | 
						*newitemonleft = state.newitemonleft;
 | 
				
			||||||
	return state.firstright;
 | 
						return state.firstright;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Subroutine to analyze a particular possible split choice (ie, firstright
 | 
				
			||||||
 | 
					 * and newitemonleft settings), and record the best split so far in *state.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
static void
 | 
					static void
 | 
				
			||||||
_bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
 | 
					_bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
 | 
				
			||||||
				  int leftfree, int rightfree,
 | 
									  int leftfree, int rightfree,
 | 
				
			||||||
				  bool newitemonleft, Size firstrightitemsz)
 | 
									  bool newitemonleft, Size firstrightitemsz)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
 | 
						/*
 | 
				
			||||||
 | 
						 * Account for the new item on whichever side it is to be put.
 | 
				
			||||||
 | 
						 */
 | 
				
			||||||
	if (newitemonleft)
 | 
						if (newitemonleft)
 | 
				
			||||||
		leftfree -= (int) state->newitemsz;
 | 
							leftfree -= (int) state->newitemsz;
 | 
				
			||||||
	else
 | 
						else
 | 
				
			||||||
@@ -833,7 +872,7 @@ _bt_checksplitloc(FindSplitData *state, OffsetNumber firstright,
 | 
				
			|||||||
	 */
 | 
						 */
 | 
				
			||||||
	if (state->non_leaf)
 | 
						if (state->non_leaf)
 | 
				
			||||||
		rightfree += (int) firstrightitemsz -
 | 
							rightfree += (int) firstrightitemsz -
 | 
				
			||||||
			(int) (sizeof(BTItemData) + sizeof(ItemIdData));
 | 
								(int) (MAXALIGN(sizeof(BTItemData)) + sizeof(ItemIdData));
 | 
				
			||||||
	/*
 | 
						/*
 | 
				
			||||||
	 * If feasible split point, remember best delta.
 | 
						 * If feasible split point, remember best delta.
 | 
				
			||||||
	 */
 | 
						 */
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -8,7 +8,7 @@
 | 
				
			|||||||
 *
 | 
					 *
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 * IDENTIFICATION
 | 
					 * IDENTIFICATION
 | 
				
			||||||
 *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.38 2000/07/21 06:42:33 tgl Exp $
 | 
					 *	  $Header: /cvsroot/pgsql/src/backend/access/nbtree/nbtutils.c,v 1.39 2000/07/21 19:21:00 tgl Exp $
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *-------------------------------------------------------------------------
 | 
					 *-------------------------------------------------------------------------
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
@@ -103,6 +103,9 @@ _bt_freeskey(ScanKey skey)
 | 
				
			|||||||
	pfree(skey);
 | 
						pfree(skey);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * free a retracement stack made by _bt_search.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
_bt_freestack(BTStack stack)
 | 
					_bt_freestack(BTStack stack)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -116,12 +119,38 @@ _bt_freestack(BTStack stack)
 | 
				
			|||||||
	}
 | 
						}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/*
 | 
				
			||||||
 | 
					 * Construct a BTItem from a plain IndexTuple.
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
 | 
					 * This is now useless code, since a BTItem *is* an index tuple with
 | 
				
			||||||
 | 
					 * no extra stuff.  We hang onto it for the moment to preserve the
 | 
				
			||||||
 | 
					 * notational distinction, in case we want to add some extra stuff
 | 
				
			||||||
 | 
					 * again someday.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
 | 
					BTItem
 | 
				
			||||||
 | 
					_bt_formitem(IndexTuple itup)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
						int			nbytes_btitem;
 | 
				
			||||||
 | 
						BTItem		btitem;
 | 
				
			||||||
 | 
						Size		tuplen;
 | 
				
			||||||
 | 
						extern Oid	newoid();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						/* make a copy of the index tuple with room for extra stuff */
 | 
				
			||||||
 | 
						tuplen = IndexTupleSize(itup);
 | 
				
			||||||
 | 
						nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						btitem = (BTItem) palloc(nbytes_btitem);
 | 
				
			||||||
 | 
						memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
						return btitem;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/*
 | 
					/*
 | 
				
			||||||
 *	_bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
 | 
					 *	_bt_orderkeys() -- Put keys in a sensible order for conjunctive quals.
 | 
				
			||||||
 *
 | 
					 *
 | 
				
			||||||
 *		The order of the keys in the qual match the ordering imposed by
 | 
					 *		The order of the keys in the qual match the ordering imposed by
 | 
				
			||||||
 *		the index.	This routine only needs to be called if there are
 | 
					 *		the index.	This routine only needs to be called if there is
 | 
				
			||||||
 *		more than one qual clauses using this index.
 | 
					 *		more than one qual clause using this index.
 | 
				
			||||||
 */
 | 
					 */
 | 
				
			||||||
void
 | 
					void
 | 
				
			||||||
_bt_orderkeys(Relation relation, BTScanOpaque so)
 | 
					_bt_orderkeys(Relation relation, BTScanOpaque so)
 | 
				
			||||||
@@ -189,7 +218,8 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
 | 
				
			|||||||
		if (i == numberOfKeys || cur->sk_attno != attno)
 | 
							if (i == numberOfKeys || cur->sk_attno != attno)
 | 
				
			||||||
		{
 | 
							{
 | 
				
			||||||
			if (cur->sk_attno != attno + 1 && i < numberOfKeys)
 | 
								if (cur->sk_attno != attno + 1 && i < numberOfKeys)
 | 
				
			||||||
				elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed", attno + 1);
 | 
									elog(ERROR, "_bt_orderkeys: key(s) for attribute %d missed",
 | 
				
			||||||
 | 
										 attno + 1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
			underEqualStrategy = (!equalStrategyEnd);
 | 
								underEqualStrategy = (!equalStrategyEnd);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -320,24 +350,18 @@ _bt_orderkeys(Relation relation, BTScanOpaque so)
 | 
				
			|||||||
	pfree(xform);
 | 
						pfree(xform);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
BTItem
 | 
					/*
 | 
				
			||||||
_bt_formitem(IndexTuple itup)
 | 
					 * Test whether an indextuple satisfies all the scankey conditions
 | 
				
			||||||
{
 | 
					 *
 | 
				
			||||||
	int			nbytes_btitem;
 | 
					 * If not ("false" return), the number of conditions satisfied is
 | 
				
			||||||
	BTItem		btitem;
 | 
					 * returned in *keysok.  Given proper ordering of the scankey conditions,
 | 
				
			||||||
	Size		tuplen;
 | 
					 * we can use this to determine whether it's worth continuing the scan.
 | 
				
			||||||
	extern Oid	newoid();
 | 
					 * See _bt_orderkeys().
 | 
				
			||||||
 | 
					 *
 | 
				
			||||||
	/* make a copy of the index tuple with room for the sequence number */
 | 
					 * HACK: *keysok == (Size) -1 means we stopped evaluating because we found
 | 
				
			||||||
	tuplen = IndexTupleSize(itup);
 | 
					 * a NULL value in the index tuple.  It's not quite clear to me why this
 | 
				
			||||||
	nbytes_btitem = tuplen + (sizeof(BTItemData) - sizeof(IndexTupleData));
 | 
					 * case has to be treated specially --- tgl 7/00.
 | 
				
			||||||
 | 
					 */
 | 
				
			||||||
	btitem = (BTItem) palloc(nbytes_btitem);
 | 
					 | 
				
			||||||
	memcpy((char *) &(btitem->bti_itup), (char *) itup, tuplen);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
	return btitem;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bool
 | 
					bool
 | 
				
			||||||
_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
 | 
					_bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
@@ -389,9 +413,9 @@ _bt_checkkeys(IndexScanDesc scan, IndexTuple tuple, Size *keysok)
 | 
				
			|||||||
		if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
 | 
							if (DatumGetBool(test) == !!(key[0].sk_flags & SK_NEGATE))
 | 
				
			||||||
			return false;
 | 
								return false;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		keysz -= 1;
 | 
					 | 
				
			||||||
		key++;
 | 
					 | 
				
			||||||
		(*keysok)++;
 | 
							(*keysok)++;
 | 
				
			||||||
 | 
							key++;
 | 
				
			||||||
 | 
							keysz--;
 | 
				
			||||||
	}
 | 
						}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
	return true;
 | 
						return true;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user