/*-------------------------------------------------------------------------
*
* rtree.c
* interface routines for the postgres rtree indexed access method.
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/access/rtree/Attic/rtree.c,v 1.61 2001/03/22 03:59:16 momjian Exp $
*
*-------------------------------------------------------------------------
*/
#include "postgres.h"
#include "access/genam.h"
#include "access/heapam.h"
#include "access/rtree.h"
#include "access/xlogutils.h"
#include "catalog/index.h"
#include "executor/executor.h"
#include "miscadmin.h"
/*
* XXX We assume that all datatypes indexable in rtrees are pass-by-reference.
* To fix this, you'd need to improve the IndexTupleGetDatum() macro, and
* do something with the various datum-pfreeing code. However, it's not that
* unreasonable an assumption in practice.
*/
#define IndexTupleGetDatum(itup) \
PointerGetDatum(((char *) (itup)) + sizeof(IndexTupleData))
/*
* Space-allocation macros. Note we count the item's line pointer in its size.
*/
#define RTPageAvailSpace \
(BLCKSZ - (sizeof(PageHeaderData) - sizeof(ItemIdData)) \
- MAXALIGN(sizeof(RTreePageOpaqueData)))
#define IndexTupleTotalSize(itup) \
(MAXALIGN(IndexTupleSize(itup)) + sizeof(ItemIdData))
#define IndexTupleAttSize(itup) \
(IndexTupleSize(itup) - sizeof(IndexTupleData))
/* results of rtpicksplit() */
typedef struct SPLITVEC
{
	OffsetNumber *spl_left;		/* array of entries that go left */
	int			spl_nleft;		/* number of entries in spl_left */
	Datum		spl_ldatum;		/* union key for the left page */
	OffsetNumber *spl_right;	/* array of entries that go right */
	int			spl_nright;		/* number of entries in spl_right */
	Datum		spl_rdatum;		/* union key for the right page */
} SPLITVEC;
typedef struct RTSTATE
{
FmgrInfo unionFn; /* union function */
FmgrInfo sizeFn; /* size function */
FmgrInfo interFn; /* intersection function */
} RTSTATE;
/* non-export function prototypes */
static InsertIndexResult rtdoinsert(Relation r, IndexTuple itup,
RTSTATE *rtstate);
static void rttighten(Relation r, RTSTACK *stk, Datum datum, int att_size,
RTSTATE *rtstate);
static InsertIndexResult rtdosplit(Relation r, Buffer buffer, RTSTACK *stack,
IndexTuple itup, RTSTATE *rtstate);
static void rtintinsert(Relation r, RTSTACK *stk, IndexTuple ltup,
IndexTuple rtup, RTSTATE *rtstate);
static void rtnewroot(Relation r, IndexTuple lt, IndexTuple rt);
static void rtpicksplit(Relation r, Page page, SPLITVEC *v, IndexTuple itup,
RTSTATE *rtstate);
static void RTInitBuffer(Buffer b, uint32 f);
static OffsetNumber choose(Relation r, Page p, IndexTuple it,
RTSTATE *rtstate);
static int nospace(Page p, IndexTuple it);
static void initRtstate(RTSTATE *rtstate, Relation index);
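
/*
 * rtbuild -- build a new rtree index.
 *
 * We scan the heap once and insert an index tuple for each qualifying
 * heap tuple, one at a time, via rtdoinsert().  There is no special
 * bulk-load path; an rtree is always built by repeated insertion.
 */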
Datum
rtbuild(PG_FUNCTION_ARGS)
{
Relation heap = (Relation) PG_GETARG_POINTER(0);
Relation index = (Relation) PG_GETARG_POINTER(1);
IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2);
Node *oldPred = (Node *) PG_GETARG_POINTER(3);
#ifdef NOT_USED
IndexStrategy istrat = (IndexStrategy) PG_GETARG_POINTER(4);
#endif
HeapScanDesc hscan;
HeapTuple htup;
IndexTuple itup;
TupleDesc htupdesc,
itupdesc;
Datum attdata[INDEX_MAX_KEYS];
char nulls[INDEX_MAX_KEYS];
int nhtups,
nitups;
Node *pred = indexInfo->ii_Predicate;
#ifndef OMIT_PARTIAL_INDEX
TupleTable tupleTable;
TupleTableSlot *slot;
#endif
ExprContext *econtext;
InsertIndexResult res = NULL;
Buffer buffer = InvalidBuffer;
RTSTATE rtState;
initRtstate(&rtState, index);
/*
* We expect to be called exactly once for any index relation. If
* that's not the case, big trouble's what we have.
*/
if (oldPred == NULL && RelationGetNumberOfBlocks(index) != 0)
elog(ERROR, "%s already contains data", RelationGetRelationName(index));
/* initialize the root page (if this is a new index) */
if (oldPred == NULL)
{
buffer = ReadBuffer(index, P_NEW);
RTInitBuffer(buffer, F_LEAF);
WriteBuffer(buffer);
}
/* get tuple descriptors for heap and index relations */
htupdesc = RelationGetDescr(heap);
itupdesc = RelationGetDescr(index);
/*
* If this is a predicate (partial) index, we will need to evaluate
* the predicate using ExecQual, which requires the current tuple to
* be in a slot of a TupleTable. In addition, ExecQual must have an
* ExprContext referring to that slot. Here, we initialize dummy
* TupleTable and ExprContext objects for this purpose. --Nels, Feb 92
*
* We construct the ExprContext anyway since we need a per-tuple
* temporary memory context for function evaluation -- tgl July 00
*/
#ifndef OMIT_PARTIAL_INDEX
if (pred != NULL || oldPred != NULL)
{
tupleTable = ExecCreateTupleTable(1);
slot = ExecAllocTableSlot(tupleTable);
ExecSetSlotDescriptor(slot, htupdesc, false);
}
else
{
tupleTable = NULL;
slot = NULL;
}
econtext = MakeExprContext(slot, TransactionCommandContext);
#else
econtext = MakeExprContext(NULL, TransactionCommandContext);
#endif /* OMIT_PARTIAL_INDEX */
/* count the tuples as we insert them */
nhtups = nitups = 0;
/* start a heap scan */
hscan = heap_beginscan(heap, 0, SnapshotNow, 0, (ScanKey) NULL);
while (HeapTupleIsValid(htup = heap_getnext(hscan, 0)))
{
MemoryContextReset(econtext->ecxt_per_tuple_memory);
nhtups++;
#ifndef OMIT_PARTIAL_INDEX
/*
* If oldPred != NULL, this is an EXTEND INDEX command, so skip
* this tuple if it was already in the existing partial index
*/
if (oldPred != NULL)
{
slot->val = htup;
if (ExecQual((List *) oldPred, econtext, false))
{
nitups++;
continue;
}
}
/*
* Skip this tuple if it doesn't satisfy the partial-index
* predicate
*/
if (pred != NULL)
{
slot->val = htup;
if (!ExecQual((List *) pred, econtext, false))
continue;
}
#endif /* OMIT_PARTIAL_INDEX */
nitups++;
/*
* For the current heap tuple, extract all the attributes we use
* in this index, and note which are null.
*/
FormIndexDatum(indexInfo,
htup,
htupdesc,
econtext->ecxt_per_tuple_memory,
attdata,
nulls);
/* form an index tuple and point it at the heap tuple */
itup = index_formtuple(itupdesc, attdata, nulls);
itup->t_tid = htup->t_self;
/*
* Since we already have the index relation locked, we call
* rtdoinsert directly. Normal access method calls dispatch
* through rtinsert, which locks the relation for write. This is
* the right thing to do if you're inserting single tups, but not
* when you're initializing the whole index at once.
*/
res = rtdoinsert(index, itup, &rtState);
pfree(itup);
pfree(res);
}
/* okay, all heap tuples are indexed */
heap_endscan(hscan);
#ifndef OMIT_PARTIAL_INDEX
if (pred != NULL || oldPred != NULL)
ExecDropTupleTable(tupleTable, true);
#endif /* OMIT_PARTIAL_INDEX */
FreeExprContext(econtext);
/*
* Since we just counted the tuples in the heap, we update its stats
* in pg_class to guarantee that the planner takes advantage of the
* index we just created. But, only update statistics during normal
* index definitions, not for indices on system catalogs created
* during bootstrap processing. We must close the relations before
* updating statistics to guarantee that the relcache entries are
* flushed when we increment the command counter in UpdateStats(). But
* we do not release any locks on the relations; those will be held
* until end of transaction.
*/
if (IsNormalProcessingMode())
{
Oid hrelid = RelationGetRelid(heap);
Oid irelid = RelationGetRelid(index);
heap_close(heap, NoLock);
index_close(index);
UpdateStats(hrelid, nhtups);
UpdateStats(irelid, nitups);
if (oldPred != NULL)
{
if (nitups == nhtups)
pred = NULL;
UpdateIndexPredicate(irelid, oldPred, pred);
}
}
PG_RETURN_VOID();
}
/*
* rtinsert -- wrapper for rtree tuple insertion.
*
* This is the public interface routine for tuple insertion in rtrees.
* It doesn't do any work; just locks the relation and passes the buck.
*/
Datum
rtinsert(PG_FUNCTION_ARGS)
{
Relation r = (Relation) PG_GETARG_POINTER(0);
Datum *datum = (Datum *) PG_GETARG_POINTER(1);
char *nulls = (char *) PG_GETARG_POINTER(2);
ItemPointer ht_ctid = (ItemPointer) PG_GETARG_POINTER(3);
#ifdef NOT_USED
Relation heapRel = (Relation) PG_GETARG_POINTER(4);
#endif
InsertIndexResult res;
IndexTuple itup;
RTSTATE rtState;
/* generate an index tuple */
itup = index_formtuple(RelationGetDescr(r), datum, nulls);
itup->t_tid = *ht_ctid;
initRtstate(&rtState, r);
/*
* Notes in ExecUtils:ExecOpenIndices()
*
* RelationSetLockForWrite(r);
*/
res = rtdoinsert(r, itup, &rtState);
PG_RETURN_POINTER(res);
}
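
/*
 * rtdoinsert -- guts of rtree tuple insertion.
 *
 * Descend from the root, using choose() at each internal page to pick the
 * child whose key needs the least enlargement.  At the leaf, either add
 * the tuple and enlarge the parent keys with rttighten(), or, if the page
 * is full, hand the problem to rtdosplit().
 */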
static InsertIndexResult
rtdoinsert(Relation r, IndexTuple itup, RTSTATE *rtstate)
{
Page page;
Buffer buffer;
BlockNumber blk;
IndexTuple which;
OffsetNumber l;
RTSTACK *stack;
InsertIndexResult res;
RTreePageOpaque opaque;
Datum datum;
blk = P_ROOT;
buffer = InvalidBuffer;
stack = (RTSTACK *) NULL;
do
{
/* let go of current buffer before getting next */
if (buffer != InvalidBuffer)
ReleaseBuffer(buffer);
/* get next buffer */
buffer = ReadBuffer(r, blk);
page = (Page) BufferGetPage(buffer);
opaque = (RTreePageOpaque) PageGetSpecialPointer(page);
if (!(opaque->flags & F_LEAF))
{
RTSTACK *n;
ItemId iid;
n = (RTSTACK *) palloc(sizeof(RTSTACK));
n->rts_parent = stack;
n->rts_blk = blk;
n->rts_child = choose(r, page, itup, rtstate);
stack = n;
iid = PageGetItemId(page, n->rts_child);
which = (IndexTuple) PageGetItem(page, iid);
blk = ItemPointerGetBlockNumber(&(which->t_tid));
}
} while (!(opaque->flags & F_LEAF));
if (nospace(page, itup))
{
/* need to do a split */
res = rtdosplit(r, buffer, stack, itup, rtstate);
freestack(stack);
WriteBuffer(buffer); /* don't forget to release buffer! */
return res;
}
/* add the item and write the buffer */
if (PageIsEmpty(page))
{
l = PageAddItem(page, (Item) itup, IndexTupleSize(itup),
FirstOffsetNumber,
LP_USED);
}
else
{
l = PageAddItem(page, (Item) itup, IndexTupleSize(itup),
OffsetNumberNext(PageGetMaxOffsetNumber(page)),
LP_USED);
}
if (l == InvalidOffsetNumber)
elog(ERROR, "rtdoinsert: failed to add index item to %s",
RelationGetRelationName(r));
WriteBuffer(buffer);
datum = IndexTupleGetDatum(itup);
/* now expand the page boundary in the parent to include the new child */
rttighten(r, stack, datum, IndexTupleAttSize(itup), rtstate);
freestack(stack);
/* build and return an InsertIndexResult for this insertion */
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
ItemPointerSet(&(res->pointerData), blk, l);
return res;
}
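
/*
 * rttighten -- adjust the parent key to cover a newly inserted datum.
 *
 * Replace the parent entry's key with the union of the old key and the
 * new datum, and recurse upward for as long as the key's size (per the
 * opclass sizeFn) actually changes.
 */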
static void
rttighten(Relation r,
RTSTACK *stk,
Datum datum,
int att_size,
RTSTATE *rtstate)
{
Datum oldud;
Datum tdatum;
Page p;
float old_size,
newd_size;
Buffer b;
if (stk == (RTSTACK *) NULL)
return;
b = ReadBuffer(r, stk->rts_blk);
p = BufferGetPage(b);
oldud = IndexTupleGetDatum(PageGetItem(p,
PageGetItemId(p, stk->rts_child)));
FunctionCall2(&rtstate->sizeFn, oldud,
PointerGetDatum(&old_size));
datum = FunctionCall2(&rtstate->unionFn, oldud, datum);
FunctionCall2(&rtstate->sizeFn, datum,
PointerGetDatum(&newd_size));
if (newd_size != old_size)
{
TupleDesc td = RelationGetDescr(r);
if (td->attrs[0]->attlen < 0)
{
/*
* This is an internal page, so 'oldud' had better be a union
* (constant-length) key, too. (See comment below.)
*/
Assert(VARSIZE(DatumGetPointer(datum)) ==
VARSIZE(DatumGetPointer(oldud)));
memmove(DatumGetPointer(oldud), DatumGetPointer(datum),
VARSIZE(DatumGetPointer(datum)));
}
else
{
memmove(DatumGetPointer(oldud), DatumGetPointer(datum),
att_size);
}
WriteBuffer(b);
/*
* The user may be defining an index on variable-sized data (like
* polygons). If so, we need to get a constant-sized datum for
* insertion on the internal page. We do this by calling the
* union proc, which is required to return a rectangle.
*/
tdatum = FunctionCall2(&rtstate->unionFn, datum, datum);
rttighten(r, stk->rts_parent, tdatum, att_size, rtstate);
pfree(DatumGetPointer(tdatum));
}
else
ReleaseBuffer(b);
pfree(DatumGetPointer(datum));
}
/*
* rtdosplit -- split a page in the tree.
*
* rtpicksplit does the interesting work of choosing the split.
* This routine just does the bit-pushing.
*/
static InsertIndexResult
rtdosplit(Relation r,
Buffer buffer,
RTSTACK *stack,
IndexTuple itup,
RTSTATE *rtstate)
{
Page p;
Buffer leftbuf,
rightbuf;
Page left,
right;
ItemId itemid;
IndexTuple item;
IndexTuple ltup,
rtup;
OffsetNumber maxoff;
OffsetNumber i;
OffsetNumber leftoff,
rightoff;
BlockNumber lbknum,
rbknum;
BlockNumber bufblock;
RTreePageOpaque opaque;
int blank;
InsertIndexResult res;
char *isnull;
SPLITVEC v;
OffsetNumber *spl_left,
*spl_right;
TupleDesc tupDesc;
p = (Page) BufferGetPage(buffer);
opaque = (RTreePageOpaque) PageGetSpecialPointer(p);
rtpicksplit(r, p, &v, itup, rtstate);
/*
* The root of the tree is the first block in the relation. If we're
* about to split the root, we need to do some hocus-pocus to enforce
* this guarantee.
*/
if (BufferGetBlockNumber(buffer) == P_ROOT)
{
leftbuf = ReadBuffer(r, P_NEW);
RTInitBuffer(leftbuf, opaque->flags);
lbknum = BufferGetBlockNumber(leftbuf);
left = (Page) BufferGetPage(leftbuf);
}
else
{
leftbuf = buffer;
IncrBufferRefCount(buffer);
lbknum = BufferGetBlockNumber(buffer);
left = (Page) PageGetTempPage(p, sizeof(RTreePageOpaqueData));
}
rightbuf = ReadBuffer(r, P_NEW);
RTInitBuffer(rightbuf, opaque->flags);
rbknum = BufferGetBlockNumber(rightbuf);
right = (Page) BufferGetPage(rightbuf);
spl_left = v.spl_left;
spl_right = v.spl_right;
leftoff = rightoff = FirstOffsetNumber;
maxoff = PageGetMaxOffsetNumber(p);
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
itemid = PageGetItemId(p, i);
item = (IndexTuple) PageGetItem(p, itemid);
if (i == *spl_left)
{
if (PageAddItem(left, (Item) item, IndexTupleSize(item),
leftoff, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtdosplit: failed to copy index item in %s",
RelationGetRelationName(r));
leftoff = OffsetNumberNext(leftoff);
spl_left++; /* advance in left split vector */
}
else
{
Assert(i == *spl_right);
if (PageAddItem(right, (Item) item, IndexTupleSize(item),
rightoff, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtdosplit: failed to copy index item in %s",
RelationGetRelationName(r));
rightoff = OffsetNumberNext(rightoff);
spl_right++; /* advance in right split vector */
}
}
/* build an InsertIndexResult for this insertion */
res = (InsertIndexResult) palloc(sizeof(InsertIndexResultData));
/* now insert the new index tuple */
if (*spl_left == maxoff + 1)
{
if (PageAddItem(left, (Item) itup, IndexTupleSize(itup),
leftoff, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtdosplit: failed to add index item to %s",
RelationGetRelationName(r));
leftoff = OffsetNumberNext(leftoff);
ItemPointerSet(&(res->pointerData), lbknum, leftoff);
spl_left++;
}
else
{
Assert(*spl_right == maxoff + 1);
if (PageAddItem(right, (Item) itup, IndexTupleSize(itup),
rightoff, LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtdosplit: failed to add index item to %s",
RelationGetRelationName(r));
rightoff = OffsetNumberNext(rightoff);
ItemPointerSet(&(res->pointerData), rbknum, rightoff);
spl_right++;
}
/* Make sure we consumed all of the split vectors, and release 'em */
Assert(*spl_left == InvalidOffsetNumber);
Assert(*spl_right == InvalidOffsetNumber);
pfree(v.spl_left);
pfree(v.spl_right);
if ((bufblock = BufferGetBlockNumber(buffer)) != P_ROOT)
PageRestoreTempPage(left, p);
WriteBuffer(leftbuf);
WriteBuffer(rightbuf);
/*
* Okay, the page is split. We have three things left to do:
*
* 1) Adjust any active scans on this index to cope with changes we
* introduced in its structure by splitting this page.
*
* 2) "Tighten" the bounding box of the pointer to the left page in the
* parent node in the tree, if any. Since we moved a bunch of stuff
* off the left page, we expect it to get smaller. This happens in
* the internal insertion routine.
*
* 3) Insert a pointer to the right page in the parent. This may cause
* the parent to split. If it does, we need to repeat steps one and
* two for each split node in the tree.
*/
/* adjust active scans */
rtadjscans(r, RTOP_SPLIT, bufblock, FirstOffsetNumber);
tupDesc = r->rd_att;
isnull = (char *) palloc(r->rd_rel->relnatts);
for (blank = 0; blank < r->rd_rel->relnatts; blank++)
isnull[blank] = ' ';
ltup = (IndexTuple) index_formtuple(tupDesc,
&(v.spl_ldatum), isnull);
rtup = (IndexTuple) index_formtuple(tupDesc,
&(v.spl_rdatum), isnull);
pfree(isnull);
/* set pointers to new child pages in the internal index tuples */
ItemPointerSet(&(ltup->t_tid), lbknum, 1);
ItemPointerSet(&(rtup->t_tid), rbknum, 1);
rtintinsert(r, stack, ltup, rtup, rtstate);
pfree(ltup);
pfree(rtup);
return res;
}
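
/*
 * rtintinsert -- install keys for both halves of a just-split child page
 * into its parent, splitting the parent in turn if there is no room for
 * the new right-half key.
 */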
static void
rtintinsert(Relation r,
RTSTACK *stk,
IndexTuple ltup,
IndexTuple rtup,
RTSTATE *rtstate)
{
IndexTuple old;
Buffer b;
Page p;
Datum ldatum,
rdatum,
newdatum;
InsertIndexResult res;
if (stk == (RTSTACK *) NULL)
{
rtnewroot(r, ltup, rtup);
return;
}
b = ReadBuffer(r, stk->rts_blk);
p = BufferGetPage(b);
old = (IndexTuple) PageGetItem(p, PageGetItemId(p, stk->rts_child));
	/*
	 * This is a hack.  Right now, we force rtree internal keys to be
	 * constant size.  To fix this, we would need to delete the old key
	 * and add both the left and the right keys for the two new pages.
	 * The insertion of the left key may force a split if the new left
	 * key is bigger than the old key.
	 */
if (IndexTupleSize(old) != IndexTupleSize(ltup))
elog(ERROR, "Variable-length rtree keys are not supported.");
/* install pointer to left child */
memmove(old, ltup, IndexTupleSize(ltup));
if (nospace(p, rtup))
{
newdatum = IndexTupleGetDatum(ltup);
rttighten(r, stk->rts_parent, newdatum,
IndexTupleAttSize(ltup), rtstate);
res = rtdosplit(r, b, stk->rts_parent, rtup, rtstate);
WriteBuffer(b); /* don't forget to release buffer! -
* 01/31/94 */
pfree(res);
}
else
{
if (PageAddItem(p, (Item) rtup, IndexTupleSize(rtup),
PageGetMaxOffsetNumber(p),
LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtintinsert: failed to add index item to %s",
RelationGetRelationName(r));
WriteBuffer(b);
ldatum = IndexTupleGetDatum(ltup);
rdatum = IndexTupleGetDatum(rtup);
newdatum = FunctionCall2(&rtstate->unionFn, ldatum, rdatum);
rttighten(r, stk->rts_parent, newdatum,
IndexTupleAttSize(rtup), rtstate);
pfree(DatumGetPointer(newdatum));
}
}
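
/*
 * rtnewroot -- replace a just-split root with a new root page holding
 * the keys for its two halves.  The page at P_ROOT is reinitialized in
 * place, so the root's block number never changes.
 */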
static void
rtnewroot(Relation r, IndexTuple lt, IndexTuple rt)
{
Buffer b;
Page p;
b = ReadBuffer(r, P_ROOT);
RTInitBuffer(b, 0);
p = BufferGetPage(b);
if (PageAddItem(p, (Item) lt, IndexTupleSize(lt),
FirstOffsetNumber,
LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtnewroot: failed to add index item to %s",
RelationGetRelationName(r));
if (PageAddItem(p, (Item) rt, IndexTupleSize(rt),
OffsetNumberNext(FirstOffsetNumber),
LP_USED) == InvalidOffsetNumber)
elog(ERROR, "rtnewroot: failed to add index item to %s",
RelationGetRelationName(r));
WriteBuffer(b);
}
/*
* Choose how to split an rtree page into two pages.
*
* We return two vectors of index item numbers, one for the items to be
* put on the left page, one for the items to be put on the right page.
* In addition, the item to be added (itup) is listed in the appropriate
* vector. It is represented by item number N+1 (N = # of items on page).
*
* Both vectors appear in sequence order with a terminating sentinel value
* of InvalidOffsetNumber.
*
* The bounding-box datums for the two new pages are also returned in *v.
*
* This is the quadratic-cost split algorithm Guttman describes in
* his paper. The reason we chose it is that you can implement this
* with less information about the data types on which you're operating.
*
* We must also deal with a consideration not found in Guttman's algorithm:
* variable-length data. In particular, the incoming item might be
* large enough that not just any split will work. In the worst case,
* our "split" may have to be the new item on one page and all the existing
* items on the other. Short of that, we have to take care that we do not
* make a split that leaves both pages too full for the new item.
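 *
 * To illustrate the seed-picking metric with an example of our own (not
 * from Guttman's paper): for candidate seeds a and b the code computes
 * waste = size(union(a,b)) - size(intersect(a,b)).  Two small boxes in
 * opposite corners of the key space yield a large union and an empty
 * intersection -- lots of waste, so they make good seeds -- while two
 * heavily overlapping boxes yield little waste and make poor ones.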
*/
static void
rtpicksplit(Relation r,
Page page,
SPLITVEC *v,
IndexTuple itup,
RTSTATE *rtstate)
{
OffsetNumber maxoff,
newitemoff;
OffsetNumber i,
j;
IndexTuple item_1,
item_2;
Datum datum_alpha,
datum_beta;
Datum datum_l,
datum_r;
Datum union_d,
union_dl,
union_dr;
Datum inter_d;
bool firsttime;
float size_alpha,
size_beta,
size_union,
size_inter;
float size_waste,
waste;
float size_l,
size_r;
int nbytes;
OffsetNumber seed_1 = 0,
seed_2 = 0;
OffsetNumber *left,
*right;
Size newitemsz,
item_1_sz,
item_2_sz,
left_avail_space,
right_avail_space;
/*
* First, make sure the new item is not so large that we can't
* possibly fit it on a page, even by itself. (It's sufficient to
* make this test here, since any oversize tuple must lead to a page
* split attempt.)
*/
newitemsz = IndexTupleTotalSize(itup);
if (newitemsz > RTPageAvailSpace)
elog(ERROR, "rtree: index item size %lu exceeds maximum %lu",
(unsigned long) newitemsz, (unsigned long) RTPageAvailSpace);
maxoff = PageGetMaxOffsetNumber(page);
newitemoff = OffsetNumberNext(maxoff); /* phony index for new
* item */
/* Make arrays big enough for worst case, including sentinel */
nbytes = (maxoff + 2) * sizeof(OffsetNumber);
v->spl_left = (OffsetNumber *) palloc(nbytes);
v->spl_right = (OffsetNumber *) palloc(nbytes);
firsttime = true;
waste = 0.0;
for (i = FirstOffsetNumber; i < maxoff; i = OffsetNumberNext(i))
{
item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
datum_alpha = IndexTupleGetDatum(item_1);
item_1_sz = IndexTupleTotalSize(item_1);
for (j = OffsetNumberNext(i); j <= maxoff; j = OffsetNumberNext(j))
{
item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, j));
datum_beta = IndexTupleGetDatum(item_2);
item_2_sz = IndexTupleTotalSize(item_2);
/*
* Ignore seed pairs that don't leave room for the new item on
* either split page.
*/
if (newitemsz + item_1_sz > RTPageAvailSpace &&
newitemsz + item_2_sz > RTPageAvailSpace)
continue;
/* compute the wasted space by unioning these guys */
union_d = FunctionCall2(&rtstate->unionFn,
datum_alpha, datum_beta);
FunctionCall2(&rtstate->sizeFn, union_d,
PointerGetDatum(&size_union));
inter_d = FunctionCall2(&rtstate->interFn,
datum_alpha, datum_beta);
/*
* The interFn may return a NULL pointer (not an SQL null!) to
* indicate no intersection. sizeFn must cope with this.
*/
FunctionCall2(&rtstate->sizeFn, inter_d,
PointerGetDatum(&size_inter));
size_waste = size_union - size_inter;
if (DatumGetPointer(union_d) != NULL)
pfree(DatumGetPointer(union_d));
if (DatumGetPointer(inter_d) != NULL)
pfree(DatumGetPointer(inter_d));
			/*
			 * Is this a more promising split than what we've already
			 * seen?
			 */
if (size_waste > waste || firsttime)
{
waste = size_waste;
seed_1 = i;
seed_2 = j;
firsttime = false;
}
}
}
if (firsttime)
{
/*
* There is no possible split except to put the new item on its
* own page. Since we still have to compute the union rectangles,
* we play dumb and run through the split algorithm anyway,
* setting seed_1 = first item on page and seed_2 = new item.
*/
seed_1 = FirstOffsetNumber;
seed_2 = newitemoff;
}
item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_1));
datum_alpha = IndexTupleGetDatum(item_1);
datum_l = FunctionCall2(&rtstate->unionFn, datum_alpha, datum_alpha);
FunctionCall2(&rtstate->sizeFn, datum_l, PointerGetDatum(&size_l));
left_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_1);
if (seed_2 == newitemoff)
{
item_2 = itup;
/* Needn't leave room for new item in calculations below */
newitemsz = 0;
}
else
item_2 = (IndexTuple) PageGetItem(page, PageGetItemId(page, seed_2));
datum_beta = IndexTupleGetDatum(item_2);
datum_r = FunctionCall2(&rtstate->unionFn, datum_beta, datum_beta);
FunctionCall2(&rtstate->sizeFn, datum_r, PointerGetDatum(&size_r));
right_avail_space = RTPageAvailSpace - IndexTupleTotalSize(item_2);
/*
* Now split up the regions between the two seeds. An important
* property of this split algorithm is that the split vector v has the
* indices of items to be split in order in its left and right
* vectors. We exploit this property by doing a merge in the code
* that actually splits the page.
*
* For efficiency, we also place the new index tuple in this loop. This
* is handled at the very end, when we have placed all the existing
* tuples and i == maxoff + 1.
*/
left = v->spl_left;
v->spl_nleft = 0;
right = v->spl_right;
v->spl_nright = 0;
for (i = FirstOffsetNumber; i <= newitemoff; i = OffsetNumberNext(i))
{
bool left_feasible,
right_feasible,
choose_left;
/*
* If we've already decided where to place this item, just put it
* on the correct list. Otherwise, we need to figure out which
* page needs the least enlargement in order to store the item.
*/
if (i == seed_1)
{
*left++ = i;
v->spl_nleft++;
/* left avail_space & union already includes this one */
continue;
}
if (i == seed_2)
{
*right++ = i;
v->spl_nright++;
/* right avail_space & union already includes this one */
continue;
}
/* Compute new union datums and sizes for both possible additions */
if (i == newitemoff)
{
item_1 = itup;
/* Needn't leave room for new item anymore */
newitemsz = 0;
}
else
item_1 = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
item_1_sz = IndexTupleTotalSize(item_1);
datum_alpha = IndexTupleGetDatum(item_1);
union_dl = FunctionCall2(&rtstate->unionFn, datum_l, datum_alpha);
union_dr = FunctionCall2(&rtstate->unionFn, datum_r, datum_alpha);
FunctionCall2(&rtstate->sizeFn, union_dl,
PointerGetDatum(&size_alpha));
FunctionCall2(&rtstate->sizeFn, union_dr,
PointerGetDatum(&size_beta));
/*
* We prefer the page that shows smaller enlargement of its union
* area (Guttman's algorithm), but we must take care that at least
* one page will still have room for the new item after this one
* is added.
*
* (We know that all the old items together can fit on one page, so
* we need not worry about any other problem than failing to fit
* the new item.)
*/
left_feasible = (left_avail_space >= item_1_sz &&
((left_avail_space - item_1_sz) >= newitemsz ||
right_avail_space >= newitemsz));
right_feasible = (right_avail_space >= item_1_sz &&
((right_avail_space - item_1_sz) >= newitemsz ||
left_avail_space >= newitemsz));
if (left_feasible && right_feasible)
{
/* Both feasible, use Guttman's algorithm */
choose_left = (size_alpha - size_l < size_beta - size_r);
}
else if (left_feasible)
choose_left = true;
else if (right_feasible)
choose_left = false;
else
{
elog(ERROR, "rtpicksplit: failed to find a workable page split");
			choose_left = false;	/* keep compiler quiet */
}
if (choose_left)
{
pfree(DatumGetPointer(datum_l));
pfree(DatumGetPointer(union_dr));
datum_l = union_dl;
size_l = size_alpha;
left_avail_space -= item_1_sz;
*left++ = i;
v->spl_nleft++;
}
else
{
pfree(DatumGetPointer(datum_r));
pfree(DatumGetPointer(union_dl));
datum_r = union_dr;
size_r = size_beta;
right_avail_space -= item_1_sz;
*right++ = i;
v->spl_nright++;
}
}
*left = *right = InvalidOffsetNumber; /* add ending sentinels */
v->spl_ldatum = datum_l;
v->spl_rdatum = datum_r;
}
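
/*
 * RTInitBuffer -- set up a fresh rtree page: zero the buffer, initialize
 * the standard page header, and stamp the given flag bits (e.g. F_LEAF)
 * into the rtree special space.
 */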
static void
RTInitBuffer(Buffer b, uint32 f)
{
RTreePageOpaque opaque;
Page page;
Size pageSize;
pageSize = BufferGetPageSize(b);
page = BufferGetPage(b);
MemSet(page, 0, (int) pageSize);
PageInit(page, pageSize, sizeof(RTreePageOpaqueData));
opaque = (RTreePageOpaque) PageGetSpecialPointer(page);
opaque->flags = f;
}
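
/*
 * choose -- pick the entry on an internal page whose key would grow the
 * least (by the sizeFn metric) if the new item were unioned into it;
 * this is essentially the descent rule of Guttman's ChooseLeaf.  An
 * entry that needs no growth at all ends the search immediately.
 */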
static OffsetNumber
choose(Relation r, Page p, IndexTuple it, RTSTATE *rtstate)
{
OffsetNumber maxoff;
OffsetNumber i;
Datum ud,
id;
Datum datum;
float usize,
dsize;
OffsetNumber which;
float which_grow;
id = IndexTupleGetDatum(it);
maxoff = PageGetMaxOffsetNumber(p);
which_grow = -1.0;
which = -1;
for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
{
datum = IndexTupleGetDatum(PageGetItem(p, PageGetItemId(p, i)));
FunctionCall2(&rtstate->sizeFn, datum,
PointerGetDatum(&dsize));
ud = FunctionCall2(&rtstate->unionFn, datum, id);
FunctionCall2(&rtstate->sizeFn, ud,
PointerGetDatum(&usize));
pfree(DatumGetPointer(ud));
if (which_grow < 0 || usize - dsize < which_grow)
{
which = i;
which_grow = usize - dsize;
if (which_grow == 0)
break;
}
}
return which;
}
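
/*
 * nospace -- is there too little free space on the page for this tuple?
 */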
static int
nospace(Page p, IndexTuple it)
{
return PageGetFreeSpace(p) < IndexTupleSize(it);
}
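
/*
 * freestack -- release the descent-path stack built by rtdoinsert.
 */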
void
freestack(RTSTACK *s)
{
RTSTACK *p;
while (s != (RTSTACK *) NULL)
{
p = s->rts_parent;
pfree(s);
s = p;
}
}
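
/*
 * rtdelete -- delete a single index tuple identified by its TID.
 *
 * Only VACUUM calls this; any scans positioned on the page are adjusted
 * first so they neither skip nor revisit entries.
 */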
Datum
rtdelete(PG_FUNCTION_ARGS)
{
Relation r = (Relation) PG_GETARG_POINTER(0);
ItemPointer tid = (ItemPointer) PG_GETARG_POINTER(1);
BlockNumber blkno;
OffsetNumber offnum;
Buffer buf;
Page page;
/*
* Notes in ExecUtils:ExecOpenIndices() Also note that only vacuum
* deletes index tuples now...
*
* RelationSetLockForWrite(r);
*/
blkno = ItemPointerGetBlockNumber(tid);
offnum = ItemPointerGetOffsetNumber(tid);
/* adjust any scans that will be affected by this deletion */
rtadjscans(r, RTOP_DEL, blkno, offnum);
/* delete the index tuple */
buf = ReadBuffer(r, blkno);
page = BufferGetPage(buf);
PageIndexTupleDelete(page, offnum);
WriteBuffer(buf);
PG_RETURN_VOID();
}
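
/*
 * initRtstate -- look up and cache the opclass's union, size, and
 * intersection support procedures for this index.
 */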
static void
initRtstate(RTSTATE *rtstate, Relation index)
{
RegProcedure union_proc,
size_proc,
inter_proc;
union_proc = index_getprocid(index, 1, RT_UNION_PROC);
size_proc = index_getprocid(index, 1, RT_SIZE_PROC);
inter_proc = index_getprocid(index, 1, RT_INTER_PROC);
fmgr_info(union_proc, &rtstate->unionFn);
fmgr_info(size_proc, &rtstate->sizeFn);
fmgr_info(inter_proc, &rtstate->interFn);
return;
}
#ifdef RTDEBUG
void
_rtdump(Relation r)
{
Buffer buf;
Page page;
OffsetNumber offnum,
maxoff;
BlockNumber blkno;
BlockNumber nblocks;
RTreePageOpaque po;
IndexTuple itup;
BlockNumber itblkno;
OffsetNumber itoffno;
Datum datum;
char *itkey;
nblocks = RelationGetNumberOfBlocks(r);
for (blkno = 0; blkno < nblocks; blkno++)
{
buf = ReadBuffer(r, blkno);
page = BufferGetPage(buf);
po = (RTreePageOpaque) PageGetSpecialPointer(page);
maxoff = PageGetMaxOffsetNumber(page);
printf("Page %d maxoff %d <%s>\n", blkno, maxoff,
(po->flags & F_LEAF ? "LEAF" : "INTERNAL"));
if (PageIsEmpty(page))
{
ReleaseBuffer(buf);
continue;
}
for (offnum = FirstOffsetNumber;
offnum <= maxoff;
offnum = OffsetNumberNext(offnum))
{
itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
itblkno = ItemPointerGetBlockNumber(&(itup->t_tid));
itoffno = ItemPointerGetOffsetNumber(&(itup->t_tid));
datum = IndexTupleGetDatum(itup);
itkey = DatumGetCString(DirectFunctionCall1(box_out,
datum));
printf("\t[%d] size %d heap <%d,%d> key:%s\n",
offnum, IndexTupleSize(itup), itblkno, itoffno, itkey);
pfree(itkey);
}
ReleaseBuffer(buf);
}
}
#endif /* defined RTDEBUG */
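
/*
 * WAL replay is not implemented for rtrees; these entry points exist only
 * so the access method can be listed in the XLOG resource-manager table,
 * and they simply abort if called.
 */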
void
rtree_redo(XLogRecPtr lsn, XLogRecord *record)
{
elog(STOP, "rtree_redo: unimplemented");
}
void
rtree_undo(XLogRecPtr lsn, XLogRecord *record)
{
elog(STOP, "rtree_undo: unimplemented");
}
void
rtree_desc(char *buf, uint8 xl_info, char *rec)
{
}