Document and clean up gistsplit.c.

Improve comments, rename some variables and functions, slightly simplify a couple of APIs, in an attempt to make this code readable by people other than its original author. Even though this is essentially just cosmetic, back-patch to all active branches, because otherwise it's going to make back-patching future fixes in this file very painful.
2025-08-27 07:42:10 +03:00 · 2013-02-10 11:58:23 -05:00
parent 3ea1ab283a
commit 2f0491dd1a
4 changed files with 320 additions and 171 deletions
--- a/src/backend/access/gist/gist.c
+++ b/src/backend/access/gist/gist.c
@@ -1238,18 +1238,12 @@ gistSplit(Relation r,
 	IndexTuple *lvectup,
 			   *rvectup;
 	GistSplitVector v;
 	GistEntryVector *entryvec;
 	int			i;
 	SplitedPageLayout *res = NULL;
 	/* generate the item array */
 	entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
 	entryvec->n = len + 1;
 	memset(v.spl_lisnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
 	memset(v.spl_risnull, TRUE, sizeof(bool) * giststate->tupdesc->natts);
-	gistSplitByKey(r, page, itup, len, giststate,
+	gistSplitByKey(r, page, itup, len, giststate, &v, 0);
 				   &v, entryvec, 0);
 	/* form left and right vector */
 	lvectup = (IndexTuple *) palloc(sizeof(IndexTuple) * (len + 1));
--- a/src/backend/access/gist/gistsplit.c
+++ b/src/backend/access/gist/gistsplit.c
@@ -1,7 +1,18 @@
 /*-------------------------------------------------------------------------
 *
 * gistsplit.c
- *	  Split page algorithm
+ *	  Multi-column page splitting algorithm
 *
 * This file is concerned with making good page-split decisions in multi-column
 * GiST indexes.  The opclass-specific picksplit functions can only be expected
 * to produce answers based on a single column.  We first run the picksplit
 * function for column 1; then, if there are more columns, we check if any of
 * the tuples are "don't cares" so far as the column 1 split is concerned
 * (that is, they could go to either side for no additional penalty).  If so,
 * we try to redistribute those tuples on the basis of the next column.
 * Repeat till we're out of columns.
 *
 * gistSplitByKey() is the entry point to this file.
 *
 *
 * Portions Copyright (c) 1996-2012, PostgreSQL Global Development Group
@@ -23,13 +34,14 @@ typedef struct
 	int			len;
 	Datum	   *attr;
 	bool	   *isnull;
-	bool	   *equiv;
+	bool	   *dontcare;
 } GistSplitUnion;
 /*
- * Form unions of subkeys after a page split, ignoring any tuples
+ * Form unions of subkeys in itvec[] entries listed in gsvp->entries[],
- * that are marked in gsvp->equiv[]
+ * ignoring any tuples that are marked in gsvp->dontcare[].  Subroutine for
 * gistunionsubkey.
 */
 static void
 gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
@@ -43,7 +55,7 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
 	for (i = 0; i < gsvp->len; i++)
 	{
-		if (gsvp->equiv && gsvp->equiv[gsvp->entries[i]])
+		if (gsvp->dontcare && gsvp->dontcare[gsvp->entries[i]])
 			continue;
 		cleanedItVec[cleanedLen++] = itvec[gsvp->entries[i] - 1];
@@ -56,15 +68,20 @@ gistunionsubkeyvec(GISTSTATE *giststate, IndexTuple *itvec,
 }
 /*
- * Recompute unions of subkeys after a page split, ignoring any tuples
+ * Recompute unions of left- and right-side subkeys after a page split,
- * that are marked in spl->spl_equiv[]
+ * ignoring any tuples that are marked in spl->spl_dontcare[].
 *
 * Note: we always recompute union keys for all index columns.	In some cases
 * this might represent duplicate work for the leftmost column(s), but it's
 * not safe to assume that "zero penalty to move a tuple" means "the union
 * key doesn't change at all".  Penalty functions aren't 100% accurate.
 */
 static void
 gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
 {
 	GistSplitUnion gsvp;
-	gsvp.equiv = spl->spl_equiv;
+	gsvp.dontcare = spl->spl_dontcare;
 	gsvp.entries = spl->splitVector.spl_left;
 	gsvp.len = spl->splitVector.spl_nleft;
@@ -82,90 +99,127 @@ gistunionsubkey(GISTSTATE *giststate, IndexTuple *itvec, GistSplitVector *spl)
 }
 /*
- * find group in vector with equivalent value
+ * Find tuples that are "don't cares", that is could be moved to the other
 * side of the split with zero penalty, so far as the attno column is
 * concerned.
 *
 * Don't-care tuples are marked by setting the corresponding entry in
 * spl->spl_dontcare[] to "true".  Caller must have initialized that array
 * to zeroes.
 *
 * Returns number of don't-cares found.
 */
 static int
-gistfindgroup(Relation r, GISTSTATE *giststate, GISTENTRY *valvec, GistSplitVector *spl, int attno)
+findDontCares(Relation r, GISTSTATE *giststate, GISTENTRY *valvec,
 			  GistSplitVector *spl, int attno)
 {
 	int			i;
 	GISTENTRY	entry;
-	int			len = 0;
+	int			NumDontCare = 0;
 	/*
-	 * attno key is always not null (see gistSplitByKey), so we may not check
+	 * First, search the left-side tuples to see if any have zero penalty to
-	 * for nulls
+	 * be added to the right-side union key.
 	 *
 	 * attno column is known all-not-null (see gistSplitByKey), so we need not
 	 * check for nulls
 	 */
-	gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL, (OffsetNumber) 0, FALSE);
+	gistentryinit(entry, spl->splitVector.spl_rdatum, r, NULL,
 				  (OffsetNumber) 0, FALSE);
 	for (i = 0; i < spl->splitVector.spl_nleft; i++)
 	{
 		int			j = spl->splitVector.spl_left[i];
 		float		penalty = gistpenalty(giststate, attno, &entry, false,
-							   &valvec[spl->splitVector.spl_left[i]], false);
+										  &valvec[j], false);
 		if (penalty == 0.0)
 		{
-			spl->spl_equiv[spl->splitVector.spl_left[i]] = true;
+			spl->spl_dontcare[j] = true;
-			len++;
+			NumDontCare++;
 		}
 	}
-	gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL, (OffsetNumber) 0, FALSE);
+	/* And conversely for the right-side tuples */
 	gistentryinit(entry, spl->splitVector.spl_ldatum, r, NULL,
 				  (OffsetNumber) 0, FALSE);
 	for (i = 0; i < spl->splitVector.spl_nright; i++)
 	{
 		int			j = spl->splitVector.spl_right[i];
 		float		penalty = gistpenalty(giststate, attno, &entry, false,
-							  &valvec[spl->splitVector.spl_right[i]], false);
+										  &valvec[j], false);
 		if (penalty == 0.0)
 		{
-			spl->spl_equiv[spl->splitVector.spl_right[i]] = true;
+			spl->spl_dontcare[j] = true;
-			len++;
+			NumDontCare++;
 		}
 	}
-	return len;
+	return NumDontCare;
 }
 /*
 * Remove tuples that are marked don't-cares from the tuple index array a[]
 * of length *len.	This is applied separately to the spl_left and spl_right
 * arrays.
 *
 * Corner case: we do not wish to reduce the index array to zero length.
 * (If we did, then the union key for this side would be null, and having just
 * one of spl_ldatum_exists and spl_rdatum_exists be TRUE might confuse
 * user-defined PickSplit methods.)  To avoid that, we'll forcibly redefine
 * one tuple as non-don't-care if necessary.  Hence, we must be able to adjust
 * caller's NumDontCare count.
 */
 static void
-cleanupOffsets(OffsetNumber *a, int *len, bool *equiv, int *LenEquiv)
+removeDontCares(OffsetNumber *a, int *len, bool *dontcare, int *NumDontCare)
 {
-	int			curlen,
+	int			origlen,
 				curlen,
 				i;
 	OffsetNumber *curwpos;
-	curlen = *len;
+	origlen = curlen = *len;
 	curwpos = a;
-	for (i = 0; i < *len; i++)
+	for (i = 0; i < origlen; i++)
 	{
-		if (equiv[a[i]] == FALSE)
+		OffsetNumber ai = a[i];
 		if (dontcare[ai] == FALSE)
 		{
-			*curwpos = a[i];
+			/* re-emit item into a[] */
 			*curwpos = ai;
 			curwpos++;
 		}
-		else
+		else if (curlen == 1)
 		{
-			/* corner case: we shouldn't make void array */
+			/* corner case: don't let array become empty */
-			if (curlen == 1)
+			dontcare[ai] = FALSE;		/* mark item as non-dont-care */
-			{
+			*NumDontCare -= 1;
-				equiv[a[i]] = FALSE;	/* mark item as non-equivalent */
+			i--;				/* reprocess item on next iteration */
 				i--;			/* redo the same */
 				*LenEquiv -= 1;
 				continue;
 		}
 		else
 			curlen--;
 	}
 	}
 	*len = curlen;
 }
 /*
 * Place a single don't-care tuple into either the left or right side of the
 * split, according to which has least penalty for merging the tuple into
 * the previously-computed union keys.	We need consider only columns starting
 * at attno.
 */
 static void
-placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup, OffsetNumber off, int attno)
+placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v,
 		 IndexTuple itup, OffsetNumber off, int attno)
 {
 	GISTENTRY	identry[INDEX_MAX_KEYS];
 	bool		isnull[INDEX_MAX_KEYS];
 	bool		toLeft = true;
-	gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0, identry, isnull);
+	gistDeCompressAtt(giststate, r, itup, NULL, (OffsetNumber) 0,
 					  identry, isnull);
 	for (; attno < giststate->tupdesc->natts; attno++)
 	{
@@ -174,9 +228,11 @@ placeOne(Relation r, GISTSTATE *giststate, GistSplitVector *v, IndexTuple itup,
 		GISTENTRY	entry;
 		gistentryinit(entry, v->spl_lattr[attno], r, NULL, 0, FALSE);
-		lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno], identry + attno, isnull[attno]);
+		lpenalty = gistpenalty(giststate, attno, &entry, v->spl_lisnull[attno],
 							   identry + attno, isnull[attno]);
 		gistentryinit(entry, v->spl_rattr[attno], r, NULL, 0, FALSE);
-		rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno], identry + attno, isnull[attno]);
+		rpenalty = gistpenalty(giststate, attno, &entry, v->spl_risnull[attno],
 							   identry + attno, isnull[attno]);
 		if (lpenalty != rpenalty)
 		{
@@ -200,13 +256,21 @@ do {	\
 } while(0)
 /*
- * adjust left and right unions according to splits by previous
+ * Clean up when we did a secondary split but the user-defined PickSplit
- * split by first columns. This function is called only in case
+ * method didn't support it (leaving spl_ldatum_exists or spl_rdatum_exists
- * when pickSplit doesn't support subsplit.
+ * true).
 *
 * We consider whether to swap the left and right outputs of the secondary
 * split; this can be worthwhile if the penalty for merging those tuples into
 * the previously chosen sets is less that way.
 *
 * In any case we must update the union datums for the current column by
 * adding in the previous union keys (oldL/oldR), since the user-defined
 * PickSplit method didn't do so.
 */
 static void
-supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
+supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno,
 					  GIST_SPLITVEC *sv, Datum oldL, Datum oldR)
 {
 	bool		leaveOnLeft = true,
 				tmpBool;
@@ -232,7 +296,6 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
 		if (penalty1 > penalty2)
 			leaveOnLeft = false;
 	}
 	else
 	{
@@ -244,7 +307,6 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
 		 * there is only one previously defined union, so we just choose swap
 		 * or not by lowest penalty
 		 */
 		penalty1 = gistpenalty(giststate, attno, entry1, false, &entrySL, false);
 		penalty2 = gistpenalty(giststate, attno, entry1, false, &entrySR, false);
@@ -282,10 +344,9 @@ supportSecondarySplit(Relation r, GISTSTATE *giststate, int attno, GIST_SPLITVEC
 }
 /*
- * Trivial picksplit implementaion. Function called only
+ * Trivial picksplit implementation. Function called only
- * if user-defined picksplit puts all keys to the one page.
+ * if user-defined picksplit puts all keys on the same side of the split.
- * That is a bug of user-defined picksplit but we'd like
+ * That is a bug of user-defined picksplit but we don't want to fail.
 * to "fix" that.
 */
 static void
 genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC *v, int attno)
@@ -318,9 +379,8 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
 	}
 	/*
-	 * Form unions of each page
+	 * Form union datums for each side
 	 */
 	evec = palloc(sizeof(GISTENTRY) * entryvec->n + GEVHDRSZ);
 	evec->n = v->spl_nleft;
@@ -341,12 +401,17 @@ genericPickSplit(GISTSTATE *giststate, GistEntryVector *entryvec, GIST_SPLITVEC
 }
 /*
- * Calls user picksplit method for attno columns to split vector to
+ * Calls user picksplit method for attno column to split tuples into
- * two vectors. May use attno+n columns data to
+ * two vectors.
- * get better split.
+ *
- * Returns TRUE and v->spl_equiv = NULL if left and right unions of attno columns are the same,
+ * Returns FALSE if split is complete (there are no more index columns, or
- * so caller may find better split
+ * there is no need to consider them).	Note that in this case the union
- * Returns TRUE and v->spl_equiv != NULL if there is tuples which may be freely moved
+ * keys for all columns must be computed here.
 * Returns TRUE and v->spl_dontcare = NULL if left and right unions of attno
 * column are the same, so we should split on next column instead.
 * Returns TRUE and v->spl_dontcare != NULL if there are don't-care tuples
 * that could be relocated based on the next column(s).  The don't-care
 * tuples have been removed from the split and must be reinserted by caller.
 */
 static bool
 gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVector *v,
@@ -355,15 +420,18 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
 	GIST_SPLITVEC *sv = &v->splitVector;
 	/*
-	 * now let the user-defined picksplit function set up the split vector; in
+	 * Prepare spl_ldatum/spl_rdatum/spl_ldatum_exists/spl_rdatum_exists in
-	 * entryvec there is no null value!!
+	 * case we are doing a secondary split (see comments in gist.h).
 	 */
 	sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
 	sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
 	sv->spl_ldatum = v->spl_lattr[attno];
 	sv->spl_rdatum = v->spl_rattr[attno];
 	/*
 	 * Let the opclass-specific PickSplit method do its thing.	Note that at
 	 * this point we know there are no null keys in the entryvec.
 	 */
 	FunctionCall2Coll(&giststate->picksplitFn[attno],
 					  giststate->supportCollation[attno],
 					  PointerGetDatum(entryvec),
@@ -371,6 +439,10 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
 	if (sv->spl_nleft == 0 || sv->spl_nright == 0)
 	{
 		/*
 		 * User-defined picksplit failed to create an actual split, ie it put
 		 * everything on the same side.  Complain but cope.
 		 */
 		ereport(DEBUG1,
 				(errcode(ERRCODE_INTERNAL_ERROR),
 			  errmsg("picksplit method for column %d of index \"%s\" failed",
@@ -378,107 +450,129 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
 				 errhint("The index is not optimal. To optimize it, contact a developer, or try to use the column as the second one in the CREATE INDEX command.")));
 		/*
-		 * Reinit GIST_SPLITVEC. Although that fields are not used by
+		 * Reinit GIST_SPLITVEC. Although these fields are not used by
-		 * genericPickSplit(), let us set up it for further processing
+		 * genericPickSplit(), set them up for further processing
 		 */
 		sv->spl_ldatum_exists = (v->spl_lisnull[attno]) ? false : true;
 		sv->spl_rdatum_exists = (v->spl_risnull[attno]) ? false : true;
 		sv->spl_ldatum = v->spl_lattr[attno];
 		sv->spl_rdatum = v->spl_rattr[attno];
 		/* Do a generic split */
 		genericPickSplit(giststate, entryvec, sv, attno);
 		/* Clean up if we're in a secondary split */
 		if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
-			supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
+			supportSecondarySplit(r, giststate, attno, sv,
 								  v->spl_lattr[attno], v->spl_rattr[attno]);
 	}
 	else
 	{
-		/* compatibility with old code */
+		/* hack for compatibility with old picksplit API */
 		if (sv->spl_left[sv->spl_nleft - 1] == InvalidOffsetNumber)
 			sv->spl_left[sv->spl_nleft - 1] = (OffsetNumber) (entryvec->n - 1);
 		if (sv->spl_right[sv->spl_nright - 1] == InvalidOffsetNumber)
 			sv->spl_right[sv->spl_nright - 1] = (OffsetNumber) (entryvec->n - 1);
 		/* Clean up if we're in a secondary split */
 		if (sv->spl_ldatum_exists || sv->spl_rdatum_exists)
 		{
 			elog(LOG, "picksplit method for column %d of index \"%s\" doesn't support secondary split",
 				 attno + 1, RelationGetRelationName(r));
-			supportSecondarySplit(r, giststate, attno, sv, v->spl_lattr[attno], v->spl_rattr[attno]);
+			supportSecondarySplit(r, giststate, attno, sv,
 								  v->spl_lattr[attno], v->spl_rattr[attno]);
 		}
 	}
 	/* emit union datums computed by PickSplit back to v arrays */
 	v->spl_lattr[attno] = sv->spl_ldatum;
 	v->spl_rattr[attno] = sv->spl_rdatum;
 	v->spl_lisnull[attno] = false;
 	v->spl_risnull[attno] = false;
 	/*
-	 * if index is multikey, then we must to try get smaller bounding box for
+	 * If index columns remain, then consider whether we can improve the split
-	 * subkey(s)
+	 * by using them.  Even if we can't, we must compute union keys for those
 	 * columns before we can return FALSE.
 	 */
-	v->spl_equiv = NULL;
+	v->spl_dontcare = NULL;
-	if (giststate->tupdesc->natts > 1 && attno + 1 != giststate->tupdesc->natts)
+	if (attno + 1 < giststate->tupdesc->natts)
 	{
 		int			NumDontCare;
 		if (gistKeyIsEQ(giststate, attno, sv->spl_ldatum, sv->spl_rdatum))
 		{
 			/*
-			 * Left and right key's unions are equial, so we can get better
+			 * Left and right union keys are equal, so we can get better split
-			 * split by following columns. Note, unions for attno columns are
+			 * by considering next column.
 			 * already done.
 			 */
 			return true;
 		}
 		else
 		{
 			int			LenEquiv;
 			v->spl_equiv = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
 			LenEquiv = gistfindgroup(r, giststate, entryvec->vector, v, attno);
 		/*
-			 * if possible, we should distribute equivalent tuples
+		 * Locate don't-care tuples, if any
 		 */
-			if (LenEquiv == 0)
+		v->spl_dontcare = (bool *) palloc0(sizeof(bool) * (entryvec->n + 1));
 		NumDontCare = findDontCares(r, giststate, entryvec->vector, v, attno);
 		if (NumDontCare == 0)
 		{
 			/*
 			 * There are no don't-cares, so just compute the union keys for
 			 * remaining columns and we're done.
 			 */
 			gistunionsubkey(giststate, itup, v);
 		}
 		else
 		{
-				cleanupOffsets(sv->spl_left, &sv->spl_nleft, v->spl_equiv, &LenEquiv);
+			/*
-				cleanupOffsets(sv->spl_right, &sv->spl_nright, v->spl_equiv, &LenEquiv);
+			 * Remove don't-cares from spl_left[] and spl_right[].  NOTE: this
 			 * could reduce NumDontCare to zero.
 			 */
 			removeDontCares(sv->spl_left, &sv->spl_nleft,
 							v->spl_dontcare, &NumDontCare);
 			removeDontCares(sv->spl_right, &sv->spl_nright,
 							v->spl_dontcare, &NumDontCare);
 			/*
 			 * Recompute union keys, considering only non-don't-care tuples.
 			 * NOTE: this will set union keys for remaining index columns,
 			 * which will cause later calls of gistUserPicksplit to pass those
 			 * values down to user-defined PickSplit methods with
 			 * spl_ldatum_exists/spl_rdatum_exists set true.
 			 */
 			gistunionsubkey(giststate, itup, v);
-				if (LenEquiv == 1)
+
 			if (NumDontCare == 1)
 			{
 				/*
-					 * In case with one tuple we just choose left-right by
+				 * If there's only one don't-care tuple then we can't do a
-					 * penalty. It's simplify user-defined pickSplit
+				 * PickSplit on it, so just choose whether to send it left or
 				 * right by comparing penalties.
 				 */
-					OffsetNumber toMove = InvalidOffsetNumber;
+				OffsetNumber toMove;
 				/* find it ... */
 				for (toMove = FirstOffsetNumber; toMove < entryvec->n; toMove++)
-						if (v->spl_equiv[toMove])
+				{
 					if (v->spl_dontcare[toMove])
 						break;
 				}
 				Assert(toMove < entryvec->n);
 				/* ... and assign it to cheaper side */
 				placeOne(r, giststate, v, itup[toMove - 1], toMove, attno + 1);
-					/*
+				/* recompute the union keys including this tuple */
-					 * redo gistunionsubkey(): it will not degradate
+				v->spl_dontcare = NULL;
 					 * performance, because it's very rarely
 					 */
 					v->spl_equiv = NULL;
 				gistunionsubkey(giststate, itup, v);
 					return false;
 			}
-				else if (LenEquiv > 1)
+			else if (NumDontCare > 1)
 				return true;
-			}
+			/* else NumDontCare is now zero; handle same as above */
 		}
 	}
@@ -486,7 +580,7 @@ gistUserPicksplit(Relation r, GistEntryVector *entryvec, int attno, GistSplitVec
 }
 /*
- * simple split page
+ * simply split page in half
 */
 static void
 gistSplitHalf(GIST_SPLITVEC *v, int len)
@@ -501,26 +595,53 @@ gistSplitHalf(GIST_SPLITVEC *v, int len)
 			v->spl_right[v->spl_nright++] = i;
 		else
 			v->spl_left[v->spl_nleft++] = i;
 	/* we need not compute union keys, caller took care of it */
 }
 /*
- * tries to split page by attno key, in case of null
+ * gistSplitByKey: main entry point for page-splitting algorithm
- * values move those to separate page.
+ *
 * r: index relation
 * page: page being split
 * itup: array of IndexTuples to be processed
 * len: number of IndexTuples to be processed (must be at least 2)
 * giststate: additional info about index
 * v: working state and output area
 * attno: column we are working on (zero-based index)
 *
 * Outside caller must initialize v->spl_lisnull and v->spl_risnull arrays
 * to all-TRUE.  On return, spl_left/spl_nleft contain indexes of tuples
 * to go left, spl_right/spl_nright contain indexes of tuples to go right,
 * spl_lattr/spl_lisnull contain left-side union key values, and
 * spl_rattr/spl_risnull contain right-side union key values.  Other fields
 * in this struct are workspace for this file.
 *
 * Outside caller must pass zero for attno.  The function may internally
 * recurse to the next column by passing attno+1.
 */
 void
-gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *giststate,
+gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len,
-			   GistSplitVector *v, GistEntryVector *entryvec, int attno)
+			   GISTSTATE *giststate, GistSplitVector *v, int attno)
 {
-	int			i;
+	GistEntryVector *entryvec;
-	static OffsetNumber offNullTuples[MaxOffsetNumber];
+	OffsetNumber *offNullTuples;
 	int			nOffNullTuples = 0;
 	int			i;
 	/* generate the item array, and identify tuples with null keys */
 	/* note that entryvec->vector[0] goes unused in this code */
 	entryvec = palloc(GEVHDRSZ + (len + 1) * sizeof(GISTENTRY));
 	entryvec->n = len + 1;
 	offNullTuples = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
 	for (i = 1; i <= len; i++)
 	{
 		Datum		datum;
 		bool		IsNull;
-		datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc, &IsNull);
+		datum = index_getattr(itup[i - 1], attno + 1, giststate->tupdesc,
 							  &IsNull);
 		gistdentryinit(giststate, attno, &(entryvec->vector[i]),
 					   datum, r, page, i,
 					   FALSE, IsNull);
@@ -531,24 +652,24 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
 	if (nOffNullTuples == len)
 	{
 		/*
-		 * Corner case: All keys in attno column are null, we should try to
+		 * Corner case: All keys in attno column are null, so just transfer
-		 * split by keys in next column. If all keys in all columns are NULL
+		 * our attention to the next column.  If there's no next column, just
-		 * just split page half by half
+		 * split page in half.
 		 */
 		v->spl_risnull[attno] = v->spl_lisnull[attno] = TRUE;
-		if (attno + 1 == r->rd_att->natts)
+		if (attno + 1 < r->rd_att->natts)
-			gistSplitHalf(&v->splitVector, len);
+			gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
 		else
-			gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
+			gistSplitHalf(&v->splitVector, len);
 	}
 	else if (nOffNullTuples > 0)
 	{
 		int			j = 0;
 		/*
-		 * We don't want to mix NULLs and not-NULLs keys on one page, so move
+		 * We don't want to mix NULL and not-NULL keys on one page, so split
-		 * nulls to right page
+		 * nulls to right page and not-nulls to left.
 		 */
 		v->splitVector.spl_right = offNullTuples;
 		v->splitVector.spl_nright = nOffNullTuples;
@@ -562,62 +683,76 @@ gistSplitByKey(Relation r, Page page, IndexTuple *itup, int len, GISTSTATE *gist
 			else
 				v->splitVector.spl_left[v->splitVector.spl_nleft++] = i;
-		v->spl_equiv = NULL;
+		/* Must compute union keys for this and any following columns */
 		v->spl_dontcare = NULL;
 		gistunionsubkey(giststate, itup, v);
 	}
 	else
 	{
 		/*
-		 * all keys are not-null
+		 * all keys are not-null, so apply user-defined PickSplit method
 		 */
-		entryvec->n = len + 1;
+		if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate))
 		{
 			/*
 			 * Splitting on attno column is not optimal, so consider
 			 * redistributing don't-care tuples according to the next column
 			 */
 			Assert(attno + 1 < r->rd_att->natts);
-		if (gistUserPicksplit(r, entryvec, attno, v, itup, len, giststate) && attno + 1 != r->rd_att->natts)
+			if (v->spl_dontcare == NULL)
 			{
 				/*
-			 * Splitting on attno column is not optimized: there is a tuples
+				 * Simple case: left and right keys for attno column are
-			 * which can be freely left or right page, we will try to split
+				 * equal, so just split according to the next column.
 			 * page by following columns
 				 */
-			if (v->spl_equiv == NULL)
+				gistSplitByKey(r, page, itup, len, giststate, v, attno + 1);
 			{
 				/*
 				 * simple case: left and right keys for attno column are equal
 				 */
 				gistSplitByKey(r, page, itup, len, giststate, v, entryvec, attno + 1);
 			}
 			else
 			{
-				/* we should clean up vector from already distributed tuples */
+				/*
-				IndexTuple *newitup = (IndexTuple *) palloc((len + 1) * sizeof(IndexTuple));
+				 * Form an array of just the don't-care tuples to pass to a
-				OffsetNumber *map = (OffsetNumber *) palloc((len + 1) * sizeof(IndexTuple));
+				 * recursive invocation of this function for the next column.
 				 */
 				IndexTuple *newitup = (IndexTuple *) palloc(len * sizeof(IndexTuple));
 				OffsetNumber *map = (OffsetNumber *) palloc(len * sizeof(OffsetNumber));
 				int			newlen = 0;
-				GIST_SPLITVEC backupSplit = v->splitVector;
+				GIST_SPLITVEC backupSplit;
 				for (i = 0; i < len; i++)
 					if (v->spl_equiv[i + 1])
 				{
 					if (v->spl_dontcare[i + 1])
 					{
 						newitup[newlen] = itup[i];
 						map[newlen] = i + 1;
-						newitup[newlen++] = itup[i];
+						newlen++;
 					}
 				}
 				Assert(newlen > 0);
 				/*
 				 * Make a backup copy of v->splitVector, since the recursive
 				 * call will overwrite that with its own result.
 				 */
 				backupSplit = v->splitVector;
 				backupSplit.spl_left = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
 				memcpy(backupSplit.spl_left, v->splitVector.spl_left, sizeof(OffsetNumber) * v->splitVector.spl_nleft);
 				backupSplit.spl_right = (OffsetNumber *) palloc(sizeof(OffsetNumber) * len);
 				memcpy(backupSplit.spl_right, v->splitVector.spl_right, sizeof(OffsetNumber) * v->splitVector.spl_nright);
-				gistSplitByKey(r, page, newitup, newlen, giststate, v, entryvec, attno + 1);
+				/* Recursively decide how to split the don't-care tuples */
 				gistSplitByKey(r, page, newitup, newlen, giststate, v, attno + 1);
-				/* merge result of subsplit */
+				/* Merge result of subsplit with non-don't-care tuples */
 				for (i = 0; i < v->splitVector.spl_nleft; i++)
 					backupSplit.spl_left[backupSplit.spl_nleft++] = map[v->splitVector.spl_left[i] - 1];
 				for (i = 0; i < v->splitVector.spl_nright; i++)
 					backupSplit.spl_right[backupSplit.spl_nright++] = map[v->splitVector.spl_right[i] - 1];
 				v->splitVector = backupSplit;
-				/* reunion left and right datums */
+
 				/* recompute left and right union datums */
 				gistunionsubkey(giststate, itup, v);
 			}
 		}
--- a/src/include/access/gist.h
+++ b/src/include/access/gist.h
@@ -85,11 +85,30 @@ typedef GISTPageOpaqueData *GISTPageOpaque;
 /*
 * This is the Split Vector to be returned by the PickSplit method.
- * PickSplit should check spl_(r|l)datum_exists. If it is 'true',
+ * PickSplit should fill the indexes of tuples to go to the left side into
- * that corresponding spl_(r|l)datum already defined and
+ * spl_left[], and those to go to the right into spl_right[] (note the method
- * PickSplit should use that value. PickSplit should always set
+ * is responsible for palloc'ing both of these arrays!).  The tuple counts
- * spl_(r|l)datum_exists to false: GiST will check value to
+ * go into spl_nleft/spl_nright, and spl_ldatum/spl_rdatum must be set to
- * control supporting this feature by PickSplit...
+ * the union keys for each side.
 *
 * If spl_ldatum_exists and spl_rdatum_exists are true, then we are performing
 * a "secondary split" using a non-first index column.  In this case some
 * decisions have already been made about a page split, and the set of tuples
 * being passed to PickSplit is just the tuples about which we are undecided.
 * spl_ldatum/spl_rdatum then contain the union keys for the tuples already
 * chosen to go left or right.  Ideally the PickSplit method should take those
 * keys into account while deciding what to do with the remaining tuples, ie
 * it should try to "build out" from those unions so as to minimally expand
 * them.  If it does so, it should union the given tuples' keys into the
 * existing spl_ldatum/spl_rdatum values rather than just setting those values
 * from scratch, and then set spl_ldatum_exists/spl_rdatum_exists to false to
 * show it has done this.
 *
 * If the PickSplit method fails to clear spl_ldatum_exists/spl_rdatum_exists,
 * the core GiST code will make its own decision about how to merge the
 * secondary-split results with the previously-chosen tuples, and will then
 * recompute the union keys from scratch.  This is a workable though often not
 * optimal approach.
 */
 typedef struct GIST_SPLITVEC
 {
--- a/src/include/access/gist_private.h
+++ b/src/include/access/gist_private.h
@@ -254,20 +254,21 @@ typedef struct GISTInsertStack
 	struct GISTInsertStack *parent;
 } GISTInsertStack;
 /* Working state and results for multi-column split logic in gistsplit.c */
 typedef struct GistSplitVector
 {
-	GIST_SPLITVEC splitVector;	/* to/from PickSplit method */
+	GIST_SPLITVEC splitVector;	/* passed to/from user PickSplit method */
 	Datum		spl_lattr[INDEX_MAX_KEYS];		/* Union of subkeys in
-												 * spl_left */
+												 * splitVector.spl_left */
 	bool		spl_lisnull[INDEX_MAX_KEYS];
 	Datum		spl_rattr[INDEX_MAX_KEYS];		/* Union of subkeys in
-												 * spl_right */
+												 * splitVector.spl_right */
 	bool		spl_risnull[INDEX_MAX_KEYS];
-	bool	   *spl_equiv;		/* equivalent tuples which can be freely
+	bool	   *spl_dontcare;	/* flags tuples which could go to either side
-								 * distributed between left and right pages */
+								 * of the split for zero penalty */
 } GistSplitVector;
 typedef struct
@@ -526,7 +527,7 @@ extern Datum gistvacuumcleanup(PG_FUNCTION_ARGS);
 /* gistsplit.c */
 extern void gistSplitByKey(Relation r, Page page, IndexTuple *itup,
 			   int len, GISTSTATE *giststate,
-			   GistSplitVector *v, GistEntryVector *entryvec,
+			   GistSplitVector *v,
 			   int attno);
 /* gistbuild.c */