Use abbreviated keys for faster sorting of text datums.

This commit extends the SortSupport infrastructure to allow operator classes the option to provide abbreviated representations of Datums; in the case of text, we abbreviate by taking the first few characters of the strxfrm() blob. If the abbreviated comparison is insufficent to resolve the comparison, we fall back on the normal comparator. This can be much faster than the old way of doing sorting if the first few bytes of the string are usually sufficient to resolve the comparison. There is the potential for a performance regression if all of the strings to be sorted are identical for the first 8+ characters and differ only in later positions; therefore, the SortSupport machinery now provides an infrastructure to abort the use of abbreviation if it appears that abbreviation is producing comparatively few distinct keys. HyperLogLog, a streaming cardinality estimator, is included in this commit and used to make that determination for text. Peter Geoghegan, reviewed by me.
2025-11-10 17:42:29 +03:00 · 2015-01-19 15:20:31 -05:00
parent 1605291b6c
commit 4ea51cdfe8
13 changed files with 1149 additions and 76 deletions
--- a/src/backend/utils/sort/tuplesort.c
+++ b/src/backend/utils/sort/tuplesort.c
@@ -150,7 +150,10 @@ bool		optimize_bounded_sort = true;
 * When sorting single Datums, the data value is represented directly by
 * datum1/isnull1.  If the datatype is pass-by-reference and isnull1 is false,
 * then datum1 points to a separately palloc'd data value that is also pointed
- * to by the "tuple" pointer; otherwise "tuple" is NULL.
+ * to by the "tuple" pointer; otherwise "tuple" is NULL.  There is one special
+ * case:  when the sort support infrastructure provides an "abbreviated key"
+ * representation, where the key is (typically) a pass by value proxy for a
+ * pass by reference type.
 *
 * While building initial runs, tupindex holds the tuple's run number.  During
 * merge passes, we re-use it to hold the input tape number that each tuple in
@@ -346,6 +349,14 @@ struct Tuplesortstate
 	 */
 	SortSupport onlyKey;

+	/*
+	 * Additional state for managing "abbreviated key" sortsupport routines
+	 * (which currently may be used by all cases except the Datum sort case and
+	 * hash index case).  Tracks the intervals at which the optimization's
+	 * effectiveness is tested.
+	 */
+	int64		abbrevNext;		/* Tuple # at which to next check applicability */
+
 	/*
 	 * These variables are specific to the CLUSTER case; they are set by
 	 * tuplesort_begin_cluster.
@@ -442,6 +453,7 @@ struct Tuplesortstate

 static Tuplesortstate *tuplesort_begin_common(int workMem, bool randomAccess);
 static void puttuple_common(Tuplesortstate *state, SortTuple *tuple);
+static bool consider_abort_common(Tuplesortstate *state);
 static void inittapes(Tuplesortstate *state);
 static void selectnewtape(Tuplesortstate *state);
 static void mergeruns(Tuplesortstate *state);
@@ -619,6 +631,7 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 	state->readtup = readtup_heap;

 	state->tupDesc = tupDesc;	/* assume we need not copy tupDesc */
+	state->abbrevNext = 10;

 	/* Prepare SortSupport data for each column */
 	state->sortKeys = (SortSupport) palloc0(nkeys * sizeof(SortSupportData));
@@ -634,11 +647,19 @@ tuplesort_begin_heap(TupleDesc tupDesc,
 		sortKey->ssup_collation = sortCollations[i];
 		sortKey->ssup_nulls_first = nullsFirstFlags[i];
 		sortKey->ssup_attno = attNums[i];
+		/* Convey if abbreviation optimization is applicable in principle */
+		sortKey->abbreviate = (i == 0);

 		PrepareSortSupportFromOrderingOp(sortOperators[i], sortKey);
 	}

-	if (nkeys == 1)
+	/*
+	 * The "onlyKey" optimization cannot be used with abbreviated keys, since
+	 * tie-breaker comparisons may be required.  Typically, the optimization is
+	 * only of value to pass-by-value types anyway, whereas abbreviated keys
+	 * are typically only of value to pass-by-reference types.
+	 */
+	if (nkeys == 1 && !state->sortKeys->abbrev_converter)
 		state->onlyKey = state->sortKeys;

 	MemoryContextSwitchTo(oldcontext);
@@ -680,6 +701,7 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 	state->copytup = copytup_cluster;
 	state->writetup = writetup_cluster;
 	state->readtup = readtup_cluster;
+	state->abbrevNext = 10;

 	state->indexInfo = BuildIndexInfo(indexRel);

@@ -719,6 +741,8 @@ tuplesort_begin_cluster(TupleDesc tupDesc,
 		sortKey->ssup_nulls_first =
 			(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
 		sortKey->ssup_attno = scanKey->sk_attno;
+		/* Convey if abbreviation optimization is applicable in principle */
+		sortKey->abbreviate = (i == 0);

 		AssertState(sortKey->ssup_attno != 0);

@@ -768,6 +792,7 @@ tuplesort_begin_index_btree(Relation heapRel,
 	state->copytup = copytup_index;
 	state->writetup = writetup_index;
 	state->readtup = readtup_index;
+	state->abbrevNext = 10;

 	state->heapRel = heapRel;
 	state->indexRel = indexRel;
@@ -791,6 +816,8 @@ tuplesort_begin_index_btree(Relation heapRel,
 		sortKey->ssup_nulls_first =
 			(scanKey->sk_flags & SK_BT_NULLS_FIRST) != 0;
 		sortKey->ssup_attno = scanKey->sk_attno;
+		/* Convey if abbreviation optimization is applicable in principle */
+		sortKey->abbreviate = (i == 0);

 		AssertState(sortKey->ssup_attno != 0);

@@ -883,6 +910,13 @@ tuplesort_begin_datum(Oid datumType, Oid sortOperator, Oid sortCollation,
 	state->onlyKey->ssup_cxt = CurrentMemoryContext;
 	state->onlyKey->ssup_collation = sortCollation;
 	state->onlyKey->ssup_nulls_first = nullsFirstFlag;
+	/*
+	 * Conversion to abbreviated representation infeasible in the Datum case.
+	 * It must be possible to subsequently fetch original datum values within
+	 * tuplesort_getdatum(), which would require special-case preservation of
+	 * original values.
+	 */
+	state->onlyKey->abbreviate = false;

 	PrepareSortSupportFromOrderingOp(sortOperator, state->onlyKey);

@@ -928,6 +962,19 @@ tuplesort_set_bound(Tuplesortstate *state, int64 bound)

 	state->bounded = true;
 	state->bound = (int) bound;
+
+	/*
+	 * Bounded sorts are not an effective target for abbreviated key
+	 * optimization.  Disable by setting state to be consistent with no
+	 * abbreviation support.
+	 */
+	state->sortKeys->abbrev_converter = NULL;
+	if (state->sortKeys->abbrev_full_comparator)
+		state->sortKeys->comparator = state->sortKeys->abbrev_full_comparator;
+
+	/* Not strictly necessary, but be tidy */
+	state->sortKeys->abbrev_abort = NULL;
+	state->sortKeys->abbrev_full_comparator = NULL;
 }

 /*
@@ -1186,15 +1233,63 @@ tuplesort_putindextuplevalues(Tuplesortstate *state, Relation rel,
 {
 	MemoryContext oldcontext = MemoryContextSwitchTo(state->sortcontext);
 	SortTuple	stup;
+	Datum		original;
+	IndexTuple	tuple;

 	stup.tuple = index_form_tuple(RelationGetDescr(rel), values, isnull);
-	((IndexTuple) stup.tuple)->t_tid = *self;
+	tuple = ((IndexTuple) stup.tuple);
+	tuple->t_tid = *self;
 	USEMEM(state, GetMemoryChunkSpace(stup.tuple));
 	/* set up first-column key value */
-	stup.datum1 = index_getattr((IndexTuple) stup.tuple,
-								1,
-								RelationGetDescr(state->indexRel),
-								&stup.isnull1);
+	original = index_getattr(tuple,
+							 1,
+							 RelationGetDescr(state->indexRel),
+							 &stup.isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup.isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup.datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup.datum1 = state->sortKeys->abbrev_converter(original,
+														state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup.datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			tuple = mtup->tuple;
+			mtup->datum1 = index_getattr(tuple,
+										 1,
+										 RelationGetDescr(state->indexRel),
+										 &stup.isnull1);
+		}
+	}
+
 	puttuple_common(state, &stup);

 	MemoryContextSwitchTo(oldcontext);
@@ -1359,6 +1454,47 @@ puttuple_common(Tuplesortstate *state, SortTuple *tuple)
 	}
 }

+static bool
+consider_abort_common(Tuplesortstate *state)
+{
+	Assert(state->sortKeys[0].abbrev_converter != NULL);
+	Assert(state->sortKeys[0].abbrev_abort != NULL);
+	Assert(state->sortKeys[0].abbrev_full_comparator != NULL);
+
+	/*
+	 * Check effectiveness of abbreviation optimization.  Consider aborting
+	 * when still within memory limit.
+	 */
+	if (state->status == TSS_INITIAL &&
+		state->memtupcount >= state->abbrevNext)
+	{
+		state->abbrevNext *= 2;
+
+		/*
+		 * Check opclass-supplied abbreviation abort routine.  It may
+		 * indicate that abbreviation should not proceed.
+		 */
+		if (!state->sortKeys->abbrev_abort(state->memtupcount,
+										   state->sortKeys))
+			return false;
+
+		/*
+		 * Finally, restore authoritative comparator, and indicate that
+		 * abbreviation is not in play by setting abbrev_converter to NULL
+		 */
+		state->sortKeys[0].comparator = state->sortKeys[0].abbrev_full_comparator;
+		state->sortKeys[0].abbrev_converter = NULL;
+		/* Not strictly necessary, but be tidy */
+		state->sortKeys[0].abbrev_abort = NULL;
+		state->sortKeys[0].abbrev_full_comparator = NULL;
+
+		/* Give up - expect original pass-by-value representation */
+		return true;
+	}
+
+	return false;
+}
+
 /*
 * All tuples have been provided; finish the sort.
 */
@@ -2853,6 +2989,12 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 	TupleDesc	tupDesc;
 	int			nkey;
 	int32		compare;
+	AttrNumber	attno;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+

 	/* Compare the leading sort key */
 	compare = ApplySortComparator(a->datum1, a->isnull1,
@@ -2867,14 +3009,25 @@ comparetup_heap(const SortTuple *a, const SortTuple *b, Tuplesortstate *state)
 	rtup.t_len = ((MinimalTuple) b->tuple)->t_len + MINIMAL_TUPLE_OFFSET;
 	rtup.t_data = (HeapTupleHeader) ((char *) b->tuple - MINIMAL_TUPLE_OFFSET);
 	tupDesc = state->tupDesc;
+
+	if (sortKey->abbrev_converter)
+	{
+		attno = sortKey->ssup_attno;
+
+		datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
+		datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
+
+		compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+												datum2, isnull2,
+												sortKey);
+		if (compare != 0)
+			return compare;
+	}
+
 	sortKey++;
 	for (nkey = 1; nkey < state->nKeys; nkey++, sortKey++)
 	{
-		AttrNumber	attno = sortKey->ssup_attno;
-		Datum		datum1,
-					datum2;
-		bool		isnull1,
-					isnull2;
+		attno = sortKey->ssup_attno;

 		datum1 = heap_getattr(&ltup, attno, tupDesc, &isnull1);
 		datum2 = heap_getattr(&rtup, attno, tupDesc, &isnull2);
@@ -2897,6 +3050,7 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
 	 * MinimalTuple using the exported interface for that.
 	 */
 	TupleTableSlot *slot = (TupleTableSlot *) tup;
+	Datum			original;
 	MinimalTuple tuple;
 	HeapTupleData htup;

@@ -2907,10 +3061,58 @@ copytup_heap(Tuplesortstate *state, SortTuple *stup, void *tup)
 	/* set up first-column key value */
 	htup.t_len = tuple->t_len + MINIMAL_TUPLE_OFFSET;
 	htup.t_data = (HeapTupleHeader) ((char *) tuple - MINIMAL_TUPLE_OFFSET);
-	stup->datum1 = heap_getattr(&htup,
-								state->sortKeys[0].ssup_attno,
-								state->tupDesc,
-								&stup->isnull1);
+	original = heap_getattr(&htup,
+							state->sortKeys[0].ssup_attno,
+							state->tupDesc,
+							&stup->isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			htup.t_len = ((MinimalTuple) mtup->tuple)->t_len +
+							MINIMAL_TUPLE_OFFSET;
+			htup.t_data = (HeapTupleHeader) ((char *) mtup->tuple -
+							MINIMAL_TUPLE_OFFSET);
+
+			mtup->datum1 = heap_getattr(&htup,
+										state->sortKeys[0].ssup_attno,
+										state->tupDesc,
+										&mtup->isnull1);
+		}
+	}
 }

 static void
@@ -2980,13 +3182,35 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
 	TupleDesc	tupDesc;
 	int			nkey;
 	int32		compare;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+	AttrNumber	leading = state->indexInfo->ii_KeyAttrNumbers[0];
+
+	/* Be prepared to compare additional sort keys */
+	ltup = (HeapTuple) a->tuple;
+	rtup = (HeapTuple) b->tuple;
+	tupDesc = state->tupDesc;

 	/* Compare the leading sort key, if it's simple */
-	if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
+	if (leading != 0)
 	{
 		compare = ApplySortComparator(a->datum1, a->isnull1,
 									  b->datum1, b->isnull1,
 									  sortKey);
+		if (compare != 0)
+			return compare;
+
+		if (sortKey->abbrev_converter)
+		{
+			datum1 = heap_getattr(ltup, leading, tupDesc, &isnull1);
+			datum2 = heap_getattr(rtup, leading, tupDesc, &isnull2);
+
+			compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+													datum2, isnull2,
+													sortKey);
+		}
 		if (compare != 0 || state->nKeys == 1)
 			return compare;
 		/* Compare additional columns the hard way */
@@ -2999,22 +3223,13 @@ comparetup_cluster(const SortTuple *a, const SortTuple *b,
 		nkey = 0;
 	}

-	/* Compare additional sort keys */
-	ltup = (HeapTuple) a->tuple;
-	rtup = (HeapTuple) b->tuple;
-
 	if (state->indexInfo->ii_Expressions == NULL)
 	{
 		/* If not expression index, just compare the proper heap attrs */
-		tupDesc = state->tupDesc;

 		for (; nkey < state->nKeys; nkey++, sortKey++)
 		{
 			AttrNumber	attno = state->indexInfo->ii_KeyAttrNumbers[nkey];
-			Datum		datum1,
-						datum2;
-			bool		isnull1,
-						isnull2;

 			datum1 = heap_getattr(ltup, attno, tupDesc, &isnull1);
 			datum2 = heap_getattr(rtup, attno, tupDesc, &isnull2);
@@ -3072,17 +3287,67 @@ static void
 copytup_cluster(Tuplesortstate *state, SortTuple *stup, void *tup)
 {
 	HeapTuple	tuple = (HeapTuple) tup;
+	Datum		original;

 	/* copy the tuple into sort storage */
 	tuple = heap_copytuple(tuple);
 	stup->tuple = (void *) tuple;
 	USEMEM(state, GetMemoryChunkSpace(tuple));
-	/* set up first-column key value, if it's a simple column */
-	if (state->indexInfo->ii_KeyAttrNumbers[0] != 0)
-		stup->datum1 = heap_getattr(tuple,
-									state->indexInfo->ii_KeyAttrNumbers[0],
-									state->tupDesc,
-									&stup->isnull1);
+	/*
+	 * set up first-column key value, and potentially abbreviate, if it's a
+	 * simple column
+	 */
+	if (state->indexInfo->ii_KeyAttrNumbers[0] == 0)
+		return;
+
+	original = heap_getattr(tuple,
+							state->indexInfo->ii_KeyAttrNumbers[0],
+							state->tupDesc,
+							&stup->isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			tuple = (HeapTuple) mtup->tuple;
+			mtup->datum1 = heap_getattr(tuple,
+										state->indexInfo->ii_KeyAttrNumbers[0],
+										state->tupDesc,
+										&stup->isnull1);
+		}
+	}
 }

 static void
@@ -3162,6 +3427,11 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
 	bool		equal_hasnull = false;
 	int			nkey;
 	int32		compare;
+	Datum		datum1,
+				datum2;
+	bool		isnull1,
+				isnull2;
+

 	/* Compare the leading sort key */
 	compare = ApplySortComparator(a->datum1, a->isnull1,
@@ -3170,23 +3440,31 @@ comparetup_index_btree(const SortTuple *a, const SortTuple *b,
 	if (compare != 0)
 		return compare;

-	/* they are equal, so we only need to examine one null flag */
-	if (a->isnull1)
-		equal_hasnull = true;
-
 	/* Compare additional sort keys */
 	tuple1 = (IndexTuple) a->tuple;
 	tuple2 = (IndexTuple) b->tuple;
 	keysz = state->nKeys;
 	tupDes = RelationGetDescr(state->indexRel);
+
+	if (sortKey->abbrev_converter)
+	{
+		datum1 = index_getattr(tuple1, 1, tupDes, &isnull1);
+		datum2 = index_getattr(tuple2, 1, tupDes, &isnull2);
+
+		compare = ApplySortAbbrevFullComparator(datum1, isnull1,
+												datum2, isnull2,
+												sortKey);
+		if (compare != 0)
+			return compare;
+	}
+
+	/* they are equal, so we only need to examine one null flag */
+	if (a->isnull1)
+		equal_hasnull = true;
+
 	sortKey++;
 	for (nkey = 2; nkey <= keysz; nkey++, sortKey++)
 	{
-		Datum		datum1,
-					datum2;
-		bool		isnull1,
-					isnull2;
-
 		datum1 = index_getattr(tuple1, nkey, tupDes, &isnull1);
 		datum2 = index_getattr(tuple2, nkey, tupDes, &isnull2);

@@ -3313,6 +3591,7 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
 	IndexTuple	tuple = (IndexTuple) tup;
 	unsigned int tuplen = IndexTupleSize(tuple);
 	IndexTuple	newtuple;
+	Datum		original;

 	/* copy the tuple into sort storage */
 	newtuple = (IndexTuple) palloc(tuplen);
@@ -3320,10 +3599,54 @@ copytup_index(Tuplesortstate *state, SortTuple *stup, void *tup)
 	USEMEM(state, GetMemoryChunkSpace(newtuple));
 	stup->tuple = (void *) newtuple;
 	/* set up first-column key value */
-	stup->datum1 = index_getattr(newtuple,
-								 1,
-								 RelationGetDescr(state->indexRel),
-								 &stup->isnull1);
+	original = index_getattr(newtuple,
+							 1,
+							 RelationGetDescr(state->indexRel),
+							 &stup->isnull1);
+
+	if (!state->sortKeys->abbrev_converter || stup->isnull1)
+	{
+		/*
+		 * Store ordinary Datum representation, or NULL value.  If there is a
+		 * converter it won't expect NULL values, and cost model is not
+		 * required to account for NULL, so in that case we avoid calling
+		 * converter and just set datum1 to "void" representation (to be
+		 * consistent).
+		 */
+		stup->datum1 = original;
+	}
+	else if (!consider_abort_common(state))
+	{
+		/* Store abbreviated key representation */
+		stup->datum1 = state->sortKeys->abbrev_converter(original,
+														 state->sortKeys);
+	}
+	else
+	{
+		/* Abort abbreviation */
+		int		i;
+
+		stup->datum1 = original;
+
+		/*
+		 * Set state to be consistent with never trying abbreviation.
+		 *
+		 * Alter datum1 representation in already-copied tuples, so as to
+		 * ensure a consistent representation (current tuple was just handled).
+		 * Note that we rely on all tuples copied so far actually being
+		 * contained within memtuples array.
+		 */
+		for (i = 0; i < state->memtupcount; i++)
+		{
+			SortTuple *mtup = &state->memtuples[i];
+
+			tuple = (IndexTuple) mtup->tuple;
+			mtup->datum1 = index_getattr(tuple,
+										 1,
+										 RelationGetDescr(state->indexRel),
+										 &stup->isnull1);
+		}
+	}
 }

 static void