Revert "TupleHashTable: store additional data along with tuple."

This reverts commit e0ece2a981 due to performance regressions. Reported-by: David Rowley
2025-11-19 13:42:17 +03:00 · 2025-01-13 14:14:07 -08:00
parent af2317652d
commit b4a07f532b
6 changed files with 31 additions and 78 deletions
--- a/src/backend/executor/execGrouping.c
+++ b/src/backend/executor/execGrouping.c
@@ -20,13 +20,6 @@
 #include "miscadmin.h"
 #include "utils/lsyscache.h"

-struct TupleHashEntryData
-{
-	MinimalTuple firstTuple;	/* copy of first tuple in this group */
-	uint32		status;			/* hash status */
-	uint32		hash;			/* hash value (cached) */
-};
-
 static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2);
 static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb,
 												 const MinimalTuple tuple);
@@ -203,7 +196,6 @@ BuildTupleHashTable(PlanState *parent,
 	hashtable->tab_collations = collations;
 	hashtable->tablecxt = tablecxt;
 	hashtable->tempcxt = tempcxt;
-	hashtable->additionalsize = additionalsize;
 	hashtable->tableslot = NULL;	/* will be made on first lookup */
 	hashtable->inputslot = NULL;
 	hashtable->in_hash_expr = NULL;
@@ -281,15 +273,6 @@ ResetTupleHashTable(TupleHashTable hashtable)
 	tuplehash_reset(hashtable->hashtab);
 }

-/*
- * Return size of the hash bucket. Useful for estimating memory usage.
- */
-size_t
-TupleHashEntrySize(void)
-{
-	return sizeof(TupleHashEntryData);
-}
-
 /*
 * Find or create a hashtable entry for the tuple group containing the
 * given tuple.  The tuple must be the same type as the hashtable entries.
@@ -356,24 +339,6 @@ TupleHashTableHash(TupleHashTable hashtable, TupleTableSlot *slot)
 	return hash;
 }

-MinimalTuple
-TupleHashEntryGetTuple(TupleHashEntry entry)
-{
-	return entry->firstTuple;
-}
-
-/*
- * Get a pointer into the additional space allocated for this entry. The
- * amount of space available is the additionalsize specified to
- * BuildTupleHashTable(). If additionalsize was specified as zero, no
- * additional space is available and this function should not be called.
- */
-void *
-TupleHashEntryGetAdditional(TupleHashEntry entry)
-{
-	return (char *) entry->firstTuple + MAXALIGN(entry->firstTuple->t_len);
-}
-
 /*
 * A variant of LookupTupleHashEntry for callers that have already computed
 * the hash value.
@@ -512,31 +477,13 @@ LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot,
 		}
 		else
 		{
-			MinimalTuple firstTuple;
-			size_t		totalsize;	/* including alignment and additionalsize */
-
 			/* created new entry */
 			*isnew = true;
 			/* zero caller data */
+			entry->additional = NULL;
 			MemoryContextSwitchTo(hashtable->tablecxt);
-
 			/* Copy the first tuple into the table context */
-			firstTuple = ExecCopySlotMinimalTuple(slot);
-
-			/*
-			 * Allocate additional space right after the MinimalTuple of size
-			 * additionalsize. The caller can get a pointer to this data with
-			 * TupleHashEntryGetAdditional(), and store arbitrary data there.
-			 *
-			 * This avoids the need to store an extra pointer or allocate an
-			 * additional chunk, which would waste memory.
-			 */
-			totalsize = MAXALIGN(firstTuple->t_len) + hashtable->additionalsize;
-			firstTuple = repalloc(firstTuple, totalsize);
-			memset((char *) firstTuple + firstTuple->t_len, 0,
-				   totalsize - firstTuple->t_len);
-
-			entry->firstTuple = firstTuple;
+			entry->firstTuple = ExecCopySlotMinimalTuple(slot);
 		}
 	}
 	else
--- a/src/backend/executor/nodeAgg.c
+++ b/src/backend/executor/nodeAgg.c
@@ -1713,7 +1713,7 @@ hash_agg_entry_size(int numTrans, Size tupleWidth, Size transitionSpace)
 		transitionChunkSize = 0;

 	return
-		TupleHashEntrySize() +
+		sizeof(TupleHashEntryData) +
 		tupleChunkSize +
 		pergroupChunkSize +
 		transitionChunkSize;
@@ -1954,7 +1954,7 @@ hash_agg_update_metrics(AggState *aggstate, bool from_tape, int npartitions)
 	if (aggstate->hash_ngroups_current > 0)
 	{
 		aggstate->hashentrysize =
-			TupleHashEntrySize() +
+			sizeof(TupleHashEntryData) +
 			(hashkey_mem / (double) aggstate->hash_ngroups_current);
 	}
 }
@@ -2055,7 +2055,11 @@ initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable,
 	if (aggstate->numtrans == 0)
 		return;

-	pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
+	pergroup = (AggStatePerGroup)
+		MemoryContextAlloc(hashtable->tablecxt,
+						   sizeof(AggStatePerGroupData) * aggstate->numtrans);
+
+	entry->additional = pergroup;

 	/*
 	 * Initialize aggregates for new tuple group, lookup_hash_entries()
@@ -2119,7 +2123,7 @@ lookup_hash_entries(AggState *aggstate)
 		{
 			if (isnew)
 				initialize_hash_entry(aggstate, hashtable, entry);
-			pergroup[setno] = TupleHashEntryGetAdditional(entry);
+			pergroup[setno] = entry->additional;
 		}
 		else
 		{
@@ -2677,7 +2681,7 @@ agg_refill_hash_table(AggState *aggstate)
 		{
 			if (isnew)
 				initialize_hash_entry(aggstate, perhash->hashtable, entry);
-			aggstate->hash_pergroup[batch->setno] = TupleHashEntryGetAdditional(entry);
+			aggstate->hash_pergroup[batch->setno] = entry->additional;
 			advance_aggregates(aggstate);
 		}
 		else
@@ -2769,7 +2773,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 	ExprContext *econtext;
 	AggStatePerAgg peragg;
 	AggStatePerGroup pergroup;
-	TupleHashEntry entry;
+	TupleHashEntryData *entry;
 	TupleTableSlot *firstSlot;
 	TupleTableSlot *result;
 	AggStatePerHash perhash;
@@ -2841,7 +2845,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 		 * Transform representative tuple back into one with the right
 		 * columns.
 		 */
-		ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashslot, false);
+		ExecStoreMinimalTuple(entry->firstTuple, hashslot, false);
 		slot_getallattrs(hashslot);

 		ExecClearTuple(firstSlot);
@@ -2857,7 +2861,7 @@ agg_retrieve_hash_table_in_memory(AggState *aggstate)
 		}
 		ExecStoreVirtualTuple(firstSlot);

-		pergroup = (AggStatePerGroup) TupleHashEntryGetAdditional(entry);
+		pergroup = (AggStatePerGroup) entry->additional;

 		/*
 		 * Use the representative input tuple for any references to
--- a/src/backend/executor/nodeSetOp.c
+++ b/src/backend/executor/nodeSetOp.c
@@ -425,7 +425,6 @@ setop_fill_hash_table(SetOpState *setopstate)
 	{
 		TupleTableSlot *outerslot;
 		TupleHashEntryData *entry;
-		SetOpStatePerGroup pergroup;
 		bool		isnew;

 		outerslot = ExecProcNode(outerPlan);
@@ -438,16 +437,16 @@ setop_fill_hash_table(SetOpState *setopstate)
 									 outerslot,
 									 &isnew, NULL);

-		pergroup = TupleHashEntryGetAdditional(entry);
 		/* If new tuple group, initialize counts to zero */
 		if (isnew)
 		{
-			pergroup->numLeft = 0;
-			pergroup->numRight = 0;
+			entry->additional = (SetOpStatePerGroup)
+				MemoryContextAllocZero(setopstate->hashtable->tablecxt,
+									   sizeof(SetOpStatePerGroupData));
 		}

 		/* Advance the counts */
-		pergroup->numLeft++;
+		((SetOpStatePerGroup) entry->additional)->numLeft++;

 		/* Must reset expression context after each hashtable lookup */
 		ResetExprContext(econtext);
@@ -479,7 +478,7 @@ setop_fill_hash_table(SetOpState *setopstate)

 			/* Advance the counts if entry is already present */
 			if (entry)
-				((SetOpStatePerGroup) TupleHashEntryGetAdditional(entry))->numRight++;
+				((SetOpStatePerGroup) entry->additional)->numRight++;

 			/* Must reset expression context after each hashtable lookup */
 			ResetExprContext(econtext);
@@ -497,7 +496,7 @@ setop_fill_hash_table(SetOpState *setopstate)
 static TupleTableSlot *
 setop_retrieve_hash_table(SetOpState *setopstate)
 {
-	TupleHashEntry entry;
+	TupleHashEntryData *entry;
 	TupleTableSlot *resultTupleSlot;

 	/*
@@ -527,12 +526,12 @@ setop_retrieve_hash_table(SetOpState *setopstate)
 		 * See if we should emit any copies of this tuple, and if so return
 		 * the first copy.
 		 */
-		set_output_count(setopstate, (SetOpStatePerGroup) TupleHashEntryGetAdditional(entry));
+		set_output_count(setopstate, (SetOpStatePerGroup) entry->additional);

 		if (setopstate->numOutput > 0)
 		{
 			setopstate->numOutput--;
-			return ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry),
+			return ExecStoreMinimalTuple(entry->firstTuple,
 										 resultTupleSlot,
 										 false);
 		}
--- a/src/backend/executor/nodeSubplan.c
+++ b/src/backend/executor/nodeSubplan.c
@@ -753,7 +753,7 @@ findPartialMatch(TupleHashTable hashtable, TupleTableSlot *slot,
 	{
 		CHECK_FOR_INTERRUPTS();

-		ExecStoreMinimalTuple(TupleHashEntryGetTuple(entry), hashtable->tableslot, false);
+		ExecStoreMinimalTuple(entry->firstTuple, hashtable->tableslot, false);
 		if (!execTuplesUnequal(slot, hashtable->tableslot,
 							   numCols, keyColIdx,
 							   eqfunctions,
--- a/src/include/executor/executor.h
+++ b/src/include/executor/executor.h
@@ -148,12 +148,9 @@ extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable,
 										   bool *isnew, uint32 *hash);
 extern uint32 TupleHashTableHash(TupleHashTable hashtable,
 								 TupleTableSlot *slot);
-extern size_t TupleHashEntrySize(void);
 extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable,
 											   TupleTableSlot *slot,
 											   bool *isnew, uint32 hash);
-extern MinimalTuple TupleHashEntryGetTuple(TupleHashEntry entry);
-extern void *TupleHashEntryGetAdditional(TupleHashEntry entry);
 extern TupleHashEntry FindTupleHashEntry(TupleHashTable hashtable,
 										 TupleTableSlot *slot,
 										 ExprState *eqcomp,
--- a/src/include/nodes/execnodes.h
+++ b/src/include/nodes/execnodes.h
@@ -806,10 +806,17 @@ typedef struct ExecAuxRowMark
 * point to tab_hash_expr and tab_eq_func respectively.
 * ----------------------------------------------------------------
 */
-typedef struct TupleHashEntryData TupleHashEntryData;
 typedef struct TupleHashEntryData *TupleHashEntry;
 typedef struct TupleHashTableData *TupleHashTable;

+typedef struct TupleHashEntryData
+{
+	MinimalTuple firstTuple;	/* copy of first tuple in this group */
+	void	   *additional;		/* user data */
+	uint32		status;			/* hash status */
+	uint32		hash;			/* hash value (cached) */
+} TupleHashEntryData;
+
 /* define parameters necessary to generate the tuple hash table interface */
 #define SH_PREFIX tuplehash
 #define SH_ELEMENT_TYPE TupleHashEntryData
@@ -828,7 +835,6 @@ typedef struct TupleHashTableData
 	Oid		   *tab_collations; /* collations for hash and comparison */
 	MemoryContext tablecxt;		/* memory context containing table */
 	MemoryContext tempcxt;		/* context for function evaluations */
-	Size		additionalsize; /* size of additional data */
 	TupleTableSlot *tableslot;	/* slot for referencing table entries */
 	/* The following fields are set transiently for each table search: */
 	TupleTableSlot *inputslot;	/* current input tuple's slot */