mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Fix LookupTupleHashEntryHash() pipeline-stall issue.
Refactor hash lookups in nodeAgg.c to improve performance. Author: Andres Freund and Jeff Davis Discussion: https://postgr.es/m/20200612213715.op4ye4q7gktqvpuo%40alap3.anarazel.de Backpatch-through: 13
This commit is contained in:
		| @@ -22,9 +22,9 @@ | ||||
| #include "utils/memutils.h" | ||||
|  | ||||
| static int	TupleHashTableMatch(struct tuplehash_hash *tb, const MinimalTuple tuple1, const MinimalTuple tuple2); | ||||
| static uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, | ||||
| static inline uint32 TupleHashTableHash_internal(struct tuplehash_hash *tb, | ||||
| 												 const MinimalTuple tuple); | ||||
| static TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable, | ||||
| static inline TupleHashEntry LookupTupleHashEntry_internal(TupleHashTable hashtable, | ||||
| 														   TupleTableSlot *slot, | ||||
| 														   bool *isnew, uint32 hash); | ||||
|  | ||||
| @@ -291,6 +291,9 @@ ResetTupleHashTable(TupleHashTable hashtable) | ||||
|  * If isnew is NULL, we do not create new entries; we return NULL if no | ||||
|  * match is found. | ||||
|  * | ||||
|  * If hash is not NULL, we set it to the calculated hash value. This allows | ||||
|  * callers access to the hash value even if no entry is returned. | ||||
|  * | ||||
|  * If isnew isn't NULL, then a new entry is created if no existing entry | ||||
|  * matches.  On return, *isnew is true if the entry is newly created, | ||||
|  * false if it existed already.  ->additional_data in the new entry has | ||||
| @@ -298,11 +301,11 @@ ResetTupleHashTable(TupleHashTable hashtable) | ||||
|  */ | ||||
| TupleHashEntry | ||||
| LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, | ||||
| 					 bool *isnew) | ||||
| 					 bool *isnew, uint32 *hash) | ||||
| { | ||||
| 	TupleHashEntry entry; | ||||
| 	MemoryContext oldContext; | ||||
| 	uint32		hash; | ||||
| 	uint32		local_hash; | ||||
|  | ||||
| 	/* Need to run the hash functions in short-lived context */ | ||||
| 	oldContext = MemoryContextSwitchTo(hashtable->tempcxt); | ||||
| @@ -312,8 +315,13 @@ LookupTupleHashEntry(TupleHashTable hashtable, TupleTableSlot *slot, | ||||
| 	hashtable->in_hash_funcs = hashtable->tab_hash_funcs; | ||||
| 	hashtable->cur_eq_func = hashtable->tab_eq_func; | ||||
|  | ||||
| 	hash = TupleHashTableHash_internal(hashtable->hashtab, NULL); | ||||
| 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash); | ||||
| 	local_hash = TupleHashTableHash_internal(hashtable->hashtab, NULL); | ||||
| 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, local_hash); | ||||
|  | ||||
| 	if (hash != NULL) | ||||
| 		*hash = local_hash; | ||||
|  | ||||
| 	Assert(entry == NULL || entry->hash == local_hash); | ||||
|  | ||||
| 	MemoryContextSwitchTo(oldContext); | ||||
|  | ||||
| @@ -362,6 +370,7 @@ LookupTupleHashEntryHash(TupleHashTable hashtable, TupleTableSlot *slot, | ||||
| 	hashtable->cur_eq_func = hashtable->tab_eq_func; | ||||
|  | ||||
| 	entry = LookupTupleHashEntry_internal(hashtable, slot, isnew, hash); | ||||
| 	Assert(entry == NULL || entry->hash == hash); | ||||
|  | ||||
| 	MemoryContextSwitchTo(oldContext); | ||||
|  | ||||
| @@ -480,7 +489,7 @@ TupleHashTableHash_internal(struct tuplehash_hash *tb, | ||||
|  * NB: This function may or may not change the memory context. Caller is | ||||
|  * expected to change it back. | ||||
|  */ | ||||
| static TupleHashEntry | ||||
| static inline TupleHashEntry | ||||
| LookupTupleHashEntry_internal(TupleHashTable hashtable, TupleTableSlot *slot, | ||||
| 							  bool *isnew, uint32 hash) | ||||
| { | ||||
|   | ||||
| @@ -391,7 +391,9 @@ static void finalize_partialaggregate(AggState *aggstate, | ||||
| 									  AggStatePerAgg peragg, | ||||
| 									  AggStatePerGroup pergroupstate, | ||||
| 									  Datum *resultVal, bool *resultIsNull); | ||||
| static void prepare_hash_slot(AggState *aggstate); | ||||
| static inline void prepare_hash_slot(AggStatePerHash perhash, | ||||
| 									 TupleTableSlot *inputslot, | ||||
| 									 TupleTableSlot *hashslot); | ||||
| static void prepare_projection_slot(AggState *aggstate, | ||||
| 									TupleTableSlot *slot, | ||||
| 									int currentSet); | ||||
| @@ -413,8 +415,9 @@ static int	hash_choose_num_partitions(uint64 input_groups, | ||||
| 									   double hashentrysize, | ||||
| 									   int used_bits, | ||||
| 									   int *log2_npartittions); | ||||
| static AggStatePerGroup lookup_hash_entry(AggState *aggstate, uint32 hash, | ||||
| 										  bool *in_hash_table); | ||||
| static void initialize_hash_entry(AggState *aggstate, | ||||
| 								  TupleHashTable hashtable, | ||||
| 								  TupleHashEntry entry); | ||||
| static void lookup_hash_entries(AggState *aggstate); | ||||
| static TupleTableSlot *agg_retrieve_direct(AggState *aggstate); | ||||
| static void agg_fill_hash_table(AggState *aggstate); | ||||
| @@ -1207,12 +1210,11 @@ finalize_partialaggregate(AggState *aggstate, | ||||
|  * Extract the attributes that make up the grouping key into the | ||||
|  * hashslot. This is necessary to compute the hash or perform a lookup. | ||||
|  */ | ||||
| static void | ||||
| prepare_hash_slot(AggState *aggstate) | ||||
| static inline void | ||||
| prepare_hash_slot(AggStatePerHash perhash, | ||||
| 				  TupleTableSlot *inputslot, | ||||
| 				  TupleTableSlot *hashslot) | ||||
| { | ||||
| 	TupleTableSlot *inputslot = aggstate->tmpcontext->ecxt_outertuple; | ||||
| 	AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set]; | ||||
| 	TupleTableSlot *hashslot = perhash->hashslot; | ||||
| 	int			i; | ||||
|  | ||||
| 	/* transfer just the needed columns into hashslot */ | ||||
| @@ -2013,44 +2015,11 @@ hash_choose_num_partitions(uint64 input_groups, double hashentrysize, | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Find or create a hashtable entry for the tuple group containing the current | ||||
|  * tuple (already set in tmpcontext's outertuple slot), in the current grouping | ||||
|  * set (which the caller must have selected - note that initialize_aggregate | ||||
|  * depends on this). | ||||
|  * | ||||
|  * When called, CurrentMemoryContext should be the per-query context. The | ||||
|  * already-calculated hash value for the tuple must be specified. | ||||
|  * | ||||
|  * If in "spill mode", then only find existing hashtable entries; don't create | ||||
|  * new ones. If a tuple's group is not already present in the hash table for | ||||
|  * the current grouping set, assign *in_hash_table=false and the caller will | ||||
|  * spill it to disk. | ||||
|  * Initialize a freshly-created TupleHashEntry. | ||||
|  */ | ||||
| static AggStatePerGroup | ||||
| lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table) | ||||
| { | ||||
| 	AggStatePerHash perhash = &aggstate->perhash[aggstate->current_set]; | ||||
| 	TupleTableSlot *hashslot = perhash->hashslot; | ||||
| 	TupleHashEntryData *entry; | ||||
| 	bool		isnew = false; | ||||
| 	bool	   *p_isnew; | ||||
|  | ||||
| 	/* if hash table already spilled, don't create new entries */ | ||||
| 	p_isnew = aggstate->hash_spill_mode ? NULL : &isnew; | ||||
|  | ||||
| 	/* find or create the hashtable entry using the filtered tuple */ | ||||
| 	entry = LookupTupleHashEntryHash(perhash->hashtable, hashslot, p_isnew, | ||||
| 									 hash); | ||||
|  | ||||
| 	if (entry == NULL) | ||||
| 	{ | ||||
| 		*in_hash_table = false; | ||||
| 		return NULL; | ||||
| 	} | ||||
| 	else | ||||
| 		*in_hash_table = true; | ||||
|  | ||||
| 	if (isnew) | ||||
| static void | ||||
| initialize_hash_entry(AggState *aggstate, TupleHashTable hashtable, | ||||
| 					  TupleHashEntry entry) | ||||
| { | ||||
| 	AggStatePerGroup pergroup; | ||||
| 	int			transno; | ||||
| @@ -2060,10 +2029,10 @@ lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table) | ||||
|  | ||||
| 	/* no need to allocate or initialize per-group state */ | ||||
| 	if (aggstate->numtrans == 0) | ||||
| 			return NULL; | ||||
| 		return; | ||||
|  | ||||
| 	pergroup = (AggStatePerGroup) | ||||
| 			MemoryContextAlloc(perhash->hashtable->tablecxt, | ||||
| 		MemoryContextAlloc(hashtable->tablecxt, | ||||
| 						   sizeof(AggStatePerGroupData) * aggstate->numtrans); | ||||
|  | ||||
| 	entry->additional = pergroup; | ||||
| @@ -2081,9 +2050,6 @@ lookup_hash_entry(AggState *aggstate, uint32 hash, bool *in_hash_table) | ||||
| 	} | ||||
| } | ||||
|  | ||||
| 	return entry->additional; | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Look up hash entries for the current tuple in all hashed grouping sets, | ||||
|  * returning an array of pergroup pointers suitable for advance_aggregates. | ||||
| @@ -2106,21 +2072,37 @@ static void | ||||
| lookup_hash_entries(AggState *aggstate) | ||||
| { | ||||
| 	AggStatePerGroup *pergroup = aggstate->hash_pergroup; | ||||
| 	TupleTableSlot *outerslot = aggstate->tmpcontext->ecxt_outertuple; | ||||
| 	int			setno; | ||||
|  | ||||
| 	for (setno = 0; setno < aggstate->num_hashes; setno++) | ||||
| 	{ | ||||
| 		AggStatePerHash perhash = &aggstate->perhash[setno]; | ||||
| 		TupleHashTable hashtable = perhash->hashtable; | ||||
| 		TupleTableSlot *hashslot = perhash->hashslot; | ||||
| 		TupleHashEntry entry; | ||||
| 		uint32		hash; | ||||
| 		bool		in_hash_table; | ||||
| 		bool		isnew = false; | ||||
| 		bool	   *p_isnew; | ||||
|  | ||||
| 		/* if hash table already spilled, don't create new entries */ | ||||
| 		p_isnew = aggstate->hash_spill_mode ? NULL : &isnew; | ||||
|  | ||||
| 		select_current_set(aggstate, setno, true); | ||||
| 		prepare_hash_slot(aggstate); | ||||
| 		hash = TupleHashTableHash(perhash->hashtable, perhash->hashslot); | ||||
| 		pergroup[setno] = lookup_hash_entry(aggstate, hash, &in_hash_table); | ||||
| 		prepare_hash_slot(perhash, | ||||
| 						  outerslot, | ||||
| 						  hashslot); | ||||
|  | ||||
| 		/* check to see if we need to spill the tuple for this grouping set */ | ||||
| 		if (!in_hash_table) | ||||
| 		entry = LookupTupleHashEntry(hashtable, hashslot, | ||||
| 									 p_isnew, &hash); | ||||
|  | ||||
| 		if (entry != NULL) | ||||
| 		{ | ||||
| 			if (isnew) | ||||
| 				initialize_hash_entry(aggstate, hashtable, entry); | ||||
| 			pergroup[setno] = entry->additional; | ||||
| 		} | ||||
| 		else | ||||
| 		{ | ||||
| 			HashAggSpill *spill = &aggstate->hash_spills[setno]; | ||||
| 			TupleTableSlot *slot = aggstate->tmpcontext->ecxt_outertuple; | ||||
| @@ -2131,6 +2113,7 @@ lookup_hash_entries(AggState *aggstate) | ||||
| 								   aggstate->hashentrysize); | ||||
|  | ||||
| 			hashagg_spill_tuple(aggstate, spill, slot, hash); | ||||
| 			pergroup[setno] = NULL; | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| @@ -2588,6 +2571,7 @@ static bool | ||||
| agg_refill_hash_table(AggState *aggstate) | ||||
| { | ||||
| 	HashAggBatch *batch; | ||||
| 	AggStatePerHash perhash; | ||||
| 	HashAggSpill spill; | ||||
| 	HashTapeInfo *tapeinfo = aggstate->hash_tapeinfo; | ||||
| 	uint64		ngroups_estimate; | ||||
| @@ -2639,6 +2623,8 @@ agg_refill_hash_table(AggState *aggstate) | ||||
|  | ||||
| 	select_current_set(aggstate, batch->setno, true); | ||||
|  | ||||
| 	perhash = &aggstate->perhash[aggstate->current_set]; | ||||
|  | ||||
| 	/* | ||||
| 	 * Spilled tuples are always read back as MinimalTuples, which may be | ||||
| 	 * different from the outer plan, so recompile the aggregate expressions. | ||||
| @@ -2652,10 +2638,13 @@ agg_refill_hash_table(AggState *aggstate) | ||||
| 							 HASHAGG_READ_BUFFER_SIZE); | ||||
| 	for (;;) | ||||
| 	{ | ||||
| 		TupleTableSlot *slot = aggstate->hash_spill_rslot; | ||||
| 		TupleTableSlot *spillslot = aggstate->hash_spill_rslot; | ||||
| 		TupleTableSlot *hashslot = perhash->hashslot; | ||||
| 		TupleHashEntry entry; | ||||
| 		MinimalTuple tuple; | ||||
| 		uint32		hash; | ||||
| 		bool		in_hash_table; | ||||
| 		bool		isnew = false; | ||||
| 		bool	   *p_isnew = aggstate->hash_spill_mode ? NULL : &isnew; | ||||
|  | ||||
| 		CHECK_FOR_INTERRUPTS(); | ||||
|  | ||||
| @@ -2663,16 +2652,20 @@ agg_refill_hash_table(AggState *aggstate) | ||||
| 		if (tuple == NULL) | ||||
| 			break; | ||||
|  | ||||
| 		ExecStoreMinimalTuple(tuple, slot, true); | ||||
| 		aggstate->tmpcontext->ecxt_outertuple = slot; | ||||
| 		ExecStoreMinimalTuple(tuple, spillslot, true); | ||||
| 		aggstate->tmpcontext->ecxt_outertuple = spillslot; | ||||
|  | ||||
| 		prepare_hash_slot(aggstate); | ||||
| 		aggstate->hash_pergroup[batch->setno] = | ||||
| 			lookup_hash_entry(aggstate, hash, &in_hash_table); | ||||
| 		prepare_hash_slot(perhash, | ||||
| 						  aggstate->tmpcontext->ecxt_outertuple, | ||||
| 						  hashslot); | ||||
| 		entry = LookupTupleHashEntryHash( | ||||
| 										 perhash->hashtable, hashslot, p_isnew, hash); | ||||
|  | ||||
| 		if (in_hash_table) | ||||
| 		if (entry != NULL) | ||||
| 		{ | ||||
| 			/* Advance the aggregates (or combine functions) */ | ||||
| 			if (isnew) | ||||
| 				initialize_hash_entry(aggstate, perhash->hashtable, entry); | ||||
| 			aggstate->hash_pergroup[batch->setno] = entry->additional; | ||||
| 			advance_aggregates(aggstate); | ||||
| 		} | ||||
| 		else | ||||
| @@ -2688,7 +2681,9 @@ agg_refill_hash_table(AggState *aggstate) | ||||
| 								   ngroups_estimate, aggstate->hashentrysize); | ||||
| 			} | ||||
| 			/* no memory for a new group, spill */ | ||||
| 			hashagg_spill_tuple(aggstate, &spill, slot, hash); | ||||
| 			hashagg_spill_tuple(aggstate, &spill, spillslot, hash); | ||||
|  | ||||
| 			aggstate->hash_pergroup[batch->setno] = NULL; | ||||
| 		} | ||||
|  | ||||
| 		/* | ||||
|   | ||||
| @@ -94,7 +94,7 @@ ExecRecursiveUnion(PlanState *pstate) | ||||
| 			if (plan->numCols > 0) | ||||
| 			{ | ||||
| 				/* Find or build hashtable entry for this tuple's group */ | ||||
| 				LookupTupleHashEntry(node->hashtable, slot, &isnew); | ||||
| 				LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL); | ||||
| 				/* Must reset temp context after each hashtable lookup */ | ||||
| 				MemoryContextReset(node->tempContext); | ||||
| 				/* Ignore tuple if already seen */ | ||||
| @@ -141,7 +141,7 @@ ExecRecursiveUnion(PlanState *pstate) | ||||
| 		if (plan->numCols > 0) | ||||
| 		{ | ||||
| 			/* Find or build hashtable entry for this tuple's group */ | ||||
| 			LookupTupleHashEntry(node->hashtable, slot, &isnew); | ||||
| 			LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL); | ||||
| 			/* Must reset temp context after each hashtable lookup */ | ||||
| 			MemoryContextReset(node->tempContext); | ||||
| 			/* Ignore tuple if already seen */ | ||||
|   | ||||
| @@ -381,7 +381,7 @@ setop_fill_hash_table(SetOpState *setopstate) | ||||
|  | ||||
| 			/* Find or build hashtable entry for this tuple's group */ | ||||
| 			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, | ||||
| 										 &isnew); | ||||
| 										 &isnew, NULL); | ||||
|  | ||||
| 			/* If new tuple group, initialize counts */ | ||||
| 			if (isnew) | ||||
| @@ -402,7 +402,7 @@ setop_fill_hash_table(SetOpState *setopstate) | ||||
|  | ||||
| 			/* For tuples not seen previously, do not make hashtable entry */ | ||||
| 			entry = LookupTupleHashEntry(setopstate->hashtable, outerslot, | ||||
| 										 NULL); | ||||
| 										 NULL, NULL); | ||||
|  | ||||
| 			/* Advance the counts if entry is already present */ | ||||
| 			if (entry) | ||||
|   | ||||
| @@ -595,12 +595,12 @@ buildSubPlanHash(SubPlanState *node, ExprContext *econtext) | ||||
| 		 */ | ||||
| 		if (slotNoNulls(slot)) | ||||
| 		{ | ||||
| 			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew); | ||||
| 			(void) LookupTupleHashEntry(node->hashtable, slot, &isnew, NULL); | ||||
| 			node->havehashrows = true; | ||||
| 		} | ||||
| 		else if (node->hashnulls) | ||||
| 		{ | ||||
| 			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew); | ||||
| 			(void) LookupTupleHashEntry(node->hashnulls, slot, &isnew, NULL); | ||||
| 			node->havenullrows = true; | ||||
| 		} | ||||
|  | ||||
|   | ||||
| @@ -139,7 +139,7 @@ extern TupleHashTable BuildTupleHashTableExt(PlanState *parent, | ||||
| 											 MemoryContext tempcxt, bool use_variable_hash_iv); | ||||
| extern TupleHashEntry LookupTupleHashEntry(TupleHashTable hashtable, | ||||
| 										   TupleTableSlot *slot, | ||||
| 										   bool *isnew); | ||||
| 										   bool *isnew, uint32 *hash); | ||||
| extern uint32 TupleHashTableHash(TupleHashTable hashtable, | ||||
| 								 TupleTableSlot *slot); | ||||
| extern TupleHashEntry LookupTupleHashEntryHash(TupleHashTable hashtable, | ||||
|   | ||||
		Reference in New Issue
	
	Block a user