Improve planner's estimation of the space needed for HashAgg plans:

look at the actual aggregate transition datatypes and the actual overhead needed by nodeAgg.c, instead of using pessimistic round numbers. Per a discussion with Michael Tiemann.
2025-07-07 00:36:50 +03:00 · 2005-01-28 19:34:28 +00:00
parent c3a4e22e82
commit 0bf2587df4
5 changed files with 141 additions and 71 deletions
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -20,6 +20,7 @@
 #include "catalog/pg_operator.h"
 #include "catalog/pg_type.h"
 #include "executor/executor.h"
+#include "executor/nodeAgg.h"
 #include "miscadmin.h"
 #include "nodes/makefuncs.h"
 #ifdef OPTIMIZER_DEBUG
@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
 		Path	   *sorted_path;
 		double		dNumGroups = 0;
 		long		numGroups = 0;
-		int			numAggs = 0;
+		AggClauseCounts agg_counts;
 		int			numGroupCols = list_length(parse->groupClause);
 		bool		use_hashed_grouping = false;

+		MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
+
 		/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
 		tlist = preprocess_targetlist(tlist,
 									  parse->commandType,
@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * the aggregate semantics (eg, producing only one output row).
 		 */
 		if (parse->hasAggs)
-			numAggs = count_agg_clause((Node *) tlist) +
-				count_agg_clause(parse->havingQual);
+		{
+			count_agg_clauses((Node *) tlist, &agg_counts);
+			count_agg_clauses(parse->havingQual, &agg_counts);
+		}

 		/*
 		 * Figure out whether we need a sorted result from query_planner.
@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 			 */
 			if (!enable_hashagg || !hash_safe_grouping(parse))
 				use_hashed_grouping = false;
-			else if (parse->hasAggs &&
-					 (contain_distinct_agg_clause((Node *) tlist) ||
-					  contain_distinct_agg_clause(parse->havingQual)))
+			else if (agg_counts.numDistinctAggs != 0)
 				use_hashed_grouping = false;
 			else
 			{
@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
 				 * the need for sorted input is usually a win, the fact
 				 * that the output won't be sorted may be a loss; so we
 				 * need to do an actual cost comparison.
-				 *
-				 * In most cases we have no good way to estimate the size of
-				 * the transition value needed by an aggregate;
-				 * arbitrarily assume it is 100 bytes.	Also set the
-				 * overhead per hashtable entry at 64 bytes.
 				 */
-				int			hashentrysize = cheapest_path_width + 64 + numAggs * 100;
+				Size		hashentrysize;
+
+				/* Estimate per-hash-entry space at tuple width... */
+				hashentrysize = cheapest_path_width;
+				/* plus space for pass-by-ref transition values... */
+				hashentrysize += agg_counts.transitionSpace;
+				/* plus the per-hash-entry overhead */
+				hashentrysize += hash_agg_entry_size(agg_counts.numAggs);

 				if (hashentrysize * dNumGroups <= work_mem * 1024L)
 				{
@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					Path		sorted_p;

 					cost_agg(&hashed_p, parse,
-							 AGG_HASHED, numAggs,
+							 AGG_HASHED, agg_counts.numAggs,
 							 numGroupCols, dNumGroups,
 							 cheapest_path->startup_cost,
 							 cheapest_path->total_cost,
@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 					}
 					if (parse->hasAggs)
 						cost_agg(&sorted_p, parse,
-								 AGG_SORTED, numAggs,
+								 AGG_SORTED, agg_counts.numAggs,
 								 numGroupCols, dNumGroups,
 								 sorted_p.startup_cost,
 								 sorted_p.total_cost,
@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 			/* Hashed aggregation produces randomly-ordered results */
 			current_pathkeys = NIL;
@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 											numGroupCols,
 											groupColIdx,
 											numGroups,
-											numAggs,
+											agg_counts.numAggs,
 											result_plan);
 		}
 		else