1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Improve the planner's estimation of the space needed for HashAgg plans:
look at the actual aggregate transition datatypes and the actual overhead
needed by nodeAgg.c, instead of using pessimistic round numbers.
Per a discussion with Michael Tiemann.
This commit is contained in:
Tom Lane
2005-01-28 19:34:28 +00:00
parent c3a4e22e82
commit 0bf2587df4
5 changed files with 141 additions and 71 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.177 2004/12/31 22:00:09 pgsql Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -20,6 +20,7 @@
#include "catalog/pg_operator.h"
#include "catalog/pg_type.h"
#include "executor/executor.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#ifdef OPTIMIZER_DEBUG
@ -660,10 +661,12 @@ grouping_planner(Query *parse, double tuple_fraction)
Path *sorted_path;
double dNumGroups = 0;
long numGroups = 0;
int numAggs = 0;
AggClauseCounts agg_counts;
int numGroupCols = list_length(parse->groupClause);
bool use_hashed_grouping = false;
MemSet(&agg_counts, 0, sizeof(AggClauseCounts));
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
tlist = preprocess_targetlist(tlist,
parse->commandType,
@ -752,8 +755,10 @@ grouping_planner(Query *parse, double tuple_fraction)
* the aggregate semantics (eg, producing only one output row).
*/
if (parse->hasAggs)
numAggs = count_agg_clause((Node *) tlist) +
count_agg_clause(parse->havingQual);
{
count_agg_clauses((Node *) tlist, &agg_counts);
count_agg_clauses(parse->havingQual, &agg_counts);
}
/*
* Figure out whether we need a sorted result from query_planner.
@ -990,9 +995,7 @@ grouping_planner(Query *parse, double tuple_fraction)
*/
if (!enable_hashagg || !hash_safe_grouping(parse))
use_hashed_grouping = false;
else if (parse->hasAggs &&
(contain_distinct_agg_clause((Node *) tlist) ||
contain_distinct_agg_clause(parse->havingQual)))
else if (agg_counts.numDistinctAggs != 0)
use_hashed_grouping = false;
else
{
@ -1003,13 +1006,15 @@ grouping_planner(Query *parse, double tuple_fraction)
* the need for sorted input is usually a win, the fact
* that the output won't be sorted may be a loss; so we
* need to do an actual cost comparison.
*
* In most cases we have no good way to estimate the size of
* the transition value needed by an aggregate;
* arbitrarily assume it is 100 bytes. Also set the
* overhead per hashtable entry at 64 bytes.
*/
int hashentrysize = cheapest_path_width + 64 + numAggs * 100;
Size hashentrysize;
/* Estimate per-hash-entry space at tuple width... */
hashentrysize = cheapest_path_width;
/* plus space for pass-by-ref transition values... */
hashentrysize += agg_counts.transitionSpace;
/* plus the per-hash-entry overhead */
hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
if (hashentrysize * dNumGroups <= work_mem * 1024L)
{
@ -1030,7 +1035,7 @@ grouping_planner(Query *parse, double tuple_fraction)
Path sorted_p;
cost_agg(&hashed_p, parse,
AGG_HASHED, numAggs,
AGG_HASHED, agg_counts.numAggs,
numGroupCols, dNumGroups,
cheapest_path->startup_cost,
cheapest_path->total_cost,
@ -1065,7 +1070,7 @@ grouping_planner(Query *parse, double tuple_fraction)
}
if (parse->hasAggs)
cost_agg(&sorted_p, parse,
AGG_SORTED, numAggs,
AGG_SORTED, agg_counts.numAggs,
numGroupCols, dNumGroups,
sorted_p.startup_cost,
sorted_p.total_cost,
@ -1202,7 +1207,7 @@ grouping_planner(Query *parse, double tuple_fraction)
numGroupCols,
groupColIdx,
numGroups,
numAggs,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
@ -1244,7 +1249,7 @@ grouping_planner(Query *parse, double tuple_fraction)
numGroupCols,
groupColIdx,
numGroups,
numAggs,
agg_counts.numAggs,
result_plan);
}
else