mirror of
https://github.com/postgres/postgres.git
synced 2025-07-08 11:42:09 +03:00
Support hashed aggregation with grouping sets.
This extends the Aggregate node with two new features: HashAggregate can now run multiple hashtables concurrently, and a new strategy MixedAggregate populates hashtables while doing sorted grouping.

The planner will now attempt to save as many sorts as possible when planning grouping sets queries, while not exceeding work_mem for the estimated combined sizes of all hashtables used. No SQL-level changes are required. There should be no user-visible impact other than the new EXPLAIN output and possible changes to result ordering when ORDER BY was not used (which affected a few regression tests). The enable_hashagg option is respected.

Author: Andrew Gierth
Reviewers: Mark Dilger, Andres Freund
Discussion: https://postgr.es/m/87vatszyhj.fsf@news-spur.riddles.org.uk
This commit is contained in:
@ -1884,11 +1884,16 @@ cost_agg(Path *path, PlannerInfo *root,
|
||||
total_cost = startup_cost + cpu_tuple_cost;
|
||||
output_tuples = 1;
|
||||
}
|
||||
else if (aggstrategy == AGG_SORTED)
|
||||
else if (aggstrategy == AGG_SORTED || aggstrategy == AGG_MIXED)
|
||||
{
|
||||
/* Here we are able to deliver output on-the-fly */
|
||||
startup_cost = input_startup_cost;
|
||||
total_cost = input_total_cost;
|
||||
if (aggstrategy == AGG_MIXED && !enable_hashagg)
|
||||
{
|
||||
startup_cost += disable_cost;
|
||||
total_cost += disable_cost;
|
||||
}
|
||||
/* calcs phrased this way to match HASHED case, see note above */
|
||||
total_cost += aggcosts->transCost.startup;
|
||||
total_cost += aggcosts->transCost.per_tuple * input_tuples;
|
||||
|
@ -1783,18 +1783,15 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
|
||||
{
|
||||
Agg *plan;
|
||||
Plan *subplan;
|
||||
List *rollup_groupclauses = best_path->rollup_groupclauses;
|
||||
List *rollup_lists = best_path->rollup_lists;
|
||||
List *rollups = best_path->rollups;
|
||||
AttrNumber *grouping_map;
|
||||
int maxref;
|
||||
List *chain;
|
||||
ListCell *lc,
|
||||
*lc2;
|
||||
ListCell *lc;
|
||||
|
||||
/* Shouldn't get here without grouping sets */
|
||||
Assert(root->parse->groupingSets);
|
||||
Assert(rollup_lists != NIL);
|
||||
Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
|
||||
Assert(rollups != NIL);
|
||||
|
||||
/*
|
||||
* Agg can project, so no need to be terribly picky about child tlist, but
|
||||
@ -1846,72 +1843,86 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
|
||||
* costs will be shown by EXPLAIN.
|
||||
*/
|
||||
chain = NIL;
|
||||
if (list_length(rollup_groupclauses) > 1)
|
||||
if (list_length(rollups) > 1)
|
||||
{
|
||||
forboth(lc, rollup_groupclauses, lc2, rollup_lists)
|
||||
ListCell *lc2 = lnext(list_head(rollups));
|
||||
bool is_first_sort = ((RollupData *) linitial(rollups))->is_hashed;
|
||||
|
||||
for_each_cell(lc, lc2)
|
||||
{
|
||||
List *groupClause = (List *) lfirst(lc);
|
||||
List *gsets = (List *) lfirst(lc2);
|
||||
RollupData *rollup = lfirst(lc);
|
||||
AttrNumber *new_grpColIdx;
|
||||
Plan *sort_plan;
|
||||
Plan *sort_plan = NULL;
|
||||
Plan *agg_plan;
|
||||
AggStrategy strat;
|
||||
|
||||
/* We want to iterate over all but the last rollup list elements */
|
||||
if (lnext(lc) == NULL)
|
||||
break;
|
||||
new_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
|
||||
|
||||
new_grpColIdx = remap_groupColIdx(root, groupClause);
|
||||
if (!rollup->is_hashed && !is_first_sort)
|
||||
{
|
||||
sort_plan = (Plan *)
|
||||
make_sort_from_groupcols(rollup->groupClause,
|
||||
new_grpColIdx,
|
||||
subplan);
|
||||
}
|
||||
|
||||
sort_plan = (Plan *)
|
||||
make_sort_from_groupcols(groupClause,
|
||||
new_grpColIdx,
|
||||
subplan);
|
||||
if (!rollup->is_hashed)
|
||||
is_first_sort = false;
|
||||
|
||||
if (rollup->is_hashed)
|
||||
strat = AGG_HASHED;
|
||||
else if (list_length(linitial(rollup->gsets)) == 0)
|
||||
strat = AGG_PLAIN;
|
||||
else
|
||||
strat = AGG_SORTED;
|
||||
|
||||
agg_plan = (Plan *) make_agg(NIL,
|
||||
NIL,
|
||||
AGG_SORTED,
|
||||
strat,
|
||||
AGGSPLIT_SIMPLE,
|
||||
list_length((List *) linitial(gsets)),
|
||||
list_length((List *) linitial(rollup->gsets)),
|
||||
new_grpColIdx,
|
||||
extract_grouping_ops(groupClause),
|
||||
gsets,
|
||||
extract_grouping_ops(rollup->groupClause),
|
||||
rollup->gsets,
|
||||
NIL,
|
||||
0, /* numGroups not needed */
|
||||
rollup->numGroups,
|
||||
sort_plan);
|
||||
|
||||
/*
|
||||
* Nuke stuff we don't need to avoid bloating debug output.
|
||||
* Remove stuff we don't need to avoid bloating debug output.
|
||||
*/
|
||||
sort_plan->targetlist = NIL;
|
||||
sort_plan->lefttree = NULL;
|
||||
if (sort_plan)
|
||||
{
|
||||
sort_plan->targetlist = NIL;
|
||||
sort_plan->lefttree = NULL;
|
||||
}
|
||||
|
||||
chain = lappend(chain, agg_plan);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now make the final Agg node
|
||||
* Now make the real Agg node
|
||||
*/
|
||||
{
|
||||
List *groupClause = (List *) llast(rollup_groupclauses);
|
||||
List *gsets = (List *) llast(rollup_lists);
|
||||
RollupData *rollup = linitial(rollups);
|
||||
AttrNumber *top_grpColIdx;
|
||||
int numGroupCols;
|
||||
|
||||
top_grpColIdx = remap_groupColIdx(root, groupClause);
|
||||
top_grpColIdx = remap_groupColIdx(root, rollup->groupClause);
|
||||
|
||||
numGroupCols = list_length((List *) linitial(gsets));
|
||||
numGroupCols = list_length((List *) linitial(rollup->gsets));
|
||||
|
||||
plan = make_agg(build_path_tlist(root, &best_path->path),
|
||||
best_path->qual,
|
||||
(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
|
||||
best_path->aggstrategy,
|
||||
AGGSPLIT_SIMPLE,
|
||||
numGroupCols,
|
||||
top_grpColIdx,
|
||||
extract_grouping_ops(groupClause),
|
||||
gsets,
|
||||
extract_grouping_ops(rollup->groupClause),
|
||||
rollup->gsets,
|
||||
chain,
|
||||
0, /* numGroups not needed */
|
||||
rollup->numGroups,
|
||||
subplan);
|
||||
|
||||
/* Copy cost data from Path to Plan */
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -2697,10 +2697,9 @@ create_agg_path(PlannerInfo *root,
|
||||
* 'subpath' is the path representing the source of data
|
||||
* 'target' is the PathTarget to be computed
|
||||
* 'having_qual' is the HAVING quals if any
|
||||
* 'rollup_lists' is a list of grouping sets
|
||||
* 'rollup_groupclauses' is a list of grouping clauses for grouping sets
|
||||
* 'rollups' is a list of RollupData nodes
|
||||
* 'agg_costs' contains cost info about the aggregate functions to be computed
|
||||
* 'numGroups' is the estimated number of groups
|
||||
* 'numGroups' is the estimated total number of groups
|
||||
*/
|
||||
GroupingSetsPath *
|
||||
create_groupingsets_path(PlannerInfo *root,
|
||||
@ -2708,13 +2707,15 @@ create_groupingsets_path(PlannerInfo *root,
|
||||
Path *subpath,
|
||||
PathTarget *target,
|
||||
List *having_qual,
|
||||
List *rollup_lists,
|
||||
List *rollup_groupclauses,
|
||||
AggStrategy aggstrategy,
|
||||
List *rollups,
|
||||
const AggClauseCosts *agg_costs,
|
||||
double numGroups)
|
||||
{
|
||||
GroupingSetsPath *pathnode = makeNode(GroupingSetsPath);
|
||||
int numGroupCols;
|
||||
ListCell *lc;
|
||||
bool is_first = true;
|
||||
bool is_first_sort = true;
|
||||
|
||||
/* The topmost generated Plan node will be an Agg */
|
||||
pathnode->path.pathtype = T_Agg;
|
||||
@ -2727,75 +2728,110 @@ create_groupingsets_path(PlannerInfo *root,
|
||||
pathnode->path.parallel_workers = subpath->parallel_workers;
|
||||
pathnode->subpath = subpath;
|
||||
|
||||
/*
|
||||
* Simplify callers by downgrading AGG_SORTED to AGG_PLAIN, and AGG_MIXED
|
||||
* to AGG_HASHED, here if possible.
|
||||
*/
|
||||
if (aggstrategy == AGG_SORTED &&
|
||||
list_length(rollups) == 1 &&
|
||||
((RollupData *) linitial(rollups))->groupClause == NIL)
|
||||
aggstrategy = AGG_PLAIN;
|
||||
|
||||
if (aggstrategy == AGG_MIXED &&
|
||||
list_length(rollups) == 1)
|
||||
aggstrategy = AGG_HASHED;
|
||||
|
||||
/*
|
||||
* Output will be in sorted order by group_pathkeys if, and only if, there
|
||||
* is a single rollup operation on a non-empty list of grouping
|
||||
* expressions.
|
||||
*/
|
||||
if (list_length(rollup_groupclauses) == 1 &&
|
||||
((List *) linitial(rollup_groupclauses)) != NIL)
|
||||
if (aggstrategy == AGG_SORTED && list_length(rollups) == 1)
|
||||
pathnode->path.pathkeys = root->group_pathkeys;
|
||||
else
|
||||
pathnode->path.pathkeys = NIL;
|
||||
|
||||
pathnode->rollup_groupclauses = rollup_groupclauses;
|
||||
pathnode->rollup_lists = rollup_lists;
|
||||
pathnode->aggstrategy = aggstrategy;
|
||||
pathnode->rollups = rollups;
|
||||
pathnode->qual = having_qual;
|
||||
|
||||
Assert(rollup_lists != NIL);
|
||||
Assert(list_length(rollup_lists) == list_length(rollup_groupclauses));
|
||||
Assert(rollups != NIL);
|
||||
Assert(aggstrategy != AGG_PLAIN || list_length(rollups) == 1);
|
||||
Assert(aggstrategy != AGG_MIXED || list_length(rollups) > 1);
|
||||
|
||||
/* Account for cost of the topmost Agg node */
|
||||
numGroupCols = list_length((List *) linitial((List *) llast(rollup_lists)));
|
||||
|
||||
cost_agg(&pathnode->path, root,
|
||||
(numGroupCols > 0) ? AGG_SORTED : AGG_PLAIN,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
numGroups,
|
||||
subpath->startup_cost,
|
||||
subpath->total_cost,
|
||||
subpath->rows);
|
||||
|
||||
/*
|
||||
* Add in the costs and output rows of the additional sorting/aggregation
|
||||
* steps, if any. Only total costs count, since the extra sorts aren't
|
||||
* run on startup.
|
||||
*/
|
||||
if (list_length(rollup_lists) > 1)
|
||||
foreach(lc, rollups)
|
||||
{
|
||||
ListCell *lc;
|
||||
RollupData *rollup = lfirst(lc);
|
||||
List *gsets = rollup->gsets;
|
||||
int numGroupCols = list_length(linitial(gsets));
|
||||
|
||||
foreach(lc, rollup_lists)
|
||||
/*
|
||||
* In AGG_SORTED or AGG_PLAIN mode, the first rollup takes the
|
||||
* (already-sorted) input, and following ones do their own sort.
|
||||
*
|
||||
* In AGG_HASHED mode, there is one rollup for each grouping set.
|
||||
*
|
||||
* In AGG_MIXED mode, the first rollups are hashed, the first
|
||||
* non-hashed one takes the (already-sorted) input, and following ones
|
||||
* do their own sort.
|
||||
*/
|
||||
if (is_first)
|
||||
{
|
||||
cost_agg(&pathnode->path, root,
|
||||
aggstrategy,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
rollup->numGroups,
|
||||
subpath->startup_cost,
|
||||
subpath->total_cost,
|
||||
subpath->rows);
|
||||
is_first = false;
|
||||
if (!rollup->is_hashed)
|
||||
is_first_sort = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
List *gsets = (List *) lfirst(lc);
|
||||
Path sort_path; /* dummy for result of cost_sort */
|
||||
Path agg_path; /* dummy for result of cost_agg */
|
||||
|
||||
/* We must iterate over all but the last rollup_lists element */
|
||||
if (lnext(lc) == NULL)
|
||||
break;
|
||||
if (rollup->is_hashed || is_first_sort)
|
||||
{
|
||||
/*
|
||||
* Account for cost of aggregation, but don't charge input
|
||||
* cost again
|
||||
*/
|
||||
cost_agg(&agg_path, root,
|
||||
rollup->is_hashed ? AGG_HASHED : AGG_SORTED,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
rollup->numGroups,
|
||||
0.0, 0.0,
|
||||
subpath->rows);
|
||||
if (!rollup->is_hashed)
|
||||
is_first_sort = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Account for cost of sort, but don't charge input cost again */
|
||||
cost_sort(&sort_path, root, NIL,
|
||||
0.0,
|
||||
subpath->rows,
|
||||
subpath->pathtarget->width,
|
||||
0.0,
|
||||
work_mem,
|
||||
-1.0);
|
||||
|
||||
/* Account for cost of sort, but don't charge input cost again */
|
||||
cost_sort(&sort_path, root, NIL,
|
||||
0.0,
|
||||
subpath->rows,
|
||||
subpath->pathtarget->width,
|
||||
0.0,
|
||||
work_mem,
|
||||
-1.0);
|
||||
/* Account for cost of aggregation */
|
||||
|
||||
/* Account for cost of aggregation */
|
||||
numGroupCols = list_length((List *) linitial(gsets));
|
||||
|
||||
cost_agg(&agg_path, root,
|
||||
AGG_SORTED,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
numGroups, /* XXX surely not right for all steps? */
|
||||
sort_path.startup_cost,
|
||||
sort_path.total_cost,
|
||||
sort_path.rows);
|
||||
cost_agg(&agg_path, root,
|
||||
AGG_SORTED,
|
||||
agg_costs,
|
||||
numGroupCols,
|
||||
rollup->numGroups,
|
||||
sort_path.startup_cost,
|
||||
sort_path.total_cost,
|
||||
sort_path.rows);
|
||||
}
|
||||
|
||||
pathnode->path.total_cost += agg_path.total_cost;
|
||||
pathnode->path.rows += agg_path.rows;
|
||||
|
Reference in New Issue
Block a user