mirror of
https://github.com/postgres/postgres.git
synced 2025-07-28 23:42:10 +03:00
Finish implementation of hashed aggregation. Add enable_hashagg GUC
parameter to allow it to be forced off for comparison purposes. Add ORDER BY clauses to a bunch of regression test queries that will otherwise produce randomly-ordered output in the new regime.
This commit is contained in:
@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@ -933,11 +933,13 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
List *sub_tlist;
|
||||
List *group_pathkeys;
|
||||
AttrNumber *groupColIdx = NULL;
|
||||
double sub_tuple_fraction;
|
||||
Path *cheapest_path;
|
||||
Path *sorted_path;
|
||||
double dNumGroups = 0;
|
||||
long numGroups = 0;
|
||||
int numAggs = 0;
|
||||
int numGroupCols = length(parse->groupClause);
|
||||
bool use_hashed_grouping = false;
|
||||
|
||||
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
|
||||
@ -1169,6 +1171,12 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* With grouping or aggregation, the tuple fraction to pass to
|
||||
* query_planner() may be different from what it is at top level.
|
||||
*/
|
||||
sub_tuple_fraction = tuple_fraction;
|
||||
|
||||
if (parse->groupClause)
|
||||
{
|
||||
/*
|
||||
@ -1182,8 +1190,8 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* amounts to assuming that all the groups are about the same
|
||||
* size).
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction = 0.25;
|
||||
if (sub_tuple_fraction >= 1.0)
|
||||
sub_tuple_fraction = 0.25;
|
||||
|
||||
/*
|
||||
* If both GROUP BY and ORDER BY are specified, we will need
|
||||
@ -1195,7 +1203,7 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
if (parse->groupClause && parse->sortClause &&
|
||||
!noncanonical_pathkeys_contained_in(sort_pathkeys,
|
||||
group_pathkeys))
|
||||
tuple_fraction = 0.0;
|
||||
sub_tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->hasAggs)
|
||||
{
|
||||
@ -1203,7 +1211,7 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* Ungrouped aggregate will certainly want all the input
|
||||
* tuples.
|
||||
*/
|
||||
tuple_fraction = 0.0;
|
||||
sub_tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->distinctClause)
|
||||
{
|
||||
@ -1212,15 +1220,15 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* number of input tuples per output tuple. Handle the same
|
||||
* way.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction = 0.25;
|
||||
if (sub_tuple_fraction >= 1.0)
|
||||
sub_tuple_fraction = 0.25;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the best unsorted and presorted paths for this Query
|
||||
* (but note there may not be any presorted path).
|
||||
*/
|
||||
query_planner(parse, sub_tlist, tuple_fraction,
|
||||
query_planner(parse, sub_tlist, sub_tuple_fraction,
|
||||
&cheapest_path, &sorted_path);
|
||||
|
||||
/*
|
||||
@ -1236,11 +1244,13 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
if (parse->groupClause)
|
||||
{
|
||||
/*
|
||||
* Always estimate the number of groups.
|
||||
* Always estimate the number of groups. We can't do this until
|
||||
* after running query_planner(), either.
|
||||
*/
|
||||
dNumGroups = estimate_num_groups(parse,
|
||||
parse->groupClause,
|
||||
cheapest_path->parent->rows);
|
||||
/* Also want it as a long int --- but 'ware overflow! */
|
||||
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
||||
|
||||
/*
|
||||
@ -1248,9 +1258,11 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* aggregates. (Doing so would imply storing *all* the input
|
||||
* values in the hash table, which seems like a certain loser.)
|
||||
*/
|
||||
if (parse->hasAggs &&
|
||||
(contain_distinct_agg_clause((Node *) tlist) ||
|
||||
contain_distinct_agg_clause(parse->havingQual)))
|
||||
if (!enable_hashagg)
|
||||
use_hashed_grouping = false;
|
||||
else if (parse->hasAggs &&
|
||||
(contain_distinct_agg_clause((Node *) tlist) ||
|
||||
contain_distinct_agg_clause(parse->havingQual)))
|
||||
use_hashed_grouping = false;
|
||||
else
|
||||
{
|
||||
@ -1272,11 +1284,96 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
|
||||
if (hashentrysize * dNumGroups <= SortMem * 1024L)
|
||||
{
|
||||
/* much more to do here */
|
||||
#if 0
|
||||
/* TEMPORARY HOTWIRE FOR TESTING */
|
||||
use_hashed_grouping = true;
|
||||
#endif
|
||||
/*
|
||||
* Okay, do the cost comparison. We need to consider
|
||||
* cheapest_path + hashagg [+ final sort]
|
||||
* versus either
|
||||
* cheapest_path [+ sort] + group or agg [+ final sort]
|
||||
* or
|
||||
* presorted_path + group or agg [+ final sort]
|
||||
* where brackets indicate a step that may not be needed.
|
||||
* We assume query_planner() will have returned a
|
||||
* presorted path only if it's a winner compared to
|
||||
* cheapest_path for this purpose.
|
||||
*
|
||||
* These path variables are dummies that just hold cost
|
||||
* fields; we don't make actual Paths for these steps.
|
||||
*/
|
||||
Path hashed_p;
|
||||
Path sorted_p;
|
||||
|
||||
cost_agg(&hashed_p, parse,
|
||||
AGG_HASHED, numAggs,
|
||||
numGroupCols, dNumGroups,
|
||||
cheapest_path->startup_cost,
|
||||
cheapest_path->total_cost,
|
||||
cheapest_path->parent->rows);
|
||||
/* Result of hashed agg is always unsorted */
|
||||
if (sort_pathkeys)
|
||||
cost_sort(&hashed_p, parse, sort_pathkeys,
|
||||
hashed_p.total_cost,
|
||||
dNumGroups,
|
||||
cheapest_path->parent->width);
|
||||
|
||||
if (sorted_path)
|
||||
{
|
||||
sorted_p.startup_cost = sorted_path->startup_cost;
|
||||
sorted_p.total_cost = sorted_path->total_cost;
|
||||
current_pathkeys = sorted_path->pathkeys;
|
||||
}
|
||||
else
|
||||
{
|
||||
sorted_p.startup_cost = cheapest_path->startup_cost;
|
||||
sorted_p.total_cost = cheapest_path->total_cost;
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
if (!pathkeys_contained_in(group_pathkeys,
|
||||
current_pathkeys))
|
||||
{
|
||||
cost_sort(&sorted_p, parse, group_pathkeys,
|
||||
sorted_p.total_cost,
|
||||
cheapest_path->parent->rows,
|
||||
cheapest_path->parent->width);
|
||||
current_pathkeys = group_pathkeys;
|
||||
}
|
||||
if (parse->hasAggs)
|
||||
cost_agg(&sorted_p, parse,
|
||||
AGG_SORTED, numAggs,
|
||||
numGroupCols, dNumGroups,
|
||||
sorted_p.startup_cost,
|
||||
sorted_p.total_cost,
|
||||
cheapest_path->parent->rows);
|
||||
else
|
||||
cost_group(&sorted_p, parse,
|
||||
numGroupCols, dNumGroups,
|
||||
sorted_p.startup_cost,
|
||||
sorted_p.total_cost,
|
||||
cheapest_path->parent->rows);
|
||||
/* The Agg or Group node will preserve ordering */
|
||||
if (sort_pathkeys &&
|
||||
!pathkeys_contained_in(sort_pathkeys,
|
||||
current_pathkeys))
|
||||
{
|
||||
cost_sort(&sorted_p, parse, sort_pathkeys,
|
||||
sorted_p.total_cost,
|
||||
dNumGroups,
|
||||
cheapest_path->parent->width);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now make the decision using the top-level tuple
|
||||
* fraction. First we have to convert an absolute
|
||||
* count (LIMIT) into fractional form.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= dNumGroups;
|
||||
|
||||
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
|
||||
tuple_fraction) <= 0)
|
||||
{
|
||||
/* Hashed is cheaper, so use it */
|
||||
use_hashed_grouping = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1284,50 +1381,17 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
/*
|
||||
* Select the best path and create a plan to execute it.
|
||||
*
|
||||
* If no special sort order is wanted, or if the cheapest path is
|
||||
* already appropriately ordered, use the cheapest path.
|
||||
* Otherwise, look to see if we have an already-ordered path that is
|
||||
* cheaper than doing an explicit sort on the cheapest-total-cost
|
||||
* path.
|
||||
* If we are doing hashed grouping, we will always read all the
|
||||
* input tuples, so use the cheapest-total path. Otherwise,
|
||||
* trust query_planner's decision about which to use.
|
||||
*/
|
||||
if (parse->query_pathkeys == NIL ||
|
||||
pathkeys_contained_in(parse->query_pathkeys,
|
||||
cheapest_path->pathkeys))
|
||||
if (sorted_path && !use_hashed_grouping)
|
||||
{
|
||||
result_plan = create_plan(parse, cheapest_path);
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
else if (sorted_path)
|
||||
{
|
||||
Path sort_path; /* dummy for result of cost_sort */
|
||||
|
||||
cost_sort(&sort_path, parse, parse->query_pathkeys,
|
||||
sorted_path->parent->rows, sorted_path->parent->width);
|
||||
sort_path.startup_cost += cheapest_path->total_cost;
|
||||
sort_path.total_cost += cheapest_path->total_cost;
|
||||
/* Convert absolute-count tuple_fraction into a fraction */
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= sorted_path->parent->rows;
|
||||
if (compare_fractional_path_costs(sorted_path, &sort_path,
|
||||
tuple_fraction) <= 0)
|
||||
{
|
||||
/* Presorted path is cheaper, use it */
|
||||
result_plan = create_plan(parse, sorted_path);
|
||||
current_pathkeys = sorted_path->pathkeys;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* otherwise, doing it the hard way is still cheaper */
|
||||
result_plan = create_plan(parse, cheapest_path);
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
result_plan = create_plan(parse, sorted_path);
|
||||
current_pathkeys = sorted_path->pathkeys;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* No sorted path, so we must use the cheapest-total path.
|
||||
* The actual sort step will be generated below.
|
||||
*/
|
||||
result_plan = create_plan(parse, cheapest_path);
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
@ -1362,10 +1426,11 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
if (use_hashed_grouping)
|
||||
{
|
||||
/* Hashed aggregate plan --- no sort needed */
|
||||
result_plan = (Plan *) make_agg(tlist,
|
||||
result_plan = (Plan *) make_agg(parse,
|
||||
tlist,
|
||||
(List *) parse->havingQual,
|
||||
AGG_HASHED,
|
||||
length(parse->groupClause),
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
numGroups,
|
||||
numAggs,
|
||||
@ -1401,10 +1466,11 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
current_pathkeys = NIL;
|
||||
}
|
||||
|
||||
result_plan = (Plan *) make_agg(tlist,
|
||||
result_plan = (Plan *) make_agg(parse,
|
||||
tlist,
|
||||
(List *) parse->havingQual,
|
||||
aggstrategy,
|
||||
length(parse->groupClause),
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
numGroups,
|
||||
numAggs,
|
||||
@ -1436,11 +1502,13 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
current_pathkeys = group_pathkeys;
|
||||
}
|
||||
|
||||
result_plan = (Plan *) make_group(tlist,
|
||||
length(parse->groupClause),
|
||||
result_plan = (Plan *) make_group(parse,
|
||||
tlist,
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
dNumGroups,
|
||||
result_plan);
|
||||
/* The Group node won't change sort ordering */
|
||||
}
|
||||
}
|
||||
} /* end of if (setOperations) */
|
||||
|
Reference in New Issue
Block a user