1
0
mirror of https://github.com/postgres/postgres.git synced 2025-09-02 04:21:28 +03:00

Optimize order of GROUP BY keys

When evaluating a query with a multi-column GROUP BY clause using sort,
the cost may be heavily dependent on the order in which the keys are
compared when building the groups. Grouping does not imply any ordering,
so we're allowed to compare the keys in arbitrary order, and a Hash Agg
leverages this. But for Group Agg, we simply compared keys in the order
specified in the query. This commit explores alternative orderings of
the keys, trying to find a cheaper one.

In principle, we might generate grouping paths for all permutations of
the keys, and leave the rest to the optimizer. But that might get very
expensive, so we try to pick only a couple of interesting orderings based
on both local and global information.

When planning the grouping path, we explore statistics (number of
distinct values, cost of the comparison function) for the keys and
reorder them to minimize comparison costs. Intuitively, it may be better
to perform more expensive comparisons (for complex data types etc.)
last, because maybe the cheaper comparisons will be enough. Similarly,
the higher the cardinality of a key, the lower the probability we’ll
need to compare more keys. The patch generates and costs various
orderings, picking the cheapest ones.

The ordering of group keys may interact with other parts of the query,
some of which may not be known while planning the grouping. E.g. there
may be an explicit ORDER BY clause, or some other ordering-dependent
operation, higher up in the query, and using the same ordering may allow
using incremental sort or even eliminating the sort entirely.

The patch generates orderings and picks those minimizing the comparison
cost (for various pathkeys), and then adds orderings that might be
useful for operations higher up in the plan (ORDER BY, etc.). Finally,
it always keeps the ordering specified in the query, on the assumption
the user might have additional insights.

This introduces a new GUC enable_group_by_reordering, so that the
optimization may be disabled if needed.

The original patch was proposed by Teodor Sigaev, and later improved and
reworked by Dmitry Dolgov. Reviews by a number of people, including me,
Andrey Lepikhov, Claudio Freire, Ibrar Ahmed and Zhihong Yu.

Author: Dmitry Dolgov, Teodor Sigaev, Tomas Vondra
Reviewed-by: Tomas Vondra, Andrey Lepikhov, Claudio Freire, Ibrar Ahmed, Zhihong Yu
Discussion: https://postgr.es/m/7c79e6a5-8597-74e8-0671-1c39d124c9d6%40sigaev.ru
Discussion: https://postgr.es/m/CA%2Bq6zcW_4o2NC0zutLkOJPsFt80megSpX_dVRo6GK9PC-Jx_Ag%40mail.gmail.com
This commit is contained in:
Tomas Vondra
2022-03-31 00:09:11 +02:00
parent 606948b058
commit db0d67db24
24 changed files with 1887 additions and 499 deletions

View File

@@ -6218,24 +6218,121 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
*/
foreach(lc, input_rel->pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
List *pathkey_orderings = NIL;
if (path == cheapest_path || is_sorted)
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach (lc2, pathkey_orderings)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* restore the path (we replace it in the loop) */
path = path_original;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
if (path == cheapest_path || is_sorted)
{
/* Sort the cheapest-total path if it isn't already sorted */
if (!is_sorted)
path = (Path *) create_sort_path(root,
grouped_rel,
path,
info->pathkeys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
info->clauses,
havingQual,
agg_costs,
dNumGroups));
}
else if (group_clauses)
{
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
info->clauses,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
info->pathkeys,
presorted_keys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
@@ -6247,17 +6344,17 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make
* an AggPath.
* We have aggregation, possibly with plain GROUP BY. Make an
* AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
info->clauses,
havingQual,
agg_costs,
dNumGroups));
@@ -6265,14 +6362,14 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets.
* Make a GroupPath.
* We have GROUP BY without aggregation or grouping sets. Make
* a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
info->clauses,
havingQual,
dNumGroups));
}
@@ -6282,79 +6379,6 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
Assert(false);
}
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
/* Now decide what to stick atop it */
if (parse->groupingSets)
{
consider_groupingsets_paths(root, grouped_rel,
path, true, can_hash,
gd, agg_costs, dNumGroups);
}
else if (parse->hasAggs)
{
/*
* We have aggregation, possibly with plain GROUP BY. Make an
* AggPath.
*/
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_SIMPLE,
parse->groupClause,
havingQual,
agg_costs,
dNumGroups));
}
else if (parse->groupClause)
{
/*
* We have GROUP BY without aggregation or grouping sets. Make
* a GroupPath.
*/
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
}
else
{
/* Other cases should have been handled above */
Assert(false);
}
}
/*
@@ -6365,100 +6389,124 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
{
foreach(lc, partially_grouped_rel->pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
List *pathkey_orderings = NIL;
/*
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
if (!is_sorted)
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach (lc2, pathkey_orderings)
{
if (path != partially_grouped_rel->cheapest_total_path)
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* restore the path (we replace it in the loop) */
path = path_original;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
/*
* Insert a Sort node, if required. But there's no point in
* sorting anything but the cheapest path.
*/
if (!is_sorted)
{
if (path != partially_grouped_rel->cheapest_total_path)
continue;
path = (Path *) create_sort_path(root,
grouped_rel,
path,
info->pathkeys,
-1.0);
}
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
info->clauses,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
info->clauses,
havingQual,
dNumGroups));
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental
* sort is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
path = (Path *) create_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
-1.0);
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1
* because then presorted_keys > 0 would imply is_sorted was
* true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
info->pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
info->clauses,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
info->clauses,
havingQual,
dNumGroups));
}
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental
* sort is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1
* because then presorted_keys > 0 would imply is_sorted was
* true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_path(grouped_rel, (Path *)
create_agg_path(root,
grouped_rel,
path,
grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_FINAL_DESERIAL,
parse->groupClause,
havingQual,
agg_final_costs,
dNumGroups));
else
add_path(grouped_rel, (Path *)
create_group_path(root,
grouped_rel,
path,
parse->groupClause,
havingQual,
dNumGroups));
}
}
}
@@ -6661,41 +6709,71 @@ create_partial_grouping_paths(PlannerInfo *root,
*/
foreach(lc, input_rel->pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
bool is_sorted;
Path *path_save = path;
is_sorted = pathkeys_contained_in(root->group_pathkeys,
path->pathkeys);
if (path == cheapest_total_path || is_sorted)
List *pathkey_orderings = NIL;
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach (lc2, pathkey_orderings)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
-1.0);
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
NIL,
dNumPartialGroups));
/* restore the path (we replace it in the loop) */
path = path_save;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
if (path == cheapest_total_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
{
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
info->pathkeys,
-1.0);
}
if (parse->hasAggs)
add_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
info->clauses,
NIL,
agg_partial_costs,
dNumPartialGroups));
else
add_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
info->clauses,
NIL,
dNumPartialGroups));
}
}
}
@@ -6705,6 +6783,8 @@ create_partial_grouping_paths(PlannerInfo *root,
* We can also skip the entire loop when we only have a single-item
* group_pathkeys because then we can't possibly have a presorted
* prefix of the list without having the list be fully sorted.
*
* XXX Shouldn't this also consider the group-key-reordering?
*/
if (enable_incremental_sort && list_length(root->group_pathkeys) > 1)
{
@@ -6763,24 +6843,100 @@ create_partial_grouping_paths(PlannerInfo *root,
/* Similar to above logic, but for partial paths. */
foreach(lc, input_rel->partial_pathlist)
{
ListCell *lc2;
Path *path = (Path *) lfirst(lc);
Path *path_original = path;
bool is_sorted;
int presorted_keys;
is_sorted = pathkeys_count_contained_in(root->group_pathkeys,
path->pathkeys,
&presorted_keys);
List *pathkey_orderings = NIL;
if (path == cheapest_partial_path || is_sorted)
List *group_pathkeys = root->group_pathkeys;
List *group_clauses = parse->groupClause;
/* generate alternative group orderings that might be useful */
pathkey_orderings = get_useful_group_keys_orderings(root,
path->rows,
path->pathkeys,
group_pathkeys,
group_clauses);
Assert(list_length(pathkey_orderings) > 0);
/* process all potentially interesting grouping reorderings */
foreach (lc2, pathkey_orderings)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
-1.0);
bool is_sorted;
int presorted_keys = 0;
PathKeyInfo *info = (PathKeyInfo *) lfirst(lc2);
/* restore the path (we replace it in the loop) */
path = path_original;
is_sorted = pathkeys_count_contained_in(info->pathkeys,
path->pathkeys,
&presorted_keys);
if (path == cheapest_partial_path || is_sorted)
{
/* Sort the cheapest partial path, if it isn't already */
if (!is_sorted)
{
path = (Path *) create_sort_path(root,
partially_grouped_rel,
path,
info->pathkeys,
-1.0);
}
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
info->clauses,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
else
add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
info->clauses,
NIL,
dNumPartialPartialGroups));
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
partially_grouped_rel,
path,
info->pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
@@ -6788,9 +6944,9 @@ create_partial_grouping_paths(PlannerInfo *root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
info->clauses ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
info->clauses,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
@@ -6799,59 +6955,10 @@ create_partial_grouping_paths(PlannerInfo *root,
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
info->clauses,
NIL,
dNumPartialPartialGroups));
}
/*
* Now we may consider incremental sort on this path, but only
* when the path is not already sorted and when incremental sort
* is enabled.
*/
if (is_sorted || !enable_incremental_sort)
continue;
/* Restore the input path (we might have added Sort on top). */
path = path_original;
/* no shared prefix, no point in building incremental sort */
if (presorted_keys == 0)
continue;
/*
* We should have already excluded pathkeys of length 1 because
* then presorted_keys > 0 would imply is_sorted was true.
*/
Assert(list_length(root->group_pathkeys) != 1);
path = (Path *) create_incremental_sort_path(root,
partially_grouped_rel,
path,
root->group_pathkeys,
presorted_keys,
-1.0);
if (parse->hasAggs)
add_partial_path(partially_grouped_rel, (Path *)
create_agg_path(root,
partially_grouped_rel,
path,
partially_grouped_rel->reltarget,
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
AGGSPLIT_INITIAL_SERIAL,
parse->groupClause,
NIL,
agg_partial_costs,
dNumPartialPartialGroups));
else
add_partial_path(partially_grouped_rel, (Path *)
create_group_path(root,
partially_grouped_rel,
path,
parse->groupClause,
NIL,
dNumPartialPartialGroups));
}
}