mirror of
https://github.com/postgres/postgres.git
synced 2025-07-18 17:42:25 +03:00
Simplify query_planner's API by having it return the top-level RelOptInfo.
Formerly, query_planner returned one or possibly two Paths for the topmost join relation, so that grouping_planner didn't see the join RelOptInfo (at least not directly; it didn't have any hesitation about examining cheapest_path->parent, though). However, correct selection of the Paths involved a significant amount of coupling between query_planner and grouping_planner, a problem which has gotten worse over time. It seems best to give up on this API choice and instead return the topmost RelOptInfo explicitly. Then grouping_planner can pull out the Paths it wants from the rel's path list. In this way we can remove all knowledge of grouping behaviors from query_planner. The only real benefit of the old way is that in the case of an empty FROM clause, we never made any RelOptInfos at all, just a Path. Now we have to gin up a dummy RelOptInfo to represent the empty FROM clause. That's not a very big deal though. While at it, simplify query_planner's API a bit more by having the caller set up root->tuple_fraction and root->limit_tuples, rather than passing those values as separate parameters. Since query_planner no longer does anything with either value, requiring it to fill the PlannerInfo fields seemed pretty arbitrary. This patch just rearranges code; it doesn't (intentionally) change any behaviors. Followup patches will do more interesting things.
This commit is contained in:
@ -39,6 +39,7 @@
|
||||
#include "parser/parsetree.h"
|
||||
#include "rewrite/rewriteManip.h"
|
||||
#include "utils/rel.h"
|
||||
#include "utils/selfuncs.h"
|
||||
|
||||
|
||||
/* GUC parameter */
|
||||
@ -1125,10 +1126,10 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
{
|
||||
/* No set operations, do regular planning */
|
||||
List *sub_tlist;
|
||||
double sub_limit_tuples;
|
||||
AttrNumber *groupColIdx = NULL;
|
||||
bool need_tlist_eval = true;
|
||||
standard_qp_extra qp_extra;
|
||||
RelOptInfo *final_rel;
|
||||
Path *cheapest_path;
|
||||
Path *sorted_path;
|
||||
Path *best_path;
|
||||
@ -1204,6 +1205,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
preprocess_minmax_aggregates(root, tlist);
|
||||
}
|
||||
|
||||
/* Make tuple_fraction accessible to lower-level routines */
|
||||
root->tuple_fraction = tuple_fraction;
|
||||
|
||||
/*
|
||||
* Figure out whether there's a hard limit on the number of rows that
|
||||
* query_planner's result subplan needs to return. Even if we know a
|
||||
@ -1215,9 +1219,9 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
parse->hasAggs ||
|
||||
parse->hasWindowFuncs ||
|
||||
root->hasHavingQual)
|
||||
sub_limit_tuples = -1.0;
|
||||
root->limit_tuples = -1.0;
|
||||
else
|
||||
sub_limit_tuples = limit_tuples;
|
||||
root->limit_tuples = limit_tuples;
|
||||
|
||||
/* Set up data needed by standard_qp_callback */
|
||||
qp_extra.tlist = tlist;
|
||||
@ -1225,31 +1229,164 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
|
||||
/*
|
||||
* Generate the best unsorted and presorted paths for this Query (but
|
||||
* note there may not be any presorted path). We also generate (in
|
||||
* note there may not be any presorted paths). We also generate (in
|
||||
* standard_qp_callback) pathkey representations of the query's sort
|
||||
* clause, distinct clause, etc. query_planner will also estimate the
|
||||
* number of groups in the query.
|
||||
* clause, distinct clause, etc.
|
||||
*/
|
||||
query_planner(root, sub_tlist, tuple_fraction, sub_limit_tuples,
|
||||
standard_qp_callback, &qp_extra,
|
||||
&cheapest_path, &sorted_path, &dNumGroups);
|
||||
final_rel = query_planner(root, sub_tlist,
|
||||
standard_qp_callback, &qp_extra);
|
||||
|
||||
/*
|
||||
* Extract rowcount and width estimates for possible use in grouping
|
||||
* decisions. Beware here of the possibility that
|
||||
* cheapest_path->parent is NULL (ie, there is no FROM clause).
|
||||
* Extract rowcount and width estimates for use below.
|
||||
*/
|
||||
if (cheapest_path->parent)
|
||||
path_rows = final_rel->rows;
|
||||
path_width = final_rel->width;
|
||||
|
||||
/*
|
||||
* If there's grouping going on, estimate the number of result groups.
|
||||
* We couldn't do this any earlier because it depends on relation size
|
||||
* estimates that are created within query_planner().
|
||||
*
|
||||
* Then convert tuple_fraction to fractional form if it is absolute,
|
||||
* and if grouping or aggregation is involved, adjust tuple_fraction
|
||||
* to describe the fraction of the underlying un-aggregated tuples
|
||||
* that will be fetched.
|
||||
*/
|
||||
dNumGroups = 1; /* in case not grouping */
|
||||
|
||||
if (parse->groupClause)
|
||||
{
|
||||
path_rows = cheapest_path->parent->rows;
|
||||
path_width = cheapest_path->parent->width;
|
||||
List *groupExprs;
|
||||
|
||||
groupExprs = get_sortgrouplist_exprs(parse->groupClause,
|
||||
parse->targetList);
|
||||
dNumGroups = estimate_num_groups(root, groupExprs, path_rows);
|
||||
|
||||
/*
|
||||
* In GROUP BY mode, an absolute LIMIT is relative to the number
|
||||
* of groups not the number of tuples. If the caller gave us a
|
||||
* fraction, keep it as-is. (In both cases, we are effectively
|
||||
* assuming that all the groups are about the same size.)
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= dNumGroups;
|
||||
|
||||
/*
|
||||
* If both GROUP BY and ORDER BY are specified, we will need two
|
||||
* levels of sort --- and, therefore, certainly need to read all
|
||||
* the tuples --- unless ORDER BY is a subset of GROUP BY.
|
||||
* Likewise if we have both DISTINCT and GROUP BY, or if we have a
|
||||
* window specification not compatible with the GROUP BY.
|
||||
*/
|
||||
if (!pathkeys_contained_in(root->sort_pathkeys,
|
||||
root->group_pathkeys) ||
|
||||
!pathkeys_contained_in(root->distinct_pathkeys,
|
||||
root->group_pathkeys) ||
|
||||
!pathkeys_contained_in(root->window_pathkeys,
|
||||
root->group_pathkeys))
|
||||
tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->hasAggs || root->hasHavingQual)
|
||||
{
|
||||
/*
|
||||
* Ungrouped aggregate will certainly want to read all the tuples,
|
||||
* and it will deliver a single result row (so leave dNumGroups
|
||||
* set to 1).
|
||||
*/
|
||||
tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->distinctClause)
|
||||
{
|
||||
/*
|
||||
* Since there was no grouping or aggregation, it's reasonable to
|
||||
* assume the UNIQUE filter has effects comparable to GROUP BY.
|
||||
* (If DISTINCT is used with grouping, we ignore its effects for
|
||||
* rowcount estimation purposes; this amounts to assuming the
|
||||
* grouped rows are distinct already.)
|
||||
*/
|
||||
List *distinctExprs;
|
||||
|
||||
distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
|
||||
parse->targetList);
|
||||
dNumGroups = estimate_num_groups(root, distinctExprs, path_rows);
|
||||
|
||||
/*
|
||||
* Adjust tuple_fraction the same way as for GROUP BY, too.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= dNumGroups;
|
||||
}
|
||||
else
|
||||
{
|
||||
path_rows = 1; /* assume non-set result */
|
||||
path_width = 100; /* arbitrary */
|
||||
/*
|
||||
* Plain non-grouped, non-aggregated query: an absolute tuple
|
||||
* fraction can be divided by the number of tuples.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= path_rows;
|
||||
}
|
||||
|
||||
/*
|
||||
* Pick out the cheapest-total path as well as the cheapest presorted
|
||||
* path for the requested pathkeys (if there is one). We should take
|
||||
* the tuple fraction into account when selecting the cheapest
|
||||
* presorted path, but not when selecting the cheapest-total path,
|
||||
* since if we have to sort then we'll have to fetch all the tuples.
|
||||
* (But there's a special case: if query_pathkeys is NIL, meaning
|
||||
* order doesn't matter, then the "cheapest presorted" path will be
|
||||
* the cheapest overall for the tuple fraction.)
|
||||
*/
|
||||
cheapest_path = final_rel->cheapest_total_path;
|
||||
|
||||
sorted_path =
|
||||
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
|
||||
root->query_pathkeys,
|
||||
NULL,
|
||||
tuple_fraction);
|
||||
|
||||
/* Don't consider same path in both guises; just wastes effort */
|
||||
if (sorted_path == cheapest_path)
|
||||
sorted_path = NULL;
|
||||
|
||||
/*
|
||||
* Forget about the presorted path if it would be cheaper to sort the
|
||||
* cheapest-total path. Here we need consider only the behavior at
|
||||
* the tuple_fraction point. Also, limit_tuples is only relevant if
|
||||
* not grouping/aggregating, so use root->limit_tuples in the
|
||||
* cost_sort call.
|
||||
*/
|
||||
if (sorted_path)
|
||||
{
|
||||
Path sort_path; /* dummy for result of cost_sort */
|
||||
|
||||
if (root->query_pathkeys == NIL ||
|
||||
pathkeys_contained_in(root->query_pathkeys,
|
||||
cheapest_path->pathkeys))
|
||||
{
|
||||
/* No sort needed for cheapest path */
|
||||
sort_path.startup_cost = cheapest_path->startup_cost;
|
||||
sort_path.total_cost = cheapest_path->total_cost;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Figure cost for sorting */
|
||||
cost_sort(&sort_path, root, root->query_pathkeys,
|
||||
cheapest_path->total_cost,
|
||||
path_rows, path_width,
|
||||
0.0, work_mem, root->limit_tuples);
|
||||
}
|
||||
|
||||
if (compare_fractional_path_costs(sorted_path, &sort_path,
|
||||
tuple_fraction) > 0)
|
||||
{
|
||||
/* Presorted path is a loser */
|
||||
sorted_path = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider whether we want to use hashing instead of sorting.
|
||||
*/
|
||||
if (parse->groupClause)
|
||||
{
|
||||
/*
|
||||
@ -1288,7 +1425,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
/*
|
||||
* Select the best path. If we are doing hashed grouping, we will
|
||||
* always read all the input tuples, so use the cheapest-total path.
|
||||
* Otherwise, trust query_planner's decision about which to use.
|
||||
* Otherwise, the comparison above is correct.
|
||||
*/
|
||||
if (use_hashed_grouping || use_hashed_distinct || !sorted_path)
|
||||
best_path = cheapest_path;
|
||||
@ -1658,7 +1795,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
||||
* If there was grouping or aggregation, use the current number of
|
||||
* rows as the estimated number of DISTINCT rows (ie, assume the
|
||||
* result was already mostly unique). If not, use the number of
|
||||
* distinct-groups calculated by query_planner.
|
||||
* distinct-groups calculated previously.
|
||||
*/
|
||||
if (parse->groupClause || root->hasHavingQual || parse->hasAggs)
|
||||
dNumDistinctRows = result_plan->plan_rows;
|
||||
@ -2576,8 +2713,8 @@ choose_hashed_grouping(PlannerInfo *root,
|
||||
* We need to consider cheapest_path + hashagg [+ final sort] versus
|
||||
* either cheapest_path [+ sort] + group or agg [+ final sort] or
|
||||
* presorted_path + group or agg [+ final sort] where brackets indicate a
|
||||
* step that may not be needed. We assume query_planner() will have
|
||||
* returned a presorted path only if it's a winner compared to
|
||||
* step that may not be needed. We assume grouping_planner() will have
|
||||
* passed us a presorted path only if it's a winner compared to
|
||||
* cheapest_path for this purpose.
|
||||
*
|
||||
* These path variables are dummies that just hold cost fields; we don't
|
||||
@ -2630,12 +2767,8 @@ choose_hashed_grouping(PlannerInfo *root,
|
||||
0.0, work_mem, limit_tuples);
|
||||
|
||||
/*
|
||||
* Now make the decision using the top-level tuple fraction. First we
|
||||
* have to convert an absolute count (LIMIT) into fractional form.
|
||||
* Now make the decision using the top-level tuple fraction.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= dNumGroups;
|
||||
|
||||
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
|
||||
tuple_fraction) < 0)
|
||||
{
|
||||
@ -2781,12 +2914,8 @@ choose_hashed_distinct(PlannerInfo *root,
|
||||
0.0, work_mem, limit_tuples);
|
||||
|
||||
/*
|
||||
* Now make the decision using the top-level tuple fraction. First we
|
||||
* have to convert an absolute count (LIMIT) into fractional form.
|
||||
* Now make the decision using the top-level tuple fraction.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= dNumDistinctRows;
|
||||
|
||||
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
|
||||
tuple_fraction) < 0)
|
||||
{
|
||||
|
Reference in New Issue
Block a user