1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-07 00:36:50 +03:00

Create the planner mechanism for optimizing simple MIN and MAX queries

into indexscans on matching indexes.  For the moment, it only handles
int4 and text datatypes; next step is to add a column to pg_aggregate
so that all MIN/MAX aggregates can be handled.  Per my recent proposal.
This commit is contained in:
Tom Lane
2005-04-11 23:06:57 +00:00
parent c3294f1cbf
commit addc42c339
14 changed files with 983 additions and 204 deletions

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.184 2005/04/11 23:06:55 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -362,43 +362,12 @@ subquery_planner(Query *parse, double tuple_fraction)
/*
* If any subplans were generated, or if we're inside a subplan, build
* initPlan list and extParam/allParam sets for plan nodes.
* initPlan list and extParam/allParam sets for plan nodes, and attach
* the initPlans to the top plan node.
*/
if (PlannerPlanId != saved_planid || PlannerQueryLevel > 1)
{
Cost initplan_cost = 0;
/* Prepare extParam/allParam sets for all nodes in tree */
SS_finalize_plan(plan, parse->rtable);
/*
* SS_finalize_plan doesn't handle initPlans, so we have to
* manually attach them to the topmost plan node, and add their
* extParams to the topmost node's, too.
*
* We also add the total_cost of each initPlan to the startup cost of
* the top node. This is a conservative overestimate, since in
* fact each initPlan might be executed later than plan startup,
* or even not at all.
*/
plan->initPlan = PlannerInitPlan;
foreach(l, plan->initPlan)
{
SubPlan *initplan = (SubPlan *) lfirst(l);
plan->extParam = bms_add_members(plan->extParam,
initplan->plan->extParam);
/* allParam must include all members of extParam */
plan->allParam = bms_add_members(plan->allParam,
plan->extParam);
initplan_cost += initplan->plan->total_cost;
}
plan->startup_cost += initplan_cost;
plan->total_cost += initplan_cost;
}
/* Return to outer subquery context */
PlannerQueryLevel--;
PlannerInitPlan = saved_initplan;
@ -692,6 +661,7 @@ grouping_planner(Query *parse, double tuple_fraction)
double sub_tuple_fraction;
Path *cheapest_path;
Path *sorted_path;
Path *best_path;
double dNumGroups = 0;
long numGroups = 0;
AggClauseCounts agg_counts;
@ -959,114 +929,175 @@ grouping_planner(Query *parse, double tuple_fraction)
}
/*
* Select the best path and create a plan to execute it.
*
* If we are doing hashed grouping, we will always read all the input
* tuples, so use the cheapest-total path. Otherwise, trust
* query_planner's decision about which to use.
* Select the best path. If we are doing hashed grouping, we will
* always read all the input tuples, so use the cheapest-total
* path. Otherwise, trust query_planner's decision about which to use.
*/
if (sorted_path && !use_hashed_grouping)
if (use_hashed_grouping || !sorted_path)
best_path = cheapest_path;
else
best_path = sorted_path;
/*
* Check to see if it's possible to optimize MIN/MAX aggregates.
* If so, we will forget all the work we did so far to choose a
* "regular" path ... but we had to do it anyway to be able to
* tell which way is cheaper.
*/
result_plan = optimize_minmax_aggregates(parse,
tlist,
best_path);
if (result_plan != NULL)
{
result_plan = create_plan(parse, sorted_path);
current_pathkeys = sorted_path->pathkeys;
/*
* optimize_minmax_aggregates generated the full plan, with
* the right tlist, and it has no sort order.
*/
current_pathkeys = NIL;
}
else
{
result_plan = create_plan(parse, cheapest_path);
current_pathkeys = cheapest_path->pathkeys;
}
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
* tlist of the top plan node. However, we can skip that if we
* determined that whatever query_planner chose to return will be
* good enough.
*/
if (need_tlist_eval)
{
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
* Normal case --- create a plan according to query_planner's
* results.
*/
if (!is_projection_capable_plan(result_plan))
result_plan = create_plan(parse, best_path);
current_pathkeys = best_path->pathkeys;
/*
* create_plan() returns a plan with just a "flat" tlist of
* required Vars. Usually we need to insert the sub_tlist as the
* tlist of the top plan node. However, we can skip that if we
* determined that whatever query_planner chose to return will be
* good enough.
*/
if (need_tlist_eval)
{
result_plan = (Plan *) make_result(sub_tlist, NULL,
result_plan);
/*
* If the top-level plan node is one that cannot do expression
* evaluation, we must insert a Result node to project the
* desired tlist.
*/
if (!is_projection_capable_plan(result_plan))
{
result_plan = (Plan *) make_result(sub_tlist, NULL,
result_plan);
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
*/
result_plan->targetlist = sub_tlist;
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
*
* Up to now, we have only been dealing with "flat" tlists,
* containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you
* prefer, the cost is factored into cpu_tuple_cost). Thus we
* can avoid accounting for tlist cost throughout
* query_planner() and subroutines. But now we've inserted a
* tlist that might contain actual operators, sub-selects, etc
* --- so we'd better account for its cost.
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
*/
cost_qual_eval(&tlist_cost, sub_tlist);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
* Otherwise, just replace the subplan's flat tlist with
* the desired tlist.
* Since we're using query_planner's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
*/
result_plan->targetlist = sub_tlist;
locate_grouping_columns(parse, tlist, result_plan->targetlist,
groupColIdx);
}
/*
* Also, account for the cost of evaluation of the sub_tlist.
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* Up to now, we have only been dealing with "flat" tlists,
* containing just Vars. So their evaluation cost is zero
* according to the model used by cost_qual_eval() (or if you
* prefer, the cost is factored into cpu_tuple_cost). Thus we
* can avoid accounting for tlist cost throughout
* query_planner() and subroutines. But now we've inserted a
* tlist that might contain actual operators, sub-selects, etc
* --- so we'd better account for its cost.
*
* Below this point, any tlist eval cost for added-on nodes
* should be accounted for as we create those nodes.
* Presently, of the node types we can add on, only Agg and
* Group project new tlists (the rest just copy their input
* tuples) --- so make_agg() and make_group() are responsible
* for computing the added cost.
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
cost_qual_eval(&tlist_cost, sub_tlist);
result_plan->startup_cost += tlist_cost.startup;
result_plan->total_cost += tlist_cost.startup +
tlist_cost.per_tuple * result_plan->plan_rows;
}
else
{
/*
* Since we're using query_planner's tlist and not the one
* make_subplanTargetList calculated, we have to refigure any
* grouping-column indexes make_subplanTargetList computed.
*/
locate_grouping_columns(parse, tlist, result_plan->targetlist,
groupColIdx);
}
/*
* Insert AGG or GROUP node if needed, plus an explicit sort step
* if necessary.
*
* HAVING clause, if any, becomes qual of the Agg or Group node.
*/
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
if (parse->groupClause)
if (use_hashed_grouping)
{
/* Hashed aggregate plan --- no sort needed */
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
AGG_HASHED,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
/* Hashed aggregation produces randomly-ordered results */
current_pathkeys = NIL;
}
else if (parse->hasAggs)
{
/* Plain aggregate plan --- sort if needed */
AggStrategy aggstrategy;
if (parse->groupClause)
{
if (!pathkeys_contained_in(group_pathkeys,
current_pathkeys))
{
result_plan = (Plan *)
make_sort_from_groupcols(parse,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = group_pathkeys;
}
aggstrategy = AGG_SORTED;
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
*/
}
else
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
aggstrategy,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus
* the appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come
* out the way the GROUP node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
@ -1076,75 +1107,34 @@ grouping_planner(Query *parse, double tuple_fraction)
result_plan);
current_pathkeys = group_pathkeys;
}
aggstrategy = AGG_SORTED;
result_plan = (Plan *) make_group(parse,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else if (parse->hasHavingQual)
{
/*
* The AGG node will not change the sort ordering of its
* groups, so current_pathkeys describes the result too.
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM table
* at all! We can just throw away the plan-so-far and generate
* a Result node. This is a sufficiently unusual corner case
* that it's not worth contorting the structure of this routine
* to avoid having to generate the plan in the first place.
*/
result_plan = (Plan *) make_result(tlist,
parse->havingQual,
NULL);
}
else
{
aggstrategy = AGG_PLAIN;
/* Result will be only one row anyway; no sort order */
current_pathkeys = NIL;
}
result_plan = (Plan *) make_agg(parse,
tlist,
(List *) parse->havingQual,
aggstrategy,
numGroupCols,
groupColIdx,
numGroups,
agg_counts.numAggs,
result_plan);
}
else if (parse->groupClause)
{
/*
* GROUP BY without aggregation, so insert a group node (plus the
* appropriate sort node, if necessary).
*
* Add an explicit sort if we couldn't make the path come
* out the way the GROUP node needs it.
*/
if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
{
result_plan = (Plan *)
make_sort_from_groupcols(parse,
parse->groupClause,
groupColIdx,
result_plan);
current_pathkeys = group_pathkeys;
}
result_plan = (Plan *) make_group(parse,
tlist,
(List *) parse->havingQual,
numGroupCols,
groupColIdx,
dNumGroups,
result_plan);
/* The Group node won't change sort ordering */
}
else if (parse->hasHavingQual)
{
/*
* No aggregates, and no GROUP BY, but we have a HAVING qual.
* This is a degenerate case in which we are supposed to emit
* either 0 or 1 row depending on whether HAVING succeeds.
* Furthermore, there cannot be any variables in either HAVING
* or the targetlist, so we actually do not need the FROM table
* at all! We can just throw away the plan-so-far and generate
* a Result node. This is a sufficiently unusual corner case
* that it's not worth contorting the structure of this routine
* to avoid having to generate the plan in the first place.
*/
result_plan = (Plan *) make_result(tlist,
parse->havingQual,
NULL);
}
} /* end of non-minmax-aggregate case */
} /* end of if (setOperations) */
/*