mirror of
https://github.com/postgres/postgres.git
synced 2025-08-31 17:02:12 +03:00
Finish implementation of hashed aggregation. Add enable_hashagg GUC
parameter to allow it to be forced off for comparison purposes. Add ORDER BY clauses to a bunch of regression test queries that will otherwise produce randomly-ordered output in the new regime.
This commit is contained in:
@@ -42,7 +42,7 @@
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.90 2002/09/04 20:31:20 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.91 2002/11/21 00:42:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -79,6 +79,7 @@ bool enable_seqscan = true;
|
||||
bool enable_indexscan = true;
|
||||
bool enable_tidscan = true;
|
||||
bool enable_sort = true;
|
||||
bool enable_hashagg = true;
|
||||
bool enable_nestloop = true;
|
||||
bool enable_mergejoin = true;
|
||||
bool enable_hashjoin = true;
|
||||
@@ -423,10 +424,8 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
|
||||
|
||||
/*
|
||||
* cost_sort
|
||||
* Determines and returns the cost of sorting a relation.
|
||||
*
|
||||
* The cost of supplying the input data is NOT included; the caller should
|
||||
* add that cost to both startup and total costs returned from this routine!
|
||||
* Determines and returns the cost of sorting a relation, including
|
||||
* the cost of reading the input data.
|
||||
*
|
||||
* If the total volume of data to sort is less than SortMem, we will do
|
||||
* an in-memory sort, which requires no I/O and about t*log2(t) tuple
|
||||
@@ -449,6 +448,7 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
|
||||
* the right ballpark in most cases.
|
||||
*
|
||||
* 'pathkeys' is a list of sort keys
|
||||
* 'input_cost' is the total cost for reading the input data
|
||||
* 'tuples' is the number of tuples in the relation
|
||||
* 'width' is the average tuple width in bytes
|
||||
*
|
||||
@@ -456,12 +456,14 @@ cost_functionscan(Path *path, Query *root, RelOptInfo *baserel)
|
||||
* can't conveniently supply the sort keys. Since this routine doesn't
|
||||
* currently do anything with pathkeys anyway, that doesn't matter...
|
||||
* but if it ever does, it should react gracefully to lack of key data.
|
||||
* (Actually, the thing we'd most likely be interested in is just the number
|
||||
* of sort keys, which all callers *could* supply.)
|
||||
*/
|
||||
void
|
||||
cost_sort(Path *path, Query *root,
|
||||
List *pathkeys, double tuples, int width)
|
||||
List *pathkeys, Cost input_cost, double tuples, int width)
|
||||
{
|
||||
Cost startup_cost = 0;
|
||||
Cost startup_cost = input_cost;
|
||||
Cost run_cost = 0;
|
||||
double nbytes = relation_byte_size(tuples, width);
|
||||
long sortmembytes = SortMem * 1024L;
|
||||
@@ -511,6 +513,92 @@ cost_sort(Path *path, Query *root,
|
||||
path->total_cost = startup_cost + run_cost;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_agg
|
||||
* Determines and returns the cost of performing an Agg plan node,
|
||||
* including the cost of its input.
|
||||
*
|
||||
* Note: when aggstrategy == AGG_SORTED, caller must ensure that input costs
|
||||
* are for appropriately-sorted input.
|
||||
*/
|
||||
void
|
||||
cost_agg(Path *path, Query *root,
|
||||
AggStrategy aggstrategy, int numAggs,
|
||||
int numGroupCols, double numGroups,
|
||||
Cost input_startup_cost, Cost input_total_cost,
|
||||
double input_tuples)
|
||||
{
|
||||
Cost startup_cost;
|
||||
Cost total_cost;
|
||||
|
||||
/*
|
||||
* We charge one cpu_operator_cost per aggregate function per input
|
||||
* tuple, and another one per output tuple (corresponding to transfn
|
||||
* and finalfn calls respectively). If we are grouping, we charge an
|
||||
* additional cpu_operator_cost per grouping column per input tuple
|
||||
* for grouping comparisons.
|
||||
*
|
||||
* We will produce a single output tuple if not grouping,
|
||||
* and a tuple per group otherwise.
|
||||
*/
|
||||
if (aggstrategy == AGG_PLAIN)
|
||||
{
|
||||
startup_cost = input_total_cost;
|
||||
startup_cost += cpu_operator_cost * (input_tuples + 1) * numAggs;
|
||||
/* we aren't grouping */
|
||||
total_cost = startup_cost;
|
||||
}
|
||||
else if (aggstrategy == AGG_SORTED)
|
||||
{
|
||||
/* Here we are able to deliver output on-the-fly */
|
||||
startup_cost = input_startup_cost;
|
||||
total_cost = input_total_cost;
|
||||
total_cost += cpu_operator_cost * (input_tuples + numGroups) * numAggs;
|
||||
total_cost += cpu_operator_cost * input_tuples * numGroupCols;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* must be AGG_HASHED */
|
||||
startup_cost = input_total_cost;
|
||||
startup_cost += cpu_operator_cost * input_tuples * numAggs;
|
||||
startup_cost += cpu_operator_cost * input_tuples * numGroupCols;
|
||||
total_cost = startup_cost;
|
||||
total_cost += cpu_operator_cost * numGroups * numAggs;
|
||||
}
|
||||
|
||||
path->startup_cost = startup_cost;
|
||||
path->total_cost = total_cost;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_group
|
||||
* Determines and returns the cost of performing a Group plan node,
|
||||
* including the cost of its input.
|
||||
*
|
||||
* Note: caller must ensure that input costs are for appropriately-sorted
|
||||
* input.
|
||||
*/
|
||||
void
|
||||
cost_group(Path *path, Query *root,
|
||||
int numGroupCols, double numGroups,
|
||||
Cost input_startup_cost, Cost input_total_cost,
|
||||
double input_tuples)
|
||||
{
|
||||
Cost startup_cost;
|
||||
Cost total_cost;
|
||||
|
||||
startup_cost = input_startup_cost;
|
||||
total_cost = input_total_cost;
|
||||
|
||||
/*
|
||||
* Charge one cpu_operator_cost per comparison per input tuple. We
|
||||
* assume all columns get compared at most of the tuples.
|
||||
*/
|
||||
total_cost += cpu_operator_cost * input_tuples * numGroupCols;
|
||||
|
||||
path->startup_cost = startup_cost;
|
||||
path->total_cost = total_cost;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_nestloop
|
||||
@@ -658,10 +746,10 @@ cost_mergejoin(Path *path, Query *root,
|
||||
*/
|
||||
if (outersortkeys) /* do we need to sort outer? */
|
||||
{
|
||||
startup_cost += outer_path->total_cost;
|
||||
cost_sort(&sort_path,
|
||||
root,
|
||||
outersortkeys,
|
||||
outer_path->total_cost,
|
||||
outer_path->parent->rows,
|
||||
outer_path->parent->width);
|
||||
startup_cost += sort_path.startup_cost;
|
||||
@@ -677,10 +765,10 @@ cost_mergejoin(Path *path, Query *root,
|
||||
|
||||
if (innersortkeys) /* do we need to sort inner? */
|
||||
{
|
||||
startup_cost += inner_path->total_cost;
|
||||
cost_sort(&sort_path,
|
||||
root,
|
||||
innersortkeys,
|
||||
inner_path->total_cost,
|
||||
inner_path->parent->rows,
|
||||
inner_path->parent->width);
|
||||
startup_cost += sort_path.startup_cost;
|
||||
|
@@ -10,7 +10,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.123 2002/11/19 23:21:58 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.124 2002/11/21 00:42:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -1573,9 +1573,11 @@ make_sort(Query *root, List *tlist, Plan *lefttree, int keycount)
|
||||
|
||||
copy_plan_costsize(plan, lefttree); /* only care about copying size */
|
||||
cost_sort(&sort_path, root, NIL,
|
||||
lefttree->plan_rows, lefttree->plan_width);
|
||||
plan->startup_cost = sort_path.startup_cost + lefttree->total_cost;
|
||||
plan->total_cost = sort_path.total_cost + lefttree->total_cost;
|
||||
lefttree->total_cost,
|
||||
lefttree->plan_rows,
|
||||
lefttree->plan_width);
|
||||
plan->startup_cost = sort_path.startup_cost;
|
||||
plan->total_cost = sort_path.total_cost;
|
||||
plan->state = (EState *) NULL;
|
||||
plan->targetlist = tlist;
|
||||
plan->qual = NIL;
|
||||
@@ -1683,39 +1685,39 @@ make_material(List *tlist, Plan *lefttree)
|
||||
}
|
||||
|
||||
Agg *
|
||||
make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
|
||||
int ngrp, AttrNumber *grpColIdx, long numGroups, int numAggs,
|
||||
make_agg(Query *root, List *tlist, List *qual,
|
||||
AggStrategy aggstrategy,
|
||||
int numGroupCols, AttrNumber *grpColIdx,
|
||||
long numGroups, int numAggs,
|
||||
Plan *lefttree)
|
||||
{
|
||||
Agg *node = makeNode(Agg);
|
||||
Plan *plan = &node->plan;
|
||||
Path agg_path; /* dummy for result of cost_agg */
|
||||
|
||||
node->aggstrategy = aggstrategy;
|
||||
node->numCols = ngrp;
|
||||
node->numCols = numGroupCols;
|
||||
node->grpColIdx = grpColIdx;
|
||||
node->numGroups = numGroups;
|
||||
|
||||
copy_plan_costsize(plan, lefttree);
|
||||
|
||||
/*
|
||||
* Charge one cpu_operator_cost per aggregate function per input
|
||||
* tuple.
|
||||
*/
|
||||
plan->total_cost += cpu_operator_cost * plan->plan_rows * numAggs;
|
||||
copy_plan_costsize(plan, lefttree); /* only care about copying size */
|
||||
cost_agg(&agg_path, root,
|
||||
aggstrategy, numAggs,
|
||||
numGroupCols, numGroups,
|
||||
lefttree->startup_cost,
|
||||
lefttree->total_cost,
|
||||
lefttree->plan_rows);
|
||||
plan->startup_cost = agg_path.startup_cost;
|
||||
plan->total_cost = agg_path.total_cost;
|
||||
|
||||
/*
|
||||
* We will produce a single output tuple if not grouping,
|
||||
* and a tuple per group otherwise.
|
||||
*/
|
||||
if (aggstrategy == AGG_PLAIN)
|
||||
{
|
||||
plan->plan_rows = 1;
|
||||
plan->startup_cost = plan->total_cost;
|
||||
}
|
||||
else
|
||||
{
|
||||
plan->plan_rows = numGroups;
|
||||
}
|
||||
|
||||
plan->state = (EState *) NULL;
|
||||
plan->qual = qual;
|
||||
@@ -1727,22 +1729,28 @@ make_agg(List *tlist, List *qual, AggStrategy aggstrategy,
|
||||
}
|
||||
|
||||
Group *
|
||||
make_group(List *tlist,
|
||||
int ngrp,
|
||||
make_group(Query *root,
|
||||
List *tlist,
|
||||
int numGroupCols,
|
||||
AttrNumber *grpColIdx,
|
||||
double numGroups,
|
||||
Plan *lefttree)
|
||||
{
|
||||
Group *node = makeNode(Group);
|
||||
Plan *plan = &node->plan;
|
||||
Path group_path; /* dummy for result of cost_group */
|
||||
|
||||
copy_plan_costsize(plan, lefttree);
|
||||
node->numCols = numGroupCols;
|
||||
node->grpColIdx = grpColIdx;
|
||||
|
||||
/*
|
||||
* Charge one cpu_operator_cost per comparison per input tuple. We
|
||||
* assume all columns get compared at most of the tuples.
|
||||
*/
|
||||
plan->total_cost += cpu_operator_cost * plan->plan_rows * ngrp;
|
||||
copy_plan_costsize(plan, lefttree); /* only care about copying size */
|
||||
cost_group(&group_path, root,
|
||||
numGroupCols, numGroups,
|
||||
lefttree->startup_cost,
|
||||
lefttree->total_cost,
|
||||
lefttree->plan_rows);
|
||||
plan->startup_cost = group_path.startup_cost;
|
||||
plan->total_cost = group_path.total_cost;
|
||||
|
||||
/* One output tuple per estimated result group */
|
||||
plan->plan_rows = numGroups;
|
||||
@@ -1752,8 +1760,6 @@ make_group(List *tlist,
|
||||
plan->targetlist = tlist;
|
||||
plan->lefttree = lefttree;
|
||||
plan->righttree = (Plan *) NULL;
|
||||
node->numCols = ngrp;
|
||||
node->grpColIdx = grpColIdx;
|
||||
|
||||
return node;
|
||||
}
|
||||
|
@@ -14,19 +14,17 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.71 2002/11/06 00:00:44 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planmain.c,v 1.72 2002/11/21 00:42:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "optimizer/clauses.h"
|
||||
#include "optimizer/cost.h"
|
||||
#include "optimizer/pathnode.h"
|
||||
#include "optimizer/paths.h"
|
||||
#include "optimizer/planmain.h"
|
||||
#include "optimizer/tlist.h"
|
||||
#include "parser/parsetree.h"
|
||||
#include "utils/memutils.h"
|
||||
|
||||
|
||||
/*--------------------
|
||||
@@ -36,11 +34,12 @@
|
||||
*
|
||||
* Since query_planner does not handle the toplevel processing (grouping,
|
||||
* sorting, etc) it cannot select the best path by itself. It selects
|
||||
* two paths: the cheapest path that produces the required tuples, independent
|
||||
* of any ordering considerations, and the cheapest path that produces the
|
||||
* required tuples in the required ordering, if there is a path that
|
||||
* can produce them without an explicit top-level sort step. The caller
|
||||
* (grouping_planner) will make the final decision about which to use.
|
||||
* two paths: the cheapest path that produces all the required tuples,
|
||||
* independent of any ordering considerations, and the cheapest path that
|
||||
* produces the expected fraction of the required tuples in the required
|
||||
* ordering, if there is a path that is cheaper for this than just sorting
|
||||
* the output of the cheapest overall path. The caller (grouping_planner)
|
||||
* will make the final decision about which to use.
|
||||
*
|
||||
* Input parameters:
|
||||
* root is the query to plan
|
||||
@@ -50,7 +49,7 @@
|
||||
* Output parameters:
|
||||
* *cheapest_path receives the overall-cheapest path for the query
|
||||
* *sorted_path receives the cheapest presorted path for the query,
|
||||
* if any (it may be NULL, or the same as cheapest_path)
|
||||
* if any (NULL if there is no useful presorted path)
|
||||
*
|
||||
* Note: the Query node also includes a query_pathkeys field, which is both
|
||||
* an input and an output of query_planner(). The input value signals
|
||||
@@ -78,6 +77,8 @@ query_planner(Query *root, List *tlist, double tuple_fraction,
|
||||
{
|
||||
List *constant_quals;
|
||||
RelOptInfo *final_rel;
|
||||
Path *cheapestpath;
|
||||
Path *sortedpath;
|
||||
|
||||
/*
|
||||
* If the query has an empty join tree, then it's something easy like
|
||||
@@ -166,34 +167,76 @@ query_planner(Query *root, List *tlist, double tuple_fraction,
|
||||
|
||||
/*
|
||||
* Pick out the cheapest-total path and the cheapest presorted path
|
||||
* for the requested pathkeys (if there is one). We can take the
|
||||
* for the requested pathkeys (if there is one). We should take the
|
||||
* tuple fraction into account when selecting the cheapest presorted
|
||||
* path, but not when selecting the cheapest-total path, since if we
|
||||
* have to sort then we'll have to fetch all the tuples. (But there's
|
||||
* a special case: if query_pathkeys is NIL, meaning order doesn't
|
||||
* matter, then the "cheapest presorted" path will be the cheapest
|
||||
* overall for the tuple fraction.)
|
||||
*
|
||||
* The cheapest-total path is also the one to use if grouping_planner
|
||||
* decides to use hashed aggregation, so we return it separately even
|
||||
* if this routine thinks the presorted path is the winner.
|
||||
*/
|
||||
*cheapest_path = final_rel->cheapest_total_path;
|
||||
cheapestpath = final_rel->cheapest_total_path;
|
||||
|
||||
*sorted_path =
|
||||
sortedpath =
|
||||
get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
|
||||
root->query_pathkeys,
|
||||
tuple_fraction);
|
||||
|
||||
/* Don't return same path in both guises; just wastes effort */
|
||||
if (sortedpath == cheapestpath)
|
||||
sortedpath = NULL;
|
||||
|
||||
/*
|
||||
* Forget about the presorted path if it would be cheaper to sort the
|
||||
* cheapest-total path. Here we need consider only the behavior at
|
||||
* the tuple fraction point.
|
||||
*/
|
||||
if (sortedpath)
|
||||
{
|
||||
Path sort_path; /* dummy for result of cost_sort */
|
||||
|
||||
if (root->query_pathkeys == NIL ||
|
||||
pathkeys_contained_in(root->query_pathkeys,
|
||||
cheapestpath->pathkeys))
|
||||
{
|
||||
/* No sort needed for cheapest path */
|
||||
sort_path.startup_cost = cheapestpath->startup_cost;
|
||||
sort_path.total_cost = cheapestpath->total_cost;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* Figure cost for sorting */
|
||||
cost_sort(&sort_path, root, root->query_pathkeys,
|
||||
cheapestpath->total_cost,
|
||||
final_rel->rows, final_rel->width);
|
||||
}
|
||||
|
||||
if (compare_fractional_path_costs(sortedpath, &sort_path,
|
||||
tuple_fraction) > 0)
|
||||
{
|
||||
/* Presorted path is a loser */
|
||||
sortedpath = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we have constant quals, add a toplevel Result step to process them.
|
||||
*/
|
||||
if (constant_quals)
|
||||
{
|
||||
*cheapest_path = (Path *)
|
||||
create_result_path((*cheapest_path)->parent,
|
||||
*cheapest_path,
|
||||
constant_quals);
|
||||
if (*sorted_path)
|
||||
*sorted_path = (Path *)
|
||||
create_result_path((*sorted_path)->parent,
|
||||
*sorted_path,
|
||||
constant_quals);
|
||||
cheapestpath = (Path *) create_result_path(final_rel,
|
||||
cheapestpath,
|
||||
constant_quals);
|
||||
if (sortedpath)
|
||||
sortedpath = (Path *) create_result_path(final_rel,
|
||||
sortedpath,
|
||||
constant_quals);
|
||||
}
|
||||
|
||||
*cheapest_path = cheapestpath;
|
||||
*sorted_path = sortedpath;
|
||||
}
|
||||
|
@@ -8,7 +8,7 @@
|
||||
*
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.129 2002/11/19 23:21:59 tgl Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/planner.c,v 1.130 2002/11/21 00:42:19 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
@@ -933,11 +933,13 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
List *sub_tlist;
|
||||
List *group_pathkeys;
|
||||
AttrNumber *groupColIdx = NULL;
|
||||
double sub_tuple_fraction;
|
||||
Path *cheapest_path;
|
||||
Path *sorted_path;
|
||||
double dNumGroups = 0;
|
||||
long numGroups = 0;
|
||||
int numAggs = 0;
|
||||
int numGroupCols = length(parse->groupClause);
|
||||
bool use_hashed_grouping = false;
|
||||
|
||||
/* Preprocess targetlist in case we are inside an INSERT/UPDATE. */
|
||||
@@ -1169,6 +1171,12 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* With grouping or aggregation, the tuple fraction to pass to
|
||||
* query_planner() may be different from what it is at top level.
|
||||
*/
|
||||
sub_tuple_fraction = tuple_fraction;
|
||||
|
||||
if (parse->groupClause)
|
||||
{
|
||||
/*
|
||||
@@ -1182,8 +1190,8 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* amounts to assuming that all the groups are about the same
|
||||
* size).
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction = 0.25;
|
||||
if (sub_tuple_fraction >= 1.0)
|
||||
sub_tuple_fraction = 0.25;
|
||||
|
||||
/*
|
||||
* If both GROUP BY and ORDER BY are specified, we will need
|
||||
@@ -1195,7 +1203,7 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
if (parse->groupClause && parse->sortClause &&
|
||||
!noncanonical_pathkeys_contained_in(sort_pathkeys,
|
||||
group_pathkeys))
|
||||
tuple_fraction = 0.0;
|
||||
sub_tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->hasAggs)
|
||||
{
|
||||
@@ -1203,7 +1211,7 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* Ungrouped aggregate will certainly want all the input
|
||||
* tuples.
|
||||
*/
|
||||
tuple_fraction = 0.0;
|
||||
sub_tuple_fraction = 0.0;
|
||||
}
|
||||
else if (parse->distinctClause)
|
||||
{
|
||||
@@ -1212,15 +1220,15 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* number of input tuples per output tuple. Handle the same
|
||||
* way.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction = 0.25;
|
||||
if (sub_tuple_fraction >= 1.0)
|
||||
sub_tuple_fraction = 0.25;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate the best unsorted and presorted paths for this Query
|
||||
* (but note there may not be any presorted path).
|
||||
*/
|
||||
query_planner(parse, sub_tlist, tuple_fraction,
|
||||
query_planner(parse, sub_tlist, sub_tuple_fraction,
|
||||
&cheapest_path, &sorted_path);
|
||||
|
||||
/*
|
||||
@@ -1236,11 +1244,13 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
if (parse->groupClause)
|
||||
{
|
||||
/*
|
||||
* Always estimate the number of groups.
|
||||
* Always estimate the number of groups. We can't do this until
|
||||
* after running query_planner(), either.
|
||||
*/
|
||||
dNumGroups = estimate_num_groups(parse,
|
||||
parse->groupClause,
|
||||
cheapest_path->parent->rows);
|
||||
/* Also want it as a long int --- but 'ware overflow! */
|
||||
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
||||
|
||||
/*
|
||||
@@ -1248,9 +1258,11 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
* aggregates. (Doing so would imply storing *all* the input
|
||||
* values in the hash table, which seems like a certain loser.)
|
||||
*/
|
||||
if (parse->hasAggs &&
|
||||
(contain_distinct_agg_clause((Node *) tlist) ||
|
||||
contain_distinct_agg_clause(parse->havingQual)))
|
||||
if (!enable_hashagg)
|
||||
use_hashed_grouping = false;
|
||||
else if (parse->hasAggs &&
|
||||
(contain_distinct_agg_clause((Node *) tlist) ||
|
||||
contain_distinct_agg_clause(parse->havingQual)))
|
||||
use_hashed_grouping = false;
|
||||
else
|
||||
{
|
||||
@@ -1272,11 +1284,96 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
|
||||
if (hashentrysize * dNumGroups <= SortMem * 1024L)
|
||||
{
|
||||
/* much more to do here */
|
||||
#if 0
|
||||
/* TEMPORARY HOTWIRE FOR TESTING */
|
||||
use_hashed_grouping = true;
|
||||
#endif
|
||||
/*
|
||||
* Okay, do the cost comparison. We need to consider
|
||||
* cheapest_path + hashagg [+ final sort]
|
||||
* versus either
|
||||
* cheapest_path [+ sort] + group or agg [+ final sort]
|
||||
* or
|
||||
* presorted_path + group or agg [+ final sort]
|
||||
* where brackets indicate a step that may not be needed.
|
||||
* We assume query_planner() will have returned a
|
||||
* presorted path only if it's a winner compared to
|
||||
* cheapest_path for this purpose.
|
||||
*
|
||||
* These path variables are dummies that just hold cost
|
||||
* fields; we don't make actual Paths for these steps.
|
||||
*/
|
||||
Path hashed_p;
|
||||
Path sorted_p;
|
||||
|
||||
cost_agg(&hashed_p, parse,
|
||||
AGG_HASHED, numAggs,
|
||||
numGroupCols, dNumGroups,
|
||||
cheapest_path->startup_cost,
|
||||
cheapest_path->total_cost,
|
||||
cheapest_path->parent->rows);
|
||||
/* Result of hashed agg is always unsorted */
|
||||
if (sort_pathkeys)
|
||||
cost_sort(&hashed_p, parse, sort_pathkeys,
|
||||
hashed_p.total_cost,
|
||||
dNumGroups,
|
||||
cheapest_path->parent->width);
|
||||
|
||||
if (sorted_path)
|
||||
{
|
||||
sorted_p.startup_cost = sorted_path->startup_cost;
|
||||
sorted_p.total_cost = sorted_path->total_cost;
|
||||
current_pathkeys = sorted_path->pathkeys;
|
||||
}
|
||||
else
|
||||
{
|
||||
sorted_p.startup_cost = cheapest_path->startup_cost;
|
||||
sorted_p.total_cost = cheapest_path->total_cost;
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
if (!pathkeys_contained_in(group_pathkeys,
|
||||
current_pathkeys))
|
||||
{
|
||||
cost_sort(&sorted_p, parse, group_pathkeys,
|
||||
sorted_p.total_cost,
|
||||
cheapest_path->parent->rows,
|
||||
cheapest_path->parent->width);
|
||||
current_pathkeys = group_pathkeys;
|
||||
}
|
||||
if (parse->hasAggs)
|
||||
cost_agg(&sorted_p, parse,
|
||||
AGG_SORTED, numAggs,
|
||||
numGroupCols, dNumGroups,
|
||||
sorted_p.startup_cost,
|
||||
sorted_p.total_cost,
|
||||
cheapest_path->parent->rows);
|
||||
else
|
||||
cost_group(&sorted_p, parse,
|
||||
numGroupCols, dNumGroups,
|
||||
sorted_p.startup_cost,
|
||||
sorted_p.total_cost,
|
||||
cheapest_path->parent->rows);
|
||||
/* The Agg or Group node will preserve ordering */
|
||||
if (sort_pathkeys &&
|
||||
!pathkeys_contained_in(sort_pathkeys,
|
||||
current_pathkeys))
|
||||
{
|
||||
cost_sort(&sorted_p, parse, sort_pathkeys,
|
||||
sorted_p.total_cost,
|
||||
dNumGroups,
|
||||
cheapest_path->parent->width);
|
||||
}
|
||||
|
||||
/*
|
||||
* Now make the decision using the top-level tuple
|
||||
* fraction. First we have to convert an absolute
|
||||
* count (LIMIT) into fractional form.
|
||||
*/
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= dNumGroups;
|
||||
|
||||
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
|
||||
tuple_fraction) <= 0)
|
||||
{
|
||||
/* Hashed is cheaper, so use it */
|
||||
use_hashed_grouping = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1284,50 +1381,17 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
/*
|
||||
* Select the best path and create a plan to execute it.
|
||||
*
|
||||
* If no special sort order is wanted, or if the cheapest path is
|
||||
* already appropriately ordered, use the cheapest path.
|
||||
* Otherwise, look to see if we have an already-ordered path that is
|
||||
* cheaper than doing an explicit sort on the cheapest-total-cost
|
||||
* path.
|
||||
* If we are doing hashed grouping, we will always read all the
|
||||
* input tuples, so use the cheapest-total path. Otherwise,
|
||||
* trust query_planner's decision about which to use.
|
||||
*/
|
||||
if (parse->query_pathkeys == NIL ||
|
||||
pathkeys_contained_in(parse->query_pathkeys,
|
||||
cheapest_path->pathkeys))
|
||||
if (sorted_path && !use_hashed_grouping)
|
||||
{
|
||||
result_plan = create_plan(parse, cheapest_path);
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
else if (sorted_path)
|
||||
{
|
||||
Path sort_path; /* dummy for result of cost_sort */
|
||||
|
||||
cost_sort(&sort_path, parse, parse->query_pathkeys,
|
||||
sorted_path->parent->rows, sorted_path->parent->width);
|
||||
sort_path.startup_cost += cheapest_path->total_cost;
|
||||
sort_path.total_cost += cheapest_path->total_cost;
|
||||
/* Convert absolute-count tuple_fraction into a fraction */
|
||||
if (tuple_fraction >= 1.0)
|
||||
tuple_fraction /= sorted_path->parent->rows;
|
||||
if (compare_fractional_path_costs(sorted_path, &sort_path,
|
||||
tuple_fraction) <= 0)
|
||||
{
|
||||
/* Presorted path is cheaper, use it */
|
||||
result_plan = create_plan(parse, sorted_path);
|
||||
current_pathkeys = sorted_path->pathkeys;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* otherwise, doing it the hard way is still cheaper */
|
||||
result_plan = create_plan(parse, cheapest_path);
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
result_plan = create_plan(parse, sorted_path);
|
||||
current_pathkeys = sorted_path->pathkeys;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
* No sorted path, so we must use the cheapest-total path.
|
||||
* The actual sort step will be generated below.
|
||||
*/
|
||||
result_plan = create_plan(parse, cheapest_path);
|
||||
current_pathkeys = cheapest_path->pathkeys;
|
||||
}
|
||||
@@ -1362,10 +1426,11 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
if (use_hashed_grouping)
|
||||
{
|
||||
/* Hashed aggregate plan --- no sort needed */
|
||||
result_plan = (Plan *) make_agg(tlist,
|
||||
result_plan = (Plan *) make_agg(parse,
|
||||
tlist,
|
||||
(List *) parse->havingQual,
|
||||
AGG_HASHED,
|
||||
length(parse->groupClause),
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
numGroups,
|
||||
numAggs,
|
||||
@@ -1401,10 +1466,11 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
current_pathkeys = NIL;
|
||||
}
|
||||
|
||||
result_plan = (Plan *) make_agg(tlist,
|
||||
result_plan = (Plan *) make_agg(parse,
|
||||
tlist,
|
||||
(List *) parse->havingQual,
|
||||
aggstrategy,
|
||||
length(parse->groupClause),
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
numGroups,
|
||||
numAggs,
|
||||
@@ -1436,11 +1502,13 @@ grouping_planner(Query *parse, double tuple_fraction)
|
||||
current_pathkeys = group_pathkeys;
|
||||
}
|
||||
|
||||
result_plan = (Plan *) make_group(tlist,
|
||||
length(parse->groupClause),
|
||||
result_plan = (Plan *) make_group(parse,
|
||||
tlist,
|
||||
numGroupCols,
|
||||
groupColIdx,
|
||||
dNumGroups,
|
||||
result_plan);
|
||||
/* The Group node won't change sort ordering */
|
||||
}
|
||||
}
|
||||
} /* end of if (setOperations) */
|
||||
|
@@ -5,7 +5,7 @@
|
||||
* command, configuration file, and command line options.
|
||||
* See src/backend/utils/misc/README for more information.
|
||||
*
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.106 2002/11/15 02:44:57 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/utils/misc/guc.c,v 1.107 2002/11/21 00:42:19 tgl Exp $
|
||||
*
|
||||
* Copyright 2000 by PostgreSQL Global Development Group
|
||||
* Written by Peter Eisentraut <peter_e@gmx.net>.
|
||||
@@ -323,6 +323,10 @@ static struct config_bool
|
||||
{"enable_sort", PGC_USERSET}, &enable_sort,
|
||||
true, NULL, NULL
|
||||
},
|
||||
{
|
||||
{"enable_hashagg", PGC_USERSET}, &enable_hashagg,
|
||||
true, NULL, NULL
|
||||
},
|
||||
{
|
||||
{"enable_nestloop", PGC_USERSET}, &enable_nestloop,
|
||||
true, NULL, NULL
|
||||
|
@@ -83,6 +83,7 @@
|
||||
#enable_indexscan = true
|
||||
#enable_tidscan = true
|
||||
#enable_sort = true
|
||||
#enable_hashagg = true
|
||||
#enable_nestloop = true
|
||||
#enable_mergejoin = true
|
||||
#enable_hashjoin = true
|
||||
|
Reference in New Issue
Block a user