mirror of
https://github.com/postgres/postgres.git
synced 2025-04-21 12:05:57 +03:00
Make GROUP BY work properly for datatypes that only support hashing and not sorting.
The infrastructure for this was all in place already; it's only necessary to fix the planner to not assume that sorting is always an available option.
This commit is contained in:
parent
82a1f09953
commit
ec73b56a31
@ -14,7 +14,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.107 2008/07/31 22:47:56 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planmain.c,v 1.108 2008/08/03 19:10:52 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -288,8 +288,7 @@ query_planner(PlannerInfo *root, List *tlist,
|
|||||||
* levels of sort --- and, therefore, certainly need to read all the
|
* levels of sort --- and, therefore, certainly need to read all the
|
||||||
* tuples --- unless ORDER BY is a subset of GROUP BY.
|
* tuples --- unless ORDER BY is a subset of GROUP BY.
|
||||||
*/
|
*/
|
||||||
if (root->group_pathkeys && root->sort_pathkeys &&
|
if (!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
|
||||||
!pathkeys_contained_in(root->sort_pathkeys, root->group_pathkeys))
|
|
||||||
tuple_fraction = 0.0;
|
tuple_fraction = 0.0;
|
||||||
}
|
}
|
||||||
else if (parse->hasAggs || root->hasHavingQual)
|
else if (parse->hasAggs || root->hasHavingQual)
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.236 2008/08/02 21:32:00 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.237 2008/08/03 19:10:52 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -69,11 +69,12 @@ static double preprocess_limit(PlannerInfo *root,
|
|||||||
int64 *offset_est, int64 *count_est);
|
int64 *offset_est, int64 *count_est);
|
||||||
static void preprocess_groupclause(PlannerInfo *root);
|
static void preprocess_groupclause(PlannerInfo *root);
|
||||||
static Oid *extract_grouping_ops(List *groupClause);
|
static Oid *extract_grouping_ops(List *groupClause);
|
||||||
|
static bool grouping_is_sortable(List *groupClause);
|
||||||
|
static bool grouping_is_hashable(List *groupClause);
|
||||||
static bool choose_hashed_grouping(PlannerInfo *root,
|
static bool choose_hashed_grouping(PlannerInfo *root,
|
||||||
double tuple_fraction, double limit_tuples,
|
double tuple_fraction, double limit_tuples,
|
||||||
Path *cheapest_path, Path *sorted_path,
|
Path *cheapest_path, Path *sorted_path,
|
||||||
Oid *groupOperators, double dNumGroups,
|
double dNumGroups, AggClauseCounts *agg_counts);
|
||||||
AggClauseCounts *agg_counts);
|
|
||||||
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
static List *make_subplanTargetList(PlannerInfo *root, List *tlist,
|
||||||
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
AttrNumber **groupColIdx, bool *need_tlist_eval);
|
||||||
static void locate_grouping_columns(PlannerInfo *root,
|
static void locate_grouping_columns(PlannerInfo *root,
|
||||||
@ -839,7 +840,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
List *sub_tlist;
|
List *sub_tlist;
|
||||||
List *group_pathkeys;
|
List *group_pathkeys;
|
||||||
AttrNumber *groupColIdx = NULL;
|
AttrNumber *groupColIdx = NULL;
|
||||||
Oid *groupOperators = NULL;
|
|
||||||
bool need_tlist_eval = true;
|
bool need_tlist_eval = true;
|
||||||
QualCost tlist_cost;
|
QualCost tlist_cost;
|
||||||
Path *cheapest_path;
|
Path *cheapest_path;
|
||||||
@ -877,11 +877,15 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
* DISTINCT and ORDER BY requirements. This should be changed
|
* DISTINCT and ORDER BY requirements. This should be changed
|
||||||
* someday, but DISTINCT ON is a bit of a problem ...
|
* someday, but DISTINCT ON is a bit of a problem ...
|
||||||
*/
|
*/
|
||||||
|
if (parse->groupClause && grouping_is_sortable(parse->groupClause))
|
||||||
root->group_pathkeys =
|
root->group_pathkeys =
|
||||||
make_pathkeys_for_sortclauses(root,
|
make_pathkeys_for_sortclauses(root,
|
||||||
parse->groupClause,
|
parse->groupClause,
|
||||||
tlist,
|
tlist,
|
||||||
false);
|
false);
|
||||||
|
else
|
||||||
|
root->group_pathkeys = NIL;
|
||||||
|
|
||||||
if (list_length(parse->distinctClause) > list_length(parse->sortClause))
|
if (list_length(parse->distinctClause) > list_length(parse->sortClause))
|
||||||
root->sort_pathkeys =
|
root->sort_pathkeys =
|
||||||
make_pathkeys_for_sortclauses(root,
|
make_pathkeys_for_sortclauses(root,
|
||||||
@ -915,12 +919,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
/*
|
/*
|
||||||
* Figure out whether we need a sorted result from query_planner.
|
* Figure out whether we need a sorted result from query_planner.
|
||||||
*
|
*
|
||||||
* If we have a GROUP BY clause, then we want a result sorted properly
|
* If we have a sortable GROUP BY clause, then we want a result sorted
|
||||||
* for grouping. Otherwise, if there is an ORDER BY clause, we want
|
* properly for grouping. Otherwise, if there is an ORDER BY clause,
|
||||||
* to sort by the ORDER BY clause. (Note: if we have both, and ORDER
|
* we want to sort by the ORDER BY clause. (Note: if we have both, and
|
||||||
* BY is a superset of GROUP BY, it would be tempting to request sort
|
* ORDER BY is a superset of GROUP BY, it would be tempting to request
|
||||||
* by ORDER BY --- but that might just leave us failing to exploit an
|
* sort by ORDER BY --- but that might just leave us failing to
|
||||||
* available sort order at all. Needs more thought...)
|
* exploit an available sort order at all. Needs more thought...)
|
||||||
*/
|
*/
|
||||||
if (root->group_pathkeys)
|
if (root->group_pathkeys)
|
||||||
root->query_pathkeys = root->group_pathkeys;
|
root->query_pathkeys = root->group_pathkeys;
|
||||||
@ -942,17 +946,39 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
sort_pathkeys = root->sort_pathkeys;
|
sort_pathkeys = root->sort_pathkeys;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If grouping, extract the grouping operators and decide whether we
|
* If grouping, decide whether to use sorted or hashed grouping.
|
||||||
* want to use hashed grouping.
|
|
||||||
*/
|
*/
|
||||||
if (parse->groupClause)
|
if (parse->groupClause)
|
||||||
{
|
{
|
||||||
groupOperators = extract_grouping_ops(parse->groupClause);
|
bool can_hash;
|
||||||
|
bool can_sort;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Executor doesn't support hashed aggregation with DISTINCT
|
||||||
|
* aggregates. (Doing so would imply storing *all* the input
|
||||||
|
* values in the hash table, which seems like a certain loser.)
|
||||||
|
*/
|
||||||
|
can_hash = (agg_counts.numDistinctAggs == 0 &&
|
||||||
|
grouping_is_hashable(parse->groupClause));
|
||||||
|
can_sort = grouping_is_sortable(parse->groupClause);
|
||||||
|
if (can_hash && can_sort)
|
||||||
|
{
|
||||||
|
/* we have a meaningful choice to make ... */
|
||||||
use_hashed_grouping =
|
use_hashed_grouping =
|
||||||
choose_hashed_grouping(root, tuple_fraction, limit_tuples,
|
choose_hashed_grouping(root,
|
||||||
|
tuple_fraction, limit_tuples,
|
||||||
cheapest_path, sorted_path,
|
cheapest_path, sorted_path,
|
||||||
groupOperators, dNumGroups,
|
dNumGroups, &agg_counts);
|
||||||
&agg_counts);
|
}
|
||||||
|
else if (can_hash)
|
||||||
|
use_hashed_grouping = true;
|
||||||
|
else if (can_sort)
|
||||||
|
use_hashed_grouping = false;
|
||||||
|
else
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
|
||||||
|
errmsg("could not implement GROUP BY"),
|
||||||
|
errdetail("Some of the datatypes only support hashing, while others only support sorting.")));
|
||||||
|
|
||||||
/* Also convert # groups to long int --- but 'ware overflow! */
|
/* Also convert # groups to long int --- but 'ware overflow! */
|
||||||
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
|
||||||
@ -1088,7 +1114,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
AGG_HASHED,
|
AGG_HASHED,
|
||||||
numGroupCols,
|
numGroupCols,
|
||||||
groupColIdx,
|
groupColIdx,
|
||||||
groupOperators,
|
extract_grouping_ops(parse->groupClause),
|
||||||
numGroups,
|
numGroups,
|
||||||
agg_counts.numAggs,
|
agg_counts.numAggs,
|
||||||
result_plan);
|
result_plan);
|
||||||
@ -1131,7 +1157,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
aggstrategy,
|
aggstrategy,
|
||||||
numGroupCols,
|
numGroupCols,
|
||||||
groupColIdx,
|
groupColIdx,
|
||||||
groupOperators,
|
extract_grouping_ops(parse->groupClause),
|
||||||
numGroups,
|
numGroups,
|
||||||
agg_counts.numAggs,
|
agg_counts.numAggs,
|
||||||
result_plan);
|
result_plan);
|
||||||
@ -1160,7 +1186,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
|
|||||||
(List *) parse->havingQual,
|
(List *) parse->havingQual,
|
||||||
numGroupCols,
|
numGroupCols,
|
||||||
groupColIdx,
|
groupColIdx,
|
||||||
groupOperators,
|
extract_grouping_ops(parse->groupClause),
|
||||||
dNumGroups,
|
dNumGroups,
|
||||||
result_plan);
|
result_plan);
|
||||||
/* The Group node won't change sort ordering */
|
/* The Group node won't change sort ordering */
|
||||||
@ -1495,6 +1521,9 @@ preprocess_limit(PlannerInfo *root, double tuple_fraction,
|
|||||||
* GROUP BY elements, which could match the sort ordering of other
|
* GROUP BY elements, which could match the sort ordering of other
|
||||||
* possible plans (eg an indexscan) and thereby reduce cost. We don't
|
* possible plans (eg an indexscan) and thereby reduce cost. We don't
|
||||||
* bother with that, though. Hashed grouping will frequently win anyway.
|
* bother with that, though. Hashed grouping will frequently win anyway.
|
||||||
|
*
|
||||||
|
* Note: we need no comparable processing of the distinctClause because
|
||||||
|
* the parser already enforced that that matches ORDER BY.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
preprocess_groupclause(PlannerInfo *root)
|
preprocess_groupclause(PlannerInfo *root)
|
||||||
@ -1505,7 +1534,7 @@ preprocess_groupclause(PlannerInfo *root)
|
|||||||
ListCell *sl;
|
ListCell *sl;
|
||||||
ListCell *gl;
|
ListCell *gl;
|
||||||
|
|
||||||
/* If no ORDER BY, nothing useful to do here anyway */
|
/* If no ORDER BY, nothing useful to do here */
|
||||||
if (parse->sortClause == NIL)
|
if (parse->sortClause == NIL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
@ -1546,7 +1575,8 @@ preprocess_groupclause(PlannerInfo *root)
|
|||||||
* were able to make a complete match. In other words, we only
|
* were able to make a complete match. In other words, we only
|
||||||
* rearrange the GROUP BY list if the result is that one list is a
|
* rearrange the GROUP BY list if the result is that one list is a
|
||||||
* prefix of the other --- otherwise there's no possibility of a
|
* prefix of the other --- otherwise there's no possibility of a
|
||||||
* common sort.
|
* common sort. Also, give up if there are any non-sortable GROUP BY
|
||||||
|
* items, since then there's no hope anyway.
|
||||||
*/
|
*/
|
||||||
foreach(gl, parse->groupClause)
|
foreach(gl, parse->groupClause)
|
||||||
{
|
{
|
||||||
@ -1556,6 +1586,8 @@ preprocess_groupclause(PlannerInfo *root)
|
|||||||
continue; /* it matched an ORDER BY item */
|
continue; /* it matched an ORDER BY item */
|
||||||
if (partial_match)
|
if (partial_match)
|
||||||
return; /* give up, no common sort possible */
|
return; /* give up, no common sort possible */
|
||||||
|
if (!OidIsValid(gc->sortop))
|
||||||
|
return; /* give up, GROUP BY can't be sorted */
|
||||||
new_groupclause = lappend(new_groupclause, gc);
|
new_groupclause = lappend(new_groupclause, gc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1566,7 +1598,7 @@ preprocess_groupclause(PlannerInfo *root)
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* extract_grouping_ops - make an array of the equality operator OIDs
|
* extract_grouping_ops - make an array of the equality operator OIDs
|
||||||
* for the GROUP BY clause
|
* for a SortGroupClause list
|
||||||
*/
|
*/
|
||||||
static Oid *
|
static Oid *
|
||||||
extract_grouping_ops(List *groupClause)
|
extract_grouping_ops(List *groupClause)
|
||||||
@ -1590,15 +1622,59 @@ extract_grouping_ops(List *groupClause)
|
|||||||
return groupOperators;
|
return groupOperators;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* grouping_is_sortable - is it possible to implement grouping list by sorting?
|
||||||
|
*
|
||||||
|
* This is easy since the parser will have included a sortop if one exists.
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
grouping_is_sortable(List *groupClause)
|
||||||
|
{
|
||||||
|
ListCell *glitem;
|
||||||
|
|
||||||
|
foreach(glitem, groupClause)
|
||||||
|
{
|
||||||
|
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
|
||||||
|
|
||||||
|
if (!OidIsValid(groupcl->sortop))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* grouping_is_hashable - is it possible to implement grouping list by hashing?
|
||||||
|
*
|
||||||
|
* We assume hashing is OK if the equality operators are marked oprcanhash.
|
||||||
|
* (If there isn't actually a supporting hash function, the executor will
|
||||||
|
* complain at runtime; but this is a misdeclaration of the operator, not
|
||||||
|
* a system bug.)
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
grouping_is_hashable(List *groupClause)
|
||||||
|
{
|
||||||
|
ListCell *glitem;
|
||||||
|
|
||||||
|
foreach(glitem, groupClause)
|
||||||
|
{
|
||||||
|
SortGroupClause *groupcl = (SortGroupClause *) lfirst(glitem);
|
||||||
|
|
||||||
|
if (!op_hashjoinable(groupcl->eqop))
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* choose_hashed_grouping - should we use hashed grouping?
|
* choose_hashed_grouping - should we use hashed grouping?
|
||||||
|
*
|
||||||
|
* Note: this is only applied when both alternatives are actually feasible.
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
choose_hashed_grouping(PlannerInfo *root,
|
choose_hashed_grouping(PlannerInfo *root,
|
||||||
double tuple_fraction, double limit_tuples,
|
double tuple_fraction, double limit_tuples,
|
||||||
Path *cheapest_path, Path *sorted_path,
|
Path *cheapest_path, Path *sorted_path,
|
||||||
Oid *groupOperators, double dNumGroups,
|
double dNumGroups, AggClauseCounts *agg_counts)
|
||||||
AggClauseCounts *agg_counts)
|
|
||||||
{
|
{
|
||||||
int numGroupCols = list_length(root->parse->groupClause);
|
int numGroupCols = list_length(root->parse->groupClause);
|
||||||
double cheapest_path_rows;
|
double cheapest_path_rows;
|
||||||
@ -1607,27 +1683,10 @@ choose_hashed_grouping(PlannerInfo *root,
|
|||||||
List *current_pathkeys;
|
List *current_pathkeys;
|
||||||
Path hashed_p;
|
Path hashed_p;
|
||||||
Path sorted_p;
|
Path sorted_p;
|
||||||
int i;
|
|
||||||
|
|
||||||
/*
|
/* Prefer sorting when enable_hashagg is off */
|
||||||
* Check can't-do-it conditions, including whether the grouping operators
|
|
||||||
* are hashjoinable. (We assume hashing is OK if they are marked
|
|
||||||
* oprcanhash. If there isn't actually a supporting hash function, the
|
|
||||||
* executor will complain at runtime.)
|
|
||||||
*
|
|
||||||
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
|
|
||||||
* (Doing so would imply storing *all* the input values in the hash table,
|
|
||||||
* which seems like a certain loser.)
|
|
||||||
*/
|
|
||||||
if (!enable_hashagg)
|
if (!enable_hashagg)
|
||||||
return false;
|
return false;
|
||||||
if (agg_counts->numDistinctAggs != 0)
|
|
||||||
return false;
|
|
||||||
for (i = 0; i < numGroupCols; i++)
|
|
||||||
{
|
|
||||||
if (!op_hashjoinable(groupOperators[i]))
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Don't do it if it doesn't look like the hashtable will fit into
|
* Don't do it if it doesn't look like the hashtable will fit into
|
||||||
|
@ -8,7 +8,7 @@
|
|||||||
*
|
*
|
||||||
*
|
*
|
||||||
* IDENTIFICATION
|
* IDENTIFICATION
|
||||||
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.172 2008/08/02 21:32:00 tgl Exp $
|
* $PostgreSQL: pgsql/src/backend/parser/parse_clause.c,v 1.173 2008/08/03 19:10:52 tgl Exp $
|
||||||
*
|
*
|
||||||
*-------------------------------------------------------------------------
|
*-------------------------------------------------------------------------
|
||||||
*/
|
*/
|
||||||
@ -1351,15 +1351,11 @@ transformGroupClause(ParseState *pstate, List *grouplist,
|
|||||||
/*
|
/*
|
||||||
* If no match in ORDER BY, just add it to the result using
|
* If no match in ORDER BY, just add it to the result using
|
||||||
* default sort/group semantics.
|
* default sort/group semantics.
|
||||||
*
|
|
||||||
* XXX for now, the planner requires groupClause to be sortable,
|
|
||||||
* so we have to insist on that here.
|
|
||||||
*/
|
*/
|
||||||
if (!found)
|
if (!found)
|
||||||
result = addTargetToGroupList(pstate, tle,
|
result = addTargetToGroupList(pstate, tle,
|
||||||
result, *targetlist,
|
result, *targetlist,
|
||||||
true, /* XXX for now */
|
false, true);
|
||||||
true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user