mirror of
https://github.com/postgres/postgres.git
synced 2025-07-14 08:21:07 +03:00
Remove GROUP BY columns that are functionally dependent on other columns.
If a GROUP BY clause includes all columns of a non-deferred primary key, as well as other columns of the same relation, those other columns are redundant and can be dropped from the grouping; the pkey is enough to ensure that each row of the table corresponds to a separate group. Getting rid of the excess columns will reduce the cost of the sorting or hashing needed to implement GROUP BY, and can indeed remove the need for a sort step altogether.

This seems worth testing for since many query authors are not aware of the GROUP-BY-primary-key exception to the rule about queries not being allowed to reference non-grouped-by columns in their targetlists or HAVING clauses. Thus, redundant GROUP BY items are not uncommon. Also, we can make the test pretty cheap in most queries where it won't help by not looking up a rel's primary key until we've found that at least two of its columns are in GROUP BY.

David Rowley, reviewed by Julien Rouhaud
This commit is contained in:
@ -20,7 +20,9 @@
|
||||
|
||||
#include "access/htup_details.h"
|
||||
#include "access/parallel.h"
|
||||
#include "access/sysattr.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/pg_constraint_fn.h"
|
||||
#include "executor/executor.h"
|
||||
#include "executor/nodeAgg.h"
|
||||
#include "foreign/fdwapi.h"
|
||||
@ -89,6 +91,7 @@ static double preprocess_limit(PlannerInfo *root,
|
||||
double tuple_fraction,
|
||||
int64 *offset_est, int64 *count_est);
|
||||
static bool limit_needed(Query *parse);
|
||||
static void remove_useless_groupby_columns(PlannerInfo *root);
|
||||
static List *preprocess_groupclause(PlannerInfo *root, List *force);
|
||||
static List *extract_rollup_sets(List *groupingSets);
|
||||
static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
|
||||
@ -710,6 +713,9 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
|
||||
}
|
||||
parse->havingQual = (Node *) newHaving;
|
||||
|
||||
/* Remove any redundant GROUP BY columns */
|
||||
remove_useless_groupby_columns(root);
|
||||
|
||||
/*
|
||||
* If we have any outer joins, try to reduce them to plain inner joins.
|
||||
* This step is most easily done after we've done expression
|
||||
@ -3207,6 +3213,159 @@ limit_needed(Query *parse)
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* remove_useless_groupby_columns
|
||||
* Remove any columns in the GROUP BY clause that are redundant due to
|
||||
* being functionally dependent on other GROUP BY columns.
|
||||
*
|
||||
* Since some other DBMSes do not allow references to ungrouped columns, it's
|
||||
* not unusual to find all columns listed in GROUP BY even though listing the
|
||||
* primary-key columns would be sufficient. Deleting such excess columns
|
||||
* avoids redundant sorting work, so it's worth doing. When we do this, we
|
||||
* must mark the plan as dependent on the pkey constraint (compare the
|
||||
* parser's check_ungrouped_columns() and check_functional_grouping()).
|
||||
*
|
||||
* In principle, we could treat any NOT-NULL columns appearing in a UNIQUE
|
||||
* index as the determining columns. But as with check_functional_grouping(),
|
||||
* there's currently no way to represent dependency on a NOT NULL constraint,
|
||||
* so we consider only the pkey for now.
|
||||
*/
|
||||
static void
|
||||
remove_useless_groupby_columns(PlannerInfo *root)
|
||||
{
|
||||
Query *parse = root->parse;
|
||||
Bitmapset **groupbyattnos;
|
||||
Bitmapset **surplusvars;
|
||||
ListCell *lc;
|
||||
int relid;
|
||||
|
||||
/* No chance to do anything if there are less than two GROUP BY items */
|
||||
if (list_length(parse->groupClause) < 2)
|
||||
return;
|
||||
|
||||
/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
|
||||
if (parse->groupingSets)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
|
||||
* Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
|
||||
* that are GROUP BY items.
|
||||
*/
|
||||
groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
|
||||
(list_length(parse->rtable) + 1));
|
||||
foreach(lc, parse->groupClause)
|
||||
{
|
||||
SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
|
||||
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
|
||||
Var *var = (Var *) tle->expr;
|
||||
|
||||
/*
|
||||
* Ignore non-Vars and Vars from other query levels.
|
||||
*
|
||||
* XXX in principle, stable expressions containing Vars could also be
|
||||
* removed, if all the Vars are functionally dependent on other GROUP
|
||||
* BY items. But it's not clear that such cases occur often enough to
|
||||
* be worth troubling over.
|
||||
*/
|
||||
if (!IsA(var, Var) ||
|
||||
var->varlevelsup > 0)
|
||||
continue;
|
||||
|
||||
/* OK, remember we have this Var */
|
||||
relid = var->varno;
|
||||
Assert(relid <= list_length(parse->rtable));
|
||||
groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
|
||||
var->varattno - FirstLowInvalidHeapAttributeNumber);
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider each relation and see if it is possible to remove some of its
|
||||
* Vars from GROUP BY. For simplicity and speed, we do the actual removal
|
||||
* in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
|
||||
* of the column attnos of RTE k that are removable GROUP BY items.
|
||||
*/
|
||||
surplusvars = NULL; /* don't allocate array unless required */
|
||||
relid = 0;
|
||||
foreach(lc, parse->rtable)
|
||||
{
|
||||
RangeTblEntry *rte = (RangeTblEntry *) lfirst(lc);
|
||||
Bitmapset *relattnos;
|
||||
Bitmapset *pkattnos;
|
||||
Oid constraintOid;
|
||||
|
||||
relid++;
|
||||
|
||||
/* Only plain relations could have primary-key constraints */
|
||||
if (rte->rtekind != RTE_RELATION)
|
||||
continue;
|
||||
|
||||
/* Nothing to do unless this rel has multiple Vars in GROUP BY */
|
||||
relattnos = groupbyattnos[relid];
|
||||
if (bms_membership(relattnos) != BMS_MULTIPLE)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Can't remove any columns for this rel if there is no suitable
|
||||
* (i.e., nondeferrable) primary key constraint.
|
||||
*/
|
||||
pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
|
||||
if (pkattnos == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the primary key is a proper subset of relattnos then we have
|
||||
* some items in the GROUP BY that can be removed.
|
||||
*/
|
||||
if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
|
||||
{
|
||||
/*
|
||||
* To easily remember whether we've found anything to do, we don't
|
||||
* allocate the surplusvars[] array until we find something.
|
||||
*/
|
||||
if (surplusvars == NULL)
|
||||
surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
|
||||
(list_length(parse->rtable) + 1));
|
||||
|
||||
/* Remember the attnos of the removable columns */
|
||||
surplusvars[relid] = bms_difference(relattnos, pkattnos);
|
||||
|
||||
/* Also, mark the resulting plan as dependent on this constraint */
|
||||
parse->constraintDeps = lappend_oid(parse->constraintDeps,
|
||||
constraintOid);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we found any surplus Vars, build a new GROUP BY clause without them.
|
||||
* (Note: this may leave some TLEs with unreferenced ressortgroupref
|
||||
* markings, but that's harmless.)
|
||||
*/
|
||||
if (surplusvars != NULL)
|
||||
{
|
||||
List *new_groupby = NIL;
|
||||
|
||||
foreach(lc, parse->groupClause)
|
||||
{
|
||||
SortGroupClause *sgc = (SortGroupClause *) lfirst(lc);
|
||||
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
|
||||
Var *var = (Var *) tle->expr;
|
||||
|
||||
/*
|
||||
* New list must include non-Vars, outer Vars, and anything not
|
||||
* marked as surplus.
|
||||
*/
|
||||
if (!IsA(var, Var) ||
|
||||
var->varlevelsup > 0 ||
|
||||
!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
|
||||
surplusvars[var->varno]))
|
||||
new_groupby = lappend(new_groupby, sgc);
|
||||
}
|
||||
|
||||
parse->groupClause = new_groupby;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* preprocess_groupclause - do preparatory work on GROUP BY clause
|
||||
*
|
||||
|
Reference in New Issue
Block a user