mirror of
https://github.com/postgres/postgres.git
synced 2025-06-03 01:21:48 +03:00
Defer remove_useless_groupby_columns() work until query_planner()
Traditionally, remove_useless_groupby_columns() was called during grouping_planner() directly after the call to preprocess_groupclause(). While in many ways, it made sense to populate the field and remove the functionally dependent columns from processed_groupClause at the same time, it's just that doing so had the disadvantage that remove_useless_groupby_columns() was being called before the RelOptInfos were populated for the relations mentioned in the query. Not having RelOptInfos available meant we needed to manually query the catalog tables to get the required details about the primary key constraint for the table. Here we move the remove_useless_groupby_columns() call to query_planner() and put it directly after the RelOptInfos are populated. This is fine to do as processed_groupClause still isn't final at this point as it can still be modified inside standard_qp_callback() by make_pathkeys_for_sortclauses_extended(). This commit is just a refactor and simply moves remove_useless_groupby_columns() into initsplan.c. A planned follow-up commit will adjust that function so it uses RelOptInfo instead of doing catalog lookups and also teach it how to use unique indexes as proofs to expand the cases where we can remove functionally dependent columns from the GROUP BY. Reviewed-by: Andrei Lepikhov, jian he Discussion: https://postgr.es/m/CAApHDvqLezKwoEBBQd0dp4Y9MDkFBDbny0f3SzEeqOFoU7Z5+A@mail.gmail.com
This commit is contained in:
parent
78c5e141e9
commit
430a5952de
@ -1,7 +1,7 @@
|
||||
/*-------------------------------------------------------------------------
|
||||
*
|
||||
* initsplan.c
|
||||
* Target list, qualification, joininfo initialization routines
|
||||
* Target list, group by, qualification, joininfo initialization routines
|
||||
*
|
||||
* Portions Copyright (c) 1996-2024, PostgreSQL Global Development Group
|
||||
* Portions Copyright (c) 1994, Regents of the University of California
|
||||
@ -14,6 +14,7 @@
|
||||
*/
|
||||
#include "postgres.h"
|
||||
|
||||
#include "catalog/pg_constraint.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "nodes/makefuncs.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
@ -386,6 +387,170 @@ add_vars_to_attr_needed(PlannerInfo *root, List *vars,
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
* GROUP BY
|
||||
*
|
||||
*****************************************************************************/
|
||||
|
||||
/*
|
||||
* remove_useless_groupby_columns
|
||||
* Remove any columns in the GROUP BY clause that are redundant due to
|
||||
* being functionally dependent on other GROUP BY columns.
|
||||
*
|
||||
* Since some other DBMSes do not allow references to ungrouped columns, it's
|
||||
* not unusual to find all columns listed in GROUP BY even though listing the
|
||||
* primary-key columns would be sufficient. Deleting such excess columns
|
||||
* avoids redundant sorting work, so it's worth doing.
|
||||
*
|
||||
* Relcache invalidations will ensure that cached plans become invalidated
|
||||
* when the underlying index of the pkey constraint is dropped.
|
||||
*
|
||||
* Currently, we only make use of pkey constraints for this, however, we may
|
||||
* wish to take this further in the future and also use unique constraints
|
||||
* which have NOT NULL columns. In that case, plan invalidation will still
|
||||
* work since relations will receive a relcache invalidation when a NOT NULL
|
||||
* constraint is dropped.
|
||||
*/
|
||||
void
|
||||
remove_useless_groupby_columns(PlannerInfo *root)
|
||||
{
|
||||
Query *parse = root->parse;
|
||||
Bitmapset **groupbyattnos;
|
||||
Bitmapset **surplusvars;
|
||||
ListCell *lc;
|
||||
int relid;
|
||||
|
||||
/* No chance to do anything if there are less than two GROUP BY items */
|
||||
if (list_length(root->processed_groupClause) < 2)
|
||||
return;
|
||||
|
||||
/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
|
||||
if (parse->groupingSets)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
|
||||
* Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
|
||||
* that are GROUP BY items.
|
||||
*/
|
||||
groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
|
||||
(list_length(parse->rtable) + 1));
|
||||
foreach(lc, root->processed_groupClause)
|
||||
{
|
||||
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
|
||||
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
|
||||
Var *var = (Var *) tle->expr;
|
||||
|
||||
/*
|
||||
* Ignore non-Vars and Vars from other query levels.
|
||||
*
|
||||
* XXX in principle, stable expressions containing Vars could also be
|
||||
* removed, if all the Vars are functionally dependent on other GROUP
|
||||
* BY items. But it's not clear that such cases occur often enough to
|
||||
* be worth troubling over.
|
||||
*/
|
||||
if (!IsA(var, Var) ||
|
||||
var->varlevelsup > 0)
|
||||
continue;
|
||||
|
||||
/* OK, remember we have this Var */
|
||||
relid = var->varno;
|
||||
Assert(relid <= list_length(parse->rtable));
|
||||
groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
|
||||
var->varattno - FirstLowInvalidHeapAttributeNumber);
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider each relation and see if it is possible to remove some of its
|
||||
* Vars from GROUP BY. For simplicity and speed, we do the actual removal
|
||||
* in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
|
||||
* of the column attnos of RTE k that are removable GROUP BY items.
|
||||
*/
|
||||
surplusvars = NULL; /* don't allocate array unless required */
|
||||
relid = 0;
|
||||
foreach(lc, parse->rtable)
|
||||
{
|
||||
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
|
||||
Bitmapset *relattnos;
|
||||
Bitmapset *pkattnos;
|
||||
Oid constraintOid;
|
||||
|
||||
relid++;
|
||||
|
||||
/* Only plain relations could have primary-key constraints */
|
||||
if (rte->rtekind != RTE_RELATION)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We must skip inheritance parent tables as some of the child rels
|
||||
* may cause duplicate rows. This cannot happen with partitioned
|
||||
* tables, however.
|
||||
*/
|
||||
if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
|
||||
continue;
|
||||
|
||||
/* Nothing to do unless this rel has multiple Vars in GROUP BY */
|
||||
relattnos = groupbyattnos[relid];
|
||||
if (bms_membership(relattnos) != BMS_MULTIPLE)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Can't remove any columns for this rel if there is no suitable
|
||||
* (i.e., nondeferrable) primary key constraint.
|
||||
*/
|
||||
pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
|
||||
if (pkattnos == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the primary key is a proper subset of relattnos then we have
|
||||
* some items in the GROUP BY that can be removed.
|
||||
*/
|
||||
if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
|
||||
{
|
||||
/*
|
||||
* To easily remember whether we've found anything to do, we don't
|
||||
* allocate the surplusvars[] array until we find something.
|
||||
*/
|
||||
if (surplusvars == NULL)
|
||||
surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
|
||||
(list_length(parse->rtable) + 1));
|
||||
|
||||
/* Remember the attnos of the removable columns */
|
||||
surplusvars[relid] = bms_difference(relattnos, pkattnos);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we found any surplus Vars, build a new GROUP BY clause without them.
|
||||
* (Note: this may leave some TLEs with unreferenced ressortgroupref
|
||||
* markings, but that's harmless.)
|
||||
*/
|
||||
if (surplusvars != NULL)
|
||||
{
|
||||
List *new_groupby = NIL;
|
||||
|
||||
foreach(lc, root->processed_groupClause)
|
||||
{
|
||||
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
|
||||
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
|
||||
Var *var = (Var *) tle->expr;
|
||||
|
||||
/*
|
||||
* New list must include non-Vars, outer Vars, and anything not
|
||||
* marked as surplus.
|
||||
*/
|
||||
if (!IsA(var, Var) ||
|
||||
var->varlevelsup > 0 ||
|
||||
!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
|
||||
surplusvars[var->varno]))
|
||||
new_groupby = lappend(new_groupby, sgc);
|
||||
}
|
||||
|
||||
root->processed_groupClause = new_groupby;
|
||||
}
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
*
|
||||
|
@ -169,6 +169,9 @@ query_planner(PlannerInfo *root,
|
||||
*/
|
||||
add_base_rels_to_query(root, (Node *) parse->jointree);
|
||||
|
||||
/* Remove any redundant GROUP BY columns */
|
||||
remove_useless_groupby_columns(root);
|
||||
|
||||
/*
|
||||
* Examine the targetlist and join tree, adding entries to baserel
|
||||
* targetlists for all referenced Vars, and generating PlaceHolderInfo
|
||||
|
@ -23,7 +23,6 @@
|
||||
#include "access/sysattr.h"
|
||||
#include "access/table.h"
|
||||
#include "catalog/pg_aggregate.h"
|
||||
#include "catalog/pg_constraint.h"
|
||||
#include "catalog/pg_inherits.h"
|
||||
#include "catalog/pg_proc.h"
|
||||
#include "catalog/pg_type.h"
|
||||
@ -139,7 +138,6 @@ static void preprocess_rowmarks(PlannerInfo *root);
|
||||
static double preprocess_limit(PlannerInfo *root,
|
||||
double tuple_fraction,
|
||||
int64 *offset_est, int64 *count_est);
|
||||
static void remove_useless_groupby_columns(PlannerInfo *root);
|
||||
static List *preprocess_groupclause(PlannerInfo *root, List *force);
|
||||
static List *extract_rollup_sets(List *groupingSets);
|
||||
static List *reorder_grouping_sets(List *groupingSets, List *sortclause);
|
||||
@ -1487,8 +1485,6 @@ grouping_planner(PlannerInfo *root, double tuple_fraction,
|
||||
{
|
||||
/* Preprocess regular GROUP BY clause, if any */
|
||||
root->processed_groupClause = preprocess_groupclause(root, NIL);
|
||||
/* Remove any redundant GROUP BY columns */
|
||||
remove_useless_groupby_columns(root);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2724,166 +2720,6 @@ limit_needed(Query *parse)
|
||||
return false; /* don't need a Limit plan node */
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* remove_useless_groupby_columns
|
||||
* Remove any columns in the GROUP BY clause that are redundant due to
|
||||
* being functionally dependent on other GROUP BY columns.
|
||||
*
|
||||
* Since some other DBMSes do not allow references to ungrouped columns, it's
|
||||
* not unusual to find all columns listed in GROUP BY even though listing the
|
||||
* primary-key columns would be sufficient. Deleting such excess columns
|
||||
* avoids redundant sorting work, so it's worth doing.
|
||||
*
|
||||
* Relcache invalidations will ensure that cached plans become invalidated
|
||||
* when the underlying index of the pkey constraint is dropped.
|
||||
*
|
||||
* Currently, we only make use of pkey constraints for this, however, we may
|
||||
* wish to take this further in the future and also use unique constraints
|
||||
* which have NOT NULL columns. In that case, plan invalidation will still
|
||||
* work since relations will receive a relcache invalidation when a NOT NULL
|
||||
* constraint is dropped.
|
||||
*/
|
||||
static void
|
||||
remove_useless_groupby_columns(PlannerInfo *root)
|
||||
{
|
||||
Query *parse = root->parse;
|
||||
Bitmapset **groupbyattnos;
|
||||
Bitmapset **surplusvars;
|
||||
ListCell *lc;
|
||||
int relid;
|
||||
|
||||
/* No chance to do anything if there are less than two GROUP BY items */
|
||||
if (list_length(root->processed_groupClause) < 2)
|
||||
return;
|
||||
|
||||
/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
|
||||
if (parse->groupingSets)
|
||||
return;
|
||||
|
||||
/*
|
||||
* Scan the GROUP BY clause to find GROUP BY items that are simple Vars.
|
||||
* Fill groupbyattnos[k] with a bitmapset of the column attnos of RTE k
|
||||
* that are GROUP BY items.
|
||||
*/
|
||||
groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
|
||||
(list_length(parse->rtable) + 1));
|
||||
foreach(lc, root->processed_groupClause)
|
||||
{
|
||||
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
|
||||
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
|
||||
Var *var = (Var *) tle->expr;
|
||||
|
||||
/*
|
||||
* Ignore non-Vars and Vars from other query levels.
|
||||
*
|
||||
* XXX in principle, stable expressions containing Vars could also be
|
||||
* removed, if all the Vars are functionally dependent on other GROUP
|
||||
* BY items. But it's not clear that such cases occur often enough to
|
||||
* be worth troubling over.
|
||||
*/
|
||||
if (!IsA(var, Var) ||
|
||||
var->varlevelsup > 0)
|
||||
continue;
|
||||
|
||||
/* OK, remember we have this Var */
|
||||
relid = var->varno;
|
||||
Assert(relid <= list_length(parse->rtable));
|
||||
groupbyattnos[relid] = bms_add_member(groupbyattnos[relid],
|
||||
var->varattno - FirstLowInvalidHeapAttributeNumber);
|
||||
}
|
||||
|
||||
/*
|
||||
* Consider each relation and see if it is possible to remove some of its
|
||||
* Vars from GROUP BY. For simplicity and speed, we do the actual removal
|
||||
* in a separate pass. Here, we just fill surplusvars[k] with a bitmapset
|
||||
* of the column attnos of RTE k that are removable GROUP BY items.
|
||||
*/
|
||||
surplusvars = NULL; /* don't allocate array unless required */
|
||||
relid = 0;
|
||||
foreach(lc, parse->rtable)
|
||||
{
|
||||
RangeTblEntry *rte = lfirst_node(RangeTblEntry, lc);
|
||||
Bitmapset *relattnos;
|
||||
Bitmapset *pkattnos;
|
||||
Oid constraintOid;
|
||||
|
||||
relid++;
|
||||
|
||||
/* Only plain relations could have primary-key constraints */
|
||||
if (rte->rtekind != RTE_RELATION)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* We must skip inheritance parent tables as some of the child rels
|
||||
* may cause duplicate rows. This cannot happen with partitioned
|
||||
* tables, however.
|
||||
*/
|
||||
if (rte->inh && rte->relkind != RELKIND_PARTITIONED_TABLE)
|
||||
continue;
|
||||
|
||||
/* Nothing to do unless this rel has multiple Vars in GROUP BY */
|
||||
relattnos = groupbyattnos[relid];
|
||||
if (bms_membership(relattnos) != BMS_MULTIPLE)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* Can't remove any columns for this rel if there is no suitable
|
||||
* (i.e., nondeferrable) primary key constraint.
|
||||
*/
|
||||
pkattnos = get_primary_key_attnos(rte->relid, false, &constraintOid);
|
||||
if (pkattnos == NULL)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* If the primary key is a proper subset of relattnos then we have
|
||||
* some items in the GROUP BY that can be removed.
|
||||
*/
|
||||
if (bms_subset_compare(pkattnos, relattnos) == BMS_SUBSET1)
|
||||
{
|
||||
/*
|
||||
* To easily remember whether we've found anything to do, we don't
|
||||
* allocate the surplusvars[] array until we find something.
|
||||
*/
|
||||
if (surplusvars == NULL)
|
||||
surplusvars = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
|
||||
(list_length(parse->rtable) + 1));
|
||||
|
||||
/* Remember the attnos of the removable columns */
|
||||
surplusvars[relid] = bms_difference(relattnos, pkattnos);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* If we found any surplus Vars, build a new GROUP BY clause without them.
|
||||
* (Note: this may leave some TLEs with unreferenced ressortgroupref
|
||||
* markings, but that's harmless.)
|
||||
*/
|
||||
if (surplusvars != NULL)
|
||||
{
|
||||
List *new_groupby = NIL;
|
||||
|
||||
foreach(lc, root->processed_groupClause)
|
||||
{
|
||||
SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
|
||||
TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
|
||||
Var *var = (Var *) tle->expr;
|
||||
|
||||
/*
|
||||
* New list must include non-Vars, outer Vars, and anything not
|
||||
* marked as surplus.
|
||||
*/
|
||||
if (!IsA(var, Var) ||
|
||||
var->varlevelsup > 0 ||
|
||||
!bms_is_member(var->varattno - FirstLowInvalidHeapAttributeNumber,
|
||||
surplusvars[var->varno]))
|
||||
new_groupby = lappend(new_groupby, sgc);
|
||||
}
|
||||
|
||||
root->processed_groupClause = new_groupby;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* preprocess_groupclause - do preparatory work on GROUP BY clause
|
||||
*
|
||||
|
@ -74,6 +74,7 @@ extern void add_vars_to_targetlist(PlannerInfo *root, List *vars,
|
||||
Relids where_needed);
|
||||
extern void add_vars_to_attr_needed(PlannerInfo *root, List *vars,
|
||||
Relids where_needed);
|
||||
extern void remove_useless_groupby_columns(PlannerInfo *root);
|
||||
extern void find_lateral_references(PlannerInfo *root);
|
||||
extern void rebuild_lateral_attr_needed(PlannerInfo *root);
|
||||
extern void create_lateral_join_info(PlannerInfo *root);
|
||||
|
Loading…
x
Reference in New Issue
Block a user