Implement Eager Aggregation

Eager aggregation is a query optimization technique that partially pushes aggregation past a join, and finalizes it once all the relations are joined. Eager aggregation may reduce the number of input rows to the join and thus could result in a better overall plan. In the current planner architecture, the separation between the scan/join planning phase and the post-scan/join phase means that aggregation steps are not visible when constructing the join tree, limiting the planner's ability to exploit aggregation-aware optimizations. To implement eager aggregation, we collect information about aggregate functions in the targetlist and HAVING clause, along with grouping expressions from the GROUP BY clause, and store it in the PlannerInfo node. During the scan/join planning phase, this information is used to evaluate each base or join relation to determine whether eager aggregation can be applied. If applicable, we create a separate RelOptInfo, referred to as a grouped relation, to represent the partially-aggregated version of the relation and generate grouped paths for it. Grouped relation paths can be generated in two ways. The first method involves adding sorted and hashed partial aggregation paths on top of the non-grouped paths. To limit planning time, we only consider the cheapest or suitably-sorted non-grouped paths in this step. Alternatively, grouped paths can be generated by joining a grouped relation with a non-grouped relation. Joining two grouped relations is currently not supported. To further limit planning time, we currently adopt a strategy where partial aggregation is pushed only to the lowest feasible level in the join tree where it provides a significant reduction in row count. This strategy also helps ensure that all grouped paths for the same grouped relation produce the same set of rows, which is important to support a fundamental assumption of the planner. 
For the partial aggregation that is pushed down to a non-aggregated relation, we need to consider all expressions from this relation that are involved in upper join clauses and include them in the grouping keys, using compatible operators. This is essential to ensure that an aggregated row from the partial aggregation matches the other side of the join if and only if each row in the partial group does. This ensures that all rows within the same partial group share the same "destiny", which is crucial for maintaining correctness. One restriction is that we cannot push partial aggregation down to a relation that is in the nullable side of an outer join, because the NULL-extended rows produced by the outer join would not be available when we perform the partial aggregation, while with a non-eager-aggregation plan these rows are available for the top-level aggregation. Pushing partial aggregation in this case may result in the rows being grouped differently than expected, or produce incorrect values from the aggregate functions. If we have generated a grouped relation for the topmost join relation, we finalize its paths at the end. The final paths will compete in the usual way with paths built from regular planning. The patch was originally proposed by Antonin Houska in 2017. This commit reworks various important aspects and rewrites most of the current code. However, the original patch and reviews were very useful. 
Author: Richard Guo <guofenglinux@gmail.com>
Author: Antonin Houska <ah@cybertec.at> (in an older version)
Reviewed-by: Robert Haas <robertmhaas@gmail.com>
Reviewed-by: Jian He <jian.universality@gmail.com>
Reviewed-by: Tender Wang <tndrwang@gmail.com>
Reviewed-by: Matheus Alcantara <matheusssilv97@gmail.com>
Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us>
Reviewed-by: David Rowley <dgrowleyml@gmail.com>
Reviewed-by: Tomas Vondra <tomas@vondra.me> (in an older version)
Reviewed-by: Andy Fan <zhihuifan1213@163.com> (in an older version)
Reviewed-by: Ashutosh Bapat <ashutosh.bapat.oss@gmail.com> (in an older version)
Discussion: https://postgr.es/m/CAMbWs48jzLrPt1J_00ZcPZXWUQKawQOFE8ROc-ADiYqsqrpBNw@mail.gmail.com
2025-11-28 11:44:57 +03:00 · 2025-10-08 17:04:23 +09:00
parent 185e304263
commit 8e11859102
26 changed files with 4293 additions and 76 deletions
--- a/src/backend/optimizer/plan/initsplan.c
+++ b/src/backend/optimizer/plan/initsplan.c
@@ -14,6 +14,7 @@
 */
 #include "postgres.h"

+#include "access/nbtree.h"
 #include "catalog/pg_constraint.h"
 #include "catalog/pg_type.h"
 #include "nodes/makefuncs.h"
@@ -31,6 +32,7 @@
 #include "optimizer/restrictinfo.h"
 #include "parser/analyze.h"
 #include "rewrite/rewriteManip.h"
+#include "utils/fmgroids.h"
 #include "utils/lsyscache.h"
 #include "utils/rel.h"
 #include "utils/typcache.h"
@@ -81,6 +83,12 @@ typedef struct JoinTreeItem
 } JoinTreeItem;


+static bool is_partial_agg_memory_risky(PlannerInfo *root);
+static void create_agg_clause_infos(PlannerInfo *root);
+static void create_grouping_expr_infos(PlannerInfo *root);
+static EquivalenceClass *get_eclass_for_sortgroupclause(PlannerInfo *root,
+														SortGroupClause *sgc,
+														Expr *expr);
 static void extract_lateral_references(PlannerInfo *root, RelOptInfo *brel,
 									   Index rtindex);
 static List *deconstruct_recurse(PlannerInfo *root, Node *jtnode,
@@ -628,6 +636,368 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	}
 }

+/*
+ * setup_eager_aggregation
+ *	  Check if eager aggregation is applicable, and if so collect the query's
+ *	  suitable aggregate and grouping expressions into fields of "root".
+ */
+void
+setup_eager_aggregation(PlannerInfo *root)
+{
+	/*
+	 * Don't apply eager aggregation if disabled by user.
+	 */
+	if (!enable_eager_aggregate)
+		return;
+
+	/*
+	 * Don't apply eager aggregation if there are no available GROUP BY
+	 * clauses.
+	 */
+	if (!root->processed_groupClause)
+		return;
+
+	/*
+	 * For now we don't try to support grouping sets.
+	 */
+	if (root->parse->groupingSets)
+		return;
+
+	/*
+	 * For now we don't try to support DISTINCT or ORDER BY aggregates.
+	 */
+	if (root->numOrderedAggs > 0)
+		return;
+
+	/*
+	 * If there are any aggregates that do not support partial mode, or any
+	 * partial aggregates that are non-serializable, do not apply eager
+	 * aggregation.
+	 */
+	if (root->hasNonPartialAggs || root->hasNonSerialAggs)
+		return;
+
+	/*
+	 * We don't try to apply eager aggregation if there are set-returning
+	 * functions in targetlist.
+	 */
+	if (root->parse->hasTargetSRFs)
+		return;
+
+	/*
+	 * Eager aggregation only makes sense if there are multiple base rels in
+	 * the query.
+	 */
+	if (bms_membership(root->all_baserels) != BMS_MULTIPLE)
+		return;
+
+	/*
+	 * Don't apply eager aggregation if any aggregate poses a risk of
+	 * excessive memory usage during partial aggregation.
+	 */
+	if (is_partial_agg_memory_risky(root))
+		return;
+
+	/*
+	 * Collect aggregate expressions and plain Vars that appear in the
+	 * targetlist and havingQual.
+	 */
+	create_agg_clause_infos(root);
+
+	/*
+	 * agg_clause_list stays NIL if no aggregate was collected, or if some
+	 * expression ruled eager aggregation out; either way we cannot apply it.
+	 */
+	if (root->agg_clause_list == NIL)
+		return;
+
+	/*
+	 * Collect grouping exprs; group_expr_list stays NIL if any is unsuitable.
+	 */
+	create_grouping_expr_infos(root);
+}
+
+/*
+ * is_partial_agg_memory_risky
+ *	  Check if any aggregate poses a risk of excessive memory usage during
+ *	  partial aggregation.
+ *
+ * Returns true if any entry of root->aggtransinfos has a negative
+ * aggtransspace value, which indicates that its transition state data can
+ * grow unboundedly in size.  Applying eager aggregation in such cases risks
+ * high memory usage since partial aggregation results might be stored in
+ * join hash tables or materialized nodes.
+ */
+static bool
+is_partial_agg_memory_risky(PlannerInfo *root)
+{
+	ListCell   *lc;
+
+	foreach(lc, root->aggtransinfos)
+	{
+		AggTransInfo *transinfo = lfirst_node(AggTransInfo, lc);
+
+		if (transinfo->aggtransspace < 0)
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * create_agg_clause_infos
+ *	  Collect plain Vars and an AggClauseInfo per Aggref from the targetlist
+ *	  and havingQual into root; store nothing if eager aggregation is unfit.
+ */
+static void
+create_agg_clause_infos(PlannerInfo *root)
+{
+	List	   *tlist_exprs;
+	List	   *agg_clause_list = NIL;
+	List	   *tlist_vars = NIL;
+	Relids		aggregate_relids = NULL;
+	bool		eager_agg_applicable = true;
+	ListCell   *lc;
+
+	Assert(root->agg_clause_list == NIL);
+	Assert(root->tlist_vars == NIL);
+
+	tlist_exprs = pull_var_clause((Node *) root->processed_tlist,
+								  PVC_INCLUDE_AGGREGATES |
+								  PVC_RECURSE_WINDOWFUNCS |
+								  PVC_RECURSE_PLACEHOLDERS);
+
+	/*
+	 * Aggregates within the HAVING clause need to be processed in the same
+	 * way as those in the targetlist.  Note that HAVING can contain Aggrefs
+	 * but not WindowFuncs.
+	 */
+	if (root->parse->havingQual != NULL)
+	{
+		List	   *having_exprs;
+
+		having_exprs = pull_var_clause((Node *) root->parse->havingQual,
+									   PVC_INCLUDE_AGGREGATES |
+									   PVC_RECURSE_PLACEHOLDERS);
+		if (having_exprs != NIL)
+		{
+			tlist_exprs = list_concat(tlist_exprs, having_exprs);
+			list_free(having_exprs);
+		}
+	}
+
+	foreach(lc, tlist_exprs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc);
+		Aggref	   *aggref;
+		Relids		agg_eval_at;
+		AggClauseInfo *ac_info;
+
+		/* For now we don't try to support GROUPING() expressions */
+		if (IsA(expr, GroupingFunc))
+		{
+			eager_agg_applicable = false;
+			break;
+		}
+
+		/* Collect plain Vars, skipping duplicates, for future reference */
+		if (IsA(expr, Var))
+		{
+			tlist_vars = list_append_unique(tlist_vars, expr);
+			continue;
+		}
+
+		aggref = castNode(Aggref, expr);
+
+		Assert(aggref->aggorder == NIL);
+		Assert(aggref->aggdistinct == NIL);
+
+		/*
+		 * If there are any securityQuals, do not try to apply eager
+		 * aggregation if any non-leakproof aggregate functions are present.
+		 * This is overly strict, but for now...
+		 */
+		if (root->qual_security_level > 0 &&
+			!get_func_leakproof(aggref->aggfnoid))
+		{
+			eager_agg_applicable = false;
+			break;
+		}
+
+		agg_eval_at = pull_varnos(root, (Node *) aggref);
+
+		/*
+		 * If the aggregates seen so far collectively reference all base
+		 * relations in the query, then eager aggregation is not applicable.
+		 */
+		aggregate_relids = bms_add_members(aggregate_relids, agg_eval_at);
+		if (bms_is_subset(root->all_baserels, aggregate_relids))
+		{
+			eager_agg_applicable = false;
+			break;
+		}
+
+		/* OK, create the AggClauseInfo node */
+		ac_info = makeNode(AggClauseInfo);
+		ac_info->aggref = aggref;
+		ac_info->agg_eval_at = agg_eval_at;
+
+		/* ... and add it to the list */
+		agg_clause_list = list_append_unique(agg_clause_list, ac_info);
+	}
+
+	list_free(tlist_exprs);
+
+	if (eager_agg_applicable)
+	{
+		root->agg_clause_list = agg_clause_list;
+		root->tlist_vars = tlist_vars;
+	}
+	else
+	{
+		list_free_deep(agg_clause_list);
+		list_free(tlist_vars);
+	}
+}
+
+/*
+ * create_grouping_expr_infos
+ *	  Create a GroupingExprInfo for each expression usable as grouping key.
+ *
+ * If any grouping expression is unsuitable (not a plain Var, or equality does
+ * not imply image equality), we return leaving root->group_expr_list NIL.
+ */
+static void
+create_grouping_expr_infos(PlannerInfo *root)
+{
+	List	   *exprs = NIL;
+	List	   *sortgrouprefs = NIL;
+	List	   *ecs = NIL;
+	ListCell   *lc,
+			   *lc1,
+			   *lc2,
+			   *lc3;
+
+	Assert(root->group_expr_list == NIL);
+
+	foreach(lc, root->processed_groupClause)
+	{
+		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
+		TargetEntry *tle = get_sortgroupclause_tle(sgc, root->processed_tlist);
+		TypeCacheEntry *tce;
+		Oid			equalimageproc;
+
+		Assert(tle->ressortgroupref > 0);
+
+		/*
+		 * For now we only support plain Vars as grouping expressions.
+		 */
+		if (!IsA(tle->expr, Var))
+			return;
+
+		/*
+		 * Eager aggregation is only possible if equality implies image
+		 * equality for each grouping key.  Otherwise, placing keys with
+		 * different byte images into the same group may result in the loss of
+		 * information that could be necessary to evaluate upper qual clauses.
+		 *
+		 * For instance, the NUMERIC data type is not supported, as values
+		 * that are considered equal by the equality operator (e.g., 0 and
+		 * 0.0) can have different scales.
+		 */
+		tce = lookup_type_cache(exprType((Node *) tle->expr),
+								TYPECACHE_BTREE_OPFAMILY);
+		if (!OidIsValid(tce->btree_opf) ||
+			!OidIsValid(tce->btree_opintype))
+			return;
+
+		equalimageproc = get_opfamily_proc(tce->btree_opf,
+										   tce->btree_opintype,
+										   tce->btree_opintype,
+										   BTEQUALIMAGE_PROC);
+		if (!OidIsValid(equalimageproc) ||
+			!DatumGetBool(OidFunctionCall1Coll(equalimageproc,
+											   tce->typcollation,
+											   ObjectIdGetDatum(tce->btree_opintype))))
+			return;
+
+		exprs = lappend(exprs, tle->expr);
+		sortgrouprefs = lappend_int(sortgrouprefs, tle->ressortgroupref);
+		ecs = lappend(ecs, get_eclass_for_sortgroupclause(root, sgc, tle->expr));
+	}
+
+	/*
+	 * All keys are usable: build a GroupingExprInfo for each (ec may be NULL).
+	 */
+	forthree(lc1, exprs, lc2, sortgrouprefs, lc3, ecs)
+	{
+		Expr	   *expr = (Expr *) lfirst(lc1);
+		int			sortgroupref = lfirst_int(lc2);
+		EquivalenceClass *ec = (EquivalenceClass *) lfirst(lc3);
+		GroupingExprInfo *ge_info;
+
+		ge_info = makeNode(GroupingExprInfo);
+		ge_info->expr = (Expr *) copyObject(expr);
+		ge_info->sortgroupref = sortgroupref;
+		ge_info->ec = ec;
+
+		root->group_expr_list = lappend(root->group_expr_list, ge_info);
+	}
+}
+
+/*
+ * get_eclass_for_sortgroupclause
+ *	  Given a group clause and its expression, find an existing equivalence
+ *	  class containing the expression; return NULL if none or not sortable.
+ */
+static EquivalenceClass *
+get_eclass_for_sortgroupclause(PlannerInfo *root, SortGroupClause *sgc,
+							   Expr *expr)
+{
+	Oid			opfamily,
+				opcintype,
+				collation;
+	CompareType cmptype;
+	Oid			equality_op;
+	List	   *opfamilies;
+
+	/* Punt if the group clause is not sortable */
+	if (!OidIsValid(sgc->sortop))
+		return NULL;
+
+	/* Find the operator in pg_amop --- failure shouldn't happen */
+	if (!get_ordering_op_properties(sgc->sortop,
+									&opfamily, &opcintype, &cmptype))
+		elog(ERROR, "operator %u is not a valid ordering operator",
+			 sgc->sortop);
+
+	/* Because SortGroupClause doesn't carry collation, consult the expr */
+	collation = exprCollation((Node *) expr);
+
+	/*
+	 * EquivalenceClasses need to contain opfamily lists based on the family
+	 * membership of mergejoinable equality operators, which could belong to
+	 * more than one opfamily.  So we have to look up the opfamily's equality
+	 * operator and get its membership.
+	 */
+	equality_op = get_opfamily_member_for_cmptype(opfamily,
+												  opcintype,
+												  opcintype,
+												  COMPARE_EQ);
+	if (!OidIsValid(equality_op))	/* shouldn't happen */
+		elog(ERROR, "missing operator %d(%u,%u) in opfamily %u",
+			 COMPARE_EQ, opcintype, opcintype, opfamily);
+	opfamilies = get_mergejoin_opfamilies(equality_op);
+	if (!opfamilies)			/* certainly should find some */
+		elog(ERROR, "could not find opfamilies for equality operator %u",
+			 equality_op);
+
+	/* Now find a matching existing EquivalenceClass, if there is one */
+	return get_eclass_for_sort_expr(root, expr, opfamilies, opcintype,
+									collation, sgc->tleSortGroupRef,
+									NULL, false);
+}
+
 /*****************************************************************************
 *
 *	  LATERAL REFERENCES