Remove redundant grouping and DISTINCT columns.

Avoid explicitly grouping by columns that we know are redundant for sorting; for example, we need to group by only one of x and y in "SELECT ... WHERE x = y GROUP BY x, y". This comes up more often than you might think, as shown by the changes in the regression tests. It's nearly free to detect too, since we are just piggybacking on the existing logic that detects redundant pathkeys. (In some of the existing plans that change, it's visible that a sort step preceding the grouping step already didn't bother to sort by the redundant column, making the old plan a bit silly-looking.) To do this, build processed_groupClause and processed_distinctClause lists that omit any provably-redundant sort items, and consult those, not the originals, where relevant. This means that within the planner, one should usually consult root->processed_groupClause or root->processed_distinctClause if one wants to know which columns are to be grouped on; but to check whether grouping or distinct-ing is happening at all, check non-NIL-ness of parse->groupClause or parse->distinctClause. This is comparable to longstanding rules about handling the HAVING clause, so I don't think it'll be a huge maintenance problem. nodeAgg.c also needs minor mods, because it's now possible to generate AGG_PLAIN and AGG_SORTED Agg nodes with zero grouping columns. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/185315.1672179489@sss.pgh.pa.us
2025-08-25 20:23:07 +03:00 · 2023-01-18 12:37:57 -05:00
parent d540a02a72
commit 8d83a5d0a2
16 changed files with 303 additions and 171 deletions
--- a/src/backend/optimizer/path/pathkeys.c
+++ b/src/backend/optimizer/path/pathkeys.c
@@ -1152,18 +1152,62 @@ List *
 make_pathkeys_for_sortclauses(PlannerInfo *root,
 							  List *sortclauses,
 							  List *tlist)
+{
+	List	   *result;
+	bool		sortable;
+
+	result = make_pathkeys_for_sortclauses_extended(root,
+													&sortclauses,
+													tlist,
+													false,
+													&sortable);
+	/* It's caller error if not all clauses were sortable */
+	Assert(sortable);
+	return result;
+}
+
+/*
+ * make_pathkeys_for_sortclauses_extended
+ *		Generate a pathkeys list that represents the sort order specified
+ *		by a list of SortGroupClauses
+ *
+ * The comments for make_pathkeys_for_sortclauses apply here too. In addition:
+ *
+ * If remove_redundant is true, then any sort clauses that are found to
+ * give rise to redundant pathkeys are removed from the sortclauses list
+ * (which therefore must be pass-by-reference in this version).
+ *
+ * *sortable is set to true if all the sort clauses are in fact sortable.
+ * If any are not, they are ignored except for setting *sortable false.
+ * (In that case, the output pathkey list isn't really useful.  However,
+ * we process the whole sortclauses list anyway, because it's still valid
+ * to remove any clauses that can be proven redundant via the eclass logic.
+ * Even though we'll have to hash in that case, we might as well not hash
+ * redundant columns.)
+ */
+List *
+make_pathkeys_for_sortclauses_extended(PlannerInfo *root,
+									   List **sortclauses,
+									   List *tlist,
+									   bool remove_redundant,
+									   bool *sortable)
 {
 	List	   *pathkeys = NIL;
 	ListCell   *l;

-	foreach(l, sortclauses)
+	*sortable = true;
+	foreach(l, *sortclauses)
 	{
 		SortGroupClause *sortcl = (SortGroupClause *) lfirst(l);
 		Expr	   *sortkey;
 		PathKey    *pathkey;

 		sortkey = (Expr *) get_sortgroupclause_expr(sortcl, tlist);
-		Assert(OidIsValid(sortcl->sortop));
+		if (!OidIsValid(sortcl->sortop))
+		{
+			*sortable = false;
+			continue;
+		}
 		pathkey = make_pathkey_from_sortop(root,
 										   sortkey,
 										   root->nullable_baserels,
@@ -1175,6 +1219,8 @@ make_pathkeys_for_sortclauses(PlannerInfo *root,
 		/* Canonical form eliminates redundant ordering keys */
 		if (!pathkey_is_redundant(pathkey, pathkeys))
 			pathkeys = lappend(pathkeys, pathkey);
+		else if (remove_redundant)
+			*sortclauses = foreach_delete_current(*sortclauses, l);
 	}
 	return pathkeys;
 }
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -2404,7 +2404,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	 * sizing.
 	 */
 	maxref = 0;
-	foreach(lc, root->parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *gc = (SortGroupClause *) lfirst(lc);

@@ -2415,7 +2415,7 @@ create_groupingsets_plan(PlannerInfo *root, GroupingSetsPath *best_path)
 	grouping_map = (AttrNumber *) palloc0((maxref + 1) * sizeof(AttrNumber));

 	/* Now look up the column numbers in the child's tlist */
-	foreach(lc, root->parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *gc = (SortGroupClause *) lfirst(lc);
 		TargetEntry *tle = get_sortgroupclause_tle(gc, subplan->targetlist);
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -95,17 +95,9 @@ create_upper_paths_hook_type create_upper_paths_hook = NULL;
 #define EXPRKIND_TABLEFUNC			11
 #define EXPRKIND_TABLEFUNC_LATERAL	12

-/* Passthrough data for standard_qp_callback */
-typedef struct
-{
-	List	   *activeWindows;	/* active windows, if any */
-	List	   *groupClause;	/* overrides parse->groupClause */
-} standard_qp_extra;
-
 /*
 * Data specific to grouping sets
 */
-
 typedef struct
 {
 	List	   *rollups;
@@ -129,6 +121,13 @@ typedef struct
 								 * clauses per Window */
 } WindowClauseSortData;

+/* Passthrough data for standard_qp_callback */
+typedef struct
+{
+	List	   *activeWindows;	/* active windows, if any */
+	grouping_sets_data *gset_data;	/* grouping sets data, if any */
+} standard_qp_extra;
+
 /* Local functions */
 static Node *preprocess_expression(PlannerInfo *root, Node *expr, int kind);
 static void preprocess_qual_conditions(PlannerInfo *root, Node *jtnode);
@@ -636,6 +635,8 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	root->rowMarks = NIL;
 	memset(root->upper_rels, 0, sizeof(root->upper_rels));
 	memset(root->upper_targets, 0, sizeof(root->upper_targets));
+	root->processed_groupClause = NIL;
+	root->processed_distinctClause = NIL;
 	root->processed_tlist = NIL;
 	root->update_colnos = NIL;
 	root->grouping_map = NULL;
@@ -1032,9 +1033,6 @@ subquery_planner(PlannerGlobal *glob, Query *parse,
 	}
 	parse->havingQual = (Node *) newHaving;

-	/* Remove any redundant GROUP BY columns */
-	remove_useless_groupby_columns(root);
-
 	/*
 	 * If we have any outer joins, try to reduce them to plain inner joins.
 	 * This step is most easily done after we've done expression
@@ -1393,11 +1391,12 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)
 		{
 			gset_data = preprocess_grouping_sets(root);
 		}
-		else
+		else if (parse->groupClause)
 		{
 			/* Preprocess regular GROUP BY clause, if any */
-			if (parse->groupClause)
-				parse->groupClause = preprocess_groupclause(root, NIL);
+			root->processed_groupClause = preprocess_groupclause(root, NIL);
+			/* Remove any redundant GROUP BY columns */
+			remove_useless_groupby_columns(root);
 		}

 		/*
@@ -1475,9 +1474,7 @@ grouping_planner(PlannerInfo *root, double tuple_fraction)

 		/* Set up data needed by standard_qp_callback */
 		qp_extra.activeWindows = activeWindows;
-		qp_extra.groupClause = (gset_data
-								? (gset_data->rollups ? linitial_node(RollupData, gset_data->rollups)->groupClause : NIL)
-								: parse->groupClause);
+		qp_extra.gset_data = gset_data;

 		/*
 		 * Generate the best unsorted and presorted paths for the scan/join
@@ -2011,6 +2008,12 @@ preprocess_grouping_sets(PlannerInfo *root)
 	gd->unsortable_refs = NULL;
 	gd->unsortable_sets = NIL;

+	/*
+	 * We don't currently make any attempt to optimize the groupClause when
+	 * there are grouping sets, so just duplicate it in processed_groupClause.
+	 */
+	root->processed_groupClause = parse->groupClause;
+
 	if (parse->groupClause)
 	{
 		ListCell   *lc;
@@ -2638,7 +2641,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	int			relid;

 	/* No chance to do anything if there are less than two GROUP BY items */
-	if (list_length(parse->groupClause) < 2)
+	if (list_length(root->processed_groupClause) < 2)
 		return;

 	/* Don't fiddle with the GROUP BY clause if the query has grouping sets */
@@ -2652,7 +2655,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	 */
 	groupbyattnos = (Bitmapset **) palloc0(sizeof(Bitmapset *) *
 										   (list_length(parse->rtable) + 1));
-	foreach(lc, parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
 		TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2747,7 +2750,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 	{
 		List	   *new_groupby = NIL;

-		foreach(lc, parse->groupClause)
+		foreach(lc, root->processed_groupClause)
 		{
 			SortGroupClause *sgc = lfirst_node(SortGroupClause, lc);
 			TargetEntry *tle = get_sortgroupclause_tle(sgc, parse->targetList);
@@ -2764,7 +2767,7 @@ remove_useless_groupby_columns(PlannerInfo *root)
 				new_groupby = lappend(new_groupby, sgc);
 		}

-		parse->groupClause = new_groupby;
+		root->processed_groupClause = new_groupby;
 	}
 }

@@ -2784,6 +2787,10 @@ remove_useless_groupby_columns(PlannerInfo *root)
 * Note: we need no comparable processing of the distinctClause because
 * the parser already enforced that that matches ORDER BY.
 *
+ * Note: we return a fresh List, but its elements are the same
+ * SortGroupClauses appearing in parse->groupClause.  This is important
+ * because later processing may modify the processed_groupClause list.
+ *
 * For grouping sets, the order of items is instead forced to agree with that
 * of the grouping set (and items not in the grouping set are skipped). The
 * work of sorting the order of grouping set elements to match the ORDER BY if
@@ -2814,7 +2821,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)

 	/* If no ORDER BY, nothing useful to do here */
 	if (parse->sortClause == NIL)
-		return parse->groupClause;
+		return list_copy(parse->groupClause);

 	/*
 	 * Scan the ORDER BY clause and construct a list of matching GROUP BY
@@ -2845,7 +2852,7 @@ preprocess_groupclause(PlannerInfo *root, List *force)

 	/* If no match at all, no point in reordering GROUP BY */
 	if (new_groupclause == NIL)
-		return parse->groupClause;
+		return list_copy(parse->groupClause);

 	/*
 	 * Add any remaining GROUP BY items to the new list, but only if we were
@@ -2861,10 +2868,10 @@ preprocess_groupclause(PlannerInfo *root, List *force)

 		if (list_member_ptr(new_groupclause, gc))
 			continue;			/* it matched an ORDER BY item */
-		if (partial_match)
-			return parse->groupClause;	/* give up, no common sort possible */
-		if (!OidIsValid(gc->sortop))
-			return parse->groupClause;	/* give up, GROUP BY can't be sorted */
+		if (partial_match)		/* give up, no common sort possible */
+			return list_copy(parse->groupClause);
+		if (!OidIsValid(gc->sortop))	/* give up, GROUP BY can't be sorted */
+			return list_copy(parse->groupClause);
 		new_groupclause = lappend(new_groupclause, gc);
 	}

@@ -3169,23 +3176,17 @@ has_volatile_pathkey(List *keys)
 }

 /*
- * make_pathkeys_for_groupagg
- *		Determine the pathkeys for the GROUP BY clause and/or any ordered
- *		aggregates.  We expect at least one of these here.
+ * adjust_group_pathkeys_for_groupagg
+ *		Add pathkeys to root->group_pathkeys to reflect the best set of
+ *		pre-ordered input for ordered aggregates.
 *
- * Building the pathkeys for the GROUP BY is simple.  Most of the complexity
- * involved here comes from calculating the best pathkeys for ordered
- * aggregates.  We define "best" as the pathkeys that suit the most number of
+ * We define "best" as the pathkeys that suit the largest number of
 * aggregate functions.  We find these by looking at the first ORDER BY /
 * DISTINCT aggregate and take the pathkeys for that before searching for
 * other aggregates that require the same or a more strict variation of the
 * same pathkeys.  We then repeat that process for any remaining aggregates
 * with different pathkeys and if we find another set of pathkeys that suits a
- * larger number of aggregates then we return those pathkeys instead.
- *
- * *number_groupby_pathkeys gets set to the number of elements in the returned
- * list that belong to the GROUP BY clause.  Any elements above this number
- * must belong to ORDER BY / DISTINCT aggregates.
+ * larger number of aggregates then we select those pathkeys instead.
 *
 * When the best pathkeys are found we also mark each Aggref that can use
 * those pathkeys as aggpresorted = true.
@@ -3203,43 +3204,24 @@ has_volatile_pathkey(List *keys)
 * query contains, we always force Aggrefs with volatile functions to perform
 * their own sorts.
 */
-static List *
-make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
-						   int *number_groupby_pathkeys)
+static void
+adjust_group_pathkeys_for_groupagg(PlannerInfo *root)
 {
-	List	   *grouppathkeys = NIL;
+	List	   *grouppathkeys = root->group_pathkeys;
 	List	   *bestpathkeys;
 	Bitmapset  *bestaggs;
 	Bitmapset  *unprocessed_aggs;
 	ListCell   *lc;
 	int			i;

-	Assert(groupClause != NIL || root->numOrderedAggs > 0);
+	/* Shouldn't be here if there are grouping sets */
+	Assert(root->parse->groupingSets == NIL);
+	/* Shouldn't be here unless there are some ordered aggregates */
+	Assert(root->numOrderedAggs > 0);

-	if (groupClause != NIL)
-	{
-		/* no pathkeys possible if there's an unsortable GROUP BY */
-		if (!grouping_is_sortable(groupClause))
-		{
-			*number_groupby_pathkeys = 0;
-			return NIL;
-		}
-
-		grouppathkeys = make_pathkeys_for_sortclauses(root, groupClause,
-													  tlist);
-		*number_groupby_pathkeys = list_length(grouppathkeys);
-	}
-	else
-		*number_groupby_pathkeys = 0;
-
-	/*
-	 * We can't add pathkeys for ordered aggregates if there are any grouping
-	 * sets.  All handling specific to ordered aggregates must be done by the
-	 * executor in that case.
-	 */
-	if (root->numOrderedAggs == 0 || root->parse->groupingSets != NIL ||
-		!enable_presorted_aggregate)
-		return grouppathkeys;
+	/* Do nothing if disabled */
+	if (!enable_presorted_aggregate)
+		return;

 	/*
 	 * Make a first pass over all AggInfos to collect a Bitmapset containing
@@ -3370,6 +3352,14 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
 		}
 	}

+	/*
+	 * If we found any ordered aggregates, update root->group_pathkeys to add
+	 * the best set of aggregate pathkeys.  Note that bestpathkeys includes
+	 * the original GROUP BY pathkeys already.
+	 */
+	if (bestpathkeys != NIL)
+		root->group_pathkeys = bestpathkeys;
+
 	/*
 	 * Now that we've found the best set of aggregates we can set the
 	 * presorted flag to indicate to the executor that it needn't bother
@@ -3390,16 +3380,6 @@ make_pathkeys_for_groupagg(PlannerInfo *root, List *groupClause, List *tlist,
 			aggref->aggpresorted = true;
 		}
 	}
-
-	/*
-	 * bestpathkeys includes the GROUP BY pathkeys, so if we found any ordered
-	 * aggregates, then return bestpathkeys, otherwise return the
-	 * grouppathkeys.
-	 */
-	if (bestpathkeys != NIL)
-		return bestpathkeys;
-
-	return grouppathkeys;
 }

 /*
@@ -3417,11 +3397,62 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	 * Calculate pathkeys that represent grouping/ordering and/or ordered
 	 * aggregate requirements.
 	 */
-	if (qp_extra->groupClause != NIL || root->numOrderedAggs > 0)
-		root->group_pathkeys = make_pathkeys_for_groupagg(root,
-														  qp_extra->groupClause,
-														  tlist,
-														  &root->num_groupby_pathkeys);
+	if (qp_extra->gset_data)
+	{
+		/*
+		 * With grouping sets, just use the first RollupData's groupClause. We
+		 * don't make any effort to optimize grouping clauses when there are
+		 * grouping sets, nor can we combine aggregate ordering keys with
+		 * grouping.
+		 */
+		List	   *rollups = qp_extra->gset_data->rollups;
+		List	   *groupClause = (rollups ? linitial_node(RollupData, rollups)->groupClause : NIL);
+
+		if (grouping_is_sortable(groupClause))
+		{
+			root->group_pathkeys = make_pathkeys_for_sortclauses(root,
+																 groupClause,
+																 tlist);
+			root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+		}
+		else
+		{
+			root->group_pathkeys = NIL;
+			root->num_groupby_pathkeys = 0;
+		}
+	}
+	else if (parse->groupClause || root->numOrderedAggs > 0)
+	{
+		/*
+		 * With a plain GROUP BY list, we can remove any grouping items that
+		 * are proven redundant by EquivalenceClass processing.  For example,
+		 * we can remove y given "WHERE x = y GROUP BY x, y".  These aren't
+		 * especially common cases, but they're nearly free to detect.  Note
+		 * that we remove redundant items from processed_groupClause but not
+		 * the original parse->groupClause.
+		 */
+		bool		sortable;
+
+		root->group_pathkeys =
+			make_pathkeys_for_sortclauses_extended(root,
+												   &root->processed_groupClause,
+												   tlist,
+												   true,
+												   &sortable);
+		if (!sortable)
+		{
+			/* Can't sort; no point in considering aggregate ordering either */
+			root->group_pathkeys = NIL;
+			root->num_groupby_pathkeys = 0;
+		}
+		else
+		{
+			root->num_groupby_pathkeys = list_length(root->group_pathkeys);
+			/* If we have ordered aggs, consider adding onto group_pathkeys */
+			if (root->numOrderedAggs > 0)
+				adjust_group_pathkeys_for_groupagg(root);
+		}
+	}
 	else
 	{
 		root->group_pathkeys = NIL;
@@ -3440,12 +3471,27 @@ standard_qp_callback(PlannerInfo *root, void *extra)
 	else
 		root->window_pathkeys = NIL;

-	if (parse->distinctClause &&
-		grouping_is_sortable(parse->distinctClause))
+	/*
+	 * As with GROUP BY, we can discard any DISTINCT items that are proven
+	 * redundant by EquivalenceClass processing.  The non-redundant list is
+	 * kept in root->processed_distinctClause, leaving the original
+	 * parse->distinctClause alone.
+	 */
+	if (parse->distinctClause)
+	{
+		bool		sortable;
+
+		/* Make a copy since pathkey processing can modify the list */
+		root->processed_distinctClause = list_copy(parse->distinctClause);
 		root->distinct_pathkeys =
-			make_pathkeys_for_sortclauses(root,
-										  parse->distinctClause,
-										  tlist);
+			make_pathkeys_for_sortclauses_extended(root,
+												   &root->processed_distinctClause,
+												   tlist,
+												   true,
+												   &sortable);
+		if (!sortable)
+			root->distinct_pathkeys = NIL;
+	}
 	else
 		root->distinct_pathkeys = NIL;

@@ -3574,8 +3620,8 @@ get_number_of_groups(PlannerInfo *root,
 		}
 		else
 		{
-			/* Plain GROUP BY */
-			groupExprs = get_sortgrouplist_exprs(parse->groupClause,
+			/* Plain GROUP BY -- estimate based on optimized groupClause */
+			groupExprs = get_sortgrouplist_exprs(root->processed_groupClause,
 												 target_list);

 			dNumGroups = estimate_num_groups(root, groupExprs, path_rows,
@@ -3653,8 +3699,8 @@ create_grouping_paths(PlannerInfo *root,

 		/*
 		 * Determine whether it's possible to perform sort-based
-		 * implementations of grouping.  (Note that if groupClause is empty,
-		 * grouping_is_sortable() is trivially true, and all the
+		 * implementations of grouping.  (Note that if processed_groupClause
+		 * is empty, grouping_is_sortable() is trivially true, and all the
 		 * pathkeys_contained_in() tests will succeed too, so that we'll
 		 * consider every surviving input path.)
 		 *
@@ -3663,7 +3709,7 @@ create_grouping_paths(PlannerInfo *root,
 		 * must consider any sorted-input plan.
 		 */
 		if ((gd && gd->rollups != NIL)
-			|| grouping_is_sortable(parse->groupClause))
+			|| grouping_is_sortable(root->processed_groupClause))
 			flags |= GROUPING_CAN_USE_SORT;

 		/*
@@ -3688,7 +3734,7 @@ create_grouping_paths(PlannerInfo *root,
 		 */
 		if ((parse->groupClause != NIL &&
 			 root->numOrderedAggs == 0 &&
-			 (gd ? gd->any_hashable : grouping_is_hashable(parse->groupClause))))
+			 (gd ? gd->any_hashable : grouping_is_hashable(root->processed_groupClause))))
 			flags |= GROUPING_CAN_USE_HASH;

 		/*
@@ -3899,6 +3945,9 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
 		 * partial partitionwise aggregation.  But if partial aggregation is
 		 * not supported in general then we can't use it for partitionwise
 		 * aggregation either.
+		 *
+		 * Check parse->groupClause not processed_groupClause, because it's
+		 * okay if some of the partitioning columns were proved redundant.
 		 */
 		if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
 			group_by_has_partkey(input_rel, extra->targetList,
@@ -4689,7 +4738,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,

 	cheapest_partial_path = linitial(input_rel->partial_pathlist);

-	distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
+	distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
 											parse->targetList);

 	/* estimate how many distinct rows we'll get from each worker */
@@ -4701,7 +4750,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	 * Try sorting the cheapest path and incrementally sorting any paths with
 	 * presorted keys and put a unique paths atop of those.
 	 */
-	if (grouping_is_sortable(parse->distinctClause))
+	if (grouping_is_sortable(root->processed_distinctClause))
 	{
 		foreach(lc, input_rel->partial_pathlist)
 		{
@@ -4763,7 +4812,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	 * path here, we treat enable_hashagg as a hard off-switch rather than the
 	 * slightly softer variant in create_final_distinct_paths.
 	 */
-	if (enable_hashagg && grouping_is_hashable(parse->distinctClause))
+	if (enable_hashagg && grouping_is_hashable(root->processed_distinctClause))
 	{
 		add_partial_path(partial_distinct_rel, (Path *)
 						 create_agg_path(root,
@@ -4772,7 +4821,7 @@ create_partial_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 										 cheapest_partial_path->pathtarget,
 										 AGG_HASHED,
 										 AGGSPLIT_SIMPLE,
-										 parse->distinctClause,
+										 root->processed_distinctClause,
 										 NIL,
 										 NULL,
 										 numDistinctRows));
@@ -4844,7 +4893,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 		 */
 		List	   *distinctExprs;

-		distinctExprs = get_sortgrouplist_exprs(parse->distinctClause,
+		distinctExprs = get_sortgrouplist_exprs(root->processed_distinctClause,
 												parse->targetList);
 		numDistinctRows = estimate_num_groups(root, distinctExprs,
 											  cheapest_input_path->rows,
@@ -4854,7 +4903,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	/*
 	 * Consider sort-based implementations of DISTINCT, if possible.
 	 */
-	if (grouping_is_sortable(parse->distinctClause))
+	if (grouping_is_sortable(root->processed_distinctClause))
 	{
 		/*
 		 * Firstly, if we have any adequately-presorted paths, just stick a
@@ -4988,7 +5037,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 	else
 		allow_hash = true;		/* default */

-	if (allow_hash && grouping_is_hashable(parse->distinctClause))
+	if (allow_hash && grouping_is_hashable(root->processed_distinctClause))
 	{
 		/* Generate hashed aggregate path --- no sort needed */
 		add_path(distinct_rel, (Path *)
@@ -4998,7 +5047,7 @@ create_final_distinct_paths(PlannerInfo *root, RelOptInfo *input_rel,
 								 cheapest_input_path->pathtarget,
 								 AGG_HASHED,
 								 AGGSPLIT_SIMPLE,
-								 parse->distinctClause,
+								 root->processed_distinctClause,
 								 NIL,
 								 NULL,
 								 numDistinctRows));
@@ -5293,8 +5342,9 @@ make_group_input_target(PlannerInfo *root, PathTarget *final_target)
 		Expr	   *expr = (Expr *) lfirst(lc);
 		Index		sgref = get_pathtarget_sortgroupref(final_target, i);

-		if (sgref && parse->groupClause &&
-			get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+		if (sgref && root->processed_groupClause &&
+			get_sortgroupref_clause_noerr(sgref,
+										  root->processed_groupClause) != NULL)
 		{
 			/*
 			 * It's a grouping column, so add it to the input target as-is.
@@ -5362,7 +5412,6 @@ make_partial_grouping_target(PlannerInfo *root,
 							 PathTarget *grouping_target,
 							 Node *havingQual)
 {
-	Query	   *parse = root->parse;
 	PathTarget *partial_target;
 	List	   *non_group_cols;
 	List	   *non_group_exprs;
@@ -5378,8 +5427,9 @@ make_partial_grouping_target(PlannerInfo *root,
 		Expr	   *expr = (Expr *) lfirst(lc);
 		Index		sgref = get_pathtarget_sortgroupref(grouping_target, i);

-		if (sgref && parse->groupClause &&
-			get_sortgroupref_clause_noerr(sgref, parse->groupClause) != NULL)
+		if (sgref && root->processed_groupClause &&
+			get_sortgroupref_clause_noerr(sgref,
+										  root->processed_groupClause) != NULL)
 		{
 			/*
 			 * It's a grouping column, so add it to the partial_target as-is.
@@ -5834,7 +5884,6 @@ make_window_input_target(PlannerInfo *root,
 						 PathTarget *final_target,
 						 List *activeWindows)
 {
-	Query	   *parse = root->parse;
 	PathTarget *input_target;
 	Bitmapset  *sgrefs;
 	List	   *flattenable_cols;
@@ -5842,7 +5891,7 @@ make_window_input_target(PlannerInfo *root,
 	int			i;
 	ListCell   *lc;

-	Assert(parse->hasWindowFuncs);
+	Assert(root->parse->hasWindowFuncs);

 	/*
 	 * Collect the sortgroupref numbers of window PARTITION/ORDER BY clauses
@@ -5869,7 +5918,7 @@ make_window_input_target(PlannerInfo *root,
 	}

 	/* Add in sortgroupref numbers of GROUP BY clauses, too */
-	foreach(lc, parse->groupClause)
+	foreach(lc, root->processed_groupClause)
 	{
 		SortGroupClause *grpcl = lfirst_node(SortGroupClause, lc);

@@ -6788,7 +6837,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 										 grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_SIMPLE,
-										 parse->groupClause,
+										 root->processed_groupClause,
 										 havingQual,
 										 agg_costs,
 										 dNumGroups));
@@ -6803,7 +6852,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 						 create_group_path(root,
 										   grouped_rel,
 										   path,
-										   parse->groupClause,
+										   root->processed_groupClause,
 										   havingQual,
 										   dNumGroups));
 			}
@@ -6872,7 +6921,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 											 grouped_rel->reltarget,
 											 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 											 AGGSPLIT_FINAL_DESERIAL,
-											 parse->groupClause,
+											 root->processed_groupClause,
 											 havingQual,
 											 agg_final_costs,
 											 dNumGroups));
@@ -6881,7 +6930,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 							 create_group_path(root,
 											   grouped_rel,
 											   path,
-											   parse->groupClause,
+											   root->processed_groupClause,
 											   havingQual,
 											   dNumGroups));

@@ -6912,7 +6961,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 									 grouped_rel->reltarget,
 									 AGG_HASHED,
 									 AGGSPLIT_SIMPLE,
-									 parse->groupClause,
+									 root->processed_groupClause,
 									 havingQual,
 									 agg_costs,
 									 dNumGroups));
@@ -6933,7 +6982,7 @@ add_paths_to_grouping_rel(PlannerInfo *root, RelOptInfo *input_rel,
 									 grouped_rel->reltarget,
 									 AGG_HASHED,
 									 AGGSPLIT_FINAL_DESERIAL,
-									 parse->groupClause,
+									 root->processed_groupClause,
 									 havingQual,
 									 agg_final_costs,
 									 dNumGroups));
@@ -7135,7 +7184,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 										 partially_grouped_rel->reltarget,
 										 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 parse->groupClause,
+										 root->processed_groupClause,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialGroups));
@@ -7144,7 +7193,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 						 create_group_path(root,
 										   partially_grouped_rel,
 										   path,
-										   parse->groupClause,
+										   root->processed_groupClause,
 										   NIL,
 										   dNumPartialGroups));
 		}
@@ -7204,7 +7253,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 												 partially_grouped_rel->reltarget,
 												 parse->groupClause ? AGG_SORTED : AGG_PLAIN,
 												 AGGSPLIT_INITIAL_SERIAL,
-												 parse->groupClause,
+												 root->processed_groupClause,
 												 NIL,
 												 agg_partial_costs,
 												 dNumPartialPartialGroups));
@@ -7213,7 +7262,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 								 create_group_path(root,
 												   partially_grouped_rel,
 												   path,
-												   parse->groupClause,
+												   root->processed_groupClause,
 												   NIL,
 												   dNumPartialPartialGroups));
 		}
@@ -7234,7 +7283,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 								 partially_grouped_rel->reltarget,
 								 AGG_HASHED,
 								 AGGSPLIT_INITIAL_SERIAL,
-								 parse->groupClause,
+								 root->processed_groupClause,
 								 NIL,
 								 agg_partial_costs,
 								 dNumPartialGroups));
@@ -7252,7 +7301,7 @@ create_partial_grouping_paths(PlannerInfo *root,
 										 partially_grouped_rel->reltarget,
 										 AGG_HASHED,
 										 AGGSPLIT_INITIAL_SERIAL,
-										 parse->groupClause,
+										 root->processed_groupClause,
 										 NIL,
 										 agg_partial_costs,
 										 dNumPartialPartialGroups));
--- a/src/backend/optimizer/prep/prepjointree.c
+++ b/src/backend/optimizer/prep/prepjointree.c
@@ -1008,6 +1008,8 @@ pull_up_simple_subquery(PlannerInfo *root, Node *jtnode, RangeTblEntry *rte,
 	subroot->rowMarks = NIL;
 	memset(subroot->upper_rels, 0, sizeof(subroot->upper_rels));
 	memset(subroot->upper_targets, 0, sizeof(subroot->upper_targets));
+	subroot->processed_groupClause = NIL;
+	subroot->processed_distinctClause = NIL;
 	subroot->processed_tlist = NIL;
 	subroot->update_colnos = NIL;
 	subroot->grouping_map = NULL;