Make the behavior of HAVING without GROUP BY conform to the SQL spec.

Formerly, if such a clause contained no aggregate functions we mistakenly treated it as equivalent to WHERE. Per spec it must cause the query to be treated as a grouped query of a single group, the same as appearance of aggregate functions would do. Also, the HAVING filter must execute after aggregate function computation even if it itself contains no aggregate functions.
2025-11-07 19:06:32 +03:00 · 2005-03-10 23:21:26 +00:00
parent 609e32b929
commit 595ed2a855
20 changed files with 385 additions and 238 deletions
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.123 2004/12/31 22:00:00 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/path/allpaths.c,v 1.124 2005/03/10 23:21:21 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -334,13 +334,10 @@ set_subquery_pathlist(Query *root, RelOptInfo *rel,

 	/*
 	 * If there are any restriction clauses that have been attached to the
-	 * subquery relation, consider pushing them down to become HAVING
-	 * quals of the subquery itself.  (Not WHERE clauses, since they may
-	 * refer to subquery outputs that are aggregate results.  But
-	 * planner.c will transfer them into the subquery's WHERE if they do
-	 * not.)  This transformation is useful because it may allow us to
-	 * generate a better plan for the subquery than evaluating all the
-	 * subquery output rows and then filtering them.
+	 * subquery relation, consider pushing them down to become WHERE or
+	 * HAVING quals of the subquery itself.  This transformation is useful
+	 * because it may allow us to generate a better plan for the subquery
+	 * than evaluating all the subquery output rows and then filtering them.
 	 *
 	 * There are several cases where we cannot push down clauses.
 	 * Restrictions involving the subquery are checked by
@@ -795,8 +792,17 @@ subquery_push_qual(Query *subquery, List *rtable, Index rti, Node *qual)
 		qual = ResolveNew(qual, rti, 0, rtable,
 						  subquery->targetList,
 						  CMD_SELECT, 0);
-		subquery->havingQual = make_and_qual(subquery->havingQual,
-											 qual);
+
+		/*
+		 * Now attach the qual to the proper place: normally WHERE, but
+		 * if the subquery uses grouping or aggregation, put it in HAVING
+		 * (since the qual really refers to the group-result rows).
+		 */
+		if (subquery->hasAggs || subquery->groupClause || subquery->havingQual)
+			subquery->havingQual = make_and_qual(subquery->havingQual, qual);
+		else
+			subquery->jointree->quals =
+				make_and_qual(subquery->jointree->quals, qual);

 		/*
 		 * We need not change the subquery's hasAggs or hasSublinks flags,
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.175 2004/12/31 22:00:08 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.176 2005/03/10 23:21:22 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -2158,6 +2158,7 @@ make_agg(Query *root, List *tlist, List *qual,
 Group *
 make_group(Query *root,
 		   List *tlist,
+		   List *qual,
 		   int numGroupCols,
 		   AttrNumber *grpColIdx,
 		   double numGroups,
@@ -2184,7 +2185,8 @@ make_group(Query *root,
 	plan->plan_rows = numGroups;

 	/*
-	 * We also need to account for the cost of evaluation of the tlist.
+	 * We also need to account for the cost of evaluation of the qual (ie,
+	 * the HAVING clause) and the tlist.
 	 *
 	 * XXX this double-counts the cost of evaluation of any expressions used
 	 * for grouping, since in reality those will have been evaluated at a
@@ -2194,12 +2196,19 @@ make_group(Query *root,
 	 * See notes in grouping_planner about why this routine and make_agg are
 	 * the only ones in this file that worry about tlist eval cost.
 	 */
+	if (qual)
+	{
+		cost_qual_eval(&qual_cost, qual);
+		plan->startup_cost += qual_cost.startup;
+		plan->total_cost += qual_cost.startup;
+		plan->total_cost += qual_cost.per_tuple * plan->plan_rows;
+	}
 	cost_qual_eval(&qual_cost, tlist);
 	plan->startup_cost += qual_cost.startup;
 	plan->total_cost += qual_cost.startup;
 	plan->total_cost += qual_cost.per_tuple * plan->plan_rows;

-	plan->qual = NIL;
+	plan->qual = qual;
 	plan->targetlist = tlist;
 	plan->lefttree = lefttree;
 	plan->righttree = NULL;
--- a/src/backend/optimizer/plan/planner.c
+++ b/src/backend/optimizer/plan/planner.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.178 2005/01/28 19:34:05 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.179 2005/03/10 23:21:22 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -235,6 +235,13 @@ subquery_planner(Query *parse, double tuple_fraction)
 		}
 	}

+	/*
+	 * Set hasHavingQual to remember if HAVING clause is present.  Needed
+	 * because preprocess_expression will reduce a constant-true condition
+	 * to an empty qual list ... but "HAVING TRUE" is not a semantic no-op.
+	 */
+	parse->hasHavingQual = (parse->havingQual != NULL);
+
 	/*
 	 * Do expression preprocessing on targetlist and quals.
 	 */
@@ -267,17 +274,25 @@ subquery_planner(Query *parse, double tuple_fraction)
 	}

 	/*
-	 * A HAVING clause without aggregates is equivalent to a WHERE clause
-	 * (except it can only refer to grouped fields).  Transfer any
-	 * agg-free clauses of the HAVING qual into WHERE.	This may seem like
-	 * wasting cycles to cater to stupidly-written queries, but there are
-	 * other reasons for doing it.	Firstly, if the query contains no aggs
-	 * at all, then we aren't going to generate an Agg plan node, and so
-	 * there'll be no place to execute HAVING conditions; without this
-	 * transfer, we'd lose the HAVING condition entirely, which is wrong.
-	 * Secondly, when we push down a qual condition into a sub-query, it's
-	 * easiest to push the qual into HAVING always, in case it contains
-	 * aggs, and then let this code sort it out.
+	 * In some cases we may want to transfer a HAVING clause into WHERE.
+	 * We cannot do so if the HAVING clause contains aggregates (obviously)
+	 * or volatile functions (since a HAVING clause is supposed to be executed
+	 * only once per group).  Also, it may be that the clause is so expensive
+	 * to execute that we're better off doing it only once per group, despite
+	 * the loss of selectivity.  This is hard to estimate short of doing the
+	 * entire planning process twice, so we use a heuristic: clauses
+	 * containing subplans are left in HAVING.  Otherwise, we move or copy
+	 * the HAVING clause into WHERE, in hopes of eliminating tuples before
+	 * aggregation instead of after.
+	 *
+	 * If the query has explicit grouping then we can simply move such a
+	 * clause into WHERE; any group that fails the clause will not be
+	 * in the output because none of its tuples will reach the grouping
+	 * or aggregation stage.  Otherwise we must have a degenerate
+	 * (variable-free) HAVING clause, which we put in WHERE so that
+	 * query_planner() can use it in a gating Result node, but also keep
+	 * in HAVING to ensure that we don't emit a bogus aggregated row.
+	 * (This could be done better, but it seems not worth optimizing.)
 	 *
 	 * Note that both havingQual and parse->jointree->quals are in
 	 * implicitly-ANDed-list form at this point, even though they are
@@ -288,11 +303,27 @@ subquery_planner(Query *parse, double tuple_fraction)
 	{
 		Node	   *havingclause = (Node *) lfirst(l);

-		if (contain_agg_clause(havingclause))
+		if (contain_agg_clause(havingclause) ||
+			contain_volatile_functions(havingclause) ||
+			contain_subplans(havingclause))
+		{
+			/* keep it in HAVING */
 			newHaving = lappend(newHaving, havingclause);
-		else
+		}
+		else if (parse->groupClause)
+		{
+			/* move it to WHERE */
 			parse->jointree->quals = (Node *)
 				lappend((List *) parse->jointree->quals, havingclause);
+		}
+		else
+		{
+			/* put a copy in WHERE, keep it in HAVING */
+			parse->jointree->quals = (Node *)
+				lappend((List *) parse->jointree->quals,
+						copyObject(havingclause));
+			newHaving = lappend(newHaving, havingclause);
+		}
 	}
 	parse->havingQual = (Node *) newHaving;

@@ -1195,7 +1226,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * Insert AGG or GROUP node if needed, plus an explicit sort step
 		 * if necessary.
 		 *
-		 * HAVING clause, if any, becomes qual of the Agg node
+		 * HAVING clause, if any, becomes qual of the Agg or Group node.
 		 */
 		if (use_hashed_grouping)
 		{
@@ -1252,42 +1283,50 @@ grouping_planner(Query *parse, double tuple_fraction)
 											agg_counts.numAggs,
 											result_plan);
 		}
-		else
+		else if (parse->groupClause)
 		{
 			/*
-			 * If there are no Aggs, we shouldn't have any HAVING qual
-			 * anymore
-			 */
-			Assert(parse->havingQual == NULL);
-
-			/*
-			 * If we have a GROUP BY clause, insert a group node (plus the
+			 * GROUP BY without aggregation, so insert a group node (plus the
 			 * appropriate sort node, if necessary).
+			 *
+			 * Add an explicit sort if we couldn't make the path come
+			 * out the way the GROUP node needs it.
 			 */
-			if (parse->groupClause)
+			if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
 			{
-				/*
-				 * Add an explicit sort if we couldn't make the path come
-				 * out the way the GROUP node needs it.
-				 */
-				if (!pathkeys_contained_in(group_pathkeys, current_pathkeys))
-				{
-					result_plan = (Plan *)
-						make_sort_from_groupcols(parse,
-												 parse->groupClause,
-												 groupColIdx,
-												 result_plan);
-					current_pathkeys = group_pathkeys;
-				}
-
-				result_plan = (Plan *) make_group(parse,
-												  tlist,
-												  numGroupCols,
-												  groupColIdx,
-												  dNumGroups,
-												  result_plan);
-				/* The Group node won't change sort ordering */
+				result_plan = (Plan *)
+					make_sort_from_groupcols(parse,
+											 parse->groupClause,
+											 groupColIdx,
+											 result_plan);
+				current_pathkeys = group_pathkeys;
 			}
+
+			result_plan = (Plan *) make_group(parse,
+											  tlist,
+											  (List *) parse->havingQual,
+											  numGroupCols,
+											  groupColIdx,
+											  dNumGroups,
+											  result_plan);
+			/* The Group node won't change sort ordering */
+		}
+		else if (parse->hasHavingQual)
+		{
+			/*
+			 * No aggregates, and no GROUP BY, but we have a HAVING qual.
+			 * This is a degenerate case in which we are supposed to emit
+			 * either 0 or 1 row depending on whether HAVING succeeds.
+			 * Furthermore, there cannot be any variables in either HAVING
+			 * or the targetlist, so we actually do not need the FROM table
+			 * at all!  We can just throw away the plan-so-far and generate
+			 * a Result node.  This is a sufficiently unusual corner case
+			 * that it's not worth contorting the structure of this routine
+			 * to avoid having to generate the plan in the first place.
+			 */
+			result_plan = (Plan *) make_result(tlist,
+											   parse->havingQual,
+											   NULL);
 		}
 	}							/* end of if (setOperations) */

@@ -1320,7 +1359,7 @@ grouping_planner(Query *parse, double tuple_fraction)
 		 * it's reasonable to assume the UNIQUE filter has effects
 		 * comparable to GROUP BY.
 		 */
-		if (!parse->groupClause && !parse->hasAggs)
+		if (!parse->groupClause && !parse->hasHavingQual && !parse->hasAggs)
 		{
 			List	   *distinctExprs;

@@ -1384,19 +1423,18 @@ hash_safe_grouping(Query *parse)
 * make_subplanTargetList
 *	  Generate appropriate target list when grouping is required.
 *
- * When grouping_planner inserts Aggregate or Group plan nodes above
- * the result of query_planner, we typically want to pass a different
+ * When grouping_planner inserts Aggregate, Group, or Result plan nodes
+ * above the result of query_planner, we typically want to pass a different
 * target list to query_planner than the outer plan nodes should have.
 * This routine generates the correct target list for the subplan.
 *
 * The initial target list passed from the parser already contains entries
 * for all ORDER BY and GROUP BY expressions, but it will not have entries
 * for variables used only in HAVING clauses; so we need to add those
- * variables to the subplan target list.  Also, if we are doing either
- * grouping or aggregation, we flatten all expressions except GROUP BY items
- * into their component variables; the other expressions will be computed by
- * the inserted nodes rather than by the subplan.  For example,
- * given a query like
+ * variables to the subplan target list.  Also, we flatten all expressions
+ * except GROUP BY items into their component variables; the other expressions
+ * will be computed by the inserted nodes rather than by the subplan.
+ * For example, given a query like
 *		SELECT a+b,SUM(c+d) FROM table GROUP BY a+b;
 * we want to pass this targetlist to the subplan:
 *		a,b,c,d,a+b
@@ -1436,10 +1474,10 @@ make_subplanTargetList(Query *parse,
 	*groupColIdx = NULL;

 	/*
-	 * If we're not grouping or aggregating, nothing to do here;
+	 * If we're not grouping or aggregating, there's nothing to do here;
 	 * query_planner should receive the unmodified target list.
 	 */
-	if (!parse->hasAggs && !parse->groupClause)
+	if (!parse->hasAggs && !parse->groupClause && !parse->hasHavingQual)
 	{
 		*need_tlist_eval = true;
 		return tlist;
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.111 2004/12/31 22:00:23 pgsql Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.112 2005/03/10 23:21:22 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -797,6 +797,15 @@ is_distinct_query(Query *query)
 		if (!gl)				/* got to the end? */
 			return true;
 	}
+	else
+	{
+		/*
+		 * If we have no GROUP BY, but do have aggregates or HAVING, then
+		 * the result is at most one row so it's surely unique.
+		 */
+		if (query->hasAggs || query->havingQual)
+			return true;
+	}

 	/*
 	 * XXX Are there any other cases in which we can easily see the result