Improve UniquePath logic to detect the case where the input is already
known unique (e.g., it is a SELECT DISTINCT ... subquery), and not do a
redundant unique-ification step.
2025-07-28 23:42:10 +03:00 · 2004-01-05 18:04:39 +00:00
parent cce442da6d
commit 5c74ce23db
6 changed files with 72 additions and 15 deletions
--- a/src/backend/nodes/outfuncs.c
+++ b/src/backend/nodes/outfuncs.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $
 *
 * NOTES
 *	  Every node type that can appear in stored rules' parsetrees *must*
@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node)
 	_outPathInfo(str, (Path *) node);
 	WRITE_NODE_FIELD(subpath);
-	WRITE_BOOL_FIELD(use_hash);
+	WRITE_ENUM_FIELD(umethod, UniquePathMethod);
 	WRITE_FLOAT_FIELD(rows, "%.0f");
 }
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@ -10,7 +10,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path)
 			subplan->targetlist = newtlist;
 	}
 	/* Done if we don't need to do any actual unique-ifying */
 	if (best_path->umethod == UNIQUE_PATH_NOOP)
 		return subplan;
 	/* Copy tlist again to make one we can put sorting labels on */
 	my_tlist = copyObject(subplan->targetlist);
-	if (best_path->use_hash)
+	if (best_path->umethod == UNIQUE_PATH_HASH)
 	{
 		long		numGroups;
--- a/src/backend/optimizer/util/clauses.c
+++ b/src/backend/optimizer/util/clauses.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $
 *
 * HISTORY
 *	  AUTHOR			DATE			MAJOR EVENT
@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query)
 	return false;
 }
 /*
 * has_distinct_clause
 *	  Report whether a query is a simple SELECT DISTINCT: it must carry a
 *	  distinct-list, and has_distinct_on_clause() must not classify that
 *	  list as DISTINCT ON.
 */
 bool
 has_distinct_clause(Query *query)
 {
 	bool		has_distinct_list = (query->distinctClause != NIL);
 	/* With no distinct-list the DISTINCT ON test is skipped entirely */
 	return has_distinct_list && !has_distinct_on_clause(query);
 }
 /*****************************************************************************
 *																			 *
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -20,12 +20,14 @@
 #include "executor/executor.h"
 #include "miscadmin.h"
 #include "nodes/plannodes.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
 #include "optimizer/pathnode.h"
 #include "optimizer/paths.h"
 #include "optimizer/restrictinfo.h"
 #include "parser/parse_expr.h"
 #include "parser/parse_oper.h"
 #include "parser/parsetree.h"
 #include "utils/memutils.h"
 #include "utils/selfuncs.h"
 #include "utils/syscache.h"
@ -546,6 +548,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
 	pathnode->subpath = subpath;
 	/*
 	 * If the input is a subquery that uses DISTINCT, we don't need to do
 	 * anything; its output is already unique.  (Are there any other cases
 	 * in which we can easily prove the input must be distinct?)
 	 */
 	if (rel->rtekind == RTE_SUBQUERY)
 	{
 		RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable);
 		Query	   *subquery = rte->subquery;
 		if (has_distinct_clause(subquery))
 		{
 			pathnode->umethod = UNIQUE_PATH_NOOP;
 			pathnode->rows = rel->rows;
 			pathnode->path.startup_cost = subpath->startup_cost;
 			pathnode->path.total_cost = subpath->total_cost;
 			pathnode->path.pathkeys = subpath->pathkeys;
 			rel->cheapest_unique_path = (Path *) pathnode;
 			return pathnode;
 		}
 	}
 	/*
 	 * Try to identify the targetlist that will actually be unique-ified.
 	 * In current usage, this routine is only used for sub-selects of IN
@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
 	 * compare costs.  We only try this if we know the targetlist for sure
 	 * (else we can't be sure about the datatypes involved).
 	 */
-	pathnode->use_hash = false;
+	pathnode->umethod = UNIQUE_PATH_SORT;
 	if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
 	{
 		/*
@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
 					 subpath->total_cost,
 					 rel->rows);
 			if (agg_path.total_cost < sort_path.total_cost)
-				pathnode->use_hash = true;
+				pathnode->umethod = UNIQUE_PATH_HASH;
 		}
 	}
-	if (pathnode->use_hash)
+	if (pathnode->umethod == UNIQUE_PATH_HASH)
 	{
 		pathnode->path.startup_cost = agg_path.startup_cost;
 		pathnode->path.total_cost = agg_path.total_cost;
--- a/src/include/nodes/relation.h
+++ b/src/include/nodes/relation.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -442,15 +442,26 @@ typedef struct MaterialPath
 * its subpath.
 *
 * This is unlike the other Path nodes in that it can actually generate
- * two different plans: either hash-based or sort-based implementation.
+ * different plans: either hash-based or sort-based implementation, or a
- * The decision is sufficiently localized that it's not worth having two
+ * no-op if the input path can be proven distinct already.  The decision
- * separate Path node types.
+ * is sufficiently localized that it's not worth having separate Path node
 * types.  (Note: in the no-op case, we could eliminate the UniquePath node
 * entirely and just return the subpath; but it's convenient to have a
 * UniquePath in the path tree to signal upper-level routines that the input
 * is known distinct.)
 */
 /*
 * UniquePathMethod records which strategy was chosen for a UniquePath:
 * nothing at all, hashing, or sorting.  It is stored in the umethod field
 * of UniquePath.
 */
 typedef enum
 {
 	UNIQUE_PATH_NOOP,			/* input is known unique already */
 	UNIQUE_PATH_HASH,			/* use hashing */
 	UNIQUE_PATH_SORT			/* use sorting */
 } UniquePathMethod;
 typedef struct UniquePath
 {
 	Path		path;
 	Path	   *subpath;		/* input path whose output is to be unique */
-	bool		use_hash;
+	UniquePathMethod umethod;	/* strategy chosen: no-op, hash, or sort */
 	double		rows;			/* estimated number of result tuples */
 } UniquePath;
--- a/src/include/optimizer/clauses.h
+++ b/src/include/optimizer/clauses.h
@ -7,7 +7,7 @@
 * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
- * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause);
 extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
 extern List *pull_constant_clauses(List *quals, List **constantQual);
 extern bool has_distinct_clause(Query *query);
 extern bool has_distinct_on_clause(Query *query);
 extern int	NumRelids(Node *clause);