Pathify RHS unique-ification for semijoin planning

There are two implementation techniques for semijoins: one uses the JOIN_SEMI jointype, where the executor emits at most one matching row per left-hand side (LHS) row; the other unique-ifies the right-hand side (RHS) and then performs a plain inner join.

The latter technique currently has some drawbacks related to the unique-ification step.

* Only the cheapest-total path of the RHS is considered during unique-ification. This may cause us to miss some optimization opportunities; for example, a path with a better sort order might be overlooked simply because it is not the cheapest in total cost. Such a path could help avoid a sort at a higher level, potentially resulting in a cheaper overall plan.

* We currently rely on heuristics to choose between hash-based and sort-based unique-ification. A better approach would be to generate paths for both methods and allow add_path() to decide which one is preferable, consistent with how path selection is handled elsewhere in the planner.

* In the sort-based implementation, we currently pay no attention to the pathkeys of the input subpath or the resulting output. This can result in redundant sort nodes being added to the final plan.

This patch improves semijoin planning by creating a new RelOptInfo for the RHS rel to represent its unique-ified version. It then generates multiple paths that represent elimination of distinct rows from the RHS, considering both a hash-based implementation using the cheapest total path of the original RHS rel, and sort-based implementations that either exploit presorted input paths or explicitly sort the cheapest total path. All resulting paths compete in add_path(), and those deemed worthy of consideration are added to the new RelOptInfo. Finally, the unique-ified rel is joined with the other side of the semijoin using a plain inner join.
As a side effect, most of the code related to the JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER jointypes -- used to indicate that the LHS or RHS path should be made unique -- has been removed.

Besides, the T_Unique path now has the same meaning for both semijoins and upper DISTINCT clauses: it represents adjacent-duplicate removal on presorted input. This patch unifies their handling by sharing the same data structures and functions.

This patch also removes the UNIQUE_PATH_NOOP related code along the way, as it is dead code -- if the RHS rel is provably unique, the semijoin should have already been simplified to a plain inner join by analyzejoins.c.

Author: Richard Guo <guofenglinux@gmail.com>
Reviewed-by: Alexandra Wang <alexandra.wang.oss@gmail.com>
Reviewed-by: wenhui qiu <qiuwenhuifx@gmail.com>
Discussion: https://postgr.es/m/CAMbWs4-EBnaRvEs7frTLbsXiweSTUXifsteF-d3rvv01FKO86w@mail.gmail.com
2025-11-19 13:42:17 +03:00 · 2025-08-19 09:35:40 +09:00
parent 3c07944d04
commit 24225ad9aa
18 changed files with 1074 additions and 971 deletions
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -46,7 +46,6 @@ typedef enum
 */
 #define STD_FUZZ_FACTOR 1.01

-static List *translate_sub_tlist(List *tlist, int relid);
 static int	append_total_cost_compare(const ListCell *a, const ListCell *b);
 static int	append_startup_cost_compare(const ListCell *a, const ListCell *b);
 static List *reparameterize_pathlist_by_child(PlannerInfo *root,
@@ -381,7 +380,6 @@ set_cheapest(RelOptInfo *parent_rel)

 	parent_rel->cheapest_startup_path = cheapest_startup_path;
 	parent_rel->cheapest_total_path = cheapest_total_path;
-	parent_rel->cheapest_unique_path = NULL;	/* computed only if needed */
 	parent_rel->cheapest_parameterized_paths = parameterized_paths;
 }

@@ -1740,246 +1738,6 @@ create_memoize_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	return pathnode;
 }

-/*
- * create_unique_path
- *	  Creates a path representing elimination of distinct rows from the
- *	  input data.  Distinct-ness is defined according to the needs of the
- *	  semijoin represented by sjinfo.  If it is not possible to identify
- *	  how to make the data unique, NULL is returned.
- *
- * If used at all, this is likely to be called repeatedly on the same rel;
- * and the input subpath should always be the same (the cheapest_total path
- * for the rel).  So we cache the result.
- */
-UniquePath *
-create_unique_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
-				   SpecialJoinInfo *sjinfo)
-{
-	UniquePath *pathnode;
-	Path		sort_path;		/* dummy for result of cost_sort */
-	Path		agg_path;		/* dummy for result of cost_agg */
-	MemoryContext oldcontext;
-	int			numCols;
-
-	/* Caller made a mistake if subpath isn't cheapest_total ... */
-	Assert(subpath == rel->cheapest_total_path);
-	Assert(subpath->parent == rel);
-	/* ... or if SpecialJoinInfo is the wrong one */
-	Assert(sjinfo->jointype == JOIN_SEMI);
-	Assert(bms_equal(rel->relids, sjinfo->syn_righthand));
-
-	/* If result already cached, return it */
-	if (rel->cheapest_unique_path)
-		return (UniquePath *) rel->cheapest_unique_path;
-
-	/* If it's not possible to unique-ify, return NULL */
-	if (!(sjinfo->semi_can_btree || sjinfo->semi_can_hash))
-		return NULL;
-
-	/*
-	 * When called during GEQO join planning, we are in a short-lived memory
-	 * context.  We must make sure that the path and any subsidiary data
-	 * structures created for a baserel survive the GEQO cycle, else the
-	 * baserel is trashed for future GEQO cycles.  On the other hand, when we
-	 * are creating those for a joinrel during GEQO, we don't want them to
-	 * clutter the main planning context.  Upshot is that the best solution is
-	 * to explicitly allocate memory in the same context the given RelOptInfo
-	 * is in.
-	 */
-	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
-
-	pathnode = makeNode(UniquePath);
-
-	pathnode->path.pathtype = T_Unique;
-	pathnode->path.parent = rel;
-	pathnode->path.pathtarget = rel->reltarget;
-	pathnode->path.param_info = subpath->param_info;
-	pathnode->path.parallel_aware = false;
-	pathnode->path.parallel_safe = rel->consider_parallel &&
-		subpath->parallel_safe;
-	pathnode->path.parallel_workers = subpath->parallel_workers;
-
-	/*
-	 * Assume the output is unsorted, since we don't necessarily have pathkeys
-	 * to represent it.  (This might get overridden below.)
-	 */
-	pathnode->path.pathkeys = NIL;
-
-	pathnode->subpath = subpath;
-
-	/*
-	 * Under GEQO and when planning child joins, the sjinfo might be
-	 * short-lived, so we'd better make copies of data structures we extract
-	 * from it.
-	 */
-	pathnode->in_operators = copyObject(sjinfo->semi_operators);
-	pathnode->uniq_exprs = copyObject(sjinfo->semi_rhs_exprs);
-
-	/*
-	 * If the input is a relation and it has a unique index that proves the
-	 * semi_rhs_exprs are unique, then we don't need to do anything.  Note
-	 * that relation_has_unique_index_for automatically considers restriction
-	 * clauses for the rel, as well.
-	 */
-	if (rel->rtekind == RTE_RELATION && sjinfo->semi_can_btree &&
-		relation_has_unique_index_for(root, rel, NIL,
-									  sjinfo->semi_rhs_exprs,
-									  sjinfo->semi_operators))
-	{
-		pathnode->umethod = UNIQUE_PATH_NOOP;
-		pathnode->path.rows = rel->rows;
-		pathnode->path.disabled_nodes = subpath->disabled_nodes;
-		pathnode->path.startup_cost = subpath->startup_cost;
-		pathnode->path.total_cost = subpath->total_cost;
-		pathnode->path.pathkeys = subpath->pathkeys;
-
-		rel->cheapest_unique_path = (Path *) pathnode;
-
-		MemoryContextSwitchTo(oldcontext);
-
-		return pathnode;
-	}
-
-	/*
-	 * If the input is a subquery whose output must be unique already, then we
-	 * don't need to do anything.  The test for uniqueness has to consider
-	 * exactly which columns we are extracting; for example "SELECT DISTINCT
-	 * x,y" doesn't guarantee that x alone is distinct. So we cannot check for
-	 * this optimization unless semi_rhs_exprs consists only of simple Vars
-	 * referencing subquery outputs.  (Possibly we could do something with
-	 * expressions in the subquery outputs, too, but for now keep it simple.)
-	 */
-	if (rel->rtekind == RTE_SUBQUERY)
-	{
-		RangeTblEntry *rte = planner_rt_fetch(rel->relid, root);
-
-		if (query_supports_distinctness(rte->subquery))
-		{
-			List	   *sub_tlist_colnos;
-
-			sub_tlist_colnos = translate_sub_tlist(sjinfo->semi_rhs_exprs,
-												   rel->relid);
-
-			if (sub_tlist_colnos &&
-				query_is_distinct_for(rte->subquery,
-									  sub_tlist_colnos,
-									  sjinfo->semi_operators))
-			{
-				pathnode->umethod = UNIQUE_PATH_NOOP;
-				pathnode->path.rows = rel->rows;
-				pathnode->path.disabled_nodes = subpath->disabled_nodes;
-				pathnode->path.startup_cost = subpath->startup_cost;
-				pathnode->path.total_cost = subpath->total_cost;
-				pathnode->path.pathkeys = subpath->pathkeys;
-
-				rel->cheapest_unique_path = (Path *) pathnode;
-
-				MemoryContextSwitchTo(oldcontext);
-
-				return pathnode;
-			}
-		}
-	}
-
-	/* Estimate number of output rows */
-	pathnode->path.rows = estimate_num_groups(root,
-											  sjinfo->semi_rhs_exprs,
-											  rel->rows,
-											  NULL,
-											  NULL);
-	numCols = list_length(sjinfo->semi_rhs_exprs);
-
-	if (sjinfo->semi_can_btree)
-	{
-		/*
-		 * Estimate cost for sort+unique implementation
-		 */
-		cost_sort(&sort_path, root, NIL,
-				  subpath->disabled_nodes,
-				  subpath->total_cost,
-				  rel->rows,
-				  subpath->pathtarget->width,
-				  0.0,
-				  work_mem,
-				  -1.0);
-
-		/*
-		 * Charge one cpu_operator_cost per comparison per input tuple. We
-		 * assume all columns get compared at most of the tuples. (XXX
-		 * probably this is an overestimate.)  This should agree with
-		 * create_upper_unique_path.
-		 */
-		sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
-	}
-
-	if (sjinfo->semi_can_hash)
-	{
-		/*
-		 * Estimate the overhead per hashtable entry at 64 bytes (same as in
-		 * planner.c).
-		 */
-		int			hashentrysize = subpath->pathtarget->width + 64;
-
-		if (hashentrysize * pathnode->path.rows > get_hash_memory_limit())
-		{
-			/*
-			 * We should not try to hash.  Hack the SpecialJoinInfo to
-			 * remember this, in case we come through here again.
-			 */
-			sjinfo->semi_can_hash = false;
-		}
-		else
-			cost_agg(&agg_path, root,
-					 AGG_HASHED, NULL,
-					 numCols, pathnode->path.rows,
-					 NIL,
-					 subpath->disabled_nodes,
-					 subpath->startup_cost,
-					 subpath->total_cost,
-					 rel->rows,
-					 subpath->pathtarget->width);
-	}
-
-	if (sjinfo->semi_can_btree && sjinfo->semi_can_hash)
-	{
-		if (agg_path.disabled_nodes < sort_path.disabled_nodes ||
-			(agg_path.disabled_nodes == sort_path.disabled_nodes &&
-			 agg_path.total_cost < sort_path.total_cost))
-			pathnode->umethod = UNIQUE_PATH_HASH;
-		else
-			pathnode->umethod = UNIQUE_PATH_SORT;
-	}
-	else if (sjinfo->semi_can_btree)
-		pathnode->umethod = UNIQUE_PATH_SORT;
-	else if (sjinfo->semi_can_hash)
-		pathnode->umethod = UNIQUE_PATH_HASH;
-	else
-	{
-		/* we can get here only if we abandoned hashing above */
-		MemoryContextSwitchTo(oldcontext);
-		return NULL;
-	}
-
-	if (pathnode->umethod == UNIQUE_PATH_HASH)
-	{
-		pathnode->path.disabled_nodes = agg_path.disabled_nodes;
-		pathnode->path.startup_cost = agg_path.startup_cost;
-		pathnode->path.total_cost = agg_path.total_cost;
-	}
-	else
-	{
-		pathnode->path.disabled_nodes = sort_path.disabled_nodes;
-		pathnode->path.startup_cost = sort_path.startup_cost;
-		pathnode->path.total_cost = sort_path.total_cost;
-	}
-
-	rel->cheapest_unique_path = (Path *) pathnode;
-
-	MemoryContextSwitchTo(oldcontext);
-
-	return pathnode;
-}
-
 /*
 * create_gather_merge_path
 *
@@ -2031,36 +1789,6 @@ create_gather_merge_path(PlannerInfo *root, RelOptInfo *rel, Path *subpath,
 	return pathnode;
 }

-/*
- * translate_sub_tlist - get subquery column numbers represented by tlist
- *
- * The given targetlist usually contains only Vars referencing the given relid.
- * Extract their varattnos (ie, the column numbers of the subquery) and return
- * as an integer List.
- *
- * If any of the tlist items is not a simple Var, we cannot determine whether
- * the subquery's uniqueness condition (if any) matches ours, so punt and
- * return NIL.
- */
-static List *
-translate_sub_tlist(List *tlist, int relid)
-{
-	List	   *result = NIL;
-	ListCell   *l;
-
-	foreach(l, tlist)
-	{
-		Var		   *var = (Var *) lfirst(l);
-
-		if (!var || !IsA(var, Var) ||
-			var->varno != relid)
-			return NIL;			/* punt */
-
-		result = lappend_int(result, var->varattno);
-	}
-	return result;
-}
-
 /*
 * create_gather_path
 *	  Creates a path corresponding to a gather scan, returning the
@@ -2818,8 +2546,7 @@ create_projection_path(PlannerInfo *root,
 	pathnode->path.pathtype = T_Result;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe &&
@@ -3074,8 +2801,7 @@ create_incremental_sort_path(PlannerInfo *root,
 	pathnode->path.parent = rel;
 	/* Sort doesn't project, so use source path's pathtarget */
 	pathnode->path.pathtarget = subpath->pathtarget;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
@@ -3122,8 +2848,7 @@ create_sort_path(PlannerInfo *root,
 	pathnode->path.parent = rel;
 	/* Sort doesn't project, so use source path's pathtarget */
 	pathnode->path.pathtarget = subpath->pathtarget;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
@@ -3199,13 +2924,10 @@ create_group_path(PlannerInfo *root,
 }

 /*
- * create_upper_unique_path
+ * create_unique_path
 *	  Creates a pathnode that represents performing an explicit Unique step
 *	  on presorted input.
 *
- * This produces a Unique plan node, but the use-case is so different from
- * create_unique_path that it doesn't seem worth trying to merge the two.
- *
 * 'rel' is the parent relation associated with the result
 * 'subpath' is the path representing the source of data
 * 'numCols' is the number of grouping columns
@@ -3214,21 +2936,20 @@ create_group_path(PlannerInfo *root,
 * The input path must be sorted on the grouping columns, plus possibly
 * additional columns; so the first numCols pathkeys are the grouping columns
 */
-UpperUniquePath *
-create_upper_unique_path(PlannerInfo *root,
-						 RelOptInfo *rel,
-						 Path *subpath,
-						 int numCols,
-						 double numGroups)
+UniquePath *
+create_unique_path(PlannerInfo *root,
+				   RelOptInfo *rel,
+				   Path *subpath,
+				   int numCols,
+				   double numGroups)
 {
-	UpperUniquePath *pathnode = makeNode(UpperUniquePath);
+	UniquePath *pathnode = makeNode(UniquePath);

 	pathnode->path.pathtype = T_Unique;
 	pathnode->path.parent = rel;
 	/* Unique doesn't project, so use source path's pathtarget */
 	pathnode->path.pathtarget = subpath->pathtarget;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
@@ -3284,8 +3005,7 @@ create_agg_path(PlannerInfo *root,
 	pathnode->path.pathtype = T_Agg;
 	pathnode->path.parent = rel;
 	pathnode->path.pathtarget = target;
-	/* For now, assume we are above any joins, so no parameterization */
-	pathnode->path.param_info = NULL;
+	pathnode->path.param_info = subpath->param_info;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
 		subpath->parallel_safe;
--- a/src/backend/optimizer/util/relnode.c
+++ b/src/backend/optimizer/util/relnode.c
@@ -217,7 +217,6 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 	rel->partial_pathlist = NIL;
 	rel->cheapest_startup_path = NULL;
 	rel->cheapest_total_path = NULL;
-	rel->cheapest_unique_path = NULL;
 	rel->cheapest_parameterized_paths = NIL;
 	rel->relid = relid;
 	rel->rtekind = rte->rtekind;
@@ -269,6 +268,9 @@ build_simple_rel(PlannerInfo *root, int relid, RelOptInfo *parent)
 	rel->fdw_private = NULL;
 	rel->unique_for_rels = NIL;
 	rel->non_unique_for_rels = NIL;
+	rel->unique_rel = NULL;
+	rel->unique_pathkeys = NIL;
+	rel->unique_groupclause = NIL;
 	rel->baserestrictinfo = NIL;
 	rel->baserestrictcost.startup = 0;
 	rel->baserestrictcost.per_tuple = 0;
@@ -713,7 +715,6 @@ build_join_rel(PlannerInfo *root,
 	joinrel->partial_pathlist = NIL;
 	joinrel->cheapest_startup_path = NULL;
 	joinrel->cheapest_total_path = NULL;
-	joinrel->cheapest_unique_path = NULL;
 	joinrel->cheapest_parameterized_paths = NIL;
 	/* init direct_lateral_relids from children; we'll finish it up below */
 	joinrel->direct_lateral_relids =
@@ -748,6 +749,9 @@ build_join_rel(PlannerInfo *root,
 	joinrel->fdw_private = NULL;
 	joinrel->unique_for_rels = NIL;
 	joinrel->non_unique_for_rels = NIL;
+	joinrel->unique_rel = NULL;
+	joinrel->unique_pathkeys = NIL;
+	joinrel->unique_groupclause = NIL;
 	joinrel->baserestrictinfo = NIL;
 	joinrel->baserestrictcost.startup = 0;
 	joinrel->baserestrictcost.per_tuple = 0;
@@ -906,7 +910,6 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	joinrel->partial_pathlist = NIL;
 	joinrel->cheapest_startup_path = NULL;
 	joinrel->cheapest_total_path = NULL;
-	joinrel->cheapest_unique_path = NULL;
 	joinrel->cheapest_parameterized_paths = NIL;
 	joinrel->direct_lateral_relids = NULL;
 	joinrel->lateral_relids = NULL;
@@ -933,6 +936,9 @@ build_child_join_rel(PlannerInfo *root, RelOptInfo *outer_rel,
 	joinrel->useridiscurrent = false;
 	joinrel->fdwroutine = NULL;
 	joinrel->fdw_private = NULL;
+	joinrel->unique_rel = NULL;
+	joinrel->unique_pathkeys = NIL;
+	joinrel->unique_groupclause = NIL;
 	joinrel->baserestrictinfo = NIL;
 	joinrel->baserestrictcost.startup = 0;
 	joinrel->baserestrictcost.per_tuple = 0;
@@ -1488,7 +1494,6 @@ fetch_upper_rel(PlannerInfo *root, UpperRelationKind kind, Relids relids)
 	upperrel->pathlist = NIL;
 	upperrel->cheapest_startup_path = NULL;
 	upperrel->cheapest_total_path = NULL;
-	upperrel->cheapest_unique_path = NULL;
 	upperrel->cheapest_parameterized_paths = NIL;

 	root->upper_rels[kind] = lappend(root->upper_rels[kind], upperrel);