Convert SetOp to read its inputs as outerPlan and innerPlan.

The original design for set operations involved appending the two input relations into one and adding a flag column that allows distinguishing which side each row came from. Then the SetOp node pries them apart again based on the flag. This is bizarre. The only apparent reason to do it is that when sorting, we'd only need one Sort node not two. But since sorting is at least O(N log N), sorting all the data is actually worse than sorting each side separately --- plus, we have no chance of taking advantage of presorted input. On top of that, adding the flag column frequently requires an additional projection step that adds cycles, and then the Append node isn't free either. Let's get rid of all of that and make the SetOp node have two separate children, using the existing outerPlan/innerPlan infrastructure. This initial patch re-implements nodeSetop.c and does a bare minimum of work on the planner side to generate correctly-shaped plans. In particular, I've tried not to change the cost estimates here, so that the visible changes in the regression test results will only involve removal of useless projection steps and not any changes in whether to use sorted vs hashed mode. For SORTED mode, we combine successive identical tuples from each input into groups, and then merge-join the groups. The tuple comparisons now use SortSupport instead of simple equality, but the group-formation part should involve roughly the same number of tuple comparisons as before. The cross-comparisons between left and right groups probably add to that, but I'm not sure how to quantify how many more comparisons we might need. For HASHED mode, nodeSetop's logic is almost the same as before, just refactored into two separate loops instead of one loop that has an assumption that it will see all the left-hand inputs first. 
In both modes, I added early-exit logic to not bother reading the right-hand relation if the left-hand input is empty, since neither INTERSECT nor EXCEPT modes can produce any output if the left input is empty. This could have been done before in the hashed mode, but not in sorted mode. Sorted mode can also stop as soon as it exhausts the left input; any remaining right-hand tuples cannot have matches. Also, this patch adds some infrastructure for detecting whether child plan nodes all output the same type of tuple table slot. If they do, the hash table logic can use slightly more efficient code based on assuming that that's the input slot type it will see. We'll make use of that infrastructure in other plan node types later. Patch by me; thanks to Richard Guo and David Rowley for review. Discussion: https://postgr.es/m/1850138.1731549611@sss.pgh.pa.us
2025-11-19 13:42:17 +03:00 · 2024-12-19 16:23:45 -05:00
parent 2128cebcdb
commit 2762792952
15 changed files with 697 additions and 525 deletions
--- a/src/backend/optimizer/README
+++ b/src/backend/optimizer/README
@@ -649,7 +649,7 @@ RelOptInfo      - a relation or joined relations
  GroupingSetsPath - an Agg plan node used to implement GROUPING SETS
  MinMaxAggPath - a Result plan node with subplans performing MIN/MAX
  WindowAggPath - a WindowAgg plan node applied to some sub-path
-  SetOpPath     - a SetOp plan node applied to some sub-path
+  SetOpPath     - a SetOp plan node applied to two sub-paths
  RecursiveUnionPath - a RecursiveUnion plan node applied to two sub-paths
  LockRowsPath  - a LockRows plan node applied to some sub-path
  ModifyTablePath - a ModifyTable plan node applied to some sub-path(s)
--- a/src/backend/optimizer/plan/createplan.c
+++ b/src/backend/optimizer/plan/createplan.c
@@ -301,9 +301,9 @@ static Unique *make_unique_from_pathkeys(Plan *lefttree,
 										 List *pathkeys, int numCols);
 static Gather *make_gather(List *qptlist, List *qpqual,
 						   int nworkers, int rescan_param, bool single_copy, Plan *subplan);
-static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-						 List *distinctList, AttrNumber flagColIdx, int firstFlag,
-						 long numGroups);
+static SetOp *make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+						 List *tlist, Plan *lefttree, Plan *righttree,
+						 List *groupList, long numGroups);
 static LockRows *make_lockrows(Plan *lefttree, List *rowMarks, int epqParam);
 static Result *make_result(List *tlist, Node *resconstantqual, Plan *subplan);
 static ProjectSet *make_project_set(List *tlist, Plan *subplan);
@@ -2719,25 +2719,29 @@ static SetOp *
 create_setop_plan(PlannerInfo *root, SetOpPath *best_path, int flags)
 {
 	SetOp	   *plan;
-	Plan	   *subplan;
+	List	   *tlist = build_path_tlist(root, &best_path->path);
+	Plan	   *leftplan;
+	Plan	   *rightplan;
 	long		numGroups;

 	/*
 	 * SetOp doesn't project, so tlist requirements pass through; moreover we
 	 * need grouping columns to be labeled.
 	 */
-	subplan = create_plan_recurse(root, best_path->subpath,
-								  flags | CP_LABEL_TLIST);
+	leftplan = create_plan_recurse(root, best_path->leftpath,
+								   flags | CP_LABEL_TLIST);
+	rightplan = create_plan_recurse(root, best_path->rightpath,
+									flags | CP_LABEL_TLIST);

 	/* Convert numGroups to long int --- but 'ware overflow! */
 	numGroups = clamp_cardinality_to_long(best_path->numGroups);

 	plan = make_setop(best_path->cmd,
 					  best_path->strategy,
-					  subplan,
-					  best_path->distinctList,
-					  best_path->flagColIdx,
-					  best_path->firstFlag,
+					  tlist,
+					  leftplan,
+					  rightplan,
+					  best_path->groupList,
 					  numGroups);

 	copy_generic_path_info(&plan->plan, (Path *) best_path);
@@ -6950,57 +6954,62 @@ make_gather(List *qptlist,
 }

 /*
- * distinctList is a list of SortGroupClauses, identifying the targetlist
- * items that should be considered by the SetOp filter.  The input path must
- * already be sorted accordingly.
+ * groupList is a list of SortGroupClauses, identifying the targetlist
+ * items that should be considered by the SetOp filter.  The input plans must
+ * already be sorted accordingly, if we're doing SETOP_SORTED mode.
 */
 static SetOp *
-make_setop(SetOpCmd cmd, SetOpStrategy strategy, Plan *lefttree,
-		   List *distinctList, AttrNumber flagColIdx, int firstFlag,
-		   long numGroups)
+make_setop(SetOpCmd cmd, SetOpStrategy strategy,
+		   List *tlist, Plan *lefttree, Plan *righttree,
+		   List *groupList, long numGroups)
 {
 	SetOp	   *node = makeNode(SetOp);
 	Plan	   *plan = &node->plan;
-	int			numCols = list_length(distinctList);
+	int			numCols = list_length(groupList);
 	int			keyno = 0;
-	AttrNumber *dupColIdx;
-	Oid		   *dupOperators;
-	Oid		   *dupCollations;
+	AttrNumber *cmpColIdx;
+	Oid		   *cmpOperators;
+	Oid		   *cmpCollations;
+	bool	   *cmpNullsFirst;
 	ListCell   *slitem;

-	plan->targetlist = lefttree->targetlist;
+	plan->targetlist = tlist;
 	plan->qual = NIL;
 	plan->lefttree = lefttree;
-	plan->righttree = NULL;
+	plan->righttree = righttree;

 	/*
-	 * convert SortGroupClause list into arrays of attr indexes and equality
+	 * convert SortGroupClause list into arrays of attr indexes and comparison
 	 * operators, as wanted by executor
 	 */
-	dupColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
-	dupOperators = (Oid *) palloc(sizeof(Oid) * numCols);
-	dupCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+	cmpColIdx = (AttrNumber *) palloc(sizeof(AttrNumber) * numCols);
+	cmpOperators = (Oid *) palloc(sizeof(Oid) * numCols);
+	cmpCollations = (Oid *) palloc(sizeof(Oid) * numCols);
+	cmpNullsFirst = (bool *) palloc(sizeof(bool) * numCols);

-	foreach(slitem, distinctList)
+	foreach(slitem, groupList)
 	{
 		SortGroupClause *sortcl = (SortGroupClause *) lfirst(slitem);
 		TargetEntry *tle = get_sortgroupclause_tle(sortcl, plan->targetlist);

-		dupColIdx[keyno] = tle->resno;
-		dupOperators[keyno] = sortcl->eqop;
-		dupCollations[keyno] = exprCollation((Node *) tle->expr);
-		Assert(OidIsValid(dupOperators[keyno]));
+		cmpColIdx[keyno] = tle->resno;
+		if (strategy == SETOP_HASHED)
+			cmpOperators[keyno] = sortcl->eqop;
+		else
+			cmpOperators[keyno] = sortcl->sortop;
+		Assert(OidIsValid(cmpOperators[keyno]));
+		cmpCollations[keyno] = exprCollation((Node *) tle->expr);
+		cmpNullsFirst[keyno] = sortcl->nulls_first;
 		keyno++;
 	}

 	node->cmd = cmd;
 	node->strategy = strategy;
 	node->numCols = numCols;
-	node->dupColIdx = dupColIdx;
-	node->dupOperators = dupOperators;
-	node->dupCollations = dupCollations;
-	node->flagColIdx = flagColIdx;
-	node->firstFlag = firstFlag;
+	node->cmpColIdx = cmpColIdx;
+	node->cmpOperators = cmpOperators;
+	node->cmpCollations = cmpCollations;
+	node->cmpNullsFirst = cmpNullsFirst;
 	node->numGroups = numGroups;

 	return node;
--- a/src/backend/optimizer/prep/prepunion.c
+++ b/src/backend/optimizer/prep/prepunion.c
@@ -65,7 +65,7 @@ static List *plan_union_children(PlannerInfo *root,
 								 List **istrivial_tlist);
 static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
 static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-								Path *input_path,
+								Path *lpath, Path *rpath,
 								double dNumGroups, double dNumOutputRows,
 								const char *construct);
 static List *generate_setop_tlist(List *colTypes, List *colCollations,
@@ -315,8 +315,8 @@ recurse_set_operations(Node *setOp, PlannerInfo *root,
 		 * to the corresponding tlist entries of the subplan. However, since
 		 * the subplan was generated by generate_union_paths() or
 		 * generate_nonunion_paths(), and hence its tlist was generated by
-		 * generate_append_tlist(), this will work.  We just tell
-		 * generate_setop_tlist() to use varno 0.
+		 * generate_append_tlist() or generate_setop_tlist(), this will work.
+		 * We just tell generate_setop_tlist() to use varno 0.
 		 */
 		if (flag >= 0 ||
 			!tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
@@ -1028,29 +1028,27 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 			   *path;
 	List	   *lpath_tlist,
 			   *rpath_tlist,
-			   *tlist_list,
 			   *tlist,
-			   *groupList,
-			   *pathlist;
+			   *groupList;
 	bool		lpath_trivial_tlist,
-				rpath_trivial_tlist;
+				rpath_trivial_tlist,
+				result_trivial_tlist;
 	double		dLeftGroups,
 				dRightGroups,
 				dNumGroups,
 				dNumOutputRows;
 	bool		use_hash;
 	SetOpCmd	cmd;
-	int			firstFlag;

 	/*
 	 * Tell children to fetch all tuples.
 	 */
 	root->tuple_fraction = 0.0;

-	/* Recurse on children, ensuring their outputs are marked */
+	/* Recurse on children */
 	lrel = recurse_set_operations(op->larg, root,
 								  op->colTypes, op->colCollations,
-								  false, 0,
+								  false, -1,
 								  refnames_tlist,
 								  &lpath_tlist,
 								  &lpath_trivial_tlist);
@@ -1060,10 +1058,9 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	else
 		dLeftGroups = lrel->rows;

-	lpath = lrel->cheapest_total_path;
 	rrel = recurse_set_operations(op->rarg, root,
 								  op->colTypes, op->colCollations,
-								  false, 1,
+								  false, -1,
 								  refnames_tlist,
 								  &rpath_tlist,
 								  &rpath_trivial_tlist);
@@ -1073,41 +1070,51 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	else
 		dRightGroups = rrel->rows;

-	rpath = rrel->cheapest_total_path;
-
 	/* Undo effects of forcing tuple_fraction to 0 */
 	root->tuple_fraction = save_fraction;

 	/*
 	 * For EXCEPT, we must put the left input first.  For INTERSECT, either
 	 * order should give the same results, and we prefer to put the smaller
-	 * input first in order to minimize the size of the hash table in the
-	 * hashing case.  "Smaller" means the one with the fewer groups.
+	 * input first in order to (a) minimize the size of the hash table in the
+	 * hashing case, and (b) improve our chances of exploiting the executor's
+	 * fast path for empty left-hand input.  "Smaller" means the one with the
+	 * fewer groups.
 	 */
-	if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
+	if (op->op != SETOP_EXCEPT && dLeftGroups > dRightGroups)
 	{
-		pathlist = list_make2(lpath, rpath);
-		tlist_list = list_make2(lpath_tlist, rpath_tlist);
-		firstFlag = 0;
-	}
-	else
-	{
-		pathlist = list_make2(rpath, lpath);
-		tlist_list = list_make2(rpath_tlist, lpath_tlist);
-		firstFlag = 1;
+		/* need to swap the two inputs */
+		RelOptInfo *tmprel;
+		List	   *tmplist;
+		double		tmpd;
+
+		tmprel = lrel;
+		lrel = rrel;
+		rrel = tmprel;
+		tmplist = lpath_tlist;
+		lpath_tlist = rpath_tlist;
+		rpath_tlist = tmplist;
+		tmpd = dLeftGroups;
+		dLeftGroups = dRightGroups;
+		dRightGroups = tmpd;
 	}

+	lpath = lrel->cheapest_total_path;
+	rpath = rrel->cheapest_total_path;
+
 	/*
-	 * Generate tlist for Append plan node.
+	 * Generate tlist for SetOp plan node.
 	 *
-	 * The tlist for an Append plan isn't important as far as the Append is
+	 * The tlist for a SetOp plan isn't important so far as the SetOp is
 	 * concerned, but we must make it look real anyway for the benefit of the
-	 * next plan level up.  In fact, it has to be real enough that the flag
-	 * column is shown as a variable not a constant, else setrefs.c will get
-	 * confused.
+	 * next plan level up.
 	 */
-	tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
-								  tlist_list, refnames_tlist);
+	tlist = generate_setop_tlist(op->colTypes, op->colCollations, -1,
+								 0, false, lpath_tlist, refnames_tlist,
+								 &result_trivial_tlist);
+
+	/* We should not have needed any type coercions in the tlist */
+	Assert(result_trivial_tlist);

 	*pTargetList = tlist;

@@ -1116,12 +1123,6 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 								 bms_union(lrel->relids, rrel->relids));
 	result_rel->reltarget = create_pathtarget(root, tlist);

-	/*
-	 * Append the child results together.
-	 */
-	path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
-									   NIL, NULL, 0, false, -1);
-
 	/* Identify the grouping semantics */
 	groupList = generate_setop_grouplist(op, tlist);

@@ -1140,25 +1141,40 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	}
 	else
 	{
-		dNumGroups = Min(dLeftGroups, dRightGroups);
+		dNumGroups = dLeftGroups;
 		dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
 	}

 	/*
-	 * Decide whether to hash or sort, and add a sort node if needed.
+	 * Decide whether to hash or sort, and add sort nodes if needed.
 	 */
-	use_hash = choose_hashed_setop(root, groupList, path,
+	use_hash = choose_hashed_setop(root, groupList, lpath, rpath,
 								   dNumGroups, dNumOutputRows,
 								   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");

 	if (groupList && !use_hash)
-		path = (Path *) create_sort_path(root,
-										 result_rel,
-										 path,
-										 make_pathkeys_for_sortclauses(root,
-																	   groupList,
-																	   tlist),
-										 -1.0);
+	{
+		List	   *pathkeys;
+
+		pathkeys = make_pathkeys_for_sortclauses(root,
+												 groupList,
+												 lpath_tlist);
+		if (!pathkeys_contained_in(pathkeys, lpath->pathkeys))
+			lpath = (Path *) create_sort_path(root,
+											  lpath->parent,
+											  lpath,
+											  pathkeys,
+											  -1.0);
+		pathkeys = make_pathkeys_for_sortclauses(root,
+												 groupList,
+												 rpath_tlist);
+		if (!pathkeys_contained_in(pathkeys, rpath->pathkeys))
+			rpath = (Path *) create_sort_path(root,
+											  rpath->parent,
+											  rpath,
+											  pathkeys,
+											  -1.0);
+	}

 	/*
 	 * Finally, add a SetOp path node to generate the correct output.
@@ -1178,12 +1194,11 @@ generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
 	}
 	path = (Path *) create_setop_path(root,
 									  result_rel,
-									  path,
+									  lpath,
+									  rpath,
 									  cmd,
 									  use_hash ? SETOP_HASHED : SETOP_SORTED,
 									  groupList,
-									  list_length(op->colTypes) + 1,
-									  use_hash ? firstFlag : -1,
 									  dNumGroups,
 									  dNumOutputRows);

@@ -1285,10 +1300,13 @@ postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)

 /*
 * choose_hashed_setop - should we use hashing for a set operation?
+ *
+ * XXX probably this should go away: just make both paths and let
+ * add_path sort it out.
 */
 static bool
 choose_hashed_setop(PlannerInfo *root, List *groupClauses,
-					Path *input_path,
+					Path *lpath, Path *rpath,
 					double dNumGroups, double dNumOutputRows,
 					const char *construct)
 {
@@ -1327,7 +1345,7 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	 * Don't do it if it doesn't look like the hashtable will fit into
 	 * hash_mem.
 	 */
-	hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);
+	hashentrysize = MAXALIGN(lpath->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);

 	if (hashentrysize * dNumGroups > hash_mem_limit)
 		return false;
@@ -1336,9 +1354,9 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	 * See if the estimated cost is no more than doing it the other way.
 	 *
 	 * We need to consider input_plan + hashagg versus input_plan + sort +
-	 * group.  Note that the actual result plan might involve a SetOp or
-	 * Unique node, not Agg or Group, but the cost estimates for Agg and Group
-	 * should be close enough for our purposes here.
+	 * group. XXX NOT TRUE: Note that the actual result plan might involve a
+	 * SetOp or Unique node, not Agg or Group, but the cost estimates for Agg
+	 * and Group should be close enough for our purposes here.
 	 *
 	 * These path variables are dummies that just hold cost fields; we don't
 	 * make actual Paths for these steps.
@@ -1346,27 +1364,31 @@ choose_hashed_setop(PlannerInfo *root, List *groupClauses,
 	cost_agg(&hashed_p, root, AGG_HASHED, NULL,
 			 numGroupCols, dNumGroups,
 			 NIL,
-			 input_path->disabled_nodes,
-			 input_path->startup_cost, input_path->total_cost,
-			 input_path->rows, input_path->pathtarget->width);
+			 lpath->disabled_nodes + rpath->disabled_nodes,
+			 lpath->startup_cost + rpath->startup_cost,
+			 lpath->total_cost + rpath->total_cost,
+			 lpath->rows + rpath->rows,
+			 lpath->pathtarget->width);

 	/*
-	 * Now for the sorted case.  Note that the input is *always* unsorted,
-	 * since it was made by appending unrelated sub-relations together.
+	 * Now for the sorted case.  XXX NOT TRUE: Note that the input is *always*
+	 * unsorted, since it was made by appending unrelated sub-relations
+	 * together.
 	 */
-	sorted_p.disabled_nodes = input_path->disabled_nodes;
-	sorted_p.startup_cost = input_path->startup_cost;
-	sorted_p.total_cost = input_path->total_cost;
+	sorted_p.disabled_nodes = lpath->disabled_nodes + rpath->disabled_nodes;
+	sorted_p.startup_cost = lpath->startup_cost + rpath->startup_cost;
+	sorted_p.total_cost = lpath->total_cost + rpath->total_cost;
 	/* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
 	cost_sort(&sorted_p, root, NIL, sorted_p.disabled_nodes,
 			  sorted_p.total_cost,
-			  input_path->rows, input_path->pathtarget->width,
+			  lpath->rows + rpath->rows,
+			  lpath->pathtarget->width,
 			  0.0, work_mem, -1.0);
 	cost_group(&sorted_p, root, numGroupCols, dNumGroups,
 			   NIL,
 			   sorted_p.disabled_nodes,
 			   sorted_p.startup_cost, sorted_p.total_cost,
-			   input_path->rows);
+			   lpath->rows + rpath->rows);

 	/*
 	 * Now make the decision using the top-level tuple fraction.  First we
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@@ -3634,25 +3634,26 @@ create_windowagg_path(PlannerInfo *root,
 *	  Creates a pathnode that represents computation of INTERSECT or EXCEPT
 *
 * 'rel' is the parent relation associated with the result
- * 'subpath' is the path representing the source of data
+ * 'leftpath' is the path representing the left-hand source of data
+ * 'rightpath' is the path representing the right-hand source of data
 * 'cmd' is the specific semantics (INTERSECT or EXCEPT, with/without ALL)
 * 'strategy' is the implementation strategy (sorted or hashed)
- * 'distinctList' is a list of SortGroupClause's representing the grouping
- * 'flagColIdx' is the column number where the flag column will be, if any
- * 'firstFlag' is the flag value for the first input relation when hashing;
- *		or -1 when sorting
- * 'numGroups' is the estimated number of distinct groups
+ * 'groupList' is a list of SortGroupClause's representing the grouping
+ * 'numGroups' is the estimated number of distinct groups in left-hand input
 * 'outputRows' is the estimated number of output rows
+ *
+ * leftpath and rightpath must produce the same columns.  Moreover, if
+ * strategy is SETOP_SORTED, leftpath and rightpath must both be sorted
+ * by all the grouping columns.
 */
 SetOpPath *
 create_setop_path(PlannerInfo *root,
 				  RelOptInfo *rel,
-				  Path *subpath,
+				  Path *leftpath,
+				  Path *rightpath,
 				  SetOpCmd cmd,
 				  SetOpStrategy strategy,
-				  List *distinctList,
-				  AttrNumber flagColIdx,
-				  int firstFlag,
+				  List *groupList,
 				  double numGroups,
 				  double outputRows)
 {
@@ -3660,34 +3661,37 @@ create_setop_path(PlannerInfo *root,

 	pathnode->path.pathtype = T_SetOp;
 	pathnode->path.parent = rel;
-	/* SetOp doesn't project, so use source path's pathtarget */
-	pathnode->path.pathtarget = subpath->pathtarget;
+	pathnode->path.pathtarget = rel->reltarget;
 	/* For now, assume we are above any joins, so no parameterization */
 	pathnode->path.param_info = NULL;
 	pathnode->path.parallel_aware = false;
 	pathnode->path.parallel_safe = rel->consider_parallel &&
-		subpath->parallel_safe;
-	pathnode->path.parallel_workers = subpath->parallel_workers;
+		leftpath->parallel_safe && rightpath->parallel_safe;
+	pathnode->path.parallel_workers =
+		leftpath->parallel_workers + rightpath->parallel_workers;
 	/* SetOp preserves the input sort order if in sort mode */
 	pathnode->path.pathkeys =
-		(strategy == SETOP_SORTED) ? subpath->pathkeys : NIL;
+		(strategy == SETOP_SORTED) ? leftpath->pathkeys : NIL;

-	pathnode->subpath = subpath;
+	pathnode->leftpath = leftpath;
+	pathnode->rightpath = rightpath;
 	pathnode->cmd = cmd;
 	pathnode->strategy = strategy;
-	pathnode->distinctList = distinctList;
-	pathnode->flagColIdx = flagColIdx;
-	pathnode->firstFlag = firstFlag;
+	pathnode->groupList = groupList;
 	pathnode->numGroups = numGroups;

 	/*
 	 * Charge one cpu_operator_cost per comparison per input tuple. We assume
 	 * all columns get compared at most of the tuples.
+	 *
+	 * XXX all wrong for hashing
 	 */
-	pathnode->path.disabled_nodes = subpath->disabled_nodes;
-	pathnode->path.startup_cost = subpath->startup_cost;
-	pathnode->path.total_cost = subpath->total_cost +
-		cpu_operator_cost * subpath->rows * list_length(distinctList);
+	pathnode->path.disabled_nodes =
+		leftpath->disabled_nodes + rightpath->disabled_nodes;
+	pathnode->path.startup_cost =
+		leftpath->startup_cost + rightpath->startup_cost;
+	pathnode->path.total_cost = leftpath->total_cost + rightpath->total_cost +
+		cpu_operator_cost * (leftpath->rows + rightpath->rows) * list_length(groupList);
 	pathnode->path.rows = outputRows;

 	return pathnode;