Improve planning of Materialize nodes inserted atop the inner input of a

mergejoin to shield it from doing mark/restore and refetches. Put an explicit flag in MergePath so we can centralize the logic that knows about this, and add costing logic that considers using Materialize even when it's not forced by the previously-existing considerations. This is in response to a discussion back in August that suggested that materializing an inner indexscan can be helpful when the refetch percentage is high enough.
2025-07-05 07:21:24 +03:00 · 2009-11-15 02:45:35 +00:00
parent 29faadcd27
commit caf9c830d9
7 changed files with 125 additions and 116 deletions
--- a/src/backend/optimizer/util/pathnode.c
+++ b/src/backend/optimizer/util/pathnode.c
@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.154 2009/09/17 20:49:29 tgl Exp $
+ *	  $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.155 2009/11/15 02:45:35 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@ -17,7 +17,6 @@
 #include <math.h>

 #include "catalog/pg_operator.h"
-#include "executor/executor.h"
 #include "miscadmin.h"
 #include "optimizer/clauses.h"
 #include "optimizer/cost.h"
@ -1414,47 +1413,6 @@ create_mergejoin_path(PlannerInfo *root,
 		pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
 		innersortkeys = NIL;

-	/*
-	 * If we are not sorting the inner path, we may need a materialize node to
-	 * ensure it can be marked/restored.
-	 *
-	 * Since the inner side must be ordered, and only Sorts and IndexScans can
-	 * create order to begin with, and they both support mark/restore, you
-	 * might think there's no problem --- but you'd be wrong.  Nestloop and
-	 * merge joins can *preserve* the order of their inputs, so they can be
-	 * selected as the input of a mergejoin, and they don't support
-	 * mark/restore at present.
-	 *
-	 * Note: Sort supports mark/restore, so no materialize is really needed in
-	 * that case; but one may be desirable anyway to optimize the sort.
-	 * However, since we aren't representing the sort step separately in the
-	 * Path tree, we can't explicitly represent the materialize either. So
-	 * that case is not handled here.  Instead, cost_mergejoin has to factor
-	 * in the cost and create_mergejoin_plan has to add the plan node.
-	 */
-	if (innersortkeys == NIL &&
-		!ExecSupportsMarkRestore(inner_path->pathtype))
-	{
-		Path	   *mpath;
-
-		mpath = (Path *) create_material_path(inner_path->parent, inner_path);
-
-		/*
-		 * We expect the materialize won't spill to disk (it could only do so
-		 * if there were a whole lot of duplicate tuples, which is a case
-		 * cost_mergejoin will avoid choosing anyway).	Therefore
-		 * cost_material's cost estimate is bogus and we should charge just
-		 * cpu_tuple_cost per tuple.  (Keep this estimate in sync with similar
-		 * ones in cost_mergejoin and create_mergejoin_plan; also see
-		 * cost_rescan.)
-		 */
-		mpath->startup_cost = inner_path->startup_cost;
-		mpath->total_cost = inner_path->total_cost;
-		mpath->total_cost += cpu_tuple_cost * inner_path->parent->rows;
-
-		inner_path = mpath;
-	}
-
 	pathnode->jpath.path.pathtype = T_MergeJoin;
 	pathnode->jpath.path.parent = joinrel;
 	pathnode->jpath.jointype = jointype;
@ -1465,6 +1423,7 @@ create_mergejoin_path(PlannerInfo *root,
 	pathnode->path_mergeclauses = mergeclauses;
 	pathnode->outersortkeys = outersortkeys;
 	pathnode->innersortkeys = innersortkeys;
+	/* pathnode->materialize_inner will be set by cost_mergejoin */

 	cost_mergejoin(pathnode, root, sjinfo);