Be more realistic about plans involving Materialize nodes: take their

cost into account while planning.
2025-11-13 16:22:44 +03:00 · 2002-11-30 05:21:03 +00:00
parent 829cedc8cf
commit 935969415a
19 changed files with 320 additions and 160 deletions
--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.92 2002/11/13 00:39:47 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -724,33 +724,34 @@ static void
 print_path(Query *root, Path *path, int indent)
 {
 	const char *ptype;
-	bool		join;
+	bool		join = false;
+	Path	   *subpath = NULL;
 	int			i;

 	switch (nodeTag(path))
 	{
 		case T_Path:
 			ptype = "SeqScan";
-			join = false;
 			break;
 		case T_IndexPath:
 			ptype = "IdxScan";
-			join = false;
 			break;
 		case T_TidPath:
 			ptype = "TidScan";
-			join = false;
 			break;
 		case T_AppendPath:
 			ptype = "Append";
-			join = false;
 			break;
 		case T_ResultPath:
 			ptype = "Result";
-			join = false;
+			subpath = ((ResultPath *) path)->subpath;
+			break;
+		case T_MaterialPath:
+			ptype = "Material";
+			subpath = ((MaterialPath *) path)->subpath;
 			break;
 		case T_NestPath:
-			ptype = "Nestloop";
+			ptype = "NestLoop";
 			join = true;
 			break;
 		case T_MergePath:
@@ -763,7 +764,6 @@ print_path(Query *root, Path *path, int indent)
 			break;
 		default:
 			ptype = "???Path";
-			join = false;
 			break;
 	}

@@ -814,6 +814,9 @@ print_path(Query *root, Path *path, int indent)
 		print_path(root, jp->outerjoinpath, indent + 1);
 		print_path(root, jp->innerjoinpath, indent + 1);
 	}
+
+	if (subpath)
+		print_path(root, subpath, indent + 1);
 }

 void
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -42,7 +42,7 @@
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.92 2002/11/30 00:08:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.93 2002/11/30 05:21:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -230,7 +230,7 @@ cost_index(Path *path, Query *root,
 	Assert(length(baserel->relids) == 1);
 	Assert(baserel->rtekind == RTE_RELATION);

-	if (!enable_indexscan && !is_injoin)
+	if (!enable_indexscan)
 		startup_cost += disable_cost;

 	/*
@@ -513,6 +513,43 @@ cost_sort(Path *path, Query *root,
 	path->total_cost = startup_cost + run_cost;
 }

+/*
+ * cost_material
+ *	  Determines and returns the cost of materializing a relation, including
+ *	  the cost of reading the input data.
+ *
+ * If the total volume of data to materialize exceeds SortMem, we will need
+ * to write it to disk, so the cost is much higher in that case.
+ */
+void
+cost_material(Path *path,
+			  Cost input_cost, double tuples, int width)
+{
+	Cost		startup_cost = input_cost;
+	Cost		run_cost = 0;
+	double		nbytes = relation_byte_size(tuples, width);
+	long		sortmembytes = SortMem * 1024L;
+
+	/* disk costs */
+	if (nbytes > sortmembytes)
+	{
+		double		npages = ceil(nbytes / BLCKSZ);
+
+		/* We'll write during startup and read during retrieval */
+		startup_cost += npages;
+		run_cost += npages;
+	}
+
+	/*
+	 * Also charge a small amount per extracted tuple.  We use cpu_tuple_cost
+	 * so that it doesn't appear worthwhile to materialize a bare seqscan.
+	 */
+	run_cost += cpu_tuple_cost * tuples;
+
+	path->startup_cost = startup_cost;
+	path->total_cost = startup_cost + run_cost;
+}
+
 /*
 * cost_agg
 *		Determines and returns the cost of performing an Agg plan node,
@@ -630,19 +667,17 @@ cost_nestloop(Path *path, Query *root,
 	 * before we can start returning tuples, so the join's startup cost is
 	 * their sum.  What's not so clear is whether the inner path's
 	 * startup_cost must be paid again on each rescan of the inner path.
-	 * This is not true if the inner path is materialized, but probably is
-	 * true otherwise.	Since we don't yet have clean handling of the
-	 * decision whether to materialize a path, we can't tell here which
-	 * will happen.  As a compromise, charge 50% of the inner startup cost
-	 * for each restart.
+	 * This is not true if the inner path is materialized or is a hashjoin,
+	 * but probably is true otherwise.
 	 */
 	startup_cost += outer_path->startup_cost + inner_path->startup_cost;
 	run_cost += outer_path->total_cost - outer_path->startup_cost;
 	run_cost += outer_path->parent->rows *
 		(inner_path->total_cost - inner_path->startup_cost);
-	if (outer_path->parent->rows > 1)
-		run_cost += (outer_path->parent->rows - 1) *
-			inner_path->startup_cost * 0.5;
+	if (!(IsA(inner_path, MaterialPath) ||
+		  IsA(inner_path, HashPath)) &&
+		outer_path->parent->rows > 1)
+		run_cost += (outer_path->parent->rows - 1) * inner_path->startup_cost;

 	/*
 	 * Number of tuples processed (not number emitted!).  If inner path is
@@ -1544,7 +1579,7 @@ set_rel_width(Query *root, RelOptInfo *rel)
 static double
 relation_byte_size(double tuples, int width)
 {
-	return tuples * ((double) MAXALIGN(width + sizeof(HeapTupleData)));
+	return tuples * (MAXALIGN(width) + MAXALIGN(sizeof(HeapTupleData)));
 }

 /*
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.73 2002/11/30 00:08:16 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.74 2002/11/30 05:21:02 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -281,8 +281,9 @@ sort_inner_and_outer(Query *root,
 *	  only outer paths that are already ordered well enough for merging).
 *
 * We always generate a nestloop path for each available outer path.
- * In fact we may generate as many as three: one on the cheapest-total-cost
- * inner path, one on the cheapest-startup-cost inner path (if different),
+ * In fact we may generate as many as four: one on the cheapest-total-cost
+ * inner path, one on the same with materialization, one on the
+ * cheapest-startup-cost inner path (if different), 
 * and one on the best inner-indexscan path (if any).
 *
 * We also consider mergejoins if mergejoin clauses are available.	We have
@@ -315,7 +316,8 @@ match_unsorted_outer(Query *root,
 {
 	bool		nestjoinOK;
 	bool		useallclauses;
-	Path	   *bestinnerjoin;
+	Path	   *matpath = NULL;
+	Path	   *bestinnerjoin = NULL;
 	List	   *i;

 	/*
@@ -345,12 +347,26 @@ match_unsorted_outer(Query *root,
 			break;
 	}

-	/*
-	 * Get the best innerjoin indexpath (if any) for this outer rel. It's
-	 * the same for all outer paths.
-	 */
-	bestinnerjoin = best_inner_indexscan(root, innerrel,
-										 outerrel->relids, jointype);
+	if (nestjoinOK)
+	{
+		/*
+		 * If the cheapest inner path is a join or seqscan, we should consider
+		 * materializing it.  (This is a heuristic: we could consider it
+		 * always, but for inner indexscans it's probably a waste of time.)
+		 */
+		if (!(IsA(innerrel->cheapest_total_path, IndexPath) ||
+			  IsA(innerrel->cheapest_total_path, TidPath)))
+			matpath = (Path *)
+				create_material_path(innerrel, 
+									 innerrel->cheapest_total_path);
+
+		/*
+		 * Get the best innerjoin indexpath (if any) for this outer rel. It's
+		 * the same for all outer paths.
+		 */
+		bestinnerjoin = best_inner_indexscan(root, innerrel,
+											 outerrel->relids, jointype);
+	}

 	foreach(i, outerrel->pathlist)
 	{
@@ -376,8 +392,9 @@ match_unsorted_outer(Query *root,
 		{
 			/*
 			 * Always consider a nestloop join with this outer and
-			 * cheapest-total-cost inner.	Consider nestloops using the
-			 * cheapest-startup-cost inner as well, and the best innerjoin
+			 * cheapest-total-cost inner.  When appropriate, also consider
+			 * using the materialized form of the cheapest inner, the
+			 * cheapest-startup-cost inner path, and the best innerjoin
 			 * indexpath.
 			 */
 			add_path(joinrel, (Path *)
@@ -388,6 +405,15 @@ match_unsorted_outer(Query *root,
 										  innerrel->cheapest_total_path,
 										  restrictlist,
 										  merge_pathkeys));
+			if (matpath != NULL)
+				add_path(joinrel, (Path *)
+						 create_nestloop_path(root,
+											  joinrel,
+											  jointype,
+											  outerpath,
+											  matpath,
+											  restrictlist,
+											  merge_pathkeys));
 			if (innerrel->cheapest_startup_path !=
 				innerrel->cheapest_total_path)
 				add_path(joinrel, (Path *)