Revise handling of index-type-specific indexscan cost estimation, per the
pghackers discussion of 5-Jan-2000.

The amopselect and amopnpages estimators are gone, and in their place is a per-AM amcostestimate procedure (linked to from pg_am, not pg_amop).
2025-11-15 03:41:20 +03:00 · 2000-01-22 23:50:30 +00:00
parent 78845177bb
commit 71ed7eb494
30 changed files with 502 additions and 1113 deletions
--- a/src/backend/optimizer/path/_deadcode/xfunc.c
+++ b/src/backend/optimizer/path/_deadcode/xfunc.c
@@ -9,7 +9,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/_deadcode/Attic/xfunc.c,v 1.11 1999/11/22 17:56:10 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/_deadcode/Attic/xfunc.c,v 1.12 2000/01/22 23:50:13 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1100,27 +1100,27 @@ xfunc_expense_per_tuple(JoinPath joinnode, int whichchild)
 	if (IsA(joinnode, HashPath))
 	{
 		if (whichchild == INNER)
-			return (1 + _CPU_PAGE_WEIGHT_) * outers_per_page / NBuffers;
+			return (1 + cpu_page_weight) * outers_per_page / NBuffers;
 		else
-			return (((1 + _CPU_PAGE_WEIGHT_) * outers_per_page / NBuffers)
-					+ _CPU_PAGE_WEIGHT_
+			return (((1 + cpu_page_weight) * outers_per_page / NBuffers)
+					+ cpu_page_weight
 					/ xfunc_card_product(get_relids(innerrel)));
 	}
 	else if (IsA(joinnode, MergePath))
 	{
 		/* assumes sort exists, and costs one (I/O + CPU) per tuple */
 		if (whichchild == INNER)
-			return ((2 * _CPU_PAGE_WEIGHT_ + 1)
+			return ((2 * cpu_page_weight + 1)
 					/ xfunc_card_product(get_relids(outerrel)));
 		else
-			return ((2 * _CPU_PAGE_WEIGHT_ + 1)
+			return ((2 * cpu_page_weight + 1)
 					/ xfunc_card_product(get_relids(innerrel)));
 	}
 	else
 /* nestloop */
 	{
 		Assert(IsA(joinnode, JoinPath));
-		return _CPU_PAGE_WEIGHT_;
+		return cpu_page_weight;
 	}
 }

--- a/src/backend/optimizer/path/allpaths.c
+++ b/src/backend/optimizer/path/allpaths.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.55 2000/01/09 00:26:29 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/allpaths.c,v 1.56 2000/01/22 23:50:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -21,13 +21,12 @@
 #include "optimizer/paths.h"

 #ifdef GEQO
-bool		_use_geqo_ = true;
-
+bool		enable_geqo = true;
 #else
-bool		_use_geqo_ = false;
-
+bool		enable_geqo = false;
 #endif
-int32		_use_geqo_rels_ = GEQO_RELS;
+
+int			geqo_rels = GEQO_RELS;


 static void set_base_rel_pathlist(Query *root, List *rels);
@@ -165,11 +164,11 @@ make_one_rel_by_joins(Query *root, List *rels, int levels_needed)
 	 * genetic query optimizer entry point	   *
 	 *	  <utesch@aut.tu-freiberg.de>		   *
 	 *******************************************/
-	if ((_use_geqo_) && length(root->base_rel_list) >= _use_geqo_rels_)
+	if (enable_geqo && length(root->base_rel_list) >= geqo_rels)
 		return geqo(root);

 	/*******************************************
-	 * rest will be deprecated in case of GEQO *
+	 * rest will be skipped in case of GEQO    *
 	 *******************************************/

 	while (--levels_needed)
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -5,20 +5,20 @@
 *
 * Path costs are measured in units of disk accesses: one page fetch
 * has cost 1.  The other primitive unit is the CPU time required to
- * process one tuple, which we set at "_cpu_page_weight_" of a page
+ * process one tuple, which we set at "cpu_page_weight" of a page
 * fetch.  Obviously, the CPU time per tuple depends on the query
 * involved, but the relative CPU and disk speeds of a given platform
 * are so variable that we are lucky if we can get useful numbers
- * at all.  _cpu_page_weight_ is user-settable, in case a particular
+ * at all.  cpu_page_weight is user-settable, in case a particular
 * user is clueful enough to have a better-than-default estimate
- * of the ratio for his platform.  There is also _cpu_index_page_weight_,
+ * of the ratio for his platform.  There is also cpu_index_page_weight,
 * the cost to process a tuple of an index during an index scan.
 *
 * 
 * Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.47 2000/01/09 00:26:31 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.48 2000/01/22 23:50:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -44,6 +44,20 @@
 #include "utils/lsyscache.h"


+Cost		cpu_page_weight = CPU_PAGE_WEIGHT;
+Cost		cpu_index_page_weight = CPU_INDEX_PAGE_WEIGHT;
+
+Cost		disable_cost = 100000000.0;
+
+bool		enable_seqscan = true;
+bool		enable_indexscan = true;
+bool		enable_tidscan = true;
+bool		enable_sort = true;
+bool		enable_nestloop = true;
+bool		enable_mergejoin = true;
+bool		enable_hashjoin = true;
+
+
 static void set_rel_width(Query *root, RelOptInfo *rel);
 static int	compute_attribute_width(TargetEntry *tlistentry);
 static double relation_byte_size(double tuples, int width);
@@ -51,19 +65,6 @@ static double page_size(double tuples, int width);
 static double base_log(double x, double b);


-Cost		_cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
-Cost		_cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
-
-Cost		_disable_cost_ = 100000000.0;
-
-bool		_enable_seqscan_ = true;
-bool		_enable_indexscan_ = true;
-bool		_enable_sort_ = true;
-bool		_enable_nestloop_ = true;
-bool		_enable_mergejoin_ = true;
-bool		_enable_hashjoin_ = true;
-bool		_enable_tidscan_ = true;
-
 /*
 * cost_seqscan
 *	  Determines and returns the cost of scanning a relation sequentially.
@@ -84,8 +85,8 @@ cost_seqscan(RelOptInfo *baserel)
 	/* Should only be applied to base relations */
 	Assert(length(baserel->relids) == 1);

-	if (!_enable_seqscan_)
-		temp += _disable_cost_;
+	if (!enable_seqscan)
+		temp += disable_cost;

 	if (lfirsti(baserel->relids) < 0)
 	{
@@ -97,7 +98,7 @@ cost_seqscan(RelOptInfo *baserel)
 	else
 	{
 		temp += baserel->pages;
-		temp += _cpu_page_weight_ * baserel->tuples;
+		temp += cpu_page_weight * baserel->tuples;
 	}

 	Assert(temp >= 0);
@@ -109,58 +110,54 @@ cost_seqscan(RelOptInfo *baserel)
 * cost_index
 *	  Determines and returns the cost of scanning a relation using an index.
 *
- *		disk = expected-index-pages + expected-data-pages
- *		cpu = CPU-INDEX-PAGE-WEIGHT * expected-index-tuples +
- *		      CPU-PAGE-WEIGHT * expected-data-tuples
+ *	  NOTE: an indexscan plan node can actually represent several passes,
+ *	  but here we consider the cost of just one pass.
 *
+ * 'root' is the query root
 * 'baserel' is the base relation the index is for
 * 'index' is the index to be used
- * 'expected_indexpages' is the estimated number of index pages that will
- *		be touched in the scan (this is computed by index-type-specific code)
- * 'selec' is the selectivity of the index, ie, the fraction of base-relation
- *		tuples that we will have to fetch and examine
+ * 'indexQuals' is the list of applicable qual clauses (implicit AND semantics)
 * 'is_injoin' is T if we are considering using the index scan as the inside
 *		of a nestloop join.
 *
- * NOTE: 'selec' should be calculated on the basis of indexqual conditions
- * only.  Any additional quals evaluated as qpquals may reduce the number
- * of returned tuples, but they won't reduce the number of tuples we have
- * to fetch from the table, so they don't reduce the scan cost.
+ * NOTE: 'indexQuals' must contain only clauses usable as index restrictions.
+ * Any additional quals evaluated as qpquals may reduce the number of returned
+ * tuples, but they won't reduce the number of tuples we have to fetch from
+ * the table, so they don't reduce the scan cost.
 */
 Cost
-cost_index(RelOptInfo *baserel,
+cost_index(Query *root,
+		   RelOptInfo *baserel,
 		   IndexOptInfo *index,
-		   long expected_indexpages,
-		   Selectivity selec,
+		   List *indexQuals,
 		   bool is_injoin)
 {
 	Cost		temp = 0;
-	double		reltuples = selec * baserel->tuples;
-	double		indextuples = selec * index->tuples;
+	Cost		indexAccessCost;
+	Selectivity	indexSelectivity;
+	double		reltuples;
 	double		relpages;

 	/* Should only be applied to base relations */
 	Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
 	Assert(length(baserel->relids) == 1);

-	if (!_enable_indexscan_ && !is_injoin)
-		temp += _disable_cost_;
+	if (!enable_indexscan && !is_injoin)
+		temp += disable_cost;

 	/*
-	 * We want to be sure we estimate the cost of an index scan as more
-	 * than the cost of a sequential scan (when selec == 1.0), even if we
-	 * don't have good stats.  So, disbelieve zero index size.
+	 * Call index-access-method-specific code to estimate the processing
+	 * cost for scanning the index, as well as the selectivity of the index
+	 * (ie, the fraction of main-table tuples we will have to retrieve).
 	 */
-	if (expected_indexpages <= 0)
-		expected_indexpages = 1;
-	if (indextuples <= 0.0)
-		indextuples = 1.0;
+	fmgr(index->amcostestimate, root, baserel, index, indexQuals,
+		 &indexAccessCost, &indexSelectivity);

-	/* expected index relation pages */
-	temp += expected_indexpages;
+	/* all costs for touching index itself included here */
+	temp += indexAccessCost;

 	/*--------------------
-	 * expected base relation pages
+	 * Estimate number of main-table tuples and pages touched.
 	 *
 	 * Worst case is that each tuple the index tells us to fetch comes
 	 * from a different base-rel page, in which case the I/O cost would be
@@ -178,6 +175,8 @@ cost_index(RelOptInfo *baserel,
 	 * So, we guess-and-hope that these sources of error will more or less
 	 * balance out.
 	 *
+	 * XXX need to add a penalty for nonsequential page fetches.
+	 *
 	 * XXX if the relation has recently been "clustered" using this index,
 	 * then in fact the target tuples will be highly nonuniformly distributed,
 	 * and we will be seriously overestimating the scan cost!  Currently we
@@ -186,16 +185,18 @@ cost_index(RelOptInfo *baserel,
 	 * effect.  Would be nice to do better someday.
 	 *--------------------
 	 */
+
+	reltuples = indexSelectivity * baserel->tuples;
+
 	relpages = reltuples;
 	if (baserel->pages > 0 && baserel->pages < relpages)
 		relpages = baserel->pages;
+
+	/* disk costs for main table */
 	temp += relpages;

-	/* per index tuples */
-	temp += _cpu_index_page_weight_ * indextuples;
-
-	/* per heap tuples */
-	temp += _cpu_page_weight_ * reltuples;
+	/* CPU costs for heap tuples */
+	temp += cpu_page_weight * reltuples;

 	Assert(temp >= 0);
 	return temp;
@@ -213,10 +214,10 @@ cost_tidscan(RelOptInfo *baserel, List *tideval)
 {
 	Cost	temp = 0;

-	if (!_enable_tidscan_)
-		temp += _disable_cost_;
+	if (!enable_tidscan)
+		temp += disable_cost;

-	temp += (1.0 + _cpu_page_weight_) * length(tideval);
+	temp += (1.0 + cpu_page_weight) * length(tideval);

 	return temp;
 }
@@ -227,7 +228,7 @@ cost_tidscan(RelOptInfo *baserel, List *tideval)
 *
 * If the total volume of data to sort is less than SortMem, we will do
 * an in-memory sort, which requires no I/O and about t*log2(t) tuple
- * comparisons for t tuples.  We use _cpu_index_page_weight as the cost
+ * comparisons for t tuples.  We use cpu_index_page_weight as the cost
 * of a tuple comparison (is this reasonable, or do we need another
 * basic parameter?).
 *
@@ -257,8 +258,8 @@ cost_sort(List *pathkeys, double tuples, int width)
 	double		nbytes = relation_byte_size(tuples, width);
 	long		sortmembytes = SortMem * 1024L;

-	if (!_enable_sort_)
-		temp += _disable_cost_;
+	if (!enable_sort)
+		temp += disable_cost;

 	/*
 	 * We want to be sure the cost of a sort is never estimated as zero,
@@ -268,7 +269,7 @@ cost_sort(List *pathkeys, double tuples, int width)
 	if (tuples < 2.0)
 		tuples = 2.0;

-	temp += _cpu_index_page_weight_ * tuples * base_log(tuples, 2.0);
+	temp += cpu_index_page_weight * tuples * base_log(tuples, 2.0);

 	if (nbytes > sortmembytes)
 	{
@@ -298,7 +299,7 @@ cost_result(double tuples, int width)
 	Cost		temp = 0;

 	temp += page_size(tuples, width);
-	temp += _cpu_page_weight_ * tuples;
+	temp += cpu_page_weight * tuples;
 	Assert(temp >= 0);
 	return temp;
 }
@@ -321,8 +322,8 @@ cost_nestloop(Path *outer_path,
 {
 	Cost		temp = 0;

-	if (!_enable_nestloop_)
-		temp += _disable_cost_;
+	if (!enable_nestloop)
+		temp += disable_cost;

 	temp += outer_path->path_cost;
 	temp += outer_path->parent->rows * inner_path->path_cost;
@@ -350,8 +351,8 @@ cost_mergejoin(Path *outer_path,
 {
 	Cost		temp = 0;

-	if (!_enable_mergejoin_)
-		temp += _disable_cost_;
+	if (!enable_mergejoin)
+		temp += disable_cost;

 	/* cost of source data */
 	temp += outer_path->path_cost + inner_path->path_cost;
@@ -372,8 +373,8 @@ cost_mergejoin(Path *outer_path,
 	 * underestimate if there are many equal-keyed tuples in either relation,
 	 * but we have no good way of estimating that...
 	 */
-	temp += _cpu_page_weight_ * (outer_path->parent->rows +
-								 inner_path->parent->rows);
+	temp += cpu_page_weight * (outer_path->parent->rows +
+							   inner_path->parent->rows);

 	Assert(temp >= 0);
 	return temp;
@@ -401,23 +402,23 @@ cost_hashjoin(Path *outer_path,
 												inner_path->parent->width);
 	long		hashtablebytes = SortMem * 1024L;

-	if (!_enable_hashjoin_)
-		temp += _disable_cost_;
+	if (!enable_hashjoin)
+		temp += disable_cost;

 	/* cost of source data */
 	temp += outer_path->path_cost + inner_path->path_cost;

 	/* cost of computing hash function: must do it once per tuple */
-	temp += _cpu_page_weight_ * (outer_path->parent->rows +
-								 inner_path->parent->rows);
+	temp += cpu_page_weight * (outer_path->parent->rows +
+							   inner_path->parent->rows);

 	/* the number of tuple comparisons needed is the number of outer
 	 * tuples times the typical hash bucket size, which we estimate
 	 * conservatively as the inner disbursion times the inner tuple
-	 * count.  The cost per comparison is set at _cpu_index_page_weight_;
+	 * count.  The cost per comparison is set at cpu_index_page_weight;
 	 * is that reasonable, or do we need another basic parameter?
 	 */
-	temp += _cpu_index_page_weight_ * outer_path->parent->rows *
+	temp += cpu_index_page_weight * outer_path->parent->rows *
 		(inner_path->parent->rows * innerdisbursion);

 	/*
--- a/src/backend/optimizer/path/indxpath.c
+++ b/src/backend/optimizer/path/indxpath.c
@@ -8,7 +8,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.76 2000/01/09 00:26:31 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/indxpath.c,v 1.77 2000/01/22 23:50:14 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -1393,19 +1393,6 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
 		List	   *clausegroup = lfirst(i);
 		IndexPath  *pathnode = makeNode(IndexPath);
 		List	   *indexquals;
-		long		npages;
-		Selectivity	selec;
-
-		indexquals = get_actual_clauses(clausegroup);
-		/* expand special operators to indexquals the executor can handle */
-		indexquals = expand_indexqual_conditions(indexquals);
-
-		index_selectivity(root,
-						  rel,
-						  index,
-						  indexquals,
-						  &npages,
-						  &selec);

 		/* XXX this code ought to be merged with create_index_path? */

@@ -1413,6 +1400,10 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
 		pathnode->path.parent = rel;
 		pathnode->path.pathkeys = build_index_pathkeys(root, rel, index);

+		indexquals = get_actual_clauses(clausegroup);
+		/* expand special operators to indexquals the executor can handle */
+		indexquals = expand_indexqual_conditions(indexquals);
+
 		/* Note that we are making a pathnode for a single-scan indexscan;
 		 * therefore, both indexid and indexqual should be single-element
 		 * lists.
@@ -1423,8 +1414,7 @@ index_innerjoin(Query *root, RelOptInfo *rel, IndexOptInfo *index,
 		/* joinrelids saves the rels needed on the outer side of the join */
 		pathnode->joinrelids = lfirst(outerrelids_list);

-		pathnode->path.path_cost = cost_index(rel, index,
-											  npages, selec,
+		pathnode->path.path_cost = cost_index(root, rel, index, indexquals,
 											  true);

 		path_list = lappend(path_list, pathnode);
--- a/src/backend/optimizer/path/joinpath.c
+++ b/src/backend/optimizer/path/joinpath.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.47 2000/01/09 00:26:33 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/joinpath.c,v 1.48 2000/01/22 23:50:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -102,7 +102,7 @@ update_rels_pathlist_for_joins(Query *root, List *joinrels)
 		/*
 		 * Find potential mergejoin clauses.
 		 */
-		if (_enable_mergejoin_)
+		if (enable_mergejoin)
 			mergeclause_list = select_mergejoin_clauses(joinrel->restrictinfo);

 		/*
@@ -141,7 +141,7 @@ update_rels_pathlist_for_joins(Query *root, List *joinrels)
 		 * 4. Consider paths where both outer and inner relations must be
 		 * hashed before being joined.
 		 */
-		if (_enable_hashjoin_)
+		if (enable_hashjoin)
 			pathlist = add_pathlist(joinrel, pathlist,
 									hash_inner_and_outer(root, joinrel,
 														 outerrel,
--- a/src/backend/optimizer/path/orindxpath.c
+++ b/src/backend/optimizer/path/orindxpath.c
@@ -7,7 +7,7 @@
 *
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.33 2000/01/09 00:26:33 tgl Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/orindxpath.c,v 1.34 2000/01/22 23:50:15 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */
@@ -215,21 +215,11 @@ best_or_subclause_index(Query *root,
 	foreach(ilist, indices)
 	{
 		IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
-		long		npages;
-		Selectivity	selec;
 		Cost		subcost;

 		Assert(IsA(index, IndexOptInfo));

-		index_selectivity(root,
-						  rel,
-						  index,
-						  indexqual,
-						  &npages,
-						  &selec);
-
-		subcost = cost_index(rel, index,
-							 npages, selec,
+		subcost = cost_index(root, rel, index, indexqual,
 							 false);

 		if (first_run || subcost < *retCost)