Another round of planner/optimizer work. This is just restructuring and

code cleanup; no major improvements yet. However, EXPLAIN does produce more intuitive outputs for nested loops with indexscans now...
2025-11-21 00:42:43 +03:00 · 2000-01-09 00:26:47 +00:00
parent 69d4299e3e
commit 166b5c1def
35 changed files with 1239 additions and 1448 deletions
--- a/src/backend/optimizer/path/costsize.c
+++ b/src/backend/optimizer/path/costsize.c
@@ -18,15 +18,14 @@
 * Copyright (c) 1994, Regents of the University of California
 *
 * IDENTIFICATION
- *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.46 1999/11/23 20:06:54 momjian Exp $
+ *	  $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.47 2000/01/09 00:26:31 tgl Exp $
 *
 *-------------------------------------------------------------------------
 */

-#include <math.h>
-
 #include "postgres.h"

+#include <math.h>
 #ifdef HAVE_LIMITS_H
 #include <limits.h>
 #ifndef MAXINT
@@ -45,13 +44,17 @@
 #include "utils/lsyscache.h"


-static int	compute_targetlist_width(List *targetlist);
+static void set_rel_width(Query *root, RelOptInfo *rel);
 static int	compute_attribute_width(TargetEntry *tlistentry);
-static double relation_byte_size(int tuples, int width);
+static double relation_byte_size(double tuples, int width);
+static double page_size(double tuples, int width);
 static double base_log(double x, double b);


-int			_disable_cost_ = 30000000;
+Cost		_cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
+Cost		_cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
+
+Cost		_disable_cost_ = 100000000.0;

 bool		_enable_seqscan_ = true;
 bool		_enable_indexscan_ = true;
@@ -61,9 +64,6 @@ bool		_enable_mergejoin_ = true;
 bool		_enable_hashjoin_ = true;
 bool		_enable_tidscan_ = true;

-Cost		 _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
-Cost		_cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
-
 /*
 * cost_seqscan
 *	  Determines and returns the cost of scanning a relation sequentially.
@@ -74,27 +74,21 @@ Cost		_cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
 *	  be).
 *
 *		disk = p
- *		cpu = *CPU-PAGE-WEIGHT* * t
- *
- * 'relid' is the relid of the relation to be scanned
- * 'relpages' is the number of pages in the relation to be scanned
- *		(as determined from the system catalogs)
- * 'reltuples' is the number of tuples in the relation to be scanned
- *
- * Returns a flonum.
- *
+ *		cpu = CPU-PAGE-WEIGHT * t
 */
 Cost
-cost_seqscan(int relid, int relpages, int reltuples)
+cost_seqscan(RelOptInfo *baserel)
 {
 	Cost		temp = 0;

+	/* Should only be applied to base relations */
+	Assert(length(baserel->relids) == 1);
+
 	if (!_enable_seqscan_)
 		temp += _disable_cost_;

-	if (relid < 0)
+	if (lfirsti(baserel->relids) < 0)
 	{
-
 		/*
 		 * cost of sequentially scanning a materialized temporary relation
 		 */
@@ -102,9 +96,10 @@ cost_seqscan(int relid, int relpages, int reltuples)
 	}
 	else
 	{
-		temp += relpages;
-		temp += _cpu_page_weight_ * reltuples;
+		temp += baserel->pages;
+		temp += _cpu_page_weight_ * baserel->tuples;
 	}
+
 	Assert(temp >= 0);
 	return temp;
 }
@@ -115,31 +110,38 @@ cost_seqscan(int relid, int relpages, int reltuples)
 *	  Determines and returns the cost of scanning a relation using an index.
 *
 *		disk = expected-index-pages + expected-data-pages
- *		cpu = *CPU-PAGE-WEIGHT* *
- *				(expected-index-tuples + expected-data-tuples)
+ *		cpu = CPU-INDEX-PAGE-WEIGHT * expected-index-tuples +
+ *		      CPU-PAGE-WEIGHT * expected-data-tuples
 *
- * 'indexid' is the index OID
- * 'expected-indexpages' is the number of index pages examined in the scan
- * 'selec' is the selectivity of the index
- * 'relpages' is the number of pages in the main relation
- * 'reltuples' is the number of tuples in the main relation
- * 'indexpages' is the number of pages in the index relation
- * 'indextuples' is the number of tuples in the index relation
- *
- * Returns a flonum.
+ * 'baserel' is the base relation the index is for
+ * 'index' is the index to be used
+ * 'expected_indexpages' is the estimated number of index pages that will
+ *		be touched in the scan (this is computed by index-type-specific code)
+ * 'selec' is the selectivity of the index, ie, the fraction of base-relation
+ *		tuples that we will have to fetch and examine
+ * 'is_injoin' is T if we are considering using the index scan as the inside
+ *		of a nestloop join.
 *
+ * NOTE: 'selec' should be calculated on the basis of indexqual conditions
+ * only.  Any additional quals evaluated as qpquals may reduce the number
+ * of returned tuples, but they won't reduce the number of tuples we have
+ * to fetch from the table, so they don't reduce the scan cost.
 */
 Cost
-cost_index(Oid indexid,
-		   int expected_indexpages,
-		   Cost selec,
-		   int relpages,
-		   int reltuples,
-		   int indexpages,
-		   int indextuples,
+cost_index(RelOptInfo *baserel,
+		   IndexOptInfo *index,
+		   long expected_indexpages,
+		   Selectivity selec,
 		   bool is_injoin)
 {
 	Cost		temp = 0;
+	double		reltuples = selec * baserel->tuples;
+	double		indextuples = selec * index->tuples;
+	double		relpages;
+
+	/* Should only be applied to base relations */
+	Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
+	Assert(length(baserel->relids) == 1);

 	if (!_enable_indexscan_ && !is_injoin)
 		temp += _disable_cost_;
@@ -151,25 +153,49 @@ cost_index(Oid indexid,
 	 */
 	if (expected_indexpages <= 0)
 		expected_indexpages = 1;
-	if (indextuples <= 0)
-		indextuples = 1;
+	if (indextuples <= 0.0)
+		indextuples = 1.0;

 	/* expected index relation pages */
 	temp += expected_indexpages;

-	/*
-	 * expected base relation pages XXX this isn't really right, since we
-	 * will access the table nonsequentially and might have to fetch the
-	 * same page more than once.  This calculation assumes the buffer
-	 * cache will prevent that from happening...
+	/*--------------------
+	 * expected base relation pages
+	 *
+	 * Worst case is that each tuple the index tells us to fetch comes
+	 * from a different base-rel page, in which case the I/O cost would be
+	 * 'reltuples' pages.  In practice we can expect the number of page
+	 * fetches to be reduced by the buffer cache, because more than one
+	 * tuple can be retrieved per page fetched.  Currently, we estimate
+	 * the number of pages to be retrieved as
+	 *			MIN(reltuples, relpages)
+	 * This amounts to assuming that the buffer cache is perfectly efficient
+	 * and never ends up reading the same page twice within one scan, which
+	 * of course is too optimistic.  On the other hand, we are assuming that
+	 * the target tuples are perfectly uniformly distributed across the
+	 * relation's pages, which is too pessimistic --- any nonuniformity of
+	 * distribution will reduce the number of pages we have to fetch.
+	 * So, we guess-and-hope that these sources of error will more or less
+	 * balance out.
+	 *
+	 * XXX if the relation has recently been "clustered" using this index,
+	 * then in fact the target tuples will be highly nonuniformly distributed,
+	 * and we will be seriously overestimating the scan cost!  Currently we
+	 * have no way to know whether the relation has been clustered, nor how
+	 * much it's been modified since the last clustering, so we ignore this
+	 * effect.  Would be nice to do better someday.
+	 *--------------------
 	 */
-	temp += ceil(((double) selec) * ((double) relpages));
+	relpages = reltuples;
+	if (baserel->pages > 0 && baserel->pages < relpages)
+		relpages = baserel->pages;
+	temp += relpages;

 	/* per index tuples */
-	temp += _cpu_index_page_weight_ * selec * indextuples;
+	temp += _cpu_index_page_weight_ * indextuples;

 	/* per heap tuples */
-	temp += _cpu_page_weight_ * selec * reltuples;
+	temp += _cpu_page_weight_ * reltuples;

 	Assert(temp >= 0);
 	return temp;
@@ -180,13 +206,10 @@ cost_index(Oid indexid,
 *	  Determines and returns the cost of scanning a relation using tid-s.
 *
 *		disk = number of tids
- *		cpu = *CPU-PAGE-WEIGHT* * number_of_tids
- *
- * Returns a flonum.
- *
+ *		cpu = CPU-PAGE-WEIGHT * number_of_tids
 */
 Cost
-cost_tidscan(List *tideval)
+cost_tidscan(RelOptInfo *baserel, List *tideval)
 {
 	Cost	temp = 0;

@@ -200,28 +223,39 @@ cost_tidscan(List *tideval)
 
 /*
 * cost_sort
- *	  Determines and returns the cost of sorting a relation by considering
- *	  the cost of doing an external sort:	XXX this is probably too low
- *				disk = (p lg p)
- *				cpu = *CPU-PAGE-WEIGHT* * (t lg t)
+ *	  Determines and returns the cost of sorting a relation.
+ *
+ * If the total volume of data to sort is less than SortMem, we will do
+ * an in-memory sort, which requires no I/O and about t*log2(t) tuple
+ * comparisons for t tuples.  We use _cpu_index_page_weight as the cost
+ * of a tuple comparison (is this reasonable, or do we need another
+ * basic parameter?).
+ *
+ * If the total volume exceeds SortMem, we switch to a tape-style merge
+ * algorithm.  There will still be about t*log2(t) tuple comparisons in
+ * total, but we will also need to write and read each tuple once per
+ * merge pass.  We expect about ceil(log6(r)) merge passes where r is the
+ * number of initial runs formed (log6 because tuplesort.c uses six-tape
+ * merging).  Since the average initial run should be about twice SortMem,
+ * we have
+ *		disk = 2 * p * ceil(log6(p / (2*SortMem)))
+ *		cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t)
 *
 * 'pathkeys' is a list of sort keys
 * 'tuples' is the number of tuples in the relation
 * 'width' is the average tuple width in bytes
 *
- * NOTE: some callers currently pass NULL for pathkeys because they
+ * NOTE: some callers currently pass NIL for pathkeys because they
 * can't conveniently supply the sort keys.  Since this routine doesn't
 * currently do anything with pathkeys anyway, that doesn't matter...
 * but if it ever does, it should react gracefully to lack of key data.
- *
- * Returns a flonum.
 */
 Cost
-cost_sort(List *pathkeys, int tuples, int width)
+cost_sort(List *pathkeys, double tuples, int width)
 {
 	Cost		temp = 0;
-	int			npages = page_size(tuples, width);
-	double		log_npages;
+	double		nbytes = relation_byte_size(tuples, width);
+	long		sortmembytes = SortMem * 1024L;

 	if (!_enable_sort_)
 		temp += _disable_cost_;
@@ -231,25 +265,23 @@ cost_sort(List *pathkeys, int tuples, int width)
 	 * even if passed-in tuple count is zero.  Besides, mustn't do
 	 * log(0)...
 	 */
-	if (tuples <= 0)
-		tuples = 1;
-	if (npages <= 0)
-		npages = 1;
+	if (tuples < 2.0)
+		tuples = 2.0;

-	log_npages = ceil(base_log((double) npages, 2.0));
-	if (log_npages <= 0.0)
-		log_npages = 1.0;
+	temp += _cpu_index_page_weight_ * tuples * base_log(tuples, 2.0);

-	temp += npages * log_npages;
+	if (nbytes > sortmembytes)
+	{
+		double		npages = ceil(nbytes / BLCKSZ);
+		double		nruns = nbytes / (sortmembytes * 2);
+		double		log_runs = ceil(base_log(nruns, 6.0));

-	/*
-	 * could be base_log(tuples, NBuffers), but we are only doing 2-way
-	 * merges
-	 */
-	temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
+		if (log_runs < 1.0)
+			log_runs = 1.0;
+		temp += 2 * npages * log_runs;
+	}

 	Assert(temp > 0);
-
 	return temp;
 }

@@ -258,18 +290,15 @@ cost_sort(List *pathkeys, int tuples, int width)
 * cost_result
 *	  Determines and returns the cost of writing a relation of 'tuples'
 *	  tuples of 'width' bytes out to a result relation.
- *
- * Returns a flonum.
- *
 */
 #ifdef NOT_USED
 Cost
-cost_result(int tuples, int width)
+cost_result(double tuples, int width)
 {
 	Cost		temp = 0;

-	temp = temp + page_size(tuples, width);
-	temp = temp + _cpu_page_weight_ * tuples;
+	temp += page_size(tuples, width);
+	temp += _cpu_page_weight_ * tuples;
 	Assert(temp >= 0);
 	return temp;
 }
@@ -281,111 +310,106 @@ cost_result(int tuples, int width)
 *	  Determines and returns the cost of joining two relations using the
 *	  nested loop algorithm.
 *
- * 'outercost' is the (disk+cpu) cost of scanning the outer relation
- * 'innercost' is the (disk+cpu) cost of scanning the inner relation
- * 'outertuples' is the number of tuples in the outer relation
- *
- * Returns a flonum.
- *
+ * 'outer_path' is the path for the outer relation
+ * 'inner_path' is the path for the inner relation
+ * 'is_indexjoin' is true if we are using an indexscan for the inner relation
 */
 Cost
-cost_nestloop(Cost outercost,
-			  Cost innercost,
-			  int outertuples,
-			  int innertuples,
-			  int outerpages,
+cost_nestloop(Path *outer_path,
+			  Path *inner_path,
 			  bool is_indexjoin)
 {
 	Cost		temp = 0;

 	if (!_enable_nestloop_)
 		temp += _disable_cost_;
-	temp += outercost;
-	temp += outertuples * innercost;
-	Assert(temp >= 0);

+	temp += outer_path->path_cost;
+	temp += outer_path->parent->rows * inner_path->path_cost;
+
+	Assert(temp >= 0);
 	return temp;
 }

 /*
 * cost_mergejoin
- *	  'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
- *				outer and inner relations
- *	  'outersortkeys' and 'innersortkeys' are lists of the keys to be used
- *				to sort the outer and inner relations (or NIL if no explicit
- *				sort is needed because the source path is already ordered)
- *	  'outertuples' and 'innertuples' are the number of tuples in the outer
- *				and inner relations
- *	  'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
- *				of the tuples of the outer and inner relations
- *
- * Returns a flonum.
+ *	  Determines and returns the cost of joining two relations using the
+ *	  merge join algorithm.
 *
+ * 'outer_path' is the path for the outer relation
+ * 'inner_path' is the path for the inner relation
+ * 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
+ *				to sort the outer and inner relations, or NIL if no explicit
+ *				sort is needed because the source path is already ordered
 */
 Cost
-cost_mergejoin(Cost outercost,
-			   Cost innercost,
+cost_mergejoin(Path *outer_path,
+			   Path *inner_path,
 			   List *outersortkeys,
-			   List *innersortkeys,
-			   int outersize,
-			   int innersize,
-			   int outerwidth,
-			   int innerwidth)
+			   List *innersortkeys)
 {
 	Cost		temp = 0;

 	if (!_enable_mergejoin_)
 		temp += _disable_cost_;

-	temp += outercost;
-	temp += innercost;
+	/* cost of source data */
+	temp += outer_path->path_cost + inner_path->path_cost;
+
 	if (outersortkeys)			/* do we need to sort? */
-		temp += cost_sort(outersortkeys, outersize, outerwidth);
+		temp += cost_sort(outersortkeys,
+						  outer_path->parent->rows,
+						  outer_path->parent->width);
+
 	if (innersortkeys)			/* do we need to sort? */
-		temp += cost_sort(innersortkeys, innersize, innerwidth);
-	temp += _cpu_page_weight_ * (outersize + innersize);
+		temp += cost_sort(innersortkeys,
+						  inner_path->parent->rows,
+						  inner_path->parent->width);
+
+	/*
+	 * Estimate the number of tuples to be processed in the mergejoin itself
+	 * as one per tuple in the two source relations.  This could be a drastic
+	 * underestimate if there are many equal-keyed tuples in either relation,
+	 * but we have no good way of estimating that...
+	 */
+	temp += _cpu_page_weight_ * (outer_path->parent->rows +
+								 inner_path->parent->rows);

 	Assert(temp >= 0);
-
 	return temp;
 }

 /*
 * cost_hashjoin
+ *	  Determines and returns the cost of joining two relations using the
+ *	  hash join algorithm.
 *
- *	  'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
- *				outer and inner relations
- *	  'outersize' and 'innersize' are the number of tuples in the outer
- *				and inner relations
- *	  'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
- *				of the tuples of the outer and inner relations
- *	  'innerdisbursion' is an estimate of the disbursion statistic
+ * 'outer_path' is the path for the outer relation
+ * 'inner_path' is the path for the inner relation
+ * 'innerdisbursion' is an estimate of the disbursion statistic
 *				for the inner hash key.
- *
- * Returns a flonum.
 */
 Cost
-cost_hashjoin(Cost outercost,
-			  Cost innercost,
-			  int outersize,
-			  int innersize,
-			  int outerwidth,
-			  int innerwidth,
-			  Cost innerdisbursion)
+cost_hashjoin(Path *outer_path,
+			  Path *inner_path,
+			  Selectivity innerdisbursion)
 {
 	Cost		temp = 0;
-	double		outerbytes = relation_byte_size(outersize, outerwidth);
-	double		innerbytes = relation_byte_size(innersize, innerwidth);
+	double		outerbytes = relation_byte_size(outer_path->parent->rows,
+												outer_path->parent->width);
+	double		innerbytes = relation_byte_size(inner_path->parent->rows,
+												inner_path->parent->width);
 	long		hashtablebytes = SortMem * 1024L;

 	if (!_enable_hashjoin_)
 		temp += _disable_cost_;

 	/* cost of source data */
-	temp += outercost + innercost;
+	temp += outer_path->path_cost + inner_path->path_cost;

 	/* cost of computing hash function: must do it once per tuple */
-	temp += _cpu_page_weight_ * (outersize + innersize);
+	temp += _cpu_page_weight_ * (outer_path->parent->rows +
+								 inner_path->parent->rows);

 	/* the number of tuple comparisons needed is the number of outer
 	 * tuples times the typical hash bucket size, which we estimate
@@ -393,8 +417,8 @@ cost_hashjoin(Cost outercost,
 	 * count.  The cost per comparison is set at _cpu_index_page_weight_;
 	 * is that reasonable, or do we need another basic parameter?
 	 */
-	temp += _cpu_index_page_weight_ * outersize *
-		(innersize * innerdisbursion);
+	temp += _cpu_index_page_weight_ * outer_path->parent->rows *
+		(inner_path->parent->rows * innerdisbursion);

 	/*
 	 * if inner relation is too big then we will need to "batch" the join,
@@ -402,8 +426,10 @@ cost_hashjoin(Cost outercost,
 	 * extra time.  Charge one cost unit per page of I/O.
 	 */
 	if (innerbytes > hashtablebytes)
-		temp += 2 * (page_size(outersize, outerwidth) +
-					 page_size(innersize, innerwidth));
+		temp += 2 * (page_size(outer_path->parent->rows,
+							   outer_path->parent->width) +
+					 page_size(inner_path->parent->rows,
+							   inner_path->parent->width));

 	/*
 	 * Bias against putting larger relation on inside.  We don't want
@@ -415,76 +441,74 @@ cost_hashjoin(Cost outercost,
 		temp *= 1.1;			/* is this an OK fudge factor? */

 	Assert(temp >= 0);
-
 	return temp;
 }

 /*
- * compute_rel_size
- *	  Computes the size of each relation in 'rel_list' (after applying
- *	  restrictions), by multiplying the selectivity of each restriction
- *	  by the original size of the relation.
+ * set_rel_rows_width
+ *		Set the 'rows' and 'width' estimates for the given base relation.
 *
- *	  Sets the 'size' field for each relation entry with this computed size.
- *
- * Returns the size.
+ * 'rows' is the estimated number of output tuples (after applying
+ * restriction clauses).
+ * 'width' is the estimated average output tuple width in bytes.
 */
-int
-compute_rel_size(RelOptInfo *rel)
+void
+set_rel_rows_width(Query *root, RelOptInfo *rel)
 {
-	Cost		temp;
-	int			temp1;
+	/* Should only be applied to base relations */
+	Assert(length(rel->relids) == 1);
+
+	rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo);
+	Assert(rel->rows >= 0);
+
+	set_rel_width(root, rel);
+}
+
+/*
+ * set_joinrel_rows_width
+ *		Set the 'rows' and 'width' estimates for the given join relation.
+ */
+void
+set_joinrel_rows_width(Query *root, RelOptInfo *rel,
+					   JoinPath *joinpath)
+{
+	double		temp;
+
+	/* cartesian product */
+	temp = joinpath->outerjoinpath->parent->rows *
+		joinpath->innerjoinpath->parent->rows;
+
+	/* apply restrictivity */
+	temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo);

-	temp = rel->tuples * product_selec(rel->restrictinfo);
 	Assert(temp >= 0);
-	if (temp >= (MAXINT - 1))
-		temp1 = MAXINT;
-	else
-		temp1 = ceil((double) temp);
-	Assert(temp1 >= 0);
-	Assert(temp1 <= MAXINT);
-	return temp1;
+	rel->rows = temp;
+
+	set_rel_width(root, rel);
 }

 /*
- * compute_rel_width
- *	  Computes the width in bytes of a tuple from 'rel'.
- *
- * Returns the width of the tuple as a fixnum.
+ * set_rel_width
+ *		Set the estimated output width of the relation.
 */
-int
-compute_rel_width(RelOptInfo *rel)
+static void
+set_rel_width(Query *root, RelOptInfo *rel)
 {
-	return compute_targetlist_width(rel->targetlist);
-}
-
-/*
- * compute_targetlist_width
- *	  Computes the width in bytes of a tuple made from 'targetlist'.
- *
- * Returns the width of the tuple as a fixnum.
- */
-static int
-compute_targetlist_width(List *targetlist)
-{
-	List	   *temp_tl;
 	int			tuple_width = 0;
+	List	   *tle;

-	foreach(temp_tl, targetlist)
-	{
-		tuple_width += compute_attribute_width(lfirst(temp_tl));
-	}
-	return tuple_width;
+	foreach(tle, rel->targetlist)
+		tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
+	Assert(tuple_width >= 0);
+	rel->width = tuple_width;
 }

 /*
 * compute_attribute_width
 *	  Given a target list entry, find the size in bytes of the attribute.
 *
- *	  If a field is variable-length, it is assumed to be at least the size
- *	  of a TID field.
- *
- * Returns the width of the attribute as a fixnum.
+ *	  If a field is variable-length, we make a default assumption.  Would be
+ *	  better if VACUUM recorded some stats about the average field width...
 */
 static int
 compute_attribute_width(TargetEntry *tlistentry)
@@ -497,47 +521,15 @@ compute_attribute_width(TargetEntry *tlistentry)
 		return width;
 }

-/*
- * compute_joinrel_size
- *	  Computes the size of the join relation 'joinrel'.
- *
- * Returns a fixnum.
- */
-int
-compute_joinrel_size(JoinPath *joinpath)
-{
-	Cost		temp = 1.0;
-	int			temp1 = 0;
-
-	/* cartesian product */
-	temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
-	temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
-
-	temp = temp * product_selec(joinpath->pathinfo);
-	if (temp >= (MAXINT - 1) / 2)
-	{
-		/* if we exceed (MAXINT-1)/2, we switch to log scale */
-		/* +1 prevents log(0) */
-		temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
-	}
-	else
-		temp1 = ceil((double) temp);
-	Assert(temp1 >= 0);
-
-	return temp1;
-}
-
 /*
 * relation_byte_size
 *	  Estimate the storage space in bytes for a given number of tuples
 *	  of a given width (size in bytes).
- *	  To avoid overflow with big relations, result is a double.
 */
-
 static double
-relation_byte_size(int tuples, int width)
+relation_byte_size(double tuples, int width)
 {
-	return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
+	return tuples * ((double) (width + sizeof(HeapTupleData)));
 }

 /*
@@ -545,14 +537,10 @@ relation_byte_size(int tuples, int width)
 *	  Returns an estimate of the number of pages covered by a given
 *	  number of tuples of a given width (size in bytes).
 */
-int
-page_size(int tuples, int width)
+static double
+page_size(double tuples, int width)
 {
-	int			temp;
-
-	temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
-	Assert(temp >= 0);
-	return temp;
+	return ceil(relation_byte_size(tuples, width) / BLCKSZ);
 }

 static double