mirror of
https://github.com/postgres/postgres.git
synced 2025-11-21 00:42:43 +03:00
Another round of planner/optimizer work. This is just restructuring and
code cleanup; no major improvements yet. However, EXPLAIN does produce more intuitive outputs for nested loops with indexscans now...
This commit is contained in:
@@ -18,15 +18,14 @@
|
||||
* Copyright (c) 1994, Regents of the University of California
|
||||
*
|
||||
* IDENTIFICATION
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.46 1999/11/23 20:06:54 momjian Exp $
|
||||
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.47 2000/01/09 00:26:31 tgl Exp $
|
||||
*
|
||||
*-------------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
#ifdef HAVE_LIMITS_H
|
||||
#include <limits.h>
|
||||
#ifndef MAXINT
|
||||
@@ -45,13 +44,17 @@
|
||||
#include "utils/lsyscache.h"
|
||||
|
||||
|
||||
static int compute_targetlist_width(List *targetlist);
|
||||
static void set_rel_width(Query *root, RelOptInfo *rel);
|
||||
static int compute_attribute_width(TargetEntry *tlistentry);
|
||||
static double relation_byte_size(int tuples, int width);
|
||||
static double relation_byte_size(double tuples, int width);
|
||||
static double page_size(double tuples, int width);
|
||||
static double base_log(double x, double b);
|
||||
|
||||
|
||||
int _disable_cost_ = 30000000;
|
||||
Cost _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
|
||||
Cost _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
|
||||
|
||||
Cost _disable_cost_ = 100000000.0;
|
||||
|
||||
bool _enable_seqscan_ = true;
|
||||
bool _enable_indexscan_ = true;
|
||||
@@ -61,9 +64,6 @@ bool _enable_mergejoin_ = true;
|
||||
bool _enable_hashjoin_ = true;
|
||||
bool _enable_tidscan_ = true;
|
||||
|
||||
Cost _cpu_page_weight_ = _CPU_PAGE_WEIGHT_;
|
||||
Cost _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
|
||||
|
||||
/*
|
||||
* cost_seqscan
|
||||
* Determines and returns the cost of scanning a relation sequentially.
|
||||
@@ -74,27 +74,21 @@ Cost _cpu_index_page_weight_ = _CPU_INDEX_PAGE_WEIGHT_;
|
||||
* be).
|
||||
*
|
||||
* disk = p
|
||||
* cpu = *CPU-PAGE-WEIGHT* * t
|
||||
*
|
||||
* 'relid' is the relid of the relation to be scanned
|
||||
* 'relpages' is the number of pages in the relation to be scanned
|
||||
* (as determined from the system catalogs)
|
||||
* 'reltuples' is the number of tuples in the relation to be scanned
|
||||
*
|
||||
* Returns a flonum.
|
||||
*
|
||||
* cpu = CPU-PAGE-WEIGHT * t
|
||||
*/
|
||||
Cost
|
||||
cost_seqscan(int relid, int relpages, int reltuples)
|
||||
cost_seqscan(RelOptInfo *baserel)
|
||||
{
|
||||
Cost temp = 0;
|
||||
|
||||
/* Should only be applied to base relations */
|
||||
Assert(length(baserel->relids) == 1);
|
||||
|
||||
if (!_enable_seqscan_)
|
||||
temp += _disable_cost_;
|
||||
|
||||
if (relid < 0)
|
||||
if (lfirsti(baserel->relids) < 0)
|
||||
{
|
||||
|
||||
/*
|
||||
* cost of sequentially scanning a materialized temporary relation
|
||||
*/
|
||||
@@ -102,9 +96,10 @@ cost_seqscan(int relid, int relpages, int reltuples)
|
||||
}
|
||||
else
|
||||
{
|
||||
temp += relpages;
|
||||
temp += _cpu_page_weight_ * reltuples;
|
||||
temp += baserel->pages;
|
||||
temp += _cpu_page_weight_ * baserel->tuples;
|
||||
}
|
||||
|
||||
Assert(temp >= 0);
|
||||
return temp;
|
||||
}
|
||||
@@ -115,31 +110,38 @@ cost_seqscan(int relid, int relpages, int reltuples)
|
||||
* Determines and returns the cost of scanning a relation using an index.
|
||||
*
|
||||
* disk = expected-index-pages + expected-data-pages
|
||||
* cpu = *CPU-PAGE-WEIGHT* *
|
||||
* (expected-index-tuples + expected-data-tuples)
|
||||
* cpu = CPU-INDEX-PAGE-WEIGHT * expected-index-tuples +
|
||||
* CPU-PAGE-WEIGHT * expected-data-tuples
|
||||
*
|
||||
* 'indexid' is the index OID
|
||||
* 'expected-indexpages' is the number of index pages examined in the scan
|
||||
* 'selec' is the selectivity of the index
|
||||
* 'relpages' is the number of pages in the main relation
|
||||
* 'reltuples' is the number of tuples in the main relation
|
||||
* 'indexpages' is the number of pages in the index relation
|
||||
* 'indextuples' is the number of tuples in the index relation
|
||||
*
|
||||
* Returns a flonum.
|
||||
* 'baserel' is the base relation the index is for
|
||||
* 'index' is the index to be used
|
||||
* 'expected_indexpages' is the estimated number of index pages that will
|
||||
* be touched in the scan (this is computed by index-type-specific code)
|
||||
* 'selec' is the selectivity of the index, ie, the fraction of base-relation
|
||||
* tuples that we will have to fetch and examine
|
||||
* 'is_injoin' is T if we are considering using the index scan as the inside
|
||||
* of a nestloop join.
|
||||
*
|
||||
* NOTE: 'selec' should be calculated on the basis of indexqual conditions
|
||||
* only. Any additional quals evaluated as qpquals may reduce the number
|
||||
* of returned tuples, but they won't reduce the number of tuples we have
|
||||
* to fetch from the table, so they don't reduce the scan cost.
|
||||
*/
|
||||
Cost
|
||||
cost_index(Oid indexid,
|
||||
int expected_indexpages,
|
||||
Cost selec,
|
||||
int relpages,
|
||||
int reltuples,
|
||||
int indexpages,
|
||||
int indextuples,
|
||||
cost_index(RelOptInfo *baserel,
|
||||
IndexOptInfo *index,
|
||||
long expected_indexpages,
|
||||
Selectivity selec,
|
||||
bool is_injoin)
|
||||
{
|
||||
Cost temp = 0;
|
||||
double reltuples = selec * baserel->tuples;
|
||||
double indextuples = selec * index->tuples;
|
||||
double relpages;
|
||||
|
||||
/* Should only be applied to base relations */
|
||||
Assert(IsA(baserel, RelOptInfo) && IsA(index, IndexOptInfo));
|
||||
Assert(length(baserel->relids) == 1);
|
||||
|
||||
if (!_enable_indexscan_ && !is_injoin)
|
||||
temp += _disable_cost_;
|
||||
@@ -151,25 +153,49 @@ cost_index(Oid indexid,
|
||||
*/
|
||||
if (expected_indexpages <= 0)
|
||||
expected_indexpages = 1;
|
||||
if (indextuples <= 0)
|
||||
indextuples = 1;
|
||||
if (indextuples <= 0.0)
|
||||
indextuples = 1.0;
|
||||
|
||||
/* expected index relation pages */
|
||||
temp += expected_indexpages;
|
||||
|
||||
/*
|
||||
* expected base relation pages XXX this isn't really right, since we
|
||||
* will access the table nonsequentially and might have to fetch the
|
||||
* same page more than once. This calculation assumes the buffer
|
||||
* cache will prevent that from happening...
|
||||
/*--------------------
|
||||
* expected base relation pages
|
||||
*
|
||||
* Worst case is that each tuple the index tells us to fetch comes
|
||||
* from a different base-rel page, in which case the I/O cost would be
|
||||
* 'reltuples' pages. In practice we can expect the number of page
|
||||
* fetches to be reduced by the buffer cache, because more than one
|
||||
* tuple can be retrieved per page fetched. Currently, we estimate
|
||||
* the number of pages to be retrieved as
|
||||
* MIN(reltuples, relpages)
|
||||
* This amounts to assuming that the buffer cache is perfectly efficient
|
||||
* and never ends up reading the same page twice within one scan, which
|
||||
* of course is too optimistic. On the other hand, we are assuming that
|
||||
* the target tuples are perfectly uniformly distributed across the
|
||||
* relation's pages, which is too pessimistic --- any nonuniformity of
|
||||
* distribution will reduce the number of pages we have to fetch.
|
||||
* So, we guess-and-hope that these sources of error will more or less
|
||||
* balance out.
|
||||
*
|
||||
* XXX if the relation has recently been "clustered" using this index,
|
||||
* then in fact the target tuples will be highly nonuniformly distributed,
|
||||
* and we will be seriously overestimating the scan cost! Currently we
|
||||
* have no way to know whether the relation has been clustered, nor how
|
||||
* much it's been modified since the last clustering, so we ignore this
|
||||
* effect. Would be nice to do better someday.
|
||||
*--------------------
|
||||
*/
|
||||
temp += ceil(((double) selec) * ((double) relpages));
|
||||
relpages = reltuples;
|
||||
if (baserel->pages > 0 && baserel->pages < relpages)
|
||||
relpages = baserel->pages;
|
||||
temp += relpages;
|
||||
|
||||
/* per index tuples */
|
||||
temp += _cpu_index_page_weight_ * selec * indextuples;
|
||||
temp += _cpu_index_page_weight_ * indextuples;
|
||||
|
||||
/* per heap tuples */
|
||||
temp += _cpu_page_weight_ * selec * reltuples;
|
||||
temp += _cpu_page_weight_ * reltuples;
|
||||
|
||||
Assert(temp >= 0);
|
||||
return temp;
|
||||
@@ -180,13 +206,10 @@ cost_index(Oid indexid,
|
||||
* Determines and returns the cost of scanning a relation using tid-s.
|
||||
*
|
||||
* disk = number of tids
|
||||
* cpu = *CPU-PAGE-WEIGHT* * number_of_tids
|
||||
*
|
||||
* Returns a flonum.
|
||||
*
|
||||
* cpu = CPU-PAGE-WEIGHT * number_of_tids
|
||||
*/
|
||||
Cost
|
||||
cost_tidscan(List *tideval)
|
||||
cost_tidscan(RelOptInfo *baserel, List *tideval)
|
||||
{
|
||||
Cost temp = 0;
|
||||
|
||||
@@ -200,28 +223,39 @@ cost_tidscan(List *tideval)
|
||||
|
||||
/*
|
||||
* cost_sort
|
||||
* Determines and returns the cost of sorting a relation by considering
|
||||
* the cost of doing an external sort: XXX this is probably too low
|
||||
* disk = (p lg p)
|
||||
* cpu = *CPU-PAGE-WEIGHT* * (t lg t)
|
||||
* Determines and returns the cost of sorting a relation.
|
||||
*
|
||||
* If the total volume of data to sort is less than SortMem, we will do
|
||||
* an in-memory sort, which requires no I/O and about t*log2(t) tuple
|
||||
* comparisons for t tuples. We use _cpu_index_page_weight as the cost
|
||||
* of a tuple comparison (is this reasonable, or do we need another
|
||||
* basic parameter?).
|
||||
*
|
||||
* If the total volume exceeds SortMem, we switch to a tape-style merge
|
||||
* algorithm. There will still be about t*log2(t) tuple comparisons in
|
||||
* total, but we will also need to write and read each tuple once per
|
||||
* merge pass. We expect about ceil(log6(r)) merge passes where r is the
|
||||
* number of initial runs formed (log6 because tuplesort.c uses six-tape
|
||||
* merging). Since the average initial run should be about twice SortMem,
|
||||
* we have
|
||||
* disk = 2 * p * ceil(log6(p / (2*SortMem)))
|
||||
* cpu = CPU-INDEX-PAGE-WEIGHT * t * log2(t)
|
||||
*
|
||||
* 'pathkeys' is a list of sort keys
|
||||
* 'tuples' is the number of tuples in the relation
|
||||
* 'width' is the average tuple width in bytes
|
||||
*
|
||||
* NOTE: some callers currently pass NULL for pathkeys because they
|
||||
* NOTE: some callers currently pass NIL for pathkeys because they
|
||||
* can't conveniently supply the sort keys. Since this routine doesn't
|
||||
* currently do anything with pathkeys anyway, that doesn't matter...
|
||||
* but if it ever does, it should react gracefully to lack of key data.
|
||||
*
|
||||
* Returns a flonum.
|
||||
*/
|
||||
Cost
|
||||
cost_sort(List *pathkeys, int tuples, int width)
|
||||
cost_sort(List *pathkeys, double tuples, int width)
|
||||
{
|
||||
Cost temp = 0;
|
||||
int npages = page_size(tuples, width);
|
||||
double log_npages;
|
||||
double nbytes = relation_byte_size(tuples, width);
|
||||
long sortmembytes = SortMem * 1024L;
|
||||
|
||||
if (!_enable_sort_)
|
||||
temp += _disable_cost_;
|
||||
@@ -231,25 +265,23 @@ cost_sort(List *pathkeys, int tuples, int width)
|
||||
* even if passed-in tuple count is zero. Besides, mustn't do
|
||||
* log(0)...
|
||||
*/
|
||||
if (tuples <= 0)
|
||||
tuples = 1;
|
||||
if (npages <= 0)
|
||||
npages = 1;
|
||||
if (tuples < 2.0)
|
||||
tuples = 2.0;
|
||||
|
||||
log_npages = ceil(base_log((double) npages, 2.0));
|
||||
if (log_npages <= 0.0)
|
||||
log_npages = 1.0;
|
||||
temp += _cpu_index_page_weight_ * tuples * base_log(tuples, 2.0);
|
||||
|
||||
temp += npages * log_npages;
|
||||
if (nbytes > sortmembytes)
|
||||
{
|
||||
double npages = ceil(nbytes / BLCKSZ);
|
||||
double nruns = nbytes / (sortmembytes * 2);
|
||||
double log_runs = ceil(base_log(nruns, 6.0));
|
||||
|
||||
/*
|
||||
* could be base_log(tuples, NBuffers), but we are only doing 2-way
|
||||
* merges
|
||||
*/
|
||||
temp += _cpu_page_weight_ * tuples * base_log((double) tuples, 2.0);
|
||||
if (log_runs < 1.0)
|
||||
log_runs = 1.0;
|
||||
temp += 2 * npages * log_runs;
|
||||
}
|
||||
|
||||
Assert(temp > 0);
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
@@ -258,18 +290,15 @@ cost_sort(List *pathkeys, int tuples, int width)
|
||||
* cost_result
|
||||
* Determines and returns the cost of writing a relation of 'tuples'
|
||||
* tuples of 'width' bytes out to a result relation.
|
||||
*
|
||||
* Returns a flonum.
|
||||
*
|
||||
*/
|
||||
#ifdef NOT_USED
|
||||
Cost
|
||||
cost_result(int tuples, int width)
|
||||
cost_result(double tuples, int width)
|
||||
{
|
||||
Cost temp = 0;
|
||||
|
||||
temp = temp + page_size(tuples, width);
|
||||
temp = temp + _cpu_page_weight_ * tuples;
|
||||
temp += page_size(tuples, width);
|
||||
temp += _cpu_page_weight_ * tuples;
|
||||
Assert(temp >= 0);
|
||||
return temp;
|
||||
}
|
||||
@@ -281,111 +310,106 @@ cost_result(int tuples, int width)
|
||||
* Determines and returns the cost of joining two relations using the
|
||||
* nested loop algorithm.
|
||||
*
|
||||
* 'outercost' is the (disk+cpu) cost of scanning the outer relation
|
||||
* 'innercost' is the (disk+cpu) cost of scanning the inner relation
|
||||
* 'outertuples' is the number of tuples in the outer relation
|
||||
*
|
||||
* Returns a flonum.
|
||||
*
|
||||
* 'outer_path' is the path for the outer relation
|
||||
* 'inner_path' is the path for the inner relation
|
||||
* 'is_indexjoin' is true if we are using an indexscan for the inner relation
|
||||
*/
|
||||
Cost
|
||||
cost_nestloop(Cost outercost,
|
||||
Cost innercost,
|
||||
int outertuples,
|
||||
int innertuples,
|
||||
int outerpages,
|
||||
cost_nestloop(Path *outer_path,
|
||||
Path *inner_path,
|
||||
bool is_indexjoin)
|
||||
{
|
||||
Cost temp = 0;
|
||||
|
||||
if (!_enable_nestloop_)
|
||||
temp += _disable_cost_;
|
||||
temp += outercost;
|
||||
temp += outertuples * innercost;
|
||||
Assert(temp >= 0);
|
||||
|
||||
temp += outer_path->path_cost;
|
||||
temp += outer_path->parent->rows * inner_path->path_cost;
|
||||
|
||||
Assert(temp >= 0);
|
||||
return temp;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_mergejoin
|
||||
* 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
|
||||
* outer and inner relations
|
||||
* 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
|
||||
* to sort the outer and inner relations (or NIL if no explicit
|
||||
* sort is needed because the source path is already ordered)
|
||||
* 'outertuples' and 'innertuples' are the number of tuples in the outer
|
||||
* and inner relations
|
||||
* 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
|
||||
* of the tuples of the outer and inner relations
|
||||
*
|
||||
* Returns a flonum.
|
||||
* Determines and returns the cost of joining two relations using the
|
||||
* merge join algorithm.
|
||||
*
|
||||
* 'outer_path' is the path for the outer relation
|
||||
* 'inner_path' is the path for the inner relation
|
||||
* 'outersortkeys' and 'innersortkeys' are lists of the keys to be used
|
||||
* to sort the outer and inner relations, or NIL if no explicit
|
||||
* sort is needed because the source path is already ordered
|
||||
*/
|
||||
Cost
|
||||
cost_mergejoin(Cost outercost,
|
||||
Cost innercost,
|
||||
cost_mergejoin(Path *outer_path,
|
||||
Path *inner_path,
|
||||
List *outersortkeys,
|
||||
List *innersortkeys,
|
||||
int outersize,
|
||||
int innersize,
|
||||
int outerwidth,
|
||||
int innerwidth)
|
||||
List *innersortkeys)
|
||||
{
|
||||
Cost temp = 0;
|
||||
|
||||
if (!_enable_mergejoin_)
|
||||
temp += _disable_cost_;
|
||||
|
||||
temp += outercost;
|
||||
temp += innercost;
|
||||
/* cost of source data */
|
||||
temp += outer_path->path_cost + inner_path->path_cost;
|
||||
|
||||
if (outersortkeys) /* do we need to sort? */
|
||||
temp += cost_sort(outersortkeys, outersize, outerwidth);
|
||||
temp += cost_sort(outersortkeys,
|
||||
outer_path->parent->rows,
|
||||
outer_path->parent->width);
|
||||
|
||||
if (innersortkeys) /* do we need to sort? */
|
||||
temp += cost_sort(innersortkeys, innersize, innerwidth);
|
||||
temp += _cpu_page_weight_ * (outersize + innersize);
|
||||
temp += cost_sort(innersortkeys,
|
||||
inner_path->parent->rows,
|
||||
inner_path->parent->width);
|
||||
|
||||
/*
|
||||
* Estimate the number of tuples to be processed in the mergejoin itself
|
||||
* as one per tuple in the two source relations. This could be a drastic
|
||||
* underestimate if there are many equal-keyed tuples in either relation,
|
||||
* but we have no good way of estimating that...
|
||||
*/
|
||||
temp += _cpu_page_weight_ * (outer_path->parent->rows +
|
||||
inner_path->parent->rows);
|
||||
|
||||
Assert(temp >= 0);
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/*
|
||||
* cost_hashjoin
|
||||
* Determines and returns the cost of joining two relations using the
|
||||
* hash join algorithm.
|
||||
*
|
||||
* 'outercost' and 'innercost' are the (disk+cpu) costs of scanning the
|
||||
* outer and inner relations
|
||||
* 'outersize' and 'innersize' are the number of tuples in the outer
|
||||
* and inner relations
|
||||
* 'outerwidth' and 'innerwidth' are the (typical) widths (in bytes)
|
||||
* of the tuples of the outer and inner relations
|
||||
* 'innerdisbursion' is an estimate of the disbursion statistic
|
||||
* 'outer_path' is the path for the outer relation
|
||||
* 'inner_path' is the path for the inner relation
|
||||
* 'innerdisbursion' is an estimate of the disbursion statistic
|
||||
* for the inner hash key.
|
||||
*
|
||||
* Returns a flonum.
|
||||
*/
|
||||
Cost
|
||||
cost_hashjoin(Cost outercost,
|
||||
Cost innercost,
|
||||
int outersize,
|
||||
int innersize,
|
||||
int outerwidth,
|
||||
int innerwidth,
|
||||
Cost innerdisbursion)
|
||||
cost_hashjoin(Path *outer_path,
|
||||
Path *inner_path,
|
||||
Selectivity innerdisbursion)
|
||||
{
|
||||
Cost temp = 0;
|
||||
double outerbytes = relation_byte_size(outersize, outerwidth);
|
||||
double innerbytes = relation_byte_size(innersize, innerwidth);
|
||||
double outerbytes = relation_byte_size(outer_path->parent->rows,
|
||||
outer_path->parent->width);
|
||||
double innerbytes = relation_byte_size(inner_path->parent->rows,
|
||||
inner_path->parent->width);
|
||||
long hashtablebytes = SortMem * 1024L;
|
||||
|
||||
if (!_enable_hashjoin_)
|
||||
temp += _disable_cost_;
|
||||
|
||||
/* cost of source data */
|
||||
temp += outercost + innercost;
|
||||
temp += outer_path->path_cost + inner_path->path_cost;
|
||||
|
||||
/* cost of computing hash function: must do it once per tuple */
|
||||
temp += _cpu_page_weight_ * (outersize + innersize);
|
||||
temp += _cpu_page_weight_ * (outer_path->parent->rows +
|
||||
inner_path->parent->rows);
|
||||
|
||||
/* the number of tuple comparisons needed is the number of outer
|
||||
* tuples times the typical hash bucket size, which we estimate
|
||||
@@ -393,8 +417,8 @@ cost_hashjoin(Cost outercost,
|
||||
* count. The cost per comparison is set at _cpu_index_page_weight_;
|
||||
* is that reasonable, or do we need another basic parameter?
|
||||
*/
|
||||
temp += _cpu_index_page_weight_ * outersize *
|
||||
(innersize * innerdisbursion);
|
||||
temp += _cpu_index_page_weight_ * outer_path->parent->rows *
|
||||
(inner_path->parent->rows * innerdisbursion);
|
||||
|
||||
/*
|
||||
* if inner relation is too big then we will need to "batch" the join,
|
||||
@@ -402,8 +426,10 @@ cost_hashjoin(Cost outercost,
|
||||
* extra time. Charge one cost unit per page of I/O.
|
||||
*/
|
||||
if (innerbytes > hashtablebytes)
|
||||
temp += 2 * (page_size(outersize, outerwidth) +
|
||||
page_size(innersize, innerwidth));
|
||||
temp += 2 * (page_size(outer_path->parent->rows,
|
||||
outer_path->parent->width) +
|
||||
page_size(inner_path->parent->rows,
|
||||
inner_path->parent->width));
|
||||
|
||||
/*
|
||||
* Bias against putting larger relation on inside. We don't want
|
||||
@@ -415,76 +441,74 @@ cost_hashjoin(Cost outercost,
|
||||
temp *= 1.1; /* is this an OK fudge factor? */
|
||||
|
||||
Assert(temp >= 0);
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/*
|
||||
* compute_rel_size
|
||||
* Computes the size of each relation in 'rel_list' (after applying
|
||||
* restrictions), by multiplying the selectivity of each restriction
|
||||
* by the original size of the relation.
|
||||
* set_rel_rows_width
|
||||
* Set the 'rows' and 'width' estimates for the given base relation.
|
||||
*
|
||||
* Sets the 'size' field for each relation entry with this computed size.
|
||||
*
|
||||
* Returns the size.
|
||||
* 'rows' is the estimated number of output tuples (after applying
|
||||
* restriction clauses).
|
||||
* 'width' is the estimated average output tuple width in bytes.
|
||||
*/
|
||||
int
|
||||
compute_rel_size(RelOptInfo *rel)
|
||||
void
|
||||
set_rel_rows_width(Query *root, RelOptInfo *rel)
|
||||
{
|
||||
Cost temp;
|
||||
int temp1;
|
||||
/* Should only be applied to base relations */
|
||||
Assert(length(rel->relids) == 1);
|
||||
|
||||
rel->rows = rel->tuples * restrictlist_selec(root, rel->restrictinfo);
|
||||
Assert(rel->rows >= 0);
|
||||
|
||||
set_rel_width(root, rel);
|
||||
}
|
||||
|
||||
/*
|
||||
* set_joinrel_rows_width
|
||||
* Set the 'rows' and 'width' estimates for the given join relation.
|
||||
*/
|
||||
void
|
||||
set_joinrel_rows_width(Query *root, RelOptInfo *rel,
|
||||
JoinPath *joinpath)
|
||||
{
|
||||
double temp;
|
||||
|
||||
/* cartesian product */
|
||||
temp = joinpath->outerjoinpath->parent->rows *
|
||||
joinpath->innerjoinpath->parent->rows;
|
||||
|
||||
/* apply restrictivity */
|
||||
temp *= restrictlist_selec(root, joinpath->path.parent->restrictinfo);
|
||||
|
||||
temp = rel->tuples * product_selec(rel->restrictinfo);
|
||||
Assert(temp >= 0);
|
||||
if (temp >= (MAXINT - 1))
|
||||
temp1 = MAXINT;
|
||||
else
|
||||
temp1 = ceil((double) temp);
|
||||
Assert(temp1 >= 0);
|
||||
Assert(temp1 <= MAXINT);
|
||||
return temp1;
|
||||
rel->rows = temp;
|
||||
|
||||
set_rel_width(root, rel);
|
||||
}
|
||||
|
||||
/*
|
||||
* compute_rel_width
|
||||
* Computes the width in bytes of a tuple from 'rel'.
|
||||
*
|
||||
* Returns the width of the tuple as a fixnum.
|
||||
* set_rel_width
|
||||
* Set the estimated output width of the relation.
|
||||
*/
|
||||
int
|
||||
compute_rel_width(RelOptInfo *rel)
|
||||
static void
|
||||
set_rel_width(Query *root, RelOptInfo *rel)
|
||||
{
|
||||
return compute_targetlist_width(rel->targetlist);
|
||||
}
|
||||
|
||||
/*
|
||||
* compute_targetlist_width
|
||||
* Computes the width in bytes of a tuple made from 'targetlist'.
|
||||
*
|
||||
* Returns the width of the tuple as a fixnum.
|
||||
*/
|
||||
static int
|
||||
compute_targetlist_width(List *targetlist)
|
||||
{
|
||||
List *temp_tl;
|
||||
int tuple_width = 0;
|
||||
List *tle;
|
||||
|
||||
foreach(temp_tl, targetlist)
|
||||
{
|
||||
tuple_width += compute_attribute_width(lfirst(temp_tl));
|
||||
}
|
||||
return tuple_width;
|
||||
foreach(tle, rel->targetlist)
|
||||
tuple_width += compute_attribute_width((TargetEntry *) lfirst(tle));
|
||||
Assert(tuple_width >= 0);
|
||||
rel->width = tuple_width;
|
||||
}
|
||||
|
||||
/*
|
||||
* compute_attribute_width
|
||||
* Given a target list entry, find the size in bytes of the attribute.
|
||||
*
|
||||
* If a field is variable-length, it is assumed to be at least the size
|
||||
* of a TID field.
|
||||
*
|
||||
* Returns the width of the attribute as a fixnum.
|
||||
* If a field is variable-length, we make a default assumption. Would be
|
||||
* better if VACUUM recorded some stats about the average field width...
|
||||
*/
|
||||
static int
|
||||
compute_attribute_width(TargetEntry *tlistentry)
|
||||
@@ -497,47 +521,15 @@ compute_attribute_width(TargetEntry *tlistentry)
|
||||
return width;
|
||||
}
|
||||
|
||||
/*
|
||||
* compute_joinrel_size
|
||||
* Computes the size of the join relation 'joinrel'.
|
||||
*
|
||||
* Returns a fixnum.
|
||||
*/
|
||||
int
|
||||
compute_joinrel_size(JoinPath *joinpath)
|
||||
{
|
||||
Cost temp = 1.0;
|
||||
int temp1 = 0;
|
||||
|
||||
/* cartesian product */
|
||||
temp *= ((Path *) joinpath->outerjoinpath)->parent->size;
|
||||
temp *= ((Path *) joinpath->innerjoinpath)->parent->size;
|
||||
|
||||
temp = temp * product_selec(joinpath->pathinfo);
|
||||
if (temp >= (MAXINT - 1) / 2)
|
||||
{
|
||||
/* if we exceed (MAXINT-1)/2, we switch to log scale */
|
||||
/* +1 prevents log(0) */
|
||||
temp1 = ceil(log(temp + 1 - (MAXINT - 1) / 2) + (MAXINT - 1) / 2);
|
||||
}
|
||||
else
|
||||
temp1 = ceil((double) temp);
|
||||
Assert(temp1 >= 0);
|
||||
|
||||
return temp1;
|
||||
}
|
||||
|
||||
/*
|
||||
* relation_byte_size
|
||||
* Estimate the storage space in bytes for a given number of tuples
|
||||
* of a given width (size in bytes).
|
||||
* To avoid overflow with big relations, result is a double.
|
||||
*/
|
||||
|
||||
static double
|
||||
relation_byte_size(int tuples, int width)
|
||||
relation_byte_size(double tuples, int width)
|
||||
{
|
||||
return ((double) tuples) * ((double) (width + sizeof(HeapTupleData)));
|
||||
return tuples * ((double) (width + sizeof(HeapTupleData)));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -545,14 +537,10 @@ relation_byte_size(int tuples, int width)
|
||||
* Returns an estimate of the number of pages covered by a given
|
||||
* number of tuples of a given width (size in bytes).
|
||||
*/
|
||||
int
|
||||
page_size(int tuples, int width)
|
||||
static double
|
||||
page_size(double tuples, int width)
|
||||
{
|
||||
int temp;
|
||||
|
||||
temp = (int) ceil(relation_byte_size(tuples, width) / BLCKSZ);
|
||||
Assert(temp >= 0);
|
||||
return temp;
|
||||
return ceil(relation_byte_size(tuples, width) / BLCKSZ);
|
||||
}
|
||||
|
||||
static double
|
||||
|
||||
Reference in New Issue
Block a user