
Standard pgindent run for 8.1.

Author: Bruce Momjian
Date: 2005-10-15 02:49:52 +00:00
parent 790c01d280
commit 1dc3498251
770 changed files with 34334 additions and 32507 deletions

src/backend/optimizer/path/costsize.c

@@ -49,7 +49,7 @@
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.148 2005/10/05 17:19:19 tgl Exp $
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.149 2005/10/15 02:49:19 momjian Exp $
*
*-------------------------------------------------------------------------
*/
@@ -121,8 +121,8 @@ clamp_row_est(double nrows)
{
/*
* Force estimate to be at least one row, to make explain output look
* better and to avoid possible divide-by-zero when interpolating
* costs. Make it an integer, too.
* better and to avoid possible divide-by-zero when interpolating costs.
* Make it an integer, too.
*/
if (nrows < 1.0)
nrows = 1.0;
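
The clamp in the hunk above is small enough to show whole. A minimal standalone sketch follows; the rounding step is an assumption inferred from the "Make it an integer, too" remark (rint seems the natural choice), not something visible in this hunk.

#include <stdio.h>
#include <math.h>

/* Sketch of the row-estimate clamp described above. The rint() call
 * is assumed from the "Make it an integer, too" comment. */
static double
clamp_row_est(double nrows)
{
    if (nrows < 1.0)
        nrows = 1.0;            /* avoid divide-by-zero, ugly EXPLAIN output */
    else
        nrows = rint(nrows);    /* force an integral estimate */
    return nrows;
}

int
main(void)
{
    printf("%.0f %.0f\n", clamp_row_est(0.3), clamp_row_est(12.7));  /* 1 13 */
    return 0;
}
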
@@ -155,12 +155,11 @@ cost_seqscan(Path *path, PlannerInfo *root,
/*
* disk costs
*
* The cost of reading a page sequentially is 1.0, by definition. Note
* that the Unix kernel will typically do some amount of read-ahead
* optimization, so that this cost is less than the true cost of
* reading a page from disk. We ignore that issue here, but must take
* it into account when estimating the cost of non-sequential
* accesses!
* The cost of reading a page sequentially is 1.0, by definition. Note that
* the Unix kernel will typically do some amount of read-ahead
* optimization, so that this cost is less than the true cost of reading a
* page from disk. We ignore that issue here, but must take it into
* account when estimating the cost of non-sequential accesses!
*/
run_cost += baserel->pages; /* sequential fetches with cost 1.0 */
@@ -276,10 +275,10 @@ cost_index(IndexPath *path, PlannerInfo *root,
startup_cost += disable_cost;
/*
* Call index-access-method-specific code to estimate the processing
* cost for scanning the index, as well as the selectivity of the
* index (ie, the fraction of main-table tuples we will have to
* retrieve) and its correlation to the main-table tuple order.
* Call index-access-method-specific code to estimate the processing cost
* for scanning the index, as well as the selectivity of the index (ie,
* the fraction of main-table tuples we will have to retrieve) and its
* correlation to the main-table tuple order.
*/
OidFunctionCall7(index->amcostestimate,
PointerGetDatum(root),
@@ -292,8 +291,8 @@ cost_index(IndexPath *path, PlannerInfo *root,
/*
* Save amcostestimate's results for possible use in bitmap scan planning.
* We don't bother to save indexStartupCost or indexCorrelation, because
* a bitmap scan doesn't care about either.
* We don't bother to save indexStartupCost or indexCorrelation, because a
* bitmap scan doesn't care about either.
*/
path->indextotalcost = indexTotalCost;
path->indexselectivity = indexSelectivity;
@@ -366,19 +365,18 @@ cost_index(IndexPath *path, PlannerInfo *root,
}
/*
* min_IO_cost corresponds to the perfectly correlated case
* (csquared=1), max_IO_cost to the perfectly uncorrelated case
* (csquared=0). Note that we just charge random_page_cost per page
* in the uncorrelated case, rather than using
* cost_nonsequential_access, since we've already accounted for
* caching effects by using the Mackert model.
* min_IO_cost corresponds to the perfectly correlated case (csquared=1),
* max_IO_cost to the perfectly uncorrelated case (csquared=0). Note that
* we just charge random_page_cost per page in the uncorrelated case,
* rather than using cost_nonsequential_access, since we've already
* accounted for caching effects by using the Mackert model.
*/
min_IO_cost = ceil(indexSelectivity * T);
max_IO_cost = pages_fetched * random_page_cost;
/*
* Now interpolate based on estimated index order correlation to get
* total disk I/O cost for main table accesses.
* Now interpolate based on estimated index order correlation to get total
* disk I/O cost for main table accesses.
*/
csquared = indexCorrelation * indexCorrelation;
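
The interpolation the comment describes reduces to sliding between the two I/O extremes on the square of the correlation. A sketch with invented inputs (random_page_cost = 4.0 is the old default):

#include <stdio.h>
#include <math.h>

int
main(void)
{
    double T = 10000.0;              /* table pages, invented */
    double indexSelectivity = 0.01;  /* fraction of tuples fetched */
    double pages_fetched = 500.0;    /* from the Mackert model, invented */
    double random_page_cost = 4.0;   /* old default, assumed */
    double indexCorrelation = 0.5;   /* hypothetical */

    double min_IO_cost = ceil(indexSelectivity * T);        /* 100 */
    double max_IO_cost = pages_fetched * random_page_cost;  /* 2000 */
    double csquared = indexCorrelation * indexCorrelation;

    /* slide between the extremes on the squared correlation */
    double io_cost = max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
    printf("interpolated I/O cost = %.1f\n", io_cost);      /* 1525.0 */
    return 0;
}
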
@@ -390,9 +388,9 @@ cost_index(IndexPath *path, PlannerInfo *root,
* Normally the indexquals will be removed from the list of restriction
* clauses that we have to evaluate as qpquals, so we should subtract
* their costs from baserestrictcost. But if we are doing a join then
* some of the indexquals are join clauses and shouldn't be
* subtracted. Rather than work out exactly how much to subtract, we
* don't subtract anything.
* some of the indexquals are join clauses and shouldn't be subtracted.
* Rather than work out exactly how much to subtract, we don't subtract
* anything.
*/
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
@@ -467,9 +465,9 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
/*
* For small numbers of pages we should charge random_page_cost apiece,
* while if nearly all the table's pages are being read, it's more
* appropriate to charge 1.0 apiece. The effect is nonlinear, too.
* For lack of a better idea, interpolate like this to determine the
* cost per page.
* appropriate to charge 1.0 apiece. The effect is nonlinear, too. For
* lack of a better idea, interpolate like this to determine the cost per
* page.
*/
if (pages_fetched >= 2.0)
cost_per_page = random_page_cost -
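
The hunk cuts off mid-expression; the continuation in the sketch below is assumed from the 8.1-era source (a sqrt interpolation), so treat it as illustrative rather than definitive. It shows the nonlinear curve the comment describes: near random_page_cost for a few pages, approaching 1.0 as the scan covers most of the table.

#include <stdio.h>
#include <math.h>

int
main(void)
{
    double random_page_cost = 4.0;  /* old default, assumed */
    double T = 10000.0;             /* pages in table, invented */

    for (double pages_fetched = 2.0; pages_fetched <= T; pages_fetched *= 10.0)
    {
        /* assumed continuation of the truncated expression above */
        double cost_per_page = random_page_cost -
            (random_page_cost - 1.0) * sqrt(pages_fetched / T);
        printf("%8.0f pages -> %.3f per page\n", pages_fetched, cost_per_page);
    }
    return 0;
}
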
@@ -482,10 +480,10 @@ cost_bitmap_heap_scan(Path *path, PlannerInfo *root, RelOptInfo *baserel,
/*
* Estimate CPU costs per tuple.
*
* Often the indexquals don't need to be rechecked at each tuple ...
* but not always, especially not if there are enough tuples involved
* that the bitmaps become lossy. For the moment, just assume they
* will be rechecked always.
* Often the indexquals don't need to be rechecked at each tuple ... but not
* always, especially not if there are enough tuples involved that the
* bitmaps become lossy. For the moment, just assume they will be
* rechecked always.
*/
startup_cost += baserel->baserestrictcost.startup;
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost.per_tuple;
@@ -527,7 +525,7 @@ cost_bitmap_tree_node(Path *path, Cost *cost, Selectivity *selec)
* Estimate the cost of a BitmapAnd node
*
* Note that this considers only the costs of index scanning and bitmap
* creation, not the eventual heap access. In that sense the object isn't
* creation, not the eventual heap access. In that sense the object isn't
* truly a Path, but it has enough path-like properties (costs in particular)
* to warrant treating it as one.
*/
@@ -535,24 +533,24 @@ void
cost_bitmap_and_node(BitmapAndPath *path, PlannerInfo *root)
{
Cost totalCost;
Selectivity selec;
Selectivity selec;
ListCell *l;
/*
* We estimate AND selectivity on the assumption that the inputs
* are independent. This is probably often wrong, but we don't
* have the info to do better.
* We estimate AND selectivity on the assumption that the inputs are
* independent. This is probably often wrong, but we don't have the info
* to do better.
*
* The runtime cost of the BitmapAnd itself is estimated at 100x
* cpu_operator_cost for each tbm_intersect needed. Probably too
* small, definitely too simplistic?
* cpu_operator_cost for each tbm_intersect needed. Probably too small,
* definitely too simplistic?
*/
totalCost = 0.0;
selec = 1.0;
foreach(l, path->bitmapquals)
{
Path *subpath = (Path *) lfirst(l);
Cost subCost;
Path *subpath = (Path *) lfirst(l);
Cost subCost;
Selectivity subselec;
cost_bitmap_tree_node(subpath, &subCost, &subselec);
@@ -578,25 +576,25 @@ void
cost_bitmap_or_node(BitmapOrPath *path, PlannerInfo *root)
{
Cost totalCost;
Selectivity selec;
Selectivity selec;
ListCell *l;
/*
* We estimate OR selectivity on the assumption that the inputs
* are non-overlapping, since that's often the case in "x IN (list)"
* type situations. Of course, we clamp to 1.0 at the end.
* We estimate OR selectivity on the assumption that the inputs are
* non-overlapping, since that's often the case in "x IN (list)" type
* situations. Of course, we clamp to 1.0 at the end.
*
* The runtime cost of the BitmapOr itself is estimated at 100x
* cpu_operator_cost for each tbm_union needed. Probably too
* small, definitely too simplistic? We are aware that the tbm_unions
* are optimized out when the inputs are BitmapIndexScans.
* cpu_operator_cost for each tbm_union needed. Probably too small,
* definitely too simplistic? We are aware that the tbm_unions are
* optimized out when the inputs are BitmapIndexScans.
*/
totalCost = 0.0;
selec = 0.0;
foreach(l, path->bitmapquals)
{
Path *subpath = (Path *) lfirst(l);
Cost subCost;
Path *subpath = (Path *) lfirst(l);
Cost subCost;
Selectivity subselec;
cost_bitmap_tree_node(subpath, &subCost, &subselec);
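
Both bitmap-node hunks boil down to a fold over the subpaths. A toy sketch of the two selectivity rules stated above (independence product for AND, clamped non-overlapping sum for OR), plus the quoted 100 x cpu_operator_cost charge; cost-parameter value assumed from the old defaults.

#include <stdio.h>

#define CPU_OPERATOR_COST 0.0025    /* old default, assumed */

static double
and_selec(const double *subselec, int n)
{
    double selec = 1.0;
    for (int i = 0; i < n; i++)
        selec *= subselec[i];       /* assume inputs independent */
    return selec;
}

static double
or_selec(const double *subselec, int n)
{
    double selec = 0.0;
    for (int i = 0; i < n; i++)
        selec += subselec[i];       /* assume inputs non-overlapping */
    return selec > 1.0 ? 1.0 : selec;   /* clamp to 1.0 at the end */
}

int
main(void)
{
    double s[] = {0.1, 0.2};
    printf("AND: %.3f  OR: %.3f\n", and_selec(s, 2), or_selec(s, 2));
    printf("charge per tbm_intersect/tbm_union: %.3f\n",
           100.0 * CPU_OPERATOR_COST);
    return 0;
}
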
@@ -661,10 +659,9 @@ cost_subqueryscan(Path *path, RelOptInfo *baserel)
Assert(baserel->rtekind == RTE_SUBQUERY);
/*
* Cost of path is cost of evaluating the subplan, plus cost of
* evaluating any restriction clauses that will be attached to the
* SubqueryScan node, plus cpu_tuple_cost to account for selection and
* projection overhead.
* Cost of path is cost of evaluating the subplan, plus cost of evaluating
* any restriction clauses that will be attached to the SubqueryScan node,
* plus cpu_tuple_cost to account for selection and projection overhead.
*/
path->startup_cost = baserel->subplan->startup_cost;
path->total_cost = baserel->subplan->total_cost;
@@ -694,8 +691,8 @@ cost_functionscan(Path *path, PlannerInfo *root, RelOptInfo *baserel)
/*
* For now, estimate function's cost at one operator eval per function
* call. Someday we should revive the function cost estimate columns
* in pg_proc...
* call. Someday we should revive the function cost estimate columns in
* pg_proc...
*/
cpu_per_tuple = cpu_operator_cost;
@@ -758,9 +755,8 @@ cost_sort(Path *path, PlannerInfo *root,
startup_cost += disable_cost;
/*
* We want to be sure the cost of a sort is never estimated as zero,
* even if passed-in tuple count is zero. Besides, mustn't do
* log(0)...
* We want to be sure the cost of a sort is never estimated as zero, even
* if passed-in tuple count is zero. Besides, mustn't do log(0)...
*/
if (tuples < 2.0)
tuples = 2.0;
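
The clamp above guards an O(n log n) comparison charge. A sketch of the in-memory CPU term; the 2 x cpu_operator_cost multiplier per comparison is an assumption from the era's source, not visible in this hunk.

#include <stdio.h>
#include <math.h>

int
main(void)
{
    double cpu_operator_cost = 0.0025;  /* old default, assumed */
    double tuples = 1.0;                /* degenerate input */

    if (tuples < 2.0)
        tuples = 2.0;   /* never estimate a zero-cost sort; keeps log() sane */

    /* comparisons ~ tuples * log2(tuples); multiplier assumed */
    double cost = 2.0 * cpu_operator_cost * tuples * log(tuples) / log(2.0);
    printf("sort CPU cost = %.4f\n", cost);     /* small but nonzero: 0.0100 */
    return 0;
}
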
@@ -790,8 +786,8 @@ cost_sort(Path *path, PlannerInfo *root,
}
/*
* Also charge a small amount (arbitrarily set equal to operator cost)
* per extracted tuple.
* Also charge a small amount (arbitrarily set equal to operator cost) per
* extracted tuple.
*/
run_cost += cpu_operator_cost * tuples;
@@ -828,17 +824,16 @@ cost_material(Path *path,
/*
* Charge a very small amount per inserted tuple, to reflect bookkeeping
* costs. We use cpu_tuple_cost/10 for this. This is needed to break
* the tie that would otherwise exist between nestloop with A outer,
* costs. We use cpu_tuple_cost/10 for this. This is needed to break the
* tie that would otherwise exist between nestloop with A outer,
* materialized B inner and nestloop with B outer, materialized A inner.
* The extra cost ensures we'll prefer materializing the smaller rel.
*/
startup_cost += cpu_tuple_cost * 0.1 * tuples;
/*
* Also charge a small amount per extracted tuple. We use
* cpu_tuple_cost so that it doesn't appear worthwhile to materialize
* a bare seqscan.
* Also charge a small amount per extracted tuple. We use cpu_tuple_cost
* so that it doesn't appear worthwhile to materialize a bare seqscan.
*/
run_cost += cpu_tuple_cost * tuples;
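
The tie-break reasoning above is easy to see with numbers: with the same extraction charge on both orderings, the 0.1 x cpu_tuple_cost insert charge is what makes materializing the smaller relation win. A sketch with invented sizes:

#include <stdio.h>

int
main(void)
{
    double cpu_tuple_cost = 0.01;   /* old default, assumed */
    double small = 1000.0, big = 100000.0;

    double mat_small = cpu_tuple_cost * 0.1 * small;  /* 1.0   */
    double mat_big   = cpu_tuple_cost * 0.1 * big;    /* 100.0 */
    printf("insert charge, small inner: %.1f  big inner: %.1f\n",
           mat_small, mat_big);
    return 0;
}
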
@@ -865,23 +860,22 @@ cost_agg(Path *path, PlannerInfo *root,
Cost total_cost;
/*
* We charge one cpu_operator_cost per aggregate function per input
* tuple, and another one per output tuple (corresponding to transfn
* and finalfn calls respectively). If we are grouping, we charge an
* additional cpu_operator_cost per grouping column per input tuple
* for grouping comparisons.
* We charge one cpu_operator_cost per aggregate function per input tuple,
* and another one per output tuple (corresponding to transfn and finalfn
* calls respectively). If we are grouping, we charge an additional
* cpu_operator_cost per grouping column per input tuple for grouping
* comparisons.
*
* We will produce a single output tuple if not grouping, and a tuple per
* group otherwise. We charge cpu_tuple_cost for each output tuple.
*
* Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the
* same total CPU cost, but AGG_SORTED has lower startup cost. If the
* input path is already sorted appropriately, AGG_SORTED should be
* preferred (since it has no risk of memory overflow). This will
* happen as long as the computed total costs are indeed exactly equal
* --- but if there's roundoff error we might do the wrong thing. So
* be sure that the computations below form the same intermediate
* values in the same order.
* Note: in this cost model, AGG_SORTED and AGG_HASHED have exactly the same
* total CPU cost, but AGG_SORTED has lower startup cost. If the input
* path is already sorted appropriately, AGG_SORTED should be preferred
* (since it has no risk of memory overflow). This will happen as long as
* the computed total costs are indeed exactly equal --- but if there's
* roundoff error we might do the wrong thing. So be sure that the
* computations below form the same intermediate values in the same order.
*/
if (aggstrategy == AGG_PLAIN)
{
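
The comment in the hunk above enumerates the aggregate charges; a sketch that simply adds them up (all names and figures hypothetical, cost parameters the old defaults). Note the comment's warning: AGG_SORTED and AGG_HASHED must form the same intermediate values in the same order so their totals compare exactly equal.

#include <stdio.h>

int
main(void)
{
    double cpu_operator_cost = 0.0025, cpu_tuple_cost = 0.01;  /* assumed */
    double input_tuples = 100000.0, numGroups = 100.0;
    int    numAggs = 2, numGroupCols = 1;

    double cpu = cpu_operator_cost * numAggs * input_tuples        /* transfn */
               + cpu_operator_cost * numAggs * numGroups           /* finalfn */
               + cpu_operator_cost * numGroupCols * input_tuples   /* grouping */
               + cpu_tuple_cost * numGroups;                       /* output */
    printf("agg CPU cost ~ %.2f\n", cpu);
    return 0;
}
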
@@ -937,8 +931,8 @@ cost_group(Path *path, PlannerInfo *root,
total_cost = input_total_cost;
/*
* Charge one cpu_operator_cost per comparison per input tuple. We
* assume all columns get compared at most of the tuples.
* Charge one cpu_operator_cost per comparison per input tuple. We assume
* all columns get compared at most of the tuples.
*/
total_cost += cpu_operator_cost * input_tuples * numGroupCols;
@@ -968,10 +962,10 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
Selectivity joininfactor;
/*
* If inner path is an indexscan, be sure to use its estimated output
* row count, which may be lower than the restriction-clause-only row
* count of its parent. (We don't include this case in the PATH_ROWS
* macro because it applies *only* to a nestloop's inner relation.)
* If inner path is an indexscan, be sure to use its estimated output row
* count, which may be lower than the restriction-clause-only row count of
* its parent. (We don't include this case in the PATH_ROWS macro because
* it applies *only* to a nestloop's inner relation.)
*/
if (IsA(inner_path, IndexPath))
inner_path_rows = ((IndexPath *) inner_path)->rows;
@@ -982,11 +976,11 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
startup_cost += disable_cost;
/*
* If we're doing JOIN_IN then we will stop scanning inner tuples for
* an outer tuple as soon as we have one match. Account for the
* effects of this by scaling down the cost estimates in proportion to
* the JOIN_IN selectivity. (This assumes that all the quals attached
* to the join are IN quals, which should be true.)
* If we're doing JOIN_IN then we will stop scanning inner tuples for an
* outer tuple as soon as we have one match. Account for the effects of
* this by scaling down the cost estimates in proportion to the JOIN_IN
* selectivity. (This assumes that all the quals attached to the join are
* IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(path, root);
@@ -996,9 +990,9 @@ cost_nestloop(NestPath *path, PlannerInfo *root)
* NOTE: clearly, we must pay both outer and inner paths' startup_cost
* before we can start returning tuples, so the join's startup cost is
* their sum. What's not so clear is whether the inner path's
* startup_cost must be paid again on each rescan of the inner path.
* This is not true if the inner path is materialized or is a
* hashjoin, but probably is true otherwise.
* startup_cost must be paid again on each rescan of the inner path. This
* is not true if the inner path is materialized or is a hashjoin, but
* probably is true otherwise.
*/
startup_cost += outer_path->startup_cost + inner_path->startup_cost;
run_cost += outer_path->total_cost - outer_path->startup_cost;
@@ -1077,12 +1071,11 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/*
* Compute cost and selectivity of the mergequals and qpquals (other
* restriction clauses) separately. We use approx_selectivity here
* for speed --- in most cases, any errors won't affect the result
* much.
* restriction clauses) separately. We use approx_selectivity here for
* speed --- in most cases, any errors won't affect the result much.
*
* Note: it's probably bogus to use the normal selectivity calculation
* here when either the outer or inner path is a UniquePath.
* Note: it's probably bogus to use the normal selectivity calculation here
* when either the outer or inner path is a UniquePath.
*/
merge_selec = approx_selectivity(root, mergeclauses,
path->jpath.jointype);
@@ -1095,31 +1088,30 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
mergejointuples = clamp_row_est(merge_selec * outer_path_rows * inner_path_rows);
/*
* When there are equal merge keys in the outer relation, the
* mergejoin must rescan any matching tuples in the inner relation.
* This means re-fetching inner tuples. Our cost model for this is
* that a re-fetch costs the same as an original fetch, which is
* probably an overestimate; but on the other hand we ignore the
* bookkeeping costs of mark/restore. Not clear if it's worth
* developing a more refined model.
* When there are equal merge keys in the outer relation, the mergejoin
* must rescan any matching tuples in the inner relation. This means
* re-fetching inner tuples. Our cost model for this is that a re-fetch
* costs the same as an original fetch, which is probably an overestimate;
* but on the other hand we ignore the bookkeeping costs of mark/restore.
* Not clear if it's worth developing a more refined model.
*
* The number of re-fetches can be estimated approximately as size of
* merge join output minus size of inner relation. Assume that the
* distinct key values are 1, 2, ..., and denote the number of values
* of each key in the outer relation as m1, m2, ...; in the inner
* relation, n1, n2, ... Then we have
* The number of re-fetches can be estimated approximately as size of merge
* join output minus size of inner relation. Assume that the distinct key
* values are 1, 2, ..., and denote the number of values of each key in
* the outer relation as m1, m2, ...; in the inner relation, n1, n2, ...
* Then we have
*
* size of join = m1 * n1 + m2 * n2 + ...
*
* number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 *
* n1 + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
* number of rescanned tuples = (m1 - 1) * n1 + (m2 - 1) * n2 + ... = m1 * n1
* + m2 * n2 + ... - (n1 + n2 + ...) = size of join - size of inner
* relation
*
* This equation works correctly for outer tuples having no inner match
* (nk = 0), but not for inner tuples having no outer match (mk = 0);
* we are effectively subtracting those from the number of rescanned
* tuples, when we should not. Can we do better without expensive
* selectivity computations?
* This equation works correctly for outer tuples having no inner match (nk =
* 0), but not for inner tuples having no outer match (mk = 0); we are
* effectively subtracting those from the number of rescanned tuples, when
* we should not. Can we do better without expensive selectivity
* computations?
*/
if (IsA(outer_path, UniquePath))
rescannedtuples = 0;
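
The identity in the comment above is easy to check on tiny inputs; a sketch where the outer relation has m_k copies of each key and the inner has n_k:

#include <stdio.h>

int
main(void)
{
    double m[] = {2, 3};            /* outer multiplicities */
    double n[] = {4, 1};            /* inner multiplicities */
    double joinsize = 0, inner_size = 0, rescanned = 0;

    for (int k = 0; k < 2; k++)
    {
        joinsize   += m[k] * n[k];          /* 2*4 + 3*1 = 11 */
        inner_size += n[k];                 /* 4 + 1 = 5 */
        rescanned  += (m[k] - 1) * n[k];    /* 1*4 + 2*1 = 6 */
    }
    /* rescanned == size of join - size of inner relation */
    printf("%.0f == %.0f\n", rescanned, joinsize - inner_size);  /* 6 == 6 */
    return 0;
}
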
@@ -1140,9 +1132,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
* inputs that will actually need to be scanned. We use only the first
* (most significant) merge clause for this purpose.
*
* Since this calculation is somewhat expensive, and will be the same for
* all mergejoin paths associated with the merge clause, we cache the
* results in the RestrictInfo node.
* Since this calculation is somewhat expensive, and will be the same for all
* mergejoin paths associated with the merge clause, we cache the results
* in the RestrictInfo node.
*/
if (mergeclauses && path->jpath.jointype != JOIN_FULL)
{
@@ -1181,9 +1173,8 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/*
* Readjust scan selectivities to account for above rounding. This is
* normally an insignificant effect, but when there are only a few
* rows in the inputs, failing to do this makes for a large percentage
* error.
* normally an insignificant effect, but when there are only a few rows in
* the inputs, failing to do this makes for a large percentage error.
*/
outerscansel = outer_rows / outer_path_rows;
innerscansel = inner_rows / inner_path_rows;
@@ -1231,20 +1222,20 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/* CPU costs */
/*
* If we're doing JOIN_IN then we will stop outputting inner tuples
* for an outer tuple as soon as we have one match. Account for the
* effects of this by scaling down the cost estimates in proportion to
* the expected output size. (This assumes that all the quals
* attached to the join are IN quals, which should be true.)
* If we're doing JOIN_IN then we will stop outputting inner tuples for an
* outer tuple as soon as we have one match. Account for the effects of
* this by scaling down the cost estimates in proportion to the expected
* output size. (This assumes that all the quals attached to the join are
* IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(&path->jpath, root);
/*
* The number of tuple comparisons needed is approximately number of
* outer rows plus number of inner rows plus number of rescanned
* tuples (can we refine this?). At each one, we need to evaluate the
* mergejoin quals. NOTE: JOIN_IN mode does not save any work here,
* so do NOT include joininfactor.
* The number of tuple comparisons needed is approximately number of outer
* rows plus number of inner rows plus number of rescanned tuples (can we
* refine this?). At each one, we need to evaluate the mergejoin quals.
* NOTE: JOIN_IN mode does not save any work here, so do NOT include
* joininfactor.
*/
startup_cost += merge_qual_cost.startup;
run_cost += merge_qual_cost.per_tuple *
@@ -1253,9 +1244,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root)
/*
* For each tuple that gets through the mergejoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
* clauses that are to be applied at the join. (This is pessimistic
* since not all of the quals may get evaluated at each tuple.) This
* work is skipped in JOIN_IN mode, so apply the factor.
* clauses that are to be applied at the join. (This is pessimistic since
* not all of the quals may get evaluated at each tuple.) This work is
* skipped in JOIN_IN mode, so apply the factor.
*/
startup_cost += qp_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
@@ -1290,9 +1281,9 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
double outer_path_rows = PATH_ROWS(outer_path);
double inner_path_rows = PATH_ROWS(inner_path);
double outerbytes = relation_byte_size(outer_path_rows,
outer_path->parent->width);
outer_path->parent->width);
double innerbytes = relation_byte_size(inner_path_rows,
inner_path->parent->width);
inner_path->parent->width);
int num_hashclauses = list_length(hashclauses);
int numbuckets;
int numbatches;
@@ -1306,12 +1297,11 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* Compute cost and selectivity of the hashquals and qpquals (other
* restriction clauses) separately. We use approx_selectivity here
* for speed --- in most cases, any errors won't affect the result
* much.
* restriction clauses) separately. We use approx_selectivity here for
* speed --- in most cases, any errors won't affect the result much.
*
* Note: it's probably bogus to use the normal selectivity calculation
* here when either the outer or inner path is a UniquePath.
* Note: it's probably bogus to use the normal selectivity calculation here
* when either the outer or inner path is a UniquePath.
*/
hash_selec = approx_selectivity(root, hashclauses,
path->jpath.jointype);
@@ -1329,13 +1319,12 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
startup_cost += inner_path->total_cost;
/*
* Cost of computing hash function: must do it once per input tuple.
* We charge one cpu_operator_cost for each column's hash function.
* Cost of computing hash function: must do it once per input tuple. We
* charge one cpu_operator_cost for each column's hash function.
*
* XXX when a hashclause is more complex than a single operator, we
* really should charge the extra eval costs of the left or right
* side, as appropriate, here. This seems more work than it's worth
* at the moment.
* XXX when a hashclause is more complex than a single operator, we really
* should charge the extra eval costs of the left or right side, as
* appropriate, here. This seems more work than it's worth at the moment.
*/
startup_cost += cpu_operator_cost * num_hashclauses * inner_path_rows;
run_cost += cpu_operator_cost * num_hashclauses * outer_path_rows;
@@ -1345,17 +1334,17 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
inner_path->parent->width,
&numbuckets,
&numbatches);
virtualbuckets = (double) numbuckets * (double) numbatches;
virtualbuckets = (double) numbuckets *(double) numbatches;
/*
* Determine bucketsize fraction for inner relation. We use the
* smallest bucketsize estimated for any individual hashclause; this
* is undoubtedly conservative.
* Determine bucketsize fraction for inner relation. We use the smallest
* bucketsize estimated for any individual hashclause; this is undoubtedly
* conservative.
*
* BUT: if inner relation has been unique-ified, we can assume it's good
* for hashing. This is important both because it's the right answer,
* and because we avoid contaminating the cache with a value that's
* wrong for non-unique-ified paths.
* BUT: if inner relation has been unique-ified, we can assume it's good for
* hashing. This is important both because it's the right answer, and
* because we avoid contaminating the cache with a value that's wrong for
* non-unique-ified paths.
*/
if (IsA(inner_path, UniquePath))
innerbucketsize = 1.0 / virtualbuckets;
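
The effect of virtual buckets on the probe workload can be sketched as the comment later in this file puts it: outer tuples times expected bucket occupancy of the inner relation. All figures below are invented; the unique-ified case uses 1/virtualbuckets as shown in the hunk above.

#include <stdio.h>

int
main(void)
{
    int numbuckets = 1024, numbatches = 4;
    double virtualbuckets = (double) numbuckets * (double) numbatches;

    double inner_rows = 100000.0, outer_rows = 50000.0;
    double innerbucketsize = 1.0 / virtualbuckets;  /* unique-ified inner */

    /* expected hashqual evaluations: outer rows * tuples per bucket */
    double comparisons = outer_rows * (inner_rows * innerbucketsize);
    printf("expected hashqual evaluations ~ %.0f\n", comparisons);
    return 0;
}
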
@@ -1370,13 +1359,12 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
Assert(IsA(restrictinfo, RestrictInfo));
/*
* First we have to figure out which side of the hashjoin
* clause is the inner side.
* First we have to figure out which side of the hashjoin clause
* is the inner side.
*
* Since we tend to visit the same clauses over and over when
* planning a large query, we cache the bucketsize estimate in
* the RestrictInfo node to avoid repeated lookups of
* statistics.
* planning a large query, we cache the bucketsize estimate in the
* RestrictInfo node to avoid repeated lookups of statistics.
*/
if (bms_is_subset(restrictinfo->right_relids,
inner_path->parent->relids))
@@ -1388,7 +1376,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/* not cached yet */
thisbucketsize =
estimate_hash_bucketsize(root,
get_rightop(restrictinfo->clause),
get_rightop(restrictinfo->clause),
virtualbuckets);
restrictinfo->right_bucketsize = thisbucketsize;
}
@@ -1404,7 +1392,7 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/* not cached yet */
thisbucketsize =
estimate_hash_bucketsize(root,
get_leftop(restrictinfo->clause),
get_leftop(restrictinfo->clause),
virtualbuckets);
restrictinfo->left_bucketsize = thisbucketsize;
}
@@ -1417,10 +1405,10 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* If inner relation is too big then we will need to "batch" the join,
* which implies writing and reading most of the tuples to disk an
* extra time. Charge one cost unit per page of I/O (correct since it
* should be nice and sequential...). Writing the inner rel counts as
* startup cost, all the rest as run cost.
* which implies writing and reading most of the tuples to disk an extra
* time. Charge one cost unit per page of I/O (correct since it should be
* nice and sequential...). Writing the inner rel counts as startup cost,
* all the rest as run cost.
*/
if (numbatches > 1)
{
@@ -1436,21 +1424,21 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/* CPU costs */
/*
* If we're doing JOIN_IN then we will stop comparing inner tuples to
* an outer tuple as soon as we have one match. Account for the
* effects of this by scaling down the cost estimates in proportion to
* the expected output size. (This assumes that all the quals
* attached to the join are IN quals, which should be true.)
* If we're doing JOIN_IN then we will stop comparing inner tuples to an
* outer tuple as soon as we have one match. Account for the effects of
* this by scaling down the cost estimates in proportion to the expected
* output size. (This assumes that all the quals attached to the join are
* IN quals, which should be true.)
*/
joininfactor = join_in_selectivity(&path->jpath, root);
/*
* The number of tuple comparisons needed is the number of outer
* tuples times the typical number of tuples in a hash bucket, which
* is the inner relation size times its bucketsize fraction. At each
* one, we need to evaluate the hashjoin quals. (Note: charging the
* full qual eval cost at each tuple is pessimistic, since we don't
* evaluate the quals unless the hash values match exactly.)
* The number of tuple comparisons needed is the number of outer tuples
* times the typical number of tuples in a hash bucket, which is the inner
* relation size times its bucketsize fraction. At each one, we need to
* evaluate the hashjoin quals. (Note: charging the full qual eval cost
* at each tuple is pessimistic, since we don't evaluate the quals unless
* the hash values match exactly.)
*/
startup_cost += hash_qual_cost.startup;
run_cost += hash_qual_cost.per_tuple *
@@ -1460,8 +1448,8 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* For each tuple that gets through the hashjoin proper, we charge
* cpu_tuple_cost plus the cost of evaluating additional restriction
* clauses that are to be applied at the join. (This is pessimistic
* since not all of the quals may get evaluated at each tuple.)
* clauses that are to be applied at the join. (This is pessimistic since
* not all of the quals may get evaluated at each tuple.)
*/
startup_cost += qp_qual_cost.startup;
cpu_per_tuple = cpu_tuple_cost + qp_qual_cost.per_tuple;
@@ -1469,16 +1457,16 @@ cost_hashjoin(HashPath *path, PlannerInfo *root)
/*
* Bias against putting larger relation on inside. We don't want an
* absolute prohibition, though, since larger relation might have
* better bucketsize --- and we can't trust the size estimates
* unreservedly, anyway. Instead, inflate the run cost by the square
* root of the size ratio. (Why square root? No real good reason,
* but it seems reasonable...)
* absolute prohibition, though, since larger relation might have better
* bucketsize --- and we can't trust the size estimates unreservedly,
* anyway. Instead, inflate the run cost by the square root of the size
* ratio. (Why square root? No real good reason, but it seems
* reasonable...)
*
* Note: before 7.4 we implemented this by inflating startup cost; but if
* there's a disable_cost component in the input paths' startup cost,
* that unfairly penalizes the hash. Probably it'd be better to keep
* track of disable penalty separately from cost.
* there's a disable_cost component in the input paths' startup cost, that
* unfairly penalizes the hash. Probably it'd be better to keep track of
* disable penalty separately from cost.
*/
if (innerbytes > outerbytes && outerbytes > 0)
run_cost *= sqrt(innerbytes / outerbytes);
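
The inflation shown just above, with numbers: a 4x larger inner relation inflates run cost by sqrt(4) = 2, a soft bias rather than a prohibition.

#include <stdio.h>
#include <math.h>

int
main(void)
{
    double innerbytes = 4.0e6, outerbytes = 1.0e6;  /* invented sizes */
    double run_cost = 100.0;

    if (innerbytes > outerbytes && outerbytes > 0)
        run_cost *= sqrt(innerbytes / outerbytes);  /* soft size-ratio bias */
    printf("biased run cost = %.1f\n", run_cost);   /* 200.0 */
    return 0;
}
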
@@ -1545,13 +1533,13 @@ cost_qual_eval_walker(Node *node, QualCost *total)
return false;
/*
* Our basic strategy is to charge one cpu_operator_cost for each
* operator or function node in the given tree. Vars and Consts are
* charged zero, and so are boolean operators (AND, OR, NOT).
* Simplistic, but a lot better than no model at all.
* Our basic strategy is to charge one cpu_operator_cost for each operator
* or function node in the given tree. Vars and Consts are charged zero,
* and so are boolean operators (AND, OR, NOT). Simplistic, but a lot
* better than no model at all.
*
* Should we try to account for the possibility of short-circuit
* evaluation of AND/OR?
* Should we try to account for the possibility of short-circuit evaluation
* of AND/OR?
*/
if (IsA(node, FuncExpr) ||
IsA(node, OpExpr) ||
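
The rule stated in the comment above can be mimicked on a toy expression tree; the node representation below is invented for illustration and is not PostgreSQL's Node system.

#include <stdio.h>

/* Toy mirror of the charging rule: operators and functions cost one
 * cpu_operator_cost; Vars, Consts, and boolean connectives cost zero. */
typedef enum { T_TOYVAR, T_TOYCONST, T_TOYOP, T_TOYFUNC, T_TOYBOOL } ToyKind;

typedef struct ToyNode
{
    ToyKind kind;
    struct ToyNode *left, *right;   /* NULL where absent */
} ToyNode;

static double
qual_cost(const ToyNode *node)
{
    if (node == NULL)
        return 0.0;
    double c = (node->kind == T_TOYOP || node->kind == T_TOYFUNC)
        ? 0.0025                    /* old default cpu_operator_cost, assumed */
        : 0.0;
    return c + qual_cost(node->left) + qual_cost(node->right);
}

int
main(void)
{
    /* (a + 1) AND f(b): two charged nodes -> 0.0050 */
    ToyNode a = {T_TOYVAR}, one = {T_TOYCONST}, b = {T_TOYVAR};
    ToyNode plus = {T_TOYOP, &a, &one};
    ToyNode f = {T_TOYFUNC, &b, NULL};
    ToyNode andnode = {T_TOYBOOL, &plus, &f};
    printf("per-tuple qual cost = %.4f\n", qual_cost(&andnode));
    return 0;
}
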
@@ -1572,12 +1560,12 @@ cost_qual_eval_walker(Node *node, QualCost *total)
{
/*
* A subplan node in an expression typically indicates that the
* subplan will be executed on each evaluation, so charge
* accordingly. (Sub-selects that can be executed as InitPlans
* have already been removed from the expression.)
* subplan will be executed on each evaluation, so charge accordingly.
* (Sub-selects that can be executed as InitPlans have already been
* removed from the expression.)
*
* An exception occurs when we have decided we can implement the
* subplan by hashing.
* An exception occurs when we have decided we can implement the subplan
* by hashing.
*
*/
SubPlan *subplan = (SubPlan *) node;
@@ -1586,32 +1574,31 @@ cost_qual_eval_walker(Node *node, QualCost *total)
if (subplan->useHashTable)
{
/*
* If we are using a hash table for the subquery outputs, then
* the cost of evaluating the query is a one-time cost. We
* charge one cpu_operator_cost per tuple for the work of
* loading the hashtable, too.
* If we are using a hash table for the subquery outputs, then the
* cost of evaluating the query is a one-time cost. We charge one
* cpu_operator_cost per tuple for the work of loading the
* hashtable, too.
*/
total->startup += plan->total_cost +
cpu_operator_cost * plan->plan_rows;
/*
* The per-tuple costs include the cost of evaluating the
* lefthand expressions, plus the cost of probing the
* hashtable. Recursion into the exprs list will handle the
* lefthand expressions properly, and will count one
* cpu_operator_cost for each comparison operator. That is
* probably too low for the probing cost, but it's hard to
* make a better estimate, so live with it for now.
* The per-tuple costs include the cost of evaluating the lefthand
* expressions, plus the cost of probing the hashtable. Recursion
* into the exprs list will handle the lefthand expressions
* properly, and will count one cpu_operator_cost for each
* comparison operator. That is probably too low for the probing
* cost, but it's hard to make a better estimate, so live with it
* for now.
*/
}
else
{
/*
* Otherwise we will be rescanning the subplan output on each
* evaluation. We need to estimate how much of the output we
* will actually need to scan. NOTE: this logic should agree
* with the estimates used by make_subplan() in
* plan/subselect.c.
* evaluation. We need to estimate how much of the output we will
* actually need to scan. NOTE: this logic should agree with the
* estimates used by make_subplan() in plan/subselect.c.
*/
Cost plan_run_cost = plan->total_cost - plan->startup_cost;
@@ -1636,10 +1623,10 @@ cost_qual_eval_walker(Node *node, QualCost *total)
/*
* Also account for subplan's startup cost. If the subplan is
* uncorrelated or undirect correlated, AND its topmost node
* is a Sort or Material node, assume that we'll only need to
* pay its startup cost once; otherwise assume we pay the
* startup cost every time.
* uncorrelated or undirect correlated, AND its topmost node is a
* Sort or Material node, assume that we'll only need to pay its
* startup cost once; otherwise assume we pay the startup cost
* every time.
*/
if (subplan->parParam == NIL &&
(IsA(plan, Sort) ||
@@ -1761,9 +1748,9 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
/*
* Compute joinclause selectivity. Note that we are only considering
* clauses that become restriction clauses at this join level; we are
* not double-counting them because they were not considered in
* estimating the sizes of the component rels.
* clauses that become restriction clauses at this join level; we are not
* double-counting them because they were not considered in estimating the
* sizes of the component rels.
*/
selec = clauselist_selectivity(root,
restrictlist,
@@ -1773,13 +1760,13 @@ set_joinrel_size_estimates(PlannerInfo *root, RelOptInfo *rel,
/*
* Basically, we multiply size of Cartesian product by selectivity.
*
* If we are doing an outer join, take that into account: the output must
* be at least as large as the non-nullable input. (Is there any
* chance of being even smarter?)
* If we are doing an outer join, take that into account: the output must be
* at least as large as the non-nullable input. (Is there any chance of
* being even smarter?)
*
* For JOIN_IN and variants, the Cartesian product is figured with
* respect to a unique-ified input, and then we can clamp to the size
* of the other input.
* For JOIN_IN and variants, the Cartesian product is figured with respect to
* a unique-ified input, and then we can clamp to the size of the other
* input.
*/
switch (jointype)
{
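
A sketch of the outer-join clamp the comment above describes, for the left-join case (other cases analogous; figures invented):

#include <stdio.h>

int
main(void)
{
    double outer_rows = 1000.0, inner_rows = 50.0;
    double selec = 0.001;                            /* joinclause selectivity */

    double rows = outer_rows * inner_rows * selec;   /* Cartesian * selec = 50 */
    if (rows < outer_rows)          /* left join: every outer row survives */
        rows = outer_rows;
    printf("estimated join rows = %.0f\n", rows);    /* 1000 */
    return 0;
}
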
@@ -1848,12 +1835,11 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
return 1.0;
/*
* Return 1.0 if the inner side is already known unique. The case
* where the inner path is already a UniquePath probably cannot happen
* in current usage, but check it anyway for completeness. The
* interesting case is where we've determined the inner relation
* itself is unique, which we can check by looking at the rows
* estimate for its UniquePath.
* Return 1.0 if the inner side is already known unique. The case where
* the inner path is already a UniquePath probably cannot happen in
* current usage, but check it anyway for completeness. The interesting
* case is where we've determined the inner relation itself is unique,
* which we can check by looking at the rows estimate for its UniquePath.
*/
if (IsA(path->innerjoinpath, UniquePath))
return 1.0;
@@ -1866,10 +1852,9 @@ join_in_selectivity(JoinPath *path, PlannerInfo *root)
/*
* Compute same result set_joinrel_size_estimates would compute for
* JOIN_INNER. Note that we use the input rels' absolute size
* estimates, not PATH_ROWS() which might be less; if we used
* PATH_ROWS() we'd be double-counting the effects of any join clauses
* used in input scans.
* JOIN_INNER. Note that we use the input rels' absolute size estimates,
* not PATH_ROWS() which might be less; if we used PATH_ROWS() we'd be
* double-counting the effects of any join clauses used in input scans.
*/
selec = clauselist_selectivity(root,
path->joinrestrictinfo,
@@ -1908,8 +1893,8 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
/*
* Estimate number of rows the function itself will return.
*
* XXX no idea how to do this yet; but we can at least check whether
* function returns set or not...
* XXX no idea how to do this yet; but we can at least check whether function
* returns set or not...
*/
if (expression_returns_set(rte->funcexpr))
rel->tuples = 1000;
@@ -1957,8 +1942,7 @@ set_rel_width(PlannerInfo *root, RelOptInfo *rel)
ndx = var->varattno - rel->min_attr;
/*
* The width probably hasn't been cached yet, but may as well
* check
* The width probably hasn't been cached yet, but may as well check
*/
if (rel->attr_widths[ndx] > 0)
{