1
0
mirror of https://github.com/postgres/postgres.git synced 2025-07-14 08:21:07 +03:00

First cut at making indexscan cost estimates depend on correlation

between index order and table order.
This commit is contained in:
Tom Lane
2001-05-09 23:13:37 +00:00
parent e02033572d
commit c23bc6fbb0
8 changed files with 317 additions and 88 deletions

View File

@ -31,17 +31,18 @@
* result by interpolating between startup_cost and total_cost. In detail:
* actual_cost = startup_cost +
* (total_cost - startup_cost) * tuples_to_fetch / path->parent->rows;
* Note that a relation's rows count (and, by extension, a Plan's plan_rows)
* are set without regard to any LIMIT, so that this equation works properly.
* (Also, these routines guarantee not to set the rows count to zero, so there
* will be no zero divide.) The LIMIT is applied as a separate Plan node.
* Note that a base relation's rows count (and, by extension, plan_rows for
* plan nodes below the LIMIT node) are set without regard to any LIMIT, so
* that this equation works properly. (Also, these routines guarantee not to
* set the rows count to zero, so there will be no zero divide.) The LIMIT is
* applied as a top-level plan node.
*
*
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
* Portions Copyright (c) 1994, Regents of the University of California
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.72 2001/05/09 00:35:09 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.73 2001/05/09 23:13:34 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -205,12 +206,18 @@ cost_index(Path *path, Query *root,
{
Cost startup_cost = 0;
Cost run_cost = 0;
Cost cpu_per_tuple;
Cost indexStartupCost;
Cost indexTotalCost;
Selectivity indexSelectivity;
double indexCorrelation,
csquared;
Cost min_IO_cost,
max_IO_cost;
Cost cpu_per_tuple;
double tuples_fetched;
double pages_fetched;
double T,
b;
/* Should only be applied to base relations */
Assert(IsA(baserel, RelOptInfo) &&IsA(index, IndexOptInfo));
@ -224,38 +231,52 @@ cost_index(Path *path, Query *root,
* Call index-access-method-specific code to estimate the processing
* cost for scanning the index, as well as the selectivity of the
* index (ie, the fraction of main-table tuples we will have to
* retrieve).
* retrieve) and its correlation to the main-table tuple order.
*/
OidFunctionCall7(index->amcostestimate,
OidFunctionCall8(index->amcostestimate,
PointerGetDatum(root),
PointerGetDatum(baserel),
PointerGetDatum(index),
PointerGetDatum(indexQuals),
PointerGetDatum(&indexStartupCost),
PointerGetDatum(&indexTotalCost),
PointerGetDatum(&indexSelectivity));
PointerGetDatum(&indexSelectivity),
PointerGetDatum(&indexCorrelation));
/* all costs for touching index itself included here */
startup_cost += indexStartupCost;
run_cost += indexTotalCost - indexStartupCost;
/*
/*----------
* Estimate number of main-table tuples and pages fetched.
*
* If the number of tuples is much smaller than the number of pages in
* the relation, each tuple will cost a separate nonsequential fetch.
* If it is comparable or larger, then probably we will be able to
* avoid some fetches. We use a growth rate of log(#tuples/#pages +
* 1) --- probably totally bogus, but intuitively it gives the right
* shape of curve at least.
* When the index ordering is uncorrelated with the table ordering,
* we use an approximation proposed by Mackert and Lohman, "Index Scans
* Using a Finite LRU Buffer: A Validated I/O Model", ACM Transactions
* on Database Systems, Vol. 14, No. 3, September 1989, Pages 401-424.
* The Mackert and Lohman approximation is that the number of pages
* fetched is
* PF =
* min(2TNs/(2T+Ns), T) when T <= b
* 2TNs/(2T+Ns) when T > b and Ns <= 2Tb/(2T-b)
* b + (Ns - 2Tb/(2T-b))*(T-b)/T when T > b and Ns > 2Tb/(2T-b)
* where
* T = # pages in table
* N = # tuples in table
* s = selectivity = fraction of table to be scanned
* b = # buffer pages available (we include kernel space here)
*
* XXX if the relation has recently been "clustered" using this index,
* then in fact the target tuples will be highly nonuniformly
* distributed, and we will be seriously overestimating the scan cost!
* Currently we have no way to know whether the relation has been
* clustered, nor how much it's been modified since the last
* clustering, so we ignore this effect. Would be nice to do better
* someday.
* When the index ordering is exactly correlated with the table ordering
* (just after a CLUSTER, for example), the number of pages fetched should
* be just sT. What's more, these will be sequential fetches, not the
* random fetches that occur in the uncorrelated case. So, depending on
* the extent of correlation, we should estimate the actual I/O cost
* somewhere between s * T * 1.0 and PF * random_cost. We currently
* interpolate linearly between these two endpoints based on the
* correlation squared (XXX is that appropriate?).
*
* In any case the number of tuples fetched is Ns.
*----------
*/
tuples_fetched = indexSelectivity * baserel->tuples;
@ -263,24 +284,56 @@ cost_index(Path *path, Query *root,
if (tuples_fetched < 1.0)
tuples_fetched = 1.0;
if (baserel->pages > 0)
pages_fetched = ceil(baserel->pages *
log(tuples_fetched / baserel->pages + 1.0));
/* This part is the Mackert and Lohman formula */
T = (baserel->pages > 1) ? (double) baserel->pages : 1.0;
b = (effective_cache_size > 1) ? effective_cache_size : 1.0;
if (T <= b)
{
pages_fetched =
(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
if (pages_fetched > T)
pages_fetched = T;
}
else
pages_fetched = tuples_fetched;
{
double lim;
lim = (2.0 * T * b) / (2.0 * T - b);
if (tuples_fetched <= lim)
{
pages_fetched =
(2.0 * T * tuples_fetched) / (2.0 * T + tuples_fetched);
}
else
{
pages_fetched =
b + (tuples_fetched - lim) * (T - b) / T;
}
}
/*
* Now estimate one nonsequential access per page fetched, plus
* appropriate CPU costs per tuple.
* min_IO_cost corresponds to the perfectly correlated case (csquared=1),
* max_IO_cost to the perfectly uncorrelated case (csquared=0). Note
* that we just charge random_page_cost per page in the uncorrelated
* case, rather than using cost_nonsequential_access, since we've already
* accounted for caching effects by using the Mackert model.
*/
/* disk costs for main table */
run_cost += pages_fetched * cost_nonsequential_access(baserel->pages);
/* CPU costs */
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
min_IO_cost = ceil(indexSelectivity * T);
max_IO_cost = pages_fetched * random_page_cost;
/*
* Now interpolate based on estimated index order correlation
* to get total disk I/O cost for main table accesses.
*/
csquared = indexCorrelation * indexCorrelation;
run_cost += max_IO_cost + csquared * (min_IO_cost - max_IO_cost);
/*
* Estimate CPU costs per tuple.
*
* Normally the indexquals will be removed from the list of
* restriction clauses that we have to evaluate as qpquals, so we
* should subtract their costs from baserestrictcost. For a lossy
@ -290,6 +343,8 @@ cost_index(Path *path, Query *root,
* Rather than work out exactly how much to subtract, we don't
* subtract anything in that case either.
*/
cpu_per_tuple = cpu_tuple_cost + baserel->baserestrictcost;
if (!index->lossy && !is_injoin)
cpu_per_tuple -= cost_qual_eval(indexQuals);

View File

@ -8,7 +8,7 @@
*
*
* IDENTIFICATION
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.31 2001/04/18 20:42:55 tgl Exp $
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/var.c,v 1.32 2001/05/09 23:13:35 tgl Exp $
*
*-------------------------------------------------------------------------
*/
@ -28,8 +28,9 @@ typedef struct
typedef struct
{
int varno;
int varattno;
int sublevels_up;
} contain_whole_tuple_var_context;
} contain_var_reference_context;
typedef struct
{
@ -39,8 +40,8 @@ typedef struct
static bool pull_varnos_walker(Node *node,
pull_varnos_context *context);
static bool contain_whole_tuple_var_walker(Node *node,
contain_whole_tuple_var_context *context);
static bool contain_var_reference_walker(Node *node,
contain_var_reference_context *context);
static bool contain_var_clause_walker(Node *node, void *context);
static bool pull_var_clause_walker(Node *node,
pull_var_clause_context *context);
@ -129,10 +130,10 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
/*
* contain_whole_tuple_var
* contain_var_reference
*
* Detect whether a parsetree contains any references to the whole
* tuple of a given rtable entry (ie, a Var with varattno = 0).
* Detect whether a parsetree contains any references to a specified
* attribute of a specified rtable entry.
*
* NOTE: this is used on not-yet-planned expressions. It may therefore find
* bare SubLinks, and if so it needs to recurse into them to look for uplevel
@ -140,11 +141,12 @@ pull_varnos_walker(Node *node, pull_varnos_context *context)
* SubPlan, we only need to look at the parameters passed to the subplan.
*/
bool
contain_whole_tuple_var(Node *node, int varno, int levelsup)
contain_var_reference(Node *node, int varno, int varattno, int levelsup)
{
contain_whole_tuple_var_context context;
contain_var_reference_context context;
context.varno = varno;
context.varattno = varattno;
context.sublevels_up = levelsup;
/*
@ -154,15 +156,15 @@ contain_whole_tuple_var(Node *node, int varno, int levelsup)
*/
if (node && IsA(node, Query))
return query_tree_walker((Query *) node,
contain_whole_tuple_var_walker,
contain_var_reference_walker,
(void *) &context, true);
else
return contain_whole_tuple_var_walker(node, &context);
return contain_var_reference_walker(node, &context);
}
static bool
contain_whole_tuple_var_walker(Node *node,
contain_whole_tuple_var_context *context)
contain_var_reference_walker(Node *node,
contain_var_reference_context *context)
{
if (node == NULL)
return false;
@ -171,8 +173,8 @@ contain_whole_tuple_var_walker(Node *node,
Var *var = (Var *) node;
if (var->varno == context->varno &&
var->varlevelsup == context->sublevels_up &&
var->varattno == InvalidAttrNumber)
var->varattno == context->varattno &&
var->varlevelsup == context->sublevels_up)
return true;
return false;
}
@ -187,11 +189,11 @@ contain_whole_tuple_var_walker(Node *node,
*/
Expr *expr = (Expr *) node;
if (contain_whole_tuple_var_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper,
context))
if (contain_var_reference_walker((Node *) ((SubPlan *) expr->oper)->sublink->oper,
context))
return true;
if (contain_whole_tuple_var_walker((Node *) expr->args,
context))
if (contain_var_reference_walker((Node *) expr->args,
context))
return true;
return false;
}
@ -202,16 +204,29 @@ contain_whole_tuple_var_walker(Node *node,
context->sublevels_up++;
result = query_tree_walker((Query *) node,
contain_whole_tuple_var_walker,
contain_var_reference_walker,
(void *) context, true);
context->sublevels_up--;
return result;
}
return expression_tree_walker(node, contain_whole_tuple_var_walker,
return expression_tree_walker(node, contain_var_reference_walker,
(void *) context);
}
/*
* contain_whole_tuple_var
*
* Detect whether a parsetree contains any references to the whole
* tuple of a given rtable entry (ie, a Var with varattno = 0).
*/
bool
contain_whole_tuple_var(Node *node, int varno, int levelsup)
{
return contain_var_reference(node, varno, InvalidAttrNumber, levelsup);
}
/*
* contain_var_clause
* Recursively scan a clause to discover whether it contains any Var nodes