1
0
mirror of https://github.com/postgres/postgres.git synced 2025-10-27 00:12:01 +03:00

Build out the planner support function infrastructure.

Add support function requests for estimating the selectivity, cost,
and number of result rows (if it is a set-returning function, or SRF)
of the target function.

The lack of a way to estimate selectivity of a boolean-returning
function in WHERE has been a recognized deficiency of the planner
since Berkeley days.  This commit finally fixes it.

In addition, non-constant estimates of cost and number of output
rows are now possible.  We still fall back to looking at procost
and prorows if the support function doesn't service the request,
of course.

To make concrete use of the possibility of estimating output rowcount
for SRFs, this commit adds support functions for array_unnest(anyarray)
and the integer variants of generate_series; the lack of plausible
rowcount estimates for those, even when it's obvious to a human,
has been a repeated subject of complaints.  Obviously, much more
could now be done along these lines, but I'm mostly just trying to get
the infrastructure in place.

Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
This commit is contained in:
Tom Lane
2019-02-09 18:32:23 -05:00
parent 1fb57af920
commit a391ff3c3d
27 changed files with 792 additions and 90 deletions

View File

@@ -2112,9 +2112,9 @@ cost_agg(Path *path, PlannerInfo *root,
/*
* The transCost.per_tuple component of aggcosts should be charged once
* per input tuple, corresponding to the costs of evaluating the aggregate
* transfns and their input expressions (with any startup cost of course
* charged but once). The finalCost component is charged once per output
* tuple, corresponding to the costs of evaluating the finalfns.
* transfns and their input expressions. The finalCost.per_tuple component
* is charged once per output tuple, corresponding to the costs of
* evaluating the finalfns. Startup costs are of course charged but once.
*
* If we are grouping, we charge an additional cpu_operator_cost per
* grouping column per input tuple for grouping comparisons.
@@ -2136,7 +2136,8 @@ cost_agg(Path *path, PlannerInfo *root,
startup_cost = input_total_cost;
startup_cost += aggcosts->transCost.startup;
startup_cost += aggcosts->transCost.per_tuple * input_tuples;
startup_cost += aggcosts->finalCost;
startup_cost += aggcosts->finalCost.startup;
startup_cost += aggcosts->finalCost.per_tuple;
/* we aren't grouping */
total_cost = startup_cost + cpu_tuple_cost;
output_tuples = 1;
@@ -2155,7 +2156,8 @@ cost_agg(Path *path, PlannerInfo *root,
total_cost += aggcosts->transCost.startup;
total_cost += aggcosts->transCost.per_tuple * input_tuples;
total_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
total_cost += aggcosts->finalCost * numGroups;
total_cost += aggcosts->finalCost.startup;
total_cost += aggcosts->finalCost.per_tuple * numGroups;
total_cost += cpu_tuple_cost * numGroups;
output_tuples = numGroups;
}
@@ -2168,8 +2170,9 @@ cost_agg(Path *path, PlannerInfo *root,
startup_cost += aggcosts->transCost.startup;
startup_cost += aggcosts->transCost.per_tuple * input_tuples;
startup_cost += (cpu_operator_cost * numGroupCols) * input_tuples;
startup_cost += aggcosts->finalCost.startup;
total_cost = startup_cost;
total_cost += aggcosts->finalCost * numGroups;
total_cost += aggcosts->finalCost.per_tuple * numGroups;
total_cost += cpu_tuple_cost * numGroups;
output_tuples = numGroups;
}
@@ -2234,7 +2237,11 @@ cost_windowagg(Path *path, PlannerInfo *root,
Cost wfunccost;
QualCost argcosts;
wfunccost = get_func_cost(wfunc->winfnoid) * cpu_operator_cost;
argcosts.startup = argcosts.per_tuple = 0;
add_function_cost(root, wfunc->winfnoid, (Node *) wfunc,
&argcosts);
startup_cost += argcosts.startup;
wfunccost = argcosts.per_tuple;
/* also add the input expressions' cost to per-input-row costs */
cost_qual_eval_node(&argcosts, (Node *) wfunc->args, root);
@@ -3864,8 +3871,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
*/
if (IsA(node, FuncExpr))
{
context->total.per_tuple +=
get_func_cost(((FuncExpr *) node)->funcid) * cpu_operator_cost;
add_function_cost(context->root, ((FuncExpr *) node)->funcid, node,
&context->total);
}
else if (IsA(node, OpExpr) ||
IsA(node, DistinctExpr) ||
@@ -3873,8 +3880,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
{
/* rely on struct equivalence to treat these all alike */
set_opfuncid((OpExpr *) node);
context->total.per_tuple +=
get_func_cost(((OpExpr *) node)->opfuncid) * cpu_operator_cost;
add_function_cost(context->root, ((OpExpr *) node)->opfuncid, node,
&context->total);
}
else if (IsA(node, ScalarArrayOpExpr))
{
@@ -3884,10 +3891,15 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
*/
ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) node;
Node *arraynode = (Node *) lsecond(saop->args);
QualCost sacosts;
set_sa_opfuncid(saop);
context->total.per_tuple += get_func_cost(saop->opfuncid) *
cpu_operator_cost * estimate_array_length(arraynode) * 0.5;
sacosts.startup = sacosts.per_tuple = 0;
add_function_cost(context->root, saop->opfuncid, NULL,
&sacosts);
context->total.startup += sacosts.startup;
context->total.per_tuple += sacosts.per_tuple *
estimate_array_length(arraynode) * 0.5;
}
else if (IsA(node, Aggref) ||
IsA(node, WindowFunc))
@@ -3913,11 +3925,13 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
/* check the result type's input function */
getTypeInputInfo(iocoerce->resulttype,
&iofunc, &typioparam);
context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
add_function_cost(context->root, iofunc, NULL,
&context->total);
/* check the input type's output function */
getTypeOutputInfo(exprType((Node *) iocoerce->arg),
&iofunc, &typisvarlena);
context->total.per_tuple += get_func_cost(iofunc) * cpu_operator_cost;
add_function_cost(context->root, iofunc, NULL,
&context->total);
}
else if (IsA(node, ArrayCoerceExpr))
{
@@ -3941,8 +3955,8 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
{
Oid opid = lfirst_oid(lc);
context->total.per_tuple += get_func_cost(get_opcode(opid)) *
cpu_operator_cost;
add_function_cost(context->root, get_opcode(opid), NULL,
&context->total);
}
}
else if (IsA(node, MinMaxExpr) ||
@@ -4941,7 +4955,7 @@ set_function_size_estimates(PlannerInfo *root, RelOptInfo *rel)
foreach(lc, rte->functions)
{
RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc);
double ntup = expression_returns_set_rows(rtfunc->funcexpr);
double ntup = expression_returns_set_rows(root, rtfunc->funcexpr);
if (ntup > rel->tuples)
rel->tuples = ntup;