1
0
mirror of https://github.com/postgres/postgres.git synced 2025-11-06 07:49:08 +03:00

Re-allow using statistics for bool-valued functions in WHERE.

Commit a391ff3c3, which added the ability for a function's support
function to provide a custom selectivity estimate for "WHERE f(...)",
unintentionally removed the possibility of applying expression
statistics after finding there's no applicable support function.
That happened because we no longer fell through to boolvarsel()
as before.  Refactor to do so again, putting the 0.3333333 default
back into boolvarsel() where it had been (cf. commit 39df0f150).

I surely wouldn't have made this error if 39df0f150 had included
a test case, so add one now.  At the time we did not have the
"extended statistics" infrastructure, but we do now, and it is
also unable to work in this scenario because of this error.
So make use of that for the test case.

This is very clearly a bug fix, but I'm afraid to put it into
released branches because of the likelihood of altering plan
choices, which we avoid doing in minor releases.  So, master only.

Reported-by: Frédéric Yhuel <frederic.yhuel@dalibo.com>
Author: Tom Lane <tgl@sss.pgh.pa.us>
Discussion: https://postgr.es/m/a8b99dce-1bfb-4d97-af73-54a32b85c916@dalibo.com
This commit is contained in:
Tom Lane
2025-09-20 12:44:52 -04:00
parent 18cdf5932a
commit 1eccb93150
5 changed files with 53 additions and 13 deletions

View File

@@ -874,6 +874,10 @@ clause_selectivity_ext(PlannerInfo *root,
varRelid,
jointype,
sjinfo);
/* If no support, fall back on boolvarsel */
if (s1 < 0)
s1 = boolvarsel(root, clause, varRelid);
}
else if (IsA(clause, ScalarArrayOpExpr))
{

View File

@@ -2143,9 +2143,8 @@ join_selectivity(PlannerInfo *root,
/*
* function_selectivity
*
* Returns the selectivity of a specified boolean function clause.
* This code executes registered procedures stored in the
* pg_proc relation, by calling the function manager.
* Attempt to estimate the selectivity of a specified boolean function clause
* by asking its support function. If the function lacks support, return -1.
*
* See clause_selectivity() for the meaning of the additional parameters.
*/
@@ -2163,15 +2162,8 @@ function_selectivity(PlannerInfo *root,
SupportRequestSelectivity req;
SupportRequestSelectivity *sresult;
/*
* If no support function is provided, use our historical default
* estimate, 0.3333333. This seems a pretty unprincipled choice, but
* Postgres has been using that estimate for function calls since 1992.
* The hoariness of this behavior suggests that we should not be in too
* much hurry to use another value.
*/
if (!prosupport)
return (Selectivity) 0.3333333;
return (Selectivity) -1; /* no support function */
req.type = T_SupportRequestSelectivity;
req.root = root;
@@ -2188,9 +2180,8 @@ function_selectivity(PlannerInfo *root,
DatumGetPointer(OidFunctionCall1(prosupport,
PointerGetDatum(&req)));
/* If support function fails, use default */
if (sresult != &req)
return (Selectivity) 0.3333333;
return (Selectivity) -1; /* function did not honor request */
if (req.selectivity < 0.0 || req.selectivity > 1.0)
elog(ERROR, "invalid function selectivity: %f", req.selectivity);

View File

@@ -1528,6 +1528,17 @@ boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
selec = var_eq_const(&vardata, BooleanEqualOperator, InvalidOid,
BoolGetDatum(true), false, true, false);
}
else if (is_funcclause(arg))
{
/*
* If we have no stats and it's a function call, estimate 0.3333333.
* This seems a pretty unprincipled choice, but Postgres has been
* using that estimate for function calls since 1992. The hoariness
* of this behavior suggests that we should not be in too much hurry
* to use another value.
*/
selec = 0.3333333;
}
else
{
/* Otherwise, the default estimate is 0.5 */

View File

@@ -3537,4 +3537,24 @@ SELECT FROM sb_1 LEFT JOIN sb_2
RESET enable_nestloop;
RESET enable_mergejoin;
-- Check that we can use statistics on a bool-valued function.
CREATE FUNCTION extstat_small(x numeric) RETURNS bool
STRICT IMMUTABLE LANGUAGE plpgsql
AS $$ BEGIN RETURN x < 1; END $$;
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
estimated | actual
-----------+--------
3333 | 196
(1 row)
CREATE STATISTICS extstat_sb_2_small ON extstat_small(y) FROM sb_2;
ANALYZE sb_2;
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
estimated | actual
-----------+--------
196 | 196
(1 row)
-- Tidy up
DROP TABLE sb_1, sb_2 CASCADE;
DROP FUNCTION extstat_small(x numeric);

View File

@@ -1811,4 +1811,18 @@ SELECT FROM sb_1 LEFT JOIN sb_2
RESET enable_nestloop;
RESET enable_mergejoin;
-- Check that we can use statistics on a bool-valued function.
CREATE FUNCTION extstat_small(x numeric) RETURNS bool
STRICT IMMUTABLE LANGUAGE plpgsql
AS $$ BEGIN RETURN x < 1; END $$;
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
CREATE STATISTICS extstat_sb_2_small ON extstat_small(y) FROM sb_2;
ANALYZE sb_2;
SELECT * FROM check_estimated_rows('SELECT * FROM sb_2 WHERE extstat_small(y)');
-- Tidy up
DROP TABLE sb_1, sb_2 CASCADE;
DROP FUNCTION extstat_small(x numeric);