mirror of
https://github.com/postgres/postgres.git
synced 2025-07-07 00:36:50 +03:00
Build out the planner support function infrastructure.
Add support function requests for estimating the selectivity, cost, and number of result rows (if a SRF) of the target function. The lack of a way to estimate selectivity of a boolean-returning function in WHERE has been a recognized deficiency of the planner since Berkeley days. This commit finally fixes it. In addition, non-constant estimates of cost and number of output rows are now possible. We still fall back to looking at procost and prorows if the support function doesn't service the request, of course. To make concrete use of the possibility of estimating output rowcount for SRFs, this commit adds support functions for array_unnest(anyarray) and the integer variants of generate_series; the lack of plausible rowcount estimates for those, even when it's obvious to a human, has been a repeated subject of complaints. Obviously, much more could now be done in this line, but I'm mostly just trying to get the infrastructure in place. Discussion: https://postgr.es/m/15193.1548028093@sss.pgh.pa.us
This commit is contained in:
@ -133,3 +133,63 @@ ERROR: function num_nulls() does not exist
|
||||
LINE 1: SELECT num_nulls();
|
||||
^
|
||||
HINT: No function matches the given name and argument types. You might need to add explicit type casts.
|
||||
--
|
||||
-- Test adding a support function to a subject function
|
||||
--
|
||||
CREATE FUNCTION my_int_eq(int, int) RETURNS bool
|
||||
LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
|
||||
AS $$int4eq$$;
|
||||
-- By default, planner does not think that's selective
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
|
||||
WHERE my_int_eq(a.unique2, 42);
|
||||
QUERY PLAN
|
||||
----------------------------------------------
|
||||
Hash Join
|
||||
Hash Cond: (b.unique1 = a.unique1)
|
||||
-> Seq Scan on tenk1 b
|
||||
-> Hash
|
||||
-> Seq Scan on tenk1 a
|
||||
Filter: my_int_eq(unique2, 42)
|
||||
(6 rows)
|
||||
|
||||
-- With support function that knows it's int4eq, we get a different plan
|
||||
ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
|
||||
WHERE my_int_eq(a.unique2, 42);
|
||||
QUERY PLAN
|
||||
-------------------------------------------------
|
||||
Nested Loop
|
||||
-> Seq Scan on tenk1 a
|
||||
Filter: my_int_eq(unique2, 42)
|
||||
-> Index Scan using tenk1_unique1 on tenk1 b
|
||||
Index Cond: (unique1 = a.unique1)
|
||||
(5 rows)
|
||||
|
||||
-- Also test non-default rowcount estimate
|
||||
CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
|
||||
LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
|
||||
AS $$generate_series_int4$$
|
||||
SUPPORT test_support_func;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
|
||||
QUERY PLAN
|
||||
----------------------------------------
|
||||
Hash Join
|
||||
Hash Cond: (g.g = a.unique1)
|
||||
-> Function Scan on my_gen_series g
|
||||
-> Hash
|
||||
-> Seq Scan on tenk1 a
|
||||
(5 rows)
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
||||
QUERY PLAN
|
||||
-------------------------------------------------
|
||||
Nested Loop
|
||||
-> Function Scan on my_gen_series g
|
||||
-> Index Scan using tenk1_unique1 on tenk1 a
|
||||
Index Cond: (unique1 = g.g)
|
||||
(4 rows)
|
||||
|
||||
|
@ -904,7 +904,7 @@ select * from int4_tbl where
|
||||
--
|
||||
explain (verbose, costs off)
|
||||
select * from int4_tbl o where (f1, f1) in
|
||||
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
|
||||
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
|
||||
QUERY PLAN
|
||||
-------------------------------------------------------------------
|
||||
Nested Loop Semi Join
|
||||
@ -918,9 +918,9 @@ select * from int4_tbl o where (f1, f1) in
|
||||
Output: "ANY_subquery".f1, "ANY_subquery".g
|
||||
Filter: ("ANY_subquery".f1 = "ANY_subquery".g)
|
||||
-> Result
|
||||
Output: i.f1, ((generate_series(1, 2)) / 10)
|
||||
Output: i.f1, ((generate_series(1, 50)) / 10)
|
||||
-> ProjectSet
|
||||
Output: generate_series(1, 2), i.f1
|
||||
Output: generate_series(1, 50), i.f1
|
||||
-> HashAggregate
|
||||
Output: i.f1
|
||||
Group Key: i.f1
|
||||
@ -929,7 +929,7 @@ select * from int4_tbl o where (f1, f1) in
|
||||
(19 rows)
|
||||
|
||||
select * from int4_tbl o where (f1, f1) in
|
||||
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
|
||||
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
|
||||
f1
|
||||
----
|
||||
0
|
||||
|
@ -68,6 +68,11 @@ CREATE FUNCTION test_fdw_handler()
|
||||
AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler'
|
||||
LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION test_support_func(internal)
|
||||
RETURNS internal
|
||||
AS '@libdir@/regress@DLSUFFIX@', 'test_support_func'
|
||||
LANGUAGE C STRICT;
|
||||
|
||||
-- Things that shouldn't work:
|
||||
|
||||
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
|
||||
|
@ -60,6 +60,10 @@ CREATE FUNCTION test_fdw_handler()
|
||||
RETURNS fdw_handler
|
||||
AS '@libdir@/regress@DLSUFFIX@', 'test_fdw_handler'
|
||||
LANGUAGE C;
|
||||
CREATE FUNCTION test_support_func(internal)
|
||||
RETURNS internal
|
||||
AS '@libdir@/regress@DLSUFFIX@', 'test_support_func'
|
||||
LANGUAGE C STRICT;
|
||||
-- Things that shouldn't work:
|
||||
CREATE FUNCTION test1 (int) RETURNS int LANGUAGE SQL
|
||||
AS 'SELECT ''not an integer'';';
|
||||
|
@ -23,12 +23,16 @@
|
||||
#include "access/transam.h"
|
||||
#include "access/tuptoaster.h"
|
||||
#include "access/xact.h"
|
||||
#include "catalog/pg_operator.h"
|
||||
#include "catalog/pg_type.h"
|
||||
#include "commands/sequence.h"
|
||||
#include "commands/trigger.h"
|
||||
#include "executor/executor.h"
|
||||
#include "executor/spi.h"
|
||||
#include "miscadmin.h"
|
||||
#include "nodes/supportnodes.h"
|
||||
#include "optimizer/optimizer.h"
|
||||
#include "optimizer/plancat.h"
|
||||
#include "port/atomics.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/geo_decls.h"
|
||||
@ -863,3 +867,76 @@ test_fdw_handler(PG_FUNCTION_ARGS)
|
||||
elog(ERROR, "test_fdw_handler is not implemented");
|
||||
PG_RETURN_NULL();
|
||||
}
|
||||
|
||||
PG_FUNCTION_INFO_V1(test_support_func);
|
||||
Datum
|
||||
test_support_func(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||
Node *ret = NULL;
|
||||
|
||||
if (IsA(rawreq, SupportRequestSelectivity))
|
||||
{
|
||||
/*
|
||||
* Assume that the target is int4eq; that's safe as long as we don't
|
||||
* attach this to any other boolean-returning function.
|
||||
*/
|
||||
SupportRequestSelectivity *req = (SupportRequestSelectivity *) rawreq;
|
||||
Selectivity s1;
|
||||
|
||||
if (req->is_join)
|
||||
s1 = join_selectivity(req->root, Int4EqualOperator,
|
||||
req->args,
|
||||
req->inputcollid,
|
||||
req->jointype,
|
||||
req->sjinfo);
|
||||
else
|
||||
s1 = restriction_selectivity(req->root, Int4EqualOperator,
|
||||
req->args,
|
||||
req->inputcollid,
|
||||
req->varRelid);
|
||||
|
||||
req->selectivity = s1;
|
||||
ret = (Node *) req;
|
||||
}
|
||||
|
||||
if (IsA(rawreq, SupportRequestCost))
|
||||
{
|
||||
/* Provide some generic estimate */
|
||||
SupportRequestCost *req = (SupportRequestCost *) rawreq;
|
||||
|
||||
req->startup = 0;
|
||||
req->per_tuple = 2 * cpu_operator_cost;
|
||||
ret = (Node *) req;
|
||||
}
|
||||
|
||||
if (IsA(rawreq, SupportRequestRows))
|
||||
{
|
||||
/*
|
||||
* Assume that the target is generate_series_int4; that's safe as long
|
||||
* as we don't attach this to any other set-returning function.
|
||||
*/
|
||||
SupportRequestRows *req = (SupportRequestRows *) rawreq;
|
||||
|
||||
if (req->node && IsA(req->node, FuncExpr)) /* be paranoid */
|
||||
{
|
||||
List *args = ((FuncExpr *) req->node)->args;
|
||||
Node *arg1 = linitial(args);
|
||||
Node *arg2 = lsecond(args);
|
||||
|
||||
if (IsA(arg1, Const) &&
|
||||
!((Const *) arg1)->constisnull &&
|
||||
IsA(arg2, Const) &&
|
||||
!((Const *) arg2)->constisnull)
|
||||
{
|
||||
int32 val1 = DatumGetInt32(((Const *) arg1)->constvalue);
|
||||
int32 val2 = DatumGetInt32(((Const *) arg2)->constvalue);
|
||||
|
||||
req->rows = val2 - val1 + 1;
|
||||
ret = (Node *) req;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(ret);
|
||||
}
|
||||
|
@ -29,3 +29,35 @@ SELECT num_nulls(VARIADIC '{}'::int[]);
|
||||
-- should fail, one or more arguments is required
|
||||
SELECT num_nonnulls();
|
||||
SELECT num_nulls();
|
||||
|
||||
--
|
||||
-- Test adding a support function to a subject function
|
||||
--
|
||||
|
||||
CREATE FUNCTION my_int_eq(int, int) RETURNS bool
|
||||
LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
|
||||
AS $$int4eq$$;
|
||||
|
||||
-- By default, planner does not think that's selective
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
|
||||
WHERE my_int_eq(a.unique2, 42);
|
||||
|
||||
-- With support function that knows it's int4eq, we get a different plan
|
||||
ALTER FUNCTION my_int_eq(int, int) SUPPORT test_support_func;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN tenk1 b ON a.unique1 = b.unique1
|
||||
WHERE my_int_eq(a.unique2, 42);
|
||||
|
||||
-- Also test non-default rowcount estimate
|
||||
CREATE FUNCTION my_gen_series(int, int) RETURNS SETOF integer
|
||||
LANGUAGE internal STRICT IMMUTABLE PARALLEL SAFE
|
||||
AS $$generate_series_int4$$
|
||||
SUPPORT test_support_func;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
|
||||
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
||||
|
@ -498,9 +498,9 @@ select * from int4_tbl where
|
||||
--
|
||||
explain (verbose, costs off)
|
||||
select * from int4_tbl o where (f1, f1) in
|
||||
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
|
||||
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
|
||||
select * from int4_tbl o where (f1, f1) in
|
||||
(select f1, generate_series(1,2) / 10 g from int4_tbl i group by f1);
|
||||
(select f1, generate_series(1,50) / 10 g from int4_tbl i group by f1);
|
||||
|
||||
--
|
||||
-- check for over-optimization of whole-row Var referencing an Append plan
|
||||
|
Reference in New Issue
Block a user