mirror of
https://github.com/postgres/postgres.git
synced 2025-05-08 07:21:33 +03:00
Teach planner how to estimate rows for timestamp generate_series
This provides the planner with row estimates for generate_series(TIMESTAMP, TIMESTAMP, INTERVAL), generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL) and generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL, TEXT) when the input parameter values can be estimated during planning. Author: David Rowley Reviewed-by: jian he <jian.universality@gmail.com> Discussion: https://postgr.es/m/CAApHDvrBE%3D%2BASo_sGYmQJ3GvO8GPvX5yxXhRS%3Dt_ybd4odFkhQ%40mail.gmail.com
This commit is contained in:
parent
5193ca8e15
commit
036bdcec9f
@ -27,6 +27,7 @@
|
|||||||
#include "funcapi.h"
|
#include "funcapi.h"
|
||||||
#include "libpq/pqformat.h"
|
#include "libpq/pqformat.h"
|
||||||
#include "miscadmin.h"
|
#include "miscadmin.h"
|
||||||
|
#include "optimizer/optimizer.h"
|
||||||
#include "nodes/nodeFuncs.h"
|
#include "nodes/nodeFuncs.h"
|
||||||
#include "nodes/supportnodes.h"
|
#include "nodes/supportnodes.h"
|
||||||
#include "parser/scansup.h"
|
#include "parser/scansup.h"
|
||||||
@ -6680,6 +6681,93 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
|
|||||||
return generate_series_timestamptz_internal(fcinfo);
|
return generate_series_timestamptz_internal(fcinfo);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Planner support function for generate_series(timestamp, timestamp, interval)
|
||||||
|
*/
|
||||||
|
Datum
|
||||||
|
generate_series_timestamp_support(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||||
|
Node *ret = NULL;
|
||||||
|
|
||||||
|
if (IsA(rawreq, SupportRequestRows))
|
||||||
|
{
|
||||||
|
/* Try to estimate the number of rows returned */
|
||||||
|
SupportRequestRows *req = (SupportRequestRows *) rawreq;
|
||||||
|
|
||||||
|
if (is_funcclause(req->node)) /* be paranoid */
|
||||||
|
{
|
||||||
|
List *args = ((FuncExpr *) req->node)->args;
|
||||||
|
Node *arg1,
|
||||||
|
*arg2,
|
||||||
|
*arg3;
|
||||||
|
|
||||||
|
/* We can use estimated argument values here */
|
||||||
|
arg1 = estimate_expression_value(req->root, linitial(args));
|
||||||
|
arg2 = estimate_expression_value(req->root, lsecond(args));
|
||||||
|
arg3 = estimate_expression_value(req->root, lthird(args));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If any argument is constant NULL, we can safely assume that
|
||||||
|
* zero rows are returned. Otherwise, if they're all non-NULL
|
||||||
|
* constants, we can calculate the number of rows that will be
|
||||||
|
* returned.
|
||||||
|
*/
|
||||||
|
if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
|
||||||
|
(IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
|
||||||
|
(IsA(arg3, Const) && ((Const *) arg3)->constisnull))
|
||||||
|
{
|
||||||
|
req->rows = 0;
|
||||||
|
ret = (Node *) req;
|
||||||
|
}
|
||||||
|
else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
|
||||||
|
{
|
||||||
|
Timestamp start,
|
||||||
|
finish;
|
||||||
|
Interval *step;
|
||||||
|
Datum diff;
|
||||||
|
double dstep;
|
||||||
|
int64 dummy;
|
||||||
|
|
||||||
|
start = DatumGetTimestamp(((Const *) arg1)->constvalue);
|
||||||
|
finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
|
||||||
|
step = DatumGetIntervalP(((Const *) arg3)->constvalue);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform some prechecks which could cause timestamp_mi to
|
||||||
|
* raise an ERROR. It's much better to just return some
|
||||||
|
* default estimate than error out in a support function.
|
||||||
|
*/
|
||||||
|
if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
|
||||||
|
!pg_sub_s64_overflow(finish, start, &dummy))
|
||||||
|
{
|
||||||
|
diff = DirectFunctionCall2(timestamp_mi,
|
||||||
|
TimestampGetDatum(finish),
|
||||||
|
TimestampGetDatum(start));
|
||||||
|
|
||||||
|
#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
|
||||||
|
|
||||||
|
dstep = INTERVAL_TO_MICROSECONDS(step);
|
||||||
|
|
||||||
|
/* This equation works for either sign of step */
|
||||||
|
if (dstep != 0.0)
|
||||||
|
{
|
||||||
|
Interval *idiff = DatumGetIntervalP(diff);
|
||||||
|
double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
|
||||||
|
|
||||||
|
req->rows = floor(ddiff / dstep + 1.0);
|
||||||
|
ret = (Node *) req;
|
||||||
|
}
|
||||||
|
#undef INTERVAL_TO_MICROSECONDS
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* timestamp_at_local()
|
/* timestamp_at_local()
|
||||||
* timestamptz_at_local()
|
* timestamptz_at_local()
|
||||||
*
|
*
|
||||||
|
@ -8331,19 +8331,25 @@
|
|||||||
prorettype => 'numeric', proargtypes => 'numeric numeric',
|
prorettype => 'numeric', proargtypes => 'numeric numeric',
|
||||||
prosrc => 'generate_series_numeric' },
|
prosrc => 'generate_series_numeric' },
|
||||||
{ oid => '938', descr => 'non-persistent series generator',
|
{ oid => '938', descr => 'non-persistent series generator',
|
||||||
proname => 'generate_series', prorows => '1000', proretset => 't',
|
proname => 'generate_series', prorows => '1000',
|
||||||
|
prosupport => 'generate_series_timestamp_support', proretset => 't',
|
||||||
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
|
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
|
||||||
prosrc => 'generate_series_timestamp' },
|
prosrc => 'generate_series_timestamp' },
|
||||||
{ oid => '939', descr => 'non-persistent series generator',
|
{ oid => '939', descr => 'non-persistent series generator',
|
||||||
proname => 'generate_series', prorows => '1000', proretset => 't',
|
proname => 'generate_series', prorows => '1000',
|
||||||
|
prosupport => 'generate_series_timestamp_support', proretset => 't',
|
||||||
provolatile => 's', prorettype => 'timestamptz',
|
provolatile => 's', prorettype => 'timestamptz',
|
||||||
proargtypes => 'timestamptz timestamptz interval',
|
proargtypes => 'timestamptz timestamptz interval',
|
||||||
prosrc => 'generate_series_timestamptz' },
|
prosrc => 'generate_series_timestamptz' },
|
||||||
{ oid => '6274', descr => 'non-persistent series generator',
|
{ oid => '6274', descr => 'non-persistent series generator',
|
||||||
proname => 'generate_series', prorows => '1000', proretset => 't',
|
proname => 'generate_series', prorows => '1000',
|
||||||
|
prosupport => 'generate_series_timestamp_support', proretset => 't',
|
||||||
prorettype => 'timestamptz',
|
prorettype => 'timestamptz',
|
||||||
proargtypes => 'timestamptz timestamptz interval text',
|
proargtypes => 'timestamptz timestamptz interval text',
|
||||||
prosrc => 'generate_series_timestamptz_at_zone' },
|
prosrc => 'generate_series_timestamptz_at_zone' },
|
||||||
|
{ oid => '8402', descr => 'planner support for generate_series',
|
||||||
|
proname => 'generate_series_timestamp_support', prorettype => 'internal',
|
||||||
|
proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
|
||||||
|
|
||||||
# boolean aggregates
|
# boolean aggregates
|
||||||
{ oid => '2515', descr => 'aggregate transition function',
|
{ oid => '2515', descr => 'aggregate transition function',
|
||||||
|
@ -2,6 +2,43 @@
|
|||||||
\getenv libdir PG_LIBDIR
|
\getenv libdir PG_LIBDIR
|
||||||
\getenv dlsuffix PG_DLSUFFIX
|
\getenv dlsuffix PG_DLSUFFIX
|
||||||
\set regresslib :libdir '/regress' :dlsuffix
|
\set regresslib :libdir '/regress' :dlsuffix
|
||||||
|
-- Function to assist with verifying EXPLAIN which includes costs. A series
|
||||||
|
-- of bool flags allows control over which portions are masked out
|
||||||
|
CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
|
||||||
|
hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
|
||||||
|
LANGUAGE plpgsql AS
|
||||||
|
$$
|
||||||
|
DECLARE
|
||||||
|
ln text;
|
||||||
|
analyze_str text;
|
||||||
|
BEGIN
|
||||||
|
IF do_analyze = true THEN
|
||||||
|
analyze_str := 'on';
|
||||||
|
ELSE
|
||||||
|
analyze_str := 'off';
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
FOR ln IN
|
||||||
|
EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
|
||||||
|
analyze_str, query)
|
||||||
|
LOOP
|
||||||
|
IF hide_costs = true THEN
|
||||||
|
ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF hide_row_est = true THEN
|
||||||
|
-- don't use 'g' so that we leave the actual rows intact
|
||||||
|
ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF hide_width = true THEN
|
||||||
|
ln := regexp_replace(ln, 'width=\d+', 'width=N');
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
RETURN NEXT ln;
|
||||||
|
END LOOP;
|
||||||
|
END;
|
||||||
|
$$;
|
||||||
--
|
--
|
||||||
-- num_nulls()
|
-- num_nulls()
|
||||||
--
|
--
|
||||||
@ -594,6 +631,78 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
|||||||
Index Cond: (unique1 = g.g)
|
Index Cond: (unique1 = g.g)
|
||||||
(4 rows)
|
(4 rows)
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Test the SupportRequestRows support function for generate_series_timestamp()
|
||||||
|
--
|
||||||
|
-- Ensure the row estimate matches the actual rows
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
------------------------------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- As above but with generate_series_timestamp
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
------------------------------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- As above but with generate_series_timestamptz_at_zone()
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
------------------------------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Ensure the estimated and actual row counts match when the range isn't
|
||||||
|
-- evenly divisible by the step
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
----------------------------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Ensure the estimates match when step is decreasing
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
------------------------------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Ensure an empty range estimates 1 row
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
----------------------------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Ensure we get the default row estimate for infinity values
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
|
||||||
|
false, true, false, true);
|
||||||
|
explain_mask_costs
|
||||||
|
-------------------------------------------------------------------
|
||||||
|
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
-- Ensure the row estimate behaves correctly when step size is zero.
|
||||||
|
-- We expect generate_series_timestamp() to throw the error rather than in
|
||||||
|
-- the support function.
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
|
||||||
|
ERROR: step size cannot equal zero
|
||||||
-- Test functions for control data
|
-- Test functions for control data
|
||||||
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
|
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
|
||||||
ok
|
ok
|
||||||
@ -706,3 +815,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
|
|||||||
(1 row)
|
(1 row)
|
||||||
|
|
||||||
DROP TABLE test_chunk_id;
|
DROP TABLE test_chunk_id;
|
||||||
|
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
|
||||||
|
@ -4,6 +4,44 @@
|
|||||||
|
|
||||||
\set regresslib :libdir '/regress' :dlsuffix
|
\set regresslib :libdir '/regress' :dlsuffix
|
||||||
|
|
||||||
|
-- Function to assist with verifying EXPLAIN which includes costs. A series
|
||||||
|
-- of bool flags allows control over which portions are masked out
|
||||||
|
CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
|
||||||
|
hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
|
||||||
|
LANGUAGE plpgsql AS
|
||||||
|
$$
|
||||||
|
DECLARE
|
||||||
|
ln text;
|
||||||
|
analyze_str text;
|
||||||
|
BEGIN
|
||||||
|
IF do_analyze = true THEN
|
||||||
|
analyze_str := 'on';
|
||||||
|
ELSE
|
||||||
|
analyze_str := 'off';
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
FOR ln IN
|
||||||
|
EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
|
||||||
|
analyze_str, query)
|
||||||
|
LOOP
|
||||||
|
IF hide_costs = true THEN
|
||||||
|
ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF hide_row_est = true THEN
|
||||||
|
-- don't use 'g' so that we leave the actual rows intact
|
||||||
|
ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF hide_width = true THEN
|
||||||
|
ln := regexp_replace(ln, 'width=\d+', 'width=N');
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
RETURN NEXT ln;
|
||||||
|
END LOOP;
|
||||||
|
END;
|
||||||
|
$$;
|
||||||
|
|
||||||
--
|
--
|
||||||
-- num_nulls()
|
-- num_nulls()
|
||||||
--
|
--
|
||||||
@ -224,6 +262,51 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
|
|||||||
EXPLAIN (COSTS OFF)
|
EXPLAIN (COSTS OFF)
|
||||||
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
||||||
|
|
||||||
|
--
|
||||||
|
-- Test the SupportRequestRows support function for generate_series_timestamp()
|
||||||
|
--
|
||||||
|
|
||||||
|
-- Ensure the row estimate matches the actual rows
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
|
||||||
|
-- As above but with generate_series_timestamp
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
|
||||||
|
-- As above but with generate_series_timestamptz_at_zone()
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
|
||||||
|
-- Ensure the estimated and actual row counts match when the range isn't
|
||||||
|
-- evenly divisible by the step
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
|
||||||
|
-- Ensure the estimates match when step is decreasing
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
|
||||||
|
-- Ensure an empty range estimates 1 row
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
|
||||||
|
true, true, false, true);
|
||||||
|
|
||||||
|
-- Ensure we get the default row estimate for infinity values
|
||||||
|
SELECT explain_mask_costs($$
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
|
||||||
|
false, true, false, true);
|
||||||
|
|
||||||
|
-- Ensure the row estimate behaves correctly when step size is zero.
|
||||||
|
-- We expect generate_series_timestamp() to throw the error rather than in
|
||||||
|
-- the support function.
|
||||||
|
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
|
||||||
|
|
||||||
-- Test functions for control data
|
-- Test functions for control data
|
||||||
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
|
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
|
||||||
SELECT count(*) > 0 AS ok FROM pg_control_init();
|
SELECT count(*) > 0 AS ok FROM pg_control_init();
|
||||||
@ -273,3 +356,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
|
|||||||
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
|
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
|
||||||
FROM test_chunk_id;
|
FROM test_chunk_id;
|
||||||
DROP TABLE test_chunk_id;
|
DROP TABLE test_chunk_id;
|
||||||
|
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user