mirror of
https://github.com/postgres/postgres.git
synced 2025-05-08 07:21:33 +03:00
Teach planner how to estimate rows for timestamp generate_series
This provides the planner with row estimates for generate_series(TIMESTAMP, TIMESTAMP, INTERVAL), generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL) and generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL, TEXT) when the input parameter values can be estimated during planning. Author: David Rowley Reviewed-by: jian he <jian.universality@gmail.com> Discussion: https://postgr.es/m/CAApHDvrBE%3D%2BASo_sGYmQJ3GvO8GPvX5yxXhRS%3Dt_ybd4odFkhQ%40mail.gmail.com
This commit is contained in:
parent
5193ca8e15
commit
036bdcec9f
@ -27,6 +27,7 @@
|
||||
#include "funcapi.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "miscadmin.h"
|
||||
#include "optimizer/optimizer.h"
|
||||
#include "nodes/nodeFuncs.h"
|
||||
#include "nodes/supportnodes.h"
|
||||
#include "parser/scansup.h"
|
||||
@ -6680,6 +6681,93 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS)
|
||||
return generate_series_timestamptz_internal(fcinfo);
|
||||
}
|
||||
|
||||
/*
|
||||
* Planner support function for generate_series(timestamp, timestamp, interval)
|
||||
*/
|
||||
Datum
|
||||
generate_series_timestamp_support(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Node *rawreq = (Node *) PG_GETARG_POINTER(0);
|
||||
Node *ret = NULL;
|
||||
|
||||
if (IsA(rawreq, SupportRequestRows))
|
||||
{
|
||||
/* Try to estimate the number of rows returned */
|
||||
SupportRequestRows *req = (SupportRequestRows *) rawreq;
|
||||
|
||||
if (is_funcclause(req->node)) /* be paranoid */
|
||||
{
|
||||
List *args = ((FuncExpr *) req->node)->args;
|
||||
Node *arg1,
|
||||
*arg2,
|
||||
*arg3;
|
||||
|
||||
/* We can use estimated argument values here */
|
||||
arg1 = estimate_expression_value(req->root, linitial(args));
|
||||
arg2 = estimate_expression_value(req->root, lsecond(args));
|
||||
arg3 = estimate_expression_value(req->root, lthird(args));
|
||||
|
||||
/*
|
||||
* If any argument is constant NULL, we can safely assume that
|
||||
* zero rows are returned. Otherwise, if they're all non-NULL
|
||||
* constants, we can calculate the number of rows that will be
|
||||
* returned.
|
||||
*/
|
||||
if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) ||
|
||||
(IsA(arg2, Const) && ((Const *) arg2)->constisnull) ||
|
||||
(IsA(arg3, Const) && ((Const *) arg3)->constisnull))
|
||||
{
|
||||
req->rows = 0;
|
||||
ret = (Node *) req;
|
||||
}
|
||||
else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const))
|
||||
{
|
||||
Timestamp start,
|
||||
finish;
|
||||
Interval *step;
|
||||
Datum diff;
|
||||
double dstep;
|
||||
int64 dummy;
|
||||
|
||||
start = DatumGetTimestamp(((Const *) arg1)->constvalue);
|
||||
finish = DatumGetTimestamp(((Const *) arg2)->constvalue);
|
||||
step = DatumGetIntervalP(((Const *) arg3)->constvalue);
|
||||
|
||||
/*
|
||||
* Perform some prechecks which could cause timestamp_mi to
|
||||
* raise an ERROR. It's much better to just return some
|
||||
* default estimate than error out in a support function.
|
||||
*/
|
||||
if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) &&
|
||||
!pg_sub_s64_overflow(finish, start, &dummy))
|
||||
{
|
||||
diff = DirectFunctionCall2(timestamp_mi,
|
||||
TimestampGetDatum(finish),
|
||||
TimestampGetDatum(start));
|
||||
|
||||
#define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time)
|
||||
|
||||
dstep = INTERVAL_TO_MICROSECONDS(step);
|
||||
|
||||
/* This equation works for either sign of step */
|
||||
if (dstep != 0.0)
|
||||
{
|
||||
Interval *idiff = DatumGetIntervalP(diff);
|
||||
double ddiff = INTERVAL_TO_MICROSECONDS(idiff);
|
||||
|
||||
req->rows = floor(ddiff / dstep + 1.0);
|
||||
ret = (Node *) req;
|
||||
}
|
||||
#undef INTERVAL_TO_MICROSECONDS
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(ret);
|
||||
}
|
||||
|
||||
|
||||
/* timestamp_at_local()
|
||||
* timestamptz_at_local()
|
||||
*
|
||||
|
@ -8331,19 +8331,25 @@
|
||||
prorettype => 'numeric', proargtypes => 'numeric numeric',
|
||||
prosrc => 'generate_series_numeric' },
|
||||
{ oid => '938', descr => 'non-persistent series generator',
|
||||
proname => 'generate_series', prorows => '1000', proretset => 't',
|
||||
proname => 'generate_series', prorows => '1000',
|
||||
prosupport => 'generate_series_timestamp_support', proretset => 't',
|
||||
prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval',
|
||||
prosrc => 'generate_series_timestamp' },
|
||||
{ oid => '939', descr => 'non-persistent series generator',
|
||||
proname => 'generate_series', prorows => '1000', proretset => 't',
|
||||
proname => 'generate_series', prorows => '1000',
|
||||
prosupport => 'generate_series_timestamp_support', proretset => 't',
|
||||
provolatile => 's', prorettype => 'timestamptz',
|
||||
proargtypes => 'timestamptz timestamptz interval',
|
||||
prosrc => 'generate_series_timestamptz' },
|
||||
{ oid => '6274', descr => 'non-persistent series generator',
|
||||
proname => 'generate_series', prorows => '1000', proretset => 't',
|
||||
proname => 'generate_series', prorows => '1000',
|
||||
prosupport => 'generate_series_timestamp_support', proretset => 't',
|
||||
prorettype => 'timestamptz',
|
||||
proargtypes => 'timestamptz timestamptz interval text',
|
||||
prosrc => 'generate_series_timestamptz_at_zone' },
|
||||
{ oid => '8402', descr => 'planner support for generate_series',
|
||||
proname => 'generate_series_timestamp_support', prorettype => 'internal',
|
||||
proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' },
|
||||
|
||||
# boolean aggregates
|
||||
{ oid => '2515', descr => 'aggregate transition function',
|
||||
|
@ -2,6 +2,43 @@
|
||||
\getenv libdir PG_LIBDIR
|
||||
\getenv dlsuffix PG_DLSUFFIX
|
||||
\set regresslib :libdir '/regress' :dlsuffix
|
||||
-- Function to assist with verifying EXPLAIN which includes costs. A series
|
||||
-- of bool flags allows control over which portions are masked out
|
||||
CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
|
||||
hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
|
||||
LANGUAGE plpgsql AS
|
||||
$$
|
||||
DECLARE
|
||||
ln text;
|
||||
analyze_str text;
|
||||
BEGIN
|
||||
IF do_analyze = true THEN
|
||||
analyze_str := 'on';
|
||||
ELSE
|
||||
analyze_str := 'off';
|
||||
END IF;
|
||||
|
||||
FOR ln IN
|
||||
EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
|
||||
analyze_str, query)
|
||||
LOOP
|
||||
IF hide_costs = true THEN
|
||||
ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
|
||||
END IF;
|
||||
|
||||
IF hide_row_est = true THEN
|
||||
-- don't use 'g' so that we leave the actual rows intact
|
||||
ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
|
||||
END IF;
|
||||
|
||||
IF hide_width = true THEN
|
||||
ln := regexp_replace(ln, 'width=\d+', 'width=N');
|
||||
END IF;
|
||||
|
||||
RETURN NEXT ln;
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
--
|
||||
-- num_nulls()
|
||||
--
|
||||
@ -594,6 +631,78 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
||||
Index Cond: (unique1 = g.g)
|
||||
(4 rows)
|
||||
|
||||
--
|
||||
-- Test the SupportRequestRows support function for generate_series_timestamp()
|
||||
--
|
||||
-- Ensure the row estimate matches the actual rows
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
explain_mask_costs
|
||||
------------------------------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||
(1 row)
|
||||
|
||||
-- As above but with generate_series_timestamp
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
explain_mask_costs
|
||||
------------------------------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||
(1 row)
|
||||
|
||||
-- As above but with generate_series_timestamptz_at_zone()
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
|
||||
true, true, false, true);
|
||||
explain_mask_costs
|
||||
------------------------------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||
(1 row)
|
||||
|
||||
-- Ensure the estimated and actual row counts match when the range isn't
|
||||
-- evenly divisible by the step
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
explain_mask_costs
|
||||
----------------------------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=5 width=N) (actual rows=5 loops=1)
|
||||
(1 row)
|
||||
|
||||
-- Ensure the estimates match when step is decreasing
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
explain_mask_costs
|
||||
------------------------------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=30 width=N) (actual rows=30 loops=1)
|
||||
(1 row)
|
||||
|
||||
-- Ensure an empty range estimates 1 row
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
explain_mask_costs
|
||||
----------------------------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=1 width=N) (actual rows=0 loops=1)
|
||||
(1 row)
|
||||
|
||||
-- Ensure we get the default row estimate for infinity values
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
|
||||
false, true, false, true);
|
||||
explain_mask_costs
|
||||
-------------------------------------------------------------------
|
||||
Function Scan on generate_series g (cost=N..N rows=1000 width=N)
|
||||
(1 row)
|
||||
|
||||
-- Ensure the row estimate behaves correctly when step size is zero.
|
||||
-- We expect generate_series_timestamp() to throw the error rather than in
|
||||
-- the support function.
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
|
||||
ERROR: step size cannot equal zero
|
||||
-- Test functions for control data
|
||||
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
|
||||
ok
|
||||
@ -706,3 +815,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
|
||||
(1 row)
|
||||
|
||||
DROP TABLE test_chunk_id;
|
||||
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
|
||||
|
@ -4,6 +4,44 @@
|
||||
|
||||
\set regresslib :libdir '/regress' :dlsuffix
|
||||
|
||||
-- Function to assist with verifying EXPLAIN which includes costs. A series
|
||||
-- of bool flags allows control over which portions are masked out
|
||||
CREATE FUNCTION explain_mask_costs(query text, do_analyze bool,
|
||||
hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text
|
||||
LANGUAGE plpgsql AS
|
||||
$$
|
||||
DECLARE
|
||||
ln text;
|
||||
analyze_str text;
|
||||
BEGIN
|
||||
IF do_analyze = true THEN
|
||||
analyze_str := 'on';
|
||||
ELSE
|
||||
analyze_str := 'off';
|
||||
END IF;
|
||||
|
||||
FOR ln IN
|
||||
EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s',
|
||||
analyze_str, query)
|
||||
LOOP
|
||||
IF hide_costs = true THEN
|
||||
ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N');
|
||||
END IF;
|
||||
|
||||
IF hide_row_est = true THEN
|
||||
-- don't use 'g' so that we leave the actual rows intact
|
||||
ln := regexp_replace(ln, 'rows=\d+', 'rows=N');
|
||||
END IF;
|
||||
|
||||
IF hide_width = true THEN
|
||||
ln := regexp_replace(ln, 'width=\d+', 'width=N');
|
||||
END IF;
|
||||
|
||||
RETURN NEXT ln;
|
||||
END LOOP;
|
||||
END;
|
||||
$$;
|
||||
|
||||
--
|
||||
-- num_nulls()
|
||||
--
|
||||
@ -224,6 +262,51 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g;
|
||||
EXPLAIN (COSTS OFF)
|
||||
SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g;
|
||||
|
||||
--
|
||||
-- Test the SupportRequestRows support function for generate_series_timestamp()
|
||||
--
|
||||
|
||||
-- Ensure the row estimate matches the actual rows
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
|
||||
-- As above but with generate_series_timestamp
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
|
||||
-- As above but with generate_series_timestamptz_at_zone()
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$,
|
||||
true, true, false, true);
|
||||
|
||||
-- Ensure the estimated and actual row counts match when the range isn't
|
||||
-- evenly divisible by the step
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
|
||||
-- Ensure the estimates match when step is decreasing
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
|
||||
-- Ensure an empty range estimates 1 row
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$,
|
||||
true, true, false, true);
|
||||
|
||||
-- Ensure we get the default row estimate for infinity values
|
||||
SELECT explain_mask_costs($$
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$,
|
||||
false, true, false, true);
|
||||
|
||||
-- Ensure the row estimate behaves correctly when step size is zero.
|
||||
-- We expect generate_series_timestamp() to throw the error rather than in
|
||||
-- the support function.
|
||||
SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s);
|
||||
|
||||
-- Test functions for control data
|
||||
SELECT count(*) > 0 AS ok FROM pg_control_checkpoint();
|
||||
SELECT count(*) > 0 AS ok FROM pg_control_init();
|
||||
@ -273,3 +356,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL,
|
||||
pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel)
|
||||
FROM test_chunk_id;
|
||||
DROP TABLE test_chunk_id;
|
||||
DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool);
|
||||
|
Loading…
x
Reference in New Issue
Block a user