mirror of
				https://github.com/postgres/postgres.git
				synced 2025-10-25 13:17:41 +03:00 
			
		
		
		
	Teach planner how to estimate rows for timestamp generate_series
This provides the planner with row estimates for generate_series(TIMESTAMP, TIMESTAMP, INTERVAL), generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL) and generate_series(TIMESTAMPTZ, TIMESTAMPTZ, INTERVAL, TEXT) when the input parameter values can be estimated during planning. Author: David Rowley Reviewed-by: jian he <jian.universality@gmail.com> Discussion: https://postgr.es/m/CAApHDvrBE%3D%2BASo_sGYmQJ3GvO8GPvX5yxXhRS%3Dt_ybd4odFkhQ%40mail.gmail.com
This commit is contained in:
		| @@ -27,6 +27,7 @@ | ||||
| #include "funcapi.h" | ||||
| #include "libpq/pqformat.h" | ||||
| #include "miscadmin.h" | ||||
| #include "optimizer/optimizer.h" | ||||
| #include "nodes/nodeFuncs.h" | ||||
| #include "nodes/supportnodes.h" | ||||
| #include "parser/scansup.h" | ||||
| @@ -6680,6 +6681,93 @@ generate_series_timestamptz_at_zone(PG_FUNCTION_ARGS) | ||||
| 	return generate_series_timestamptz_internal(fcinfo); | ||||
| } | ||||
|  | ||||
| /* | ||||
|  * Planner support function for generate_series(timestamp, timestamp, interval) | ||||
|  */ | ||||
| Datum | ||||
| generate_series_timestamp_support(PG_FUNCTION_ARGS) | ||||
| { | ||||
| 	Node	   *rawreq = (Node *) PG_GETARG_POINTER(0); | ||||
| 	Node	   *ret = NULL; | ||||
|  | ||||
| 	if (IsA(rawreq, SupportRequestRows)) | ||||
| 	{ | ||||
| 		/* Try to estimate the number of rows returned */ | ||||
| 		SupportRequestRows *req = (SupportRequestRows *) rawreq; | ||||
|  | ||||
| 		if (is_funcclause(req->node))	/* be paranoid */ | ||||
| 		{ | ||||
| 			List	   *args = ((FuncExpr *) req->node)->args; | ||||
| 			Node	   *arg1, | ||||
| 					   *arg2, | ||||
| 					   *arg3; | ||||
|  | ||||
| 			/* We can use estimated argument values here */ | ||||
| 			arg1 = estimate_expression_value(req->root, linitial(args)); | ||||
| 			arg2 = estimate_expression_value(req->root, lsecond(args)); | ||||
| 			arg3 = estimate_expression_value(req->root, lthird(args)); | ||||
|  | ||||
| 			/* | ||||
| 			 * If any argument is constant NULL, we can safely assume that | ||||
| 			 * zero rows are returned.  Otherwise, if they're all non-NULL | ||||
| 			 * constants, we can calculate the number of rows that will be | ||||
| 			 * returned. | ||||
| 			 */ | ||||
| 			if ((IsA(arg1, Const) && ((Const *) arg1)->constisnull) || | ||||
| 				(IsA(arg2, Const) && ((Const *) arg2)->constisnull) || | ||||
| 				(IsA(arg3, Const) && ((Const *) arg3)->constisnull)) | ||||
| 			{ | ||||
| 				req->rows = 0; | ||||
| 				ret = (Node *) req; | ||||
| 			} | ||||
| 			else if (IsA(arg1, Const) && IsA(arg2, Const) && IsA(arg3, Const)) | ||||
| 			{ | ||||
| 				Timestamp	start, | ||||
| 							finish; | ||||
| 				Interval   *step; | ||||
| 				Datum		diff; | ||||
| 				double		dstep; | ||||
| 				int64		dummy; | ||||
|  | ||||
| 				start = DatumGetTimestamp(((Const *) arg1)->constvalue); | ||||
| 				finish = DatumGetTimestamp(((Const *) arg2)->constvalue); | ||||
| 				step = DatumGetIntervalP(((Const *) arg3)->constvalue); | ||||
|  | ||||
| 				/* | ||||
| 				 * Perform some prechecks which could cause timestamp_mi to | ||||
| 				 * raise an ERROR.  It's much better to just return some | ||||
| 				 * default estimate than error out in a support function. | ||||
| 				 */ | ||||
| 				if (!TIMESTAMP_NOT_FINITE(start) && !TIMESTAMP_NOT_FINITE(finish) && | ||||
| 					!pg_sub_s64_overflow(finish, start, &dummy)) | ||||
| 				{ | ||||
| 					diff = DirectFunctionCall2(timestamp_mi, | ||||
| 											   TimestampGetDatum(finish), | ||||
| 											   TimestampGetDatum(start)); | ||||
|  | ||||
| #define INTERVAL_TO_MICROSECONDS(i) ((((double) (i)->month * DAYS_PER_MONTH + (i)->day)) * USECS_PER_DAY + (i)->time) | ||||
|  | ||||
| 					dstep = INTERVAL_TO_MICROSECONDS(step); | ||||
|  | ||||
| 					/* This equation works for either sign of step */ | ||||
| 					if (dstep != 0.0) | ||||
| 					{ | ||||
| 						Interval   *idiff = DatumGetIntervalP(diff); | ||||
| 						double		ddiff = INTERVAL_TO_MICROSECONDS(idiff); | ||||
|  | ||||
| 						req->rows = floor(ddiff / dstep + 1.0); | ||||
| 						ret = (Node *) req; | ||||
| 					} | ||||
| #undef INTERVAL_TO_MICROSECONDS | ||||
| 				} | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
|  | ||||
| 	PG_RETURN_POINTER(ret); | ||||
| } | ||||
|  | ||||
|  | ||||
| /* timestamp_at_local() | ||||
|  * timestamptz_at_local() | ||||
|  * | ||||
|   | ||||
| @@ -8331,19 +8331,25 @@ | ||||
|   prorettype => 'numeric', proargtypes => 'numeric numeric', | ||||
|   prosrc => 'generate_series_numeric' }, | ||||
| { oid => '938', descr => 'non-persistent series generator', | ||||
|   proname => 'generate_series', prorows => '1000', proretset => 't', | ||||
|   proname => 'generate_series', prorows => '1000', | ||||
|   prosupport => 'generate_series_timestamp_support', proretset => 't', | ||||
|   prorettype => 'timestamp', proargtypes => 'timestamp timestamp interval', | ||||
|   prosrc => 'generate_series_timestamp' }, | ||||
| { oid => '939', descr => 'non-persistent series generator', | ||||
|   proname => 'generate_series', prorows => '1000', proretset => 't', | ||||
|   proname => 'generate_series', prorows => '1000', | ||||
|   prosupport => 'generate_series_timestamp_support', proretset => 't', | ||||
|   provolatile => 's', prorettype => 'timestamptz', | ||||
|   proargtypes => 'timestamptz timestamptz interval', | ||||
|   prosrc => 'generate_series_timestamptz' }, | ||||
| { oid => '6274', descr => 'non-persistent series generator', | ||||
|   proname => 'generate_series', prorows => '1000', proretset => 't', | ||||
|   proname => 'generate_series', prorows => '1000', | ||||
|   prosupport => 'generate_series_timestamp_support',  proretset => 't', | ||||
|   prorettype => 'timestamptz', | ||||
|   proargtypes => 'timestamptz timestamptz interval text', | ||||
|   prosrc => 'generate_series_timestamptz_at_zone' }, | ||||
| { oid => '8402', descr => 'planner support for generate_series', | ||||
|   proname => 'generate_series_timestamp_support', prorettype => 'internal', | ||||
|   proargtypes => 'internal', prosrc => 'generate_series_timestamp_support' }, | ||||
|  | ||||
| # boolean aggregates | ||||
| { oid => '2515', descr => 'aggregate transition function', | ||||
|   | ||||
| @@ -2,6 +2,43 @@ | ||||
| \getenv libdir PG_LIBDIR | ||||
| \getenv dlsuffix PG_DLSUFFIX | ||||
| \set regresslib :libdir '/regress' :dlsuffix | ||||
| -- Function to assist with verifying EXPLAIN which includes costs.  A series | ||||
| -- of bool flags allows control over which portions are masked out | ||||
| CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, | ||||
|     hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text | ||||
| LANGUAGE plpgsql AS | ||||
| $$ | ||||
| DECLARE | ||||
|     ln text; | ||||
|     analyze_str text; | ||||
| BEGIN | ||||
|     IF do_analyze = true THEN | ||||
|         analyze_str := 'on'; | ||||
|     ELSE | ||||
|         analyze_str := 'off'; | ||||
|     END IF; | ||||
|  | ||||
|     FOR ln IN | ||||
|         EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s', | ||||
|             analyze_str, query) | ||||
|     LOOP | ||||
|         IF hide_costs = true THEN | ||||
|             ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); | ||||
|         END IF; | ||||
|  | ||||
|         IF hide_row_est = true THEN | ||||
|             -- don't use 'g' so that we leave the actual rows intact | ||||
|             ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); | ||||
|         END IF; | ||||
|  | ||||
|         IF hide_width = true THEN | ||||
|             ln := regexp_replace(ln, 'width=\d+', 'width=N'); | ||||
|         END IF; | ||||
|  | ||||
|         RETURN NEXT ln; | ||||
|     END LOOP; | ||||
| END; | ||||
| $$; | ||||
| -- | ||||
| -- num_nulls() | ||||
| -- | ||||
| @@ -594,6 +631,78 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; | ||||
|          Index Cond: (unique1 = g.g) | ||||
| (4 rows) | ||||
|  | ||||
| -- | ||||
| -- Test the SupportRequestRows support function for generate_series_timestamp() | ||||
| -- | ||||
| -- Ensure the row estimate matches the actual rows | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|                                     explain_mask_costs                                     | ||||
| ------------------------------------------------------------------------------------------ | ||||
|  Function Scan on generate_series g  (cost=N..N rows=30 width=N) (actual rows=30 loops=1) | ||||
| (1 row) | ||||
|  | ||||
| -- As above but with generate_series_timestamp | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|                                     explain_mask_costs                                     | ||||
| ------------------------------------------------------------------------------------------ | ||||
|  Function Scan on generate_series g  (cost=N..N rows=30 width=N) (actual rows=30 loops=1) | ||||
| (1 row) | ||||
|  | ||||
| -- As above but with generate_series_timestamptz_at_zone() | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, | ||||
| true, true, false, true); | ||||
|                                     explain_mask_costs                                     | ||||
| ------------------------------------------------------------------------------------------ | ||||
|  Function Scan on generate_series g  (cost=N..N rows=30 width=N) (actual rows=30 loops=1) | ||||
| (1 row) | ||||
|  | ||||
| -- Ensure the estimated and actual row counts match when the range isn't | ||||
| -- evenly divisible by the step | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|                                    explain_mask_costs                                    | ||||
| ---------------------------------------------------------------------------------------- | ||||
|  Function Scan on generate_series g  (cost=N..N rows=5 width=N) (actual rows=5 loops=1) | ||||
| (1 row) | ||||
|  | ||||
| -- Ensure the estimates match when step is decreasing | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|                                     explain_mask_costs                                     | ||||
| ------------------------------------------------------------------------------------------ | ||||
|  Function Scan on generate_series g  (cost=N..N rows=30 width=N) (actual rows=30 loops=1) | ||||
| (1 row) | ||||
|  | ||||
| -- Ensure an empty range estimates 1 row | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|                                    explain_mask_costs                                    | ||||
| ---------------------------------------------------------------------------------------- | ||||
|  Function Scan on generate_series g  (cost=N..N rows=1 width=N) (actual rows=0 loops=1) | ||||
| (1 row) | ||||
|  | ||||
| -- Ensure we get the default row estimate for infinity values | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, | ||||
| false, true, false, true); | ||||
|                         explain_mask_costs                          | ||||
| ------------------------------------------------------------------- | ||||
|  Function Scan on generate_series g  (cost=N..N rows=1000 width=N) | ||||
| (1 row) | ||||
|  | ||||
| -- Ensure the row estimate behaves correctly when step size is zero. | ||||
| -- We expect generate_series_timestamp() to throw the error rather than in | ||||
| -- the support function. | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); | ||||
| ERROR:  step size cannot equal zero | ||||
| -- Test functions for control data | ||||
| SELECT count(*) > 0 AS ok FROM pg_control_checkpoint(); | ||||
|  ok  | ||||
| @@ -706,3 +815,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, | ||||
| (1 row) | ||||
|  | ||||
| DROP TABLE test_chunk_id; | ||||
| DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); | ||||
|   | ||||
| @@ -4,6 +4,44 @@ | ||||
|  | ||||
| \set regresslib :libdir '/regress' :dlsuffix | ||||
|  | ||||
| -- Function to assist with verifying EXPLAIN which includes costs.  A series | ||||
| -- of bool flags allows control over which portions are masked out | ||||
| CREATE FUNCTION explain_mask_costs(query text, do_analyze bool, | ||||
|     hide_costs bool, hide_row_est bool, hide_width bool) RETURNS setof text | ||||
| LANGUAGE plpgsql AS | ||||
| $$ | ||||
| DECLARE | ||||
|     ln text; | ||||
|     analyze_str text; | ||||
| BEGIN | ||||
|     IF do_analyze = true THEN | ||||
|         analyze_str := 'on'; | ||||
|     ELSE | ||||
|         analyze_str := 'off'; | ||||
|     END IF; | ||||
|  | ||||
|     FOR ln IN | ||||
|         EXECUTE format('explain (analyze %s, costs on, summary off, timing off) %s', | ||||
|             analyze_str, query) | ||||
|     LOOP | ||||
|         IF hide_costs = true THEN | ||||
|             ln := regexp_replace(ln, 'cost=\d+\.\d\d\.\.\d+\.\d\d', 'cost=N..N'); | ||||
|         END IF; | ||||
|  | ||||
|         IF hide_row_est = true THEN | ||||
|             -- don't use 'g' so that we leave the actual rows intact | ||||
|             ln := regexp_replace(ln, 'rows=\d+', 'rows=N'); | ||||
|         END IF; | ||||
|  | ||||
|         IF hide_width = true THEN | ||||
|             ln := regexp_replace(ln, 'width=\d+', 'width=N'); | ||||
|         END IF; | ||||
|  | ||||
|         RETURN NEXT ln; | ||||
|     END LOOP; | ||||
| END; | ||||
| $$; | ||||
|  | ||||
| -- | ||||
| -- num_nulls() | ||||
| -- | ||||
| @@ -224,6 +262,51 @@ SELECT * FROM tenk1 a JOIN my_gen_series(1,1000) g ON a.unique1 = g; | ||||
| EXPLAIN (COSTS OFF) | ||||
| SELECT * FROM tenk1 a JOIN my_gen_series(1,10) g ON a.unique1 = g; | ||||
|  | ||||
| -- | ||||
| -- Test the SupportRequestRows support function for generate_series_timestamp() | ||||
| -- | ||||
|  | ||||
| -- Ensure the row estimate matches the actual rows | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|  | ||||
| -- As above but with generate_series_timestamp | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMP '2024-02-01', TIMESTAMP '2024-03-01', INTERVAL '1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|  | ||||
| -- As above but with generate_series_timestamptz_at_zone() | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '1 day', 'UTC') g(s);$$, | ||||
| true, true, false, true); | ||||
|  | ||||
| -- Ensure the estimated and actual row counts match when the range isn't | ||||
| -- evenly divisible by the step | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '7 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|  | ||||
| -- Ensure the estimates match when step is decreasing | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '-1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|  | ||||
| -- Ensure an empty range estimates 1 row | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-03-01', TIMESTAMPTZ '2024-02-01', INTERVAL '1 day') g(s);$$, | ||||
| true, true, false, true); | ||||
|  | ||||
| -- Ensure we get the default row estimate for infinity values | ||||
| SELECT explain_mask_costs($$ | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '-infinity', TIMESTAMPTZ 'infinity', INTERVAL '1 day') g(s);$$, | ||||
| false, true, false, true); | ||||
|  | ||||
| -- Ensure the row estimate behaves correctly when step size is zero. | ||||
| -- We expect generate_series_timestamp() to throw the error rather than in | ||||
| -- the support function. | ||||
| SELECT * FROM generate_series(TIMESTAMPTZ '2024-02-01', TIMESTAMPTZ '2024-03-01', INTERVAL '0 day') g(s); | ||||
|  | ||||
| -- Test functions for control data | ||||
| SELECT count(*) > 0 AS ok FROM pg_control_checkpoint(); | ||||
| SELECT count(*) > 0 AS ok FROM pg_control_init(); | ||||
| @@ -273,3 +356,4 @@ SELECT pg_column_toast_chunk_id(a) IS NULL, | ||||
|   pg_column_toast_chunk_id(b) IN (SELECT chunk_id FROM pg_toast.:toastrel) | ||||
|   FROM test_chunk_id; | ||||
| DROP TABLE test_chunk_id; | ||||
| DROP FUNCTION explain_mask_costs(text, bool, bool, bool, bool); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user