diff --git a/src/backend/optimizer/path/allpaths.c b/src/backend/optimizer/path/allpaths.c index b5bc9b602e2..df3453f99f0 100644 --- a/src/backend/optimizer/path/allpaths.c +++ b/src/backend/optimizer/path/allpaths.c @@ -1371,9 +1371,23 @@ add_paths_to_append_rel(PlannerInfo *root, RelOptInfo *rel, */ if (rel->consider_startup && childrel->cheapest_startup_path != NULL) { + Path *cheapest_path; + + /* + * With an indication of how many tuples the query should provide, + * the optimizer tries to choose the path optimal for that + * specific number of tuples. + */ + if (root->tuple_fraction > 0.0) + cheapest_path = + get_cheapest_fractional_path(childrel, + root->tuple_fraction); + else + cheapest_path = childrel->cheapest_startup_path; + /* cheapest_startup_path must not be a parameterized path. */ - Assert(childrel->cheapest_startup_path->param_info == NULL); - accumulate_append_subpath(childrel->cheapest_startup_path, + Assert(cheapest_path->param_info == NULL); + accumulate_append_subpath(cheapest_path, &startup_subpaths, NULL); } diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 36ee6dd43de..014e80c30e6 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -6414,6 +6414,11 @@ make_sort_input_target(PlannerInfo *root, * Find the cheapest path for retrieving a specified fraction of all * the tuples expected to be returned by the given relation. * + * Do not consider parameterized paths. If the caller needs a path for upper + * rel, it can't have parameterized paths. If the caller needs an append + * subpath, it could become limited by the treatment of similar + * parameterization of all the subpaths. + * * We interpret tuple_fraction the same way as grouping_planner. * * We assume set_cheapest() has been run on the given rel. @@ -6436,6 +6441,9 @@ get_cheapest_fractional_path(RelOptInfo *rel, double tuple_fraction) { Path *path = (Path *) lfirst(l); + if (path->param_info) + continue; + if (path == rel->cheapest_total_path || compare_fractional_path_costs(best_path, path, tuple_fraction) <= 0) continue; diff --git a/src/test/regress/expected/partition_join.out b/src/test/regress/expected/partition_join.out index 34b963ce6fe..938cedd79ad 100644 --- a/src/test/regress/expected/partition_join.out +++ b/src/test/regress/expected/partition_join.out @@ -5260,6 +5260,122 @@ SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DE Index Cond: (id = x_2.id) (11 rows) +-- +-- Test Append's fractional paths +-- +CREATE INDEX pht1_c_idx ON pht1(c); +-- SeqScan might be the best choice if we need one single tuple +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1; + QUERY PLAN +-------------------------------------------------- + Limit + -> Append + -> Nested Loop + Join Filter: (p1_1.c = p2_1.c) + -> Seq Scan on pht1_p1 p1_1 + -> Materialize + -> Seq Scan on pht1_p1 p2_1 + -> Nested Loop + Join Filter: (p1_2.c = p2_2.c) + -> Seq Scan on pht1_p2 p1_2 + -> Materialize + -> Seq Scan on pht1_p2 p2_2 + -> Nested Loop + Join Filter: (p1_3.c = p2_3.c) + -> Seq Scan on pht1_p3 p1_3 + -> Materialize + -> Seq Scan on pht1_p3 p2_3 +(17 rows) + +-- Increase number of tuples requested and an IndexScan will be chosen +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100; + QUERY PLAN +------------------------------------------------------------------------ + Limit + -> Append + -> Nested Loop + -> Seq Scan on pht1_p1 p1_1 + -> Memoize + Cache Key: p1_1.c + Cache Mode: logical + -> Index Scan using pht1_p1_c_idx on pht1_p1 p2_1 + Index Cond: (c = p1_1.c) + -> Nested Loop + -> Seq Scan on pht1_p2 p1_2 + -> Memoize + Cache Key: p1_2.c + Cache Mode: logical + -> Index Scan using pht1_p2_c_idx on pht1_p2 p2_2 + Index Cond: (c = p1_2.c) + -> Nested Loop + -> Seq Scan on pht1_p3 p1_3 + -> Memoize + Cache Key: p1_3.c + Cache Mode: logical + -> Index Scan using pht1_p3_c_idx on pht1_p3 p2_3 + Index Cond: (c = p1_3.c) +(23 rows) + +-- If almost all the data should be fetched - prefer SeqScan +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1000; + QUERY PLAN +-------------------------------------------------- + Limit + -> Append + -> Hash Join + Hash Cond: (p1_1.c = p2_1.c) + -> Seq Scan on pht1_p1 p1_1 + -> Hash + -> Seq Scan on pht1_p1 p2_1 + -> Hash Join + Hash Cond: (p1_2.c = p2_2.c) + -> Seq Scan on pht1_p2 p1_2 + -> Hash + -> Seq Scan on pht1_p2 p2_2 + -> Hash Join + Hash Cond: (p1_3.c = p2_3.c) + -> Seq Scan on pht1_p3 p1_3 + -> Hash + -> Seq Scan on pht1_p3 p2_3 +(17 rows) + +SET max_parallel_workers_per_gather = 1; +SET debug_parallel_query = on; +-- Partial paths should also be smart enough to employ limits +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100; + QUERY PLAN +------------------------------------------------------------------------------ + Gather + Workers Planned: 1 + Single Copy: true + -> Limit + -> Append + -> Nested Loop + -> Seq Scan on pht1_p1 p1_1 + -> Memoize + Cache Key: p1_1.c + Cache Mode: logical + -> Index Scan using pht1_p1_c_idx on pht1_p1 p2_1 + Index Cond: (c = p1_1.c) + -> Nested Loop + -> Seq Scan on pht1_p2 p1_2 + -> Memoize + Cache Key: p1_2.c + Cache Mode: logical + -> Index Scan using pht1_p2_c_idx on pht1_p2 p2_2 + Index Cond: (c = p1_2.c) + -> Nested Loop + -> Seq Scan on pht1_p3 p1_3 + -> Memoize + Cache Key: p1_3.c + Cache Mode: logical + -> Index Scan using pht1_p3_c_idx on pht1_p3 p2_3 + Index Cond: (c = p1_3.c) +(26 rows) + +RESET debug_parallel_query; +-- Remove indexes from the partitioned table and its partitions +DROP INDEX pht1_c_idx CASCADE; -- cleanup DROP TABLE fract_t; RESET max_parallel_workers_per_gather; diff --git a/src/test/regress/expected/union.out b/src/test/regress/expected/union.out index caa8fe70a05..96962817ed4 100644 --- a/src/test/regress/expected/union.out +++ b/src/test/regress/expected/union.out @@ -1472,18 +1472,17 @@ select t1.unique1 from tenk1 t1 inner join tenk2 t2 on t1.tenthous = t2.tenthous and t2.thousand = 0 union all (values(1)) limit 1; - QUERY PLAN --------------------------------------------------------- + QUERY PLAN +--------------------------------------------------------------------- Limit -> Append -> Nested Loop - Join Filter: (t1.tenthous = t2.tenthous) - -> Seq Scan on tenk1 t1 - -> Materialize - -> Seq Scan on tenk2 t2 - Filter: (thousand = 0) + -> Seq Scan on tenk2 t2 + Filter: (thousand = 0) + -> Index Scan using tenk1_thous_tenthous on tenk1 t1 + Index Cond: (tenthous = t2.tenthous) -> Result -(9 rows) +(8 rows) -- Ensure there is no problem if cheapest_startup_path is NULL explain (costs off) diff --git a/src/test/regress/sql/partition_join.sql b/src/test/regress/sql/partition_join.sql index 26b8e3d063f..b76c5451001 100644 --- a/src/test/regress/sql/partition_join.sql +++ b/src/test/regress/sql/partition_join.sql @@ -1225,6 +1225,27 @@ SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id AS EXPLAIN (COSTS OFF) SELECT x.id, y.id FROM fract_t x LEFT JOIN fract_t y USING (id) ORDER BY x.id DESC LIMIT 10; +-- +-- Test Append's fractional paths +-- + +CREATE INDEX pht1_c_idx ON pht1(c); +-- SeqScan might be the best choice if we need one single tuple +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1; +-- Increase number of tuples requested and an IndexScan will be chosen +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100; +-- If almost all the data should be fetched - prefer SeqScan +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 1000; + +SET max_parallel_workers_per_gather = 1; +SET debug_parallel_query = on; +-- Partial paths should also be smart enough to employ limits +EXPLAIN (COSTS OFF) SELECT * FROM pht1 p1 JOIN pht1 p2 USING (c) LIMIT 100; +RESET debug_parallel_query; + +-- Remove indexes from the partitioned table and its partitions +DROP INDEX pht1_c_idx CASCADE; + -- cleanup DROP TABLE fract_t;