diff --git a/src/backend/optimizer/plan/planner.c b/src/backend/optimizer/plan/planner.c index 0c7273b9ccd..8ffbbdf2312 100644 --- a/src/backend/optimizer/plan/planner.c +++ b/src/backend/optimizer/plan/planner.c @@ -4023,9 +4023,10 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel, * If this is the topmost relation or if the parent relation is doing * full partitionwise aggregation, then we can do full partitionwise * aggregation provided that the GROUP BY clause contains all of the - * partitioning columns at this level. Otherwise, we can do at most - * partial partitionwise aggregation. But if partial aggregation is - * not supported in general then we can't use it for partitionwise + * partitioning columns at this level and the collation used by GROUP + * BY matches the partitioning collation. Otherwise, we can do at + * most partial partitionwise aggregation. But if partial aggregation + * is not supported in general then we can't use it for partitionwise * aggregation either. * * Check parse->groupClause not processed_groupClause, because it's @@ -8005,8 +8006,8 @@ create_partitionwise_grouping_paths(PlannerInfo *root, /* * group_by_has_partkey * - * Returns true, if all the partition keys of the given relation are part of - * the GROUP BY clauses, false otherwise. + * Returns true if all the partition keys of the given relation are part of + * the GROUP BY clauses, including having matching collation, false otherwise. */ static bool group_by_has_partkey(RelOptInfo *input_rel, @@ -8034,13 +8035,40 @@ group_by_has_partkey(RelOptInfo *input_rel, foreach(lc, partexprs) { + ListCell *lg; Expr *partexpr = lfirst(lc); + Oid partcoll = input_rel->part_scheme->partcollation[cnt]; - if (list_member(groupexprs, partexpr)) + foreach(lg, groupexprs) { - found = true; - break; + Expr *groupexpr = lfirst(lg); + Oid groupcoll = exprCollation((Node *) groupexpr); + + /* + * Note: we can assume there is at most one RelabelType node; + * eval_const_expressions() will have simplified if more than + * one. + */ + if (IsA(groupexpr, RelabelType)) + groupexpr = ((RelabelType *) groupexpr)->arg; + + if (equal(groupexpr, partexpr)) + { + /* + * Reject a match if the grouping collation does not match + * the partitioning collation. + */ + if (OidIsValid(partcoll) && OidIsValid(groupcoll) && + partcoll != groupcoll) + return false; + + found = true; + break; + } } + + if (found) + break; } /* diff --git a/src/test/regress/expected/collate.icu.utf8.out b/src/test/regress/expected/collate.icu.utf8.out index 7d59fb44316..c7807a14ca6 100644 --- a/src/test/regress/expected/collate.icu.utf8.out +++ b/src/test/regress/expected/collate.icu.utf8.out @@ -2052,6 +2052,96 @@ SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1); t (1 row) +-- +-- Bug #18568 +-- +-- Partitionwise aggregate (full or partial) should not be used when a +-- partition key's collation doesn't match that of the GROUP BY column it is +-- matched with. +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; +CREATE TABLE pagg_tab3 (a text, c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'A'); +INSERT INTO pagg_tab3 SELECT i % 4 + 1, substr('abAB', (i % 4) + 1 , 1) FROM generate_series(0, 19) i; +ANALYZE pagg_tab3; +SET enable_partitionwise_aggregate TO false; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + QUERY PLAN +----------------------------------------------------------- + Sort + Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive + -> HashAggregate + Group Key: pagg_tab3.c + -> Append + -> Seq Scan on pagg_tab3_p2 pagg_tab3_1 + -> Seq Scan on pagg_tab3_p1 pagg_tab3_2 +(7 rows) + +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + upper | count +-------+------- + A | 10 + B | 10 +(2 rows) + +-- No "full" partitionwise aggregation allowed, though "partial" is allowed. +SET enable_partitionwise_aggregate TO true; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + QUERY PLAN +-------------------------------------------------------------- + Sort + Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive + -> Finalize HashAggregate + Group Key: pagg_tab3.c + -> Append + -> Partial HashAggregate + Group Key: pagg_tab3.c + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> Partial HashAggregate + Group Key: pagg_tab3_1.c + -> Seq Scan on pagg_tab3_p1 pagg_tab3_1 +(11 rows) + +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + upper | count +-------+------- + A | 10 + B | 10 +(2 rows) + +-- OK to use full partitionwise aggregate after changing the GROUP BY column's +-- collation to be the same as that of the partition key. +EXPLAIN (COSTS OFF) +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; + QUERY PLAN +-------------------------------------------------------- + Sort + Sort Key: ((pagg_tab3.c)::text) COLLATE "C" + -> Append + -> HashAggregate + Group Key: (pagg_tab3.c)::text + -> Seq Scan on pagg_tab3_p2 pagg_tab3 + -> HashAggregate + Group Key: (pagg_tab3_1.c)::text + -> Seq Scan on pagg_tab3_p1 pagg_tab3_1 +(9 rows) + +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; + c | count +---+------- + A | 5 + B | 5 + a | 5 + b | 5 +(4 rows) + +DROP TABLE pagg_tab3; +RESET enable_partitionwise_aggregate; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; -- cleanup RESET search_path; SET client_min_messages TO warning; diff --git a/src/test/regress/sql/collate.icu.utf8.sql b/src/test/regress/sql/collate.icu.utf8.sql index 80f28a97d78..9f7c06aa38a 100644 --- a/src/test/regress/sql/collate.icu.utf8.sql +++ b/src/test/regress/sql/collate.icu.utf8.sql @@ -796,6 +796,43 @@ INSERT INTO test33 VALUES (2, 'DEF'); -- they end up in the same partition (but it's platform-dependent which one) SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1); +-- +-- Bug #18568 +-- +-- Partitionwise aggregate (full or partial) should not be used when a +-- partition key's collation doesn't match that of the GROUP BY column it is +-- matched with. +SET max_parallel_workers_per_gather TO 0; +SET enable_incremental_sort TO off; + +CREATE TABLE pagg_tab3 (a text, c text collate case_insensitive) PARTITION BY LIST(c collate "C"); +CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b'); +CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'A'); +INSERT INTO pagg_tab3 SELECT i % 4 + 1, substr('abAB', (i % 4) + 1 , 1) FROM generate_series(0, 19) i; +ANALYZE pagg_tab3; + +SET enable_partitionwise_aggregate TO false; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + +-- No "full" partitionwise aggregation allowed, though "partial" is allowed. +SET enable_partitionwise_aggregate TO true; +EXPLAIN (COSTS OFF) +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; +SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1; + +-- OK to use full partitionwise aggregate after changing the GROUP BY column's +-- collation to be the same as that of the partition key. +EXPLAIN (COSTS OFF) +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; +SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1; + +DROP TABLE pagg_tab3; + +RESET enable_partitionwise_aggregate; +RESET max_parallel_workers_per_gather; +RESET enable_incremental_sort; -- cleanup RESET search_path;