mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-30660 Aggregation functions fail to leverage uniqueness property
When executing a statement of the form SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1, where AGGR_FN is an aggregate function such as COUNT(), AVG() or SUM(), and a unique index exists on table t1 covering some or all of the columns (c1, c2,..,cn), the retrieved values are inherently unique. Consequently, the need for de-duplication imposed by the DISTINCT clause can be eliminated, leading to optimization of aggregation operations. This optimization applies under the following conditions: - only one table involved in the join (not counting const tables) - some arguments of the aggregate function are fields (not functions/subqueries) This optimization extends to queries of the form SELECT AGGR_FN(c1, c2,..,cn) GROUP BY cx,..cy when a unique index covers some or all of the columns (c1, c2,..cn, cx,..cy)
This commit is contained in:
322
mysql-test/main/distinct_notembedded.result
Normal file
322
mysql-test/main/distinct_notembedded.result
Normal file
@ -0,0 +1,322 @@
|
|||||||
|
#
|
||||||
|
# MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK
|
||||||
|
#
|
||||||
|
set @save_optimizer_trace = @@optimizer_trace;
|
||||||
|
SET optimizer_trace='enabled=on';
|
||||||
|
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,1), (3,1);
|
||||||
|
# Optimization is applied (aggregator=simple):
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1;
|
||||||
|
AVG(DISTINCT a) SUM(DISTINCT b)
|
||||||
|
2.0000 1
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "avg(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"function": "sum(distinct t1.b)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Only `a` is unique but it's enough to eliminate DISTINCT:
|
||||||
|
SELECT COUNT(DISTINCT b, a) FROM t1;
|
||||||
|
COUNT(DISTINCT b, a)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.b,t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT COUNT(DISTINCT a, a + b) FROM t1;
|
||||||
|
COUNT(DISTINCT a, a + b)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a,t1.a + t1.b)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1;
|
||||||
|
SUM(DISTINCT a) AVG(DISTINCT a) COUNT(DISTINCT a)
|
||||||
|
5 2.5000 2
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "sum(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"function": "avg(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Optimization is not applied 'cause function argument is not a field
|
||||||
|
# (aggregator=distinct):
|
||||||
|
SELECT SUM(DISTINCT a + b) FROM t1;
|
||||||
|
SUM(DISTINCT a + b)
|
||||||
|
9
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "sum(distinct t1.a + t1.b)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT COUNT(DISTINCT b) FROM t1;
|
||||||
|
COUNT(DISTINCT b)
|
||||||
|
1
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.b)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT AVG(DISTINCT b / a) FROM t1;
|
||||||
|
AVG(DISTINCT b / a)
|
||||||
|
0.61110000
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "avg(distinct t1.b / t1.a)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1;
|
||||||
|
id select_type table type possible_keys key key_len ref rows Extra
|
||||||
|
1 PRIMARY t1 index NULL PRIMARY 4 NULL 3 Using index
|
||||||
|
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct (/* select#2 */ select t1.a))",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
CREATE TABLE t2 (a INT);
|
||||||
|
INSERT INTO t2 VALUES (1), (2);
|
||||||
|
# Optimization is not applied 'cause there is more than one table
|
||||||
|
SELECT COUNT(DISTINCT t1.a) FROM t1, t2;
|
||||||
|
COUNT(DISTINCT t1.a)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT AVG(DISTINCT t1.a) FROM t1, t2;
|
||||||
|
AVG(DISTINCT t1.a)
|
||||||
|
2.0000
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "avg(distinct t1.a)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Const tables, optimization is applied
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3;
|
||||||
|
AVG(DISTINCT t1.a)
|
||||||
|
2.0000
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "avg(distinct t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2;
|
||||||
|
COUNT(DISTINCT a)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Unique index on two columns
|
||||||
|
CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL);
|
||||||
|
INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2);
|
||||||
|
CREATE UNIQUE INDEX t3_a_b ON t3 (a, b);
|
||||||
|
# Optimization is applied:
|
||||||
|
SELECT COUNT(DISTINCT a, b) FROM t3;
|
||||||
|
COUNT(DISTINCT a, b)
|
||||||
|
7
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t3.a,t3.b)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT COUNT(DISTINCT b, a) FROM t3;
|
||||||
|
COUNT(DISTINCT b, a)
|
||||||
|
7
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t3.b,t3.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3;
|
||||||
|
COUNT(DISTINCT b, a)
|
||||||
|
5
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t3.b,t3.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Optimization is applied to one of the functions:
|
||||||
|
SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a;
|
||||||
|
COUNT(DISTINCT b) SUM(DISTINCT a) SUM(DISTINCT a + b)
|
||||||
|
3 1 9
|
||||||
|
2 2 7
|
||||||
|
2 3 9
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t3.b)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"function": "sum(distinct t3.a)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"function": "sum(distinct t3.a + t3.b)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Can't apply optimization 'cause GROUP BY argument is not a field:
|
||||||
|
SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b;
|
||||||
|
COUNT(DISTINCT b)
|
||||||
|
1
|
||||||
|
2
|
||||||
|
3
|
||||||
|
1
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t3.b)",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# Test merged view
|
||||||
|
CREATE VIEW v1 AS SELECT * FROM t1;
|
||||||
|
# Optimization is applied
|
||||||
|
SELECT COUNT(DISTINCT a, b) FROM v1;
|
||||||
|
COUNT(DISTINCT a, b)
|
||||||
|
3
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "count(distinct t1.a,t1.b)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
# GROUP_CONCAT implements non-standard distinct aggregator
|
||||||
|
SELECT GROUP_CONCAT(b) FROM t1;
|
||||||
|
GROUP_CONCAT(b)
|
||||||
|
1,1,1
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "group_concat(t1.b separator ',')",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
|
||||||
|
GROUP_CONCAT(DISTINCT b)
|
||||||
|
1
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
JS
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"function": "group_concat(distinct t1.b separator ',')",
|
||||||
|
"aggregator_type": "distinct"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
DROP TABLE t1, t2, t3;
|
||||||
|
DROP VIEW v1;
|
||||||
|
SET optimizer_trace = @save_optimizer_trace;
|
||||||
|
#
|
||||||
|
# end of 10.5 tests
|
||||||
|
#
|
109
mysql-test/main/distinct_notembedded.test
Normal file
109
mysql-test/main/distinct_notembedded.test
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
# Embedded doesn't have optimizer trace:
|
||||||
|
--source include/not_embedded.inc
|
||||||
|
--source include/have_sequence.inc
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
set @save_optimizer_trace = @@optimizer_trace;
|
||||||
|
SET optimizer_trace='enabled=on';
|
||||||
|
let $trace=
|
||||||
|
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '\$**.prepare_sum_aggregators')) AS JS
|
||||||
|
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||||
|
|
||||||
|
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL);
|
||||||
|
INSERT INTO t1 VALUES (1,1), (2,1), (3,1);
|
||||||
|
|
||||||
|
--echo # Optimization is applied (aggregator=simple):
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Only `a` is unique but it's enough to eliminate DISTINCT:
|
||||||
|
SELECT COUNT(DISTINCT b, a) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT COUNT(DISTINCT a, a + b) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Optimization is not applied 'cause function argument is not a field
|
||||||
|
--echo # (aggregator=distinct):
|
||||||
|
SELECT SUM(DISTINCT a + b) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT COUNT(DISTINCT b) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT AVG(DISTINCT b / a) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
CREATE TABLE t2 (a INT);
|
||||||
|
INSERT INTO t2 VALUES (1), (2);
|
||||||
|
|
||||||
|
--echo # Optimization is not applied 'cause there is more than one table
|
||||||
|
SELECT COUNT(DISTINCT t1.a) FROM t1, t2;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT AVG(DISTINCT t1.a) FROM t1, t2;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Const tables, optimization is applied
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Unique index on two columns
|
||||||
|
CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL);
|
||||||
|
INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2);
|
||||||
|
CREATE UNIQUE INDEX t3_a_b ON t3 (a, b);
|
||||||
|
--echo # Optimization is applied:
|
||||||
|
SELECT COUNT(DISTINCT a, b) FROM t3;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT COUNT(DISTINCT b, a) FROM t3;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Optimization is applied to one of the functions:
|
||||||
|
SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Can't apply optimization 'cause GROUP BY argument is not a field:
|
||||||
|
SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # Test merged view
|
||||||
|
CREATE VIEW v1 AS SELECT * FROM t1;
|
||||||
|
--echo # Optimization is applied
|
||||||
|
SELECT COUNT(DISTINCT a, b) FROM v1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
--echo # GROUP_CONCAT implements non-standard distinct aggregator
|
||||||
|
SELECT GROUP_CONCAT(b) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
|
||||||
|
eval $trace;
|
||||||
|
|
||||||
|
DROP TABLE t1, t2, t3;
|
||||||
|
DROP VIEW v1;
|
||||||
|
SET optimizer_trace = @save_optimizer_trace;
|
||||||
|
--echo #
|
||||||
|
--echo # end of 10.5 tests
|
||||||
|
--echo #
|
@ -1496,6 +1496,12 @@ EXPLAIN SELECT MIN(d) FROM t1 where b=2 and c=3 group by a {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"test_if_skip_sort_order": []
|
"test_if_skip_sort_order": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prepare_sum_aggregators": {
|
||||||
|
"function": "min(t1.d)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
@ -1693,6 +1699,18 @@ EXPLAIN SELECT id,MIN(a),MAX(a) FROM t1 WHERE a>=20010104e0 GROUP BY id {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"test_if_skip_sort_order": []
|
"test_if_skip_sort_order": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prepare_sum_aggregators": {
|
||||||
|
"function": "min(t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"prepare_sum_aggregators": {
|
||||||
|
"function": "max(t1.a)",
|
||||||
|
"aggregator_type": "simple"
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -597,6 +597,17 @@ public:
|
|||||||
|
|
||||||
bool with_sum_func() const { return true; }
|
bool with_sum_func() const { return true; }
|
||||||
virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); }
|
virtual void set_partition_row_count(ulonglong count) { DBUG_ASSERT(0); }
|
||||||
|
|
||||||
|
/*
|
||||||
|
While most Item_sum descendants employ standard aggregators configured
|
||||||
|
through Item_sum::set_aggregator() call, there are exceptions like
|
||||||
|
Item_func_group_concat, which implements its own custom aggregators for
|
||||||
|
deduplication values.
|
||||||
|
This function distinguishes between the use of standard and custom
|
||||||
|
aggregators by the object
|
||||||
|
*/
|
||||||
|
virtual bool uses_non_standard_aggregator_for_distinct() const
|
||||||
|
{ return false; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -1952,6 +1963,9 @@ protected:
|
|||||||
{ return f->val_str(tmp, key + offset); }
|
{ return f->val_str(tmp, key + offset); }
|
||||||
virtual void cut_max_length(String *result,
|
virtual void cut_max_length(String *result,
|
||||||
uint old_length, uint max_length) const;
|
uint old_length, uint max_length) const;
|
||||||
|
bool uses_non_standard_aggregator_for_distinct() const override
|
||||||
|
{ return distinct; }
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// Methods used by ColumnStore
|
// Methods used by ColumnStore
|
||||||
bool get_distinct() const { return distinct; }
|
bool get_distinct() const { return distinct; }
|
||||||
|
@ -278,7 +278,6 @@ static void update_tmptable_sum_func(Item_sum **func,TABLE *tmp_table);
|
|||||||
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
|
static void copy_sum_funcs(Item_sum **func_ptr, Item_sum **end);
|
||||||
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
|
static bool add_ref_to_table_cond(THD *thd, JOIN_TAB *join_tab);
|
||||||
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
|
static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr);
|
||||||
static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct);
|
|
||||||
static bool init_sum_functions(Item_sum **func, Item_sum **end);
|
static bool init_sum_functions(Item_sum **func, Item_sum **end);
|
||||||
static bool update_sum_func(Item_sum **func);
|
static bool update_sum_func(Item_sum **func);
|
||||||
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
|
static void select_describe(JOIN *join, bool need_tmp_table,bool need_order,
|
||||||
@ -3656,7 +3655,7 @@ bool JOIN::make_aggr_tables_info()
|
|||||||
{
|
{
|
||||||
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
||||||
DBUG_RETURN(true);
|
DBUG_RETURN(true);
|
||||||
if (prepare_sum_aggregators(sum_funcs,
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||||
!join_tab->is_using_agg_loose_index_scan()))
|
!join_tab->is_using_agg_loose_index_scan()))
|
||||||
DBUG_RETURN(true);
|
DBUG_RETURN(true);
|
||||||
group_list= NULL;
|
group_list= NULL;
|
||||||
@ -3766,7 +3765,7 @@ bool JOIN::make_aggr_tables_info()
|
|||||||
}
|
}
|
||||||
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
if (make_sum_func_list(*curr_all_fields, *curr_fields_list, true))
|
||||||
DBUG_RETURN(true);
|
DBUG_RETURN(true);
|
||||||
if (prepare_sum_aggregators(sum_funcs,
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||||
!join_tab ||
|
!join_tab ||
|
||||||
!join_tab-> is_using_agg_loose_index_scan()))
|
!join_tab-> is_using_agg_loose_index_scan()))
|
||||||
DBUG_RETURN(true);
|
DBUG_RETURN(true);
|
||||||
@ -3947,8 +3946,8 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
|
|||||||
goto err;
|
goto err;
|
||||||
if (make_sum_func_list(all_fields, fields_list, true))
|
if (make_sum_func_list(all_fields, fields_list, true))
|
||||||
goto err;
|
goto err;
|
||||||
if (prepare_sum_aggregators(sum_funcs,
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||||
!(tables_list &&
|
!(tables_list &&
|
||||||
join_tab->is_using_agg_loose_index_scan())))
|
join_tab->is_using_agg_loose_index_scan())))
|
||||||
goto err;
|
goto err;
|
||||||
if (setup_sum_funcs(thd, sum_funcs))
|
if (setup_sum_funcs(thd, sum_funcs))
|
||||||
@ -3957,7 +3956,7 @@ JOIN::create_postjoin_aggr_table(JOIN_TAB *tab, List<Item> *table_fields,
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (prepare_sum_aggregators(sum_funcs,
|
if (prepare_sum_aggregators(thd, sum_funcs,
|
||||||
!join_tab->is_using_agg_loose_index_scan()))
|
!join_tab->is_using_agg_loose_index_scan()))
|
||||||
goto err;
|
goto err;
|
||||||
if (setup_sum_funcs(thd, sum_funcs))
|
if (setup_sum_funcs(thd, sum_funcs))
|
||||||
@ -26406,13 +26405,86 @@ static bool setup_sum_funcs(THD *thd, Item_sum **func_ptr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool prepare_sum_aggregators(Item_sum **func_ptr, bool need_distinct)
|
/*
|
||||||
|
@brief
|
||||||
|
Setup aggregate functions.
|
||||||
|
|
||||||
|
@param thd Thread descriptor
|
||||||
|
@param func_ptr Array of pointers to aggregate functions
|
||||||
|
@param need_distinct FALSE means that the table access method already
|
||||||
|
guarantees that arguments of all aggregate functions
|
||||||
|
will be unique. (This is the case for Loose Scan)
|
||||||
|
TRUE - Otherwise.
|
||||||
|
@return
|
||||||
|
false Ok
|
||||||
|
true Error
|
||||||
|
*/
|
||||||
|
|
||||||
|
bool JOIN::prepare_sum_aggregators(THD *thd, Item_sum **func_ptr,
|
||||||
|
bool need_distinct)
|
||||||
{
|
{
|
||||||
Item_sum *func;
|
Item_sum *func;
|
||||||
DBUG_ENTER("prepare_sum_aggregators");
|
DBUG_ENTER("prepare_sum_aggregators");
|
||||||
while ((func= *(func_ptr++)))
|
while ((func= *(func_ptr++)))
|
||||||
{
|
{
|
||||||
if (func->set_aggregator(need_distinct && func->has_with_distinct() ?
|
bool need_distinct_aggregator= need_distinct && func->has_with_distinct();
|
||||||
|
if (need_distinct_aggregator && table_count - const_tables == 1)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
We are doing setup for an aggregate with DISTINCT, like
|
||||||
|
|
||||||
|
SELECT agg_func(DISTINCT col1, col2 ...) FROM ...
|
||||||
|
|
||||||
|
In general case, agg_func will need to use Aggregator_distinct to
|
||||||
|
remove duplicates from its arguments.
|
||||||
|
We won't have to remove duplicates if we know the arguments are already
|
||||||
|
unique. This is true when
|
||||||
|
1. the join operation has only one non-const table (checked above)
|
||||||
|
2. the argument list covers a PRIMARY or a UNIQUE index.
|
||||||
|
|
||||||
|
Example: here the values of t1.pk are unique:
|
||||||
|
|
||||||
|
SELECT agg_func(DISTINCT t1.pk, ...) FROM t1
|
||||||
|
|
||||||
|
and so the whole argument of agg_func is unique.
|
||||||
|
*/
|
||||||
|
List<Item> arg_fields;
|
||||||
|
for (uint i= 0; i < func->argument_count(); i++)
|
||||||
|
{
|
||||||
|
if (func->arguments()[i]->real_item()->type() == Item::FIELD_ITEM)
|
||||||
|
arg_fields.push_back(func->arguments()[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
If the query has a GROUP BY, then it's sufficient that a unique
|
||||||
|
key is covered by a concatenation of {argument_list, group_by_list}.
|
||||||
|
|
||||||
|
Example: Suppose t1 has PRIMARY KEY(pk1, pk2). Then:
|
||||||
|
|
||||||
|
SELECT agg_func(DISTINCT t1.pk1, ...) FROM t1 GROUP BY t1.pk2
|
||||||
|
|
||||||
|
Each GROUP BY group will have t1.pk2 fixed. Then, the values of t1.pk1
|
||||||
|
will be unique, and no de-duplication will be needed.
|
||||||
|
*/
|
||||||
|
for (ORDER *group= group_list; group ; group= group->next)
|
||||||
|
{
|
||||||
|
if ((*group->item)->real_item()->type() == Item::FIELD_ITEM)
|
||||||
|
arg_fields.push_back(*group->item);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (list_contains_unique_index(join_tab[const_tables].table,
|
||||||
|
find_field_in_item_list,
|
||||||
|
(void *) &arg_fields))
|
||||||
|
need_distinct_aggregator= false;
|
||||||
|
}
|
||||||
|
Json_writer_object trace_wrapper(thd);
|
||||||
|
Json_writer_object trace_aggr(thd, "prepare_sum_aggregators");
|
||||||
|
trace_aggr.add("function", func);
|
||||||
|
trace_aggr.add("aggregator_type",
|
||||||
|
(need_distinct_aggregator ||
|
||||||
|
func->uses_non_standard_aggregator_for_distinct()) ?
|
||||||
|
"distinct" : "simple");
|
||||||
|
if (func->set_aggregator(need_distinct_aggregator ?
|
||||||
Aggregator::DISTINCT_AGGREGATOR :
|
Aggregator::DISTINCT_AGGREGATOR :
|
||||||
Aggregator::SIMPLE_AGGREGATOR))
|
Aggregator::SIMPLE_AGGREGATOR))
|
||||||
DBUG_RETURN(TRUE);
|
DBUG_RETURN(TRUE);
|
||||||
|
@ -1905,6 +1905,8 @@ private:
|
|||||||
bool add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *fields);
|
bool add_fields_for_current_rowid(JOIN_TAB *cur, List<Item> *fields);
|
||||||
void free_pushdown_handlers(List<TABLE_LIST>& join_list);
|
void free_pushdown_handlers(List<TABLE_LIST>& join_list);
|
||||||
void init_join_cache_and_keyread();
|
void init_join_cache_and_keyread();
|
||||||
|
bool prepare_sum_aggregators(THD *thd,Item_sum **func_ptr,
|
||||||
|
bool need_distinct);
|
||||||
};
|
};
|
||||||
|
|
||||||
enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};
|
enum enum_with_bush_roots { WITH_BUSH_ROOTS, WITHOUT_BUSH_ROOTS};
|
||||||
|
Reference in New Issue
Block a user