mirror of
https://github.com/MariaDB/server.git
synced 2025-07-30 16:24:05 +03:00
MDEV-30660 Aggregation functions fail to leverage uniqueness property
When executing a statement of the form SELECT AGGR_FN(DISTINCT c1, c2,..,cn) FROM t1, where AGGR_FN is an aggregate function such as COUNT(), AVG() or SUM(), and a unique index exists on table t1 covering some or all of the columns (c1, c2,..,cn), the retrieved values are inherently unique. Consequently, the need for de-duplication imposed by the DISTINCT clause can be eliminated, leading to optimization of aggregation operations. This optimization applies under the following conditions: - only one table involved in the join (not counting const tables) - some arguments of the aggregate function are fields (not functions/subqueries) This optimization extends to queries of the form SELECT AGGR_FN(c1, c2,..,cn) GROUP BY cx,..cy when a unique index covers some or all of the columns (c1, c2,..cn, cx,..cy)
This commit is contained in:
322
mysql-test/main/distinct_notembedded.result
Normal file
322
mysql-test/main/distinct_notembedded.result
Normal file
@ -0,0 +1,322 @@
|
||||
#
|
||||
# MDEV-30660 COUNT DISTINCT seems unnecessarily slow when run on a PK
|
||||
#
|
||||
set @save_optimizer_trace = @@optimizer_trace;
|
||||
SET optimizer_trace='enabled=on';
|
||||
CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY, b INT NOT NULL);
|
||||
INSERT INTO t1 VALUES (1,1), (2,1), (3,1);
|
||||
# Optimization is applied (aggregator=simple):
|
||||
SELECT COUNT(DISTINCT a) FROM t1;
|
||||
COUNT(DISTINCT a)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT AVG(DISTINCT a), SUM(DISTINCT b) FROM t1;
|
||||
AVG(DISTINCT a) SUM(DISTINCT b)
|
||||
2.0000 1
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "avg(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
},
|
||||
{
|
||||
"function": "sum(distinct t1.b)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
# Only `a` is unique but it's enough to eliminate DISTINCT:
|
||||
SELECT COUNT(DISTINCT b, a) FROM t1;
|
||||
COUNT(DISTINCT b, a)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.b,t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT COUNT(DISTINCT a, a + b) FROM t1;
|
||||
COUNT(DISTINCT a, a + b)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.a,t1.a + t1.b)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT SUM(DISTINCT a), AVG(DISTINCT a), COUNT(DISTINCT a) FROM t1 WHERE a > 1;
|
||||
SUM(DISTINCT a) AVG(DISTINCT a) COUNT(DISTINCT a)
|
||||
5 2.5000 2
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "sum(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
},
|
||||
{
|
||||
"function": "avg(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
},
|
||||
{
|
||||
"function": "count(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
# Optimization is not applied 'cause function argument is not a field
|
||||
# (aggregator=distinct):
|
||||
SELECT SUM(DISTINCT a + b) FROM t1;
|
||||
SUM(DISTINCT a + b)
|
||||
9
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "sum(distinct t1.a + t1.b)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
SELECT COUNT(DISTINCT b) FROM t1;
|
||||
COUNT(DISTINCT b)
|
||||
1
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.b)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
SELECT AVG(DISTINCT b / a) FROM t1;
|
||||
AVG(DISTINCT b / a)
|
||||
0.61110000
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "avg(distinct t1.b / t1.a)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
EXPLAIN SELECT COUNT(DISTINCT (SELECT a)) FROM t1;
|
||||
id select_type table type possible_keys key key_len ref rows Extra
|
||||
1 PRIMARY t1 index NULL PRIMARY 4 NULL 3 Using index
|
||||
2 DEPENDENT SUBQUERY NULL NULL NULL NULL NULL NULL NULL No tables used
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct (/* select#2 */ select t1.a))",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
CREATE TABLE t2 (a INT);
|
||||
INSERT INTO t2 VALUES (1), (2);
|
||||
# Optimization is not applied 'cause there is more than one table
|
||||
SELECT COUNT(DISTINCT t1.a) FROM t1, t2;
|
||||
COUNT(DISTINCT t1.a)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.a)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
SELECT AVG(DISTINCT t1.a) FROM t1, t2;
|
||||
AVG(DISTINCT t1.a)
|
||||
2.0000
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "avg(distinct t1.a)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
# Const tables, optimization is applied
|
||||
SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1) AS t2;
|
||||
COUNT(DISTINCT a)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT AVG(DISTINCT t1.a) FROM (SELECT 1 AS a) AS t2, t1, (SELECT 2 AS a) AS t3;
|
||||
AVG(DISTINCT t1.a)
|
||||
2.0000
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "avg(distinct t1.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT COUNT(DISTINCT a) FROM t1, (SELECT 1 UNION SELECT 2) AS t2;
|
||||
COUNT(DISTINCT a)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.a)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
# Unique index on two columns
|
||||
CREATE TABLE t3 (a INT NOT NULL, b INT NOT NULL);
|
||||
INSERT INTO t3 VALUES (1,1), (1,2), (1,3), (2,1), (2,2), (3,1), (3,2);
|
||||
CREATE UNIQUE INDEX t3_a_b ON t3 (a, b);
|
||||
# Optimization is applied:
|
||||
SELECT COUNT(DISTINCT a, b) FROM t3;
|
||||
COUNT(DISTINCT a, b)
|
||||
7
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t3.a,t3.b)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT COUNT(DISTINCT b, a) FROM t3;
|
||||
COUNT(DISTINCT b, a)
|
||||
7
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t3.b,t3.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT COUNT(DISTINCT b, a) FROM t3 WHERE a < 3;
|
||||
COUNT(DISTINCT b, a)
|
||||
5
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t3.b,t3.a)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
# Optimization is applied to one of the functions:
|
||||
SELECT COUNT(DISTINCT b), SUM(DISTINCT a), SUM(DISTINCT a + b) FROM t3 GROUP BY a;
|
||||
COUNT(DISTINCT b) SUM(DISTINCT a) SUM(DISTINCT a + b)
|
||||
3 1 9
|
||||
2 2 7
|
||||
2 3 9
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t3.b)",
|
||||
"aggregator_type": "simple"
|
||||
},
|
||||
{
|
||||
"function": "sum(distinct t3.a)",
|
||||
"aggregator_type": "distinct"
|
||||
},
|
||||
{
|
||||
"function": "sum(distinct t3.a + t3.b)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
# Can't apply optimization 'cause GROUP BY argument is not a field:
|
||||
SELECT COUNT(DISTINCT b) FROM t3 GROUP BY a+b;
|
||||
COUNT(DISTINCT b)
|
||||
1
|
||||
2
|
||||
3
|
||||
1
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t3.b)",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
# Test merged view
|
||||
CREATE VIEW v1 AS SELECT * FROM t1;
|
||||
# Optimization is applied
|
||||
SELECT COUNT(DISTINCT a, b) FROM v1;
|
||||
COUNT(DISTINCT a, b)
|
||||
3
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "count(distinct t1.a,t1.b)",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
# GROUP_CONCAT implements non-standard distinct aggregator
|
||||
SELECT GROUP_CONCAT(b) FROM t1;
|
||||
GROUP_CONCAT(b)
|
||||
1,1,1
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "group_concat(t1.b separator ',')",
|
||||
"aggregator_type": "simple"
|
||||
}
|
||||
]
|
||||
SELECT GROUP_CONCAT(DISTINCT b) FROM t1;
|
||||
GROUP_CONCAT(DISTINCT b)
|
||||
1
|
||||
SELECT JSON_DETAILED(JSON_EXTRACT(trace, '$**.prepare_sum_aggregators')) AS JS
|
||||
FROM INFORMATION_SCHEMA.OPTIMIZER_TRACE;
|
||||
JS
|
||||
[
|
||||
{
|
||||
"function": "group_concat(distinct t1.b separator ',')",
|
||||
"aggregator_type": "distinct"
|
||||
}
|
||||
]
|
||||
DROP TABLE t1, t2, t3;
|
||||
DROP VIEW v1;
|
||||
SET optimizer_trace = @save_optimizer_trace;
|
||||
#
|
||||
# end of 10.5 tests
|
||||
#
|
Reference in New Issue
Block a user