1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-08-01 06:46:55 +03:00

MCOL-4234: improve GROUP BY and ORDER BY interaction (#3194)

This patch fixes the problem reported in MCOL-4234 and also generally
improves the behavior of GROUP BY.

It does so by introducing a "dummy" aggregate and wrapping columns in
it. This allows columns that are not in GROUP BY to be used more
freely. For example, in SELECT * FROM tbl GROUP BY col, all columns
other than "col" will be wrapped in the aggregate and the query will
proceed to execution.

The dummy aggregate itself does nothing more than remember the last
value passed into it.

There is also an additional error message that tries to explain what
types of expressions can be wrapped into an aggregate.
This commit is contained in:
Sergey Zefirov
2024-06-17 20:00:54 +03:00
committed by GitHub
parent b1045d27b6
commit 1122b64cb1
16 changed files with 312 additions and 25 deletions

View File

@ -80,7 +80,8 @@ SET columnstore_select_handler=ON;
SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2;
ERROR 42000: The storage engine for the table doesn't support MCS-2016: Non supported item 'col2' on the GROUP BY list.
SELECT col1 c FROM t1 ORDER BY AVG(col1);
ERROR HY000: Internal error: MCS-2021: 'c' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
c
10
SET columnstore_select_handler=AUTO;
SELECT SUM(col1) AS col2 FROM t1 GROUP BY col2;
col2

View File

@ -56,7 +56,9 @@ NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
SELECT * FROM t1 GROUP BY t1_tinyint;
ERROR HY000: Internal error: MCS-2021: '`mcs35_db1`.`t1`.`t1_int`' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
t1_tinyint t1_int t1_bigint t1_double t1_float t1_blob t1_text t1_char t1_varchar t1_datetime
0 NULL 403685477580676 54.797693231 8.40287 222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222222 dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd d hello world! 4387-11-08 11:22:30
NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL
SELECT COUNT(*) FROM t1 GROUP BY t1_tinyint;
COUNT(*)
11

View File

@ -69,7 +69,11 @@ spID userid MIN(t1.score)
3 3 3
SELECT t1.spID, t2.userid, MIN(t1.score)
FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid, t1.spID ORDER BY t1.date;
ERROR HY000: Internal error: MCS-2021: 'mcs36_db1.t1.date' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
spID userid MIN(t1.score)
1 1 1
2 1 1
2 2 2
3 3 3
SELECT t2.userid, MIN(t1.score)
FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY unknown ORDER BY t2.userid;
ERROR 42S22: Unknown column 'unknown' in 'group statement'
@ -78,5 +82,20 @@ FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY unknown;
ERROR 42S22: Unknown column 'unknown' in 'order clause'
SELECT t2.userid, MIN(t1.score)
FROM t1, t2 WHERE t1.userID=t2.userID GROUP BY t2.userid ORDER BY NULL;
ERROR HY000: Internal error: MCS-2021: 'unknown db.unknown table.unknown field' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
userid MIN(t1.score)
1 1
2 2
3 3
SELECT * FROM t1 GROUP BY spID, userID ORDER BY score ASC, spID, userID;
spID userID score lsg date
1 1 1 0000-00-00
2 1 1 0000-00-00
2 2 2 0000-00-00
3 3 3 0000-00-00
SELECT * FROM t1 GROUP BY spID, userID ORDER BY score DESC, spID, userID;
spID userID score lsg date
3 3 3 0000-00-00
2 2 2 0000-00-00
1 1 1 0000-00-00
2 1 1 0000-00-00
DROP DATABASE IF EXISTS mcs36_db1;

View File

@ -23,7 +23,9 @@ col1 col2
3 sss
4 ooo
SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
ERROR HY000: Internal error: MCS-2021: '`mcs76_db`.`t1`.`col2`' is not in GROUP BY clause. All non-aggregate columns in the SELECT and ORDER BY clause must be included in the GROUP BY clause.
col1 col2 SUM(LENGTH(col2))
2 oooooooooooooooooooo 40
4 ooo 6
CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore;
INSERT INTO t2 VALUES(1, '2020-2-2'),(2, '2020-3-3'),(5,'2020-6-6'),(6, '2020-7-7');
SELECT t1.col1, SUM(t1.col1*t2.col1) AS a FROM t1 JOIN t2 ON t1.col1 = t2.col1 GROUP BY t1.col1 HAVING a>1 ORDER BY t1.col1;