1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

fix(group by, having): MCOL-5776: GROUP BY/HAVING closer to server's (#3371)

This patch introduces an internal aggregate operator SELECT_SOME that
is automatically added to columns that are not in GROUP BY. It
"computes" some plausible value of the column (actually, last one
passed).

Along the way it fixes incorrect handling of HAVING being transferred
into WHERE, window function handling and a bit of other inconsistencies.
This commit is contained in:
Sergey Zefirov
2024-12-20 22:11:47 +03:00
committed by GitHub
parent 073f0b7f41
commit 60dc7550f1
20 changed files with 601 additions and 131 deletions

View File

@ -26,6 +26,9 @@ SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND c
col1 col2 SUM(LENGTH(col2))
2 oooooooooooooooooooo 40
4 ooo 6
SELECT 'some output';
some output
some output
CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore;
INSERT INTO t2 VALUES(1, '2020-2-2'),(2, '2020-3-3'),(5,'2020-6-6'),(6, '2020-7-7');
SELECT t1.col1, SUM(t1.col1*t2.col1) AS a FROM t1 JOIN t2 ON t1.col1 = t2.col1 GROUP BY t1.col1 HAVING a>1 ORDER BY t1.col1;

View File

@ -21,6 +21,7 @@ SELECT col1, col2 FROM t1 GROUP BY col1, col2 HAVING col1 > 1 OR col2 LIKE '%o%'
--sorted_result
SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
SELECT 'some output';
CREATE TABLE t2(col1 INT, col2 DATETIME)ENGINE=Columnstore;
INSERT INTO t2 VALUES(1, '2020-2-2'),(2, '2020-3-3'),(5,'2020-6-6'),(6, '2020-7-7');
SELECT t1.col1, SUM(t1.col1*t2.col1) AS a FROM t1 JOIN t2 ON t1.col1 = t2.col1 GROUP BY t1.col1 HAVING a>1 ORDER BY t1.col1;

View File

@ -0,0 +1,214 @@
DROP DATABASE IF EXISTS MCOL5776;
CREATE DATABASE MCOL5776;
USE MCOL5776;
CREATE TABLE t(x INTEGER, y INTEGER) ENGINE=Columnstore;
INSERT INTO t(x,y) VALUES (1,2), (2,3), (3,3);
SELECT COUNT(y) OVER (PARTITION BY y) FROM t GROUP BY x;
COUNT(y) OVER (PARTITION BY y)
1
2
2
SELECT COUNT(y) OVER (PARTITION BY LEFT(y, 10)) FROM t GROUP BY x;
COUNT(y) OVER (PARTITION BY LEFT(y, 10))
1
2
2
DROP TABLE t;
CREATE TABLE t(ci1 integer, ci2 integer) engine=Columnstore;
INSERT INTO t(ci1, ci2) VALUES (NULL, 1), (NULL, 2), (1,3), (1,4), (2,5), (2,6), (3,7), (3,8);
SELECT ci1+ci2, ci1+ci2, SUM(ci2), AVG(ci2) FROM t GROUP BY ci1+ci2, ci1+ci2;
ci1+ci2 ci1+ci2 SUM(ci2) AVG(ci2)
10 10 7 7.0000
11 11 8 8.0000
4 4 3 3.0000
5 5 4 4.0000
7 7 5 5.0000
8 8 6 6.0000
NULL NULL 3 1.5000
SELECT CONCAT(ci1,ci2), CONCAT(ci1,ci2), SUM(ci2), AVG(ci2) FROM t GROUP BY ci1;
CONCAT(ci1,ci2) CONCAT(ci1,ci2) SUM(ci2) AVG(ci2)
14 14 7 3.5000
26 26 11 5.5000
38 38 15 7.5000
NULL NULL 3 1.5000
SELECT sum(ci1), abs(ci1) FROM t GROUP BY abs(ci1), abs(ci1);
sum(ci1) abs(ci1)
2 1
4 2
6 3
NULL NULL
SELECT sum(ci1), abs(ci1) FROM t GROUP BY abs(ci1);
sum(ci1) abs(ci1)
2 1
4 2
6 3
NULL NULL
DROP TABLE t;
CREATE TABLE t1(col1 INT, col2 TEXT)ENGINE=Columnstore;
INSERT INTO t1 VALUES(1, repeat('s',20)),(2, repeat('o',20)),(3, 'sss'),(4, 'ooo');
INSERT INTO t1 SELECT * FROM t1;
SELECT * FROM t1;
col1 col2
1 ssssssssssssssssssss
2 oooooooooooooooooooo
3 sss
4 ooo
1 ssssssssssssssssssss
2 oooooooooooooooooooo
3 sss
4 ooo
SELECT col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' ORDER BY col2;
col2
ooo
oooooooooooooooooooo
SELECT col1 FROM t1 GROUP BY col1 HAVING col1 > 1 ORDER BY col1;
col1
2
3
4
SELECT col1, col2 FROM t1 GROUP BY col2 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
col1 col2
2 oooooooooooooooooooo
4 ooo
SELECT col1, col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
col1 col2
2 oooooooooooooooooooo
4 ooo
SELECT col2, col1 FROM t1 GROUP BY col2 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
ooo 4
SELECT col2, col1 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
ooo 4
SELECT col1, col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
col1 col2
2 oooooooooooooooooooo
4 ooo
SELECT col1, col2 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
col1 col2
2 oooooooooooooooooooo
4 ooo
SELECT col1, col2 FROM t1 GROUP BY col1 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
col1 col2
2 oooooooooooooooooooo
4 ooo
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
ooo 4
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
ooo 4
SELECT col1, col2 FROM t1 GROUP BY col1 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
col1 col2
2 oooooooooooooooooooo
4 ooo
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 = 'ooo' ORDER BY col1;
col2 col1
ooo 4
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 = 'ooo' AND col1 > 1 ORDER BY col1;
col2 col1
ooo 4
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 >= 'ooo' ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
sss 3
ooo 4
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 >= 'ooo' AND col1 > 1 ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
sss 3
ooo 4
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 < 'ooo' ORDER BY col1;
col2 col1
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 < 'ooo' AND col1 > 1 ORDER BY col1;
col2 col1
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND 'ooo' < col2 ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
sss 3
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING 'ooo' < col2 AND col1 > 1 ORDER BY col1;
col2 col1
oooooooooooooooooooo 2
sss 3
SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING SUM(LENGTH(col2)) > 10 ORDER BY col1;
col1 col2 SUM(LENGTH(col2))
1 ssssssssssssssssssss 40
2 oooooooooooooooooooo 40
SELECT col1, col2, SUM(LENGTH(col2)) a FROM t1 GROUP BY col1 HAVING a > 1 AND col2 LIKE '%o%' ORDER BY col1;
col1 col2 a
2 oooooooooooooooooooo 40
4 ooo 6
DROP TABLE t1;
CREATE TABLE empsalary (
depname VARCHAR(100),
empno BIGINT,
salary INT,
enroll_date DATE
) ENGINE=Columnstore;
INSERT INTO empsalary VALUES ('develop' , 10, 5200, '2007-08-01');
INSERT INTO empsalary VALUES ('sales' , 1, 5000, '2006-10-01');
INSERT INTO empsalary VALUES ('personnel', 5, 3500, '2007-12-10');
INSERT INTO empsalary VALUES ('sales' , 4, 4800, '2007-08-08');
INSERT INTO empsalary VALUES ('personnel', 2, 3900, '2006-12-23');
INSERT INTO empsalary VALUES ('develop' , 7, 4200, '2008-01-01');
INSERT INTO empsalary VALUES ('develop' , 9, 4500, '2008-01-01');
INSERT INTO empsalary VALUES ('sales' , 3, 4800, '2007-08-01');
INSERT INTO empsalary VALUES ('develop' , 8, 6000, '2006-10-01');
INSERT INTO empsalary VALUES ('develop' , 11, 5200, '2007-08-15');
INSERT INTO empsalary VALUES ('develop' , 12, null, '2008-08-09');
SELECT depname, empno, MODA(salary) OVER(PARTITION BY depname ORDER BY enroll_date) FROM empsalary ORDER BY depname, empno, enroll_date;
depname empno MODA(salary) OVER(PARTITION BY depname ORDER BY enroll_date)
develop 7 5200
develop 8 6000
develop 9 5200
develop 10 5200
develop 11 5200
develop 12 5200
personnel 2 3900
personnel 5 3500
sales 1 5000
sales 3 4800
sales 4 4800
SELECT AVG(salary),depname, MODA(salary) OVER(PARTITION BY depname ORDER BY enroll_date) FROM empsalary GROUP BY depname ORDER BY depname, AVG(salary);
AVG(salary) depname MODA(salary) OVER(PARTITION BY depname ORDER BY enroll_date)
5020.0000 develop 0
3700.0000 personnel 3900
4866.6667 sales 4800
DROP TABLE empsalary;
CREATE TABLE orders(o_custkey INT) ENGINE=Columnstore;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) < 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) <= 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) > 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) >= 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) = 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) <> 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) IN (15, 20) ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) NOT IN (15, 20) ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) BETWEEN 15 AND 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) NOT BETWEEN 15 and 20 ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) IS NULL ORDER BY 1;
o_custkey COUNT(*)
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) IS NOT NULL ORDER BY 1;
o_custkey COUNT(*)
DROP TABLE orders;
CREATE TABLE t(k INT) ENGINE = Columnstore;
INSERT INTO t(k) VALUES (1), (2), (2), (3), (3), (4), (4),(4),(4),(4),(995), (NULL);
SELECT k + k a FROM t GROUP BY a HAVING a >= 8;
a
1990
8
DROP DATABASE MCOL5776;

View File

@ -0,0 +1,99 @@
--disable_warnings
DROP DATABASE IF EXISTS MCOL5776;
--enable_warnings
CREATE DATABASE MCOL5776;
USE MCOL5776;
CREATE TABLE t(x INTEGER, y INTEGER) ENGINE=Columnstore;
INSERT INTO t(x,y) VALUES (1,2), (2,3), (3,3);
SELECT COUNT(y) OVER (PARTITION BY y) FROM t GROUP BY x;
SELECT COUNT(y) OVER (PARTITION BY LEFT(y, 10)) FROM t GROUP BY x;
DROP TABLE t;
CREATE TABLE t(ci1 integer, ci2 integer) engine=Columnstore;
INSERT INTO t(ci1, ci2) VALUES (NULL, 1), (NULL, 2), (1,3), (1,4), (2,5), (2,6), (3,7), (3,8);
--sorted_result
SELECT ci1+ci2, ci1+ci2, SUM(ci2), AVG(ci2) FROM t GROUP BY ci1+ci2, ci1+ci2;
--sorted_result
SELECT CONCAT(ci1,ci2), CONCAT(ci1,ci2), SUM(ci2), AVG(ci2) FROM t GROUP BY ci1;
--sorted_result
SELECT sum(ci1), abs(ci1) FROM t GROUP BY abs(ci1), abs(ci1);
--sorted_result
SELECT sum(ci1), abs(ci1) FROM t GROUP BY abs(ci1);
DROP TABLE t;
CREATE TABLE t1(col1 INT, col2 TEXT)ENGINE=Columnstore;
INSERT INTO t1 VALUES(1, repeat('s',20)),(2, repeat('o',20)),(3, 'sss'),(4, 'ooo');
INSERT INTO t1 SELECT * FROM t1;
SELECT * FROM t1;
SELECT col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' ORDER BY col2;
SELECT col1 FROM t1 GROUP BY col1 HAVING col1 > 1 ORDER BY col1;
SELECT col1, col2 FROM t1 GROUP BY col2 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
SELECT col1, col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col2 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
SELECT col1, col2 FROM t1 GROUP BY col2 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
SELECT col1, col2 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
SELECT col1, col2 FROM t1 GROUP BY col1 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 LIKE '%o%' ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
SELECT col1, col2 FROM t1 GROUP BY col1 HAVING col2 LIKE '%o%' AND col1 > 1 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 = 'ooo' ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 = 'ooo' AND col1 > 1 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 >= 'ooo' ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 >= 'ooo' AND col1 > 1 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND col2 < 'ooo' ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col2 < 'ooo' AND col1 > 1 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING col1 > 1 AND 'ooo' < col2 ORDER BY col1;
SELECT col2, col1 FROM t1 GROUP BY col1 HAVING 'ooo' < col2 AND col1 > 1 ORDER BY col1;
SELECT col1, col2, SUM(LENGTH(col2)) FROM t1 GROUP BY col1 HAVING SUM(LENGTH(col2)) > 10 ORDER BY col1;
SELECT col1, col2, SUM(LENGTH(col2)) a FROM t1 GROUP BY col1 HAVING a > 1 AND col2 LIKE '%o%' ORDER BY col1;
DROP TABLE t1;
CREATE TABLE empsalary (
depname VARCHAR(100),
empno BIGINT,
salary INT,
enroll_date DATE
) ENGINE=Columnstore;
INSERT INTO empsalary VALUES ('develop' , 10, 5200, '2007-08-01');
INSERT INTO empsalary VALUES ('sales' , 1, 5000, '2006-10-01');
INSERT INTO empsalary VALUES ('personnel', 5, 3500, '2007-12-10');
INSERT INTO empsalary VALUES ('sales' , 4, 4800, '2007-08-08');
INSERT INTO empsalary VALUES ('personnel', 2, 3900, '2006-12-23');
INSERT INTO empsalary VALUES ('develop' , 7, 4200, '2008-01-01');
INSERT INTO empsalary VALUES ('develop' , 9, 4500, '2008-01-01');
INSERT INTO empsalary VALUES ('sales' , 3, 4800, '2007-08-01');
INSERT INTO empsalary VALUES ('develop' , 8, 6000, '2006-10-01');
INSERT INTO empsalary VALUES ('develop' , 11, 5200, '2007-08-15');
INSERT INTO empsalary VALUES ('develop' , 12, null, '2008-08-09');
SELECT depname, empno, MODA(salary) OVER(PARTITION BY depname ORDER BY enroll_date) FROM empsalary ORDER BY depname, empno, enroll_date;
SELECT AVG(salary),depname, MODA(salary) OVER(PARTITION BY depname ORDER BY enroll_date) FROM empsalary GROUP BY depname ORDER BY depname, AVG(salary);
DROP TABLE empsalary;
CREATE TABLE orders(o_custkey INT) ENGINE=Columnstore;
# These checks for absence of exceptions and SIGSEGV's
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) < 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) <= 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) > 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) >= 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) = 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) <> 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) IN (15, 20) ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) NOT IN (15, 20) ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) BETWEEN 15 AND 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) NOT BETWEEN 15 and 20 ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) IS NULL ORDER BY 1;
SELECT o_custkey, COUNT(*) FROM orders WHERE o_custkey < 100 GROUP BY o_custkey HAVING COUNT(*) IS NOT NULL ORDER BY 1;
DROP TABLE orders;
CREATE TABLE t(k INT) ENGINE = Columnstore;
INSERT INTO t(k) VALUES (1), (2), (2), (3), (3), (4), (4),(4),(4),(4),(995), (NULL);
--sorted_result
SELECT k + k a FROM t GROUP BY a HAVING a >= 8;
DROP DATABASE MCOL5776;

View File

@ -14,14 +14,6 @@ sales 3 4800
sales 4 4800
select avg(salary),depname, moda(salary) over(partition by depname order by enroll_date) from empsalary group by depname order by depname, avg(salary);
avg(salary) depname moda(salary) over(partition by depname order by enroll_date)
NULL develop 5200
4200.0000 develop 5200
4500.0000 develop 5200
5200.0000 develop 5200
5200.0000 develop 5200
6000.0000 develop 6000
3500.0000 personnel 3500
3900.0000 personnel 3900
4800.0000 sales 4800
4800.0000 sales 4800
5000.0000 sales 5000
5020.0000 develop 0
3700.0000 personnel 3900
4866.6667 sales 4800