1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

feat(extent-elimination)!: re-enable extent-elimination for dictionary columns scanning

This is the "productization" of old code that enables extent
elimination for dictionary columns.

This patch enables it, fixes the performance degradation (the main
problem with the old code) and also fixes incorrect behavior of cpimport.
This commit is contained in:
Sergey Zefirov
2023-11-17 17:14:35 +03:00
committed by GitHub
parent f5ff63b52f
commit 69b8e1c779
11 changed files with 197 additions and 64 deletions

View File

@ -0,0 +1,64 @@
DROP DATABASE IF EXISTS MCOL4580;
CREATE DATABASE MCOL4580;
USE MCOL4580;
CREATE TABLE t(d TEXT) ENGINE=COLUMNSTORE;
INSERT INTO t(d) VALUES ('b'),('b'),('b');
SELECT CALSETTRACE(1);
CALSETTRACE(1)
0
SELECT COUNT(*) FROM t WHERE d = 'a';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-0; PartitionBlocksEliminated-1;
SELECT COUNT(*) FROM t WHERE d < 'b';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-3; PartitionBlocksEliminated-0;
SELECT COUNT(*) FROM t WHERE d > 'b';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-3; PartitionBlocksEliminated-0;
SELECT COUNT(*) FROM t WHERE d <= 'a';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-0; PartitionBlocksEliminated-1;
SELECT COUNT(*) FROM t WHERE d >= 'c';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-0; PartitionBlocksEliminated-1;
SELECT COUNT(*) FROM t WHERE d != 'b';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-0; PartitionBlocksEliminated-1;
INSERT INTO t SELECT * FROM t;
SELECT COUNT(*) FROM t WHERE d = 'b';
COUNT(*)
6
DROP TABLE t;
CREATE TABLE t (c TEXT CHARACTER SET utf8 COLLATE utf8_czech_ci) engine=columnstore;
INSERT INTO t(c) VALUES ('ch'), ('ch');
SELECT COUNT(*) FROM t WHERE c < 'cz';
COUNT(*)
0
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
touched eliminated
BlocksTouched-0; PartitionBlocksEliminated-1;
SELECT COUNT(*) FROM t WHERE c > 'cz';
COUNT(*)
2
SELECT COUNT(*) FROM t WHERE c = 'CH';
COUNT(*)
2
DROP DATABASE MCOL4580;

View File

@ -0,0 +1,47 @@
--disable_warnings # we disable warnings through the test: as we use calsettrace(1), it produces many unnecessary warnings.
DROP DATABASE IF EXISTS MCOL4580;
CREATE DATABASE MCOL4580;
USE MCOL4580;
# -----------------------------------------------------------------------------
# Binary collation test.
CREATE TABLE t(d TEXT) ENGINE=COLUMNSTORE;
INSERT INTO t(d) VALUES ('b'),('b'),('b');
SELECT CALSETTRACE(1);
SELECT COUNT(*) FROM t WHERE d = 'a';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
# As strict comparisons became soft (strict less '<' became less or equal '<='), these two parts will not work as expected.
SELECT COUNT(*) FROM t WHERE d < 'b';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
SELECT COUNT(*) FROM t WHERE d > 'b';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
SELECT COUNT(*) FROM t WHERE d <= 'a';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
SELECT COUNT(*) FROM t WHERE d >= 'c';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
# note that extent elimination will eliminate extents with only single value
# in the case of not-equal predicate.
SELECT COUNT(*) FROM t WHERE d != 'b';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
INSERT INTO t SELECT * FROM t;
SELECT COUNT(*) FROM t WHERE d = 'b';
DROP TABLE t;
# -----------------------------------------------------------------------------
# Actual collation test.
# Reference chart: https://collation-charts.org/mysql60/mysql604.utf8_czech_ci.html
# We will use the fact that "cz" should go before "ch".
CREATE TABLE t (c TEXT CHARACTER SET utf8 COLLATE utf8_czech_ci) engine=columnstore;
INSERT INTO t(c) VALUES ('ch'), ('ch');
SELECT COUNT(*) FROM t WHERE c < 'cz';
SELECT REGEXP_SUBSTR(CALGETTRACE(1),'(BlocksTouched-[^;]*);') AS touched, REGEXP_SUBSTR(CALGETTRACE(1),'(PartitionBlocksEliminated-[^;]*);') AS eliminated;
# and to see we do not broke anything (must be 1):
SELECT COUNT(*) FROM t WHERE c > 'cz';
SELECT COUNT(*) FROM t WHERE c = 'CH';
DROP DATABASE MCOL4580;
--enable_warnings