1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4580 extent elimination for dictionary-based text/varchar types

The idea is relatively simple: encode prefixes of collated strings as
integers and use them to compute extents' ranges. Then we can apply
extent elimination to string columns.

The actual patch does have all the code there but misses one important
step: we do not keep the collation index, we keep the charset index.
Because of this, some of the tests in the bugfix suite fail and thus the
main functionality is turned off.

The reason this patch is put into a PR at all is that it contains
changes that make CHAR/VARCHAR columns unsigned. This change is needed
for the vectorization work.
This commit is contained in:
Serguey Zefirov
2022-02-04 11:55:09 +00:00
parent a66a8dfabf
commit 53b9a2a0f9
54 changed files with 698 additions and 227 deletions

View File

@ -87,7 +87,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_general_ci)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -100,7 +100,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_general_ci)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message
@ -127,7 +127,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_general_nopad_ci)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -140,7 +140,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_general_nopad_ci)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message
@ -167,7 +167,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_bin)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -180,7 +180,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_bin)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message
@ -207,7 +207,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET koi8u COLLATE koi8u_nopad_bin)
c1 HEX(c1)
Ъ─ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_koi8u t1 c1 80FF 80FF
c1
Ъ─
Level Code Message
@ -220,7 +220,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET koi8u COLLATE koi8u_nopad_bin)
c1 HEX(c1)
ЪЪЪ─ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_koi8u t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_koi8u t1 c1 80FFFFFF 80FFFFFF
c1
ЪЪЪ─
Level Code Message

View File

@ -99,7 +99,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -112,7 +112,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -125,7 +125,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_swedish_ci)
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message
@ -152,7 +152,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -165,7 +165,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -178,7 +178,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_swedish_nopad_ci
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message
@ -205,7 +205,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_bin)
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -218,7 +218,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_bin)
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -231,7 +231,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin)
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message
@ -258,7 +258,7 @@ CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
c1 HEX(c1)
é E9
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFFFFE9 FFFFFFFFFFFFFFE9
mcs_ctype_extent_latin1 t1 c1 E9 E9
c1
é
Level Code Message
@ -271,7 +271,7 @@ CREATE TABLE t1 (c1 CHAR(2) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
c1 HEX(c1)
ÿ€ FF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFFFFFF80FF FFFFFFFFFFFF80FF
mcs_ctype_extent_latin1 t1 c1 80FF 80FF
c1
ÿ€
Level Code Message
@ -284,7 +284,7 @@ CREATE TABLE t1 (c1 CHAR(4) CHARACTER SET latin1 COLLATE latin1_nopad_bin)
c1 HEX(c1)
ÿÿÿ€ FFFFFF80
table_schema table_name column_name hex(CAST(e.min_value AS SIGNED)) hex(CAST(e.max_value AS SIGNED))
mcs_ctype_extent_latin1 t1 c1 FFFFFFFF80FFFFFF FFFFFFFF80FFFFFF
mcs_ctype_extent_latin1 t1 c1 80FFFFFF 80FFFFFF
c1
ÿÿÿ€
Level Code Message

View File

@ -6,6 +6,6 @@ insert into rounding_table values (26805, 1252, -9647);
insert into rounding_table values (26806, 573, -2804.5);
SELECT CASE a WHEN 26805 THEN ROUND(c/b, 2) WHEN 26806 THEN b END MCOL4940 FROM ( SELECT a, SUM(b) b, SUM(c) c FROM rounding_table GROUP BY a ) abc ;
MCOL4940
573
-7.71
573
DROP DATABASE mcol_4940;

View File

@ -8,7 +8,8 @@ USE mcol_4940;
create table rounding_table ( a int, b double, c double) engine=columnstore;
insert into rounding_table values (26805, 1252, -9647);
insert into rounding_table values (26806, 573, -2804.5);
--sorted_result
SELECT CASE a WHEN 26805 THEN ROUND(c/b, 2) WHEN 26806 THEN b END MCOL4940 FROM ( SELECT a, SUM(b) b, SUM(c) c FROM rounding_table GROUP BY a ) abc ;
DROP DATABASE mcol_4940;