From 5bcc1cd1f0865151ee8e229b7c2c10664644d602 Mon Sep 17 00:00:00 2001 From: Alexander Barkov Date: Mon, 15 Feb 2021 16:25:48 +0400 Subject: [PATCH] A joint patch for MCOL-4527 (a performance hack) and MCOL-4539 (a bug fix) - MCOL-4527 Simple query performance is degraded between 5.4 and 5.5 xxx_nopad_bin collations are now around 30% faster on simple queries like: SELECT * FROM t1 WHERE short_char_column_nopad_bin = 'literal' The gain is achieved by comparing two short CHAR values as uint64_t. Note, this patch does not affect xxx_bin collations! It wouldn't be correct to apply the same improvement for xxx_bin collations (i.e. with PAD SPACE attribute), because it would change the way trailing spaces are compared. - MCOL-4539 WHERE short_char_column='literal' ignores the collation on a huge table Only the first thread used a correct collation when performing: WHERE short_char_column='literal' Other (15) threads used the server default collation, because the charsetNumber attribute was not copied during cloning. - This patch also adds mtr/basic/suite.opt, so "mtr" can run without --extern. 
--- mtr/basic/r/ctype_cmp_char4_latin1_bin.result | 43 ++++++++++++++++ mtr/basic/suite.opt | 1 + mtr/basic/t/ctype_cmp_char4_latin1_bin.test | 51 +++++++++++++++++++ mtr/basic/t/ctype_cmp_create.inc | 18 +------ .../default_storage_engine_by_combination.inc | 17 +++++++ primitives/linux-port/column.cpp | 5 ++ primitives/primproc/columncommand.cpp | 1 + 7 files changed, 119 insertions(+), 17 deletions(-) create mode 100644 mtr/basic/r/ctype_cmp_char4_latin1_bin.result create mode 100644 mtr/basic/suite.opt create mode 100644 mtr/basic/t/ctype_cmp_char4_latin1_bin.test create mode 100644 mtr/basic/t/default_storage_engine_by_combination.inc diff --git a/mtr/basic/r/ctype_cmp_char4_latin1_bin.result b/mtr/basic/r/ctype_cmp_char4_latin1_bin.result new file mode 100644 index 000000000..220aa1163 --- /dev/null +++ b/mtr/basic/r/ctype_cmp_char4_latin1_bin.result @@ -0,0 +1,43 @@ +SET NAMES utf8; +# +# MCOL-4539 WHERE short_char_column='literal' ignores the collation on a huge table +# +DROP DATABASE IF EXISTS mcs_ctype_cmp_char4_latin1_bin; +CREATE DATABASE mcs_ctype_cmp_char4_latin1_bin; +USE mcs_ctype_cmp_char4_latin1_bin; +CREATE TABLE t1 (id INT, c CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin); +INSERT INTO t1 VALUES (0,'aaaa'); +INSERT INTO t1 VALUES (0,'bbbb'); +INSERT INTO t1 VALUES (0,'AAAA'); +INSERT INTO t1 VALUES (0,'BBBB'); +INSERT INTO t1 VALUES (0,'aaaa'); +INSERT INTO t1 VALUES (0,'bbbb'); +INSERT INTO t1 VALUES (0,'AAAA'); +INSERT INTO t1 VALUES (0,'BBBB'); +CREATE TABLE t2 (c CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin); +INSERT INTO t2 SELECT t1_5.c FROM +t1 t1_0 +JOIN t1 t1_1 USING (id) +JOIN t1 t1_2 USING (id) +JOIN t1 t1_3 USING (id) +JOIN t1 t1_4 USING (id) +JOIN t1 t1_5 USING (id) +; +SELECT COUNT(*) FROM t2; +COUNT(*) +262144 +SELECT DISTINCT c FROM t2 WHERE c='aaaa'; +c +aaaa +SELECT DISTINCT c FROM t2 WHERE c='AAAA'; +c +AAAA +CREATE TABLE t3 LIKE t2; +INSERT INTO t3 SELECT * FROM t2 WHERE c='aaaa'; +SELECT c, MIN(c), MAX(c), COUNT(*) 
FROM t3 GROUP BY c; +c MIN(c) MAX(c) COUNT(*) +aaaa aaaa aaaa 65536 +DROP TABLE t3; +DROP TABLE t2; +DROP TABLE t1; +DROP DATABASE mcs_ctype_cmp_char4_latin1_bin; diff --git a/mtr/basic/suite.opt b/mtr/basic/suite.opt new file mode 100644 index 000000000..fbd322fdd --- /dev/null +++ b/mtr/basic/suite.opt @@ -0,0 +1 @@ +--plugin-load-add=$HA_COLUMNSTORE_SO diff --git a/mtr/basic/t/ctype_cmp_char4_latin1_bin.test b/mtr/basic/t/ctype_cmp_char4_latin1_bin.test new file mode 100644 index 000000000..04843639e --- /dev/null +++ b/mtr/basic/t/ctype_cmp_char4_latin1_bin.test @@ -0,0 +1,51 @@ +--source ../include/have_columnstore.inc +--source ctype_cmp_combinations.inc +--source default_storage_engine_by_combination.inc + + +SET NAMES utf8; + +--echo # +--echo # MCOL-4539 WHERE short_char_column='literal' ignores the collation on a huge table +--echo # + +--disable_warnings +DROP DATABASE IF EXISTS mcs_ctype_cmp_char4_latin1_bin; +--enable_warnings + +CREATE DATABASE mcs_ctype_cmp_char4_latin1_bin; +USE mcs_ctype_cmp_char4_latin1_bin; + +CREATE TABLE t1 (id INT, c CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin); +INSERT INTO t1 VALUES (0,'aaaa'); +INSERT INTO t1 VALUES (0,'bbbb'); +INSERT INTO t1 VALUES (0,'AAAA'); +INSERT INTO t1 VALUES (0,'BBBB'); +INSERT INTO t1 VALUES (0,'aaaa'); +INSERT INTO t1 VALUES (0,'bbbb'); +INSERT INTO t1 VALUES (0,'AAAA'); +INSERT INTO t1 VALUES (0,'BBBB'); + +CREATE TABLE t2 (c CHAR(4) CHARACTER SET latin1 COLLATE latin1_bin); + +INSERT INTO t2 SELECT t1_5.c FROM + t1 t1_0 + JOIN t1 t1_1 USING (id) + JOIN t1 t1_2 USING (id) + JOIN t1 t1_3 USING (id) + JOIN t1 t1_4 USING (id) + JOIN t1 t1_5 USING (id) +; +SELECT COUNT(*) FROM t2; +SELECT DISTINCT c FROM t2 WHERE c='aaaa'; +SELECT DISTINCT c FROM t2 WHERE c='AAAA'; + +CREATE TABLE t3 LIKE t2; +INSERT INTO t3 SELECT * FROM t2 WHERE c='aaaa'; +SELECT c, MIN(c), MAX(c), COUNT(*) FROM t3 GROUP BY c; + +DROP TABLE t3; +DROP TABLE t2; +DROP TABLE t1; + +DROP DATABASE mcs_ctype_cmp_char4_latin1_bin; 
diff --git a/mtr/basic/t/ctype_cmp_create.inc b/mtr/basic/t/ctype_cmp_create.inc index d4202749c..09690b00a 100644 --- a/mtr/basic/t/ctype_cmp_create.inc +++ b/mtr/basic/t/ctype_cmp_create.inc @@ -1,20 +1,4 @@ ---disable_query_log -SET @combination=NULL; -if ($MTR_COMBINATION_MYISAM) -{ - SET @combination="MYISAM"; -} -if ($MTR_COMBINATION_COLUMNSTORE) -{ - SET @combination="COLUMNSTORE"; -} -# -# If we're running without combinations, e.g. with --extern, -# then set default_storage_engine to ColumnStore. -# Otherwise (when running without --extern) it's set by the *.combinations file. -# -SET @@default_storage_engine=COALESCE(@combination,'ColumnStore'); ---enable_query_log +--source default_storage_engine_by_combination.inc --disable_query_log --eval SET @mysqltest_file='$MYSQLTEST_FILE'; diff --git a/mtr/basic/t/default_storage_engine_by_combination.inc b/mtr/basic/t/default_storage_engine_by_combination.inc new file mode 100644 index 000000000..56fd51044 --- /dev/null +++ b/mtr/basic/t/default_storage_engine_by_combination.inc @@ -0,0 +1,17 @@ +--disable_query_log +SET @combination=NULL; +if ($MTR_COMBINATION_MYISAM) +{ + SET @combination="MYISAM"; +} +if ($MTR_COMBINATION_COLUMNSTORE) +{ + SET @combination="COLUMNSTORE"; +} +# +# If we're running without combinations, e.g. with --extern, +# then set default_storage_engine to ColumnStore. +# Otherwise (when running without --extern) it's set by the *.combinations file. 
+# +SET @@default_storage_engine=COALESCE(@combination,'ColumnStore'); +--enable_query_log diff --git a/primitives/linux-port/column.cpp b/primitives/linux-port/column.cpp index fd5214c0b..7f42e1655 100644 --- a/primitives/linux-port/column.cpp +++ b/primitives/linux-port/column.cpp @@ -707,6 +707,11 @@ inline bool colCompare(int64_t val1, int64_t val2, uint8_t COP, uint8_t rf, { if (!regex.used && !rf) { + // A temporary hack for xxx_nopad_bin collations + // TODO: MCOL-4534 Improve comparison performance in 8bit nopad_bin collations + if ((typeHolder.getCharset().state & (MY_CS_BINSORT|MY_CS_NOPAD)) == + (MY_CS_BINSORT|MY_CS_NOPAD)) + return colCompare_(order_swap(val1), order_swap(val2), COP); utils::ConstString s1 = {reinterpret_cast<const char*>(&val1), 8}; utils::ConstString s2 = {reinterpret_cast<const char*>(&val2), 8}; return colCompareStr(typeHolder, COP, s1.rtrimZero(), s2.rtrimZero()); diff --git a/primitives/primproc/columncommand.cpp b/primitives/primproc/columncommand.cpp index acccf0537..a785b75cb 100644 --- a/primitives/primproc/columncommand.cpp +++ b/primitives/primproc/columncommand.cpp @@ -842,6 +842,7 @@ void ColumnCommand::duplicate(ColumnCommand* cc) cc->colType.colDataType = colType.colDataType; cc->colType.compressionType = colType.compressionType; cc->colType.colWidth = colType.colWidth; + cc->colType.charsetNumber = colType.charsetNumber; cc->BOP = BOP; cc->filterCount = filterCount; cc->fFilterFeeder = fFilterFeeder;