From 05897948e4c353bcd49f6291b1f76b39a72a0a53 Mon Sep 17 00:00:00 2001 From: Roman Nozdrin Date: Thu, 30 Dec 2021 13:44:53 +0000 Subject: [PATCH] MCOL-4899 MCS now applies a correct collation running IN for character data types --- dbcon/execplan/simplecolumn.cpp | 41 +++++++----- dbcon/joblist/dictstep-jl.cpp | 2 +- .../columnstore/bugfixes/mcol-4899.result | 65 +++++++++++++++++++ .../columnstore/bugfixes/mcol-4899.test | 42 ++++++++++++ primitives/primproc/dictstep.cpp | 21 +----- 5 files changed, 136 insertions(+), 35 deletions(-) create mode 100644 mysql-test/columnstore/bugfixes/mcol-4899.result create mode 100644 mysql-test/columnstore/bugfixes/mcol-4899.test diff --git a/dbcon/execplan/simplecolumn.cpp b/dbcon/execplan/simplecolumn.cpp index 56a926108..8b77426ea 100644 --- a/dbcon/execplan/simplecolumn.cpp +++ b/dbcon/execplan/simplecolumn.cpp @@ -159,7 +159,7 @@ SimpleColumn::SimpleColumn(const string& token, const uint32_t sessionID): SimpleColumn::SimpleColumn(const string& schemaName, const string& tableName, const string& columnName, - const uint32_t sessionID, + const uint32_t sessionID, const int lower_case_table_names) : ReturnedColumn(sessionID), fSchemaName (schemaName), @@ -263,23 +263,32 @@ ostream& operator<<(ostream& output, const SimpleColumn& rhs) const string SimpleColumn::toString() const { + static const char delim = '/'; ostringstream output; + output << "SimpleColumn " << data() << endl; - output << " s/t/c/v/o/ct/TA/CA/RA/#/card/join/source/engine/colPos: " << schemaName() << '/' - << tableName() << '/' - << columnName() << '/' - << viewName() << '/' - << oid() << '/' - << colDataTypeToString(fResultType.colDataType) << '/' - << tableAlias() << '/' - << alias() << '/' - << returnAll() << '/' - << sequence() << '/' - << cardinality() << '/' - << joinInfo() << '/' - << colSource() << '/' - << (isColumnStore() ? "ColumnStore" : "ForeignEngine") << '/' - << colPosition() << endl; + // collations in both result and operations type are the same and + // set in the plugin code. + datatypes::Charset cs(fResultType.charsetNumber); + output << " s/t/c/v/o/ct/TA/CA/RA/#/card/join/source/engine/colPos/cs/coll: " + << schemaName() << delim + << tableName() << delim + << columnName() << delim + << viewName() << delim + << oid() << delim + << colDataTypeToString(fResultType.colDataType) << delim + << tableAlias() << delim + << alias() << delim + << returnAll() << delim + << sequence() << delim + << cardinality() << delim + << joinInfo() << delim + << colSource() << delim + << (isColumnStore() ? "ColumnStore" : "ForeignEngine") << delim + << colPosition() << delim + << cs.getCharset().cs_name.str << delim + << cs.getCharset().coll_name.str << delim + << endl; return output.str(); } diff --git a/dbcon/joblist/dictstep-jl.cpp b/dbcon/joblist/dictstep-jl.cpp index cc34f45ac..352fee68b 100644 --- a/dbcon/joblist/dictstep-jl.cpp +++ b/dbcon/joblist/dictstep-jl.cpp @@ -76,6 +76,7 @@ void DictStepJL::createCommand(ByteStream& bs) const bs << (uint8_t) DICT_STEP; bs << BOP; bs << (uint8_t)compressionType; + bs << charsetNumber; bs << filterCount; bs << (uint8_t) hasEqFilter; @@ -89,7 +90,6 @@ void DictStepJL::createCommand(ByteStream& bs) const } else bs << filterString; - bs << charsetNumber; CommandJL::createCommand(bs); } diff --git a/mysql-test/columnstore/bugfixes/mcol-4899.result b/mysql-test/columnstore/bugfixes/mcol-4899.result new file mode 100644 index 000000000..1f740121c --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-4899.result @@ -0,0 +1,65 @@ +DROP DATABASE IF EXISTS `mcol_4899`; +CREATE DATABASE `mcol_4899` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; +USE `mcol_4899`; +SELECT @cs_conn := @@character_set_connection; +@cs_conn := @@character_set_connection +latin1 +SET character_set_connection=utf8mb4; +CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore; +INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B'); +SELECT * FROM collation_test; +LETTERS +a +A +b +B +SELECT * FROM collation_test WHERE LETTERS LIKE 'a'; +LETTERS +a +SELECT * FROM collation_test WHERE LETTERS IN ('a','B'); +LETTERS +a +B +SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin); +LETTERS +a +DROP TABLE collation_test; +CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_bin; +INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B'); +SELECT * FROM collation_test; +LETTERS +a +A +b +B +SELECT * FROM collation_test WHERE LETTERS LIKE 'a'; +LETTERS +a +SELECT * FROM collation_test WHERE LETTERS IN ('a','B'); +LETTERS +a +B +SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin); +LETTERS +a +DROP TABLE collation_test; +CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4; +INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B'); +SELECT * FROM collation_test; +LETTERS +a +A +b +B +SELECT * FROM collation_test WHERE LETTERS LIKE 'a'; +LETTERS +a +A +SELECT * FROM collation_test WHERE LETTERS IN ('a','B'); +LETTERS +a +A +b +B +DROP DATABASE `mcol_4899`; +SET character_set_connection=@cs_conn; diff --git a/mysql-test/columnstore/bugfixes/mcol-4899.test b/mysql-test/columnstore/bugfixes/mcol-4899.test new file mode 100644 index 000000000..1cb2ffa84 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/mcol-4899.test @@ -0,0 +1,42 @@ +# +# MCOL-4899 MCS doesn't apply collation running IN with character data types +# + +-- source ../include/have_columnstore.inc + +--disable_warnings +DROP DATABASE IF EXISTS `mcol_4899`; +--enable_warnings +CREATE DATABASE `mcol_4899` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin; +USE `mcol_4899`; + +SELECT @cs_conn := @@character_set_connection; +SET character_set_connection=utf8mb4; + +CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore; +INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B'); +SELECT * FROM collation_test; +SELECT * FROM collation_test WHERE LETTERS LIKE 'a'; +SELECT * FROM collation_test WHERE LETTERS IN ('a','B'); +# MCS doesn't apply explicit COLLATE as of 6.2.2 +SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin); + +DROP TABLE collation_test; +CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_bin; +INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B'); +SELECT * FROM collation_test; +SELECT * FROM collation_test WHERE LETTERS LIKE 'a'; +SELECT * FROM collation_test WHERE LETTERS IN ('a','B'); +# MCS doesn't apply explicit COLLATE as of 6.2.2 +SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin); + +# re-testing with case insensitive collation +DROP TABLE collation_test; +CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4; +INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B'); +SELECT * FROM collation_test; +SELECT * FROM collation_test WHERE LETTERS LIKE 'a'; +SELECT * FROM collation_test WHERE LETTERS IN ('a','B'); + +DROP DATABASE `mcol_4899`; +SET character_set_connection=@cs_conn; diff --git a/primitives/primproc/dictstep.cpp b/primitives/primproc/dictstep.cpp index 0aa6e92fa..2aa8566b0 100644 --- a/primitives/primproc/dictstep.cpp +++ b/primitives/primproc/dictstep.cpp @@ -78,6 +78,7 @@ void DictStep::createCommand(ByteStream& bs) bs >> BOP; bs >> tmp8; compressionType = tmp8; + bs >> charsetNumber; bs >> filterCount; bs >> tmp8; hasEqFilter = tmp8; @@ -85,34 +86,18 @@ void DictStep::createCommand(ByteStream& bs) if (hasEqFilter) { string strTmp; - - eqFilter.reset(new primitives::DictEqualityFilter(my_charset_latin1)); + datatypes::Charset cs(charsetNumber); + eqFilter.reset(new primitives::DictEqualityFilter(cs)); bs >> eqOp; - //cout << "saw the eqfilter count=" << filterCount << endl; for (uint32_t i = 0; i < filterCount; i++) { bs >> strTmp; - //cout << " " << strTmp << endl; eqFilter->insert(strTmp); } } else bs >> filterString; - - bs >> charsetNumber; -#if 0 - cout << "see " << filterCount << " filters\n"; - DictFilterElement* filters = (DictFilterElement*) filterString.buf(); - - for (uint32_t i = 0; i < filterCount; i++) - { - cout << " COP=" << (int) filters->COP << endl; - cout << " len=" << filters->len << endl; - cout << " string=" << filters->data << endl; - } - -#endif Command::createCommand(bs); }