1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-4899 MCS now applies the correct collation when evaluating IN for character data types

This commit is contained in:
Roman Nozdrin
2021-12-30 13:44:53 +00:00
committed by Roman Nozdrin
parent b3ab3fb514
commit 05897948e4
5 changed files with 136 additions and 35 deletions

View File

@ -159,7 +159,7 @@ SimpleColumn::SimpleColumn(const string& token, const uint32_t sessionID):
SimpleColumn::SimpleColumn(const string& schemaName, SimpleColumn::SimpleColumn(const string& schemaName,
const string& tableName, const string& tableName,
const string& columnName, const string& columnName,
const uint32_t sessionID, const uint32_t sessionID,
const int lower_case_table_names) : const int lower_case_table_names) :
ReturnedColumn(sessionID), ReturnedColumn(sessionID),
fSchemaName (schemaName), fSchemaName (schemaName),
@ -263,23 +263,32 @@ ostream& operator<<(ostream& output, const SimpleColumn& rhs)
const string SimpleColumn::toString() const const string SimpleColumn::toString() const
{ {
static const char delim = '/';
ostringstream output; ostringstream output;
output << "SimpleColumn " << data() << endl; output << "SimpleColumn " << data() << endl;
output << " s/t/c/v/o/ct/TA/CA/RA/#/card/join/source/engine/colPos: " << schemaName() << '/' // collations in both result and operations type are the same and
<< tableName() << '/' // set in the plugin code.
<< columnName() << '/' datatypes::Charset cs(fResultType.charsetNumber);
<< viewName() << '/' output << " s/t/c/v/o/ct/TA/CA/RA/#/card/join/source/engine/colPos/cs/coll: "
<< oid() << '/' << schemaName() << delim
<< colDataTypeToString(fResultType.colDataType) << '/' << tableName() << delim
<< tableAlias() << '/' << columnName() << delim
<< alias() << '/' << viewName() << delim
<< returnAll() << '/' << oid() << delim
<< sequence() << '/' << colDataTypeToString(fResultType.colDataType) << delim
<< cardinality() << '/' << tableAlias() << delim
<< joinInfo() << '/' << alias() << delim
<< colSource() << '/' << returnAll() << delim
<< (isColumnStore() ? "ColumnStore" : "ForeignEngine") << '/' << sequence() << delim
<< colPosition() << endl; << cardinality() << delim
<< joinInfo() << delim
<< colSource() << delim
<< (isColumnStore() ? "ColumnStore" : "ForeignEngine") << delim
<< colPosition() << delim
<< cs.getCharset().cs_name.str << delim
<< cs.getCharset().coll_name.str << delim
<< endl;
return output.str(); return output.str();
} }

View File

@ -76,6 +76,7 @@ void DictStepJL::createCommand(ByteStream& bs) const
bs << (uint8_t) DICT_STEP; bs << (uint8_t) DICT_STEP;
bs << BOP; bs << BOP;
bs << (uint8_t)compressionType; bs << (uint8_t)compressionType;
bs << charsetNumber;
bs << filterCount; bs << filterCount;
bs << (uint8_t) hasEqFilter; bs << (uint8_t) hasEqFilter;
@ -89,7 +90,6 @@ void DictStepJL::createCommand(ByteStream& bs) const
} }
else else
bs << filterString; bs << filterString;
bs << charsetNumber;
CommandJL::createCommand(bs); CommandJL::createCommand(bs);
} }

View File

@ -0,0 +1,65 @@
DROP DATABASE IF EXISTS `mcol_4899`;
CREATE DATABASE `mcol_4899` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
USE `mcol_4899`;
SELECT @cs_conn := @@character_set_connection;
@cs_conn := @@character_set_connection
latin1
SET character_set_connection=utf8mb4;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
LETTERS
a
A
b
B
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
LETTERS
a
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
LETTERS
a
B
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
LETTERS
a
DROP TABLE collation_test;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_bin;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
LETTERS
a
A
b
B
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
LETTERS
a
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
LETTERS
a
B
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
LETTERS
a
DROP TABLE collation_test;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
LETTERS
a
A
b
B
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
LETTERS
a
A
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
LETTERS
a
A
b
B
DROP DATABASE `mcol_4899`;
SET character_set_connection=@cs_conn;

View File

@ -0,0 +1,42 @@
#
# MCOL-4899 MCS doesn't apply collation running IN with character data types
#
# Regression test: creates the same four-row TEXT table ('a','A','b','B')
# three times with different table-level charset/collation settings and runs
# LIKE and IN filters against it, so the recorded result shows whether the
# filters compare case-sensitively or case-insensitively in each setup.
#
-- source ../include/have_columnstore.inc
# Start from a clean slate; warnings are suppressed because the database
# may not exist yet.
--disable_warnings
DROP DATABASE IF EXISTS `mcol_4899`;
--enable_warnings
# The database default is the binary (case-sensitive) utf8mb4 collation.
CREATE DATABASE `mcol_4899` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
USE `mcol_4899`;
# Remember the current connection character set; it is restored by the last
# statement of this test. Note that the SELECT echoes the saved value into
# the recorded result.
SELECT @cs_conn := @@character_set_connection;
SET character_set_connection=utf8mb4;
# Case 1: table created without an explicit charset or collation
# (presumably it inherits utf8mb4_bin from the database default).
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
# MCS doesn't apply explicit COLLATE as of 6.2.2
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
DROP TABLE collation_test;
# Case 2: same checks with utf8mb4_bin declared explicitly on the table.
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_bin;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
# MCS doesn't apply explicit COLLATE as of 6.2.2
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
# re-testing with case insensitive collation
# Case 3: the table declares only DEFAULT CHARSET=utf8mb4, with no COLLATE
# clause. The explicit-COLLATE query is not repeated for this case.
DROP TABLE collation_test;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
# Cleanup: drop the test database and restore the saved connection charset.
DROP DATABASE `mcol_4899`;
SET character_set_connection=@cs_conn;

View File

@ -78,6 +78,7 @@ void DictStep::createCommand(ByteStream& bs)
bs >> BOP; bs >> BOP;
bs >> tmp8; bs >> tmp8;
compressionType = tmp8; compressionType = tmp8;
bs >> charsetNumber;
bs >> filterCount; bs >> filterCount;
bs >> tmp8; bs >> tmp8;
hasEqFilter = tmp8; hasEqFilter = tmp8;
@ -85,34 +86,18 @@ void DictStep::createCommand(ByteStream& bs)
if (hasEqFilter) if (hasEqFilter)
{ {
string strTmp; string strTmp;
datatypes::Charset cs(charsetNumber);
eqFilter.reset(new primitives::DictEqualityFilter(my_charset_latin1)); eqFilter.reset(new primitives::DictEqualityFilter(cs));
bs >> eqOp; bs >> eqOp;
//cout << "saw the eqfilter count=" << filterCount << endl;
for (uint32_t i = 0; i < filterCount; i++) for (uint32_t i = 0; i < filterCount; i++)
{ {
bs >> strTmp; bs >> strTmp;
//cout << " " << strTmp << endl;
eqFilter->insert(strTmp); eqFilter->insert(strTmp);
} }
} }
else else
bs >> filterString; bs >> filterString;
bs >> charsetNumber;
#if 0
cout << "see " << filterCount << " filters\n";
DictFilterElement* filters = (DictFilterElement*) filterString.buf();
for (uint32_t i = 0; i < filterCount; i++)
{
cout << " COP=" << (int) filters->COP << endl;
cout << " len=" << filters->len << endl;
cout << " string=" << filters->data << endl;
}
#endif
Command::createCommand(bs); Command::createCommand(bs);
} }