1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-4899 MCS now applies the correct collation when evaluating IN for character data types

This commit is contained in:
Roman Nozdrin
2021-12-30 13:44:53 +00:00
committed by Roman Nozdrin
parent b3ab3fb514
commit 05897948e4
5 changed files with 136 additions and 35 deletions

View File

@ -263,23 +263,32 @@ ostream& operator<<(ostream& output, const SimpleColumn& rhs)
// Returns a human-readable, multi-line description of this column: a
// "SimpleColumn <data()>" header line followed by attribute lines whose
// fields are separated by '/'. The label at the start of each attribute line
// names the fields in the order they are streamed.
//
// NOTE(review): this text was taken from a rendered commit diff without +/-
// markers. The two attribute statements below look like the pre-change and
// post-change versions of the same statement rather than two live statements;
// confirm against the real file before relying on the exact output.
const string SimpleColumn::toString() const
{
// Separator written between the attribute fields of the second statement.
static const char delim = '/';
ostringstream output;
output << "SimpleColumn " << data() << endl;
// Attribute line without charset/collation; fields are joined by a literal '/'.
output << " s/t/c/v/o/ct/TA/CA/RA/#/card/join/source/engine/colPos: " << schemaName() << '/'
<< tableName() << '/'
<< columnName() << '/'
<< viewName() << '/'
<< oid() << '/'
<< colDataTypeToString(fResultType.colDataType) << '/'
<< tableAlias() << '/'
<< alias() << '/'
<< returnAll() << '/'
<< sequence() << '/'
<< cardinality() << '/'
<< joinInfo() << '/'
<< colSource() << '/'
<< (isColumnStore() ? "ColumnStore" : "ForeignEngine") << '/'
<< colPosition() << endl;
// collations in both result and operations type are the same and
// set in the plugin code.
// The charset is looked up from the result type's charset number only.
datatypes::Charset cs(fResultType.charsetNumber);
// Same attributes as above plus the charset name and collation name
// ("cs/coll" in the label). A delimiter is also written after the collation
// name, so this line ends with a trailing '/'.
output << " s/t/c/v/o/ct/TA/CA/RA/#/card/join/source/engine/colPos/cs/coll: "
<< schemaName() << delim
<< tableName() << delim
<< columnName() << delim
<< viewName() << delim
<< oid() << delim
<< colDataTypeToString(fResultType.colDataType) << delim
<< tableAlias() << delim
<< alias() << delim
<< returnAll() << delim
<< sequence() << delim
<< cardinality() << delim
<< joinInfo() << delim
<< colSource() << delim
<< (isColumnStore() ? "ColumnStore" : "ForeignEngine") << delim
<< colPosition() << delim
<< cs.getCharset().cs_name.str << delim
<< cs.getCharset().coll_name.str << delim
<< endl;
return output.str();
}

View File

@ -76,6 +76,7 @@ void DictStepJL::createCommand(ByteStream& bs) const
bs << (uint8_t) DICT_STEP;
bs << BOP;
bs << (uint8_t)compressionType;
bs << charsetNumber;
bs << filterCount;
bs << (uint8_t) hasEqFilter;
@ -89,7 +90,6 @@ void DictStepJL::createCommand(ByteStream& bs) const
}
else
bs << filterString;
bs << charsetNumber;
CommandJL::createCommand(bs);
}

View File

@ -0,0 +1,65 @@
DROP DATABASE IF EXISTS `mcol_4899`;
CREATE DATABASE `mcol_4899` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
USE `mcol_4899`;
SELECT @cs_conn := @@character_set_connection;
@cs_conn := @@character_set_connection
latin1
SET character_set_connection=utf8mb4;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
LETTERS
a
A
b
B
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
LETTERS
a
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
LETTERS
a
B
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
LETTERS
a
DROP TABLE collation_test;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_bin;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
LETTERS
a
A
b
B
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
LETTERS
a
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
LETTERS
a
B
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
LETTERS
a
DROP TABLE collation_test;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
LETTERS
a
A
b
B
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
LETTERS
a
A
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
LETTERS
a
A
b
B
DROP DATABASE `mcol_4899`;
SET character_set_connection=@cs_conn;

View File

@ -0,0 +1,42 @@
#
# MCOL-4899 MCS doesn't apply the column collation when running IN on
# character data types.
#
# The same four rows ('a','A','b','B') are loaded into a TEXT column three
# times, each time with a different table charset/collation clause, and the
# LIKE and IN filters are run against each variant.
#
-- source ../include/have_columnstore.inc
# Warnings are switched off only around the cleanup DROP below.
--disable_warnings
DROP DATABASE IF EXISTS `mcol_4899`;
--enable_warnings
# The database default is utf8mb4 with the binary collation utf8mb4_bin.
CREATE DATABASE `mcol_4899` CHARACTER SET utf8mb4 COLLATE utf8mb4_bin;
USE `mcol_4899`;
# Remember the connection character set so it can be restored at the end.
SELECT @cs_conn := @@character_set_connection;
SET character_set_connection=utf8mb4;
# Variant 1: no charset/collation clause on the table.
# NOTE(review): presumably the table inherits the database default
# (utf8mb4_bin) here -- confirm.
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
# MCS doesn't apply explicit COLLATE as of 6.2.2
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
DROP TABLE collation_test;
# Variant 2: utf8mb4 with utf8mb4_bin spelled out on the table itself.
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4 COLLATE utf8mb4_bin;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
# MCS doesn't apply explicit COLLATE as of 6.2.2
SELECT * FROM collation_test WHERE LETTERS IN ('a' COLLATE utf8mb4_bin);
# Variant 3: re-test with a case-insensitive collation. The table names only
# the utf8mb4 charset, with no COLLATE clause.
DROP TABLE collation_test;
CREATE TABLE collation_test (LETTERS TEXT NULL)ENGINE=ColumnStore DEFAULT CHARSET=utf8mb4;
INSERT INTO collation_test (LETTERS) VALUES ('a'),('A'),('b'),('B');
SELECT * FROM collation_test;
SELECT * FROM collation_test WHERE LETTERS LIKE 'a';
SELECT * FROM collation_test WHERE LETTERS IN ('a','B');
# Cleanup: the last collation_test table is not dropped separately before the
# database is dropped; then the saved connection character set is restored.
DROP DATABASE `mcol_4899`;
SET character_set_connection=@cs_conn;

View File

@ -78,6 +78,7 @@ void DictStep::createCommand(ByteStream& bs)
bs >> BOP;
bs >> tmp8;
compressionType = tmp8;
bs >> charsetNumber;
bs >> filterCount;
bs >> tmp8;
hasEqFilter = tmp8;
@ -85,35 +86,19 @@ void DictStep::createCommand(ByteStream& bs)
if (hasEqFilter)
{
string strTmp;
eqFilter.reset(new primitives::DictEqualityFilter(my_charset_latin1));
datatypes::Charset cs(charsetNumber);
eqFilter.reset(new primitives::DictEqualityFilter(cs));
bs >> eqOp;
//cout << "saw the eqfilter count=" << filterCount << endl;
for (uint32_t i = 0; i < filterCount; i++)
{
bs >> strTmp;
//cout << " " << strTmp << endl;
eqFilter->insert(strTmp);
}
}
else
bs >> filterString;
bs >> charsetNumber;
#if 0
cout << "see " << filterCount << " filters\n";
DictFilterElement* filters = (DictFilterElement*) filterString.buf();
for (uint32_t i = 0; i < filterCount; i++)
{
cout << " COP=" << (int) filters->COP << endl;
cout << " len=" << filters->len << endl;
cout << " string=" << filters->data << endl;
}
#endif
Command::createCommand(bs);
}