1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4721 CHAR(1) is not collation-aware for GROUP/DISTINCT

This commit is contained in:
Alexander Barkov
2021-05-18 11:29:21 +04:00
parent 9a2887748e
commit bd4cbb542d
5 changed files with 61 additions and 60 deletions

View File

@ -372,6 +372,15 @@ inline bool isCharType(const datatypes::SystemCatalog::ColDataType type)
datatypes::SystemCatalog::TEXT == type); datatypes::SystemCatalog::TEXT == type);
} }
/** convenience function to determine if column type is one of
 *  the character types VARCHAR, CHAR or TEXT
 *
 *  Only the data type is inspected; the column width plays no part.
 */
inline bool typeHasCollation(const datatypes::SystemCatalog::ColDataType type)
{
    if (datatypes::SystemCatalog::CHAR == type)
        return true;

    if (datatypes::SystemCatalog::VARCHAR == type)
        return true;

    return datatypes::SystemCatalog::TEXT == type;
}
/** convenience function to determine if column type is a /** convenience function to determine if column type is a
* numeric type * numeric type
*/ */

View File

@ -0,0 +1,22 @@
SET NAMES utf8;
#
# MCOL-4721 CHAR(1) is not collation-aware for GROUP/DISTINCT
#
CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci);
INSERT INTO t1 VALUES ('a'),('A');
SELECT c1, COUNT(*) FROM t1 GROUP BY c1;
c1 COUNT(*)
a 2
SELECT DISTINCT c1 FROM t1;
c1
a
INSERT INTO t1 VALUES ('ä'),('Ä'),('ã'),('Ã');
SELECT c1, COUNT(*) FROM t1 GROUP BY c1 ORDER BY c1;
c1 COUNT(*)
a 4
ä 2
SELECT DISTINCT c1 FROM t1 ORDER BY c1;
c1
a
ä
DROP TABLE t1;

View File

@ -0,0 +1,17 @@
# Test for MCOL-4721: GROUP BY and DISTINCT on a CHAR(1) column
# declared with CHARACTER SET latin1 COLLATE latin1_swedish_ci.
--source ../include/have_columnstore.inc
--source ctype_cmp_combinations.inc
SET NAMES utf8;
--echo #
--echo # MCOL-4721 CHAR(1) is not collation-aware for GROUP/DISTINCT
--echo #
CREATE TABLE t1 (c1 CHAR(1) CHARACTER SET latin1 COLLATE latin1_swedish_ci);
# Two values that differ only in letter case.
INSERT INTO t1 VALUES ('a'),('A');
# The recorded result holds a single row for each of the next two queries.
SELECT c1, COUNT(*) FROM t1 GROUP BY c1;
SELECT DISTINCT c1 FROM t1;
# Add accented variants. The recorded result then has two groups:
# 'a' with 4 rows and 'ä' with 2 rows.
INSERT INTO t1 VALUES ('ä'),('Ä'),('ã'),('Ã');
# ORDER BY keeps the row order of the multi-row results stable.
SELECT c1, COUNT(*) FROM t1 GROUP BY c1 ORDER BY c1;
SELECT DISTINCT c1 FROM t1 ORDER BY c1;
DROP TABLE t1;

View File

@ -1130,54 +1130,6 @@ bool Row::equals(const std::string& val, uint32_t col) const
return true; return true;
} }
/**
 * Compare this row with r2 on the listed key columns only.
 *
 * How a column is compared depends on its type:
 *  - VARCHAR, CHAR and TEXT: through strnncollsp() of the column's
 *    charset, so the column collation decides equality. CHAR takes this
 *    path at every width; a CHAR(1) column must not fall through to the
 *    raw integer comparison at the end of the chain (MCOL-4721).
 *  - BLOB: length first, then memcmp() over the bytes.
 *  - LONGDOUBLE: getLongDoubleField().
 *  - wide decimal: the int128_t binary field.
 *  - anything else: getUintField().
 *
 * @param r2      row to compare against
 * @param keyCols indexes of the columns taking part in the comparison
 * @return false as soon as one listed column differs, true otherwise
 *         (including when keyCols is empty)
 */
bool Row::equals(const Row& r2, const std::vector<uint32_t>& keyCols) const
{
    for (uint32_t i = 0; i < keyCols.size(); i++)
    {
        const uint32_t& col = keyCols[i];
        cscDataType columnType = getColType(col);

        if (UNLIKELY(columnType == execplan::CalpontSystemCatalog::VARCHAR ||
                     columnType == execplan::CalpontSystemCatalog::CHAR ||
                     columnType == execplan::CalpontSystemCatalog::TEXT))
        {
            // Collation-aware comparison; a non-zero result means "different".
            CHARSET_INFO* cs = getCharset(col);

            if (cs->strnncollsp(getStringPointer(col), getStringLength(col),
                                r2.getStringPointer(col), r2.getStringLength(col)))
            {
                return false;
            }
        }
        else if (UNLIKELY(columnType == execplan::CalpontSystemCatalog::BLOB))
        {
            // Binary data: lengths must match before the bytes are compared.
            if (getStringLength(col) != r2.getStringLength(col))
                return false;

            if (memcmp(getStringPointer(col), r2.getStringPointer(col), getStringLength(col)))
                return false;
        }
        else if (UNLIKELY(columnType == execplan::CalpontSystemCatalog::LONGDOUBLE))
        {
            if (getLongDoubleField(col) != r2.getLongDoubleField(col))
                return false;
        }
        else if (UNLIKELY(datatypes::isWideDecimalType(columnType, colWidths[col])))
        {
            if (*getBinaryField<int128_t>(col) != *r2.getBinaryField<int128_t>(col))
                return false;
        }
        else if (getUintField(col) != r2.getUintField(col))
        {
            return false;
        }
    }

    return true;
}
bool Row::equals(const Row& r2, uint32_t lastCol) const bool Row::equals(const Row& r2, uint32_t lastCol) const
{ {
// This check fires with empty r2 only. // This check fires with empty r2 only.
@ -1196,9 +1148,7 @@ bool Row::equals(const Row& r2, uint32_t lastCol) const
for (uint32_t col = 0; col <= lastCol; col++) for (uint32_t col = 0; col <= lastCol; col++)
{ {
cscDataType columnType = getColType(col); cscDataType columnType = getColType(col);
if (UNLIKELY(columnType == execplan::CalpontSystemCatalog::VARCHAR || if (UNLIKELY(typeHasCollation(columnType)))
(columnType == execplan::CalpontSystemCatalog::CHAR && (colWidths[col] > 1)) ||
columnType == execplan::CalpontSystemCatalog::TEXT))
{ {
CHARSET_INFO* cs = getCharset(col); CHARSET_INFO* cs = getCharset(col);
if (cs->strnncollsp(getStringPointer(col), getStringLength(col), if (cs->strnncollsp(getStringPointer(col), getStringLength(col),
@ -1304,10 +1254,7 @@ RowGroup::RowGroup(uint32_t colCount,
else else
stOffsets[i + 1] = stOffsets[i] + colWidths[i]; stOffsets[i + 1] = stOffsets[i] + colWidths[i];
execplan::CalpontSystemCatalog::ColDataType type = types[i]; if (colHasCollation(i))
if ((type == execplan::CalpontSystemCatalog::CHAR && (colWidths[i] > 1)) ||
type == execplan::CalpontSystemCatalog::VARCHAR ||
type == execplan::CalpontSystemCatalog::TEXT)
{ {
hasCollation = true; hasCollation = true;
} }
@ -1916,10 +1863,7 @@ RowGroup RowGroup::truncate(uint32_t cols)
ret.hasLongStringField = true; ret.hasLongStringField = true;
} }
execplan::CalpontSystemCatalog::ColDataType type = types[i]; if (colHasCollation(i))
if ((type == execplan::CalpontSystemCatalog::CHAR && (colWidths[i] > 1)) ||
type == execplan::CalpontSystemCatalog::VARCHAR ||
type == execplan::CalpontSystemCatalog::TEXT)
{ {
ret.hasCollation = true; ret.hasCollation = true;
} }

View File

@ -372,6 +372,11 @@ public:
inline bool isShortString(uint32_t colIndex) const; inline bool isShortString(uint32_t colIndex) const;
inline bool isLongString(uint32_t colIndex) const; inline bool isLongString(uint32_t colIndex) const;
/** Tell whether column colIndex has a type accepted by
 *  datatypes::typeHasCollation(). Only the type returned by
 *  getColType() is passed on; nothing else about the column is checked here.
 */
bool colHasCollation(uint32_t colIndex) const
{
return datatypes::typeHasCollation(getColType(colIndex));
}
template<int len> inline uint64_t getUintField(uint32_t colIndex) const; template<int len> inline uint64_t getUintField(uint32_t colIndex) const;
inline uint64_t getUintField(uint32_t colIndex) const; inline uint64_t getUintField(uint32_t colIndex) const;
template<int len> inline int64_t getIntField(uint32_t colIndex) const; template<int len> inline int64_t getIntField(uint32_t colIndex) const;
@ -555,7 +560,6 @@ public:
inline uint64_t hash() const; // generates a hash for all cols inline uint64_t hash() const; // generates a hash for all cols
inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const; inline void colUpdateMariaDBHasher(datatypes::MariaDBHasher &hasher, uint32_t col) const;
bool equals(const Row&, const std::vector<uint32_t>& keyColumns) const;
bool equals(const Row&, uint32_t lastCol) const; bool equals(const Row&, uint32_t lastCol) const;
inline bool equals(const Row&) const; inline bool equals(const Row&) const;
@ -1508,6 +1512,11 @@ public:
inline bool isShortString(uint32_t colIndex) const; inline bool isShortString(uint32_t colIndex) const;
inline bool isLongString(uint32_t colIndex) const; inline bool isLongString(uint32_t colIndex) const;
/** Tell whether column colIndex has a type accepted by
 *  datatypes::typeHasCollation(). Only the type returned by
 *  getColType() is passed on; nothing else about the column is checked here.
 */
bool colHasCollation(uint32_t colIndex) const
{
return datatypes::typeHasCollation(getColType(colIndex));
}
inline const std::vector<uint32_t>& getScale() const; inline const std::vector<uint32_t>& getScale() const;
inline const std::vector<uint32_t>& getPrecision() const; inline const std::vector<uint32_t>& getPrecision() const;