diff --git a/dbcon/execplan/aggregatecolumn.cpp b/dbcon/execplan/aggregatecolumn.cpp index a715de6f2..2546e9f18 100644 --- a/dbcon/execplan/aggregatecolumn.cpp +++ b/dbcon/execplan/aggregatecolumn.cpp @@ -586,7 +586,16 @@ void AggregateColumn::evaluate(Row& row, bool& isNull) break; case CalpontSystemCatalog::VARBINARY: - case CalpontSystemCatalog::BLOB: isNull = true; break; + case CalpontSystemCatalog::BLOB: + { + auto const str = row.getConstString(fInputIndex); + fResult.strVal.dropString(); + if (!str.isNull()) + fResult.strVal.assign((const uint8_t*)str.str(), str.length()); + + isNull = isNull || fResult.strVal.isNull(); + } + break; default: // treat as int64 if (row.equals<8>(BIGINTNULL, fInputIndex)) diff --git a/mysql-test/columnstore/basic/r/mcs35_select_group_by.result b/mysql-test/columnstore/basic/r/mcs35_select_group_by.result index eb7dbc28b..7cf594ab5 100644 --- a/mysql-test/columnstore/basic/r/mcs35_select_group_by.result +++ b/mysql-test/columnstore/basic/r/mcs35_select_group_by.result @@ -86,7 +86,11 @@ COUNT(*) 9 SELECT COUNT(*) FROM t1 GROUP BY t1_blob; COUNT(*) -14 +1 +1 +3 +3 +6 SELECT COUNT(*) FROM t1 GROUP BY t1_text; COUNT(*) 1 diff --git a/mysql-test/columnstore/bugfixes/MCOL-5755-incorrect-group-by-handling-of-BLOB.result b/mysql-test/columnstore/bugfixes/MCOL-5755-incorrect-group-by-handling-of-BLOB.result new file mode 100644 index 000000000..944999a55 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/MCOL-5755-incorrect-group-by-handling-of-BLOB.result @@ -0,0 +1,29 @@ +DROP DATABASE IF EXISTS MCOL5755; +CREATE DATABASE MCOL5755; +USE MCOL5755; +CREATE TABLE t1 (t TEXT,c CHAR(10),b BLOB) ENGINE = columnstore; +INSERT INTO t1 VALUES (NULL,NULL,NULL); +INSERT INTO t1 VALUES ("","",""); +INSERT INTO t1 VALUES ("hello","hello","hello"); +INSERT INTO t1 VALUES ("HELLO","HELLO","HELLO"); +INSERT INTO t1 VALUES ("HELLO MY","HELLO MY","HELLO MY"); +INSERT INTO t1 VALUES ("a","a","a"); +INSERT INTO t1 VALUES (1,1,1); +SELECT b FROM t1 GROUP BY b; +b + +1 +HELLO +HELLO MY +NULL +a +hello +SELECT t FROM t1 GROUP BY t; +t + +1 +HELLO MY +NULL +a +hello +DROP DATABASE MCOL5755; diff --git a/mysql-test/columnstore/bugfixes/MCOL-5755-incorrect-group-by-handling-of-BLOB.test b/mysql-test/columnstore/bugfixes/MCOL-5755-incorrect-group-by-handling-of-BLOB.test new file mode 100644 index 000000000..ff0160be6 --- /dev/null +++ b/mysql-test/columnstore/bugfixes/MCOL-5755-incorrect-group-by-handling-of-BLOB.test @@ -0,0 +1,18 @@ +--disable_warnings +DROP DATABASE IF EXISTS MCOL5755; +--enable_warnings +CREATE DATABASE MCOL5755; +USE MCOL5755; +CREATE TABLE t1 (t TEXT,c CHAR(10),b BLOB) ENGINE = columnstore; +INSERT INTO t1 VALUES (NULL,NULL,NULL); +INSERT INTO t1 VALUES ("","",""); +INSERT INTO t1 VALUES ("hello","hello","hello"); +INSERT INTO t1 VALUES ("HELLO","HELLO","HELLO"); +INSERT INTO t1 VALUES ("HELLO MY","HELLO MY","HELLO MY"); +INSERT INTO t1 VALUES ("a","a","a"); +INSERT INTO t1 VALUES (1,1,1); +--sorted_result +SELECT b FROM t1 GROUP BY b; +--sorted_result +SELECT t FROM t1 GROUP BY t; +DROP DATABASE MCOL5755; diff --git a/utils/common/conststring.h b/utils/common/conststring.h index 021529bf5..103b84a1c 100644 --- a/utils/common/conststring.h +++ b/utils/common/conststring.h @@ -81,7 +81,7 @@ class ConstString } bool eq(const ConstString& rhs) const { - if (!mStr) + if (!mStr || !rhs.mStr) { return mStr == rhs.mStr; } diff --git a/utils/rowgroup/rowaggregation.cpp b/utils/rowgroup/rowaggregation.cpp index f2d5ec2a8..b68aad4f8 100644 --- a/utils/rowgroup/rowaggregation.cpp +++ b/utils/rowgroup/rowaggregation.cpp @@ -953,6 +953,10 @@ void RowAggregation::initMapData(const Row& rowIn) break; } + case execplan::CalpontSystemCatalog::VARBINARY: + case execplan::CalpontSystemCatalog::BLOB: + fRow.setStringField(rowIn.getConstString(colIn), colOut); + break; case execplan::CalpontSystemCatalog::DOUBLE: case execplan::CalpontSystemCatalog::UDOUBLE: @@ -1304,6 +1308,8 @@ void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOu case execplan::CalpontSystemCatalog::CHAR: case execplan::CalpontSystemCatalog::VARCHAR: case execplan::CalpontSystemCatalog::TEXT: + case execplan::CalpontSystemCatalog::CLOB: + case execplan::CalpontSystemCatalog::BLOB: { auto valIn = rowIn.getStringField(colIn); fRow.setStringField(valIn, colOut); @@ -1343,12 +1349,6 @@ void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOu break; } - case execplan::CalpontSystemCatalog::CLOB: - case execplan::CalpontSystemCatalog::BLOB: - { - fRow.setVarBinaryField(rowIn.getVarBinaryField(colIn), rowIn.getVarBinaryLength(colIn), colOut); - break; - } default: { idbassert_s(0, "unknown data type in doSelectSome()"); diff --git a/utils/rowgroup/rowgroup.cpp b/utils/rowgroup/rowgroup.cpp index 5d05b9cdc..b8f835d7f 100644 --- a/utils/rowgroup/rowgroup.cpp +++ b/utils/rowgroup/rowgroup.cpp @@ -977,8 +977,14 @@ bool Row::equals(const Row& r2, uint32_t lastCol) const cscDataType columnType = getColType(col); if (UNLIKELY(typeHasCollation(columnType))) { + auto c1 = getConstString(col); + auto c2 = r2.getConstString(col); + if (c1.isNull() != c2.isNull()) + { + return false; + } datatypes::Charset cs(getCharset(col)); - if (cs.strnncollsp(getConstString(col), r2.getConstString(col))) + if (cs.strnncollsp(c1, c2)) { return false; } diff --git a/utils/rowgroup/rowgroup.h b/utils/rowgroup/rowgroup.h index 41af8a0a8..113f0ae6e 100644 --- a/utils/rowgroup/rowgroup.h +++ b/utils/rowgroup/rowgroup.h @@ -507,6 +507,9 @@ class Row inline void setBinaryField(const T* value, uint32_t colIndex); template inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex); + // XXX: TODO: I'd deprecate these two functions in favor of get/setStringField. + // getSetStringField properly support binary data of up to 4G bytes + // and also provide perfomant interface through use of ConstString. // support VARBINARY // Add 2-byte length at the CHARSET_INFO*beginning of the field. nullptr and zero length field are // treated the same, could use one of the length bit to distinguish these two cases.