You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
fix: MCOL-5755: incorrect handling of BLOB (and TEXT) in GROUP BY
BLOB fields did not work as grouping keys at all, they were assigned value NULL for any value, be it NULL or not. The fix is in the rowaggregation.cpp in the initMapping(), a switch/case branch was added to handle BLOB field copying there. Also, TEXT columns did not distinguish between NULL and empty string in the grouping algorithm, now they do. The fix is in the equals() function, now we specifically check for isNull() equality between values.
This commit is contained in:
committed by
Sergey Zefirov
parent
f5089c7d80
commit
6e995e2e80
@ -586,7 +586,16 @@ void AggregateColumn::evaluate(Row& row, bool& isNull)
|
||||
break;
|
||||
|
||||
case CalpontSystemCatalog::VARBINARY:
|
||||
case CalpontSystemCatalog::BLOB: isNull = true; break;
|
||||
case CalpontSystemCatalog::BLOB:
|
||||
{
|
||||
auto const str = row.getConstString(fInputIndex);
|
||||
fResult.strVal.dropString();
|
||||
if (!str.isNull())
|
||||
fResult.strVal.assign((const uint8_t*)str.str(), str.length());
|
||||
|
||||
isNull = isNull || fResult.strVal.isNull();
|
||||
}
|
||||
break;
|
||||
|
||||
default: // treat as int64
|
||||
if (row.equals<8>(BIGINTNULL, fInputIndex))
|
||||
|
@ -86,7 +86,11 @@ COUNT(*)
|
||||
9
|
||||
SELECT COUNT(*) FROM t1 GROUP BY t1_blob;
|
||||
COUNT(*)
|
||||
14
|
||||
1
|
||||
1
|
||||
3
|
||||
3
|
||||
6
|
||||
SELECT COUNT(*) FROM t1 GROUP BY t1_text;
|
||||
COUNT(*)
|
||||
1
|
||||
|
@ -0,0 +1,29 @@
|
||||
DROP DATABASE IF EXISTS MCOL5755;
|
||||
CREATE DATABASE MCOL5755;
|
||||
USE MCOL5755;
|
||||
CREATE TABLE t1 (t TEXT,c CHAR(10),b BLOB) ENGINE = columnstore;
|
||||
INSERT INTO t1 VALUES (NULL,NULL,NULL);
|
||||
INSERT INTO t1 VALUES ("","","");
|
||||
INSERT INTO t1 VALUES ("hello","hello","hello");
|
||||
INSERT INTO t1 VALUES ("HELLO","HELLO","HELLO");
|
||||
INSERT INTO t1 VALUES ("HELLO MY","HELLO MY","HELLO MY");
|
||||
INSERT INTO t1 VALUES ("a","a","a");
|
||||
INSERT INTO t1 VALUES (1,1,1);
|
||||
SELECT b FROM t1 GROUP BY b;
|
||||
b
|
||||
|
||||
1
|
||||
HELLO
|
||||
HELLO MY
|
||||
NULL
|
||||
a
|
||||
hello
|
||||
SELECT t FROM t1 GROUP BY t;
|
||||
t
|
||||
|
||||
1
|
||||
HELLO MY
|
||||
NULL
|
||||
a
|
||||
hello
|
||||
DROP DATABASE MCOL5755;
|
@ -0,0 +1,18 @@
|
||||
--disable_warnings
|
||||
DROP DATABASE IF EXISTS MCOL5755;
|
||||
--enable_warnings
|
||||
CREATE DATABASE MCOL5755;
|
||||
USE MCOL5755;
|
||||
CREATE TABLE t1 (t TEXT,c CHAR(10),b BLOB) ENGINE = columnstore;
|
||||
INSERT INTO t1 VALUES (NULL,NULL,NULL);
|
||||
INSERT INTO t1 VALUES ("","","");
|
||||
INSERT INTO t1 VALUES ("hello","hello","hello");
|
||||
INSERT INTO t1 VALUES ("HELLO","HELLO","HELLO");
|
||||
INSERT INTO t1 VALUES ("HELLO MY","HELLO MY","HELLO MY");
|
||||
INSERT INTO t1 VALUES ("a","a","a");
|
||||
INSERT INTO t1 VALUES (1,1,1);
|
||||
--sorted_result
|
||||
SELECT b FROM t1 GROUP BY b;
|
||||
--sorted_result
|
||||
SELECT t FROM t1 GROUP BY t;
|
||||
DROP DATABASE MCOL5755;
|
@ -81,7 +81,7 @@ class ConstString
|
||||
}
|
||||
bool eq(const ConstString& rhs) const
|
||||
{
|
||||
if (!mStr)
|
||||
if (!mStr || !rhs.mStr)
|
||||
{
|
||||
return mStr == rhs.mStr;
|
||||
}
|
||||
|
@ -953,6 +953,10 @@ void RowAggregation::initMapData(const Row& rowIn)
|
||||
|
||||
break;
|
||||
}
|
||||
case execplan::CalpontSystemCatalog::VARBINARY:
|
||||
case execplan::CalpontSystemCatalog::BLOB:
|
||||
fRow.setStringField(rowIn.getConstString(colIn), colOut);
|
||||
break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::DOUBLE:
|
||||
case execplan::CalpontSystemCatalog::UDOUBLE:
|
||||
@ -1304,6 +1308,8 @@ void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOu
|
||||
case execplan::CalpontSystemCatalog::CHAR:
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
case execplan::CalpontSystemCatalog::TEXT:
|
||||
case execplan::CalpontSystemCatalog::CLOB:
|
||||
case execplan::CalpontSystemCatalog::BLOB:
|
||||
{
|
||||
auto valIn = rowIn.getStringField(colIn);
|
||||
fRow.setStringField(valIn, colOut);
|
||||
@ -1343,12 +1349,6 @@ void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOu
|
||||
break;
|
||||
}
|
||||
|
||||
case execplan::CalpontSystemCatalog::CLOB:
|
||||
case execplan::CalpontSystemCatalog::BLOB:
|
||||
{
|
||||
fRow.setVarBinaryField(rowIn.getVarBinaryField(colIn), rowIn.getVarBinaryLength(colIn), colOut);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
{
|
||||
idbassert_s(0, "unknown data type in doSelectSome()");
|
||||
|
@ -977,8 +977,14 @@ bool Row::equals(const Row& r2, uint32_t lastCol) const
|
||||
cscDataType columnType = getColType(col);
|
||||
if (UNLIKELY(typeHasCollation(columnType)))
|
||||
{
|
||||
auto c1 = getConstString(col);
|
||||
auto c2 = r2.getConstString(col);
|
||||
if (c1.isNull() != c2.isNull())
|
||||
{
|
||||
return false;
|
||||
}
|
||||
datatypes::Charset cs(getCharset(col));
|
||||
if (cs.strnncollsp(getConstString(col), r2.getConstString(col)))
|
||||
if (cs.strnncollsp(c1, c2))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
@ -507,6 +507,9 @@ class Row
|
||||
inline void setBinaryField(const T* value, uint32_t colIndex);
|
||||
template <typename T>
|
||||
inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex);
|
||||
// XXX: TODO: I'd deprecate these two functions in favor of get/setStringField.
|
||||
// getSetStringField properly support binary data of up to 4G bytes
|
||||
// and also provide perfomant interface through use of ConstString.
|
||||
// support VARBINARY
|
||||
// Add 2-byte length at the CHARSET_INFO*beginning of the field. nullptr and zero length field are
|
||||
// treated the same, could use one of the length bit to distinguish these two cases.
|
||||
|
Reference in New Issue
Block a user