1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

fix: MCOL-5755: incorrect handling of BLOB (and TEXT) in GROUP BY

BLOB fields did not work as grouping keys at all, they were assigned
value NULL for any value, be it NULL or not. The fix is in the
rowaggregation.cpp in the initMapping(), a switch/case branch was added
to handle BLOB field copying there.

Also, TEXT columns did not distinguish between NULL and empty string in
the grouping algorithm, now they do. The fix is in the equals()
function, now we specifically check for isNull() equality between
values.
This commit is contained in:
Serguey Zefirov
2024-07-09 10:29:08 +03:00
committed by Sergey Zefirov
parent f5089c7d80
commit 6e995e2e80
8 changed files with 79 additions and 10 deletions

View File

@ -586,7 +586,16 @@ void AggregateColumn::evaluate(Row& row, bool& isNull)
break;
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::BLOB: isNull = true; break;
case CalpontSystemCatalog::BLOB:
{
auto const str = row.getConstString(fInputIndex);
fResult.strVal.dropString();
if (!str.isNull())
fResult.strVal.assign((const uint8_t*)str.str(), str.length());
isNull = isNull || fResult.strVal.isNull();
}
break;
default: // treat as int64
if (row.equals<8>(BIGINTNULL, fInputIndex))

View File

@ -86,7 +86,11 @@ COUNT(*)
9
SELECT COUNT(*) FROM t1 GROUP BY t1_blob;
COUNT(*)
14
1
1
3
3
6
SELECT COUNT(*) FROM t1 GROUP BY t1_text;
COUNT(*)
1

View File

@ -0,0 +1,29 @@
DROP DATABASE IF EXISTS MCOL5755;
CREATE DATABASE MCOL5755;
USE MCOL5755;
CREATE TABLE t1 (t TEXT,c CHAR(10),b BLOB) ENGINE = columnstore;
INSERT INTO t1 VALUES (NULL,NULL,NULL);
INSERT INTO t1 VALUES ("","","");
INSERT INTO t1 VALUES ("hello","hello","hello");
INSERT INTO t1 VALUES ("HELLO","HELLO","HELLO");
INSERT INTO t1 VALUES ("HELLO MY","HELLO MY","HELLO MY");
INSERT INTO t1 VALUES ("a","a","a");
INSERT INTO t1 VALUES (1,1,1);
SELECT b FROM t1 GROUP BY b;
b
1
HELLO
HELLO MY
NULL
a
hello
SELECT t FROM t1 GROUP BY t;
t
1
HELLO MY
NULL
a
hello
DROP DATABASE MCOL5755;

View File

@ -0,0 +1,18 @@
--disable_warnings
DROP DATABASE IF EXISTS MCOL5755;
--enable_warnings
CREATE DATABASE MCOL5755;
USE MCOL5755;
CREATE TABLE t1 (t TEXT,c CHAR(10),b BLOB) ENGINE = columnstore;
INSERT INTO t1 VALUES (NULL,NULL,NULL);
INSERT INTO t1 VALUES ("","","");
INSERT INTO t1 VALUES ("hello","hello","hello");
INSERT INTO t1 VALUES ("HELLO","HELLO","HELLO");
INSERT INTO t1 VALUES ("HELLO MY","HELLO MY","HELLO MY");
INSERT INTO t1 VALUES ("a","a","a");
INSERT INTO t1 VALUES (1,1,1);
--sorted_result
SELECT b FROM t1 GROUP BY b;
--sorted_result
SELECT t FROM t1 GROUP BY t;
DROP DATABASE MCOL5755;

View File

@ -81,7 +81,7 @@ class ConstString
}
bool eq(const ConstString& rhs) const
{
if (!mStr)
if (!mStr || !rhs.mStr)
{
return mStr == rhs.mStr;
}

View File

@ -953,6 +953,10 @@ void RowAggregation::initMapData(const Row& rowIn)
break;
}
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::BLOB:
fRow.setStringField(rowIn.getConstString(colIn), colOut);
break;
case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE:
@ -1304,6 +1308,8 @@ void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOu
case execplan::CalpontSystemCatalog::CHAR:
case execplan::CalpontSystemCatalog::VARCHAR:
case execplan::CalpontSystemCatalog::TEXT:
case execplan::CalpontSystemCatalog::CLOB:
case execplan::CalpontSystemCatalog::BLOB:
{
auto valIn = rowIn.getStringField(colIn);
fRow.setStringField(valIn, colOut);
@ -1343,12 +1349,6 @@ void RowAggregation::doSelectSome(const Row& rowIn, int64_t colIn, int64_t colOu
break;
}
case execplan::CalpontSystemCatalog::CLOB:
case execplan::CalpontSystemCatalog::BLOB:
{
fRow.setVarBinaryField(rowIn.getVarBinaryField(colIn), rowIn.getVarBinaryLength(colIn), colOut);
break;
}
default:
{
idbassert_s(0, "unknown data type in doSelectSome()");

View File

@ -977,8 +977,14 @@ bool Row::equals(const Row& r2, uint32_t lastCol) const
cscDataType columnType = getColType(col);
if (UNLIKELY(typeHasCollation(columnType)))
{
auto c1 = getConstString(col);
auto c2 = r2.getConstString(col);
if (c1.isNull() != c2.isNull())
{
return false;
}
datatypes::Charset cs(getCharset(col));
if (cs.strnncollsp(getConstString(col), r2.getConstString(col)))
if (cs.strnncollsp(c1, c2))
{
return false;
}

View File

@ -507,6 +507,9 @@ class Row
inline void setBinaryField(const T* value, uint32_t colIndex);
template <typename T>
inline void setBinaryField_offset(const T* value, uint32_t width, uint32_t colIndex);
// XXX: TODO: I'd deprecate these two functions in favor of get/setStringField.
// getSetStringField properly support binary data of up to 4G bytes
// and also provide perfomant interface through use of ConstString.
// support VARBINARY
// Add 2-byte length at the CHARSET_INFO*beginning of the field. nullptr and zero length field are
// treated the same, could use one of the length bit to distinguish these two cases.