You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-271 empty strings should not be NULLs (#2794)
This patch improves the handling of NULLs in textual fields in ColumnStore. Previously, empty strings were treated as NULLs, which could be a problem if the data schema allows empty strings. It was also one of the major reasons for behavioral differences between ColumnStore and other engines in the MariaDB family. This patch also fixes some other bugs and incorrect behavior, for example, the incorrect comparison "column <= ''", which evaluated to constant True for all purposes before this patch.
This commit is contained in:
committed by
Roman Nozdrin
parent
0ea592da80
commit
b53c231ca6
@ -791,7 +791,9 @@ void WriteEngineWrapper::convertValue(const CalpontSystemCatalog::ColType& cscCo
|
||||
|
||||
case WriteEngine::WR_ULONGLONG: ((uint64_t*)valArray)[pos] = boost::any_cast<uint64_t>(data); break;
|
||||
|
||||
case WriteEngine::WR_TOKEN: ((Token*)valArray)[pos] = boost::any_cast<Token>(data); break;
|
||||
case WriteEngine::WR_TOKEN:
|
||||
((Token*)valArray)[pos] = boost::any_cast<Token>(data);
|
||||
break;
|
||||
|
||||
case WriteEngine::WR_BINARY:
|
||||
size_t size = cscColType.colWidth;
|
||||
@ -1714,7 +1716,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
int64_t strPrefix;
|
||||
#endif
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
@ -1728,10 +1730,10 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
|
||||
strPrefix = encodeStringPrefix(dctTuple.sigValue, dctTuple.sigSize,
|
||||
dctnryStructList[i].fCharsetNumber);
|
||||
#endif
|
||||
dctTuple.isNull = false;
|
||||
@ -1782,7 +1784,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
int64_t strPrefix;
|
||||
#endif
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
@ -1796,7 +1798,7 @@ int WriteEngineWrapper::insertColumnRecs(
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
|
||||
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
|
||||
@ -2532,7 +2534,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
|
||||
{
|
||||
colValPtr = &colValueList[(i * rowsPerColumn) + rows];
|
||||
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
memcpy(colValPtr, &nullToken, 8);
|
||||
@ -2543,7 +2545,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
|
||||
@ -2589,7 +2591,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
|
||||
{
|
||||
colValPtr = &colValueList[(i * rowsPerColumn) + rows];
|
||||
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
memcpy(colValPtr, &nullToken, 8);
|
||||
@ -2600,7 +2602,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
|
||||
@ -3107,7 +3109,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
|
||||
|
||||
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
|
||||
{
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
@ -3118,7 +3120,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
|
||||
@ -3181,7 +3183,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
|
||||
|
||||
for (uint32_t rows = 0; rows < rowsLeft; rows++)
|
||||
{
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
@ -3192,7 +3194,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
|
||||
@ -3769,7 +3771,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
|
||||
|
||||
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
|
||||
{
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
@ -3780,7 +3782,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
|
||||
@ -3844,7 +3846,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
|
||||
|
||||
for (uint32_t rows = 0; rows < rowsLeft; rows++)
|
||||
{
|
||||
if (dctStr_iter->length() == 0)
|
||||
if (dctStr_iter->isNull())
|
||||
{
|
||||
Token nullToken;
|
||||
col_iter->data = nullToken;
|
||||
@ -3855,7 +3857,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
|
||||
timer.start("tokenize");
|
||||
#endif
|
||||
DctnryTuple dctTuple;
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
|
||||
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
|
||||
dctTuple.sigSize = dctStr_iter->length();
|
||||
dctTuple.isNull = false;
|
||||
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
|
||||
@ -4218,7 +4220,7 @@ void WriteEngineWrapper::printInputValue(const ColStructList& colStructList, con
|
||||
{
|
||||
// We presume there will be a value.
|
||||
auto tokenOidIdx = oidToIdxMap[dctnryStructList[i].columnOid];
|
||||
std::cerr << "string [" << dictStrList[i][j] << "]" << std::endl;
|
||||
std::cerr << "string [" << dictStrList[i][j].safeString("<<null>>") << "]" << std::endl;
|
||||
bool isToken = colStructList[tokenOidIdx].colType == WriteEngine::WR_TOKEN &&
|
||||
colStructList[tokenOidIdx].tokenFlag;
|
||||
if (isToken && !colValueList[tokenOidIdx][j].data.empty())
|
||||
|
Reference in New Issue
Block a user