1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-271 empty strings should not be NULLs (#2794)

This patch improves the handling of NULLs in textual fields in
ColumnStore. Previously, empty strings were treated as NULLs, which could
be a problem if the data schema allows empty strings. It was also one of
the major reasons for behavior differences between ColumnStore and the
other engines in the MariaDB family.

This patch also fixes some other bugs and incorrect behavior. For
example, the comparison "column <= ''" was handled incorrectly: for all
practical purposes it evaluated to a constant True before this patch.
This commit is contained in:
Sergey Zefirov
2023-03-30 17:26:45 +01:00
committed by Roman Nozdrin
parent 0ea592da80
commit b53c231ca6
417 changed files with 12459 additions and 3520 deletions

View File

@ -791,7 +791,9 @@ void WriteEngineWrapper::convertValue(const CalpontSystemCatalog::ColType& cscCo
case WriteEngine::WR_ULONGLONG: ((uint64_t*)valArray)[pos] = boost::any_cast<uint64_t>(data); break;
case WriteEngine::WR_TOKEN: ((Token*)valArray)[pos] = boost::any_cast<Token>(data); break;
case WriteEngine::WR_TOKEN:
((Token*)valArray)[pos] = boost::any_cast<Token>(data);
break;
case WriteEngine::WR_BINARY:
size_t size = cscColType.colWidth;
@ -1714,7 +1716,7 @@ int WriteEngineWrapper::insertColumnRecs(
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
int64_t strPrefix;
#endif
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
col_iter->data = nullToken;
@ -1728,10 +1730,10 @@ int WriteEngineWrapper::insertColumnRecs(
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
strPrefix = encodeStringPrefix(dctTuple.sigValue, dctTuple.sigSize,
dctnryStructList[i].fCharsetNumber);
#endif
dctTuple.isNull = false;
@ -1782,7 +1784,7 @@ int WriteEngineWrapper::insertColumnRecs(
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
int64_t strPrefix;
#endif
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
col_iter->data = nullToken;
@ -1796,7 +1798,7 @@ int WriteEngineWrapper::insertColumnRecs(
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
#if defined(XXX_WRITEENGINE_TOKENS_RANGES_XXX)
strPrefix = encodeStringPrefix_check_null(dctTuple.sigValue, dctTuple.sigSize,
@ -2532,7 +2534,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
{
colValPtr = &colValueList[(i * rowsPerColumn) + rows];
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
memcpy(colValPtr, &nullToken, 8);
@ -2543,7 +2545,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
@ -2589,7 +2591,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
{
colValPtr = &colValueList[(i * rowsPerColumn) + rows];
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
memcpy(colValPtr, &nullToken, 8);
@ -2600,7 +2602,7 @@ int WriteEngineWrapper::insertColumnRecsBinary(
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
@ -3107,7 +3109,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
{
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
col_iter->data = nullToken;
@ -3118,7 +3120,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
@ -3181,7 +3183,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
for (uint32_t rows = 0; rows < rowsLeft; rows++)
{
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
col_iter->data = nullToken;
@ -3192,7 +3194,7 @@ int WriteEngineWrapper::insertColumnRec_SYS(const TxnID& txnid, const CSCTypesLi
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
@ -3769,7 +3771,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
for (uint32_t rows = 0; rows < (totalRow - rowsLeft); rows++)
{
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
col_iter->data = nullToken;
@ -3780,7 +3782,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, dctnryStructList[i].fCompressionType);
@ -3844,7 +3846,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
for (uint32_t rows = 0; rows < rowsLeft; rows++)
{
if (dctStr_iter->length() == 0)
if (dctStr_iter->isNull())
{
Token nullToken;
col_iter->data = nullToken;
@ -3855,7 +3857,7 @@ int WriteEngineWrapper::insertColumnRec_Single(const TxnID& txnid, const CSCType
timer.start("tokenize");
#endif
DctnryTuple dctTuple;
dctTuple.sigValue = (unsigned char*)dctStr_iter->c_str();
dctTuple.sigValue = (unsigned char*)dctStr_iter->str();
dctTuple.sigSize = dctStr_iter->length();
dctTuple.isNull = false;
rc = tokenize(txnid, dctTuple, newDctnryStructList[i].fCompressionType);
@ -4218,7 +4220,7 @@ void WriteEngineWrapper::printInputValue(const ColStructList& colStructList, con
{
// We presume there will be a value.
auto tokenOidIdx = oidToIdxMap[dctnryStructList[i].columnOid];
std::cerr << "string [" << dictStrList[i][j] << "]" << std::endl;
std::cerr << "string [" << dictStrList[i][j].safeString("<<null>>") << "]" << std::endl;
bool isToken = colStructList[tokenOidIdx].colType == WriteEngine::WR_TOKEN &&
colStructList[tokenOidIdx].tokenFlag;
if (isToken && !colValueList[tokenOidIdx][j].data.empty())