1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-641 atoi128 now correctly processes decimal points and '-' signs.

There are now multiple overloaded versions of the low-level DML write methods to
push down the CSC column type. WE needs the type to convert values correctly.

Replaced WR_INT128 with the CSC data type, which is more informative.

Removed commented-out and obsolete code.

Replaced switch-case blocks with one-liners.
This commit is contained in:
drrtuy
2020-01-12 15:11:31 +03:00
committed by Roman Nozdrin
parent 49a5573418
commit 0c67b6ab50
8 changed files with 1475 additions and 362 deletions

View File

@ -1603,12 +1603,13 @@ bool optimizeIdbPatitionSimpleFilter(SimpleFilter* sf, JobStepVector& jsv, JobIn
// WIP MCOL-641 put this in dataconvert // WIP MCOL-641 put this in dataconvert
void atoi_(const string &arg, unsigned __int128 &res) void atoi128(const string& arg, unsigned __int128& res)
{ {
res = 0; res = 0;
for (size_t j = 0; j < arg.size(); j++) for (size_t j = 0; j < arg.size(); j++)
{ {
res = res*10 + arg[j] - '0'; if (LIKELY(arg[j]-'0' >= 0))
res = res*10 + arg[j] - '0';
} }
} }
@ -1902,7 +1903,7 @@ const JobStepVector doSimpleFilter(SimpleFilter* sf, JobInfo& jobInfo)
if (ct.colDataType == CalpontSystemCatalog::DECIMAL && if (ct.colDataType == CalpontSystemCatalog::DECIMAL &&
ct.colWidth == 16) ct.colWidth == 16)
{ {
atoi_(constval, val128); atoi128(constval, val128);
} }
else else
{ {

View File

@ -36,6 +36,7 @@ using namespace boost::algorithm;
#include "calpontsystemcatalog.h" #include "calpontsystemcatalog.h"
#include "calpontselectexecutionplan.h" #include "calpontselectexecutionplan.h"
#include "columnresult.h" #include "columnresult.h"
#include "common/branchpred.h"
using namespace execplan; using namespace execplan;
#include "joblisttypes.h" #include "joblisttypes.h"
@ -1207,7 +1208,9 @@ void DataConvert::toString(unsigned __int128 i, char *p)
// WIP MCOL-641 // WIP MCOL-641
// Template this // Template this
// result must be calloc-ed // result must be calloc-ed
void atoi_(const string &arg, int128_t &res, size_t &size) //template <typename T>
//void atoi_(const string &arg, T &res)
void atoi128(const string& arg, int128_t& res)
{ {
// WIP // WIP
//char buf[40]; //char buf[40];
@ -1215,13 +1218,13 @@ void atoi_(const string &arg, int128_t &res, size_t &size)
res = 0; res = 0;
for (size_t j = 0; j < arg.size(); j++) for (size_t j = 0; j < arg.size(); j++)
{ {
// WIP // WIP Optimize this
res = res*10 + arg[j] - '0'; if (LIKELY(arg[j]-'0' >= 0))
res = res*10 + arg[j] - '0';
} }
//toString(res, buf); //toString(res, buf);
//std::cerr << "atoi_ " << buf <<endl; //std::cerr << "atoi_ " << buf <<endl;
//*res_ptr = res; //*res_ptr = res;
size = 16;
} }
// WIP MCOL-641 // WIP MCOL-641
@ -1352,13 +1355,15 @@ DataConvert::convertColumnData(const CalpontSystemCatalog::ColType& colType,
break; break;
// MCOL-641 WIP // MCOL-641 WIP
// Simplest form of a template will use colType and width as a parameter
// There will be lots of specializations
case CalpontSystemCatalog::DECIMAL: case CalpontSystemCatalog::DECIMAL:
if (colType.colWidth == 16) if (colType.colWidth == 16)
{ {
//value = data;
size_t size;
int128_t bigint; int128_t bigint;
atoi_(data, bigint, size); // WIP
//atoi_<int128_t>(data, bigint);
atoi128(data, bigint);
value = bigint; value = bigint;
} }
else if (colType.colWidth == 1) else if (colType.colWidth == 1)
@ -2651,216 +2656,6 @@ std::string DataConvert::timeToString1( long long datetimevalue )
return buf; return buf;
} }
#if 0
bool DataConvert::isNullData(ColumnResult* cr, int rownum, CalpontSystemCatalog::ColType colType)
{
switch (colType.colDataType)
{
case CalpontSystemCatalog::TINYINT:
if (cr->GetData(rownum) == joblist::TINYINTNULL)
return true;
return false;
case CalpontSystemCatalog::SMALLINT:
if (cr->GetData(rownum) == joblist::SMALLINTNULL)
return true;
return false;
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
if (cr->GetData(rownum) == joblist::INTNULL)
return true;
return false;
case CalpontSystemCatalog::BIGINT:
if (cr->GetData(rownum) == static_cast<int64_t>(joblist::BIGINTNULL))
return true;
return false;
case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL:
{
if (colType.colWidth <= CalpontSystemCatalog::FOUR_BYTE)
{
if (cr->GetData(rownum) == joblist::SMALLINTNULL)
return true;
return false;
}
else if (colType.colWidth <= 9)
{
if (cr->GetData(rownum) == joblist::INTNULL)
return true;
else return false;
}
else if (colType.colWidth <= 18)
{
if (cr->GetData(rownum) == static_cast<int64_t>(joblist::BIGINTNULL))
return true;
return false;
}
else
{
if (cr->GetStringData(rownum) == "\376\377\377\377\377\377\377\377")
return true;
return false;
}
}
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
//if (cr->GetStringData(rownum) == joblist::FLOATNULL)
if (cr->GetStringData(rownum).compare("null") == 0 )
return true;
return false;
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
//if (cr->GetStringData(rownum) == joblist::DOUBLENULL)
if (cr->GetStringData(rownum).compare("null") == 0 )
return true;
return false;
case CalpontSystemCatalog::DATE:
if (cr->GetData(rownum) == joblist::DATENULL)
return true;
return false;
case CalpontSystemCatalog::DATETIME:
if (cr->GetData(rownum) == static_cast<int64_t>(joblist::DATETIMENULL))
return true;
return false;
case CalpontSystemCatalog::CHAR:
{
std::string charnull;
if ( cr->GetStringData(rownum) == "")
{
return true;
}
if (colType.colWidth == 1)
{
if (cr->GetStringData(rownum) == "\376")
return true;
return false;
}
else if (colType.colWidth == 2)
{
if (cr->GetStringData(rownum) == "\377\376")
return true;
return false;
}
else if (( colType.colWidth < 5 ) && ( colType.colWidth > 2 ))
{
if (cr->GetStringData(rownum) == "\377\377\377\376")
return true;
return false;
}
else if (( colType.colWidth < 9 ) && ( colType.colWidth > 4 ))
{
if (cr->GetStringData(rownum) == "\377\377\377\377\377\377\377\376")
return true;
return false;
}
else
{
if (cr->GetStringData(rownum) == "\376\377\377\377\377\377\377\377")
return true;
return false;
}
}
case CalpontSystemCatalog::VARCHAR:
{
std::string charnull;
if ( cr->GetStringData(rownum) == "")
{
return true;
}
if (colType.colWidth == 1)
{
if (cr->GetStringData(rownum) == "\377\376")
return true;
return false;
}
else if ((colType.colWidth < 4) && (colType.colWidth > 1))
{
if (cr->GetStringData(rownum) == "\377\377\377\376")
return true;
return false;
}
else if ((colType.colWidth < 8) && (colType.colWidth > 3))
{
if (cr->GetStringData(rownum) == "\377\377\377\377\377\377\377\376")
return true;
return false;
}
else
{
WriteEngine::Token nullToken;
// bytes reversed
if (cr->GetStringData(rownum) == "\376\377\377\377\377\377\377\377")
return true;
return false;
}
}
case CalpontSystemCatalog::UTINYINT:
if (cr->GetData(rownum) == joblist::UTINYINTNULL)
return true;
return false;
case CalpontSystemCatalog::USMALLINT:
if (cr->GetData(rownum) == joblist::USMALLINTNULL)
return true;
return false;
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
if (cr->GetData(rownum) == joblist::UINTNULL)
return true;
return false;
case CalpontSystemCatalog::UBIGINT:
if (cr->GetData(rownum) == joblist::UBIGINTNULL)
return true;
return false;
default:
throw QueryDataExcept("convertColumnData: unknown column data type.", dataTypeErr);
}
}
#endif
int64_t DataConvert::dateToInt(const string& date) int64_t DataConvert::dateToInt(const string& date)
{ {
return stringToDate(date); return stringToDate(date);

View File

@ -1,5 +1,5 @@
/* Copyright (C) 2014 InfiniDB, Inc. /* Copyright (C) 2014 InfiniDB, Inc.
Copyright (C) 2016 MariaDB Corporation Copyright (C) 2016-2019 MariaDB Corporation
This program is free software; you can redistribute it and/or This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License modify it under the terms of the GNU General Public License
@ -116,6 +116,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
RowList rows = tablePtr->get_RowList(); RowList rows = tablePtr->get_RowList();
WriteEngine::ColStructList colStructs; WriteEngine::ColStructList colStructs;
WriteEngine::CSCTypesList cscColTypes;
WriteEngine::DctnryStructList dctnryStructList; WriteEngine::DctnryStructList dctnryStructList;
WriteEngine::DctnryValueList dctnryValueList; WriteEngine::DctnryValueList dctnryValueList;
WriteEngine::ColValueList colValuesList; WriteEngine::ColValueList colValuesList;
@ -139,12 +140,18 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
{ {
Row* rowPtr = rows.at(0); Row* rowPtr = rows.at(0);
ColumnList columns = rowPtr->get_ColumnList(); ColumnList columns = rowPtr->get_ColumnList();
unsigned int numcols = rowPtr->get_NumberOfColumns();
cscColTypes.reserve(numcols);
// WIP
// We presume that DictCols number is low
colStructs.reserve(numcols);
ColumnList::const_iterator column_iterator = columns.begin(); ColumnList::const_iterator column_iterator = columns.begin();
while (column_iterator != columns.end()) while (column_iterator != columns.end())
{ {
DMLColumn* columnPtr = *column_iterator; DMLColumn* columnPtr = *column_iterator;
tableColName.column = columnPtr->get_Name(); tableColName.column = columnPtr->get_Name();
// WIP MCOL-641 replace with getColRidsOidsTypes()
CalpontSystemCatalog::ROPair roPair = systemCatalogPtr->columnRID(tableColName); CalpontSystemCatalog::ROPair roPair = systemCatalogPtr->columnRID(tableColName);
CalpontSystemCatalog::OID oid = systemCatalogPtr->lookupOID(tableColName); CalpontSystemCatalog::OID oid = systemCatalogPtr->lookupOID(tableColName);
@ -152,6 +159,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
CalpontSystemCatalog::ColType colType; CalpontSystemCatalog::ColType colType;
colType = systemCatalogPtr->colType(oid); colType = systemCatalogPtr->colType(oid);
cscColTypes.push_back(colType);
WriteEngine::ColStruct colStruct; WriteEngine::ColStruct colStruct;
colStruct.fColDbRoot = dbroot; colStruct.fColDbRoot = dbroot;
WriteEngine::DctnryStruct dctnryStruct; WriteEngine::DctnryStruct dctnryStruct;
@ -163,6 +171,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
// Token // Token
if ( isDictCol(colType) ) if ( isDictCol(colType) )
{ {
// WIP Hardcoded value
colStruct.colWidth = 8; colStruct.colWidth = 8;
colStruct.tokenFlag = true; colStruct.tokenFlag = true;
} }
@ -194,7 +203,6 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
++column_iterator; ++column_iterator;
} }
unsigned int numcols = rowPtr->get_NumberOfColumns();
std::string tmpStr(""); std::string tmpStr("");
for (unsigned int i = 0; i < numcols; i++) for (unsigned int i = 0; i < numcols; i++)
@ -210,6 +218,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
const DMLColumn* columnPtr = rowPtr->get_ColumnAt(i); const DMLColumn* columnPtr = rowPtr->get_ColumnAt(i);
tableColName.column = columnPtr->get_Name(); tableColName.column = columnPtr->get_Name();
// WIP MCOL-641 remove these calls
CalpontSystemCatalog::OID oid = systemCatalogPtr->lookupOID(tableColName); CalpontSystemCatalog::OID oid = systemCatalogPtr->lookupOID(tableColName);
CalpontSystemCatalog::ColType colType; CalpontSystemCatalog::ColType colType;
@ -303,6 +312,8 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
{ {
try try
{ {
// WIP What if we combine this and previous loop and fail
// after get nextAIValue ?
nextVal = systemCatalogPtr->nextAutoIncrValue(tableName); nextVal = systemCatalogPtr->nextAutoIncrValue(tableName);
fDbrm.startAISequence(oid, nextVal, colType.colWidth, colType.colDataType); fDbrm.startAISequence(oid, nextVal, colType.colWidth, colType.colDataType);
} }
@ -359,6 +370,8 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
try try
{ {
// WIP
// make convertColumnData a template
datavalue = DataConvert::convertColumnData(colType, indata, pushWarning, insertPkg.get_TimeZone(), isNULL, false, false); datavalue = DataConvert::convertColumnData(colType, indata, pushWarning, insertPkg.get_TimeZone(), isNULL, false, false);
} }
catch (exception&) catch (exception&)
@ -412,6 +425,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
// call the write engine to write the rows // call the write engine to write the rows
int error = NO_ERROR; int error = NO_ERROR;
// WIP
fWEWrapper.setDebugLevel(WriteEngine::DEBUG_3); fWEWrapper.setDebugLevel(WriteEngine::DEBUG_3);
cout << "inserting a row with transaction id " << txnid.id << endl; cout << "inserting a row with transaction id " << txnid.id << endl;
fWEWrapper.setIsInsert(true); fWEWrapper.setIsInsert(true);
@ -420,6 +434,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
//For hdfs use only //For hdfs use only
uint32_t tblOid = tableRoPair.objnum; uint32_t tblOid = tableRoPair.objnum;
// WIP are we saving HDFS?
if (idbdatafile::IDBPolicy::useHdfs()) if (idbdatafile::IDBPolicy::useHdfs())
{ {
@ -523,7 +538,7 @@ uint8_t WE_DMLCommandProc::processSingleInsert(messageqcpp::ByteStream& bs, std:
if (colValuesList[0].size() > 0) if (colValuesList[0].size() > 0)
{ {
if (NO_ERROR != if (NO_ERROR !=
(error = fWEWrapper.insertColumnRec_Single(txnid.id, colStructs, colValuesList, dctnryStructList, dicStringList, tableRoPair.objnum))) (error = fWEWrapper.insertColumnRec_Single(txnid.id, cscColTypes, colStructs, colValuesList, dctnryStructList, dicStringList, tableRoPair.objnum)))
{ {
if (error == ERR_BRM_DEAD_LOCK) if (error == ERR_BRM_DEAD_LOCK)
{ {

View File

@ -639,7 +639,8 @@ void Convertor::convertColType(ColStruct* curStruct)
default: default:
// WIP replace with BINARY // WIP replace with BINARY
*internalType = WriteEngine::WR_INT128; //*internalType = WriteEngine::WR_INT128;
*internalType = WriteEngine::WR_BINARY;
break; break;
} }

View File

@ -111,9 +111,9 @@ enum ColType /** @brief Column type enumeration*/
WR_TEXT = 17, /** @brief TEXT */ WR_TEXT = 17, /** @brief TEXT */
WR_MEDINT = 18, /** @brief Medium Int */ WR_MEDINT = 18, /** @brief Medium Int */
WR_UMEDINT = 19, /** @brief Unsigned Medium Int */ WR_UMEDINT = 19, /** @brief Unsigned Medium Int */
WR_BINARY = 20, /** @brief BINARY */ WR_BINARY = 20 /** @brief BINARY */
// WIP We might be good using WR_BINARY // WIP
WR_INT128 = 21 /** @brief __int128 */ //WR_INT128
}; };
// Describes relation of field to column for a bulk load // Describes relation of field to column for a bulk load
@ -302,6 +302,7 @@ struct ColStruct /** @brief Column Interface Struct*/
typedef std::vector<ColStruct> ColStructList; /** @brief column struct list */ typedef std::vector<ColStruct> ColStructList; /** @brief column struct list */
typedef std::vector<ColTupleList> ColValueList; /** @brief column value list */ typedef std::vector<ColTupleList> ColValueList; /** @brief column value list */
typedef std::vector<RID> RIDList; /** @brief RID list */ typedef std::vector<RID> RIDList; /** @brief RID list */
typedef std::vector<execplan::CalpontSystemCatalog::ColType> CSCTypesList; /** @brief CSC column types list */
typedef std::vector<std::string> dictStr; typedef std::vector<std::string> dictStr;
typedef std::vector<dictStr> DictStrList; typedef std::vector<dictStr> DictStrList;

View File

@ -1684,12 +1684,13 @@ int ColumnOp::writeRow(Column& curCol, uint64_t totalRow, const RID* rowIdArray,
if (!bDelete) pVal = &((uint64_t*) valArray)[i]; if (!bDelete) pVal = &((uint64_t*) valArray)[i];
break; break;
case WriteEngine::WR_INT128: // WIP
pVal = &((uint128_t*) valArray)[i]; //case WriteEngine::WR_INT128:
break;
case WriteEngine::WR_BINARY: case WriteEngine::WR_BINARY:
if (!bDelete) pVal = (uint8_t*) valArray + i * curCol.colWidth; // WIP CSCCol type
pVal = &((uint128_t*) valArray)[i];
//pVal = (uint8_t*) valArray + i * curCol.colWidth;
break; break;
default : default :

File diff suppressed because it is too large Load Diff

View File

@ -157,7 +157,15 @@ public:
/** /**
* @brief Convert interface value list to internal value array * @brief Convert interface value list to internal value array
*/ */
EXPORT void convertValArray(size_t totalRow, const ColType colType, EXPORT void convertValArray(const size_t totalRow,
const execplan::CalpontSystemCatalog::ColType& cscColType,
const ColType colType,
ColTupleList& curTupleList, void* valArray,
bool bFromList = true) ;
// WIP legacy
EXPORT void convertValArray(const size_t totalRow,
const ColType colType,
ColTupleList& curTupleList, void* valArray, ColTupleList& curTupleList, void* valArray,
bool bFromList = true) ; bool bFromList = true) ;
@ -364,6 +372,7 @@ public:
* @param dicStringListt dictionary values list * @param dicStringListt dictionary values list
*/ */
EXPORT int insertColumnRec_Single(const TxnID& txnid, EXPORT int insertColumnRec_Single(const TxnID& txnid,
CSCTypesList& cscColTypesList,
ColStructList& colStructList, ColStructList& colStructList,
ColValueList& colValueList, ColValueList& colValueList,
DctnryStructList& dctnryStructList, DctnryStructList& dctnryStructList,
@ -650,10 +659,8 @@ private:
/** /**
* @brief Convert interface column type to a internal column type * @brief Convert interface column type to a internal column type
*/ */
// void convertColType(void* curStruct, const FuncType curType = FUNC_WRITE_ENGINE) const; void convertValue(const execplan::CalpontSystemCatalog::ColType &fullColType, ColType colType, void* valArray, size_t pos, boost::any& data, bool fromList = true);
void convertValue(const ColType colType, void* valArray, size_t pos, boost::any& data, bool fromList = true); void convertValue(const ColType colType, void* valArray, size_t pos, boost::any& data, bool fromList = true);
/** /**
* @brief Convert column value to its internal representation * @brief Convert column value to its internal representation
* *
@ -661,6 +668,7 @@ private:
* @param value Memory pointer for storing output value. Should be pre-allocated * @param value Memory pointer for storing output value. Should be pre-allocated
* @param data Column data * @param data Column data
*/ */
void convertValue(const execplan::CalpontSystemCatalog::ColType &fullColType, const ColType colType, void* value, boost::any& data);
void convertValue(const ColType colType, void* value, boost::any& data); void convertValue(const ColType colType, void* value, boost::any& data);
/** /**
@ -690,11 +698,21 @@ private:
/** /**
* @brief Common methods to write values to a column * @brief Common methods to write values to a column
*/ */
int writeColumnRec(const TxnID& txnid, const ColStructList& colStructList, int writeColumnRec(const TxnID& txnid,
const CSCTypesList& cscColTypes,
const ColStructList& colStructList,
ColValueList& colValueList, ColValueList& colValueList,
RID* rowIdArray, const ColStructList& newColStructList, RID* rowIdArray, const ColStructList& newColStructList,
ColValueList& newColValueList, const int32_t tableOid, ColValueList& newColValueList, const int32_t tableOid,
bool useTmpSuffix, bool versioning = true); bool useTmpSuffix, bool versioning = true);
// WIP
int writeColumnRec(const TxnID& txnid,
const ColStructList& colStructList,
ColValueList& colValueList,
RID* rowIdArray, const ColStructList& newColStructList,
ColValueList& newColValueList, const int32_t tableOid,
bool useTmpSuffix, bool versioning = true);
int writeColumnRecBinary(const TxnID& txnid, const ColStructList& colStructList, int writeColumnRecBinary(const TxnID& txnid, const ColStructList& colStructList,
std::vector<uint64_t>& colValueList, std::vector<uint64_t>& colValueList,
@ -705,10 +723,19 @@ private:
//@Bug 1886,2870 pass the address of ridList vector //@Bug 1886,2870 pass the address of ridList vector
int writeColumnRec(const TxnID& txnid, const ColStructList& colStructList, int writeColumnRec(const TxnID& txnid,
const CSCTypesList& cscColTypes,
const ColStructList& colStructList,
const ColValueList& colValueList, std::vector<void*>& colOldValueList, const ColValueList& colValueList, std::vector<void*>& colOldValueList,
const RIDList& ridList, const int32_t tableOid, const RIDList& ridList, const int32_t tableOid,
bool convertStructFlag = true, ColTupleList::size_type nRows = 0); bool convertStructFlag = true, ColTupleList::size_type nRows = 0);
// WIP legacy
int writeColumnRec(const TxnID& txnid,
const ColStructList& colStructList,
const ColValueList& colValueList, std::vector<void*>& colOldValueList,
const RIDList& ridList, const int32_t tableOid,
bool convertStructFlag = true, ColTupleList::size_type nRows = 0);
//For update column from column to use //For update column from column to use
int writeColumnRecords(const TxnID& txnid, std::vector<ColStruct>& colStructList, int writeColumnRecords(const TxnID& txnid, std::vector<ColStruct>& colStructList,