1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-5505 Add TypeHandler functions.

This commit is contained in:
Denis Khalikov
2023-10-30 18:35:40 +03:00
committed by Leonid Fedorov
parent 491ba6e0aa
commit 865cca11c9
5 changed files with 360 additions and 289 deletions

View File

@ -1609,6 +1609,11 @@ boost::any TypeHandlerDate::convertFromString(const TypeAttributesStd& colType,
return dataconvert::DataConvert::StringToDate(data, pushWarning);
}
int32_t TypeHandlerDate::convertArrowColumnDate(int32_t dayVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnDate(dayVal, status);
}
boost::any TypeHandlerDatetime::convertFromString(const TypeAttributesStd& colType,
const ConvertFromStringParam& prm, const std::string& data,
bool& pushWarning) const
@ -1616,6 +1621,16 @@ boost::any TypeHandlerDatetime::convertFromString(const TypeAttributesStd& colTy
return dataconvert::DataConvert::StringToDatetime(data, pushWarning);
}
int64_t TypeHandlerDatetime::convertArrowColumnDatetime(int64_t timeVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnDatetime(timeVal, status);
}
int64_t TypeHandlerDatetime::convertArrowColumnDatetimeUs(int64_t timeVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnDatetimeUs(timeVal, status);
}
boost::any TypeHandlerTime::convertFromString(const TypeAttributesStd& colType,
const ConvertFromStringParam& prm, const std::string& data,
bool& pushWarning) const
@ -1623,6 +1638,16 @@ boost::any TypeHandlerTime::convertFromString(const TypeAttributesStd& colType,
return dataconvert::DataConvert::StringToTime(colType, data, pushWarning);
}
int64_t TypeHandlerTime::convertArrowColumnTime64(int64_t timeVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnTime64(timeVal, status);
}
int64_t TypeHandlerTime::convertArrowColumnTime32(int32_t timeVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnTime32(timeVal, status);
}
boost::any TypeHandlerTimestamp::convertFromString(const TypeAttributesStd& colType,
const ConvertFromStringParam& prm, const std::string& data,
bool& pushWarning) const
@ -1630,6 +1655,16 @@ boost::any TypeHandlerTimestamp::convertFromString(const TypeAttributesStd& colT
return dataconvert::DataConvert::StringToTimestamp(prm, data, pushWarning);
}
int64_t TypeHandlerTimestamp::convertArrowColumnTimestamp(int64_t timeVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnTimestamp(timeVal, status);
}
int64_t TypeHandlerTimestamp::convertArrowColumnTimestampUs(int64_t timeVal, int& status) const
{
return dataconvert::DataConvert::convertArrowColumnTimestampUs(timeVal, status);
}
boost::any TypeHandlerChar::convertFromString(const TypeAttributesStd& colType,
const ConvertFromStringParam& prm, const std::string& data,
bool& pushWarning) const

View File

@ -2276,6 +2276,10 @@ class TypeHandlerTemporal : public TypeHandler
class TypeHandlerDate : public TypeHandlerTemporal
{
public:
int32_t convertArrowColumnDate(int32_t dayVal, int& status) const;
private:
const string& name() const override;
code_t code() const override
{
@ -2301,6 +2305,11 @@ class TypeHandlerDate : public TypeHandlerTemporal
class TypeHandlerDatetime : public TypeHandlerTemporal
{
public:
int64_t convertArrowColumnDatetime(int64_t timeVal, int& status) const;
int64_t convertArrowColumnDatetimeUs(int64_t timeVal, int& status) const;
private:
const string& name() const override;
code_t code() const override
{
@ -2326,6 +2335,11 @@ class TypeHandlerDatetime : public TypeHandlerTemporal
class TypeHandlerTime : public TypeHandlerTemporal
{
public:
int64_t convertArrowColumnTime64(int64_t timeVal, int& status) const;
int64_t convertArrowColumnTime32(int32_t timeVal, int& status) const;
private:
const string& name() const override;
code_t code() const override
{
@ -2351,6 +2365,11 @@ class TypeHandlerTime : public TypeHandlerTemporal
class TypeHandlerTimestamp : public TypeHandlerTemporal
{
public:
int64_t convertArrowColumnTimestamp(int64_t timeVal, int& status) const;
int64_t convertArrowColumnTimestampUs(int64_t timeVal, int& status) const;
private:
const string& name() const override;
code_t code() const override
{

View File

@ -1576,7 +1576,7 @@ boost::any DataConvert::StringToTimestamp(const datatypes::ConvertFromStringPara
//------------------------------------------------------------------------------
// Convert date32 parquet data to binary date. Used by BulkLoad.
//------------------------------------------------------------------------------
int32_t DataConvert::ConvertArrowColumnDate(int32_t dayVal, int& status)
int32_t DataConvert::convertArrowColumnDate(int32_t dayVal, int& status)
{
int inYear;
int inMonth;

View File

@ -1176,7 +1176,7 @@ class DataConvert
* @param dayVal the input data representing days
* @param status 0 - success, -1 - fail
*/
EXPORT static int32_t ConvertArrowColumnDate(int32_t dayVal, int& status);
EXPORT static int32_t convertArrowColumnDate(int32_t dayVal, int& status);
/**
* @brief convert a date column data, represented as a string, to its native

View File

@ -40,6 +40,7 @@
#include "dataconvert.h"
#include "exceptclasses.h"
#include "mcs_decimal.h"
#include "mcs_datatype.h"
#include "joblisttypes.h"
@ -1690,7 +1691,8 @@ int BulkLoadBuffer::parseColParquet(ColumnInfo& columnInfo)
}
}
convertParquet(columnData, buf, columnInfo.column, bufStats, lastInputRowInExtent, columnInfo, updateCPInfoPendingFlag, section);
convertParquet(columnData, buf, columnInfo.column, bufStats, lastInputRowInExtent, columnInfo,
updateCPInfoPendingFlag, section);
if (updateCPInfoPendingFlag)
{
@ -1764,8 +1766,9 @@ int BulkLoadBuffer::parseColParquet(ColumnInfo& columnInfo)
// fTotalReadRowsParser (in) - current batch size(row number)
// fAutoIncNextValue (in) - first auto increment number of this batch
//-----------------------------------------------------------------------------------
void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, unsigned char* buf, const JobColumn& column,
BLBufferStats& bufStats, RID& lastInputRowInExtent, ColumnInfo& columnInfo,
void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, unsigned char* buf,
const JobColumn& column, BLBufferStats& bufStats,
RID& lastInputRowInExtent, ColumnInfo& columnInfo,
bool& updateCPInfoPendingFlag, ColumnBufferSection* section)
{
char biVal;
@ -1841,8 +1844,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -1895,8 +1897,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -1992,8 +1993,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -2073,8 +2073,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -2147,8 +2146,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -2159,10 +2157,10 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
//----------------------------------------------------------------------
case WriteEngine::WR_BYTE:
{
long long origVal;
// if use int8_t here, it will take 8 bool value of parquet array
std::shared_ptr<arrow::BooleanArray> boolArray = std::static_pointer_cast<arrow::BooleanArray>(columnData);
std::shared_ptr<arrow::BooleanArray> boolArray =
std::static_pointer_cast<arrow::BooleanArray>(columnData);
const int8_t* dataPtr = columnData->data()->GetValues<int8_t>(1);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
@ -2185,7 +2183,6 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
memcpy(p, pVal, width);
continue;
}
}
else
{
@ -2224,7 +2221,6 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if (bSatVal)
bufStats.satCount++;
if (origVal < bufStats.minBufferVal)
bufStats.minBufferVal = origVal;
@ -2238,8 +2234,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -2317,8 +2312,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
@ -2372,8 +2366,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
@ -2387,14 +2380,13 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
case WriteEngine::WR_LONGLONG:
{
if (column.dataType != CalpontSystemCatalog::DATETIME &&
column.dataType != CalpontSystemCatalog::TIMESTAMP &&
column.dataType != CalpontSystemCatalog::TIME)
column.dataType != CalpontSystemCatalog::TIMESTAMP && column.dataType != CalpontSystemCatalog::TIME)
{
const long long *dataPtr = columnData->data()->GetValues<long long>(1);
const long long* dataPtr = columnData->data()->GetValues<long long>(1);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
void *p = buf + i * width;
void* p = buf + i * width;
bool bSatVal = false;
if (columnData->IsNull(i))
@ -2459,259 +2451,282 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
}
}
}
else if (column.dataType == CalpontSystemCatalog::TIME)
{
// time conversion here
int rc = 0;
// for parquet, there are two time type, time32 and time64
// if it's time32, unit is millisecond, int32
if (columnData->type_id() == arrow::Type::type::TIME32 || columnData->type_id() == arrow::Type::type::NA)
{
std::shared_ptr<arrow::Time32Array> timeArray = std::static_pointer_cast<arrow::Time32Array>(columnData);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
void *p = buf + i * width;
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::TIMENULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
// timeVal is millisecond since midnight
int32_t timeVal = timeArray->Value(i);
llDate = dataconvert::DataConvert::convertArrowColumnTime32(timeVal, rc);
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
}
}
}
// if it's time64, unit is microsecond, int64
else if (columnData->type_id() == arrow::Type::type::TIME64)
{
std::shared_ptr<arrow::Time64Array> timeArray = std::static_pointer_cast<arrow::Time64Array>(columnData);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
void *p = buf + i * width;
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::TIMENULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
// timeVal is microsecond since midnight
int64_t timeVal = timeArray->Value(i);
llDate = dataconvert::DataConvert::convertArrowColumnTime64(timeVal, rc);
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
}
}
}
}
else if (column.dataType == CalpontSystemCatalog::TIMESTAMP)
{
// timestamp conversion here
// default column type is TIMESTAMP
// default unit is millisecond
std::shared_ptr<arrow::TimestampArray> timeStampArray = std::static_pointer_cast<arrow::TimestampArray>(columnData);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
int rc = 0;
void *p = buf + i * width;
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::TIMESTAMPNULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
int64_t timeVal = timeStampArray->Value(i);
std::shared_ptr<arrow::TimestampType> fType = std::static_pointer_cast<arrow::TimestampType>(columnData->type());
if (fType->unit() == arrow::TimeUnit::MILLI)
{
llDate = dataconvert::DataConvert::convertArrowColumnTimestamp(timeVal, rc);
}
else
{
llDate = dataconvert::DataConvert::convertArrowColumnTimestampUs(timeVal, rc);
}
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
llDate = 0;
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
else
{
// datetime conversion here
// default column type is TIMESTAMP
std::shared_ptr<arrow::TimestampArray> dateTimeArray = std::static_pointer_cast<arrow::TimestampArray>(columnData);
datatypes::TypeAttributesStd dummyTypeAttribute;
const auto* typeHandler = datatypes::TypeHandler::find(column.dataType, dummyTypeAttribute);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
if (column.dataType == CalpontSystemCatalog::TIME)
{
// time conversion here
int rc = 0;
void *p = buf + i * width;
if (columnData->IsNull(i))
// for parquet, there are two time type, time32 and time64
// if it's time32, unit is millisecond, int32
if (columnData->type_id() == arrow::Type::type::TIME32 ||
columnData->type_id() == arrow::Type::type::NA)
{
if (column.fWithDefault)
std::shared_ptr<arrow::Time32Array> timeArray =
std::static_pointer_cast<arrow::Time32Array>(columnData);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::DATETIMENULL;
void* p = buf + i * width;
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::TIMENULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
// timeVal is millisecond since midnight
int32_t timeVal = timeArray->Value(i);
const datatypes::TypeHandlerTime* typeHandlerTime =
dynamic_cast<const datatypes::TypeHandlerTime*>(typeHandler);
idbassert(typeHandlerTime);
llDate = typeHandlerTime->convertArrowColumnTime32(timeVal, rc);
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
continue;
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section,
i);
}
}
}
else
// if it's time64, unit is microsecond, int64
else if (columnData->type_id() == arrow::Type::type::TIME64)
{
int64_t timeVal = dateTimeArray->Value(i);
std::shared_ptr<arrow::TimestampType> fType = std::static_pointer_cast<arrow::TimestampType>(columnData->type());
std::shared_ptr<arrow::Time64Array> timeArray =
std::static_pointer_cast<arrow::Time64Array>(columnData);
if (fType->unit() == arrow::TimeUnit::MILLI)
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
llDate = dataconvert::DataConvert::convertArrowColumnDatetime(timeVal, rc);
void* p = buf + i * width;
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::TIMENULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
// timeVal is microsecond since midnight
int64_t timeVal = timeArray->Value(i);
const datatypes::TypeHandlerTime* typeHandlerTime =
dynamic_cast<const datatypes::TypeHandlerTime*>(typeHandler);
idbassert(typeHandlerTime);
llDate = typeHandlerTime->convertArrowColumnTime64(timeVal, rc);
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section,
i);
}
}
}
}
else if (column.dataType == CalpontSystemCatalog::TIMESTAMP)
{
// timestamp conversion here
// default column type is TIMESTAMP
// default unit is millisecond
std::shared_ptr<arrow::TimestampArray> timeStampArray =
std::static_pointer_cast<arrow::TimestampArray>(columnData);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
int rc = 0;
void* p = buf + i * width;
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::TIMESTAMPNULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
llDate = dataconvert::DataConvert::convertArrowColumnDatetimeUs(timeVal, rc);
int64_t timeVal = timeStampArray->Value(i);
std::shared_ptr<arrow::TimestampType> fType =
std::static_pointer_cast<arrow::TimestampType>(columnData->type());
const datatypes::TypeHandlerTimestamp* typeHandlerTimestamp =
dynamic_cast<const datatypes::TypeHandlerTimestamp*>(typeHandler);
idbassert(typeHandlerTimestamp);
if (fType->unit() == arrow::TimeUnit::MILLI)
{
llDate = typeHandlerTimestamp->convertArrowColumnTimestamp(timeVal, rc);
}
else
{
llDate = typeHandlerTimestamp->convertArrowColumnTimestampUs(timeVal, rc);
}
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
llDate = 0;
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
else
{
// datetime conversion here
// default column type is TIMESTAMP
std::shared_ptr<arrow::TimestampArray> dateTimeArray =
std::static_pointer_cast<arrow::TimestampArray>(columnData);
if (rc == 0)
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
int rc = 0;
void* p = buf + i * width;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
llDate = 0;
bufStats.satCount++;
}
if (columnData->IsNull(i))
{
if (column.fWithDefault)
{
llDate = column.fDefaultInt;
}
else
{
llDate = joblist::DATETIMENULL;
pVal = &llDate;
memcpy(p, pVal, width);
continue;
}
}
else
{
int64_t timeVal = dateTimeArray->Value(i);
std::shared_ptr<arrow::TimestampType> fType =
std::static_pointer_cast<arrow::TimestampType>(columnData->type());
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
const datatypes::TypeHandlerDatetime* typeHandlerDateTime =
dynamic_cast<const datatypes::TypeHandlerDatetime*>(typeHandler);
idbassert(typeHandlerDateTime);
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
if (fType->unit() == arrow::TimeUnit::MILLI)
{
llDate = typeHandlerDateTime->convertArrowColumnDatetime(timeVal, rc);
}
else
{
llDate = typeHandlerDateTime->convertArrowColumnDatetimeUs(timeVal, rc);
}
}
if (rc == 0)
{
if (llDate < bufStats.minBufferVal)
bufStats.minBufferVal = llDate;
if (llDate > bufStats.maxBufferVal)
bufStats.maxBufferVal = llDate;
}
else
{
llDate = 0;
bufStats.satCount++;
}
pVal = &llDate;
memcpy(p, pVal, width);
updateCPInfoPendingFlag = true;
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
}
break;
}
@ -2721,8 +2736,10 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
//----------------------------------------------------------------------
case WriteEngine::WR_BINARY:
{
std::shared_ptr<arrow::Decimal128Array> decimalArray = std::static_pointer_cast<arrow::Decimal128Array>(columnData);
std::shared_ptr<arrow::DecimalType> fType = std::static_pointer_cast<arrow::DecimalType>(decimalArray->type());
std::shared_ptr<arrow::Decimal128Array> decimalArray =
std::static_pointer_cast<arrow::Decimal128Array>(columnData);
std::shared_ptr<arrow::DecimalType> fType =
std::static_pointer_cast<arrow::DecimalType>(decimalArray->type());
const int128_t* dataPtr = decimalArray->data()->GetValues<int128_t>(1);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
@ -2775,8 +2792,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -2817,7 +2833,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
}
else
{
memcpy(&ullVal, dataPtr+i, width);
memcpy(&ullVal, dataPtr + i, width);
}
if (ullVal > column.fMaxIntSat)
@ -2840,8 +2856,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -2917,8 +2932,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
break;
@ -3003,15 +3017,19 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
else
{
// date conversion here
std::shared_ptr<arrow::Date32Array> dateArray = std::static_pointer_cast<arrow::Date32Array>(columnData);
// Parquet support.
std::shared_ptr<arrow::Date32Array> dateArray =
std::static_pointer_cast<arrow::Date32Array>(columnData);
datatypes::TypeAttributesStd dummyTypeAttribute;
const datatypes::TypeHandlerDate* typeHandlerDate = dynamic_cast<const datatypes::TypeHandlerDate*>(
datatypes::TypeHandler::find(column.dataType, dummyTypeAttribute));
idbassert(typeHandlerDate);
for (unsigned int i = 0; i < fTotalReadRowsParser; i++)
{
@ -3035,7 +3053,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
else
{
int32_t dayVal = dateArray->Value(i);
iDate = dataconvert::DataConvert::ConvertArrowColumnDate(dayVal, rc);
iDate = typeHandlerDate->convertArrowColumnDate(dayVal, rc);
}
if (rc == 0)
@ -3058,8 +3076,7 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
if ((fStartRowParser + i) == lastInputRowInExtent)
{
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag,
section, i);
updateCPMinMax(columnInfo, lastInputRowInExtent, bufStats, updateCPInfoPendingFlag, section, i);
}
}
}
@ -3067,8 +3084,9 @@ void BulkLoadBuffer::convertParquet(std::shared_ptr<arrow::Array> columnData, un
}
}
inline void BulkLoadBuffer::updateCPMinMax(ColumnInfo& columnInfo, RID& lastInputRowInExtent, BLBufferStats& bufStats,
bool& updateCPInfoPendingFlag, ColumnBufferSection* section, uint32_t curRow)
inline void BulkLoadBuffer::updateCPMinMax(ColumnInfo& columnInfo, RID& lastInputRowInExtent,
BLBufferStats& bufStats, bool& updateCPInfoPendingFlag,
ColumnBufferSection* section, uint32_t curRow)
{
if (columnInfo.column.width <= 8)
{
@ -3085,9 +3103,8 @@ inline void BulkLoadBuffer::updateCPMinMax(ColumnInfo& columnInfo, RID& lastInpu
if (fLog->isDebug(DEBUG_2))
{
ostringstream oss;
oss << "ColRelSecOut: OID-" << columnInfo.column.mapOid
<< "; StartRID/Rows1: " << section->startRowId() << " " << curRow + 1
<< "; lastExtentRow: " << lastInputRowInExtent;
oss << "ColRelSecOut: OID-" << columnInfo.column.mapOid << "; StartRID/Rows1: " << section->startRowId()
<< " " << curRow + 1 << "; lastExtentRow: " << lastInputRowInExtent;
parseColLogMinMax(oss, columnInfo.column.dataType, bufStats.minBufferVal, bufStats.maxBufferVal);
fLog->logMsg(oss.str(), MSGLVL_INFO2);