1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-641 Simple aggregates support: min, max, sum, avg for wide-DECIMALs.

This commit is contained in:
Roman Nozdrin
2020-05-08 10:17:17 +00:00
parent 3d94ec1568
commit e88cbe9bc1
9 changed files with 343 additions and 212 deletions

View File

@ -203,6 +203,11 @@ namespace datatypes
return std::string(buf);
}
/**
    @brief Const-reference convenience overload of toString().

    Strips the const qualifier and forwards to the overload that takes a
    non-const reference, so callers holding a const IDB_Decimal do not need
    to cast themselves.
    NOTE(review): this relies on the non-const overload not modifying its
    argument - confirm against that overload's body.
*/
std::string Decimal::toString(const execplan::IDB_Decimal& value)
{
    execplan::IDB_Decimal& forwarded = const_cast<execplan::IDB_Decimal&>(value);
    return toString(forwarded);
}
int Decimal::compare(const execplan::IDB_Decimal& l, const execplan::IDB_Decimal& r)
{
int128_t divisorL, divisorR;

View File

@ -24,6 +24,7 @@
#include "calpontsystemcatalog.h"
using int128_t = __int128;
using ColTypeAlias = execplan::CalpontSystemCatalog::ColType;
namespace execplan
{
@ -37,6 +38,8 @@ constexpr uint32_t MAXDECIMALWIDTH = 16U;
constexpr uint8_t INT64MAXPRECISION = 18U;
constexpr uint8_t INT128MAXPRECISION = 38U;
constexpr uint8_t MAXLEGACYWIDTH = 8U;
constexpr uint8_t MAXSCALEINC4AVG = 4U;
constexpr int8_t IGNOREPRECISION = -1;
const uint64_t mcs_pow_10[20] =
{
@ -159,12 +162,13 @@ class Decimal
@brief Convenience method to put decimal into a std::string.
*/
static std::string toString(execplan::IDB_Decimal& value);
static std::string toString(const execplan::IDB_Decimal& value);
/**
@brief The method detects whether decimal type is wide
using csc data type.
*/
static constexpr inline bool isWideDecimalType(const execplan::CalpontSystemCatalog::ColType& ct)
static constexpr inline bool isWideDecimalType(const ColTypeAlias& ct)
{
return ((ct.colDataType == execplan::CalpontSystemCatalog::DECIMAL ||
ct.colDataType == execplan::CalpontSystemCatalog::UDECIMAL) &&
@ -185,7 +189,7 @@ class Decimal
@brief The method sets the legacy scale and precision of a wide decimal
column which is the result of an arithmetic operation.
*/
static inline void setDecimalScalePrecisionLegacy(execplan::CalpontSystemCatalog::ColType& ct,
static inline void setDecimalScalePrecisionLegacy(ColTypeAlias& ct,
unsigned int precision, unsigned int scale)
{
ct.scale = scale;
@ -200,7 +204,7 @@ class Decimal
@brief The method sets the scale and precision of a wide decimal
column which is the result of an arithmetic operation.
*/
static inline void setDecimalScalePrecision(execplan::CalpontSystemCatalog::ColType& ct,
static inline void setDecimalScalePrecision(ColTypeAlias& ct,
unsigned int precision, unsigned int scale)
{
ct.colWidth = (precision > INT64MAXPRECISION)
@ -216,7 +220,7 @@ class Decimal
@brief The method sets the scale and precision of a wide decimal
column which is the result of an arithmetic operation, based on a heuristic.
*/
static inline void setDecimalScalePrecisionHeuristic(execplan::CalpontSystemCatalog::ColType& ct,
static inline void setDecimalScalePrecisionHeuristic(ColTypeAlias& ct,
unsigned int precision, unsigned int scale)
{
unsigned int diff = 0;
@ -337,6 +341,19 @@ class Decimal
return static_cast<int64_t>(value);
}
/**
    @brief MDB increases scale by up to 4 digits calculating avg().

    Grows both scale and precision in place by at most MAXSCALEINC4AVG
    digits, never pushing either of them past INT128MAXPRECISION.

    @param precision in/out: precision of the avg() argument, widened in place.
    @param scale     in/out: scale of the avg() argument, widened in place.
*/
static inline void setScalePrecision4Avg(
    unsigned int& precision,
    unsigned int& scale)
{
    // Headroom left below the INT128MAXPRECISION cap. The subtraction is
    // carried out in unsigned arithmetic, so a value that already exceeds
    // the cap would wrap around to a huge "available" number and get bumped
    // even further; saturate the headroom at zero instead.
    const uint32_t scaleAvailable = (scale < INT128MAXPRECISION)
        ? INT128MAXPRECISION - scale
        : 0U;
    const uint32_t precisionAvailable = (precision < INT128MAXPRECISION)
        ? INT128MAXPRECISION - precision
        : 0U;

    scale += (scaleAvailable >= MAXSCALEINC4AVG) ? MAXSCALEINC4AVG : scaleAvailable;
    precision += (precisionAvailable >= MAXSCALEINC4AVG) ? MAXSCALEINC4AVG : precisionAvailable;
}
};
/**

View File

@ -343,6 +343,41 @@ string keyName(uint64_t i, uint32_t key, const joblist::JobInfo& jobInfo)
namespace joblist
{
// Appends the metadata of one aggregate output column to the *Agg vectors.
//
// A DECIMAL/UDECIMAL projected column whose precision qualifies as a wide
// decimal keeps its own type, scale, precision and width. Every other column
// is described as a LONGDOUBLE accumulator (scale 0, precision -1,
// width sizeof(long double)).
//
// colProj        index of the source column in the projected vectors
// type           data type of the source column
// precisionProj, oidsProj, scaleProj, width
//                per-column attributes of the projected row group
// aggKey         key recorded for the aggregate column
// oidsAgg, keysAgg, typeAgg, scaleAgg, precisionAgg, widthAgg
//                output vectors; exactly one element is appended to each
void wideDecimalOrLongDouble(const uint64_t colProj,
                             const CalpontSystemCatalog::ColDataType type,
                             const vector<uint32_t>& precisionProj,
                             const vector<uint32_t>& oidsProj,
                             const uint32_t aggKey,
                             const vector<uint32_t>& scaleProj,
                             const vector<uint32_t>& width,
                             vector<uint32_t>& oidsAgg,
                             vector<uint32_t>& keysAgg,
                             vector<CalpontSystemCatalog::ColDataType>& typeAgg,
                             vector<uint32_t>& scaleAgg,
                             vector<uint32_t>& precisionAgg,
                             vector<uint32_t>& widthAgg)
{
    const bool isDecimalType = (type == CalpontSystemCatalog::DECIMAL
                                || type == CalpontSystemCatalog::UDECIMAL);
    // The precision lookup is only performed for decimal types.
    const bool keepWideDecimal = isDecimalType
        && datatypes::Decimal::isWideDecimalType(precisionProj[colProj]);

    // OID and key are recorded identically for both outcomes.
    oidsAgg.push_back(oidsProj[colProj]);
    keysAgg.push_back(aggKey);

    if (!keepWideDecimal)
    {
        // Fall back to a long double accumulator.
        typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
        scaleAgg.push_back(0);
        precisionAgg.push_back(-1);
        widthAgg.push_back(sizeof(long double));
        return;
    }

    // Wide decimal: carry the source column attributes over unchanged.
    typeAgg.push_back(type);
    scaleAgg.push_back(scaleProj[colProj]);
    precisionAgg.push_back(precisionProj[colProj]);
    widthAgg.push_back(width[colProj]);
}
TupleAggregateStep::TupleAggregateStep(
const SP_ROWAGG_UM_t& agg,
@ -717,25 +752,47 @@ void TupleAggregateStep::configDeliveredRowGroup(const JobInfo& jobInfo)
// correct the scale
vector<uint32_t> scale = fRowGroupOut.getScale();
vector<uint32_t> precision = fRowGroupOut.getPrecision();
// for (uint64_t i = 0; i < scale.size(); i++)
// {
// to support CNX_DECIMAL_SCALE the avg column's scale is coded with two scales:
// fe's avg column scale << 8 + original column scale
//if ((scale[i] & 0x0000FF00) > 0)
// scale[i] = scale[i] & 0x000000FF;
// }
size_t retColCount = jobInfo.nonConstDelCols.size();
size_t retColCount = 0;
auto scaleIter = scale.begin();
auto precisionIter = precision.begin();
if (jobInfo.havingStep)
{
retColCount = jobInfo.returnedColVec.size();
idbassert(jobInfo.returnedColVec.size() == jobInfo.nonConstCols.size());
for (auto& rc : jobInfo.nonConstCols)
{
auto& colType = rc->resultType();
if (datatypes::Decimal::isWideDecimalType(colType))
{
*scaleIter = colType.scale;
*precisionIter = colType.precision;
}
scaleIter++; precisionIter++;
}
}
else
{
retColCount = jobInfo.nonConstDelCols.size();
for (auto& rc : jobInfo.nonConstDelCols)
{
auto& colType = rc->resultType();
if (datatypes::Decimal::isWideDecimalType(colType))
{
*scaleIter = colType.scale;
*precisionIter = colType.precision;
}
scaleIter++; precisionIter++;
}
}
vector<uint32_t>::const_iterator offsets0 = fRowGroupOut.getOffsets().begin();
vector<CalpontSystemCatalog::ColDataType>::const_iterator types0 =
fRowGroupOut.getColTypes().begin();
vector<uint32_t> csNums = fRowGroupOut.getCharsetNumbers();
vector<uint32_t>::const_iterator precision0 = fRowGroupOut.getPrecision().begin();
vector<uint32_t>::const_iterator precision0 = precision.begin();
fRowGroupDelivered = RowGroup(retColCount,
vector<uint32_t>(offsets0, offsets0 + retColCount + 1),
vector<uint32_t>(oids.begin(), oids.begin() + retColCount),
@ -896,7 +953,6 @@ SJSTEP TupleAggregateStep::prepAggregate(SJSTEP& step, JobInfo& jobInfo)
// preprocess the columns used by group_concat
jobInfo.groupConcatInfo.prepGroupConcat(jobInfo);
bool doUMOnly = jobInfo.groupConcatInfo.columns().size() > 0
// || jobInfo.windowSet.size() > 0
|| sas
|| ces;
@ -1303,14 +1359,11 @@ void TupleAggregateStep::prep1PhaseAggregate(
cerr << "prep1PhaseAggregate: " << emsg << endl;
throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
}
oidsAgg.push_back(oidsProj[colProj]);
keysAgg.push_back(key);
typeAgg.push_back(CalpontSystemCatalog::LONGDOUBLE);
wideDecimalOrLongDouble(colProj, typeProj[colProj],
precisionProj, oidsProj, key, scaleProj, width,
oidsAgg, keysAgg, typeAgg, scaleAgg,
precisionAgg, widthAgg);
csNumAgg.push_back(csNumProj[colProj]);
precisionAgg.push_back(-1);
widthAgg.push_back(sizeof(long double));
scaleAgg.push_back(0);
}
break;
@ -1755,11 +1808,6 @@ void TupleAggregateStep::prep1PhaseDistinctAggregate(
throw logic_error(emsg.str());
}
// skip sum / count(column) if avg is also selected
// if ((aggOp == ROWAGG_SUM || aggOp == ROWAGG_COUNT_COL_NAME) &&
// (avgSet.find(aggKey) != avgSet.end()))
// continue;
if (aggOp == ROWAGG_DISTINCT_SUM ||
aggOp == ROWAGG_DISTINCT_AVG ||
aggOp == ROWAGG_COUNT_DISTINCT_COL_NAME)
@ -3128,31 +3176,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
cerr << "prep2PhasesAggregate: " << emsg << endl;
throw IDBExcept(emsg, ERR_AGGREGATE_TYPE_NOT_SUPPORT);
}
// WIP MCOL-641 Replace condition with a
// dynamic one
if (typeProj[colProj] == CalpontSystemCatalog::DECIMAL
&& width[colProj] == 16)
{
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
typeAggPm.push_back(CalpontSystemCatalog::DECIMAL);
wideDecimalOrLongDouble(colProj, typeProj[colProj],
precisionProj, oidsProj, aggKey, scaleProj, width,
oidsAggPm, keysAggPm, typeAggPm, scaleAggPm,
precisionAggPm, widthAggPm);
scaleAggPm.push_back(0);
// WIP makes this dynamic
precisionAggPm.push_back(38);
widthAggPm.push_back(width[colProj]);
csNumAggPm.push_back(8);
}
else
{
oidsAggPm.push_back(oidsProj[colProj]);
keysAggPm.push_back(aggKey);
typeAggPm.push_back(CalpontSystemCatalog::LONGDOUBLE);
scaleAggPm.push_back(0);
csNumAggPm.push_back(8);
precisionAggPm.push_back(-1);
widthAggPm.push_back(sizeof(long double));
}
colAggPm++;
}
@ -3435,13 +3463,11 @@ void TupleAggregateStep::prep2PhasesAggregate(
if (aggOp == ROWAGG_SUM)
{
oidsAggUm.push_back(oidsAggPm[colPm]);
keysAggUm.push_back(retKey);
scaleAggUm.push_back(0);
typeAggUm.push_back(CalpontSystemCatalog::LONGDOUBLE);
wideDecimalOrLongDouble(colPm, typeProj[colPm],
precisionProj, oidsProj, retKey, scaleProj, widthAggPm,
oidsAggUm, keysAggUm, typeAggUm, scaleAggUm,
precisionAggUm, widthAggUm);
csNumAggUm.push_back(8);
precisionAggUm.push_back(-1);
widthAggUm.push_back(sizeof(long double));
}
else
{

View File

@ -3621,7 +3621,8 @@ ArithmeticColumn* buildArithmeticColumn(
unsigned int precision = idp->max_length;
unsigned int scale = idp->decimals;
datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type, precision, scale);
datatypes::Decimal::setDecimalScalePrecisionLegacy(mysql_type,
precision, scale);
}
else
{
@ -3638,7 +3639,8 @@ ArithmeticColumn* buildArithmeticColumn(
int32_t scale2 = pt->right()->data()->resultType().scale;
if (funcName == "/" &&
(mysql_type.scale - (scale1 - scale2)) > datatypes::INT128MAXPRECISION)
(mysql_type.scale - (scale1 - scale2)) >
datatypes::INT128MAXPRECISION)
{
Item_decimal* idp = (Item_decimal*)item;
@ -4980,15 +4982,33 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
// use the first parm for result type.
parm = ac->aggParms()[0];
// WIP why do we use LONGDOUBLE for AVG?
if (isp->sum_func() == Item_sum::AVG_FUNC ||
isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC)
bool isAvg = (isp->sum_func() == Item_sum::AVG_FUNC ||
isp->sum_func() == Item_sum::AVG_DISTINCT_FUNC);
if (isAvg || isp->sum_func() == Item_sum::SUM_FUNC ||
isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
{
CalpontSystemCatalog::ColType ct = parm->resultType();
if (datatypes::Decimal::isWideDecimalType(ct))
{
uint32_t precision = ct.precision;
uint32_t scale = ct.scale;
if (isAvg)
{
datatypes::Decimal::setScalePrecision4Avg(precision, scale);
}
ct.precision = precision;
ct.scale = scale;
}
else
{
ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
ct.colWidth = sizeof(long double);
ct.scale += 4;
ct.precision = -1;
if (isAvg)
{
ct.scale += datatypes::MAXSCALEINC4AVG;
}
ct.precision = datatypes::IGNOREPRECISION;
}
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::COUNT_FUNC ||
@ -5000,25 +5020,6 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
ct.scale = parm->resultType().scale;
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::SUM_FUNC ||
isp->sum_func() == Item_sum::SUM_DISTINCT_FUNC)
{
// WIP MCOL-641 This fast hack breaks aggregates for
// all float DT's
// UPD it doesn't break b/c actual DT for result type
// is set during JobList creation.
/*CalpontSystemCatalog::ColType ct = parm->resultType();
ct.colDataType = CalpontSystemCatalog::LONGDOUBLE;
ct.colWidth = sizeof(long double);
ct.precision = -1;*/
CalpontSystemCatalog::ColType ct = parm->resultType();
ct.colDataType = CalpontSystemCatalog::DECIMAL;
ct.colWidth = 16;
ct.precision = 38;
// WIP set the scale if argument is a float-based DT
ct.scale = 0;
ac->resultType(ct);
}
else if (isp->sum_func() == Item_sum::STD_FUNC ||
isp->sum_func() == Item_sum::VARIANCE_FUNC)
{
@ -5058,7 +5059,11 @@ ReturnedColumn* buildAggregateColumn(Item* item, gp_walk_info& gwi)
}
// adjust decimal result type according to internalDecimalScale
if (gwi.internalDecimalScale >= 0 && ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
bool isWideDecimal =
datatypes::Decimal::isWideDecimalType(ac->resultType());
// This must be also valid for UDECIMAL
if (!isWideDecimal && gwi.internalDecimalScale >= 0
&& ac->resultType().colDataType == CalpontSystemCatalog::DECIMAL)
{
CalpontSystemCatalog::ColType ct = ac->resultType();
ct.scale = gwi.internalDecimalScale;

View File

@ -1100,7 +1100,7 @@ extern "C"
mapit = partMap.find(logicalPartNum);
int state;
int state = CP_INVALID;
if (ct.colWidth <= 8)
state = em.getExtentMaxMin(iter->range.start, partInfo.max, partInfo.min, seqNum);

View File

@ -1127,7 +1127,7 @@ void TupleJoiner::updateCPData(const Row& r)
}
else
{
int64_t val;
int64_t val = 0;
if (r.getColType(colIdx) == CalpontSystemCatalog::LONGDOUBLE)
{
double dval = (double)roundl(r.getLongDoubleField(colIdx));

View File

@ -1,6 +1,6 @@
/*
Copyright (C) 2014 InfiniDB, Inc.
Copyright (c) 2019 MariaDB Corporation
Copyright (c) 2019-2020 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
@ -57,6 +57,8 @@
//..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable
//#define NDEBUG
#include "funcexp/utils_utf8.h"
#include "mcs_decimal.h"
using namespace std;
using namespace boost;
@ -70,12 +72,16 @@ namespace
const int64_t AGG_ROWGROUP_SIZE = 256;
template <typename T>
bool minMax(T d1, T d2, int type)
inline bool minMax(T d1, T d2, int type)
{
if (type == rowgroup::ROWAGG_MIN) return d1 < d2;
else return d1 > d2;
}
// Wide-decimal overload: compares the two 128-bit values behind the pointers.
// Returns true when *d1 should replace *d2: for ROWAGG_MIN when *d1 is the
// smaller value, for any other type when *d1 is the larger one.
inline bool minMax(int128_t* d1, int128_t* d2, int type)
{
    if (type == rowgroup::ROWAGG_MIN)
        return *d1 < *d2;

    return *d1 > *d2;
}
inline int64_t getIntNullValue(int colType)
{
@ -334,6 +340,16 @@ inline bool ExternalKeyEq::operator()(const RowPosition& pos1, const RowPosition
static const string overflowMsg("Aggregation overflow.");
// MIN/MAX update for 128-bit values.
// Stores *val1 into output column `col` of fRow when that column is still
// NULL, or when minMax() reports that val1 wins over val2 for aggregate
// function `func`. Otherwise the output column is left untouched.
inline void RowAggregation::updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func)
{
    int32_t colOutOffset = fRow.getOffset(col);

    // The NULL check comes first, so the comparison is skipped for an empty slot.
    const bool takeVal1 = isNull(fRowGroupOut, fRow, col)
        || minMax(val1, val2, func);

    if (takeVal1)
    {
        fRow.setBinaryField_offset(val1, sizeof(int128_t), colOutOffset);
    }
}
inline void RowAggregation::updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func)
{
if (isNull(fRowGroupOut, fRow, col))
@ -1010,13 +1026,31 @@ void RowAggregation::initMapData(const Row& rowIn)
case execplan::CalpontSystemCatalog::MEDINT:
case execplan::CalpontSystemCatalog::INT:
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
fRow.setIntField(rowIn.getIntField(colIn), colOut);
break;
}
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
{
uint32_t colOutOffset = fRow.getOffset(colOut);
fRow.setBinaryField_offset(
rowIn.getBinaryField<int128_t>(colIn),
sizeof(int128_t),
colOutOffset);
}
else
{
fRow.setIntField(rowIn.getIntField(colIn), colOut);
}
break;
}
case execplan::CalpontSystemCatalog::UTINYINT:
case execplan::CalpontSystemCatalog::USMALLINT:
case execplan::CalpontSystemCatalog::UMEDINT:
@ -1113,8 +1147,6 @@ void RowAggregation::makeAggFieldsNull(Row& row)
fFunctionCols[i]->fAggFunction == ROWAGG_GROUP_CONCAT ||
fFunctionCols[i]->fAggFunction == ROWAGG_STATS)
{
// done by memset
// row.setIntField(0, colOut);
continue;
}
@ -1160,18 +1192,18 @@ void RowAggregation::makeAggFieldsNull(Row& row)
case execplan::CalpontSystemCatalog::UDECIMAL:
{
int colWidth = fRowGroupOut->getColumnWidth(colOut);
if (colWidth <= 8)
if (LIKELY(colWidth == datatypes::MAXDECIMALWIDTH))
{
uint32_t offset = row.getOffset(colOut);
row.setBinaryField_offset(
const_cast<int128_t*>(&datatypes::Decimal128Null),
colWidth,
offset);
}
else if (colWidth == datatypes::MAXLEGACYWIDTH)
{
row.setIntField(getUintNullValue(colDataType, colWidth), colOut);
}
else
{
int128_t nullValue = 0;
utils::setWideDecimalNullValue(nullValue);
uint32_t offset = row.getOffset(colOut);
row.setBinaryField_offset(&nullValue, sizeof(nullValue),
offset);
}
break;
}
@ -1183,7 +1215,7 @@ void RowAggregation::makeAggFieldsNull(Row& row)
{
int colWidth = fRowGroupOut->getColumnWidth(colOut);
if (colWidth <= 8)
if (colWidth <= datatypes::MAXLEGACYWIDTH)
{
row.setUintField(getUintNullValue(colDataType, colWidth), colOut);
}
@ -1256,8 +1288,6 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
case execplan::CalpontSystemCatalog::MEDINT:
case execplan::CalpontSystemCatalog::INT:
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
int64_t valIn = rowIn.getIntField(colIn);
int64_t valOut = fRow.getIntField(colOut);
@ -1265,6 +1295,24 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
break;
}
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
if (LIKELY(fRow.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
{
updateIntMinMax(rowIn.getBinaryField<int128_t>(colIn),
fRow.getBinaryField<int128_t>(colOut),
colOut, funcType);
}
else
{
int64_t valIn = rowIn.getIntField(colIn);
int64_t valOut = fRow.getIntField(colOut);
updateIntMinMax(valIn, valOut, colOut, funcType);
}
break;
}
case execplan::CalpontSystemCatalog::UTINYINT:
case execplan::CalpontSystemCatalog::USMALLINT:
case execplan::CalpontSystemCatalog::UMEDINT:
@ -1340,17 +1388,12 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
// Note: NULL value check must be done on UM & PM
// UM may receive NULL values, too.
//------------------------------------------------------------------------------
// WIP MCOL-641. This and other methods must be type based to avoid needless mem
// allocation for wide DTs
void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int funcType)
{
int colDataType = (fRowGroupIn.getColTypes())[colIn];
long double valIn = 0;
bool isWideDataType = false;
void *wideValInPtr = NULL;
// WIP MCOL-641 Probably the width must be taken
// from colOut
uint32_t width = fRowGroupOut->getColumnWidth(colOut);
void *wideValInPtr = nullptr;
if (isNull(&fRowGroupIn, rowIn, colIn) == true)
return;
@ -1380,28 +1423,20 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
// WIP MCOL-641 make the size dynamic and use branch prediction cond
isWideDataType = (width) > 8 ? true : false;
if (!isWideDataType)
{
valIn = rowIn.getIntField(colIn);
double scale = (double)(fRowGroupIn.getScale())[colIn];
if (valIn != 0 && scale > 0)
{
valIn /= pow(10.0, scale);
}
}
else
{
if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
uint32_t width = fRowGroupOut->getColumnWidth(colOut);
isWideDataType = width == datatypes::MAXDECIMALWIDTH;
if(LIKELY(isWideDataType))
{
int128_t *dec = rowIn.getBinaryField<int128_t>(colIn);
wideValInPtr = reinterpret_cast<void*>(dec);
}
else
{
uint128_t *dec = rowIn.getBinaryField<uint128_t>(colIn);
wideValInPtr = reinterpret_cast<void*>(dec);
valIn = rowIn.getIntField(colIn);
double scale = (double)(fRowGroupIn.getScale())[colIn];
if (valIn != 0 && scale > 0)
{
valIn /= pow(10.0, scale);
}
}
@ -1455,50 +1490,32 @@ void RowAggregation::doSum(const Row& rowIn, int64_t colIn, int64_t colOut, int
break;
}
}
// WIP MCOL-641
if (!isWideDataType)
if (LIKELY(!isWideDataType))
{
if (isNull(fRowGroupOut, fRow, colOut))
{
fRow.setLongDoubleField(valIn, colOut);
}
else
if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
{
long double valOut = fRow.getLongDoubleField(colOut);
fRow.setLongDoubleField(valIn+valOut, colOut);
}
else
{
fRow.setLongDoubleField(valIn, colOut);
}
}
else
{
uint32_t offset = fRow.getOffset(colOut);
if (colDataType == execplan::CalpontSystemCatalog::DECIMAL)
{
int128_t *dec = reinterpret_cast<int128_t*>(wideValInPtr);
if (isNull(fRowGroupOut, fRow, colOut))
{
fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
}
else
int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
if (LIKELY(!isNull(fRowGroupOut, fRow, colOut)))
{
int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
int128_t sum = *valOutPtr + *dec;
fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
}
}
else
{
uint128_t *dec = reinterpret_cast<uint128_t*>(wideValInPtr);
if (isNull(fRowGroupOut, fRow, colOut))
{
fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
}
else
{
uint128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
uint128_t sum = *valOutPtr + *dec;
fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
}
}
} // end-of isWideDataType block
}
@ -1791,7 +1808,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
case ROWAGG_AVG:
// count(column) for average is inserted after the sum,
// colOut+1 is the position of the count column.
// colOut+1 is the position of the aux count column.
doAvg(rowIn, colIn, colOut, colOut + 1);
break;
@ -1851,6 +1868,8 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
int colDataType = (fRowGroupIn.getColTypes())[colIn];
long double valIn = 0;
long double valOut = fRow.getLongDoubleField(colOut);
bool isWideDataType = false;
void *wideValInPtr = nullptr;
switch (colDataType)
{
@ -1862,7 +1881,6 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
{
valIn = rowIn.getIntField(colIn);
break;
break;
}
case execplan::CalpontSystemCatalog::UTINYINT:
@ -1877,6 +1895,15 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
uint32_t width = fRowGroupOut->getColumnWidth(colOut);
isWideDataType = width == datatypes::MAXDECIMALWIDTH;
if(LIKELY(isWideDataType))
{
int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
wideValInPtr = reinterpret_cast<void*>(dec);
}
else
{
valIn = rowIn.getIntField(colIn);
double scale = (double)(fRowGroupIn.getScale())[colIn];
@ -1884,6 +1911,7 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
{
valIn /= pow(10.0, scale);
}
}
break;
}
@ -1917,16 +1945,32 @@ void RowAggregation::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut, int6
}
}
if (fRow.getUintField(colAux) == 0)
// min(count) = 0
uint64_t count = fRow.getUintField(colAux) + 1;
fRow.setUintField<8>(count, colAux);
bool notFirstValue = count > 1;
if (LIKELY(!isWideDataType))
{
// This is the first value
if (LIKELY(notFirstValue))
fRow.setLongDoubleField(valIn + valOut, colOut);
else // This is the first value
fRow.setLongDoubleField(valIn, colOut);
fRow.setUintField(1, colAux);
}
else
{
fRow.setLongDoubleField(valIn + valOut, colOut);
fRow.setUintField(fRow.getUintField(colAux) + 1, colAux);
uint32_t offset = fRow.getOffset(colOut);
int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
if (LIKELY(notFirstValue))
{
int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
int128_t sum = *valOutPtr + *dec;
fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
}
else
{
fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
}
}
}
@ -2632,11 +2676,6 @@ void RowAggregationUM::calculateAvgColumns()
int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
int64_t colAux = fFunctionCols[i]->fAuxColumnIndex;
// int scale = fRowGroupOut->getScale()[colOut];
// int scale1 = scale >> 8;
// int scale2 = scale & 0x000000FF;
// long double factor = pow(10.0, scale2 - scale1);
for (uint64_t j = 0; j < fRowGroupOut->getRowCount(); j++)
{
fRowGroupOut->getRow(j, &fRow);
@ -2645,15 +2684,39 @@ void RowAggregationUM::calculateAvgColumns()
if (cnt == 0) // empty set, value is initialized to null.
continue;
uint32_t precision = fRow.getPrecision(colOut);
bool isWideDecimal =
datatypes::Decimal::isWideDecimalType(precision);
if (LIKELY(!isWideDecimal))
{
long double sum = 0.0;
long double avg = 0.0;
// MCOL-1822 Always long double
sum = fRow.getLongDoubleField(colOut);
avg = sum / cnt;
// avg *= factor;
fRow.setLongDoubleField(avg, colOut);
}
else
{
uint32_t offset = fRow.getOffset(colOut);
uint32_t scale = fRow.getScale(colOut);
// Get multiplied to deliver AVG with the scale closest
// to the expected original scale + 4.
// There is a counterpart in buildAggregateColumn.
datatypes::Decimal::setScalePrecision4Avg(precision, scale);
int128_t* sumPnt = fRow.getBinaryField_offset<int128_t>(offset);
uint32_t scaleDiff = scale - fRow.getScale(colOut);
// multiplication overflow check
datatypes::MultiplicationOverflowCheck multOp;
int128_t sum = 0;
if (scaleDiff > 0)
multOp(*sumPnt, datatypes::mcs_pow_10[scaleDiff], sum);
else
sum = *sumPnt;
int128_t avg = sum / cnt;
fRow.setBinaryField_offset(&avg, sizeof(avg), offset);
}
}
}
}
}
@ -4174,6 +4237,8 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
int colDataType = (fRowGroupIn.getColTypes())[colIn];
long double valIn = 0;
long double valOut = fRow.getLongDoubleField(colOut);
bool isWideDataType = false;
void *wideValInPtr = nullptr;
switch (colDataType)
{
@ -4199,14 +4264,23 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
{
uint32_t width = fRowGroupOut->getColumnWidth(colOut);
isWideDataType = width == datatypes::MAXDECIMALWIDTH;
if(LIKELY(isWideDataType))
{
int128_t* dec = rowIn.getBinaryField<int128_t>(colIn);
wideValInPtr = reinterpret_cast<void*>(dec);
}
else
{
valIn = rowIn.getIntField(colIn);
break;
double scale = (double)(fRowGroupIn.getScale())[colIn];
if (valIn != 0 && scale > 0)
{
valIn /= pow(10.0, scale);
}
}
break;
}
@ -4240,17 +4314,37 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
}
}
int64_t cnt = fRow.getUintField(colAux);
if (cnt == 0)
uint64_t cnt = fRow.getUintField(colAux);
if (LIKELY(!isWideDataType))
{
if (LIKELY(cnt > 0))
{
fRow.setLongDoubleField(valIn + valOut, colOut);
fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
}
else
{
fRow.setLongDoubleField(valIn, colOut);
fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
}
}
else
{
fRow.setLongDoubleField(valIn + valOut, colOut);
uint32_t offset = fRow.getOffset(colOut);
int128_t* dec = reinterpret_cast<int128_t*>(wideValInPtr);
if (LIKELY(cnt > 0))
{
int128_t *valOutPtr = fRow.getBinaryField(valOutPtr, colOut);
int128_t sum = *valOutPtr + *dec;
fRow.setBinaryField_offset(&sum, sizeof(sum), offset);
fRow.setUintField(rowIn.getUintField(colIn + 1) + cnt, colAux);
}
else
{
fRow.setBinaryField_offset(dec, sizeof(*dec), offset);
fRow.setUintField(rowIn.getUintField(colIn + 1), colAux);
}
}
}
//------------------------------------------------------------------------------

View File

@ -653,6 +653,7 @@ protected:
copyRow(fNullRow, &row);
}
inline void updateIntMinMax(int128_t* val1, int128_t* val2, int64_t col, int func);
inline void updateIntMinMax(int64_t val1, int64_t val2, int64_t col, int func);
inline void updateUintMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);
inline void updateCharMinMax(uint64_t val1, uint64_t val2, int64_t col, int func);

View File

@ -810,25 +810,16 @@ inline uint32_t Row::getStringLength(uint32_t colIndex) const
return strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
}
// WIP Remove this
// Check whether memcpy affects perf here
/*inline void Row::setBinaryField(const uint8_t* strdata, uint32_t length, uint32_t offset)
{
memcpy(&data[offset], strdata, length);
}*/
// MCOL-641. This method can be applied to uint8_t* buffers.
// Copies `width` bytes from `value` into the row buffer at the offset
// recorded for column `colIndex`.
template<typename T>
inline void Row::setBinaryField(T* value, uint32_t width, uint32_t colIndex)
{
    auto* destination = &data[offsets[colIndex]];
    memcpy(destination, value, width);
}
// MCOL-641. This method !cannot! be applied to uint8_t* buffers.
// This method !cannot! be applied to uint8_t* buffers.
template<typename T>
inline void Row::setBinaryField_offset(T* value, uint32_t width, uint32_t offset)
{
// WIP Compare performance.
*reinterpret_cast<T*>(&data[offset]) = *value;
}
@ -871,23 +862,15 @@ inline std::string Row::getStringField(uint32_t colIndex) const
strnlen((char*) &data[offsets[colIndex]], getColumnWidth(colIndex)));
}
/*inline std::string Row::getBinaryField(uint32_t colIndex) const
{
return std::string((char*) &data[offsets[colIndex]], getColumnWidth(colIndex));
}*/
// WIP MCOL-641
template <typename T>
inline T* Row::getBinaryField(uint32_t colIndex) const
{
//return reinterpret_cast<T*>(&data[offsets[colIndex]]);
return getBinaryField_offset<T>(offsets[colIndex]);
}
template <typename T>
inline T* Row::getBinaryField(T* argtype, uint32_t colIndex) const
{
//return reinterpret_cast<T*>(&data[offsets[colIndex]]);
return getBinaryField_offset<T>(offsets[colIndex]);
}