You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
Reapply "fix(aggregation, disk-based) MCOL-5691 distinct aggregate disk based (#3145)"
This reverts commit a5c12b98d7.
This commit is contained in:
committed by
drrtuy
parent
5e5d328269
commit
e0a01c6cf4
@ -57,7 +57,7 @@
|
||||
#include "rowstorage.h"
|
||||
|
||||
//..comment out NDEBUG to enable assertions, uncomment NDEBUG to disable
|
||||
//#define NDEBUG
|
||||
// #define NDEBUG
|
||||
#include "mcs_decimal.h"
|
||||
|
||||
using namespace std;
|
||||
@ -315,7 +315,7 @@ void RowAggregation::updateStringMinMax(utils::NullString val1, utils::NullStrin
|
||||
if (val1.isNull())
|
||||
{
|
||||
// as any comparison with NULL is false, it should not affect min/max ranges.
|
||||
return ; // do nothing.
|
||||
return; // do nothing.
|
||||
}
|
||||
CHARSET_INFO* cs = fRow.getCharset(col);
|
||||
int tmp = cs->strnncoll(val1.str(), val1.length(), val2.str(), val2.length());
|
||||
@ -810,8 +810,9 @@ void RowAggregation::aggregateRow(Row& row, const uint64_t* hash,
|
||||
std::vector<mcsv1sdk::mcsv1Context>* rgContextColl)
|
||||
{
|
||||
uint32_t cnt = fRollupFlag ? fGroupByCols.size() : 1;
|
||||
for (uint32_t z = 0; z < cnt; z++) {
|
||||
// groupby column list is not empty, find the entry.
|
||||
for (uint32_t z = 0; z < cnt; z++)
|
||||
{
|
||||
// groupby column list is not empty, find the entry.
|
||||
if (!fGroupByCols.empty())
|
||||
{
|
||||
bool is_new_row;
|
||||
@ -856,7 +857,8 @@ void RowAggregation::aggregateRow(Row& row, const uint64_t* hash,
|
||||
updateEntry(row, rgContextColl);
|
||||
// these quantities are unsigned and comparing z and cnt - 1 can be incorrect
|
||||
// because cnt can be zero.
|
||||
if ((z + 1 < cnt)) {
|
||||
if ((z + 1 < cnt))
|
||||
{
|
||||
// if we are rolling up, we mark appropriate field as NULL and also increment
|
||||
// value in the "mark" column, so that we can differentiate between data and
|
||||
// various rollups.
|
||||
@ -1169,8 +1171,8 @@ void RowAggregation::doMinMax(const Row& rowIn, int64_t colIn, int64_t colOut, i
|
||||
{
|
||||
if (LIKELY(rowIn.getColumnWidth(colIn) == datatypes::MAXDECIMALWIDTH))
|
||||
{
|
||||
updateIntMinMax(rowIn.getTSInt128Field(colIn).getValue(), fRow.getTSInt128Field(colOut).getValue(), colOut,
|
||||
funcType);
|
||||
updateIntMinMax(rowIn.getTSInt128Field(colIn).getValue(), fRow.getTSInt128Field(colOut).getValue(),
|
||||
colOut, funcType);
|
||||
}
|
||||
else if (rowIn.getColumnWidth(colIn) <= datatypes::MAXLEGACYWIDTH)
|
||||
{
|
||||
@ -2120,10 +2122,9 @@ void RowAggregation::doStatistics(const Row& rowIn, int64_t colIn, int64_t colOu
|
||||
long double mean = fRow.getLongDoubleField(colAux);
|
||||
long double scaledMomentum2 = fRow.getLongDoubleField(colAux + 1);
|
||||
volatile long double delta = valIn - mean;
|
||||
mean += delta/count;
|
||||
mean += delta / count;
|
||||
scaledMomentum2 += delta * (valIn - mean);
|
||||
|
||||
|
||||
fRow.setDoubleField(count, colOut);
|
||||
fRow.setLongDoubleField(mean, colAux);
|
||||
fRow.setLongDoubleField(scaledMomentum2, colAux + 1);
|
||||
@ -2173,8 +2174,7 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut, int
|
||||
cc = dynamic_cast<execplan::ConstantColumn*>(fFunctionCols[funcColsIdx]->fpConstCol.get());
|
||||
}
|
||||
|
||||
if ((cc && cc->isNull()) ||
|
||||
(!cc && isNull(&fRowGroupIn, rowIn, colIn) == true))
|
||||
if ((cc && cc->isNull()) || (!cc && isNull(&fRowGroupIn, rowIn, colIn) == true))
|
||||
{
|
||||
if (udafContextsColl[origFuncColsIdx].getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
|
||||
{
|
||||
@ -2500,7 +2500,8 @@ void RowAggregation::loadEmptySet(messageqcpp::ByteStream& bs)
|
||||
//------------------------------------------------------------------------------
|
||||
RowAggregationUM::RowAggregationUM(const vector<SP_ROWAGG_GRPBY_t>& rowAggGroupByCols,
|
||||
const vector<SP_ROWAGG_FUNC_t>& rowAggFunctionCols,
|
||||
joblist::ResourceManager* r, boost::shared_ptr<int64_t> sessionLimit, bool withRollup)
|
||||
joblist::ResourceManager* r, boost::shared_ptr<int64_t> sessionLimit,
|
||||
bool withRollup)
|
||||
: RowAggregation(rowAggGroupByCols, rowAggFunctionCols, r, sessionLimit, withRollup)
|
||||
, fHasAvg(false)
|
||||
, fHasStatsFunc(false)
|
||||
@ -3228,7 +3229,7 @@ void RowAggregationUM::SetUDAFAnyValue(static_any::any& valOut, int64_t colOut)
|
||||
|
||||
case execplan::CalpontSystemCatalog::CHAR:
|
||||
case execplan::CalpontSystemCatalog::VARCHAR:
|
||||
case execplan::CalpontSystemCatalog::TEXT: fRow.setStringField(strOut, colOut); break;
|
||||
case execplan::CalpontSystemCatalog::TEXT: fRow.setStringField(strOut, colOut); break;
|
||||
|
||||
case execplan::CalpontSystemCatalog::VARBINARY:
|
||||
case execplan::CalpontSystemCatalog::CLOB:
|
||||
@ -4220,13 +4221,26 @@ bool RowAggregationUM::nextRowGroup()
|
||||
return more;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
// Advance to the next output row group.
// Asks fRowAggStorage for its next output RGData (received into fCurRGData)
// and, only when one was returned, points fRowGroupOut at that data.
// return - the flag reported by getNextOutputRGData(): true if fRowGroupOut
//          was updated, false if it was left untouched.
//------------------------------------------------------------------------------
bool RowAggregationUM::nextOutputRowGroup()
|
||||
{
|
||||
bool more = fRowAggStorage->getNextOutputRGData(fCurRGData);
|
||||
|
||||
if (more)
|
||||
{
|
||||
fRowGroupOut->setData(fCurRGData.get());
|
||||
}
|
||||
|
||||
return more;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
// Row Aggregation constructor used on UM
|
||||
// For 2nd phase of two-phase case, from partial RG to final aggregated RG
|
||||
//------------------------------------------------------------------------------
|
||||
RowAggregationUMP2::RowAggregationUMP2(const vector<SP_ROWAGG_GRPBY_t>& rowAggGroupByCols,
|
||||
const vector<SP_ROWAGG_FUNC_t>& rowAggFunctionCols,
|
||||
joblist::ResourceManager* r, boost::shared_ptr<int64_t> sessionLimit, bool withRollup)
|
||||
joblist::ResourceManager* r, boost::shared_ptr<int64_t> sessionLimit,
|
||||
bool withRollup)
|
||||
: RowAggregationUM(rowAggGroupByCols, rowAggFunctionCols, r, sessionLimit, withRollup)
|
||||
{
|
||||
}
|
||||
@ -4450,7 +4464,7 @@ void RowAggregationUMP2::doAvg(const Row& rowIn, int64_t colIn, int64_t colOut,
|
||||
{
|
||||
if (LIKELY(cnt > 0))
|
||||
{
|
||||
int128_t valOut = fRow.getTSInt128Field(colOut).getValue();;
|
||||
int128_t valOut = fRow.getTSInt128Field(colOut).getValue();
|
||||
int128_t sum = valOut + wideValue;
|
||||
fRow.setInt128Field(sum, colOut);
|
||||
fRow.setUintField(rowIn.getUintField(colAuxIn) + cnt, colAux);
|
||||
@ -4509,7 +4523,8 @@ void RowAggregationUMP2::doStatistics(const Row& rowIn, int64_t colIn, int64_t c
|
||||
{
|
||||
volatile long double delta = mean - blockMean;
|
||||
nextMean = (mean * count + blockMean * blockCount) / nextCount;
|
||||
nextScaledMomentum2 = scaledMomentum2 + blockScaledMomentum2 + delta * delta * (count * blockCount / nextCount);
|
||||
nextScaledMomentum2 =
|
||||
scaledMomentum2 + blockScaledMomentum2 + delta * delta * (count * blockCount / nextCount);
|
||||
}
|
||||
fRow.setDoubleField(nextCount, colOut);
|
||||
fRow.setLongDoubleField(nextMean, colAux);
|
||||
@ -4682,19 +4697,29 @@ void RowAggregationDistinct::addRowGroup(const RowGroup* pRows,
|
||||
//------------------------------------------------------------------------------
|
||||
void RowAggregationDistinct::doDistinctAggregation()
|
||||
{
|
||||
while (dynamic_cast<RowAggregationUM*>(fAggregator.get())->nextRowGroup())
|
||||
auto* umAggregator = dynamic_cast<RowAggregationUM*>(fAggregator.get());
|
||||
if (umAggregator)
|
||||
{
|
||||
fRowGroupIn.setData(fAggregator->getOutputRowGroup()->getRGData());
|
||||
|
||||
Row rowIn;
|
||||
fRowGroupIn.initRow(&rowIn);
|
||||
fRowGroupIn.getRow(0, &rowIn);
|
||||
|
||||
for (uint64_t i = 0; i < fRowGroupIn.getRowCount(); ++i, rowIn.nextRow())
|
||||
while (umAggregator->nextOutputRowGroup())
|
||||
{
|
||||
aggregateRow(rowIn);
|
||||
fRowGroupIn.setData(fAggregator->getOutputRowGroup()->getRGData());
|
||||
|
||||
Row rowIn;
|
||||
fRowGroupIn.initRow(&rowIn);
|
||||
fRowGroupIn.getRow(0, &rowIn);
|
||||
|
||||
for (uint64_t i = 0; i < fRowGroupIn.getRowCount(); ++i, rowIn.nextRow())
|
||||
{
|
||||
aggregateRow(rowIn);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ostringstream errmsg;
|
||||
errmsg << "RowAggregationDistinct: incorrect fAggregator class.";
|
||||
cerr << errmsg.str() << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void RowAggregationDistinct::doDistinctAggregation_rowVec(vector<std::pair<Row::Pointer, uint64_t>>& inRows)
|
||||
|
Reference in New Issue
Block a user