1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-30 19:23:07 +03:00

MCOL-1793, fix REGR_SLOPE calculations, change scale to DECIMAL_NOT_SPECIFIED (variable length) for most REGR_*** functions.

This commit is contained in:
David Hall
2018-11-20 16:29:17 -06:00
parent 06d1c9fcbe
commit 9c1bc910a5
11 changed files with 44 additions and 39 deletions

View File

@ -61,8 +61,8 @@ mcsv1_UDAF::ReturnCode corr::init(mcsv1Context* context,
context->setUserDataSize(sizeof(corr_data)); context->setUserDataSize(sizeof(corr_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;

View File

@ -59,8 +59,8 @@ mcsv1_UDAF::ReturnCode covar_pop::init(mcsv1Context* context,
context->setUserDataSize(sizeof(covar_pop_data)); context->setUserDataSize(sizeof(covar_pop_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;

View File

@ -59,8 +59,8 @@ mcsv1_UDAF::ReturnCode covar_samp::init(mcsv1Context* context,
context->setUserDataSize(sizeof(covar_samp_data)); context->setUserDataSize(sizeof(covar_samp_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;
@ -136,7 +136,7 @@ mcsv1_UDAF::ReturnCode covar_samp::evaluate(mcsv1Context* context, static_any::a
{ {
struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data; struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data;
double N = data->cnt; double N = data->cnt;
if (N > 0) if (N > 1)
{ {
double sumx = data->sumx; double sumx = data->sumx;
double sumy = data->sumy; double sumy = data->sumy;
@ -145,6 +145,11 @@ mcsv1_UDAF::ReturnCode covar_samp::evaluate(mcsv1Context* context, static_any::a
double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1); double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1);
valOut = covar_samp; valOut = covar_samp;
} }
else
if (N == 1)
{
valOut = 0;
}
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;
} }

View File

@ -60,8 +60,8 @@ mcsv1_UDAF::ReturnCode regr_intercept::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_intercept_data)); context->setUserDataSize(sizeof(regr_intercept_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;
@ -145,13 +145,13 @@ mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_an
double sumy = data->sumy; double sumy = data->sumy;
double sumx2 = data->sumx2; double sumx2 = data->sumx2;
double sumxy = data->sumxy; double sumxy = data->sumxy;
double slope = 0.0; double slope = 0;
double variance = (N * sumx2) - (sumx * sumx); double variance = (N * sumx2) - (sumx * sumx);
if (variance != 0) if (variance != 0)
{ {
slope = ((N * sumxy) - (sumx * sumy)) / variance; slope = ((N * sumxy) - (sumx * sumy)) / variance;
valOut = (sumy - (slope * sumx)) / N;
} }
valOut = (sumy - (slope * sumx)) / N;
} }
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;
} }

View File

@ -61,8 +61,8 @@ mcsv1_UDAF::ReturnCode regr_r2::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_r2_data)); context->setUserDataSize(sizeof(regr_r2_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;

View File

@ -60,8 +60,8 @@ mcsv1_UDAF::ReturnCode regr_slope::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_slope_data)); context->setUserDataSize(sizeof(regr_slope_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;
@ -141,14 +141,16 @@ mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::a
double N = data->cnt; double N = data->cnt;
if (N > 0) if (N > 0)
{ {
// COVAR_POP(y, x) / VAR_POP(x)
double sumx = data->sumx; double sumx = data->sumx;
double sumy = data->sumy; double sumy = data->sumy;
double sumx2 = data->sumx2; double sumx2 = data->sumx2;
double sumxy = data->sumxy; double sumxy = data->sumxy;
double variance = (N * sumx2) - (sumx * sumx); double covar_pop = N * sumxy - sumx * sumy;
if (variance != 0) double var_pop = N * sumx2 - sumx * sumx;
if (var_pop != 0)
{ {
double slope = ((N * sumxy) - (sumx * sumy)) / variance; double slope = covar_pop / var_pop;
valOut = slope; valOut = slope;
} }
} }

View File

@ -58,8 +58,8 @@ mcsv1_UDAF::ReturnCode regr_sxx::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_sxx_data)); context->setUserDataSize(sizeof(regr_sxx_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;

View File

@ -59,8 +59,8 @@ mcsv1_UDAF::ReturnCode regr_sxy::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_sxy_data)); context->setUserDataSize(sizeof(regr_sxy_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;

View File

@ -58,8 +58,8 @@ mcsv1_UDAF::ReturnCode regr_syy::init(mcsv1Context* context,
context->setUserDataSize(sizeof(regr_syy_data)); context->setUserDataSize(sizeof(regr_syy_data));
context->setResultType(CalpontSystemCatalog::DOUBLE); context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8); context->setColWidth(8);
context->setScale(colTypes[0].scale + 8); context->setScale(DECIMAL_NOT_SPECIFIED);
context->setPrecision(19); context->setPrecision(0);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS); context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS; return mcsv1_UDAF::SUCCESS;

View File

@ -581,12 +581,13 @@ extern "C"
double sumy = data->sumy; double sumy = data->sumy;
double sumx2 = data->sumx2; double sumx2 = data->sumx2;
double sumxy = data->sumxy; double sumxy = data->sumxy;
double slope = 0;
double variance = (N * sumx2) - (sumx * sumx); double variance = (N * sumx2) - (sumx * sumx);
if (variance) if (variance)
{ {
double slope = ((N * sumxy) - (sumx * sumy)) / variance; slope = ((N * sumxy) - (sumx * sumy)) / variance;
return (sumy - (slope * sumx)) / N;
} }
return (sumy - (slope * sumx)) / N;
} }
*is_null = 1; *is_null = 1;
return 0; return 0;

View File

@ -1673,11 +1673,10 @@ void RowAggregation::updateEntry(const Row& rowIn)
{ {
for (uint64_t i = 0; i < fFunctionCols.size(); i++) for (uint64_t i = 0; i < fFunctionCols.size(); i++)
{ {
SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[i]; int64_t colIn = fFunctionCols[i]->fInputColumnIndex;
int64_t colIn = pFunctionCol->fInputColumnIndex; int64_t colOut = fFunctionCols[i]->fOutputColumnIndex;
int64_t colOut = pFunctionCol->fOutputColumnIndex;
switch (pFunctionCol->fAggFunction) switch (fFunctionCols[i]->fAggFunction)
{ {
case ROWAGG_COUNT_COL_NAME: case ROWAGG_COUNT_COL_NAME:
@ -1691,7 +1690,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
case ROWAGG_MIN: case ROWAGG_MIN:
case ROWAGG_MAX: case ROWAGG_MAX:
case ROWAGG_SUM: case ROWAGG_SUM:
doMinMaxSum(rowIn, colIn, colOut, pFunctionCol->fAggFunction); doMinMaxSum(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction);
break; break;
case ROWAGG_AVG: case ROWAGG_AVG:
@ -1708,7 +1707,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
case ROWAGG_BIT_OR: case ROWAGG_BIT_OR:
case ROWAGG_BIT_XOR: case ROWAGG_BIT_XOR:
{ {
doBitOp(rowIn, colIn, colOut, pFunctionCol->fAggFunction); doBitOp(rowIn, colIn, colOut, fFunctionCols[i]->fAggFunction);
break; break;
} }
@ -1731,7 +1730,7 @@ void RowAggregation::updateEntry(const Row& rowIn)
{ {
std::ostringstream errmsg; std::ostringstream errmsg;
errmsg << "RowAggregation: function (id = " << errmsg << "RowAggregation: function (id = " <<
(uint64_t) pFunctionCol->fAggFunction << ") is not supported."; (uint64_t) fFunctionCols[i]->fAggFunction << ") is not supported.";
cerr << errmsg.str() << endl; cerr << errmsg.str() << endl;
throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr); throw logging::QueryDataExcept(errmsg.str(), logging::aggregateFuncErr);
break; break;
@ -2015,7 +2014,6 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
for (uint32_t i = 0; i < paramCount; ++i) for (uint32_t i = 0; i < paramCount; ++i)
{ {
SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx];
mcsv1sdk::ColumnDatum& datum = valsIn[i]; mcsv1sdk::ColumnDatum& datum = valsIn[i];
// Turn on NULL flags based on the data // Turn on NULL flags based on the data
dataFlags[i] = 0; dataFlags[i] = 0;
@ -2024,9 +2022,9 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
// to access the constant value rather than a row value. // to access the constant value rather than a row value.
cc = NULL; cc = NULL;
if (pFunctionCol->fpConstCol) if (fFunctionCols[funcColsIdx]->fpConstCol)
{ {
cc = dynamic_cast<ConstantColumn*>(pFunctionCol->fpConstCol.get()); cc = dynamic_cast<ConstantColumn*>(fFunctionCols[funcColsIdx]->fpConstCol.get());
} }
if ((cc && cc->type() == ConstantColumn::NULLDATA) if ((cc && cc->type() == ConstantColumn::NULLDATA)
@ -2243,9 +2241,8 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
&& fFunctionCols[funcColsIdx + 1]->fAggFunction == ROWAGG_MULTI_PARM) && fFunctionCols[funcColsIdx + 1]->fAggFunction == ROWAGG_MULTI_PARM)
{ {
++funcColsIdx; ++funcColsIdx;
SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx]; colIn = fFunctionCols[funcColsIdx]->fInputColumnIndex;
colIn = pFunctionCol->fInputColumnIndex; colOut = fFunctionCols[funcColsIdx]->fOutputColumnIndex;
colOut = pFunctionCol->fOutputColumnIndex;
} }
else else
{ {