1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-4516 check for var_pop < 0

In some cases, because of rounding error, var_pop will evaluate to a value just less than 0. We check for this and force the result to 0.
This commit is contained in:
David Hall
2021-03-09 13:36:10 -06:00
parent 508d5455a8
commit af20387985
8 changed files with 37 additions and 26 deletions

View File

@ -160,13 +160,13 @@ mcsv1_UDAF::ReturnCode corr::evaluate(mcsv1Context* context, static_any::any& va
long double sumxy = data->sumxy;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
if (var_popx <= 0) // Catch -0
{
// When var_popx is 0, NULL is the result.
return mcsv1_UDAF::SUCCESS;
}
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
if (var_popy <= 0) // Catch -0
{
// When var_popy is 0, NULL is the result
return mcsv1_UDAF::SUCCESS;

View File

@ -149,7 +149,7 @@ mcsv1_UDAF::ReturnCode covar_pop::evaluate(mcsv1Context* context, static_any::an
long double sumy = data->sumy;
long double sumxy = data->sumxy;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N ;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
valOut = static_cast<double>(covar_pop);
}
return mcsv1_UDAF::SUCCESS;

View File

@ -145,16 +145,18 @@ mcsv1_UDAF::ReturnCode regr_intercept::subEvaluate(mcsv1Context* context, const
mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
double N = data->cnt;
long double N = data->cnt;
if (N > 1)
{
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumx2 = data->sumx2;
long double sumxy = data->sumxy;
// regr_intercept is AVG(y) - slope(y,x)*avg(x)
// We do some algebra and we can get a slightly smaller calculation
long double numerator = sumy * sumx2 - sumx * sumxy;
long double var_pop = (N * sumx2) - (sumx * sumx);
if (var_pop != 0)
long double var_pop = (N * sumx2) - (sumx * sumx); // Not really var_pop, but sort of after some reductions
if (var_pop > 0)
{
valOut = static_cast<double>(numerator / var_pop);
}

View File

@ -160,13 +160,13 @@ mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any&
long double sumxy = data->sumxy;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
if (var_popx <= 0) // Catch -0
{
// When var_popx is 0, NULL is the result.
return mcsv1_UDAF::SUCCESS;
}
double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
if (var_popy <= 0) // Catch -0
{
// When var_popy is 0, 1 is the result
valOut = 1.0;

View File

@ -152,12 +152,14 @@ mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::a
long double sumy = data->sumy;
long double sumx2 = data->sumx2;
long double sumxy = data->sumxy;
// These aren't really covar_pop and var_pop. For the purposes of this calculation
// we multiplied everything by N to reduce calc time and variance.
// It all comes out after the final divide
long double covar_pop = N * sumxy - sumx * sumy;
long double var_pop = N * sumx2 - sumx * sumx;
if (var_pop != 0)
if (var_pop > 0)
{
long double slope = covar_pop / var_pop;
valOut = static_cast<double>(slope);
valOut = static_cast<double>(covar_pop / var_pop);
}
}
return mcsv1_UDAF::SUCCESS;

View File

@ -128,8 +128,10 @@ mcsv1_UDAF::ReturnCode regr_sxx::evaluate(mcsv1Context* context, static_any::any
long double N = data->cnt;
if (N > 0)
{
long double var_popx = (data->sumx2 - (data->sumx * data->sumx / N));
valOut = static_cast<double>(var_popx);
long double regr_sxx = (data->sumx2 - (data->sumx * data->sumx / N));
if (regr_sxx < 0) // catch -0
regr_sxx = 0;
valOut = static_cast<double>(regr_sxx);
}
return mcsv1_UDAF::SUCCESS;
}

View File

@ -129,6 +129,8 @@ mcsv1_UDAF::ReturnCode regr_syy::evaluate(mcsv1Context* context, static_any::any
if (N > 0)
{
long double var_popy = (data->sumy2 - (data->sumy * data->sumy / N));
if (var_popy < 0) // might be -0
var_popy = 0;
valOut = static_cast<double>(var_popy);
}
return mcsv1_UDAF::SUCCESS;

View File

@ -537,7 +537,7 @@ extern "C"
long double sumxy = data->sumxy;
long double covar_pop = N * sumxy - sumx * sumy;
long double var_pop = N * sumx2 - sumx * sumx;
if (var_pop != 0)
if (var_pop > 0)
{
valOut = static_cast<double>(covar_pop / var_pop);
*is_null = 0;
@ -657,7 +657,7 @@ extern "C"
long double sumxy = data->sumxy;
long double numerator = sumy * sumx2 - sumx * sumxy;
long double var_pop = (N * sumx2) - (sumx * sumx);
if (var_pop != 0)
if (var_pop > 0)
{
valOut = static_cast<double>(numerator / var_pop);
*is_null = 0;
@ -781,14 +781,14 @@ extern "C"
long double sumy2 = data->sumy2;
long double sumxy = data->sumxy;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
if (var_popx <= 0) // Catch -0
{
// When var_popx is 0, NULL is the result.
*is_null = 1;
return 0;
}
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
if (var_popy <= 0) // Catch -0
{
// When var_popy is 0, 1 is the result
return 1;
@ -921,14 +921,14 @@ extern "C"
long double sumy2 = data->sumy2;
long double sumxy = data->sumxy;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
if (var_popx <= 0) // Catch -0
{
// When var_popx is 0, NULL is the result.
*is_null = 1;
return 0;
}
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
if (var_popy <= 0) // Catch -0
{
// When var_popy is 0, 1 is the result
return 1;
@ -1044,8 +1044,10 @@ extern "C"
{
long double sumx = data->sumx;
long double sumx2 = data->sumx2;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
valOut = static_cast<double>(N * var_popx);
long double sxx = (sumx2 - (sumx * sumx / N));
if (sxx < 0) // catch -0
sxx = 0;
valOut = static_cast<double>(sxx);
}
else
{
@ -1151,8 +1153,10 @@ extern "C"
{
long double sumy = data->sumy;
long double sumy2 = data->sumy2;
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
valOut = static_cast<double>(N * var_popy);
long double syy = (sumy2 - (sumy * sumy / N));
if (syy < 0) // might be -0
syy = 0;
valOut = static_cast<double>(syy);
}
else
{
@ -1265,8 +1269,7 @@ extern "C"
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumxy = data->sumxy;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
long double regr_sxy = N * covar_pop;
long double regr_sxy = (sumxy - ((sumx * sumy) / N));
valOut = static_cast<double>(regr_sxy);
}
else
@ -1380,7 +1383,7 @@ extern "C"
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumxy = data->sumxy;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N ;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
valOut = static_cast<double>(covar_pop);
}
else