1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-02 17:22:27 +03:00

MCOL-1985 Modify regrmysql to use long double where practical and to use the latest algorithms for regr_xxx functions.

This commit is contained in:
David Hall
2019-04-15 16:36:27 -05:00
parent 19ebde8b2a
commit c17e32d5e0
3 changed files with 257 additions and 178 deletions

View File

@ -147,7 +147,7 @@ extern "C"
*/
struct regr_avgx_data
{
double sumx;
long double sumx;
int64_t cnt;
};
@ -159,21 +159,26 @@ extern "C"
struct regr_avgx_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_avgx() requires two arguments");
return 1;
strcpy(message,"regr_avgx() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[1], args->attributes[1])))
{
strcpy(message,"regr_avgx() with a non-numeric independant (second) argument");
return 1;
}
if (args->arg_type[1] == DECIMAL_RESULT)
{
initid->decimals +=4;
}
if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data))))
if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->sumx = 0;
data->sumx = 0;
data->cnt = 0;
initid->ptr = (char*)data;
@ -226,7 +231,17 @@ extern "C"
char* is_null, char* error __attribute__((unused)))
{
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
return data->sumx / data->cnt;
double valOut = 0;
if (data->cnt > 0)
{
valOut = static_cast<double>(data->sumx / data->cnt);
}
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -236,8 +251,8 @@ extern "C"
*/
struct regr_avgy_data
{
double sumy;
int64_t cnt;
long double sumy;
int64_t cnt;
};
#ifdef _MSC_VER
@ -248,8 +263,8 @@ extern "C"
struct regr_avgy_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_avgy() requires two arguments");
return 1;
strcpy(message,"regr_avgy() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0])))
{
@ -257,10 +272,15 @@ extern "C"
return 1;
}
if (!(data = (struct regr_avgy_data*) malloc(sizeof(struct regr_avgy_data))))
if (args->arg_type[0] == DECIMAL_RESULT)
{
initid->decimals +=4;
}
if (!(data = (struct regr_avgy_data*) malloc(sizeof(struct regr_avgy_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->sumy = 0;
data->cnt = 0;
@ -315,7 +335,16 @@ extern "C"
char* is_null, char* error __attribute__((unused)))
{
struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr;
return data->sumy / data->cnt;
double valOut = 0;
if (data->cnt > 0)
{
valOut = static_cast<double>(data->sumy / data->cnt);
}
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -336,14 +365,14 @@ extern "C"
struct regr_count_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_count() requires two arguments");
return 1;
strcpy(message,"regr_count() requires two arguments");
return 1;
}
if (!(data = (struct regr_count_data*) malloc(sizeof(struct regr_count_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
@ -405,10 +434,10 @@ extern "C"
struct regr_slope_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumxy; // sum of (x*y)
long double sumx;
long double sumx2; // sum of (x squared)
long double sumy;
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -419,8 +448,8 @@ extern "C"
struct regr_slope_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_slope() requires two arguments");
return 1;
strcpy(message,"regr_slope() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -428,10 +457,12 @@ extern "C"
return 1;
}
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct regr_slope_data*) malloc(sizeof(struct regr_slope_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -497,20 +528,23 @@ extern "C"
{
struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
*is_null = 1;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumxy = data->sumxy;
double variance = (N * sumx2) - (sumx * sumx);
if (variance)
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumx2 = data->sumx2;
long double sumxy = data->sumxy;
long double covar_pop = N * sumxy - sumx * sumy;
long double var_pop = N * sumx2 - sumx * sumx;
if (var_pop != 0)
{
return ((N * sumxy) - (sumx * sumy)) / variance;
valOut = static_cast<double>(covar_pop / var_pop);
*is_null = 0;
}
}
*is_null = 1;
return 0;
return valOut;
}
//=======================================================================
@ -521,10 +555,10 @@ extern "C"
struct regr_intercept_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumxy; // sum of (x*y)
long double sumx;
long double sumx2; // sum of (x squared)
long double sumy;
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -535,8 +569,8 @@ extern "C"
struct regr_intercept_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_intercept() requires two arguments");
return 1;
strcpy(message,"regr_intercept() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -544,10 +578,11 @@ extern "C"
return 1;
}
if (!(data = (struct regr_intercept_data*) malloc(sizeof(struct regr_intercept_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct regr_intercept_data*) malloc(sizeof(struct regr_intercept_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -613,22 +648,23 @@ extern "C"
{
struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
*is_null = 1;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumxy = data->sumxy;
double slope = 0;
double variance = (N * sumx2) - (sumx * sumx);
if (variance)
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumx2 = data->sumx2;
long double sumxy = data->sumxy;
long double numerator = sumy * sumx2 - sumx * sumxy;
long double var_pop = (N * sumx2) - (sumx * sumx);
if (var_pop != 0)
{
slope = ((N * sumxy) - (sumx * sumy)) / variance;
valOut = static_cast<double>(numerator / var_pop);
*is_null = 0;
}
return (sumy - (slope * sumx)) / N;
}
*is_null = 1;
return 0;
return valOut;
}
//=======================================================================
@ -639,11 +675,11 @@ extern "C"
struct regr_r2_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumy2; // sum of (y squared)
double sumxy; // sum of (x*y)
long double sumx;
long double sumx2; // sum of (x squared)
long double sumy;
long double sumy2; // sum of (y squared)
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -654,8 +690,8 @@ extern "C"
struct regr_r2_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_r2() requires two arguments");
return 1;
strcpy(message,"regr_r2() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -663,10 +699,12 @@ extern "C"
return 1;
}
if (!(data = (struct regr_r2_data*) malloc(sizeof(struct regr_r2_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct regr_r2_data*) malloc(sizeof(struct regr_r2_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -735,34 +773,38 @@ extern "C"
{
struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumy2 = data->sumy2;
double sumxy = data->sumxy;
double var_popx = (sumx2 - (sumx * sumx / N)) / N;
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumx2 = data->sumx2;
long double sumy2 = data->sumy2;
long double sumxy = data->sumxy;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
{
// When var_popx is 0, NULL is the result.
*is_null = 1;
return 0;
}
double var_popy = (sumy2 - (sumy * sumy / N)) / N;
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
{
// When var_popy is 0, 1 is the result
return 1;
}
double std_popx = sqrt(var_popx);
double std_popy = sqrt(var_popy);
double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
double corr = covar_pop / (std_popy * std_popx);
return corr * corr;
long double std_popx = sqrt(var_popx);
long double std_popy = sqrt(var_popy);
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
long double corr = covar_pop / (std_popy * std_popx);
valOut = static_cast<double>(corr * corr);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -773,11 +815,11 @@ extern "C"
struct corr_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumy2; // sum of (y squared)
double sumxy; // sum of (x*y)
long double sumx;
long double sumx2; // sum of (x squared)
long double sumy;
long double sumy2; // sum of (y squared)
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -788,8 +830,8 @@ extern "C"
struct corr_data* data;
if (args->arg_count != 2)
{
strcpy(message,"corr() requires two arguments");
return 1;
strcpy(message,"corr() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -797,10 +839,12 @@ extern "C"
return 1;
}
if (!(data = (struct corr_data*) malloc(sizeof(struct corr_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct corr_data*) malloc(sizeof(struct corr_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -869,34 +913,38 @@ extern "C"
{
struct corr_data* data = (struct corr_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumy2 = data->sumy2;
double sumxy = data->sumxy;
double var_popx = (sumx2 - (sumx * sumx / N)) / N;
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumx2 = data->sumx2;
long double sumy2 = data->sumy2;
long double sumxy = data->sumxy;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
{
// When var_popx is 0, NULL is the result.
*is_null = 1;
return 0;
}
double var_popy = (sumy2 - (sumy * sumy / N)) / N;
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
{
// When var_popy is 0, 1 is the result
return 1;
}
double std_popx = sqrt(var_popx);
double std_popy = sqrt(var_popy);
double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
double corr = covar_pop / (std_popy * std_popx);
return corr;
long double std_popx = sqrt(var_popx);
long double std_popy = sqrt(var_popy);
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
long double corr = covar_pop / (std_popy * std_popx);
return static_cast<double>(corr);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -907,8 +955,8 @@ extern "C"
struct regr_sxx_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
long double sumx;
long double sumx2; // sum of (x squared)
};
#ifdef _MSC_VER
@ -919,8 +967,8 @@ extern "C"
struct regr_sxx_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_sxx() requires two arguments");
return 1;
strcpy(message,"regr_sxx() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -928,10 +976,12 @@ extern "C"
return 1;
}
if (!(data = (struct regr_sxx_data*) malloc(sizeof(struct regr_sxx_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct regr_sxx_data*) malloc(sizeof(struct regr_sxx_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -990,15 +1040,19 @@ extern "C"
{
struct regr_sxx_data* data = (struct regr_sxx_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumx = data->sumx;
double sumx2 = data->sumx2;
double var_popx = (sumx2 - (sumx * sumx / N)) / N;
return data->cnt * var_popx;
long double sumx = data->sumx;
long double sumx2 = data->sumx2;
long double var_popx = (sumx2 - (sumx * sumx / N)) / N;
valOut = static_cast<double>(N * var_popx);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -1008,8 +1062,8 @@ extern "C"
struct regr_syy_data
{
int64_t cnt;
double sumy;
double sumy2; // sum of (y squared)
long double sumy;
long double sumy2; // sum of (y squared)
};
#ifdef _MSC_VER
@ -1020,8 +1074,8 @@ extern "C"
struct regr_syy_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_syy() requires two arguments");
return 1;
strcpy(message,"regr_syy() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0])))
{
@ -1029,10 +1083,12 @@ extern "C"
return 1;
}
if (!(data = (struct regr_syy_data*) malloc(sizeof(struct regr_syy_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct regr_syy_data*) malloc(sizeof(struct regr_syy_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumy = 0.0;
@ -1091,15 +1147,19 @@ extern "C"
{
struct regr_syy_data* data = (struct regr_syy_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumy = data->sumy;
double sumy2 = data->sumy2;
double var_popy = (sumy2 - (sumy * sumy / N)) / N;
return data->cnt * var_popy;
long double sumy = data->sumy;
long double sumy2 = data->sumy2;
long double var_popy = (sumy2 - (sumy * sumy / N)) / N;
valOut = static_cast<double>(N * var_popy);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -1110,9 +1170,9 @@ extern "C"
struct regr_sxy_data
{
int64_t cnt;
double sumx;
double sumy;
double sumxy; // sum of (x*y)
long double sumx;
long double sumy;
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -1123,8 +1183,8 @@ extern "C"
struct regr_sxy_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_sxy() requires two arguments");
return 1;
strcpy(message,"regr_sxy() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -1132,10 +1192,12 @@ extern "C"
return 1;
}
if (!(data = (struct regr_sxy_data*) malloc(sizeof(struct regr_sxy_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct regr_sxy_data*) malloc(sizeof(struct regr_sxy_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -1198,16 +1260,21 @@ extern "C"
{
struct regr_sxy_data* data = (struct regr_sxy_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumxy = data->sumxy;
double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
return data->cnt * covar_pop;
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumxy = data->sumxy;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
long double regr_sxy = N * covar_pop;
valOut = static_cast<double>(regr_sxy);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -1218,9 +1285,9 @@ extern "C"
struct covar_pop_data
{
int64_t cnt;
double sumx;
double sumy;
double sumxy; // sum of (x*y)
long double sumx;
long double sumy;
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -1231,8 +1298,8 @@ extern "C"
struct covar_pop_data* data;
if (args->arg_count != 2)
{
strcpy(message,"covar_pop() requires two arguments");
return 1;
strcpy(message,"covar_pop() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -1240,10 +1307,12 @@ extern "C"
return 1;
}
if (!(data = (struct covar_pop_data*) malloc(sizeof(struct covar_pop_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct covar_pop_data*) malloc(sizeof(struct covar_pop_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -1306,16 +1375,20 @@ extern "C"
{
struct covar_pop_data* data = (struct covar_pop_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumxy = data->sumxy;
double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
return covar_pop;
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumxy = data->sumxy;
long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N ;
valOut = static_cast<double>(covar_pop);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
//=======================================================================
@ -1325,9 +1398,9 @@ extern "C"
struct covar_samp_data
{
int64_t cnt;
double sumx;
double sumy;
double sumxy; // sum of (x*y)
long double sumx;
long double sumy;
long double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
@ -1338,8 +1411,8 @@ extern "C"
struct covar_samp_data* data;
if (args->arg_count != 2)
{
strcpy(message,"covar_samp() requires two arguments");
return 1;
strcpy(message,"covar_samp() requires two arguments");
return 1;
}
if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1])))
{
@ -1347,10 +1420,12 @@ extern "C"
return 1;
}
if (!(data = (struct covar_samp_data*) malloc(sizeof(struct covar_samp_data))))
initid->decimals = DECIMAL_NOT_SPECIFIED;
if (!(data = (struct covar_samp_data*) malloc(sizeof(struct covar_samp_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
@ -1413,16 +1488,20 @@ extern "C"
{
struct covar_samp_data* data = (struct covar_samp_data*)initid->ptr;
double N = data->cnt;
double valOut = 0;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumxy = data->sumxy;
double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N-1);
return covar_samp;
long double sumx = data->sumx;
long double sumy = data->sumy;
long double sumxy = data->sumxy;
long double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1);
valOut = static_cast<double>(covar_samp);
}
*is_null = 1;
return 0;
else
{
*is_null = 1;
}
return valOut;
}
}
// vim:ts=4 sw=4: