From 0cef3bf31ca8995b675ee860eb08da9e5ad17f3e Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 29 Nov 2018 13:31:12 -0600 Subject: [PATCH 1/6] MCOL-1983 Have regr_intercept, regr_slope and regr_r2 return NULL for 1 or 0 rows in set --- utils/regr/regr_intercept.cpp | 2 +- utils/regr/regr_r2.cpp | 2 +- utils/regr/regr_slope.cpp | 2 +- utils/windowfunction/wf_udaf.cpp | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/utils/regr/regr_intercept.cpp b/utils/regr/regr_intercept.cpp index 6d4c35a47..011488d9d 100644 --- a/utils/regr/regr_intercept.cpp +++ b/utils/regr/regr_intercept.cpp @@ -139,7 +139,7 @@ mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_an { struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data; double N = data->cnt; - if (N > 0) + if (N > 1) { double sumx = data->sumx; double sumy = data->sumy; diff --git a/utils/regr/regr_r2.cpp b/utils/regr/regr_r2.cpp index 34e8888e8..96b0ada18 100644 --- a/utils/regr/regr_r2.cpp +++ b/utils/regr/regr_r2.cpp @@ -144,7 +144,7 @@ mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& { struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data; double N = data->cnt; - if (N > 0) + if (N > 1) { double sumx = data->sumx; double sumy = data->sumy; diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp index da178673a..fdb94795a 100644 --- a/utils/regr/regr_slope.cpp +++ b/utils/regr/regr_slope.cpp @@ -139,7 +139,7 @@ mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::a { struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data; double N = data->cnt; - if (N > 0) + if (N > 1) { // COVAR_POP(y, x) / VAR_POP(x) double sumx = data->sumx; diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 2b7e28abc..5986c2872 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ 
b/utils/windowfunction/wf_udaf.cpp @@ -84,7 +84,6 @@ void WF_udaf::resetData() getContext().getFunction()->reset(&getContext()); fDistinctMap.clear(); WindowFunctionType::resetData(); - fValOut.reset(); } void WF_udaf::parseParms(const std::vector& parms) @@ -709,6 +708,7 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; bool isNull = false; + fValOut.reset(); if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || From 4e74bbc7d7af6c59f11e0e6c5550ab48e121c06b Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 29 Nov 2018 15:29:57 -0600 Subject: [PATCH 2/6] MCOL-1676 -- setvalue based on output column type rather than input column type. --- utils/windowfunction/windowfunctiontype.cpp | 73 ++++++++++++++++++++- 1 file changed, 71 insertions(+), 2 deletions(-) diff --git a/utils/windowfunction/windowfunctiontype.cpp b/utils/windowfunction/windowfunctiontype.cpp index efede3ef5..bdfe8c73c 100644 --- a/utils/windowfunction/windowfunctiontype.cpp +++ b/utils/windowfunction/windowfunctiontype.cpp @@ -325,6 +325,29 @@ template<> void WindowFunctionType::setValue(uint64_t i, string& t) fRow.setStringField(t, i); } +//template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, string*); +template<> +void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, string* v) +{ + if (c != WF__BOUND_ALL) + b = e = c; + + uint64_t i = fFieldIndex[0]; + + if (v == NULL) + v = (string*) getNullValueByType(ct, i); + + for (int64_t j = b; j <= e; j++) + { + if (j % 1000 == 0 && fStep->cancelled()) + break; + + fRow.setData(getPointer((*fRowData)[j])); + // MCOL-1985 Set the data based on out column type (ct) + setValue(i, *v); + } +} + template void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v) { @@ -342,7 +365,53 @@ void WindowFunctionType::setValue(int ct, int64_t b, int64_t e, int64_t c, T* v) break; fRow.setData(getPointer((*fRowData)[j])); 
- setValue(i, *v); + // MCOL-1985 Set the data based on out column type (ct) + switch (ct) + { + case CalpontSystemCatalog::TINYINT: + case CalpontSystemCatalog::SMALLINT: + case CalpontSystemCatalog::MEDINT: + case CalpontSystemCatalog::INT: + case CalpontSystemCatalog::BIGINT: + case CalpontSystemCatalog::DECIMAL: + { + int64_t iv = *v; + setValue(i, iv); + break; + } + + case CalpontSystemCatalog::UTINYINT: + case CalpontSystemCatalog::USMALLINT: + case CalpontSystemCatalog::UMEDINT: + case CalpontSystemCatalog::UINT: + case CalpontSystemCatalog::UBIGINT: + case CalpontSystemCatalog::UDECIMAL: + { + uint64_t uv = *v; + setValue(i, uv); + break; + } + + case CalpontSystemCatalog::DOUBLE: + case CalpontSystemCatalog::UDOUBLE: + { + double dv = *v; + setValue(i, dv); + break; + } + + case CalpontSystemCatalog::FLOAT: + case CalpontSystemCatalog::UFLOAT: + { + float fv = *v; + setValue(i, fv); + break; + } + default: + { + setValue(i, *v); + } + } } } @@ -476,7 +545,7 @@ template void WindowFunctionType::setValue(int, int64_t, int64_t, int64 template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, uint64_t*); template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, float*); template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, double*); -template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, string*); +//template void WindowFunctionType::setValue(int, int64_t, int64_t, int64_t, string*); void* WindowFunctionType::getNullValueByType(int ct, int pos) From cc70726a33976cb8069e7bd9f5fdf0a37573bf12 Mon Sep 17 00:00:00 2001 From: David Hall Date: Thu, 13 Dec 2018 15:49:11 -0600 Subject: [PATCH 3/6] MCOL-1983 For MCOL-1676 the reset of fValOut to NULL happens too soon. 
--- utils/windowfunction/wf_udaf.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/windowfunction/wf_udaf.cpp b/utils/windowfunction/wf_udaf.cpp index 5986c2872..91ae5f9b2 100644 --- a/utils/windowfunction/wf_udaf.cpp +++ b/utils/windowfunction/wf_udaf.cpp @@ -708,12 +708,12 @@ void WF_udaf::operator()(int64_t b, int64_t e, int64_t c) mcsv1sdk::mcsv1_UDAF::ReturnCode rc; uint64_t colOut = fFieldIndex[0]; bool isNull = false; - fValOut.reset(); if ((fFrameUnit == WF__FRAME_ROWS) || (fPrev == -1) || (!fPeer->operator()(getPointer(fRowData->at(c)), getPointer(fRowData->at(fPrev))))) { + fValOut.reset(); // for unbounded - current row special handling if (fPrev >= b && fPrev < c) b = c; From 8258bd80e2b575ce5b7b1e4532cc2515d3adc921 Mon Sep 17 00:00:00 2001 From: Ben Thompson Date: Fri, 21 Dec 2018 13:55:50 -0600 Subject: [PATCH 4/6] Update VERSION 1.3.0 --- VERSION | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/VERSION b/VERSION index dee66414e..f5d54e457 100644 --- a/VERSION +++ b/VERSION @@ -1,4 +1,4 @@ COLUMNSTORE_VERSION_MAJOR=1 -COLUMNSTORE_VERSION_MINOR=2 -COLUMNSTORE_VERSION_PATCH=2 +COLUMNSTORE_VERSION_MINOR=3 +COLUMNSTORE_VERSION_PATCH=0 COLUMNSTORE_VERSION_RELEASE=1 From eb75d3cd7ea285ad845041527cae32aaafc29a27 Mon Sep 17 00:00:00 2001 From: David Hall Date: Wed, 2 Jan 2019 10:31:53 -0600 Subject: [PATCH 5/6] MCOL-1983 Reject non-numeric arguments for regr_*** functions that require numeric --- utils/regr/corr.cpp | 7 ++++ utils/regr/covar_pop.cpp | 7 ++++ utils/regr/covar_samp.cpp | 7 ++++ utils/regr/regr_avgx.cpp | 7 ++++ utils/regr/regr_avgy.cpp | 7 ++++ utils/regr/regr_intercept.cpp | 7 ++++ utils/regr/regr_r2.cpp | 7 ++++ utils/regr/regr_slope.cpp | 8 +++- utils/regr/regr_sxx.cpp | 7 ++++ utils/regr/regr_sxy.cpp | 7 ++++ utils/regr/regr_syy.cpp | 7 ++++ utils/regr/regrmysql.cpp | 75 ++++++++++++++++++++++++++++++++++- utils/udfsdk/mcsv1_udaf.h | 2 +- 13 files changed, 152 insertions(+), 3 deletions(-) diff 
--git a/utils/regr/corr.cpp b/utils/regr/corr.cpp index 9d12680ea..13efbe897 100644 --- a/utils/regr/corr.cpp +++ b/utils/regr/corr.cpp @@ -57,6 +57,13 @@ mcsv1_UDAF::ReturnCode corr::init(mcsv1Context* context, context->setErrorMessage("corr() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType) && isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("corr() with non-numeric arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(corr_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/covar_pop.cpp b/utils/regr/covar_pop.cpp index 51d9a036f..6313e49d2 100644 --- a/utils/regr/covar_pop.cpp +++ b/utils/regr/covar_pop.cpp @@ -55,6 +55,13 @@ mcsv1_UDAF::ReturnCode covar_pop::init(mcsv1Context* context, context->setErrorMessage("covar_pop() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType) && isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("covar_pop() with non-numeric arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(covar_pop_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/covar_samp.cpp b/utils/regr/covar_samp.cpp index b0ebb168b..0a79ff31a 100644 --- a/utils/regr/covar_samp.cpp +++ b/utils/regr/covar_samp.cpp @@ -55,6 +55,13 @@ mcsv1_UDAF::ReturnCode covar_samp::init(mcsv1Context* context, context->setErrorMessage("covar_samp() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType) && isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("covar_samp() with non-numeric 
arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(covar_samp_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_avgx.cpp b/utils/regr/regr_avgx.cpp index c366f4e5a..e601b4172 100644 --- a/utils/regr/regr_avgx.cpp +++ b/utils/regr/regr_avgx.cpp @@ -63,6 +63,13 @@ mcsv1_UDAF::ReturnCode regr_avgx::init(mcsv1Context* context, context->setErrorMessage("regr_avgx() with a non-numeric x argument"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgx() with a non-numeric independant (second) argument"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_avgx_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_avgy.cpp b/utils/regr/regr_avgy.cpp index 87a729263..0d0cc58dd 100644 --- a/utils/regr/regr_avgy.cpp +++ b/utils/regr/regr_avgy.cpp @@ -63,6 +63,13 @@ mcsv1_UDAF::ReturnCode regr_avgy::init(mcsv1Context* context, context->setErrorMessage("regr_avgy() with a non-numeric x argument"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_avgy() with a non-numeric dependant (first) argument"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_avgy_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_intercept.cpp b/utils/regr/regr_intercept.cpp index 011488d9d..d46d3117c 100644 --- a/utils/regr/regr_intercept.cpp +++ b/utils/regr/regr_intercept.cpp @@ -56,6 +56,13 @@ mcsv1_UDAF::ReturnCode regr_intercept::init(mcsv1Context* context, context->setErrorMessage("regr_intercept() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType) && 
isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_intercept() with non-numeric arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_intercept_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_r2.cpp b/utils/regr/regr_r2.cpp index 96b0ada18..f159d834a 100644 --- a/utils/regr/regr_r2.cpp +++ b/utils/regr/regr_r2.cpp @@ -57,6 +57,13 @@ mcsv1_UDAF::ReturnCode regr_r2::init(mcsv1Context* context, context->setErrorMessage("regr_r2() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType) && isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_r2() with non-numeric arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_r2_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp index fdb94795a..6790e223e 100644 --- a/utils/regr/regr_slope.cpp +++ b/utils/regr/regr_slope.cpp @@ -56,7 +56,13 @@ mcsv1_UDAF::ReturnCode regr_slope::init(mcsv1Context* context, context->setErrorMessage("regr_slope() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } - + if (!(isNumeric(colTypes[0].dataType) && isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_slope() with non-numeric arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_slope_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); context->setColWidth(8); diff --git a/utils/regr/regr_sxx.cpp b/utils/regr/regr_sxx.cpp index 3f06af61b..b4c467fc3 100644 --- a/utils/regr/regr_sxx.cpp +++ b/utils/regr/regr_sxx.cpp @@ -54,6 
+54,13 @@ mcsv1_UDAF::ReturnCode regr_sxx::init(mcsv1Context* context, context->setErrorMessage("regr_sxx() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_sxx() with a non-numeric independant (second) argument"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_sxx_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_sxy.cpp b/utils/regr/regr_sxy.cpp index e6d005597..9cdbffb6a 100644 --- a/utils/regr/regr_sxy.cpp +++ b/utils/regr/regr_sxy.cpp @@ -55,6 +55,13 @@ mcsv1_UDAF::ReturnCode regr_sxy::init(mcsv1Context* context, context->setErrorMessage("regr_sxy() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType) && isNumeric(colTypes[1].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_sxy() with non-numeric arguments"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_sxy_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git a/utils/regr/regr_syy.cpp b/utils/regr/regr_syy.cpp index d0841f723..98e614a8e 100644 --- a/utils/regr/regr_syy.cpp +++ b/utils/regr/regr_syy.cpp @@ -54,6 +54,13 @@ mcsv1_UDAF::ReturnCode regr_syy::init(mcsv1Context* context, context->setErrorMessage("regr_syy() with other than 2 arguments"); return mcsv1_UDAF::ERROR; } + if (!(isNumeric(colTypes[0].dataType))) + { + // The error message will be prepended with + // "The storage engine for the table doesn't support " + context->setErrorMessage("regr_syy() with a non-numeric dependant (first) argument"); + return mcsv1_UDAF::ERROR; + } context->setUserDataSize(sizeof(regr_syy_data)); context->setResultType(CalpontSystemCatalog::DOUBLE); diff --git 
a/utils/regr/regrmysql.cpp b/utils/regr/regrmysql.cpp index 08f46bb11..4980108e3 100644 --- a/utils/regr/regrmysql.cpp +++ b/utils/regr/regrmysql.cpp @@ -2,12 +2,30 @@ #include #include #include +#include using namespace std; #include "idb_mysql.h" namespace { +inline bool isNumeric(int type, const char* attr) +{ + if (type == INT_RESULT || type == REAL_RESULT || type == DECIMAL_RESULT) + { + return true; + } +#if _MSC_VER + if (_strnicmp("NULL", attr, 4) == 0) +#else + if (strncasecmp("NULL", attr, 4) == 0) +#endif + { + return true; + } + return false; +} + inline double cvtArgToDouble(int t, const char* v) { double d = 0.0; @@ -144,7 +162,12 @@ extern "C" strcpy(message,"regr_avgx() requires two arguments"); return 1; } - + if (!(isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"regr_avgx() with a non-numeric independant (second) argument"); + return 1; + } + if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data)))) { strmov(message,"Couldn't allocate memory"); @@ -228,6 +251,11 @@ extern "C" strcpy(message,"regr_avgy() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]))) + { + strcpy(message,"regr_avgy() with a non-numeric dependant (first) argument"); + return 1; + } if (!(data = (struct regr_avgy_data*) malloc(sizeof(struct regr_avgy_data)))) { @@ -394,6 +422,11 @@ extern "C" strcpy(message,"regr_slope() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"regr_slope() with non-numeric arguments"); + return 1; + } if (!(data = (struct regr_slope_data*) malloc(sizeof(struct regr_slope_data)))) { @@ -505,6 +538,11 @@ extern "C" strcpy(message,"regr_intercept() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"regr_intercept() with 
non-numeric arguments"); + return 1; + } if (!(data = (struct regr_intercept_data*) malloc(sizeof(struct regr_intercept_data)))) { @@ -619,6 +657,11 @@ extern "C" strcpy(message,"regr_r2() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"regr_r2() with non-numeric arguments"); + return 1; + } if (!(data = (struct regr_r2_data*) malloc(sizeof(struct regr_r2_data)))) { @@ -748,6 +791,11 @@ extern "C" strcpy(message,"corr() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"corr() with non-numeric arguments"); + return 1; + } if (!(data = (struct corr_data*) malloc(sizeof(struct corr_data)))) { @@ -874,6 +922,11 @@ extern "C" strcpy(message,"regr_sxx() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"regr_sxx() with a non-numeric independant (second) argument"); + return 1; + } if (!(data = (struct regr_sxx_data*) malloc(sizeof(struct regr_sxx_data)))) { @@ -970,6 +1023,11 @@ extern "C" strcpy(message,"regr_syy() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]))) + { + strcpy(message,"regr_syy() with a non-numeric dependant (first) argument"); + return 1; + } if (!(data = (struct regr_syy_data*) malloc(sizeof(struct regr_syy_data)))) { @@ -1068,6 +1126,11 @@ extern "C" strcpy(message,"regr_sxy() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"regr_sxy() with non-numeric arguments"); + return 1; + } if (!(data = (struct regr_sxy_data*) malloc(sizeof(struct regr_sxy_data)))) { @@ -1171,6 +1234,11 @@ extern "C" strcpy(message,"covar_pop() requires two arguments"); return 1; } + if 
(!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"covar_pop() with non-numeric arguments"); + return 1; + } if (!(data = (struct covar_pop_data*) malloc(sizeof(struct covar_pop_data)))) { @@ -1273,6 +1341,11 @@ extern "C" strcpy(message,"covar_samp() requires two arguments"); return 1; } + if (!(isNumeric(args->arg_type[0], args->attributes[0]) && isNumeric(args->arg_type[1], args->attributes[1]))) + { + strcpy(message,"covar_samp() with non-numeric arguments"); + return 1; + } if (!(data = (struct covar_samp_data*) malloc(sizeof(struct covar_samp_data)))) { diff --git a/utils/udfsdk/mcsv1_udaf.h b/utils/udfsdk/mcsv1_udaf.h index ec0d0cb79..d6ba04483 100644 --- a/utils/udfsdk/mcsv1_udaf.h +++ b/utils/udfsdk/mcsv1_udaf.h @@ -976,7 +976,7 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in template inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn) { - T val; + T val = 0; if (valIn.compatible(longTypeId)) { val = valIn.cast(); From 16a6a0a27c87fbe3cde9fb25c09c3b1761db3bf3 Mon Sep 17 00:00:00 2001 From: David Hall Date: Mon, 21 Jan 2019 14:39:51 -0600 Subject: [PATCH 6/6] MCOL-1983 Use long double for internal calculations of regr_*** functions. 
--- utils/regr/corr.cpp | 34 +++++++++++++++++----------------- utils/regr/covar_pop.cpp | 16 ++++++++-------- utils/regr/covar_samp.cpp | 16 ++++++++-------- utils/regr/regr_avgx.cpp | 4 ++-- utils/regr/regr_avgy.cpp | 4 ++-- utils/regr/regr_intercept.cpp | 22 +++++++++++----------- utils/regr/regr_r2.cpp | 32 ++++++++++++++++---------------- utils/regr/regr_slope.cpp | 24 ++++++++++++------------ utils/regr/regr_sxx.cpp | 12 ++++++------ utils/regr/regr_sxy.cpp | 22 +++++++++++----------- utils/regr/regr_syy.cpp | 14 +++++++------- 11 files changed, 100 insertions(+), 100 deletions(-) diff --git a/utils/regr/corr.cpp b/utils/regr/corr.cpp index 13efbe897..c1c388da9 100644 --- a/utils/regr/corr.cpp +++ b/utils/regr/corr.cpp @@ -39,11 +39,11 @@ static Add_corr_ToUDAFMap addToMap; struct corr_data { uint64_t cnt; - double sumx; - double sumx2; // sum of (x squared) - double sumy; - double sumy2; // sum of (y squared) - double sumxy; // sum of x * y + long double sumx; + long double sumx2; // sum of (x squared) + long double sumy; + long double sumy2; // sum of (y squared) + long double sumxy; // sum of x * y }; @@ -153,29 +153,29 @@ mcsv1_UDAF::ReturnCode corr::evaluate(mcsv1Context* context, static_any::any& va double N = data->cnt; if (N > 1) { - double sumx = data->sumx; - double sumy = data->sumy; - double sumx2 = data->sumx2; - double sumy2 = data->sumy2; - double sumxy = data->sumxy; + long double sumx = data->sumx; + long double sumy = data->sumy; + long double sumx2 = data->sumx2; + long double sumy2 = data->sumy2; + long double sumxy = data->sumxy; - double var_popx = (sumx2 - (sumx * sumx / N)) / N; + long double var_popx = (sumx2 - (sumx * sumx / N)) / N; if (var_popx == 0) { // When var_popx is 0, NULL is the result. 
return mcsv1_UDAF::SUCCESS; } - double var_popy = (sumy2 - (sumy * sumy / N)) / N; + long double var_popy = (sumy2 - (sumy * sumy / N)) / N; if (var_popy == 0) { // When var_popy is 0, NULL is the result return mcsv1_UDAF::SUCCESS; } - double std_popx = sqrt(var_popx); - double std_popy = sqrt(var_popy); - double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; - double corr = covar_pop / (std_popy * std_popx); - valOut = corr; + long double std_popx = sqrt(var_popx); + long double std_popy = sqrt(var_popy); + long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + long double corr = covar_pop / (std_popy * std_popx); + valOut = static_cast(corr); } return mcsv1_UDAF::SUCCESS; } diff --git a/utils/regr/covar_pop.cpp b/utils/regr/covar_pop.cpp index 6313e49d2..876be1f30 100644 --- a/utils/regr/covar_pop.cpp +++ b/utils/regr/covar_pop.cpp @@ -39,9 +39,9 @@ static Add_covar_pop_ToUDAFMap addToMap; struct covar_pop_data { uint64_t cnt; - double sumx; - double sumy; - double sumxy; // sum of x * y + long double sumx; + long double sumy; + long double sumxy; // sum of x * y }; @@ -145,12 +145,12 @@ mcsv1_UDAF::ReturnCode covar_pop::evaluate(mcsv1Context* context, static_any::an double N = data->cnt; if (N > 0) { - double sumx = data->sumx; - double sumy = data->sumy; - double sumxy = data->sumxy; + long double sumx = data->sumx; + long double sumy = data->sumy; + long double sumxy = data->sumxy; - double covar_pop = (sumxy - ((sumx * sumy) / N)) / N ; - valOut = covar_pop; + long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N ; + valOut = static_cast(covar_pop); } return mcsv1_UDAF::SUCCESS; } diff --git a/utils/regr/covar_samp.cpp b/utils/regr/covar_samp.cpp index 0a79ff31a..ccc302046 100644 --- a/utils/regr/covar_samp.cpp +++ b/utils/regr/covar_samp.cpp @@ -39,9 +39,9 @@ static Add_covar_samp_ToUDAFMap addToMap; struct covar_samp_data { uint64_t cnt; - double sumx; - double sumy; - double sumxy; // sum of x * y + long double sumx; + long double sumy; + long 
double sumxy; // sum of x * y }; @@ -145,12 +145,12 @@ mcsv1_UDAF::ReturnCode covar_samp::evaluate(mcsv1Context* context, static_any::a double N = data->cnt; if (N > 1) { - double sumx = data->sumx; - double sumy = data->sumy; - double sumxy = data->sumxy; + long double sumx = data->sumx; + long double sumy = data->sumy; + long double sumxy = data->sumxy; - double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1); - valOut = covar_samp; + long double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1); + valOut = static_cast(covar_samp); } else if (N == 1) diff --git a/utils/regr/regr_avgx.cpp b/utils/regr/regr_avgx.cpp index e601b4172..bf010e648 100644 --- a/utils/regr/regr_avgx.cpp +++ b/utils/regr/regr_avgx.cpp @@ -40,7 +40,7 @@ static Add_regr_avgx_ToUDAFMap addToMap; // Use the simple data model struct regr_avgx_data { - double sum; + long double sum; uint64_t cnt; }; @@ -132,7 +132,7 @@ mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::an if (data->cnt > 0) { - valOut = data->sum / (double)data->cnt; + valOut = static_cast(data->sum / (long double)data->cnt); } return mcsv1_UDAF::SUCCESS; diff --git a/utils/regr/regr_avgy.cpp b/utils/regr/regr_avgy.cpp index 0d0cc58dd..7325d991f 100644 --- a/utils/regr/regr_avgy.cpp +++ b/utils/regr/regr_avgy.cpp @@ -40,7 +40,7 @@ static Add_regr_avgy_ToUDAFMap addToMap; // Use the simple data model struct regr_avgy_data { - double sum; + long double sum; uint64_t cnt; }; @@ -130,7 +130,7 @@ mcsv1_UDAF::ReturnCode regr_avgy::evaluate(mcsv1Context* context, static_any::an if (data->cnt > 0) { - valOut = data->sum / (double)data->cnt; + valOut = static_cast(data->sum / (long double)data->cnt); } return mcsv1_UDAF::SUCCESS; } diff --git a/utils/regr/regr_intercept.cpp b/utils/regr/regr_intercept.cpp index d46d3117c..df9310f03 100644 --- a/utils/regr/regr_intercept.cpp +++ b/utils/regr/regr_intercept.cpp @@ -39,10 +39,10 @@ static Add_regr_intercept_ToUDAFMap addToMap; struct regr_intercept_data { 
uint64_t cnt; - double sumx; - double sumx2; // sum of (x squared) - double sumy; - double sumxy; // sum of (x*y) + long double sumx; + long double sumx2; // sum of (x squared) + long double sumy; + long double sumxy; // sum of x * y }; @@ -148,15 +148,15 @@ mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_an double N = data->cnt; if (N > 1) { - double sumx = data->sumx; - double sumy = data->sumy; - double sumx2 = data->sumx2; - double sumxy = data->sumxy; - double numerator = sumy * sumx2 - sumx * sumxy; - double var_pop = (N * sumx2) - (sumx * sumx); + long double sumx = data->sumx; + long double sumy = data->sumy; + long double sumx2 = data->sumx2; + long double sumxy = data->sumxy; + long double numerator = sumy * sumx2 - sumx * sumxy; + long double var_pop = (N * sumx2) - (sumx * sumx); if (var_pop != 0) { - valOut = numerator / var_pop; + valOut = static_cast(numerator / var_pop); } } return mcsv1_UDAF::SUCCESS; diff --git a/utils/regr/regr_r2.cpp b/utils/regr/regr_r2.cpp index f159d834a..1abd3ea2e 100644 --- a/utils/regr/regr_r2.cpp +++ b/utils/regr/regr_r2.cpp @@ -39,11 +39,11 @@ static Add_regr_r2_ToUDAFMap addToMap; struct regr_r2_data { uint64_t cnt; - double sumx; - double sumx2; // sum of (x squared) - double sumy; - double sumy2; // sum of (y squared) - double sumxy; // sum of x * y + long double sumx; + long double sumx2; // sum of (x squared) + long double sumy; + long double sumy2; // sum of (y squared) + long double sumxy; // sum of x * y }; @@ -153,13 +153,13 @@ mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& double N = data->cnt; if (N > 1) { - double sumx = data->sumx; - double sumy = data->sumy; - double sumx2 = data->sumx2; - double sumy2 = data->sumy2; - double sumxy = data->sumxy; + long double sumx = data->sumx; + long double sumy = data->sumy; + long double sumx2 = data->sumx2; + long double sumy2 = data->sumy2; + long double sumxy = data->sumxy; - double var_popx = (sumx2 - 
(sumx * sumx / N)) / N; + long double var_popx = (sumx2 - (sumx * sumx / N)) / N; if (var_popx == 0) { // When var_popx is 0, NULL is the result. @@ -172,11 +172,11 @@ mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& valOut = 1.0; return mcsv1_UDAF::SUCCESS; } - double std_popx = sqrt(var_popx); - double std_popy = sqrt(var_popy); - double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; - double corr = covar_pop / (std_popy * std_popx); - valOut = corr * corr; + long double std_popx = sqrt(var_popx); + long double std_popy = sqrt(var_popy); + long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + long double corr = covar_pop / (std_popy * std_popx); + valOut = static_cast(corr * corr); } return mcsv1_UDAF::SUCCESS; } diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp index 6790e223e..de9eab5c7 100644 --- a/utils/regr/regr_slope.cpp +++ b/utils/regr/regr_slope.cpp @@ -39,10 +39,10 @@ static Add_regr_slope_ToUDAFMap addToMap; struct regr_slope_data { uint64_t cnt; - double sumx; - double sumx2; // sum of (x squared) - double sumy; - double sumxy; // sum of (x*y) + long double sumx; + long double sumx2; // sum of (x squared) + long double sumy; + long double sumxy; // sum of x * y }; @@ -148,16 +148,16 @@ mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::a if (N > 1) { // COVAR_POP(y, x) / VAR_POP(x) - double sumx = data->sumx; - double sumy = data->sumy; - double sumx2 = data->sumx2; - double sumxy = data->sumxy; - double covar_pop = N * sumxy - sumx * sumy; - double var_pop = N * sumx2 - sumx * sumx; + long double sumx = data->sumx; + long double sumy = data->sumy; + long double sumx2 = data->sumx2; + long double sumxy = data->sumxy; + long double covar_pop = N * sumxy - sumx * sumy; + long double var_pop = N * sumx2 - sumx * sumx; if (var_pop != 0) { - double slope = covar_pop / var_pop; - valOut = slope; + long double slope = covar_pop / var_pop; + valOut = static_cast(slope); } } 
return mcsv1_UDAF::SUCCESS; diff --git a/utils/regr/regr_sxx.cpp b/utils/regr/regr_sxx.cpp index b4c467fc3..5769a227b 100644 --- a/utils/regr/regr_sxx.cpp +++ b/utils/regr/regr_sxx.cpp @@ -39,8 +39,8 @@ static Add_regr_sxx_ToUDAFMap addToMap; struct regr_sxx_data { uint64_t cnt; - double sumx; - double sumx2; // sum of (x squared) + long double sumx; + long double sumx2; // sum of (x squared) }; @@ -128,11 +128,11 @@ mcsv1_UDAF::ReturnCode regr_sxx::evaluate(mcsv1Context* context, static_any::any double N = data->cnt; if (N > 0) { - double sumx = data->sumx; - double sumx2 = data->sumx2; + long double sumx = data->sumx; + long double sumx2 = data->sumx2; - double var_popx = (sumx2 - (sumx * sumx / N)) / N; - valOut = data->cnt * var_popx; + long double var_popx = (sumx2 - (sumx * sumx / N)) / N; + valOut = static_cast(data->cnt * var_popx); } return mcsv1_UDAF::SUCCESS; } diff --git a/utils/regr/regr_sxy.cpp b/utils/regr/regr_sxy.cpp index 9cdbffb6a..76e1373c4 100644 --- a/utils/regr/regr_sxy.cpp +++ b/utils/regr/regr_sxy.cpp @@ -39,9 +39,9 @@ static Add_regr_sxy_ToUDAFMap addToMap; struct regr_sxy_data { uint64_t cnt; - double sumx; - double sumy; - double sumxy; // sum of x * y + long double sumx; + long double sumy; + long double sumxy; // sum of x * y }; @@ -88,8 +88,8 @@ mcsv1_UDAF::ReturnCode regr_sxy::nextValue(mcsv1Context* context, ColumnDatum* v static_any::any& valIn_y = valsIn[0].columnData; static_any::any& valIn_x = valsIn[1].columnData; struct regr_sxy_data* data = (struct regr_sxy_data*)context->getUserData()->data; - double valx = 0.0; - double valy = 0.0; + long double valx = 0.0; + long double valy = 0.0; valx = convertAnyTo(valIn_x); valy = convertAnyTo(valIn_y); @@ -145,13 +145,13 @@ mcsv1_UDAF::ReturnCode regr_sxy::evaluate(mcsv1Context* context, static_any::any double N = data->cnt; if (N > 0) { - double sumx = data->sumx; - double sumy = data->sumy; - double sumxy = data->sumxy; + long double sumx = data->sumx; + long double sumy = 
data->sumy; + long double sumxy = data->sumxy; - double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; - double regr_sxy = data->cnt * covar_pop; - valOut = regr_sxy; + long double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + long double regr_sxy = data->cnt * covar_pop; + valOut = static_cast(regr_sxy); } return mcsv1_UDAF::SUCCESS; } diff --git a/utils/regr/regr_syy.cpp b/utils/regr/regr_syy.cpp index 98e614a8e..014a28389 100644 --- a/utils/regr/regr_syy.cpp +++ b/utils/regr/regr_syy.cpp @@ -39,8 +39,8 @@ static Add_regr_syy_ToUDAFMap addToMap; struct regr_syy_data { uint64_t cnt; - double sumy; - double sumy2; // sum of (y squared) + long double sumy; + long double sumy2; // sum of (y squared) }; @@ -125,14 +125,14 @@ mcsv1_UDAF::ReturnCode regr_syy::subEvaluate(mcsv1Context* context, const UserDa mcsv1_UDAF::ReturnCode regr_syy::evaluate(mcsv1Context* context, static_any::any& valOut) { struct regr_syy_data* data = (struct regr_syy_data*)context->getUserData()->data; - double N = data->cnt; + long double N = data->cnt; if (N > 0) { - double sumy = data->sumy; - double sumy2 = data->sumy2; + long double sumy = data->sumy; + long double sumy2 = data->sumy2; - double var_popy = (sumy2 - (sumy * sumy / N)) / N; - valOut = data->cnt * var_popy; + long double var_popy = (sumy2 - (sumy * sumy / N)) / N; + valOut = static_cast(data->cnt * var_popy); } return mcsv1_UDAF::SUCCESS; }