diff --git a/utils/regr/CMakeLists.txt b/utils/regr/CMakeLists.txt index 16f44d9af..ec2059661 100755 --- a/utils/regr/CMakeLists.txt +++ b/utils/regr/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES} ########### next target ############### -set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept regr_r2) +set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept.cpp regr_r2.cpp corr.cpp regr_sxx.cpp regr_syy.cpp regr_sxy.cpp covar_pop.cpp covar_samp.cpp) add_definitions(-DMYSQL_DYNAMIC_PLUGIN) diff --git a/utils/regr/corr.cpp b/utils/regr/corr.cpp new file mode 100644 index 000000000..f8d645ad4 --- /dev/null +++ b/utils/regr/corr.cpp @@ -0,0 +1,216 @@ +/* Copyright (C) 2017 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License + as published by the Free Software Foundation; version 2 of + the License. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. 
*/
+
+#include
+#include
+#include
+#include "corr.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+// Registers the aggregate under the name "corr" in UDAFMap when the static addToMap below is constructed.
+class Add_corr_ToUDAFMap
+{
+public:
+    Add_corr_ToUDAFMap()
+    {
+        UDAFMap::getMap()["corr"] = new corr();
+    }
+};
+
+static Add_corr_ToUDAFMap addToMap;
+
+// Use the simple data model: fixed-size state kept in the context's user data (row count + running sums).
+struct corr_data
+{
+    uint64_t cnt;
+    double sumx;
+    double sumx2; // sum of (x squared)
+    double sumy;
+    double sumy2; // sum of (y squared)
+    double sumxy; // sum of x * y
+};
+
+// init: rejects any argument count other than 2, then declares the user data size and a DOUBLE result.
+mcsv1_UDAF::ReturnCode corr::init(mcsv1Context* context,
+                                  ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("corr() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(corr_data));
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+// reset: zeroes the count and every running sum in the context's user data.
+mcsv1_UDAF::ReturnCode corr::reset(mcsv1Context* context)
+{
+    struct corr_data* data = (struct corr_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    data->sumy = 0.0;
+    data->sumy2 = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+// nextValue: adds one row to the running sums; valsIn[0] is taken as y and valsIn[1] as x.
+mcsv1_UDAF::ReturnCode corr::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;
+    static_any::any& valIn_x = valsIn[1].columnData;
+    struct corr_data* data = (struct corr_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+    data->sumy2 += valy*valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    data->sumxy += valx*valy;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// subEvaluate: adds the sums and count from userDataIn into this context; a null userDataIn is a no-op.
+mcsv1_UDAF::ReturnCode corr::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;
+    }
+
+    struct corr_data* outData = (struct corr_data*)context->getUserData()->data;
+    struct corr_data* inData = (struct corr_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumx2 += inData->sumx2;
+    outData->sumy += inData->sumy;
+    outData->sumy2 += inData->sumy2;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// evaluate: covar_pop / (stddev_pop(x) * stddev_pop(y)); valOut stays unset (NULL) whenever that is undefined.
+mcsv1_UDAF::ReturnCode corr::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct corr_data* data = (struct corr_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumx2 = data->sumx2;
+        double sumy2 = data->sumy2;
+        double sumxy = data->sumxy;
+
+        double var_popx = (sumx2 - (sumx * sumx / N)) / N;
+        if (var_popx == 0)
+        {
+            // When var_popx is 0, NULL is the result.
+            return mcsv1_UDAF::SUCCESS;
+        }
+        double var_popy = (sumy2 - (sumy * sumy / N)) / N;
+        if (var_popy == 0)
+        {
+            // When var_popy is 0 the correlation is undefined (the divisor below
+            // would be zero), so NULL is the result, just as for var_popx.
+            return mcsv1_UDAF::SUCCESS;
+        }
+        double std_popx = sqrt(var_popx);
+        double std_popy = sqrt(var_popy);
+        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
+        double corr = covar_pop / (std_popy * std_popx);
+        valOut = corr;
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+// dropValue: inverse of nextValue; subtracts one row's contribution and decrements the count.
+mcsv1_UDAF::ReturnCode corr::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct corr_data* data = (struct corr_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+    data->sumy2 -= valy*valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/corr.h b/utils/regr/corr.h
new file mode 100644
index 000000000..eba7597eb
--- /dev/null
+++ b/utils/regr/corr.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+/***********************************************************************
+*   $Id$
+*
+*   corr.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the corr function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION corr returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_corr
+#define HEADER_corr
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the corr value of the dataset: two-argument aggregate, first argument y, second x.
+
+class corr : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    corr() : mcsv1_UDAF() {};
+    virtual ~corr() {};
+    // Rejects any argument count other than 2 and declares a DOUBLE result plus the state size.
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+    // Zeroes the accumulated row count and running sums.
+    virtual ReturnCode reset(mcsv1Context* context);
+    // Adds one row (valsIn[0] = y, valsIn[1] = x) to the running sums.
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+    // Merges the partial sums held in valIn into this context; a null valIn is ignored.
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+    // Computes the correlation from the running sums into valOut.
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+    // Subtracts one previously added row from the running sums.
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_corr.h
+
diff --git a/utils/regr/covar_pop.cpp b/utils/regr/covar_pop.cpp
new file mode 100644
index 000000000..539497b6a
--- /dev/null
+++ b/utils/regr/covar_pop.cpp
@@ -0,0 +1,188 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the
terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "covar_pop.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+// Registers the aggregate under the name "covar_pop" in UDAFMap when the static addToMap below is constructed.
+class Add_covar_pop_ToUDAFMap
+{
+public:
+    Add_covar_pop_ToUDAFMap()
+    {
+        UDAFMap::getMap()["covar_pop"] = new covar_pop();
+    }
+};
+
+static Add_covar_pop_ToUDAFMap addToMap;
+
+// Use the simple data model: fixed-size state kept in the context's user data (row count + running sums).
+struct covar_pop_data
+{
+    uint64_t cnt;
+    double sumx;
+    double sumy;
+    double sumxy; // sum of x * y
+};
+
+// init: rejects any argument count other than 2, then declares the user data size and a DOUBLE result.
+mcsv1_UDAF::ReturnCode covar_pop::init(mcsv1Context* context,
+                                       ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("covar_pop() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(covar_pop_data));
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+// reset: zeroes the count and every running sum in the context's user data.
+mcsv1_UDAF::ReturnCode covar_pop::reset(mcsv1Context* context)
+{
+    struct covar_pop_data* data = (struct covar_pop_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumy = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+// nextValue: adds one row to the running sums; valsIn[0] is taken as y and valsIn[1] as x.
+mcsv1_UDAF::ReturnCode covar_pop::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;
+    static_any::any& valIn_x = valsIn[1].columnData;
+    struct covar_pop_data* data = (struct covar_pop_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx += valx;
+
+    data->sumxy += valx*valy;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// subEvaluate: adds the sums and count from userDataIn into this context; a null userDataIn is a no-op.
+mcsv1_UDAF::ReturnCode covar_pop::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;
+    }
+
+    struct covar_pop_data* outData = (struct covar_pop_data*)context->getUserData()->data;
+    struct covar_pop_data* inData = (struct covar_pop_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumy += inData->sumy;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// evaluate: (sumxy - sumx * sumy / N) / N; valOut is left unset when no rows were counted.
+mcsv1_UDAF::ReturnCode covar_pop::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct covar_pop_data* data = (struct covar_pop_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumxy = data->sumxy;
+
+        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N ;
+        valOut = covar_pop;
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+// dropValue: inverse of nextValue; subtracts one row's contribution and decrements the count.
+mcsv1_UDAF::ReturnCode covar_pop::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct covar_pop_data* data = (struct covar_pop_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx -= valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/covar_pop.h b/utils/regr/covar_pop.h
new file mode 100644
index 000000000..fc47d4497
--- /dev/null
+++ b/utils/regr/covar_pop.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+/***********************************************************************
+*   $Id$
+*
+*   covar_pop.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the covar_pop function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION covar_pop returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_covar_pop
+#define HEADER_covar_pop
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the covar_pop value of the dataset: two-argument aggregate, first argument y, second x.
+
+class covar_pop : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    covar_pop() : mcsv1_UDAF() {};
+    virtual ~covar_pop() {};
+    // Rejects any argument count other than 2 and declares a DOUBLE result plus the state size.
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+    // Zeroes the accumulated row count and running sums.
+    virtual ReturnCode reset(mcsv1Context* context);
+    // Adds one row (valsIn[0] = y, valsIn[1] = x) to the running sums.
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+    // Merges the partial sums held in valIn into this context; a null valIn is ignored.
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+    // Computes the population covariance from the running sums into valOut.
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+    // Subtracts one previously added row from the running sums.
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_covar_pop.h
+
diff --git a/utils/regr/covar_samp.cpp b/utils/regr/covar_samp.cpp
new file mode 100644
index 000000000..f3e16ffc4
--- /dev/null
+++ b/utils/regr/covar_samp.cpp
@@ -0,0 +1,188 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "covar_samp.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+// Registers the aggregate under the name "covar_samp" in UDAFMap when the static addToMap below is constructed.
+class Add_covar_samp_ToUDAFMap
+{
+public:
+    Add_covar_samp_ToUDAFMap()
+    {
+        UDAFMap::getMap()["covar_samp"] = new covar_samp();
+    }
+};
+
+static Add_covar_samp_ToUDAFMap addToMap;
+
+// Use the simple data model: fixed-size state kept in the context's user data (row count + running sums).
+struct covar_samp_data
+{
+    uint64_t cnt;
+    double sumx;
+    double sumy;
+    double sumxy; // sum of x * y
+};
+
+// init: rejects any argument count other than 2, then declares the user data size and a DOUBLE result.
+mcsv1_UDAF::ReturnCode covar_samp::init(mcsv1Context* context,
+                                        ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("covar_samp() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(covar_samp_data));
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+// reset: zeroes the count and every running sum in the context's user data.
+mcsv1_UDAF::ReturnCode covar_samp::reset(mcsv1Context* context)
+{
+    struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumy = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+// nextValue: adds one row to the running sums; valsIn[0] is taken as y and valsIn[1] as x.
+mcsv1_UDAF::ReturnCode covar_samp::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;
+    static_any::any& valIn_x = valsIn[1].columnData;
+    struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy += valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx += valx;
+
+    data->sumxy += valx*valy;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// subEvaluate: adds the sums and count from userDataIn into this context; a null userDataIn is a no-op.
+mcsv1_UDAF::ReturnCode covar_samp::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;
+    }
+
+    struct covar_samp_data* outData = (struct covar_samp_data*)context->getUserData()->data;
+    struct covar_samp_data* inData = (struct covar_samp_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumy += inData->sumy;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// evaluate: (sumxy - sumx * sumy / N) / (N - 1); valOut is left unset for fewer than two rows.
+mcsv1_UDAF::ReturnCode covar_samp::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 1) // a single row would divide by (N - 1) == 0, so at least two rows are required
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumxy = data->sumxy;
+
+        double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1);
+        valOut = covar_samp;
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+// dropValue: inverse of nextValue; subtracts one row's contribution and decrements the count.
+mcsv1_UDAF::ReturnCode covar_samp::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx -= valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/covar_samp.h b/utils/regr/covar_samp.h
new file mode 100644
index 000000000..6aba65054
--- /dev/null
+++ b/utils/regr/covar_samp.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+/***********************************************************************
+*   $Id$
+*
+*   covar_samp.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the covar_samp function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION covar_samp returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_covar_samp
+#define HEADER_covar_samp
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the covar_samp value of the dataset: two-argument aggregate, first argument y, second x.
+
+class covar_samp : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    covar_samp() : mcsv1_UDAF() {};
+    virtual ~covar_samp() {};
+    // Rejects any argument count other than 2 and declares a DOUBLE result plus the state size.
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+    // Zeroes the accumulated row count and running sums.
+    virtual ReturnCode reset(mcsv1Context* context);
+    // Adds one row (valsIn[0] = y, valsIn[1] = x) to the running sums.
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+    // Merges the partial sums held in valIn into this context; a null valIn is ignored.
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+    // Computes the sample covariance from the running sums into valOut.
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+    // Subtracts one previously added row from the running sums.
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_covar_samp.h
+
diff --git a/utils/regr/regr.vpj b/utils/regr/regr.vpj index 0de8c7282..3dcedb5c5 100644 --- a/utils/regr/regr.vpj +++ b/utils/regr/regr.vpj @@ -194,23 +194,35 @@ + + + + + + + + + + + + 0) { - valx /= pow(10.0, (double)scaley); + valx /= pow(10.0, (double)scalex); } data->sumx += valx; @@ -183,7 +183,7 @@ mcsv1_UDAF::ReturnCode regr_intercept::dropValue(mcsv1Context* context, ColumnDa if (valx != 0 && scalex > 0) { - valx /= pow(10.0, (double)scaley); + valx /= pow(10.0, (double)scalex); } data->sumx -= valx;
diff --git a/utils/regr/regr_r2.cpp b/utils/regr/regr_r2.cpp index 052b5dcfc..f8c923ee8 100644 --- a/utils/regr/regr_r2.cpp +++ b/utils/regr/regr_r2.cpp @@ -107,7 +107,7 @@ mcsv1_UDAF::ReturnCode regr_r2::nextValue(mcsv1Context* context, ColumnDatum* va if (valx != 0 && scalex > 0) { - valx /= pow(10.0, (double)scaley); + valx /= pow(10.0, (double)scalex); } data->sumx += valx; @@ -202,7 +202,7 @@ mcsv1_UDAF::ReturnCode regr_r2::dropValue(mcsv1Context* context, ColumnDatum* va if (valx != 0 && scalex > 0) { - valx /= pow(10.0, (double)scaley); + valx /= pow(10.0, (double)scalex); } data->sumx -= valx; diff --git a/utils/regr/regr_r2.h b/utils/regr/regr_r2.h index 6ff65009a..d440ad5a1 100644 --- a/utils/regr/regr_r2.h +++ b/utils/regr/regr_r2.h @@ -29,8 +29,8 @@ * soname 'libregr_mysql.so'; * */ -#ifndef HEADER_regr_intercept -#define HEADER_regr_intercept +#ifndef HEADER_regr_r2 +#define HEADER_regr_r2 #include #include @@ -84,5 +84,5 @@ protected: #undef EXPORT -#endif // HEADER_regr_intercept.h +#endif // HEADER_regr_r2.h diff --git a/utils/regr/regr_slope.cpp b/utils/regr/regr_slope.cpp index 51f649046..721ab6a22 100644 --- a/utils/regr/regr_slope.cpp +++ b/utils/regr/regr_slope.cpp @@ -104,7 +104,7 @@ mcsv1_UDAF::ReturnCode regr_slope::nextValue(mcsv1Context* context, ColumnDatum* if (valx != 0 && scalex > 0) { - valx /= pow(10.0, (double)scaley); + valx /= pow(10.0, (double)scalex); } data->sumx += valx; @@ -182,7 +182,7 @@ mcsv1_UDAF::ReturnCode regr_slope::dropValue(mcsv1Context* context, ColumnDatum* if (valx != 0 && scalex > 0) { - valx /= pow(10.0, (double)scaley); + valx /= pow(10.0, (double)scalex); } data->sumx -= valx; diff --git a/utils/regr/regr_sxx.cpp b/utils/regr/regr_sxx.cpp new file mode 100644 index 000000000..a11b06a7d --- /dev/null +++ b/utils/regr/regr_sxx.cpp @@ -0,0 +1,157 @@ +/* Copyright (C) 2017 MariaDB Corporation + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public 
License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_sxx.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+// Registers the aggregate under the name "regr_sxx" in UDAFMap when the static addToMap below is constructed.
+class Add_regr_sxx_ToUDAFMap
+{
+public:
+    Add_regr_sxx_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_sxx"] = new regr_sxx();
+    }
+};
+
+static Add_regr_sxx_ToUDAFMap addToMap;
+
+// Use the simple data model: fixed-size state kept in the context's user data (row count + running sums of x).
+struct regr_sxx_data
+{
+    uint64_t cnt;
+    double sumx;
+    double sumx2; // sum of (x squared)
+};
+
+// init: rejects any argument count other than 2, then declares the user data size and a DOUBLE result.
+mcsv1_UDAF::ReturnCode regr_sxx::init(mcsv1Context* context,
+                                      ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_sxx() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_sxx_data));
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8);
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+// reset: zeroes the count and both running sums in the context's user data.
+mcsv1_UDAF::ReturnCode regr_sxx::reset(mcsv1Context* context)
+{
+    struct regr_sxx_data* data = (struct regr_sxx_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumx2 = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+// nextValue: adds one row to the running sums; only valsIn[1] (x) is read, valsIn[0] is not used.
+mcsv1_UDAF::ReturnCode regr_sxx::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_x = valsIn[1].columnData;
+    struct regr_sxx_data* data = (struct regr_sxx_data*)context->getUserData()->data;
+    double valx = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx += valx;
+    data->sumx2 += valx*valx;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// subEvaluate: adds the sums and count from userDataIn into this context; a null userDataIn is a no-op.
+mcsv1_UDAF::ReturnCode regr_sxx::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;
+    }
+
+    struct regr_sxx_data* outData = (struct regr_sxx_data*)context->getUserData()->data;
+    struct regr_sxx_data* inData = (struct regr_sxx_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumx2 += inData->sumx2;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// evaluate: cnt * var_pop(x), where var_pop(x) = (sumx2 - sumx * sumx / N) / N; valOut is left unset when cnt is 0.
+mcsv1_UDAF::ReturnCode regr_sxx::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_sxx_data* data = (struct regr_sxx_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumx2 = data->sumx2;
+
+        double var_popx = (sumx2 - (sumx * sumx / N)) / N;
+        valOut = data->cnt * var_popx;
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+// dropValue: inverse of nextValue; subtracts one row's x contribution and decrements the count.
+mcsv1_UDAF::ReturnCode regr_sxx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_sxx_data* data = (struct regr_sxx_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx -= valx;
+    data->sumx2 -= valx*valx;
+
+    --data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_sxx.h b/utils/regr/regr_sxx.h
new file mode 100644
index 000000000..14d82bd55
--- /dev/null
+++ b/utils/regr/regr_sxx.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA.
*/
+
+/***********************************************************************
+*   $Id$
+*
+*   regr_sxx.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_sxx function
+ *
+ *
+ *    CREATE AGGREGATE FUNCTION regr_sxx returns REAL
+ *    soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_sxx
+#define HEADER_regr_sxx
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan;
+
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_sxx value of the dataset: two-argument aggregate whose sums use only the second argument (x).
+
+class regr_sxx : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_sxx() : mcsv1_UDAF() {};
+    virtual ~regr_sxx() {};
+    // Rejects any argument count other than 2 and declares a DOUBLE result plus the state size.
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+    // Zeroes the accumulated row count and running sums.
+    virtual ReturnCode reset(mcsv1Context* context);
+    // Adds one row's x value (valsIn[1]) to the running sums.
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+    // Merges the partial sums held in valIn into this context; a null valIn is ignored.
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+    // Computes count * var_pop(x) from the running sums into valOut.
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+    // Subtracts one previously added row from the running sums.
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_sxx.h
+
diff --git a/utils/regr/regr_sxy.cpp b/utils/regr/regr_sxy.cpp
new file mode 100644
index 000000000..e3df580b6
--- /dev/null
+++ b/utils/regr/regr_sxy.cpp
@@ -0,0 +1,189 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_sxy.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+// Helper whose constructor stores a new regr_sxy instance in UDAFMap under the key "regr_sxy".
+class Add_regr_sxy_ToUDAFMap
+{
+public:
+    Add_regr_sxy_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_sxy"] = new regr_sxy(); // NOTE(review): no matching delete is visible in this file
+    }
+};
+// File-scope instance; constructing it performs the registration above.
+static Add_regr_sxy_ToUDAFMap addToMap;
+
+// Use the simple data model
+struct regr_sxy_data
+{
+    uint64_t cnt;   // rows accumulated: incremented in nextValue, decremented in dropValue
+    double sumx;    // sum of x
+    double sumy;    // sum of y
+    double sumxy;   // sum of x * y
+};
+
+// init: fail unless exactly 2 parameters were supplied, then set the user-data size and result metadata.
+mcsv1_UDAF::ReturnCode regr_sxy::init(mcsv1Context* context,
+                                       ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_sxy() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_sxy_data)); // user data holds one regr_sxy_data
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8); // result scale is derived from the first parameter's scale
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+// reset: zero every accumulator in the context's user data.
+mcsv1_UDAF::ReturnCode regr_sxy::reset(mcsv1Context* context)
+{
+    struct regr_sxy_data* data = (struct regr_sxy_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumx = 0.0;
+    data->sumy = 0.0;
+    data->sumxy = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+// nextValue: add one row to the sums; valsIn[0] is read as y and valsIn[1] as x.
+mcsv1_UDAF::ReturnCode regr_sxy::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;
+    static_any::any& valIn_x = valsIn[1].columnData;
+    struct regr_sxy_data* data = (struct regr_sxy_data*)context->getUserData()->data;
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley); // divide by 10^scale
+    }
+
+    data->sumy += valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsIn[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex); // divide by 10^scale
+    }
+
+    data->sumx += valx;
+
+    data->sumxy += valx*valy; // uses the already-scaled x and y
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// subEvaluate: add the sums and count from userDataIn into this context's user data; a null userDataIn is a no-op.
+mcsv1_UDAF::ReturnCode regr_sxy::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;
+    }
+
+    struct regr_sxy_data* outData = (struct regr_sxy_data*)context->getUserData()->data;
+    struct regr_sxy_data* inData = (struct regr_sxy_data*)userDataIn->data;
+
+    outData->sumx += inData->sumx;
+    outData->sumy += inData->sumy;
+    outData->sumxy += inData->sumxy;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// evaluate: when cnt > 0, store cnt * covar_pop in valOut; otherwise valOut is left untouched.
+mcsv1_UDAF::ReturnCode regr_sxy::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_sxy_data* data = (struct regr_sxy_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumx = data->sumx;
+        double sumy = data->sumy;
+        double sumxy = data->sumxy;
+
+        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; // population covariance from the running sums
+        double regr_sxy = data->cnt * covar_pop;
+        valOut = regr_sxy;
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+// dropValue: subtract one row from the sums, applying the same scaling steps as nextValue.
+mcsv1_UDAF::ReturnCode regr_sxy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    static_any::any& valIn_x = valsDropped[1].columnData;
+    struct regr_sxy_data* data = (struct regr_sxy_data*)context->getUserData()->data;
+
+    double valx = 0.0;
+    double valy = 0.0;
+
+    valx = convertAnyTo(valIn_x);
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scalex = valsDropped[1].scale;
+
+    if (valx != 0 && scalex > 0)
+    {
+        valx /= pow(10.0, (double)scalex);
+    }
+
+    data->sumx -= valx;
+
+    data->sumxy -= valx*valy;
+    --data->cnt; // NOTE(review): cnt is unsigned; no check here that it is non-zero before decrementing
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_sxy.h b/utils/regr/regr_sxy.h
new file mode 100644
index 000000000..25aa34145
--- /dev/null
+++ b/utils/regr/regr_sxy.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. 
*/
+
+/***********************************************************************
+* $Id$
+*
+* regr_sxy.h
+***********************************************************************/
+
+/**
+ * Columnstore interface for the regr_sxy function
+ * Declares class regr_sxy; the aggregate callbacks are only declared here, not defined.
+ *
+ * CREATE AGGREGATE FUNCTION regr_sxy returns REAL
+ * soname 'libregr_mysql.so';
+ *
+ */
+#ifndef HEADER_regr_sxy
+#define HEADER_regr_sxy
+
+#include
+#include
+#include
+#ifdef _MSC_VER
+#include
+#else
+#include
+#endif
+
+#include "mcsv1_udaf.h"
+#include "calpontsystemcatalog.h"
+#include "windowfunctioncolumn.h"
+using namespace execplan; // NOTE(review): header-scope using-directive; it applies to every file that includes this header
+// NOTE(review): EXPORT is defined below and #undef'd at the end of this header, but nothing in between uses it.
+#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
+#define EXPORT __declspec(dllexport)
+#else
+#define EXPORT
+#endif
+
+namespace mcsv1sdk
+{
+
+// Return the regr_sxy value of the dataset
+// regr_sxy derives from mcsv1_UDAF and declares the six virtual callbacks listed below.
+class regr_sxy : public mcsv1_UDAF
+{
+public:
+    // Defaults OK
+    regr_sxy() : mcsv1_UDAF() {};
+    virtual ~regr_sxy() {};
+    // init: checks the parameter count and sets result metadata on the context.
+    virtual ReturnCode init(mcsv1Context* context,
+                            ColumnDatum* colTypes);
+    // reset: zeroes the accumulators kept in the context's user data.
+    virtual ReturnCode reset(mcsv1Context* context);
+    // nextValue: adds one (y, x) row (valsIn[0], valsIn[1]) to the running sums.
+    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
+    // subEvaluate: adds the sums held in valIn to this context's sums; null valIn is a no-op.
+    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
+    // evaluate: writes the result to valOut when at least one row was accumulated.
+    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
+    // dropValue: subtracts one (y, x) row from the running sums.
+    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
+
+protected:
+};
+
+}; // namespace
+
+#undef EXPORT
+
+#endif // HEADER_regr_sxy.h
+
diff --git a/utils/regr/regr_syy.cpp b/utils/regr/regr_syy.cpp
new file mode 100644
index 000000000..3b0ec7c8d
--- /dev/null
+++ b/utils/regr/regr_syy.cpp
@@ -0,0 +1,157 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. */
+
+#include
+#include
+#include
+#include "regr_syy.h"
+#include "bytestream.h"
+#include "objectreader.h"
+
+using namespace mcsv1sdk;
+// Helper whose constructor stores a new regr_syy instance in UDAFMap under the key "regr_syy".
+class Add_regr_syy_ToUDAFMap
+{
+public:
+    Add_regr_syy_ToUDAFMap()
+    {
+        UDAFMap::getMap()["regr_syy"] = new regr_syy(); // NOTE(review): no matching delete is visible in this file
+    }
+};
+// File-scope instance; constructing it performs the registration above.
+static Add_regr_syy_ToUDAFMap addToMap;
+
+// Use the simple data model
+struct regr_syy_data
+{
+    uint64_t cnt;   // rows accumulated: incremented in nextValue, decremented in dropValue
+    double sumy;    // sum of y
+    double sumy2;   // sum of (y squared)
+};
+
+// init: fail unless exactly 2 parameters were supplied, then set the user-data size and result metadata.
+mcsv1_UDAF::ReturnCode regr_syy::init(mcsv1Context* context,
+                                       ColumnDatum* colTypes)
+{
+    if (context->getParameterCount() != 2)
+    {
+        // The error message will be prepended with
+        // "The storage engine for the table doesn't support "
+        context->setErrorMessage("regr_syy() with other than 2 arguments");
+        return mcsv1_UDAF::ERROR;
+    }
+
+    context->setUserDataSize(sizeof(regr_syy_data)); // user data holds one regr_syy_data
+    context->setResultType(CalpontSystemCatalog::DOUBLE);
+    context->setColWidth(8);
+    context->setScale(colTypes[0].scale + 8); // result scale is derived from the first parameter's scale
+    context->setPrecision(19);
+    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
+    return mcsv1_UDAF::SUCCESS;
+
+}
+// reset: zero every accumulator in the context's user data.
+mcsv1_UDAF::ReturnCode regr_syy::reset(mcsv1Context* context)
+{
+    struct regr_syy_data* data = (struct regr_syy_data*)context->getUserData()->data;
+    data->cnt = 0;
+    data->sumy = 0.0;
+    data->sumy2 = 0.0;
+    return mcsv1_UDAF::SUCCESS;
+}
+// nextValue: add one row to the sums; only valsIn[0] (y) is read here, valsIn[1] is not touched.
+mcsv1_UDAF::ReturnCode regr_syy::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
+{
+    static_any::any& valIn_y = valsIn[0].columnData;
+    struct regr_syy_data* data = (struct regr_syy_data*)context->getUserData()->data;
+    double valy = 0.0;
+
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsIn[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley); // divide by 10^scale
+    }
+
+    data->sumy += valy;
+    data->sumy2 += valy*valy;
+
+    ++data->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// subEvaluate: add the sums and count from userDataIn into this context's user data; a null userDataIn is a no-op.
+mcsv1_UDAF::ReturnCode regr_syy::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
+{
+    if (!userDataIn)
+    {
+        return mcsv1_UDAF::SUCCESS;
+    }
+
+    struct regr_syy_data* outData = (struct regr_syy_data*)context->getUserData()->data;
+    struct regr_syy_data* inData = (struct regr_syy_data*)userDataIn->data;
+
+    outData->sumy += inData->sumy;
+    outData->sumy2 += inData->sumy2;
+    outData->cnt += inData->cnt;
+
+    return mcsv1_UDAF::SUCCESS;
+}
+// evaluate: when cnt > 0, store cnt * var_pop(y) in valOut; otherwise valOut is left untouched.
+mcsv1_UDAF::ReturnCode regr_syy::evaluate(mcsv1Context* context, static_any::any& valOut)
+{
+    struct regr_syy_data* data = (struct regr_syy_data*)context->getUserData()->data;
+    double N = data->cnt;
+    if (N > 0)
+    {
+        double sumy = data->sumy;
+        double sumy2 = data->sumy2;
+
+        double var_popy = (sumy2 - (sumy * sumy / N)) / N; // population variance of y from the running sums
+        valOut = data->cnt * var_popy;
+    }
+    return mcsv1_UDAF::SUCCESS;
+}
+// dropValue: subtract one row from the sums, applying the same scaling step as nextValue.
+mcsv1_UDAF::ReturnCode regr_syy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
+{
+    static_any::any& valIn_y = valsDropped[0].columnData;
+    struct regr_syy_data* data = (struct regr_syy_data*)context->getUserData()->data;
+
+    double valy = 0.0;
+
+    valy = convertAnyTo(valIn_y);
+
+    // For decimal types, we need to move the decimal point.
+    uint32_t scaley = valsDropped[0].scale;
+
+    if (valy != 0 && scaley > 0)
+    {
+        valy /= pow(10.0, (double)scaley);
+    }
+
+    data->sumy -= valy;
+    data->sumy2 -= valy*valy;
+
+    --data->cnt; // NOTE(review): cnt is unsigned; no check here that it is non-zero before decrementing
+
+    return mcsv1_UDAF::SUCCESS;
+}
+
diff --git a/utils/regr/regr_syy.h b/utils/regr/regr_syy.h
new file mode 100644
index 000000000..a837fab13
--- /dev/null
+++ b/utils/regr/regr_syy.h
@@ -0,0 +1,88 @@
+/* Copyright (C) 2017 MariaDB Corporation
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License
+   as published by the Free Software Foundation; version 2 of
+   the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+   MA 02110-1301, USA. 
*/ + +/*********************************************************************** +* $Id$ +* +* regr_syy.h +***********************************************************************/ + +/** + * Columnstore interface for for the regr_syy function + * + * + * CREATE AGGREGATE FUNCTION regr_syy returns REAL + * soname 'libregr_mysql.so'; + * + */ +#ifndef HEADER_regr_syy +#define HEADER_regr_syy + +#include +#include +#include +#ifdef _MSC_VER +#include +#else +#include +#endif + +#include "mcsv1_udaf.h" +#include "calpontsystemcatalog.h" +#include "windowfunctioncolumn.h" +using namespace execplan; + +#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT) +#define EXPORT __declspec(dllexport) +#else +#define EXPORT +#endif + +namespace mcsv1sdk +{ + +// Return the regr_syy value of the dataset + +class regr_syy : public mcsv1_UDAF +{ +public: + // Defaults OK + regr_syy() : mcsv1_UDAF() {}; + virtual ~regr_syy() {}; + + virtual ReturnCode init(mcsv1Context* context, + ColumnDatum* colTypes); + + virtual ReturnCode reset(mcsv1Context* context); + + virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn); + + virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn); + + virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut); + + virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped); + +protected: +}; + +}; // namespace + +#undef EXPORT + +#endif // HEADER_regr_syy.h + diff --git a/utils/regr/regrmysql.cpp b/utils/regr/regrmysql.cpp index fce6bb440..822d05ca6 100644 --- a/utils/regr/regrmysql.cpp +++ b/utils/regr/regrmysql.cpp @@ -720,6 +720,636 @@ extern "C" *is_null = 1; return 0; } + +//======================================================================= + + /** + * corr + */ + struct corr_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + double sumy; + double sumy2; // sum of (y squared) + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + 
__declspec(dllexport) + #endif + my_bool corr_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct corr_data* data; + if (args->arg_count != 2) + { + strcpy(message,"corr() requires two arguments"); + return 1; + } + + if (!(data = (struct corr_data*) malloc(sizeof(struct corr_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumy2 = 0.0; + data->sumxy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void corr_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + corr_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct corr_data* data = (struct corr_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + data->sumy = 0.0; + data->sumy2 = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + corr_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct corr_data* data = (struct corr_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumx2 += xval*xval; + data->sumy2 += yval*yval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double corr(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct corr_data* data = (struct corr_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumx2 = data->sumx2; + double sumy2 = 
data->sumy2; + double sumxy = data->sumxy; + double var_popx = (sumx2 - (sumx * sumx / N)) / N; + if (var_popx == 0) + { + // When var_popx is 0, NULL is the result. + *is_null = 1; + return 0; + } + double var_popy = (sumy2 - (sumy * sumy / N)) / N; + if (var_popy == 0) + { + // When var_popy is 0, 1 is the result + return 1; + } + double std_popx = sqrt(var_popx); + double std_popy = sqrt(var_popy); + double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + double corr = covar_pop / (std_popy * std_popx); + return corr; + } + *is_null = 1; + return 0; + } + +//======================================================================= + + /** + * regr_sxx + */ + struct regr_sxx_data + { + int64_t cnt; + double sumx; + double sumx2; // sum of (x squared) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_sxx_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_sxx_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_sxx() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_sxx_data*) malloc(sizeof(struct regr_sxx_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_sxx_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_sxx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_sxx_data* data = (struct regr_sxx_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumx2 = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_sxx_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct 
regr_sxx_data* data = (struct regr_sxx_data*)initid->ptr; + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumx += xval; + data->sumx2 += xval*xval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_sxx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_sxx_data* data = (struct regr_sxx_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumx2 = data->sumx2; + double var_popx = (sumx2 - (sumx * sumx / N)) / N; + return data->cnt * var_popx; + } + *is_null = 1; + return 0; + } +//======================================================================= + + /** + * regr_syy + */ + struct regr_syy_data + { + int64_t cnt; + double sumy; + double sumy2; // sum of (y squared) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_syy_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_syy_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_syy() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_syy_data*) malloc(sizeof(struct regr_syy_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumy = 0.0; + data->sumy2 = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_syy_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_syy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_syy_data* data = (struct regr_syy_data*)initid->ptr; + data->cnt = 0; + data->sumy = 0.0; + data->sumy2 = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_syy_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for 
NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_syy_data* data = (struct regr_syy_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + data->sumy += yval; + data->sumy2 += yval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_syy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_syy_data* data = (struct regr_syy_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumy = data->sumy; + double sumy2 = data->sumy2; + double var_popy = (sumy2 - (sumy * sumy / N)) / N; + return data->cnt * var_popy; + } + *is_null = 1; + return 0; + } + +//======================================================================= + + /** + * regr_sxy + */ + struct regr_sxy_data + { + int64_t cnt; + double sumx; + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool regr_sxy_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct regr_sxy_data* data; + if (args->arg_count != 2) + { + strcpy(message,"regr_sxy() requires two arguments"); + return 1; + } + + if (!(data = (struct regr_sxy_data*) malloc(sizeof(struct regr_sxy_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void regr_sxy_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + regr_sxy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct regr_sxy_data* data = (struct regr_sxy_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + 
#endif + void + regr_sxy_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct regr_sxy_data* data = (struct regr_sxy_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double regr_sxy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct regr_sxy_data* data = (struct regr_sxy_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumxy = data->sumxy; + double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + return data->cnt * covar_pop; + } + *is_null = 1; + return 0; + } + +//======================================================================= + + /** + * covar_pop + */ + struct covar_pop_data + { + int64_t cnt; + double sumx; + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool covar_pop_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct covar_pop_data* data; + if (args->arg_count != 2) + { + strcpy(message,"covar_pop() requires two arguments"); + return 1; + } + + if (!(data = (struct covar_pop_data*) malloc(sizeof(struct covar_pop_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + + initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void covar_pop_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + covar_pop_clear(UDF_INIT* initid, char* 
is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct covar_pop_data* data = (struct covar_pop_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + covar_pop_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct covar_pop_data* data = (struct covar_pop_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double covar_pop(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct covar_pop_data* data = (struct covar_pop_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumxy = data->sumxy; + double covar_pop = (sumxy - ((sumx * sumy) / N)) / N; + return covar_pop; + } + *is_null = 1; + return 0; + } +//======================================================================= + + /** + * covar_samp + */ + struct covar_samp_data + { + int64_t cnt; + double sumx; + double sumy; + double sumxy; // sum of (x*y) + }; + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + my_bool covar_samp_init(UDF_INIT* initid, UDF_ARGS* args, char* message) + { + struct covar_samp_data* data; + if (args->arg_count != 2) + { + strcpy(message,"covar_samp() requires two arguments"); + return 1; + } + + if (!(data = (struct covar_samp_data*) malloc(sizeof(struct covar_samp_data)))) + { + strmov(message,"Couldn't allocate memory"); + return 1; + } + data->cnt = 0; + data->sumx = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + + 
initid->ptr = (char*)data; + return 0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void covar_samp_deinit(UDF_INIT* initid) + { + free(initid->ptr); + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + covar_samp_clear(UDF_INIT* initid, char* is_null __attribute__((unused)), + char* message __attribute__((unused))) + { + struct covar_samp_data* data = (struct covar_samp_data*)initid->ptr; + data->cnt = 0; + data->sumx = 0.0; + data->sumy = 0.0; + data->sumxy = 0.0; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + void + covar_samp_add(UDF_INIT* initid, UDF_ARGS* args, + char* is_null, + char* message __attribute__((unused))) + { + // Test for NULL in x and y + if (args->args[0] == 0 || args->args[1] == 0) + { + return; + } + struct covar_samp_data* data = (struct covar_samp_data*)initid->ptr; + double yval = cvtArgToDouble(args->arg_type[0], args->args[0]); + double xval = cvtArgToDouble(args->arg_type[1], args->args[1]); + data->sumy += yval; + data->sumx += xval; + data->sumxy += xval*yval; + ++data->cnt; + } + + #ifdef _MSC_VER + __declspec(dllexport) + #endif + double covar_samp(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)), + char* is_null, char* error __attribute__((unused))) + { + struct covar_samp_data* data = (struct covar_samp_data*)initid->ptr; + double N = data->cnt; + if (N > 0) + { + double sumx = data->sumx; + double sumy = data->sumy; + double sumxy = data->sumxy; + double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N-1); + return covar_samp; + } + *is_null = 1; + return 0; + } } // vim:ts=4 sw=4: