1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-07-29 08:21:15 +03:00

MCOL-1759 Add corr(), covar_pop(), covar_samp(), regr_sxx(), regr_sxy(), regr_syy() functions as UDAF

This commit is contained in:
David Hall
2018-10-09 17:06:01 -05:00
parent 1a7fca7fbc
commit e1d8211f27
19 changed files with 2275 additions and 10 deletions

View File

@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES}
########### next target ###############
set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept regr_r2)
set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept.cpp regr_r2.cpp corr.cpp regr_sxx.cpp regr_syy.cpp regr_sxy.cpp covar_pop.cpp covar_samp.cpp)
add_definitions(-DMYSQL_DYNAMIC_PLUGIN)

216
utils/regr/corr.cpp Normal file
View File

@ -0,0 +1,216 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "corr.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_corr_ToUDAFMap
{
public:
Add_corr_ToUDAFMap()
{
UDAFMap::getMap()["corr"] = new corr();
}
};
static Add_corr_ToUDAFMap addToMap;
// Use the simple data model
// Running totals kept for one corr() aggregation. init() requests
// sizeof(corr_data) bytes of user data and every callback in this file casts
// that buffer to this struct.
struct corr_data
{
uint64_t cnt; // number of (y, x) rows currently accumulated
double sumx; // sum of x
double sumx2; // sum of (x squared)
double sumy; // sum of y
double sumy2; // sum of (y squared)
double sumxy; // sum of x * y
};
// Check the argument count and describe the result column of corr().
//
// Returns ERROR (with an error message set on the context) unless exactly two
// parameters were supplied; otherwise configures the context and returns
// SUCCESS.
mcsv1_UDAF::ReturnCode corr::init(mcsv1Context* context,
                                  ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The error message will be prepended with
        // "The storage engine for the table doesn't support "
        context->setErrorMessage("corr() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Working storage: one corr_data worth of running totals.
    context->setUserDataSize(sizeof(corr_data));

    // Result column: 8 byte DOUBLE, scale derived from the first argument.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
    return mcsv1_UDAF::SUCCESS;
}
// Zero the running totals held in the context's user data.
mcsv1_UDAF::ReturnCode corr::reset(mcsv1Context* context)
{
    corr_data& acc = *(corr_data*)context->getUserData()->data;

    acc.cnt = 0;
    acc.sumx = acc.sumx2 = 0.0;
    acc.sumy = acc.sumy2 = 0.0;
    acc.sumxy = 0.0;
    return mcsv1_UDAF::SUCCESS;
}
// Fold one row into the running totals.
//
// valsIn[0] supplies y and valsIn[1] supplies x. Both are converted to double;
// a non-zero value with a positive column scale is divided by 10^scale.
mcsv1_UDAF::ReturnCode corr::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    ColumnDatum& yCol = valsIn[0];
    ColumnDatum& xCol = valsIn[1];
    corr_data& acc = *(corr_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);
    double y = convertAnyTo<double>(yCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t yScale = yCol.scale;

    if (yScale > 0 && y != 0)
    {
        y /= pow(10.0, static_cast<double>(yScale));
    }

    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumy += y;
    acc.sumy2 += y * y;
    acc.sumx += x;
    acc.sumx2 += x * x;
    acc.sumxy += x * y;
    acc.cnt += 1;
    return mcsv1_UDAF::SUCCESS;
}
// Merge a partial result (userDataIn) into this context's running totals.
// A null userDataIn is accepted and leaves the totals untouched.
mcsv1_UDAF::ReturnCode corr::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        corr_data& total = *(corr_data*)context->getUserData()->data;
        const corr_data& part = *(corr_data*)userDataIn->data;

        total.sumx += part.sumx;
        total.sumx2 += part.sumx2;
        total.sumy += part.sumy;
        total.sumy2 += part.sumy2;
        total.sumxy += part.sumxy;
        total.cnt += part.cnt;
    }

    return mcsv1_UDAF::SUCCESS;
}
// Produce the correlation coefficient from the running totals:
//
//     corr = covar_pop(y, x) / (stddev_pop(x) * stddev_pop(y))
//
// valOut is only assigned when the coefficient is defined. Returning SUCCESS
// without assigning valOut is how this file reports NULL (no rows, or either
// population variance not strictly positive).
//
// Note: the previous version returned 1 when var_pop(y) was 0. That rule
// belongs to REGR_R2; for CORR the quotient is 0/0 and the result must be
// NULL. The variance tests also use "not > 0" rather than "== 0" so that a
// tiny negative value produced by floating point cancellation (or a NaN) is
// not passed on to sqrt().
mcsv1_UDAF::ReturnCode corr::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    struct corr_data* data = (struct corr_data*)context->getUserData()->data;
    double N = data->cnt;

    if (N > 0)
    {
        double sumx = data->sumx;
        double sumy = data->sumy;
        double sumx2 = data->sumx2;
        double sumy2 = data->sumy2;
        double sumxy = data->sumxy;

        double var_popx = (sumx2 - (sumx * sumx / N)) / N;

        if (!(var_popx > 0))
        {
            // x has no spread: the coefficient is undefined, NULL is the result.
            return mcsv1_UDAF::SUCCESS;
        }

        double var_popy = (sumy2 - (sumy * sumy / N)) / N;

        if (!(var_popy > 0))
        {
            // y has no spread: the coefficient is undefined, NULL is the result.
            return mcsv1_UDAF::SUCCESS;
        }

        double std_popx = sqrt(var_popx);
        double std_popy = sqrt(var_popy);
        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
        double result = covar_pop / (std_popy * std_popx);
        valOut = result;
    }

    return mcsv1_UDAF::SUCCESS;
}
// Remove one previously accumulated row from the running totals. This is the
// exact inverse of nextValue(): the same conversion and scale handling is
// applied, then every total is decreased instead of increased.
mcsv1_UDAF::ReturnCode corr::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    ColumnDatum& yCol = valsDropped[0];
    ColumnDatum& xCol = valsDropped[1];
    corr_data& acc = *(corr_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);
    double y = convertAnyTo<double>(yCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t yScale = yCol.scale;

    if (yScale > 0 && y != 0)
    {
        y /= pow(10.0, static_cast<double>(yScale));
    }

    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumy -= y;
    acc.sumy2 -= y * y;
    acc.sumx -= x;
    acc.sumx2 -= x * x;
    acc.sumxy -= x * y;
    acc.cnt -= 1;
    return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/corr.h Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* corr.h
***********************************************************************/
/**
* Columnstore interface for the corr function
*
*
* CREATE AGGREGATE FUNCTION corr returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_corr
#define HEADER_corr
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{

// corr: two-argument aggregate returning the corr value of the dataset.
// Implements the mcsv1_UDAF callbacks; all state lives in the context's
// user data, so the class itself has no members.
class corr : public mcsv1_UDAF
{
public:
    // Defaults OK
    corr() : mcsv1_UDAF() {}
    virtual ~corr() {}

    // Validate arguments and describe the result column.
    virtual ReturnCode init(mcsv1Context* context,
                            ColumnDatum* colTypes);

    // Clear the running totals.
    virtual ReturnCode reset(mcsv1Context* context);

    // Add one row to the running totals.
    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);

    // Merge a partial result into the running totals.
    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);

    // Compute the final value from the running totals.
    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);

    // Remove one row from the running totals.
    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
};

} // namespace mcsv1sdk
#undef EXPORT
#endif // HEADER_corr.h

188
utils/regr/covar_pop.cpp Normal file
View File

@ -0,0 +1,188 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "covar_pop.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_covar_pop_ToUDAFMap
{
public:
Add_covar_pop_ToUDAFMap()
{
UDAFMap::getMap()["covar_pop"] = new covar_pop();
}
};
static Add_covar_pop_ToUDAFMap addToMap;
// Use the simple data model
// Running totals kept for one covar_pop() aggregation. init() requests
// sizeof(covar_pop_data) bytes of user data and every callback in this file
// casts that buffer to this struct.
struct covar_pop_data
{
uint64_t cnt; // number of (y, x) rows currently accumulated
double sumx; // sum of x
double sumy; // sum of y
double sumxy; // sum of x * y
};
// Check the argument count and describe the result column of covar_pop().
//
// Returns ERROR (with an error message set on the context) unless exactly two
// parameters were supplied; otherwise configures the context and returns
// SUCCESS.
mcsv1_UDAF::ReturnCode covar_pop::init(mcsv1Context* context,
                                       ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The error message will be prepended with
        // "The storage engine for the table doesn't support "
        context->setErrorMessage("covar_pop() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Working storage: one covar_pop_data worth of running totals.
    context->setUserDataSize(sizeof(covar_pop_data));

    // Result column: 8 byte DOUBLE, scale derived from the first argument.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
    return mcsv1_UDAF::SUCCESS;
}
// Zero the running totals held in the context's user data.
mcsv1_UDAF::ReturnCode covar_pop::reset(mcsv1Context* context)
{
    covar_pop_data& acc = *(covar_pop_data*)context->getUserData()->data;

    acc.cnt = 0;
    acc.sumx = acc.sumy = acc.sumxy = 0.0;
    return mcsv1_UDAF::SUCCESS;
}
// Fold one row into the running totals.
//
// valsIn[0] supplies y and valsIn[1] supplies x. Both are converted to double;
// a non-zero value with a positive column scale is divided by 10^scale.
mcsv1_UDAF::ReturnCode covar_pop::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    ColumnDatum& yCol = valsIn[0];
    ColumnDatum& xCol = valsIn[1];
    covar_pop_data& acc = *(covar_pop_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);
    double y = convertAnyTo<double>(yCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t yScale = yCol.scale;

    if (yScale > 0 && y != 0)
    {
        y /= pow(10.0, static_cast<double>(yScale));
    }

    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumy += y;
    acc.sumx += x;
    acc.sumxy += x * y;
    acc.cnt += 1;
    return mcsv1_UDAF::SUCCESS;
}
// Merge a partial result (userDataIn) into this context's running totals.
// A null userDataIn is accepted and leaves the totals untouched.
mcsv1_UDAF::ReturnCode covar_pop::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        covar_pop_data& total = *(covar_pop_data*)context->getUserData()->data;
        const covar_pop_data& part = *(covar_pop_data*)userDataIn->data;

        total.sumx += part.sumx;
        total.sumy += part.sumy;
        total.sumxy += part.sumxy;
        total.cnt += part.cnt;
    }

    return mcsv1_UDAF::SUCCESS;
}
// Produce the population covariance from the running totals:
//
//     covar_pop = (sum(x*y) - sum(x) * sum(y) / N) / N
//
// With no accumulated rows valOut is left unassigned.
mcsv1_UDAF::ReturnCode covar_pop::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    const covar_pop_data& acc = *(covar_pop_data*)context->getUserData()->data;

    if (acc.cnt == 0)
    {
        return mcsv1_UDAF::SUCCESS;
    }

    const double N = acc.cnt;
    double covar = (acc.sumxy - ((acc.sumx * acc.sumy) / N)) / N;
    valOut = covar;
    return mcsv1_UDAF::SUCCESS;
}
// Remove one previously accumulated row from the running totals. This is the
// exact inverse of nextValue(): the same conversion and scale handling is
// applied, then every total is decreased instead of increased.
mcsv1_UDAF::ReturnCode covar_pop::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    ColumnDatum& yCol = valsDropped[0];
    ColumnDatum& xCol = valsDropped[1];
    covar_pop_data& acc = *(covar_pop_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);
    double y = convertAnyTo<double>(yCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t yScale = yCol.scale;

    if (yScale > 0 && y != 0)
    {
        y /= pow(10.0, static_cast<double>(yScale));
    }

    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumy -= y;
    acc.sumx -= x;
    acc.sumxy -= x * y;
    acc.cnt -= 1;
    return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/covar_pop.h Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* covar_pop.h
***********************************************************************/
/**
* Columnstore interface for the covar_pop function
*
*
* CREATE AGGREGATE FUNCTION covar_pop returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_covar_pop
#define HEADER_covar_pop
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{

// covar_pop: two-argument aggregate returning the covar_pop value of the
// dataset. Implements the mcsv1_UDAF callbacks; all state lives in the
// context's user data, so the class itself has no members.
class covar_pop : public mcsv1_UDAF
{
public:
    // Defaults OK
    covar_pop() : mcsv1_UDAF() {}
    virtual ~covar_pop() {}

    // Validate arguments and describe the result column.
    virtual ReturnCode init(mcsv1Context* context,
                            ColumnDatum* colTypes);

    // Clear the running totals.
    virtual ReturnCode reset(mcsv1Context* context);

    // Add one row to the running totals.
    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);

    // Merge a partial result into the running totals.
    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);

    // Compute the final value from the running totals.
    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);

    // Remove one row from the running totals.
    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
};

} // namespace mcsv1sdk
#undef EXPORT
#endif // HEADER_covar_pop.h

188
utils/regr/covar_samp.cpp Normal file
View File

@ -0,0 +1,188 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "covar_samp.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_covar_samp_ToUDAFMap
{
public:
Add_covar_samp_ToUDAFMap()
{
UDAFMap::getMap()["covar_samp"] = new covar_samp();
}
};
static Add_covar_samp_ToUDAFMap addToMap;
// Use the simple data model
// Running totals kept for one covar_samp() aggregation. init() requests
// sizeof(covar_samp_data) bytes of user data and every callback in this file
// casts that buffer to this struct.
struct covar_samp_data
{
uint64_t cnt; // number of (y, x) rows currently accumulated
double sumx; // sum of x
double sumy; // sum of y
double sumxy; // sum of x * y
};
// Check the argument count and describe the result column of covar_samp().
//
// Returns ERROR (with an error message set on the context) unless exactly two
// parameters were supplied; otherwise configures the context and returns
// SUCCESS.
mcsv1_UDAF::ReturnCode covar_samp::init(mcsv1Context* context,
                                        ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The error message will be prepended with
        // "The storage engine for the table doesn't support "
        context->setErrorMessage("covar_samp() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Working storage: one covar_samp_data worth of running totals.
    context->setUserDataSize(sizeof(covar_samp_data));

    // Result column: 8 byte DOUBLE, scale derived from the first argument.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
    return mcsv1_UDAF::SUCCESS;
}
// Zero the running totals held in the context's user data.
mcsv1_UDAF::ReturnCode covar_samp::reset(mcsv1Context* context)
{
    covar_samp_data& acc = *(covar_samp_data*)context->getUserData()->data;

    acc.cnt = 0;
    acc.sumx = acc.sumy = acc.sumxy = 0.0;
    return mcsv1_UDAF::SUCCESS;
}
// Fold one row into the running totals.
//
// valsIn[0] supplies y and valsIn[1] supplies x. Both are converted to double;
// a non-zero value with a positive column scale is divided by 10^scale.
mcsv1_UDAF::ReturnCode covar_samp::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    ColumnDatum& yCol = valsIn[0];
    ColumnDatum& xCol = valsIn[1];
    covar_samp_data& acc = *(covar_samp_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);
    double y = convertAnyTo<double>(yCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t yScale = yCol.scale;

    if (yScale > 0 && y != 0)
    {
        y /= pow(10.0, static_cast<double>(yScale));
    }

    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumy += y;
    acc.sumx += x;
    acc.sumxy += x * y;
    acc.cnt += 1;
    return mcsv1_UDAF::SUCCESS;
}
// Merge a partial result (userDataIn) into this context's running totals.
// A null userDataIn is accepted and leaves the totals untouched.
mcsv1_UDAF::ReturnCode covar_samp::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        covar_samp_data& total = *(covar_samp_data*)context->getUserData()->data;
        const covar_samp_data& part = *(covar_samp_data*)userDataIn->data;

        total.sumx += part.sumx;
        total.sumy += part.sumy;
        total.sumxy += part.sumxy;
        total.cnt += part.cnt;
    }

    return mcsv1_UDAF::SUCCESS;
}
// Produce the sample covariance from the running totals:
//
//     covar_samp = (sum(x*y) - sum(x) * sum(y) / N) / (N - 1)
//
// The divisor is N - 1, so the value is only defined for two or more rows.
// With fewer rows valOut is left unassigned (reported as NULL). The previous
// guard (N > 0) let N == 1 through, which evaluated 0 / 0 and returned NaN.
mcsv1_UDAF::ReturnCode covar_samp::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    struct covar_samp_data* data = (struct covar_samp_data*)context->getUserData()->data;
    double N = data->cnt;

    if (N > 1)
    {
        double sumx = data->sumx;
        double sumy = data->sumy;
        double sumxy = data->sumxy;

        double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1);
        valOut = covar_samp;
    }

    return mcsv1_UDAF::SUCCESS;
}
// Remove one previously accumulated row from the running totals. This is the
// exact inverse of nextValue(): the same conversion and scale handling is
// applied, then every total is decreased instead of increased.
mcsv1_UDAF::ReturnCode covar_samp::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    ColumnDatum& yCol = valsDropped[0];
    ColumnDatum& xCol = valsDropped[1];
    covar_samp_data& acc = *(covar_samp_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);
    double y = convertAnyTo<double>(yCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t yScale = yCol.scale;

    if (yScale > 0 && y != 0)
    {
        y /= pow(10.0, static_cast<double>(yScale));
    }

    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumy -= y;
    acc.sumx -= x;
    acc.sumxy -= x * y;
    acc.cnt -= 1;
    return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/covar_samp.h Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* covar_samp.h
***********************************************************************/
/**
* Columnstore interface for the covar_samp function
*
*
* CREATE AGGREGATE FUNCTION covar_samp returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_covar_samp
#define HEADER_covar_samp
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{

// covar_samp: two-argument aggregate returning the covar_samp value of the
// dataset. Implements the mcsv1_UDAF callbacks; all state lives in the
// context's user data, so the class itself has no members.
class covar_samp : public mcsv1_UDAF
{
public:
    // Defaults OK
    covar_samp() : mcsv1_UDAF() {}
    virtual ~covar_samp() {}

    // Validate arguments and describe the result column.
    virtual ReturnCode init(mcsv1Context* context,
                            ColumnDatum* colTypes);

    // Clear the running totals.
    virtual ReturnCode reset(mcsv1Context* context);

    // Add one row to the running totals.
    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);

    // Merge a partial result into the running totals.
    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);

    // Compute the final value from the running totals.
    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);

    // Remove one row from the running totals.
    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
};

} // namespace mcsv1sdk
#undef EXPORT
#endif // HEADER_covar_samp.h

View File

@ -194,23 +194,35 @@
<Folder
Name="Source Files"
Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d">
<F N="corr.cpp"/>
<F N="covar_pop.cpp"/>
<F N="covar_samp.cpp"/>
<F N="regr_avgx.cpp"/>
<F N="regr_avgy.cpp"/>
<F N="regr_count.cpp"/>
<F N="regr_intercept.cpp"/>
<F N="regr_r2.cpp"/>
<F N="regr_slope.cpp"/>
<F N="regr_sxx.cpp"/>
<F N="regr_sxy.cpp"/>
<F N="regr_syy.cpp"/>
<F N="regrmysql.cpp"/>
</Folder>
<Folder
Name="Header Files"
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if">
<F N="corr.h"/>
<F N="covar_pop.h"/>
<F N="covar_samp.h"/>
<F N="regr_avgx.h"/>
<F N="regr_avgy.h"/>
<F N="regr_count.h"/>
<F N="regr_intercept.h"/>
<F N="regr_r2.h"/>
<F N="regr_slope.h"/>
<F N="regr_sxx.h"/>
<F N="regr_sxy.h"/>
<F N="regr_syy.h"/>
</Folder>
<Folder
Name="Resource Files"

View File

@ -104,7 +104,7 @@ mcsv1_UDAF::ReturnCode regr_intercept::nextValue(mcsv1Context* context, ColumnDa
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
valx /= pow(10.0, (double)scalex);
}
data->sumx += valx;
@ -183,7 +183,7 @@ mcsv1_UDAF::ReturnCode regr_intercept::dropValue(mcsv1Context* context, ColumnDa
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
valx /= pow(10.0, (double)scalex);
}
data->sumx -= valx;

View File

@ -107,7 +107,7 @@ mcsv1_UDAF::ReturnCode regr_r2::nextValue(mcsv1Context* context, ColumnDatum* va
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
valx /= pow(10.0, (double)scalex);
}
data->sumx += valx;
@ -202,7 +202,7 @@ mcsv1_UDAF::ReturnCode regr_r2::dropValue(mcsv1Context* context, ColumnDatum* va
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
valx /= pow(10.0, (double)scalex);
}
data->sumx -= valx;

View File

@ -29,8 +29,8 @@
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_intercept
#define HEADER_regr_intercept
#ifndef HEADER_regr_r2
#define HEADER_regr_r2
#include <cstdlib>
#include <string>
@ -84,5 +84,5 @@ protected:
#undef EXPORT
#endif // HEADER_regr_intercept.h
#endif // HEADER_regr_r2.h

View File

@ -104,7 +104,7 @@ mcsv1_UDAF::ReturnCode regr_slope::nextValue(mcsv1Context* context, ColumnDatum*
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
valx /= pow(10.0, (double)scalex);
}
data->sumx += valx;
@ -182,7 +182,7 @@ mcsv1_UDAF::ReturnCode regr_slope::dropValue(mcsv1Context* context, ColumnDatum*
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
valx /= pow(10.0, (double)scalex);
}
data->sumx -= valx;

157
utils/regr/regr_sxx.cpp Normal file
View File

@ -0,0 +1,157 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_sxx.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_sxx_ToUDAFMap
{
public:
Add_regr_sxx_ToUDAFMap()
{
UDAFMap::getMap()["regr_sxx"] = new regr_sxx();
}
};
static Add_regr_sxx_ToUDAFMap addToMap;
// Use the simple data model
// Running totals kept for one regr_sxx() aggregation. init() requests
// sizeof(regr_sxx_data) bytes of user data and every callback in this file
// casts that buffer to this struct. Only x is tracked; y is not used here.
struct regr_sxx_data
{
uint64_t cnt; // number of rows currently accumulated
double sumx; // sum of x
double sumx2; // sum of (x squared)
};
// Check the argument count and describe the result column of regr_sxx().
//
// Returns ERROR (with an error message set on the context) unless exactly two
// parameters were supplied; otherwise configures the context and returns
// SUCCESS.
mcsv1_UDAF::ReturnCode regr_sxx::init(mcsv1Context* context,
                                      ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The error message will be prepended with
        // "The storage engine for the table doesn't support "
        context->setErrorMessage("regr_sxx() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Working storage: one regr_sxx_data worth of running totals.
    context->setUserDataSize(sizeof(regr_sxx_data));

    // Result column: 8 byte DOUBLE, scale derived from the first argument.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
    return mcsv1_UDAF::SUCCESS;
}
// Zero the running totals held in the context's user data.
mcsv1_UDAF::ReturnCode regr_sxx::reset(mcsv1Context* context)
{
    regr_sxx_data& acc = *(regr_sxx_data*)context->getUserData()->data;

    acc.cnt = 0;
    acc.sumx = acc.sumx2 = 0.0;
    return mcsv1_UDAF::SUCCESS;
}
// Fold one row into the running totals.
//
// Only valsIn[1] (x) is read. It is converted to double; a non-zero value
// with a positive column scale is divided by 10^scale.
mcsv1_UDAF::ReturnCode regr_sxx::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    ColumnDatum& xCol = valsIn[1];
    regr_sxx_data& acc = *(regr_sxx_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumx += x;
    acc.sumx2 += x * x;
    acc.cnt += 1;
    return mcsv1_UDAF::SUCCESS;
}
// Merge a partial result (userDataIn) into this context's running totals.
// A null userDataIn is accepted and leaves the totals untouched.
mcsv1_UDAF::ReturnCode regr_sxx::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        regr_sxx_data& total = *(regr_sxx_data*)context->getUserData()->data;
        const regr_sxx_data& part = *(regr_sxx_data*)userDataIn->data;

        total.sumx += part.sumx;
        total.sumx2 += part.sumx2;
        total.cnt += part.cnt;
    }

    return mcsv1_UDAF::SUCCESS;
}
// Produce regr_sxx from the running totals as count * var_pop(x), where
//
//     var_pop(x) = (sum(x*x) - sum(x) * sum(x) / N) / N
//
// With no accumulated rows valOut is left unassigned.
mcsv1_UDAF::ReturnCode regr_sxx::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    const regr_sxx_data& acc = *(regr_sxx_data*)context->getUserData()->data;

    if (acc.cnt == 0)
    {
        return mcsv1_UDAF::SUCCESS;
    }

    const double N = acc.cnt;
    const double varPopX = (acc.sumx2 - (acc.sumx * acc.sumx / N)) / N;
    valOut = acc.cnt * varPopX;
    return mcsv1_UDAF::SUCCESS;
}
// Remove one previously accumulated row from the running totals. This is the
// exact inverse of nextValue(): the same conversion and scale handling is
// applied to valsDropped[1] (x), then every total is decreased.
mcsv1_UDAF::ReturnCode regr_sxx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    ColumnDatum& xCol = valsDropped[1];
    regr_sxx_data& acc = *(regr_sxx_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(xCol.columnData);

    // For decimal types, we need to move the decimal point.
    const uint32_t xScale = xCol.scale;

    if (xScale > 0 && x != 0)
    {
        x /= pow(10.0, static_cast<double>(xScale));
    }

    acc.sumx -= x;
    acc.sumx2 -= x * x;
    acc.cnt -= 1;
    return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_sxx.h Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_sxx.h
***********************************************************************/
/**
* Columnstore interface for the regr_sxx function
*
*
* CREATE AGGREGATE FUNCTION regr_sxx returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_sxx
#define HEADER_regr_sxx
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{

// regr_sxx: two-argument aggregate returning the regr_sxx value of the
// dataset. Implements the mcsv1_UDAF callbacks; all state lives in the
// context's user data, so the class itself has no members.
class regr_sxx : public mcsv1_UDAF
{
public:
    // Defaults OK
    regr_sxx() : mcsv1_UDAF() {}
    virtual ~regr_sxx() {}

    // Validate arguments and describe the result column.
    virtual ReturnCode init(mcsv1Context* context,
                            ColumnDatum* colTypes);

    // Clear the running totals.
    virtual ReturnCode reset(mcsv1Context* context);

    // Add one row to the running totals.
    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);

    // Merge a partial result into the running totals.
    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);

    // Compute the final value from the running totals.
    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);

    // Remove one row from the running totals.
    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
};

} // namespace mcsv1sdk
#undef EXPORT
#endif // HEADER_regr_sxx.h

189
utils/regr/regr_sxy.cpp Normal file
View File

@ -0,0 +1,189 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_sxy.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_sxy_ToUDAFMap
{
public:
Add_regr_sxy_ToUDAFMap()
{
UDAFMap::getMap()["regr_sxy"] = new regr_sxy();
}
};
static Add_regr_sxy_ToUDAFMap addToMap;
// Use the simple data model
// Running totals kept for one regr_sxy() aggregation. init() requests
// sizeof(regr_sxy_data) bytes of user data and every callback in this file
// casts that buffer to this struct.
struct regr_sxy_data
{
uint64_t cnt; // number of (y, x) rows currently accumulated
double sumx; // sum of x
double sumy; // sum of y
double sumxy; // sum of x * y
};
// Check the argument count and describe the result column of regr_sxy().
//
// Returns ERROR (with an error message set on the context) unless exactly two
// parameters were supplied; otherwise configures the context and returns
// SUCCESS.
mcsv1_UDAF::ReturnCode regr_sxy::init(mcsv1Context* context,
                                      ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The error message will be prepended with
        // "The storage engine for the table doesn't support "
        context->setErrorMessage("regr_sxy() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Working storage: one regr_sxy_data worth of running totals.
    context->setUserDataSize(sizeof(regr_sxy_data));

    // Result column: 8 byte DOUBLE, scale derived from the first argument.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
    return mcsv1_UDAF::SUCCESS;
}
// Zero the running totals held in the context's user data.
mcsv1_UDAF::ReturnCode regr_sxy::reset(mcsv1Context* context)
{
    regr_sxy_data& acc = *(regr_sxy_data*)context->getUserData()->data;

    acc.cnt = 0;
    acc.sumx = acc.sumy = acc.sumxy = 0.0;
    return mcsv1_UDAF::SUCCESS;
}
/**
 * Fold one (y, x) pair into the running totals.
 *
 * @param context UDAF context whose user data holds the totals.
 * @param valsIn  Argument values: slot 0 is y, slot 1 is x.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_sxy::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    static_any::any& anyY = valsIn[0].columnData;
    static_any::any& anyX = valsIn[1].columnData;
    struct regr_sxy_data* totals =
        (struct regr_sxy_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(anyX);
    double y = convertAnyTo<double>(anyY);

    // For decimal types the value must be divided by 10^scale to place the
    // decimal point; zero values and zero scales need no adjustment.
    const uint32_t yScale = valsIn[0].scale;

    if (yScale > 0 && y != 0)
    {
        y = y / pow(10.0, (double)yScale);
    }

    const uint32_t xScale = valsIn[1].scale;

    if (xScale > 0 && x != 0)
    {
        x = x / pow(10.0, (double)xScale);
    }

    totals->sumy  += y;
    totals->sumx  += x;
    totals->sumxy += x * y;
    totals->cnt   += 1;

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Merge another set of running totals into this context's totals.
 *
 * @param context    UDAF context holding the totals to be updated.
 * @param userDataIn Totals to add in; a null pointer is accepted and ignored.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_sxy::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        struct regr_sxy_data* merged =
            (struct regr_sxy_data*)context->getUserData()->data;
        struct regr_sxy_data* partial =
            (struct regr_sxy_data*)userDataIn->data;

        merged->cnt   += partial->cnt;
        merged->sumx  += partial->sumx;
        merged->sumy  += partial->sumy;
        merged->sumxy += partial->sumxy;
    }

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Produce the aggregate result: cnt * covar_pop(y, x).
 *
 * @param context UDAF context whose user data holds the totals.
 * @param valOut  Receives the result as a double; left untouched when no
 *                pairs have been accumulated.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_sxy::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    struct regr_sxy_data* totals =
        (struct regr_sxy_data*)context->getUserData()->data;

    if (totals->cnt == 0)
    {
        // Nothing accumulated: valOut is deliberately not assigned.
        return mcsv1_UDAF::SUCCESS;
    }

    double N = totals->cnt;
    double covarPop = (totals->sumxy - ((totals->sumx * totals->sumy) / N)) / N;
    double result = totals->cnt * covarPop;

    valOut = result;
    return mcsv1_UDAF::SUCCESS;
}
/**
 * Remove one previously accumulated (y, x) pair from the running totals;
 * the exact inverse of nextValue().
 *
 * @param context     UDAF context whose user data holds the totals.
 * @param valsDropped Argument values being removed: slot 0 is y, slot 1 is x.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_sxy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    static_any::any& anyY = valsDropped[0].columnData;
    static_any::any& anyX = valsDropped[1].columnData;
    struct regr_sxy_data* totals =
        (struct regr_sxy_data*)context->getUserData()->data;

    double x = convertAnyTo<double>(anyX);
    double y = convertAnyTo<double>(anyY);

    // For decimal types the value must be divided by 10^scale to place the
    // decimal point; zero values and zero scales need no adjustment.
    const uint32_t yScale = valsDropped[0].scale;

    if (yScale > 0 && y != 0)
    {
        y = y / pow(10.0, (double)yScale);
    }

    const uint32_t xScale = valsDropped[1].scale;

    if (xScale > 0 && x != 0)
    {
        x = x / pow(10.0, (double)xScale);
    }

    totals->sumy  -= y;
    totals->sumx  -= x;
    totals->sumxy -= x * y;
    totals->cnt   -= 1;

    return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_sxy.h Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_sxy.h
***********************************************************************/
/**
* Columnstore interface for the regr_sxy function
*
*
* CREATE AGGREGATE FUNCTION regr_sxy returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_sxy
#define HEADER_regr_sxy
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{

/**
 * UDAF that returns the regr_sxy value of the dataset.
 *
 * The class holds no members of its own; it only overrides the mcsv1_UDAF
 * callbacks declared below.
 */
class regr_sxy : public mcsv1_UDAF
{
public:
    // Defaults OK
    regr_sxy() : mcsv1_UDAF() {}
    virtual ~regr_sxy() {}

    // Validates the call and describes the result column.
    virtual ReturnCode init(mcsv1Context* context,
                            ColumnDatum* colTypes);

    // Clears the accumulated state.
    virtual ReturnCode reset(mcsv1Context* context);

    // Accumulates one set of argument values.
    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);

    // Merges another set of accumulated state into this context.
    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);

    // Produces the aggregate result.
    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);

    // Removes one previously accumulated set of argument values.
    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
};

} // namespace mcsv1sdk
#undef EXPORT
#endif // HEADER_regr_sxy.h

157
utils/regr/regr_syy.cpp Normal file
View File

@ -0,0 +1,157 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_syy.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
// Registration helper: constructing an instance stores a newly allocated
// regr_syy object in the map returned by UDAFMap::getMap(), under the key
// "regr_syy".
class Add_regr_syy_ToUDAFMap
{
public:
Add_regr_syy_ToUDAFMap()
{
// The object is heap-allocated here; nothing in this file deletes it.
UDAFMap::getMap()["regr_syy"] = new regr_syy();
}
};
// File-scope instance whose constructor performs the registration above.
static Add_regr_syy_ToUDAFMap addToMap;
// Use the simple data model
// Running totals for regr_syy. init() sizes the user data area to
// sizeof(regr_syy_data) and the other callbacks cast that area to this type.
struct regr_syy_data
{
uint64_t cnt; // number of y values currently accumulated
double sumy; // sum of y
double sumy2; // sum of (y squared)
};
/**
 * Validate the call and describe the aggregate's state and result column.
 *
 * @param context  UDAF context that receives the error message or the
 *                 state size, result type, width, scale, precision and flags.
 * @param colTypes Argument descriptors; only colTypes[0].scale is read.
 * @return ERROR when the argument count is not 2, otherwise SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_syy::init(mcsv1Context* context,
                                      ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The text set here is appended to
        // "The storage engine for the table doesn't support "
        context->setErrorMessage("regr_syy() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Per-context state is a single fixed-size regr_syy_data record.
    context->setUserDataSize(sizeof(struct regr_syy_data));

    // The result is reported as an 8-byte DOUBLE.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    // NOTE(review): presumably makes the framework skip rows with NULL
    // arguments -- confirm against the mcsv1 SDK.
    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Zero every running total held in the context's user data.
 *
 * @param context UDAF context whose user data holds a regr_syy_data record.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_syy::reset(mcsv1Context* context)
{
    struct regr_syy_data* totals =
        (struct regr_syy_data*)context->getUserData()->data;

    totals->sumy2 = 0.0;
    totals->sumy  = 0.0;
    totals->cnt   = 0;

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Fold one y value into the running totals.
 *
 * @param context UDAF context whose user data holds the totals.
 * @param valsIn  Argument values; only slot 0 (y) is read.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_syy::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    static_any::any& anyY = valsIn[0].columnData;
    struct regr_syy_data* totals =
        (struct regr_syy_data*)context->getUserData()->data;

    double y = convertAnyTo<double>(anyY);

    // For decimal types the value must be divided by 10^scale to place the
    // decimal point; zero values and zero scales need no adjustment.
    const uint32_t yScale = valsIn[0].scale;

    if (yScale > 0 && y != 0)
    {
        y = y / pow(10.0, (double)yScale);
    }

    totals->sumy  += y;
    totals->sumy2 += y * y;
    totals->cnt   += 1;

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Merge another set of running totals into this context's totals.
 *
 * @param context    UDAF context holding the totals to be updated.
 * @param userDataIn Totals to add in; a null pointer is accepted and ignored.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_syy::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        struct regr_syy_data* merged =
            (struct regr_syy_data*)context->getUserData()->data;
        struct regr_syy_data* partial =
            (struct regr_syy_data*)userDataIn->data;

        merged->cnt   += partial->cnt;
        merged->sumy  += partial->sumy;
        merged->sumy2 += partial->sumy2;
    }

    return mcsv1_UDAF::SUCCESS;
}
/**
 * Produce the aggregate result: cnt * var_pop(y).
 *
 * @param context UDAF context whose user data holds the totals.
 * @param valOut  Receives the result as a double; left untouched when no
 *                values have been accumulated.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_syy::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    struct regr_syy_data* totals =
        (struct regr_syy_data*)context->getUserData()->data;

    if (totals->cnt == 0)
    {
        // Nothing accumulated: valOut is deliberately not assigned.
        return mcsv1_UDAF::SUCCESS;
    }

    double N = totals->cnt;
    double varPopY = (totals->sumy2 - (totals->sumy * totals->sumy / N)) / N;

    valOut = totals->cnt * varPopY;
    return mcsv1_UDAF::SUCCESS;
}
/**
 * Remove one previously accumulated y value from the running totals;
 * the exact inverse of nextValue().
 *
 * @param context     UDAF context whose user data holds the totals.
 * @param valsDropped Argument values being removed; only slot 0 (y) is read.
 * @return Always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_syy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    static_any::any& anyY = valsDropped[0].columnData;
    struct regr_syy_data* totals =
        (struct regr_syy_data*)context->getUserData()->data;

    double y = convertAnyTo<double>(anyY);

    // For decimal types the value must be divided by 10^scale to place the
    // decimal point; zero values and zero scales need no adjustment.
    const uint32_t yScale = valsDropped[0].scale;

    if (yScale > 0 && y != 0)
    {
        y = y / pow(10.0, (double)yScale);
    }

    totals->sumy  -= y;
    totals->sumy2 -= y * y;
    totals->cnt   -= 1;

    return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_syy.h Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporation
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_syy.h
***********************************************************************/
/**
* Columnstore interface for the regr_syy function
*
*
* CREATE AGGREGATE FUNCTION regr_syy returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_syy
#define HEADER_regr_syy
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{

/**
 * UDAF that returns the regr_syy value of the dataset.
 *
 * The class holds no members of its own; it only overrides the mcsv1_UDAF
 * callbacks declared below.
 */
class regr_syy : public mcsv1_UDAF
{
public:
    // Defaults OK
    regr_syy() : mcsv1_UDAF() {}
    virtual ~regr_syy() {}

    // Validates the call and describes the result column.
    virtual ReturnCode init(mcsv1Context* context,
                            ColumnDatum* colTypes);

    // Clears the accumulated state.
    virtual ReturnCode reset(mcsv1Context* context);

    // Accumulates one set of argument values.
    virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);

    // Merges another set of accumulated state into this context.
    virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);

    // Produces the aggregate result.
    virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);

    // Removes one previously accumulated set of argument values.
    virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
};

} // namespace mcsv1sdk
#undef EXPORT
#endif // HEADER_regr_syy.h

View File

@ -720,6 +720,636 @@ extern "C"
*is_null = 1;
return 0;
}
//=======================================================================
/**
* corr
*/
// Running totals for the corr() UDF. Allocated by corr_init() and reached
// by the other corr_* functions through initid->ptr.
struct corr_data
{
int64_t cnt; // number of rows accumulated by corr_add()
double sumx; // sum of x
double sumx2; // sum of (x squared)
double sumy; // sum of y
double sumy2; // sum of (y squared)
double sumxy; // sum of (x*y)
};
/**
 * Check the argument count and allocate zeroed running totals for corr().
 *
 * @param initid  Receives the allocated corr_data in initid->ptr.
 * @param args    Call arguments; exactly two are required.
 * @param message Receives an error text on failure.
 * @return 0 on success, 1 on a wrong argument count or failed allocation.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool corr_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 2)
    {
        strcpy(message,"corr() requires two arguments");
        return 1;
    }

    struct corr_data* state = (struct corr_data*) malloc(sizeof(struct corr_data));

    if (state == 0)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->cnt   = 0;
    state->sumx  = 0.0;
    state->sumy  = 0.0;
    state->sumx2 = 0.0;
    state->sumy2 = 0.0;
    state->sumxy = 0.0;

    initid->ptr = (char*)state;
    return 0;
}
/**
 * Release the running totals stored in initid->ptr.
 *
 * @param initid UDF handle whose ptr member is passed to free().
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void corr_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
/**
 * Reset every running total of corr() to zero.
 *
 * @param initid  UDF handle whose ptr member holds the corr_data.
 * @param is_null Unused.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
corr_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
           char* message __attribute__((unused)))
{
    struct corr_data* state = (struct corr_data*)initid->ptr;

    state->sumxy = 0.0;
    state->sumy2 = 0.0;
    state->sumy  = 0.0;
    state->sumx2 = 0.0;
    state->sumx  = 0.0;
    state->cnt   = 0;
}
/**
 * Fold the current (y, x) argument pair into the running totals of corr().
 * The pair is ignored when either argument pointer is null.
 *
 * @param initid  UDF handle whose ptr member holds the corr_data.
 * @param args    args[0] is y, args[1] is x.
 * @param is_null Not used by this function.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
corr_add(UDF_INIT* initid, UDF_ARGS* args,
         char* is_null,
         char* message __attribute__((unused)))
{
    // Only accumulate when both x and y are non-NULL.
    if (args->args[0] != 0 && args->args[1] != 0)
    {
        struct corr_data* state = (struct corr_data*)initid->ptr;
        const double y = cvtArgToDouble(args->arg_type[0], args->args[0]);
        const double x = cvtArgToDouble(args->arg_type[1], args->args[1]);

        state->sumy  += y;
        state->sumx  += x;
        state->sumx2 += x * x;
        state->sumy2 += y * y;
        state->sumxy += x * y;
        state->cnt   += 1;
    }
}
/**
 * Return the correlation coefficient of the accumulated (y, x) pairs:
 * covar_pop(y, x) / (stddev_pop(y) * stddev_pop(x)).
 *
 * The result is NULL (*is_null set to 1, return value 0) when no rows were
 * accumulated or when either population variance is not strictly positive,
 * because the coefficient is undefined in those cases.
 *
 * @param initid  UDF handle whose ptr member holds the corr_data.
 * @param args    Unused.
 * @param is_null Set to 1 when the result is NULL.
 * @param error   Unused.
 * @return The correlation coefficient, or 0 together with *is_null = 1.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double corr(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
            char* is_null, char* error __attribute__((unused)))
{
    struct corr_data* data = (struct corr_data*)initid->ptr;
    double N = data->cnt;

    if (N > 0)
    {
        double sumx = data->sumx;
        double sumy = data->sumy;
        double sumx2 = data->sumx2;
        double sumy2 = data->sumy2;
        double sumxy = data->sumxy;

        double var_popx = (sumx2 - (sumx * sumx / N)) / N;
        double var_popy = (sumy2 - (sumy * sumy / N)) / N;

        // A zero variance makes the denominator zero, so the coefficient is
        // undefined and NULL is the result. The !(v > 0) form also rejects
        // the slightly negative values (and NaN) that floating-point
        // cancellation can produce, which would otherwise reach sqrt().
        if (!(var_popx > 0) || !(var_popy > 0))
        {
            *is_null = 1;
            return 0;
        }

        double std_popx = sqrt(var_popx);
        double std_popy = sqrt(var_popy);
        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
        double corr = covar_pop / (std_popy * std_popx);
        return corr;
    }

    *is_null = 1;
    return 0;
}
//=======================================================================
/**
* regr_sxx
*/
// Running totals for the regr_sxx() UDF. Allocated by regr_sxx_init() and
// reached by the other regr_sxx_* functions through initid->ptr.
struct regr_sxx_data
{
int64_t cnt; // number of rows accumulated by regr_sxx_add()
double sumx; // sum of x
double sumx2; // sum of (x squared)
};
/**
 * Check the argument count and allocate zeroed running totals for regr_sxx().
 *
 * @param initid  Receives the allocated regr_sxx_data in initid->ptr.
 * @param args    Call arguments; exactly two are required.
 * @param message Receives an error text on failure.
 * @return 0 on success, 1 on a wrong argument count or failed allocation.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_sxx_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 2)
    {
        strcpy(message,"regr_sxx() requires two arguments");
        return 1;
    }

    struct regr_sxx_data* state =
        (struct regr_sxx_data*) malloc(sizeof(struct regr_sxx_data));

    if (state == 0)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->cnt   = 0;
    state->sumx  = 0.0;
    state->sumx2 = 0.0;

    initid->ptr = (char*)state;
    return 0;
}
/**
 * Release the running totals stored in initid->ptr.
 *
 * @param initid UDF handle whose ptr member is passed to free().
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_sxx_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
/**
 * Reset every running total of regr_sxx() to zero.
 *
 * @param initid  UDF handle whose ptr member holds the regr_sxx_data.
 * @param is_null Unused.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_sxx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
               char* message __attribute__((unused)))
{
    struct regr_sxx_data* state = (struct regr_sxx_data*)initid->ptr;

    state->sumx2 = 0.0;
    state->sumx  = 0.0;
    state->cnt   = 0;
}
/**
 * Fold the current x value into the running totals of regr_sxx().
 * The row is ignored when either argument pointer is null, even though only
 * x contributes to the totals.
 *
 * @param initid  UDF handle whose ptr member holds the regr_sxx_data.
 * @param args    args[0] is y, args[1] is x.
 * @param is_null Not used by this function.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_sxx_add(UDF_INIT* initid, UDF_ARGS* args,
             char* is_null,
             char* message __attribute__((unused)))
{
    // Only accumulate when both x and y are non-NULL.
    if (args->args[0] != 0 && args->args[1] != 0)
    {
        struct regr_sxx_data* state = (struct regr_sxx_data*)initid->ptr;
        const double x = cvtArgToDouble(args->arg_type[1], args->args[1]);

        state->sumx  += x;
        state->sumx2 += x * x;
        state->cnt   += 1;
    }
}
/**
 * Return cnt * var_pop(x) for the accumulated rows.
 *
 * @param initid  UDF handle whose ptr member holds the regr_sxx_data.
 * @param args    Unused.
 * @param is_null Set to 1 when no rows were accumulated.
 * @param error   Unused.
 * @return The regr_sxx value, or 0 together with *is_null = 1.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_sxx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                char* is_null, char* error __attribute__((unused)))
{
    struct regr_sxx_data* state = (struct regr_sxx_data*)initid->ptr;
    double N = state->cnt;

    if (N <= 0)
    {
        *is_null = 1;
        return 0;
    }

    double varPopX = (state->sumx2 - (state->sumx * state->sumx / N)) / N;
    return state->cnt * varPopX;
}
//=======================================================================
/**
* regr_syy
*/
// Running totals for the regr_syy() UDF. Allocated by regr_syy_init() and
// reached by the other regr_syy_* functions through initid->ptr.
struct regr_syy_data
{
int64_t cnt; // number of rows accumulated by regr_syy_add()
double sumy; // sum of y
double sumy2; // sum of (y squared)
};
/**
 * Check the argument count and allocate zeroed running totals for regr_syy().
 *
 * @param initid  Receives the allocated regr_syy_data in initid->ptr.
 * @param args    Call arguments; exactly two are required.
 * @param message Receives an error text on failure.
 * @return 0 on success, 1 on a wrong argument count or failed allocation.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_syy_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 2)
    {
        strcpy(message,"regr_syy() requires two arguments");
        return 1;
    }

    struct regr_syy_data* state =
        (struct regr_syy_data*) malloc(sizeof(struct regr_syy_data));

    if (state == 0)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->cnt   = 0;
    state->sumy  = 0.0;
    state->sumy2 = 0.0;

    initid->ptr = (char*)state;
    return 0;
}
/**
 * Release the running totals stored in initid->ptr.
 *
 * @param initid UDF handle whose ptr member is passed to free().
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_syy_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
/**
 * Reset every running total of regr_syy() to zero.
 *
 * @param initid  UDF handle whose ptr member holds the regr_syy_data.
 * @param is_null Unused.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_syy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
               char* message __attribute__((unused)))
{
    struct regr_syy_data* state = (struct regr_syy_data*)initid->ptr;

    state->sumy2 = 0.0;
    state->sumy  = 0.0;
    state->cnt   = 0;
}
/**
 * Fold the current y value into the running totals of regr_syy().
 * The row is ignored when either argument pointer is null, even though only
 * y contributes to the totals.
 *
 * @param initid  UDF handle whose ptr member holds the regr_syy_data.
 * @param args    args[0] is y, args[1] is x.
 * @param is_null Not used by this function.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_syy_add(UDF_INIT* initid, UDF_ARGS* args,
             char* is_null,
             char* message __attribute__((unused)))
{
    // Only accumulate when both x and y are non-NULL.
    if (args->args[0] != 0 && args->args[1] != 0)
    {
        struct regr_syy_data* state = (struct regr_syy_data*)initid->ptr;
        const double y = cvtArgToDouble(args->arg_type[0], args->args[0]);

        state->sumy  += y;
        state->sumy2 += y * y;
        state->cnt   += 1;
    }
}
/**
 * Return cnt * var_pop(y) for the accumulated rows.
 *
 * @param initid  UDF handle whose ptr member holds the regr_syy_data.
 * @param args    Unused.
 * @param is_null Set to 1 when no rows were accumulated.
 * @param error   Unused.
 * @return The regr_syy value, or 0 together with *is_null = 1.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_syy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                char* is_null, char* error __attribute__((unused)))
{
    struct regr_syy_data* state = (struct regr_syy_data*)initid->ptr;
    double N = state->cnt;

    if (N <= 0)
    {
        *is_null = 1;
        return 0;
    }

    double varPopY = (state->sumy2 - (state->sumy * state->sumy / N)) / N;
    return state->cnt * varPopY;
}
//=======================================================================
/**
* regr_sxy
*/
// Running totals for the regr_sxy() UDF. Allocated by regr_sxy_init() and
// reached by the other regr_sxy_* functions through initid->ptr.
struct regr_sxy_data
{
int64_t cnt; // number of rows accumulated by regr_sxy_add()
double sumx; // sum of x
double sumy; // sum of y
double sumxy; // sum of (x*y)
};
/**
 * Check the argument count and allocate zeroed running totals for regr_sxy().
 *
 * @param initid  Receives the allocated regr_sxy_data in initid->ptr.
 * @param args    Call arguments; exactly two are required.
 * @param message Receives an error text on failure.
 * @return 0 on success, 1 on a wrong argument count or failed allocation.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_sxy_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 2)
    {
        strcpy(message,"regr_sxy() requires two arguments");
        return 1;
    }

    struct regr_sxy_data* state =
        (struct regr_sxy_data*) malloc(sizeof(struct regr_sxy_data));

    if (state == 0)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->cnt   = 0;
    state->sumx  = 0.0;
    state->sumy  = 0.0;
    state->sumxy = 0.0;

    initid->ptr = (char*)state;
    return 0;
}
/**
 * Release the running totals stored in initid->ptr.
 *
 * @param initid UDF handle whose ptr member is passed to free().
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_sxy_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
/**
 * Reset every running total of regr_sxy() to zero.
 *
 * @param initid  UDF handle whose ptr member holds the regr_sxy_data.
 * @param is_null Unused.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_sxy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
               char* message __attribute__((unused)))
{
    struct regr_sxy_data* state = (struct regr_sxy_data*)initid->ptr;

    state->sumxy = 0.0;
    state->sumy  = 0.0;
    state->sumx  = 0.0;
    state->cnt   = 0;
}
/**
 * Fold the current (y, x) argument pair into the running totals of
 * regr_sxy(). The pair is ignored when either argument pointer is null.
 *
 * @param initid  UDF handle whose ptr member holds the regr_sxy_data.
 * @param args    args[0] is y, args[1] is x.
 * @param is_null Not used by this function.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_sxy_add(UDF_INIT* initid, UDF_ARGS* args,
             char* is_null,
             char* message __attribute__((unused)))
{
    // Only accumulate when both x and y are non-NULL.
    if (args->args[0] != 0 && args->args[1] != 0)
    {
        struct regr_sxy_data* state = (struct regr_sxy_data*)initid->ptr;
        const double y = cvtArgToDouble(args->arg_type[0], args->args[0]);
        const double x = cvtArgToDouble(args->arg_type[1], args->args[1]);

        state->sumy  += y;
        state->sumx  += x;
        state->sumxy += x * y;
        state->cnt   += 1;
    }
}
/**
 * Return cnt * covar_pop(y, x) for the accumulated rows.
 *
 * @param initid  UDF handle whose ptr member holds the regr_sxy_data.
 * @param args    Unused.
 * @param is_null Set to 1 when no rows were accumulated.
 * @param error   Unused.
 * @return The regr_sxy value, or 0 together with *is_null = 1.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_sxy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                char* is_null, char* error __attribute__((unused)))
{
    struct regr_sxy_data* state = (struct regr_sxy_data*)initid->ptr;
    double N = state->cnt;

    if (N <= 0)
    {
        *is_null = 1;
        return 0;
    }

    double covarPop = (state->sumxy - ((state->sumx * state->sumy) / N)) / N;
    return state->cnt * covarPop;
}
//=======================================================================
/**
* covar_pop
*/
// Running totals for the covar_pop() UDF. Allocated by covar_pop_init() and
// reached by the other covar_pop_* functions through initid->ptr.
struct covar_pop_data
{
int64_t cnt; // number of rows accumulated by covar_pop_add()
double sumx; // sum of x
double sumy; // sum of y
double sumxy; // sum of (x*y)
};
/**
 * Check the argument count and allocate zeroed running totals for covar_pop().
 *
 * @param initid  Receives the allocated covar_pop_data in initid->ptr.
 * @param args    Call arguments; exactly two are required.
 * @param message Receives an error text on failure.
 * @return 0 on success, 1 on a wrong argument count or failed allocation.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool covar_pop_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 2)
    {
        strcpy(message,"covar_pop() requires two arguments");
        return 1;
    }

    struct covar_pop_data* state =
        (struct covar_pop_data*) malloc(sizeof(struct covar_pop_data));

    if (state == 0)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->cnt   = 0;
    state->sumx  = 0.0;
    state->sumy  = 0.0;
    state->sumxy = 0.0;

    initid->ptr = (char*)state;
    return 0;
}
/**
 * Release the running totals stored in initid->ptr.
 *
 * @param initid UDF handle whose ptr member is passed to free().
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void covar_pop_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
/**
 * Reset every running total of covar_pop() to zero.
 *
 * @param initid  UDF handle whose ptr member holds the covar_pop_data.
 * @param is_null Unused.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
covar_pop_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
                char* message __attribute__((unused)))
{
    struct covar_pop_data* state = (struct covar_pop_data*)initid->ptr;

    state->sumxy = 0.0;
    state->sumy  = 0.0;
    state->sumx  = 0.0;
    state->cnt   = 0;
}
/**
 * Fold the current (y, x) argument pair into the running totals of
 * covar_pop(). The pair is ignored when either argument pointer is null.
 *
 * @param initid  UDF handle whose ptr member holds the covar_pop_data.
 * @param args    args[0] is y, args[1] is x.
 * @param is_null Not used by this function.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
covar_pop_add(UDF_INIT* initid, UDF_ARGS* args,
              char* is_null,
              char* message __attribute__((unused)))
{
    // Only accumulate when both x and y are non-NULL.
    if (args->args[0] != 0 && args->args[1] != 0)
    {
        struct covar_pop_data* state = (struct covar_pop_data*)initid->ptr;
        const double y = cvtArgToDouble(args->arg_type[0], args->args[0]);
        const double x = cvtArgToDouble(args->arg_type[1], args->args[1]);

        state->sumy  += y;
        state->sumx  += x;
        state->sumxy += x * y;
        state->cnt   += 1;
    }
}
/**
 * Return the population covariance of the accumulated (y, x) pairs:
 * (sumxy - sumx * sumy / N) / N.
 *
 * @param initid  UDF handle whose ptr member holds the covar_pop_data.
 * @param args    Unused.
 * @param is_null Set to 1 when no rows were accumulated.
 * @param error   Unused.
 * @return The population covariance, or 0 together with *is_null = 1.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double covar_pop(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                 char* is_null, char* error __attribute__((unused)))
{
    struct covar_pop_data* state = (struct covar_pop_data*)initid->ptr;
    double N = state->cnt;

    if (N <= 0)
    {
        *is_null = 1;
        return 0;
    }

    double result = (state->sumxy - ((state->sumx * state->sumy) / N)) / N;
    return result;
}
//=======================================================================
/**
* covar_samp
*/
// Running totals for the covar_samp() UDF. Allocated by covar_samp_init()
// and reached by the other covar_samp_* functions through initid->ptr.
struct covar_samp_data
{
int64_t cnt; // number of rows accumulated by covar_samp_add()
double sumx; // sum of x
double sumy; // sum of y
double sumxy; // sum of (x*y)
};
/**
 * Check the argument count and allocate zeroed running totals for
 * covar_samp().
 *
 * @param initid  Receives the allocated covar_samp_data in initid->ptr.
 * @param args    Call arguments; exactly two are required.
 * @param message Receives an error text on failure.
 * @return 0 on success, 1 on a wrong argument count or failed allocation.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool covar_samp_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
    if (args->arg_count != 2)
    {
        strcpy(message,"covar_samp() requires two arguments");
        return 1;
    }

    struct covar_samp_data* state =
        (struct covar_samp_data*) malloc(sizeof(struct covar_samp_data));

    if (state == 0)
    {
        strmov(message,"Couldn't allocate memory");
        return 1;
    }

    state->cnt   = 0;
    state->sumx  = 0.0;
    state->sumy  = 0.0;
    state->sumxy = 0.0;

    initid->ptr = (char*)state;
    return 0;
}
/**
 * Release the running totals stored in initid->ptr.
 *
 * @param initid UDF handle whose ptr member is passed to free().
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void covar_samp_deinit(UDF_INIT* initid)
{
    char* state = initid->ptr;
    free(state);
}
/**
 * Reset every running total of covar_samp() to zero.
 *
 * @param initid  UDF handle whose ptr member holds the covar_samp_data.
 * @param is_null Unused.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
covar_samp_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
                 char* message __attribute__((unused)))
{
    struct covar_samp_data* state = (struct covar_samp_data*)initid->ptr;

    state->sumxy = 0.0;
    state->sumy  = 0.0;
    state->sumx  = 0.0;
    state->cnt   = 0;
}
/**
 * Fold the current (y, x) argument pair into the running totals of
 * covar_samp(). The pair is ignored when either argument pointer is null.
 *
 * @param initid  UDF handle whose ptr member holds the covar_samp_data.
 * @param args    args[0] is y, args[1] is x.
 * @param is_null Not used by this function.
 * @param message Unused.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
covar_samp_add(UDF_INIT* initid, UDF_ARGS* args,
               char* is_null,
               char* message __attribute__((unused)))
{
    // Only accumulate when both x and y are non-NULL.
    if (args->args[0] != 0 && args->args[1] != 0)
    {
        struct covar_samp_data* state = (struct covar_samp_data*)initid->ptr;
        const double y = cvtArgToDouble(args->arg_type[0], args->args[0]);
        const double x = cvtArgToDouble(args->arg_type[1], args->args[1]);

        state->sumy  += y;
        state->sumx  += x;
        state->sumxy += x * y;
        state->cnt   += 1;
    }
}
/**
 * Return the sample covariance of the accumulated (y, x) pairs:
 * (sumxy - sumx * sumy / N) / (N - 1).
 *
 * The result is NULL (*is_null set to 1, return value 0) when fewer than two
 * rows were accumulated: with a single row the divisor N - 1 is zero and the
 * sample covariance is undefined.
 *
 * @param initid  UDF handle whose ptr member holds the covar_samp_data.
 * @param args    Unused.
 * @param is_null Set to 1 when the result is NULL.
 * @param error   Unused.
 * @return The sample covariance, or 0 together with *is_null = 1.
 */
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double covar_samp(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
                  char* is_null, char* error __attribute__((unused)))
{
    struct covar_samp_data* data = (struct covar_samp_data*)initid->ptr;
    double N = data->cnt;

    // Require at least two rows so that (N - 1) is never zero; a single row
    // would otherwise evaluate 0 / 0 and return NaN.
    if (N > 1)
    {
        double sumx = data->sumx;
        double sumy = data->sumy;
        double sumxy = data->sumxy;
        double covar_samp = (sumxy - ((sumx * sumy) / N)) / (N - 1);
        return covar_samp;
    }

    *is_null = 1;
    return 0;
}
}
// vim:ts=4 sw=4: