You've already forked mariadb-columnstore-engine
mirror of
https://github.com/mariadb-corporation/mariadb-columnstore-engine.git
synced 2025-07-30 19:23:07 +03:00
MCOL-521 add regr_intecept and regr_r2
This commit is contained in:
216
utils/regr/regr_r2.cpp
Normal file
216
utils/regr/regr_r2.cpp
Normal file
@ -0,0 +1,216 @@
|
||||
/* Copyright (C) 2017 MariaDB Corporation
|
||||
|
||||
This program is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU General Public License
|
||||
as published by the Free Software Foundation; version 2 of
|
||||
the License.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with this program; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
#include <sstream>
|
||||
#include <cstring>
|
||||
#include <typeinfo>
|
||||
#include "regr_r2.h"
|
||||
#include "bytestream.h"
|
||||
#include "objectreader.h"
|
||||
|
||||
using namespace mcsv1sdk;
|
||||
|
||||
class Add_regr_r2_ToUDAFMap
|
||||
{
|
||||
public:
|
||||
Add_regr_r2_ToUDAFMap()
|
||||
{
|
||||
UDAFMap::getMap()["regr_r2"] = new regr_r2();
|
||||
}
|
||||
};
|
||||
|
||||
static Add_regr_r2_ToUDAFMap addToMap;
|
||||
|
||||
// Use the simple data model
|
||||
/**
 * Running totals kept for one regr_r2 aggregation.
 *
 * The functions in this file obtain it by casting the context's user data
 * buffer, whose size init() sets to sizeof(regr_r2_data). Member order and
 * types are therefore part of the layout and must not be rearranged.
 */
struct regr_r2_data
{
    uint64_t cnt;   ///< number of (y, x) pairs accumulated
    double sumx;    ///< sum of x
    double sumx2;   ///< sum of x * x
    double sumy;    ///< sum of y
    double sumy2;   ///< sum of y * y
    double sumxy;   ///< sum of x * y
};
|
||||
|
||||
|
||||
/**
 * Validate the call and describe the aggregate's state and result.
 *
 * @param context  UDAF context that receives the error message or the
 *                 user-data size, result type, width, scale, precision and
 *                 run flag.
 * @param colTypes Per-argument descriptors; only colTypes[0].scale is read.
 * @return ERROR unless exactly two parameters were supplied, otherwise
 *         SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_r2::init(mcsv1Context* context,
                                     ColumnDatum* colTypes)
{
    const bool hasTwoArgs = (context->getParameterCount() == 2);

    if (!hasTwoArgs)
    {
        // The framework prepends
        // "The storage engine for the table doesn't support "
        // to this text.
        context->setErrorMessage("regr_r2() with other than 2 arguments");
        return mcsv1_UDAF::ERROR;
    }

    // Working storage: one regr_r2_data per aggregation.
    context->setUserDataSize(sizeof(regr_r2_data));

    // Result is reported as an 8-byte DOUBLE.
    context->setResultType(CalpontSystemCatalog::DOUBLE);
    context->setColWidth(8);
    context->setScale(colTypes[0].scale + 8);
    context->setPrecision(19);

    context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);

    return mcsv1_UDAF::SUCCESS;
}
|
||||
|
||||
/**
 * Clear the accumulator so a new aggregation can start.
 *
 * @param context UDAF context whose user data holds the regr_r2_data.
 * @return always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_r2::reset(mcsv1Context* context)
{
    struct regr_r2_data* state = (struct regr_r2_data*)context->getUserData()->data;

    // A value-initialised regr_r2_data has cnt == 0 and every sum == 0.0.
    *state = regr_r2_data();

    return mcsv1_UDAF::SUCCESS;
}
|
||||
|
||||
/**
 * Fold one (y, x) pair into the running sums.
 *
 * valsIn[0] carries the first SQL argument (y) and valsIn[1] the second
 * (x). Each value is converted to double and, when its column has a
 * non-zero scale, divided by 10^scale to move the decimal point.
 *
 * @param context UDAF context whose user data holds the regr_r2_data.
 * @param valsIn  The two argument values for the current row.
 * @return always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_r2::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
    static_any::any& valIn_y = valsIn[0].columnData;
    static_any::any& valIn_x = valsIn[1].columnData;
    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;

    double valy = convertAnyTo<double>(valIn_y);
    double valx = convertAnyTo<double>(valIn_x);

    // For decimal types, we need to move the decimal point.
    const uint32_t scaley = valsIn[0].scale;

    if (valy != 0 && scaley > 0)
    {
        valy /= pow(10.0, (double)scaley);
    }

    // Same for x, using x's own scale. The previous code divided by
    // 10^scaley here, which produced a wrong x whenever the two arguments
    // had different scales.
    const uint32_t scalex = valsIn[1].scale;

    if (valx != 0 && scalex > 0)
    {
        valx /= pow(10.0, (double)scalex);
    }

    data->sumy += valy;
    data->sumy2 += valy * valy;

    data->sumx += valx;
    data->sumx2 += valx * valx;

    data->sumxy += valx * valy;

    ++data->cnt;

    return mcsv1_UDAF::SUCCESS;
}
|
||||
|
||||
/**
 * Merge a partial accumulator into this context's accumulator.
 *
 * @param context    UDAF context whose user data receives the merged sums.
 * @param userDataIn Partial result to add; a null pointer is a no-op.
 * @return always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_r2::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
    if (userDataIn)
    {
        struct regr_r2_data* total = (struct regr_r2_data*)context->getUserData()->data;
        struct regr_r2_data* part = (struct regr_r2_data*)userDataIn->data;

        // Every member is a plain sum, so merging is member-wise addition.
        total->sumx += part->sumx;
        total->sumx2 += part->sumx2;
        total->sumy += part->sumy;
        total->sumy2 += part->sumy2;
        total->sumxy += part->sumxy;
        total->cnt += part->cnt;
    }

    return mcsv1_UDAF::SUCCESS;
}
|
||||
|
||||
/**
 * Produce the coefficient of determination from the accumulated sums.
 *
 * With N pairs:
 *   var_pop(x)  = (sum(x^2) - sum(x)^2 / N) / N
 *   var_pop(y)  = (sum(y^2) - sum(y)^2 / N) / N
 *   covar_pop   = (sum(x*y) - sum(x)*sum(y) / N) / N
 *   result      = (covar_pop / (stddev_pop(x) * stddev_pop(y)))^2
 *
 * valOut is left unassigned when there are no rows or when var_pop(x) is
 * not positive (the intended result in that case is NULL). When var_pop(x)
 * is positive but var_pop(y) is not, the result is 1.
 *
 * @param context UDAF context whose user data holds the regr_r2_data.
 * @param valOut  Receives the result as a double, if there is one.
 * @return always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& valOut)
{
    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
    double N = data->cnt;

    if (N > 0)
    {
        double sumx = data->sumx;
        double sumy = data->sumy;
        double sumx2 = data->sumx2;
        double sumy2 = data->sumy2;
        double sumxy = data->sumxy;

        double var_popx = (sumx2 - (sumx * sumx / N)) / N;

        // The subtraction above can round to a tiny negative number when
        // all x are (nearly) equal. Treat that like zero rather than
        // letting it through to sqrt(), which would yield NaN.
        if (var_popx <= 0)
        {
            // When var_popx is 0, NULL is the result.
            return mcsv1_UDAF::SUCCESS;
        }

        double var_popy = (sumy2 - (sumy * sumy / N)) / N;

        // Same rounding concern as for x.
        if (var_popy <= 0)
        {
            // When var_popy is 0, 1 is the result
            valOut = 1.0;
            return mcsv1_UDAF::SUCCESS;
        }

        double std_popx = sqrt(var_popx);
        double std_popy = sqrt(var_popy);
        double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
        double corr = covar_pop / (std_popy * std_popx);
        valOut = corr * corr;
    }

    return mcsv1_UDAF::SUCCESS;
}
|
||||
|
||||
/**
 * Remove one previously accumulated (y, x) pair from the running sums.
 *
 * This is the inverse of nextValue(): the values are converted and
 * rescaled the same way, then subtracted, and the count is decremented.
 * NOTE(review): presumably invoked when a row leaves a window frame -
 * confirm against the UDAF framework.
 *
 * @param context     UDAF context whose user data holds the regr_r2_data.
 * @param valsDropped The two argument values of the row being removed;
 *                    [0] is y and [1] is x.
 * @return always SUCCESS.
 */
mcsv1_UDAF::ReturnCode regr_r2::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
    static_any::any& valIn_y = valsDropped[0].columnData;
    static_any::any& valIn_x = valsDropped[1].columnData;
    struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;

    double valy = convertAnyTo<double>(valIn_y);
    double valx = convertAnyTo<double>(valIn_x);

    // For decimal types, we need to move the decimal point.
    const uint32_t scaley = valsDropped[0].scale;

    if (valy != 0 && scaley > 0)
    {
        valy /= pow(10.0, (double)scaley);
    }

    // Same for x, using x's own scale. The previous code divided by
    // 10^scaley here, so a dropped x did not match the value it should
    // cancel whenever the two arguments had different scales.
    const uint32_t scalex = valsDropped[1].scale;

    if (valx != 0 && scalex > 0)
    {
        valx /= pow(10.0, (double)scalex);
    }

    data->sumy -= valy;
    data->sumy2 -= valy * valy;

    data->sumx -= valx;
    data->sumx2 -= valx * valx;

    data->sumxy -= valx * valy;

    --data->cnt;

    return mcsv1_UDAF::SUCCESS;
}
|
||||
|
Reference in New Issue
Block a user