1
0
mirror of https://github.com/mariadb-corporation/mariadb-columnstore-engine.git synced 2025-12-24 14:20:59 +03:00

Merge pull request #578 from mariadb-corporation/MCOL-521-b

Mcol 521 b
This commit is contained in:
Andrew Hutchings
2018-10-02 20:13:19 +01:00
committed by GitHub
36 changed files with 3652 additions and 765 deletions

View File

@@ -158,7 +158,7 @@ SET (ENGINE_TOOLSDIR "${INSTALL_ENGINE}/tools")
SET (ENGINE_COMMON_LIBS messageqcpp loggingcpp configcpp idbboot ${Boost_LIBRARIES} xml2 pthread rt libmysql_client)
SET (ENGINE_OAM_LIBS oamcpp alarmmanager)
SET (ENGINE_BRM_LIBS brm idbdatafile cacheutils rwlock ${ENGINE_OAM_LIBS} ${ENGINE_COMMON_LIBS})
SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS})
SET (ENGINE_EXEC_LIBS joblist execplan windowfunction joiner rowgroup funcexp udfsdk regr dataconvert common compress querystats querytele thrift threadpool ${ENGINE_BRM_LIBS})
SET (ENGINE_WRITE_LIBS ddlpackageproc ddlpackage dmlpackageproc dmlpackage writeengine writeengineclient idbdatafile cacheutils ${ENGINE_EXEC_LIBS})
SET (ENGINE_COMMON_LDFLAGS "")

File diff suppressed because it is too large Load Diff

View File

@@ -323,6 +323,9 @@ string ConvertFuncName(Item_sum* item)
case Item_sum::LAG_FUNC:
return "LAG";
break;
default:
// We just don't handle it.
break;
};
return "";

View File

@@ -84,7 +84,14 @@ CREATE FUNCTION idbpartition RETURNS STRING soname 'libcalmysql.so';
CREATE FUNCTION idblocalpm RETURNS INTEGER soname 'libcalmysql.so';
CREATE FUNCTION mcssystemready RETURNS INTEGER soname 'libcalmysql.so';
CREATE FUNCTION mcssystemreadonly RETURNS INTEGER soname 'libcalmysql.so';
CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libudf_mysql.so';
CREATE AGGREGATE FUNCTION regr_avgx RETURNS REAL soname 'libregr_mysql.so';
CREATE AGGREGATE FUNCTION regr_avgy RETURNS REAL soname 'libregr_mysql.so';
CREATE AGGREGATE FUNCTION regr_count RETURNS INTEGER soname 'libregr_mysql.so';
CREATE AGGREGATE FUNCTION regr_slope RETURNS REAL soname 'libregr_mysql.so';
CREATE AGGREGATE FUNCTION regr_intercept RETURNS REAL soname 'libregr_mysql.so';
CREATE AGGREGATE FUNCTION regr_r2 RETURNS REAL soname 'libregr_mysql.so';
CREATE AGGREGATE FUNCTION distinct_count RETURNS INTEGER soname 'libudf_mysql.so';
CREATE DATABASE IF NOT EXISTS infinidb_vtable;
CREATE DATABASE IF NOT EXISTS infinidb_querystats;

View File

@@ -44,6 +44,7 @@
<Project File="primitives/primproc/primproc.vpj"/>
<Project File="procmgr/procmgr.vpj"/>
<Project File="procmon/procmon.vpj"/>
<Project File="utils/regr/regr.vpj"/>
<Project File="oamapps/replayTransactionLog/ReplayTransactionLog.vpj"/>
<Project File="oamapps/resourceMonitor/resourceMonitor.vpj"/>
<Project File="utils/rowgroup/rowgroup.vpj"/>

View File

@@ -72,9 +72,11 @@ fi
if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then
libcalmysql=$installdir/lib/libcalmysql.so.1.0.0
libudfsdk=$installdir/lib/libudf_mysql.so.1.0.0
libregrsdk=$installdir/lib/libregr_mysql.so.1.0.0
elif [ -f $installdir/lib/libcalmysql.so.1 ]; then
libcalmysql=$installdir/lib/libcalmysql.so.1
libudfsdk=$installdir/lib/libudf_mysql.so.1
libregrsdk=$installdir/lib/libregr_mysql.so.1
else
libcalmysql=
fi
@@ -84,6 +86,7 @@ if [ -d $installdir/mysql/lib64/mysql/plugin -a -n "$libcalmysql" ]; then
ln -sf $libcalmysql libcalmysql.so
ln -sf $libcalmysql libcalmysqlent.so
ln -sf $libudfsdk libudf_mysql.so
ln -sf $libregrsdk libregr_mysql.so
fi
if [ $installdir != "/usr/local/mariadb/columnstore" ]; then

View File

@@ -83,6 +83,7 @@ chown -R $user.$user $installdir/mysql
if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then
libcalmysql=$installdir/lib/libcalmysql.so.1.0.0
libudfsdk=$installdir/lib/libudf_mysql.so.1.0.0
libregrsdk=$installdir/lib/libregr_mysql.so.1.0.0
is_columnstore_tables=$installdir/lib/is_columnstore_tables.so.1.0.0
is_columnstore_columns=$installdir/lib/is_columnstore_columns.so.1.0.0
is_columnstore_extents=$installdir/lib/is_columnstore_extents.so.1.0.0
@@ -90,6 +91,7 @@ if [ -f $installdir/lib/libcalmysql.so.1.0.0 ]; then
elif [ -f $installdir/lib/libcalmysql.so.1 ]; then
libcalmysql=$installdir/lib/libcalmysql.so.1
libudfsdk=$installdir/lib/libudf_mysql.so.1
libregrsdk=$installdir/lib/libregr_mysql.so.1
is_columnstore_tables=$installdir/lib/is_columnstore_tables.so.1
is_columnstore_columns=$installdir/lib/is_columnstore_columns.so.1
is_columnstore_extents=$installdir/lib/is_columnstore_extents.so.1
@@ -104,6 +106,7 @@ if [ -n "$libcalmysql" ]; then
ln -sf $libcalmysql libcalmysql.so
ln -sf $libcalmysql libcalmysqlent.so
ln -sf $libudfsdk libudf_mysql.so
ln -sf $libregrsdk libregr_mysql.so
ln -sf $is_columnstore_tables is_columnstore_tables.so
ln -sf $is_columnstore_columns is_columnstore_columns.so
ln -sf $is_columnstore_extents is_columnstore_extents.so

View File

@@ -25,3 +25,5 @@ add_subdirectory(thrift)
add_subdirectory(querytele)
add_subdirectory(clusterTester)
add_subdirectory(libmysql_client)
add_subdirectory(regr)

26
utils/regr/CMakeLists.txt Executable file
View File

@@ -0,0 +1,26 @@
include_directories( ${ENGINE_COMMON_INCLUDES}
../../dbcon/mysql )
########### next target ###############
set(regr_LIB_SRCS regr_avgx.cpp regr_avgy.cpp regr_count.cpp regr_slope.cpp regr_intercept regr_r2)
add_definitions(-DMYSQL_DYNAMIC_PLUGIN)
add_library(regr SHARED ${regr_LIB_SRCS} )
set_target_properties(regr PROPERTIES VERSION 1.1.0 SOVERSION 1)
install(TARGETS regr DESTINATION ${ENGINE_LIBDIR} COMPONENT libs)
set(regr_mysql_LIB_SRCS regrmysql.cpp)
add_library(regr_mysql SHARED ${regr_mysql_LIB_SRCS})
set_target_properties(regr_mysql PROPERTIES VERSION 1.0.0 SOVERSION 1)
install(TARGETS regr_mysql DESTINATION ${ENGINE_LIBDIR} COMPONENT storage-engine)

227
utils/regr/regr.vpj Normal file
View File

@@ -0,0 +1,227 @@
<!DOCTYPE Project SYSTEM "http://www.slickedit.com/dtd/vse/10.0/vpj.dtd">
<Project
Version="10.0"
VendorName="SlickEdit"
TemplateName="GNU C/C++"
WorkingDir=".">
<Config
Name="Debug"
Type="gnuc"
DebugCallbackName="gdb"
Version="1"
OutputFile="%bdregr.a"
CompilerConfigName="Latest Version">
<Menu>
<Target
Name="Compile"
MenuCaption="&amp;Compile"
Dialog="_gnuc_options_form Compile"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
OutputExts="*.o"
SaveOption="SaveCurrent"
RunFromDir="%rw">
<Exec CmdLine='g++ -c %xup %defd -g -o "%bd%n%oe" %i "%f"'/>
</Target>
<Target
Name="Link"
MenuCaption="&amp;Link"
ShowOnMenu="Never"
Dialog="_gnuc_options_form Link"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveCurrent"
RunFromDir="%rw">
<Exec CmdLine='ar -rs %xup "%o" %f'/>
</Target>
<Target
Name="Build"
MenuCaption="&amp;Build"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveWorkspaceFiles"
RunFromDir="%rw">
<Exec CmdLine="make"/>
</Target>
<Target
Name="Rebuild"
MenuCaption="&amp;Rebuild"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveWorkspaceFiles"
RunFromDir="%rw">
<Exec CmdLine=""/>
</Target>
<Target
Name="Debug"
MenuCaption="&amp;Debug"
Dialog="_gnuc_options_form Run/Debug"
BuildFirst="1"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveNone"
RunFromDir="%rw">
<Exec CmdLine=""/>
</Target>
<Target
Name="Execute"
MenuCaption="E&amp;xecute"
Dialog="_gnuc_options_form Run/Debug"
BuildFirst="1"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveWorkspaceFiles"
RunFromDir="%rw">
<Exec CmdLine=""/>
</Target>
<Target
Name="dash"
MenuCaption="-"
Deletable="0">
<Exec/>
</Target>
<Target
Name="GNU C Options"
MenuCaption="GNU C &amp;Options..."
ShowOnMenu="HideIfNoCmdLine"
Deletable="0"
SaveOption="SaveNone">
<Exec
CmdLine="gnucoptions"
Type="Slick-C"/>
</Target>
</Menu>
<List Name="GNUC Options">
<Item
Name="LinkerOutputType"
Value="StaticLibrary"/>
</List>
</Config>
<Config
Name="Release"
Type="gnuc"
DebugCallbackName="gdb"
Version="1"
OutputFile="%bdregr.a"
CompilerConfigName="Latest Version">
<Menu>
<Target
Name="Compile"
MenuCaption="&amp;Compile"
Dialog="_gnuc_options_form Compile"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
OutputExts="*.o"
SaveOption="SaveCurrent"
RunFromDir="%rw">
<Exec CmdLine='g++ -c %xup %defd -o "%bd%n%oe" %i "%f"'/>
</Target>
<Target
Name="Link"
MenuCaption="&amp;Link"
ShowOnMenu="Never"
Dialog="_gnuc_options_form Link"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveCurrent"
RunFromDir="%rw">
<Exec CmdLine='ar -rs %xup "%o" %f'/>
</Target>
<Target
Name="Build"
MenuCaption="&amp;Build"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveWorkspaceFiles"
RunFromDir="%rw">
<Exec CmdLine="make"/>
</Target>
<Target
Name="Rebuild"
MenuCaption="&amp;Rebuild"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveWorkspaceFiles"
RunFromDir="%rw">
<Exec CmdLine=""/>
</Target>
<Target
Name="Debug"
MenuCaption="&amp;Debug"
Dialog="_gnuc_options_form Run/Debug"
BuildFirst="1"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveNone"
RunFromDir="%rw">
<Exec CmdLine=""/>
</Target>
<Target
Name="Execute"
MenuCaption="E&amp;xecute"
Dialog="_gnuc_options_form Run/Debug"
BuildFirst="1"
CaptureOutputWith="ProcessBuffer"
Deletable="0"
SaveOption="SaveWorkspaceFiles"
RunFromDir="%rw">
<Exec CmdLine=""/>
</Target>
<Target
Name="dash"
MenuCaption="-"
Deletable="0">
<Exec/>
</Target>
<Target
Name="GNU C Options"
MenuCaption="GNU C &amp;Options..."
ShowOnMenu="HideIfNoCmdLine"
Deletable="0"
SaveOption="SaveNone">
<Exec
CmdLine="gnucoptions"
Type="Slick-C"/>
</Target>
</Menu>
<List Name="GNUC Options">
<Item
Name="LinkerOutputType"
Value="StaticLibrary"/>
</List>
</Config>
<Files>
<Folder
Name="Source Files"
Filters="*.c;*.C;*.cc;*.cpp;*.cp;*.cxx;*.c++;*.prg;*.pas;*.dpr;*.asm;*.s;*.bas;*.java;*.cs;*.sc;*.e;*.cob;*.html;*.rc;*.tcl;*.py;*.pl;*.d">
<F N="regr_avgx.cpp"/>
<F N="regr_avgy.cpp"/>
<F N="regr_count.cpp"/>
<F N="regr_intercept.cpp"/>
<F N="regr_r2.cpp"/>
<F N="regr_slope.cpp"/>
<F N="regrmysql.cpp"/>
</Folder>
<Folder
Name="Header Files"
Filters="*.h;*.H;*.hh;*.hpp;*.hxx;*.inc;*.sh;*.cpy;*.if">
<F N="regr_avgx.h"/>
<F N="regr_avgy.h"/>
<F N="regr_count.h"/>
<F N="regr_intercept.h"/>
<F N="regr_r2.h"/>
<F N="regr_slope.h"/>
</Folder>
<Folder
Name="Resource Files"
Filters="*.ico;*.cur;*.dlg"/>
<Folder
Name="Bitmaps"
Filters="*.bmp"/>
<Folder
Name="Other Files"
Filters="">
<F N="CMakeLists.txt"/>
</Folder>
</Files>
</Project>

View File

@@ -24,6 +24,17 @@
using namespace mcsv1sdk;
class Add_regr_avgx_ToUDAFMap
{
public:
Add_regr_avgx_ToUDAFMap()
{
UDAFMap::getMap()["regr_avgx"] = new regr_avgx();
}
};
static Add_regr_avgx_ToUDAFMap addToMap;
#define DATATYPE double
// Use the simple data model
@@ -73,77 +84,12 @@ mcsv1_UDAF::ReturnCode regr_avgx::reset(mcsv1Context* context)
mcsv1_UDAF::ReturnCode regr_avgx::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn_y = valsIn[0].columnData;
static_any::any& valIn_x = valsIn[1].columnData;
struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data;
DATATYPE val = 0.0;
if (context->isParamNull(0) || context->isParamNull(1))
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn_x.compatible(longTypeId))
{
val = valIn_x.cast<long>();
}
else if (valIn_x.compatible(charTypeId))
{
val = valIn_x.cast<char>();
}
else if (valIn_x.compatible(scharTypeId))
{
val = valIn_x.cast<signed char>();
}
else if (valIn_x.compatible(shortTypeId))
{
val = valIn_x.cast<short>();
}
else if (valIn_x.compatible(intTypeId))
{
val = valIn_x.cast<int>();
}
else if (valIn_x.compatible(llTypeId))
{
val = valIn_x.cast<long long>();
}
else if (valIn_x.compatible(ucharTypeId))
{
val = valIn_x.cast<unsigned char>();
}
else if (valIn_x.compatible(ushortTypeId))
{
val = valIn_x.cast<unsigned short>();
}
else if (valIn_x.compatible(uintTypeId))
{
val = valIn_x.cast<unsigned int>();
}
else if (valIn_x.compatible(ulongTypeId))
{
val = valIn_x.cast<unsigned long>();
}
else if (valIn_x.compatible(ullTypeId))
{
val = valIn_x.cast<unsigned long long>();
}
else if (valIn_x.compatible(floatTypeId))
{
val = valIn_x.cast<float>();
}
else if (valIn_x.compatible(doubleTypeId))
{
val = valIn_x.cast<double>();
}
DATATYPE val = convertAnyTo<double>(valIn_x);
// For decimal types, we need to move the decimal point.
uint32_t scale = valsIn[1].scale;
if (val != 0 && scale > 0)
{
val /= pow(10.0, (double)scale);
@@ -191,72 +137,12 @@ mcsv1_UDAF::ReturnCode regr_avgx::evaluate(mcsv1Context* context, static_any::an
mcsv1_UDAF::ReturnCode regr_avgx::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn_y = valsDropped[0].columnData;
static_any::any& valIn_x = valsDropped[1].columnData;
struct regr_avgx_data* data = (struct regr_avgx_data*)context->getUserData()->data;
DATATYPE val = 0.0;
if (valIn_x.empty() || valIn_y.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn_x.compatible(charTypeId))
{
val = valIn_x.cast<char>();
}
else if (valIn_x.compatible(scharTypeId))
{
val = valIn_x.cast<signed char>();
}
else if (valIn_x.compatible(shortTypeId))
{
val = valIn_x.cast<short>();
}
else if (valIn_x.compatible(intTypeId))
{
val = valIn_x.cast<int>();
}
else if (valIn_x.compatible(longTypeId))
{
val = valIn_x.cast<long>();
}
else if (valIn_x.compatible(llTypeId))
{
val = valIn_x.cast<long long>();
}
else if (valIn_x.compatible(ucharTypeId))
{
val = valIn_x.cast<unsigned char>();
}
else if (valIn_x.compatible(ushortTypeId))
{
val = valIn_x.cast<unsigned short>();
}
else if (valIn_x.compatible(uintTypeId))
{
val = valIn_x.cast<unsigned int>();
}
else if (valIn_x.compatible(ulongTypeId))
{
val = valIn_x.cast<unsigned long>();
}
else if (valIn_x.compatible(ullTypeId))
{
val = valIn_x.cast<unsigned long long>();
}
else if (valIn_x.compatible(floatTypeId))
{
val = valIn_x.cast<float>();
}
else if (valIn_x.compatible(doubleTypeId))
{
val = valIn_x.cast<double>();
}
double val = convertAnyTo<double>(valIn_x);
// For decimal types, we need to move the decimal point.
uint32_t scale = valsDropped[1].scale;
if (val != 0 && scale > 0)
{
val /= pow(10.0, (double)scale);

View File

@@ -26,7 +26,7 @@
*
*
* CREATE AGGREGATE FUNCTION regr_avgx returns REAL soname
* 'libudf_mysql.so';
* 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_avgx

153
utils/regr/regr_avgy.cpp Normal file
View File

@@ -0,0 +1,153 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_avgy.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_avgy_ToUDAFMap
{
public:
Add_regr_avgy_ToUDAFMap()
{
UDAFMap::getMap()["regr_avgy"] = new regr_avgy();
}
};
static Add_regr_avgy_ToUDAFMap addToMap;
#define DATATYPE double
// Use the simple data model
struct regr_avgy_data
{
double sum;
uint64_t cnt;
};
mcsv1_UDAF::ReturnCode regr_avgy::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
if (context->getParameterCount() != 2)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("regr_avgy() with other than 2 arguments");
return mcsv1_UDAF::ERROR;
}
if (!(isNumeric(colTypes[0].dataType)))
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("regr_avgy() with a non-numeric x argument");
return mcsv1_UDAF::ERROR;
}
context->setUserDataSize(sizeof(regr_avgy_data));
context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8);
context->setScale(colTypes[0].scale + 4);
context->setPrecision(19);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_avgy::reset(mcsv1Context* context)
{
struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data;
data->sum = 0;
data->cnt = 0;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_avgy::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn_y = valsIn[0].columnData;
struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data;
double val = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scale = valsIn[0].scale;
if (val != 0 && scale > 0)
{
val /= pow(10.0, (double)scale);
}
data->sum += val;
++data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_avgy::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
if (!userDataIn)
{
return mcsv1_UDAF::SUCCESS;
}
struct regr_avgy_data* outData = (struct regr_avgy_data*)context->getUserData()->data;
struct regr_avgy_data* inData = (struct regr_avgy_data*)userDataIn->data;
outData->sum += inData->sum;
outData->cnt += inData->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_avgy::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data;
if (data->cnt == 0)
{
valOut = 0;
}
else
{
valOut = data->sum / (double)data->cnt;
}
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_avgy::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn_y = valsDropped[0].columnData;
struct regr_avgy_data* data = (struct regr_avgy_data*)context->getUserData()->data;
double val = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scale = valsDropped[0].scale;
if (val != 0 && scale > 0)
{
val /= pow(10.0, (double)scale);
}
data->sum -= val;
--data->cnt;
return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_avgy.h Normal file
View File

@@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_avgy.h
***********************************************************************/
/**
* Columnstore interface for for the regr_avgy function
*
*
* CREATE AGGREGATE FUNCTION regr_avgy returns REAL soname
* 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_avgy
#define HEADER_regr_avgy
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Return the regr_avgy value of the dataset
class regr_avgy : public mcsv1_UDAF
{
public:
// Defaults OK
regr_avgy() : mcsv1_UDAF() {};
virtual ~regr_avgy() {};
virtual ReturnCode init(mcsv1Context* context,
ColumnDatum* colTypes);
virtual ReturnCode reset(mcsv1Context* context);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_regr_avgy.h

131
utils/regr/regr_count.cpp Normal file
View File

@@ -0,0 +1,131 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_count.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_count_ToUDAFMap
{
public:
Add_regr_count_ToUDAFMap()
{
UDAFMap::getMap()["regr_count"] = new regr_count();
}
};
static Add_regr_count_ToUDAFMap addToMap;
// Use the simple data model
struct regr_count_data
{
uint64_t cnt;
};
mcsv1_UDAF::ReturnCode regr_count::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
if (context->getParameterCount() != 2)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("regr_count() with other than 2 arguments");
return mcsv1_UDAF::ERROR;
}
context->setUserDataSize(sizeof(regr_count_data));
context->setResultType(CalpontSystemCatalog::BIGINT);
context->setColWidth(8);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_count::reset(mcsv1Context* context)
{
struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
data->cnt = 0;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_count::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn_y = valsIn[0].columnData;
static_any::any& valIn_x = valsIn[1].columnData;
struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
if (context->isParamNull(0) || context->isParamNull(1))
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn_x.empty() || valIn_y.empty()) // Usually empty if NULL. Probably redundant
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
++data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_count::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
if (!userDataIn)
{
return mcsv1_UDAF::SUCCESS;
}
struct regr_count_data* outData = (struct regr_count_data*)context->getUserData()->data;
struct regr_count_data* inData = (struct regr_count_data*)userDataIn->data;
outData->cnt += inData->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_count::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
valOut = data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_count::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn_y = valsDropped[0].columnData;
static_any::any& valIn_x = valsDropped[1].columnData;
struct regr_count_data* data = (struct regr_count_data*)context->getUserData()->data;
if (context->isParamNull(0) || context->isParamNull(1))
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
if (valIn_x.empty() || valIn_y.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
--data->cnt;
return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_count.h Normal file
View File

@@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_count.h
***********************************************************************/
/**
* Columnstore interface for for the regr_count function
*
*
* CREATE AGGREGATE FUNCTION regr_count returns INTEGER
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_count
#define HEADER_regr_count
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Return the regr_count value of the dataset
class regr_count : public mcsv1_UDAF
{
public:
// Defaults OK
regr_count() : mcsv1_UDAF() {};
virtual ~regr_count() {};
virtual ReturnCode init(mcsv1Context* context,
ColumnDatum* colTypes);
virtual ReturnCode reset(mcsv1Context* context);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_regr_count.h

View File

@@ -0,0 +1,197 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_intercept.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_intercept_ToUDAFMap
{
public:
Add_regr_intercept_ToUDAFMap()
{
UDAFMap::getMap()["regr_intercept"] = new regr_intercept();
}
};
static Add_regr_intercept_ToUDAFMap addToMap;
// Use the simple data model
struct regr_intercept_data
{
uint64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumxy; // sum of (x*y)
};
mcsv1_UDAF::ReturnCode regr_intercept::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
if (context->getParameterCount() != 2)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("regr_intercept() with other than 2 arguments");
return mcsv1_UDAF::ERROR;
}
context->setUserDataSize(sizeof(regr_intercept_data));
context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8);
context->setScale(colTypes[0].scale + 8);
context->setPrecision(19);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_intercept::reset(mcsv1Context* context)
{
struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumxy = 0.0;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_intercept::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn_y = valsIn[0].columnData;
static_any::any& valIn_x = valsIn[1].columnData;
struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
double valx = 0.0;
double valy = 0.0;
valx = convertAnyTo<double>(valIn_x);
valy = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scaley = valsIn[0].scale;
if (valy != 0 && scaley > 0)
{
valy /= pow(10.0, (double)scaley);
}
data->sumy += valy;
// For decimal types, we need to move the decimal point.
uint32_t scalex = valsIn[1].scale;
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
}
data->sumx += valx;
data->sumx2 += valx*valx;
data->sumxy += valx*valy;
++data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_intercept::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
if (!userDataIn)
{
return mcsv1_UDAF::SUCCESS;
}
struct regr_intercept_data* outData = (struct regr_intercept_data*)context->getUserData()->data;
struct regr_intercept_data* inData = (struct regr_intercept_data*)userDataIn->data;
outData->sumx += inData->sumx;
outData->sumx2 += inData->sumx2;
outData->sumy += inData->sumy;
outData->sumxy += inData->sumxy;
outData->cnt += inData->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_intercept::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
double N = data->cnt;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumxy = data->sumxy;
double slope = 0.0;
double variance = (N * sumx2) - (sumx * sumx);
if (variance != 0)
{
slope = ((N * sumxy) - (sumx * sumy)) / variance;
valOut = (sumy - (slope * sumx)) / N;
}
}
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_intercept::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn_y = valsDropped[0].columnData;
static_any::any& valIn_x = valsDropped[1].columnData;
struct regr_intercept_data* data = (struct regr_intercept_data*)context->getUserData()->data;
double valx = 0.0;
double valy = 0.0;
valx = convertAnyTo<double>(valIn_x);
valy = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scaley = valsDropped[0].scale;
if (valy != 0 && scaley > 0)
{
valy /= pow(10.0, (double)scaley);
}
data->sumy -= valy;
// For decimal types, we need to move the decimal point.
uint32_t scalex = valsDropped[1].scale;
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
}
data->sumx -= valx;
data->sumx2 -= valx*valx;
data->sumxy -= valx*valy;
--data->cnt;
return mcsv1_UDAF::SUCCESS;
}

View File

@@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_intercept.h
***********************************************************************/
/**
* Columnstore interface for for the regr_intercept function
*
*
* CREATE AGGREGATE FUNCTION regr_intercept returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_intercept
#define HEADER_regr_intercept
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Return the regr_intercept value of the dataset
class regr_intercept : public mcsv1_UDAF
{
public:
// Defaults OK
regr_intercept() : mcsv1_UDAF() {};
virtual ~regr_intercept() {};
virtual ReturnCode init(mcsv1Context* context,
ColumnDatum* colTypes);
virtual ReturnCode reset(mcsv1Context* context);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_regr_intercept.h

216
utils/regr/regr_r2.cpp Normal file
View File

@@ -0,0 +1,216 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_r2.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_r2_ToUDAFMap
{
public:
Add_regr_r2_ToUDAFMap()
{
UDAFMap::getMap()["regr_r2"] = new regr_r2();
}
};
static Add_regr_r2_ToUDAFMap addToMap;
// Use the simple data model
struct regr_r2_data
{
uint64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumy2; // sum of (y squared)
double sumxy; // sum of x * y
};
mcsv1_UDAF::ReturnCode regr_r2::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
if (context->getParameterCount() != 2)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("regr_r2() with other than 2 arguments");
return mcsv1_UDAF::ERROR;
}
context->setUserDataSize(sizeof(regr_r2_data));
context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8);
context->setScale(colTypes[0].scale + 8);
context->setPrecision(19);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_r2::reset(mcsv1Context* context)
{
struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumy2 = 0.0;
data->sumxy = 0.0;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_r2::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn_y = valsIn[0].columnData;
static_any::any& valIn_x = valsIn[1].columnData;
struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
double valx = 0.0;
double valy = 0.0;
valx = convertAnyTo<double>(valIn_x);
valy = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scaley = valsIn[0].scale;
if (valy != 0 && scaley > 0)
{
valy /= pow(10.0, (double)scaley);
}
data->sumy += valy;
data->sumy2 += valy*valy;
// For decimal types, we need to move the decimal point.
uint32_t scalex = valsIn[1].scale;
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
}
data->sumx += valx;
data->sumx2 += valx*valx;
data->sumxy += valx*valy;
++data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_r2::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
if (!userDataIn)
{
return mcsv1_UDAF::SUCCESS;
}
struct regr_r2_data* outData = (struct regr_r2_data*)context->getUserData()->data;
struct regr_r2_data* inData = (struct regr_r2_data*)userDataIn->data;
outData->sumx += inData->sumx;
outData->sumx2 += inData->sumx2;
outData->sumy += inData->sumy;
outData->sumy2 += inData->sumy2;
outData->sumxy += inData->sumxy;
outData->cnt += inData->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_r2::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
double N = data->cnt;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumy2 = data->sumy2;
double sumxy = data->sumxy;
double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
{
// When var_popx is 0, NULL is the result.
return mcsv1_UDAF::SUCCESS;
}
double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
{
// When var_popy is 0, 1 is the result
valOut = 1.0;
return mcsv1_UDAF::SUCCESS;
}
double std_popx = sqrt(var_popx);
double std_popy = sqrt(var_popy);
double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
double corr = covar_pop / (std_popy * std_popx);
valOut = corr * corr;
}
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_r2::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn_y = valsDropped[0].columnData;
static_any::any& valIn_x = valsDropped[1].columnData;
struct regr_r2_data* data = (struct regr_r2_data*)context->getUserData()->data;
double valx = 0.0;
double valy = 0.0;
valx = convertAnyTo<double>(valIn_x);
valy = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scaley = valsDropped[0].scale;
if (valy != 0 && scaley > 0)
{
valy /= pow(10.0, (double)scaley);
}
data->sumy -= valy;
data->sumy2 -= valy*valy;
// For decimal types, we need to move the decimal point.
uint32_t scalex = valsDropped[1].scale;
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
}
data->sumx -= valx;
data->sumx2 -= valx*valx;
data->sumxy -= valx*valy;
--data->cnt;
return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_r2.h Normal file
View File

@@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_r2.h
***********************************************************************/
/**
* Columnstore interface for for the regr_r2 function
*
*
* CREATE AGGREGATE FUNCTION regr_r2 returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_intercept
#define HEADER_regr_intercept
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Return the regr_r2 value of the dataset
class regr_r2 : public mcsv1_UDAF
{
public:
// Defaults OK
regr_r2() : mcsv1_UDAF() {};
virtual ~regr_r2() {};
virtual ReturnCode init(mcsv1Context* context,
ColumnDatum* colTypes);
virtual ReturnCode reset(mcsv1Context* context);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_regr_intercept.h

196
utils/regr/regr_slope.cpp Normal file
View File

@@ -0,0 +1,196 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <sstream>
#include <cstring>
#include <typeinfo>
#include "regr_slope.h"
#include "bytestream.h"
#include "objectreader.h"
using namespace mcsv1sdk;
class Add_regr_slope_ToUDAFMap
{
public:
Add_regr_slope_ToUDAFMap()
{
UDAFMap::getMap()["regr_slope"] = new regr_slope();
}
};
static Add_regr_slope_ToUDAFMap addToMap;
// Use the simple data model
struct regr_slope_data
{
uint64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumxy; // sum of (x*y)
};
mcsv1_UDAF::ReturnCode regr_slope::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
if (context->getParameterCount() != 2)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("regr_slope() with other than 2 arguments");
return mcsv1_UDAF::ERROR;
}
context->setUserDataSize(sizeof(regr_slope_data));
context->setResultType(CalpontSystemCatalog::DOUBLE);
context->setColWidth(8);
context->setScale(colTypes[0].scale + 8);
context->setPrecision(19);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_slope::reset(mcsv1Context* context)
{
struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumxy = 0.0;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_slope::nextValue(mcsv1Context* context, ColumnDatum* valsIn)
{
static_any::any& valIn_y = valsIn[0].columnData;
static_any::any& valIn_x = valsIn[1].columnData;
struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
double valx = 0.0;
double valy = 0.0;
valx = convertAnyTo<double>(valIn_x);
valy = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scaley = valsIn[0].scale;
if (valy != 0 && scaley > 0)
{
valy /= pow(10.0, (double)scaley);
}
data->sumy += valy;
// For decimal types, we need to move the decimal point.
uint32_t scalex = valsIn[1].scale;
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
}
data->sumx += valx;
data->sumx2 += valx*valx;
data->sumxy += valx*valy;
++data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_slope::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
if (!userDataIn)
{
return mcsv1_UDAF::SUCCESS;
}
struct regr_slope_data* outData = (struct regr_slope_data*)context->getUserData()->data;
struct regr_slope_data* inData = (struct regr_slope_data*)userDataIn->data;
outData->sumx += inData->sumx;
outData->sumx2 += inData->sumx2;
outData->sumy += inData->sumy;
outData->sumxy += inData->sumxy;
outData->cnt += inData->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_slope::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
double N = data->cnt;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumxy = data->sumxy;
double variance = (N * sumx2) - (sumx * sumx);
if (variance != 0)
{
double slope = ((N * sumxy) - (sumx * sumy)) / variance;
valOut = slope;
}
}
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode regr_slope::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
static_any::any& valIn_y = valsDropped[0].columnData;
static_any::any& valIn_x = valsDropped[1].columnData;
struct regr_slope_data* data = (struct regr_slope_data*)context->getUserData()->data;
double valx = 0.0;
double valy = 0.0;
valx = convertAnyTo<double>(valIn_x);
valy = convertAnyTo<double>(valIn_y);
// For decimal types, we need to move the decimal point.
uint32_t scaley = valsDropped[0].scale;
if (valy != 0 && scaley > 0)
{
valy /= pow(10.0, (double)scaley);
}
data->sumy -= valy;
// For decimal types, we need to move the decimal point.
uint32_t scalex = valsDropped[1].scale;
if (valx != 0 && scalex > 0)
{
valx /= pow(10.0, (double)scaley);
}
data->sumx -= valx;
data->sumx2 -= valx*valx;
data->sumxy -= valx*valy;
--data->cnt;
return mcsv1_UDAF::SUCCESS;
}

88
utils/regr/regr_slope.h Normal file
View File

@@ -0,0 +1,88 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* regr_slope.h
***********************************************************************/
/**
* Columnstore interface for for the regr_slope function
*
*
* CREATE AGGREGATE FUNCTION regr_slope returns REAL
* soname 'libregr_mysql.so';
*
*/
#ifndef HEADER_regr_slope
#define HEADER_regr_slope
#include <cstdlib>
#include <string>
#include <vector>
#ifdef _MSC_VER
#include <unordered_map>
#else
#include <tr1/unordered_map>
#endif
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Return the regr_slope value of the dataset
class regr_slope : public mcsv1_UDAF
{
public:
// Defaults OK
regr_slope() : mcsv1_UDAF() {};
virtual ~regr_slope() {};
virtual ReturnCode init(mcsv1Context* context,
ColumnDatum* colTypes);
virtual ReturnCode reset(mcsv1Context* context);
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* valIn);
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_regr_slope.h

725
utils/regr/regrmysql.cpp Normal file
View File

@@ -0,0 +1,725 @@
#include <my_config.h>
#include <cmath>
#include <iostream>
#include <sstream>
using namespace std;
#include "idb_mysql.h"
namespace
{
inline double cvtArgToDouble(int t, const char* v)
{
double d = 0.0;
switch (t)
{
case INT_RESULT:
d = (double)(*((long long*)v));
break;
case REAL_RESULT:
d = *((double*)v);
break;
case DECIMAL_RESULT:
case STRING_RESULT:
d = strtod(v, 0);
break;
case ROW_RESULT:
break;
}
return d;
}
inline long long cvtArgToInt(int t, const char* v)
{
long long ll = 0;
switch (t)
{
case INT_RESULT:
ll = *((long long*)v);
break;
case REAL_RESULT:
ll = (long long)(*((double*)v));
break;
case DECIMAL_RESULT:
case STRING_RESULT:
ll = strtoll(v, 0, 0);
break;
case ROW_RESULT:
break;
}
return ll;
}
inline string cvtArgToString(int t, const char* v)
{
string str;
switch (t)
{
case INT_RESULT:
{
long long ll;
ll = *((long long*)v);
ostringstream oss;
oss << ll;
str = oss.str();
break;
}
case REAL_RESULT:
{
double d;
d = *((double*)v);
ostringstream oss;
oss << d;
str = oss.str();
break;
}
case DECIMAL_RESULT:
case STRING_RESULT:
str = v;
break;
case ROW_RESULT:
break;
}
return str;
}
}
/****************************************************************************
* UDF function interface for MariaDB connector to recognize is defined in
* this section. MariaDB's UDF function creation guideline needs to be followed.
*
* Three interface need to be defined on the connector for each UDF function.
*
* XXX_init: To allocate the necessary memory for the UDF function and validate
* the input.
* XXX_deinit: To clean up the memory.
* XXX: The function implementation.
* Detailed instruction can be found at MariaDB source directory:
* ~/sql/udf_example.cc.
*
* Please note that the implementation of the function defined on the connector
* will only be called when all the input arguments are constant. e.g.,
* mcs_add(2,3). That way, the function does not run in a distributed fashion
* and could be slow. If there is a need for the UDF function to run with
* pure constant input, then one needs to put a implementation in the XXX
* body, which is very similar to the ones in getXXXval API. If there's no
* such need for a given UDF, then the XXX interface can just return a dummy
* result because this function will never be called.
*/
extern "C"
{
//=======================================================================
/**
* regr_avgx
*/
struct regr_avgx_data
{
double sumx;
int64_t cnt;
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_avgx_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_avgx() requires two arguments");
return 1;
}
if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->sumx = 0;
data->cnt = 0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_avgx_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
data->sumx = 0;
data->cnt = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// Test for NULL in x and y
if (args->args[0] == 0 || args->args[1] == 0)
{
return;
}
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
double xval = cvtArgToDouble(args->arg_type[1], args->args[1]);
++data->cnt;
data->sumx += xval;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
return data->sumx / data->cnt;
}
//=======================================================================
/**
* regr_avgy
*/
struct regr_avgy_data
{
double sumy;
int64_t cnt;
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_avgy_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_avgy_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_avgy() requires two arguments");
return 1;
}
if (!(data = (struct regr_avgy_data*) malloc(sizeof(struct regr_avgy_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->sumy = 0;
data->cnt = 0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_avgy_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_avgy_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr;
data->sumy = 0;
data->cnt = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_avgy_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// Test for NULL in x and y
if (args->args[0] == 0 || args->args[1] == 0)
{
return;
}
struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr;
double yval = cvtArgToDouble(args->arg_type[0], args->args[0]);
++data->cnt;
data->sumy += yval;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_avgy(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_avgy_data* data = (struct regr_avgy_data*)initid->ptr;
return data->sumy / data->cnt;
}
//=======================================================================
/**
* regr_count
*/
struct regr_count_data
{
int64_t cnt;
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_count_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_count_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_count() requires two arguments");
return 1;
}
if (!(data = (struct regr_count_data*) malloc(sizeof(struct regr_count_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_count_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_count_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_count_data* data = (struct regr_count_data*)initid->ptr;
data->cnt = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_count_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// Test for NULL in x and y
if (args->args[0] == 0 || args->args[1] == 0)
{
return;
}
struct regr_count_data* data = (struct regr_count_data*)initid->ptr;
++data->cnt;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long regr_count(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_count_data* data = (struct regr_count_data*)initid->ptr;
return data->cnt;
}
//=======================================================================
/**
* regr_slope
*/
struct regr_slope_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_slope_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_slope_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_slope() requires two arguments");
return 1;
}
if (!(data = (struct regr_slope_data*) malloc(sizeof(struct regr_slope_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumxy = 0.0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_slope_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_slope_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr;
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumxy = 0.0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_slope_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// Test for NULL in x and y
if (args->args[0] == 0 || args->args[1] == 0)
{
return;
}
struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr;
double yval = cvtArgToDouble(args->arg_type[0], args->args[0]);
double xval = cvtArgToDouble(args->arg_type[1], args->args[1]);
data->sumy += yval;
data->sumx += xval;
data->sumx2 += xval*xval;
data->sumxy += xval*yval;
++data->cnt;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_slope(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_slope_data* data = (struct regr_slope_data*)initid->ptr;
double N = data->cnt;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumxy = data->sumxy;
double variance = (N * sumx2) - (sumx * sumx);
if (variance)
{
return ((N * sumxy) - (sumx * sumy)) / variance;
}
}
*is_null = 1;
return 0;
}
//=======================================================================
/**
* regr_intercept
*/
struct regr_intercept_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_intercept_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_intercept_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_intercept() requires two arguments");
return 1;
}
if (!(data = (struct regr_intercept_data*) malloc(sizeof(struct regr_intercept_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumxy = 0.0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_intercept_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_intercept_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr;
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumxy = 0.0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_intercept_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// Test for NULL in x and y
if (args->args[0] == 0 || args->args[1] == 0)
{
return;
}
struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr;
double yval = cvtArgToDouble(args->arg_type[0], args->args[0]);
double xval = cvtArgToDouble(args->arg_type[1], args->args[1]);
data->sumy += yval;
data->sumx += xval;
data->sumx2 += xval*xval;
data->sumxy += xval*yval;
++data->cnt;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_intercept(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_intercept_data* data = (struct regr_intercept_data*)initid->ptr;
double N = data->cnt;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumxy = data->sumxy;
double variance = (N * sumx2) - (sumx * sumx);
if (variance)
{
double slope = ((N * sumxy) - (sumx * sumy)) / variance;
return (sumy - (slope * sumx)) / N;
}
}
*is_null = 1;
return 0;
}
//=======================================================================
/**
* regr_r2
*/
struct regr_r2_data
{
int64_t cnt;
double sumx;
double sumx2; // sum of (x squared)
double sumy;
double sumy2; // sum of (y squared)
double sumxy; // sum of (x*y)
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_r2_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_r2_data* data;
if (args->arg_count != 2)
{
strcpy(message,"regr_r2() requires two arguments");
return 1;
}
if (!(data = (struct regr_r2_data*) malloc(sizeof(struct regr_r2_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumy2 = 0.0;
data->sumxy = 0.0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_r2_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_r2_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
data->cnt = 0;
data->sumx = 0.0;
data->sumx2 = 0.0;
data->sumy = 0.0;
data->sumy2 = 0.0;
data->sumxy = 0.0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_r2_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// Test for NULL in x and y
if (args->args[0] == 0 || args->args[1] == 0)
{
return;
}
struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
double yval = cvtArgToDouble(args->arg_type[0], args->args[0]);
double xval = cvtArgToDouble(args->arg_type[1], args->args[1]);
data->sumy += yval;
data->sumx += xval;
data->sumx2 += xval*xval;
data->sumy2 += yval*yval;
data->sumxy += xval*yval;
++data->cnt;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
double regr_r2(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_r2_data* data = (struct regr_r2_data*)initid->ptr;
double N = data->cnt;
if (N > 0)
{
double sumx = data->sumx;
double sumy = data->sumy;
double sumx2 = data->sumx2;
double sumy2 = data->sumy2;
double sumxy = data->sumxy;
double var_popx = (sumx2 - (sumx * sumx / N)) / N;
if (var_popx == 0)
{
// When var_popx is 0, NULL is the result.
*is_null = 1;
return 0;
}
double var_popy = (sumy2 - (sumy * sumy / N)) / N;
if (var_popy == 0)
{
// When var_popy is 0, 1 is the result
return 1;
}
double std_popx = sqrt(var_popx);
double std_popy = sqrt(var_popy);
double covar_pop = (sumxy - ((sumx * sumy) / N)) / N;
double corr = covar_pop / (std_popy * std_popx);
return corr * corr;
}
*is_null = 1;
return 0;
}
}
// vim:ts=4 sw=4:

View File

@@ -2015,18 +2015,9 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
for (uint32_t i = 0; i < paramCount; ++i)
{
// If UDAF_IGNORE_NULLS is on, bIsNull gets set the first time
// we find a null. We still need to eat the rest of the parameters
// to sync updateEntry
if (bIsNull)
{
++funcColsIdx;
continue;
}
SP_ROWAGG_FUNC_t pFunctionCol = fFunctionCols[funcColsIdx];
mcsv1sdk::ColumnDatum& datum = valsIn[i];
// Turn on NULL flags
// Turn on NULL flags based on the data
dataFlags[i] = 0;
// If this particular parameter is a constant, then we need
@@ -2043,9 +2034,11 @@ void RowAggregation::doUDAF(const Row& rowIn, int64_t colIn, int64_t colOut,
{
if (fRGContext.getRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS))
{
bIsNull = true;
++funcColsIdx;
continue;
// When Ignore nulls, if there are multiple parameters and any
// one of them is NULL, we ignore the entry. We need to increment
// funcColsIdx the number of extra parameters.
funcColsIdx += paramCount - i - 1;
return;
}
dataFlags[i] |= mcsv1sdk::PARAM_IS_NULL;

View File

@@ -242,7 +242,7 @@ struct RowUDAFFunctionCol : public RowAggFunctionCol
mcsv1sdk::mcsv1Context fUDAFContext; // The UDAF context
bool bInterrupted; // Shared by all the threads
};
};
inline void RowAggFunctionCol::serialize(messageqcpp::ByteStream& bs) const
{

View File

@@ -4,7 +4,7 @@ include_directories( ${ENGINE_COMMON_INCLUDES}
########### next target ###############
set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp regr_avgx.cpp avgx.cpp)
set(udfsdk_LIB_SRCS udfsdk.cpp mcsv1_udaf.cpp allnull.cpp ssq.cpp avg_mode.cpp avgx.cpp distinct_count.cpp)
add_definitions(-DMYSQL_DYNAMIC_PLUGIN)

View File

@@ -0,0 +1,99 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "distinct_count.h"
using namespace mcsv1sdk;
struct distinct_count_data
{
uint64_t cnt;
};
#define OUT_TYPE int64_t
mcsv1_UDAF::ReturnCode distinct_count::init(mcsv1Context* context,
ColumnDatum* colTypes)
{
context->setUserDataSize(sizeof(distinct_count_data));
if (context->getParameterCount() != 1)
{
// The error message will be prepended with
// "The storage engine for the table doesn't support "
context->setErrorMessage("avgx() with other than 1 arguments");
return mcsv1_UDAF::ERROR;
}
context->setResultType(CalpontSystemCatalog::BIGINT);
context->setColWidth(8);
context->setRunFlag(mcsv1sdk::UDAF_IGNORE_NULLS);
context->setRunFlag(mcsv1sdk::UDAF_DISTINCT);
context->setRunFlag(mcsv1sdk::UDAF_OVER_REQUIRED);
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode distinct_count::reset(mcsv1Context* context)
{
struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
data->cnt = 0;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode distinct_count::nextValue(mcsv1Context* context,
ColumnDatum* valsIn)
{
static_any::any& valIn = valsIn[0].columnData;
struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
if (valIn.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
data->cnt++;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode distinct_count::subEvaluate(mcsv1Context* context, const UserData* userDataIn)
{
struct distinct_count_data* outData = (struct distinct_count_data*)context->getUserData()->data;
struct distinct_count_data* inData = (struct distinct_count_data*)userDataIn->data;
outData->cnt += inData->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode distinct_count::evaluate(mcsv1Context* context, static_any::any& valOut)
{
struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
valOut = data->cnt;
return mcsv1_UDAF::SUCCESS;
}
mcsv1_UDAF::ReturnCode distinct_count::dropValue(mcsv1Context* context,
ColumnDatum* valsDropped)
{
static_any::any& valIn = valsDropped[0].columnData;
struct distinct_count_data* data = (struct distinct_count_data*)context->getUserData()->data;
if (valIn.empty())
{
return mcsv1_UDAF::SUCCESS; // Ought not happen when UDAF_IGNORE_NULLS is on.
}
data->cnt--;
return mcsv1_UDAF::SUCCESS;
}

View File

@@ -0,0 +1,222 @@
/* Copyright (C) 2017 MariaDB Corporaton
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; version 2 of
the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/***********************************************************************
* $Id$
*
* mcsv1_UDAF.h
***********************************************************************/
/**
* Columnstore interface for writing a User Defined Aggregate
* Functions (UDAF) and User Defined Analytic Functions (UDAnF)
* or a function that can act as either - UDA(n)F
*
* The basic steps are:
*
* 1. Create a the UDA(n)F function interface in some .h file.
* 2. Create the UDF function implementation in some .cpp file
* 3. Create the connector stub (MariaDB UDAF definition) for
* this UDF function.
* 4. build the dynamic library using all of the source.
* 5 Put the library in $COLUMNSTORE_INSTALL/lib of
* all modules
* 6. restart the Columnstore system.
* 7. notify mysqld about the new functions with commands like:
*
* CREATE AGGREGATE FUNCTION distinct_count returns INT
* soname 'libudf_mysql.so';
*
*/
#ifndef HEADER_distinct_count
#define HEADER_distinct_count
#include <cstdlib>
#include <string>
#include <boost/any.hpp>
#include "mcsv1_udaf.h"
#include "calpontsystemcatalog.h"
#include "windowfunctioncolumn.h"
using namespace execplan;
#if defined(_MSC_VER) && defined(xxxRGNODE_DLLEXPORT)
#define EXPORT __declspec(dllexport)
#else
#define EXPORT
#endif
namespace mcsv1sdk
{
// Override mcsv1_UDAF to build your User Defined Aggregate (UDAF) and/or
// User Defined Analytic Function (UDAnF).
// These will be singleton classes, so don't put any instance
// specific data in here. All instance data is stored in mcsv1Context
// passed to each user function and retrieved by the getUserData() method.
//
// Each API function returns a ReturnCode. If ERROR is returned at any time,
// the query is aborted, getInterrupted() will begin to return true and the
// message set in config->setErrorMessage() is returned to MariaDB.
class distinct_count : public mcsv1_UDAF
{
public:
// Defaults OK
distinct_count() : mcsv1_UDAF(){};
virtual ~distinct_count(){};
/**
* init()
*
* Mandatory. Implement this to initialize flags and instance
* data. Called once per SQL statement. You can do any sanity
* checks here.
*
* colTypes (in) - A vector of ColDataType defining the
* parameters of the UDA(n)F call. These can be used to decide
* to override the default return type. If desired, the new
* return type can be set by context->setReturnType() and
* decimal precision can be set in context->
* setResultDecimalCharacteristics.
*
* Return mcsv1_UDAF::ERROR on any error, such as non-compatible
* colTypes or wrong number of arguments. Else return
* mcsv1_UDAF::SUCCESS.
*/
virtual ReturnCode init(mcsv1Context* context, ColumnDatum* colTypes);
/**
* reset()
*
* Mandatory. Reset the UDA(n)F for a new group, partition or,
* in some cases, new Window Frame. Do not free any memory
* allocated by context->setUserDataSize(). The SDK Framework owns
* that memory and will handle that. Use this opportunity to
* reset any variables in context->getUserData() needed for the
* next aggregation. May be called multiple times if running in
* a ditributed fashion.
*
* Use this opportunity to initialize the userData.
*/
virtual ReturnCode reset(mcsv1Context* context);
/**
* nextValue()
*
* Mandatory. Handle a single row.
*
* colsIn - A vector of data structure describing the input
* data.
*
* This function is called once for every row in the filtered
* result set (before aggregation). It is very important that
* this function is efficient.
*
* If the UDAF is running in a distributed fashion, nextValue
* cannot depend on order, as it will only be called for each
* row found on the specific PM.
*
* valsIn (in) - a vector of the parameters from the row.
*/
virtual ReturnCode nextValue(mcsv1Context* context, ColumnDatum* valsIn);
/**
* subEvaluate()
*
* Mandatory -- Called if the UDAF is running in a distributed
* fashion. Columnstore tries to run all aggregate functions
* distributed, depending on context.
*
* Perform an aggregation on rows partially aggregated by
* nextValue. Columnstore calls nextValue for each row on a
* given PM for a group (GROUP BY). subEvaluate is called on the
* UM to consolodate those values into a single instance of
* userData. Keep your aggregated totals in context's userData.
* The first time this is called for a group, reset() would have
* been called with this version of userData.
*
* Called for every partial data set in each group in GROUP BY.
*
* When subEvaluate has been called for all subAggregated data
* sets, Evaluate will be called with the same context as here.
*
* valIn (In) - This is a pointer to a memory block of the size
* set in setUserDataSize. It will contain the value of userData
* as seen in the last call to NextValue for a given PM.
*
*/
virtual ReturnCode subEvaluate(mcsv1Context* context, const UserData* userDataIn);
/**
* evaluate()
*
* Mandatory. Get the aggregated value.
*
* Called for every new group if UDAF GROUP BY, UDAnF partition
* or, in some cases, new Window Frame.
*
* Set the aggregated value into valOut. The datatype is assumed
* to be the same as that set in the init() function;
*
* If the UDAF is running in a distributed fashion, evaluate is
* called after a series of subEvaluate calls.
*
* valOut (out) - Set the aggregated value here. The datatype is
* assumed to be the same as that set in the init() function;
*
* To return a NULL value, don't assign to valOut.
*/
virtual ReturnCode evaluate(mcsv1Context* context, static_any::any& valOut);
/**
* dropValue()
*
* Optional -- If defined, the server will call this instead of
* reset for UDAnF.
*
* Don't implement if a UDAnF has one or more of the following:
* The UDAnF can't be used with a Window Frame
* The UDAnF is not reversable in some way
* The UDAnF is not interested in optimal performance
*
* If not implemented, reset() followed by a series of
* nextValue() will be called for each movement of the Window
* Frame.
*
* If implemented, then each movement of the Window Frame will
* result in dropValue() being called for each row falling out
* of the Frame and nextValue() being called for each new row
* coming into the Frame.
*
* valsDropped (in) - a vector of the parameters from the row
* leaving the Frame
*
* dropValue() will not be called for unbounded/current row type
* frames, as those are already optimized.
*/
virtual ReturnCode dropValue(mcsv1Context* context, ColumnDatum* valsDropped);
protected:
};
}; // namespace
#undef EXPORT
#endif // HEADER_distinct_count.h

View File

@@ -31,14 +31,21 @@ using namespace mcsv1sdk;
* This is a temporary kludge until we get the library loader
* task complete
*/
UDAF_MAP UDAFMap::fm;
#include "allnull.h"
#include "ssq.h"
#include "median.h"
#include "avg_mode.h"
#include "regr_avgx.h"
#include "avgx.h"
UDAF_MAP& UDAFMap::fm()
{
static UDAF_MAP* m = new UDAF_MAP;
return *m;
}
UDAF_MAP& UDAFMap::getMap()
{
UDAF_MAP& fm = UDAFMap::fm();
if (fm.size() > 0)
{
return fm;
@@ -51,8 +58,8 @@ UDAF_MAP& UDAFMap::getMap()
// the function names passed to the interface is always in lower case.
fm["allnull"] = new allnull();
fm["ssq"] = new ssq();
// fm["median"] = new median();
fm["avg_mode"] = new avg_mode();
fm["regr_avgx"] = new regr_avgx();
fm["avgx"] = new avgx();
return fm;

View File

@@ -108,7 +108,7 @@ public:
static EXPORT UDAF_MAP& getMap();
private:
static UDAF_MAP fm;
static UDAF_MAP& fm();
};
/**
@@ -189,6 +189,7 @@ static uint64_t UDAF_WINDOWFRAME_REQUIRED __attribute__ ((unused)) = 1 << 4; //
static uint64_t UDAF_WINDOWFRAME_ALLOWED __attribute__ ((unused)) = 1 << 5; // If used as UDAnF, a WINDOW FRAME is optional
static uint64_t UDAF_MAYBE_NULL __attribute__ ((unused)) = 1 << 6; // If UDA(n)F might return NULL.
static uint64_t UDAF_IGNORE_NULLS __attribute__ ((unused)) = 1 << 7; // If UDA(n)F wants NULL rows suppressed.
static uint64_t UDAF_DISTINCT __attribute__ ((unused)) = 1 << 8; // Force UDA(n)F to be distinct on first param.
// Flags set by the framework to define the context of the call.
// User code shouldn't use these directly
@@ -380,6 +381,7 @@ private:
std::string functionName;
mcsv1sdk::mcsv1_UDAF* func;
int32_t fParamCount;
std::vector<uint32_t> paramKeys;
public:
// For use by the framework
@@ -402,6 +404,7 @@ public:
EXPORT mcsv1sdk::mcsv1_UDAF* getFunction() const;
EXPORT boost::shared_ptr<UserData> getUserDataSP();
EXPORT void setParamCount(int32_t paramCount);
std::vector<uint32_t>* getParamKeys();
};
// Since aggregate functions can operate on any data type, we use the following structure
@@ -605,6 +608,9 @@ public:
virtual ReturnCode createUserData(UserData*& userdata, int32_t& length);
protected:
// some handy conversion routines
template<typename T>
T convertAnyTo(static_any::any&);
// These are handy for testing the actual type of static_any
static const static_any::any& charTypeId;
static const static_any::any& scharTypeId;
@@ -947,6 +953,11 @@ inline void mcsv1Context::setParamCount(int32_t paramCount)
fParamCount = paramCount;
}
inline std::vector<uint32_t>* mcsv1Context::getParamKeys()
{
return &paramKeys;
}
inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::dropValue(mcsv1Context* context, ColumnDatum* valsDropped)
{
return NOT_IMPLEMENTED;
@@ -959,6 +970,68 @@ inline mcsv1_UDAF::ReturnCode mcsv1_UDAF::createUserData(UserData*& userData, in
return SUCCESS;
}
// Handy helper functions
template<typename T>
inline T mcsv1_UDAF::convertAnyTo(static_any::any& valIn)
{
T val;
if (valIn.compatible(longTypeId))
{
val = valIn.cast<long>();
}
else if (valIn.compatible(charTypeId))
{
val = valIn.cast<char>();
}
else if (valIn.compatible(scharTypeId))
{
val = valIn.cast<signed char>();
}
else if (valIn.compatible(shortTypeId))
{
val = valIn.cast<short>();
}
else if (valIn.compatible(intTypeId))
{
val = valIn.cast<int>();
}
else if (valIn.compatible(llTypeId))
{
val = valIn.cast<long long>();
}
else if (valIn.compatible(ucharTypeId))
{
val = valIn.cast<unsigned char>();
}
else if (valIn.compatible(ushortTypeId))
{
val = valIn.cast<unsigned short>();
}
else if (valIn.compatible(uintTypeId))
{
val = valIn.cast<unsigned int>();
}
else if (valIn.compatible(ulongTypeId))
{
val = valIn.cast<unsigned long>();
}
else if (valIn.compatible(ullTypeId))
{
val = valIn.cast<unsigned long long>();
}
else if (valIn.compatible(floatTypeId))
{
val = valIn.cast<float>();
}
else if (valIn.compatible(doubleTypeId))
{
val = valIn.cast<double>();
}
return val;
}
}; // namespace mcssdk
#undef EXPORT

View File

@@ -349,6 +349,78 @@ extern "C"
return data->sumsq;
}
//=======================================================================
/**
* MEDIAN connector stub
*/
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool median_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
if (args->arg_count != 1)
{
strcpy(message, "median() requires one argument");
return 1;
}
/*
if (!(data = (struct ssq_data*) malloc(sizeof(struct ssq_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->sumsq = 0;
initid->ptr = (char*)data;
*/
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void median_deinit(UDF_INIT* initid)
{
// free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
median_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
// struct ssq_data* data = (struct ssq_data*)initid->ptr;
// data->sumsq = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
median_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// struct ssq_data* data = (struct ssq_data*)initid->ptr;
// double val = cvtArgToDouble(args->arg_type[0], args->args[0]);
// data->sumsq = val*val;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long median(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
// struct ssq_data* data = (struct ssq_data*)initid->ptr;
// return data->sumsq;
return 0;
}
/**
* avg_mode connector stub
*/
@@ -422,167 +494,83 @@ extern "C"
//=======================================================================
/**
* regr_avgx connector stub
*/
struct regr_avgx_data
{
double sumx;
int64_t cnt;
};
#ifdef _MSC_VER
__declspec(dllexport)
#endif
my_bool regr_avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct regr_avgx_data* data;
if (args->arg_count != 2)
{
strcpy(message, "regr_avgx() requires two arguments");
return 1;
}
if (!(data = (struct regr_avgx_data*) malloc(sizeof(struct regr_avgx_data))))
{
strmov(message, "Couldn't allocate memory");
return 1;
}
data->sumx = 0;
data->cnt = 0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void regr_avgx_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
{
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
data->sumx = 0;
data->cnt = 0;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
void
regr_avgx_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
{
// TODO test for NULL in x and y
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
double xval = cvtArgToDouble(args->arg_type[1], args->args[0]);
++data->cnt;
data->sumx += xval;
}
#ifdef _MSC_VER
__declspec(dllexport)
#endif
long long regr_avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
{
struct regr_avgx_data* data = (struct regr_avgx_data*)initid->ptr;
return data->sumx / data->cnt;
}
//=======================================================================
/**
* avgx connector stub. Exactly the same functionality as the
* built in avg() function. Use to test the performance of the
* API
* avgx connector stub. Exactly the same functionality as the
* built in avg() function. Use to test the performance of the
* API
*/
struct avgx_data
{
double sumx;
int64_t cnt;
double sumx;
int64_t cnt;
};
#ifdef _MSC_VER
#ifdef _MSC_VER
__declspec(dllexport)
#endif
#endif
my_bool avgx_init(UDF_INIT* initid, UDF_ARGS* args, char* message)
{
struct avgx_data* data;
struct avgx_data* data;
if (args->arg_count != 1)
{
strcpy(message,"avgx() requires one argument");
return 1;
}
if (args->arg_count != 1)
{
strcpy(message, "avgx() requires one argument");
return 1;
}
if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data))))
{
strmov(message, "Couldn't allocate memory");
return 1;
}
data->sumx = 0;
if (!(data = (struct avgx_data*) malloc(sizeof(struct avgx_data))))
{
strmov(message,"Couldn't allocate memory");
return 1;
}
data->sumx = 0;
data->cnt = 0;
initid->ptr = (char*)data;
return 0;
initid->ptr = (char*)data;
return 0;
}
#ifdef _MSC_VER
#ifdef _MSC_VER
__declspec(dllexport)
#endif
#endif
void avgx_deinit(UDF_INIT* initid)
{
free(initid->ptr);
}
free(initid->ptr);
}
#ifdef _MSC_VER
#ifdef _MSC_VER
__declspec(dllexport)
#endif
#endif
void
avgx_clear(UDF_INIT* initid, char* is_null __attribute__((unused)),
char* message __attribute__((unused)))
char* message __attribute__((unused)))
{
struct avgx_data* data = (struct avgx_data*)initid->ptr;
data->sumx = 0;
struct avgx_data* data = (struct avgx_data*)initid->ptr;
data->sumx = 0;
data->cnt = 0;
}
#ifdef _MSC_VER
#ifdef _MSC_VER
__declspec(dllexport)
#endif
#endif
void
avgx_add(UDF_INIT* initid, UDF_ARGS* args,
char* is_null,
char* message __attribute__((unused)))
char* is_null,
char* message __attribute__((unused)))
{
// TODO test for NULL in x and y
struct avgx_data* data = (struct avgx_data*)initid->ptr;
double xval = cvtArgToDouble(args->arg_type[1], args->args[0]);
struct avgx_data* data = (struct avgx_data*)initid->ptr;
double xval = cvtArgToDouble(args->arg_type[1], args->args[0]);
++data->cnt;
data->sumx += xval;
data->sumx += xval;
}
#ifdef _MSC_VER
#ifdef _MSC_VER
__declspec(dllexport)
#endif
#endif
long long avgx(UDF_INIT* initid, UDF_ARGS* args __attribute__((unused)),
char* is_null, char* error __attribute__((unused)))
char* is_null, char* error __attribute__((unused)))
{
struct avgx_data* data = (struct avgx_data*)initid->ptr;
return data->sumx / data->cnt;
struct avgx_data* data = (struct avgx_data*)initid->ptr;
return data->sumx / data->cnt;
}
}
// vim:ts=4 sw=4:

View File

@@ -207,7 +207,6 @@
<F N="avgx.cpp"/>
<F N="mcsv1_udaf.cpp"/>
<F N="median.cpp"/>
<F N="regr_avgx.cpp"/>
<F N="ssq.cpp"/>
<F N="udfmysql.cpp"/>
<F N="udfsdk.cpp"/>
@@ -220,7 +219,6 @@
<F N="avgx.h"/>
<F N="mcsv1_udaf.h"/>
<F N="median.h"/>
<F N="regr_avgx.h"/>
<F N="ssq.h"/>
<F N="udfsdk.h"/>
</Folder>

View File

@@ -292,6 +292,8 @@
Filters="*.bmp"/>
<Folder
Name="Other Files"
Filters=""/>
Filters="">
<F N="CMakeLists.txt"/>
</Folder>
</Files>
</Project>

View File

@@ -53,69 +53,11 @@ using namespace joblist;
namespace windowfunction
{
template<typename T>
boost::shared_ptr<WindowFunctionType> WF_udaf<T>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context)
boost::shared_ptr<WindowFunctionType> WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context)
{
boost::shared_ptr<WindowFunctionType> func;
switch (ct)
{
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
case CalpontSystemCatalog::DECIMAL:
{
func.reset(new WF_udaf<int64_t>(id, name, context));
break;
}
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
case CalpontSystemCatalog::UDECIMAL:
{
func.reset(new WF_udaf<uint64_t>(id, name, context));
break;
}
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
{
func.reset(new WF_udaf<double>(id, name, context));
break;
}
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
{
func.reset(new WF_udaf<float>(id, name, context));
break;
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::BLOB:
{
func.reset(new WF_udaf<string>(id, name, context));
break;
}
default:
{
string errStr = name + "(" + colType2String[ct] + ")";
errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
cerr << errStr << endl;
throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
break;
}
}
func.reset(new WF_udaf(id, name, context));
// Get the UDAnF function object
WF_udaf* wfUDAF = (WF_udaf*)func.get();
@@ -125,30 +67,26 @@ boost::shared_ptr<WindowFunctionType> WF_udaf<T>::makeFunction(int id, const str
return func;
}
template<typename T>
WF_udaf<T>::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()),
WF_udaf::WF_udaf(WF_udaf& rhs) : fUDAFContext(rhs.getContext()),
bInterrupted(rhs.getInterrupted()),
fDistinct(rhs.getDistinct())
{
getContext().setInterrupted(getInterruptedPtr());
}
template<typename T>
WindowFunctionType* WF_udaf<T>::clone() const
WindowFunctionType* WF_udaf::clone() const
{
return new WF_udaf(*const_cast<WF_udaf*>(this));
}
template<typename T>
void WF_udaf<T>::resetData()
void WF_udaf::resetData()
{
getContext().getFunction()->reset(&getContext());
fDistinctSet.clear();
fDistinctMap.clear();
WindowFunctionType::resetData();
}
template<typename T>
void WF_udaf<T>::parseParms(const std::vector<execplan::SRCP>& parms)
void WF_udaf::parseParms(const std::vector<execplan::SRCP>& parms)
{
bRespectNulls = true;
// The last parms: respect null | ignore null
@@ -156,10 +94,13 @@ void WF_udaf<T>::parseParms(const std::vector<execplan::SRCP>& parms)
idbassert(cc != NULL);
bool isNull = false; // dummy, harded coded
bRespectNulls = (cc->getIntVal(fRow, isNull) > 0);
if (getContext().getRunFlag(mcsv1sdk::UDAF_DISTINCT))
{
setDistinct();
}
}
template<typename T>
bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
bool WF_udaf::dropValues(int64_t b, int64_t e)
{
if (!bHasDropValue)
{
@@ -168,6 +109,7 @@ bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
}
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
bool isNull = false;
// Turn on the Analytic flag so the function is aware it is being called
// as a Window Function.
@@ -175,14 +117,26 @@ bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
// Put the parameter metadata (type, scale, precision) into valsIn
mcsv1sdk::ColumnDatum valsIn[getContext().getParameterCount()];
ConstantColumn* cc = NULL;
for (uint32_t i = 0; i < getContext().getParameterCount(); ++i)
{
uint64_t colIn = fFieldIndex[i + 1];
mcsv1sdk::ColumnDatum& datum = valsIn[i];
datum.dataType = fRow.getColType(colIn);
datum.scale = fRow.getScale(colIn);
datum.precision = fRow.getPrecision(colIn);
cc = static_cast<ConstantColumn*>(fConstantParms[i].get());
if (cc)
{
datum.dataType = cc->resultType().colDataType;
datum.scale = cc->resultType().scale;
datum.precision = cc->resultType().precision;
}
else
{
uint64_t colIn = fFieldIndex[i + 1];
datum.dataType = fRow.getColType(colIn);
datum.scale = fRow.getScale(colIn);
datum.precision = fRow.getPrecision(colIn);
}
}
for (int64_t i = b; i < e; i++)
@@ -190,52 +144,326 @@ bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
if (i % 1000 == 0 && fStep->cancelled())
break;
bool bHasNull = false;
fRow.setData(getPointer(fRowData->at(i)));
// Turn on NULL flags
// NULL flags
uint32_t flags[getContext().getParameterCount()];
bool bSkipIt = false;
for (uint32_t k = 0; k < getContext().getParameterCount(); ++k)
{
cc = static_cast<ConstantColumn*>(fConstantParms[k].get());
uint64_t colIn = fFieldIndex[k + 1];
mcsv1sdk::ColumnDatum& datum = valsIn[k];
flags[k] = 0;
if (fRow.isNullValue(colIn) == true)
// Turn on Null flags or skip based on respect nulls
flags[k] = 0;
if ((!cc && fRow.isNullValue(colIn) == true)
|| (cc && cc->type() == ConstantColumn::NULLDATA))
{
if (!bRespectNulls)
{
bHasNull = true;
bSkipIt = true;
break;
}
flags[k] |= mcsv1sdk::PARAM_IS_NULL;
}
T valIn;
getValue(colIn, valIn, &datum.dataType);
// Check for distinct, if turned on.
// Currently, distinct only works for param 1
if (k == 0)
if (!bSkipIt && !(flags[k] & mcsv1sdk::PARAM_IS_NULL))
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
switch (datum.dataType)
{
continue;
case CalpontSystemCatalog::TINYINT:
case CalpontSystemCatalog::SMALLINT:
case CalpontSystemCatalog::MEDINT:
case CalpontSystemCatalog::INT:
case CalpontSystemCatalog::BIGINT:
{
int64_t valIn;
if (cc)
{
valIn = cc->getIntVal(fRow, isNull);
}
else
{
getValue(colIn, valIn);
}
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if (fDistinct)
{
DistinctMap::iterator distinct;
distinct = fDistinctMap.find(valIn);
if (distinct != fDistinctMap.end())
{
// This is a duplicate: decrement the count
--(*distinct).second;
if ((*distinct).second > 0) // still more of these
{
bSkipIt = true;
continue;
}
else
{
fDistinctMap.erase(distinct);
}
}
}
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::DECIMAL:
case CalpontSystemCatalog::UDECIMAL:
{
int64_t valIn;
if (cc)
{
valIn = cc->getDecimalVal(fRow, isNull).value;
}
else
{
getValue(colIn, valIn);
}
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if (fDistinct)
{
DistinctMap::iterator distinct;
distinct = fDistinctMap.find(valIn);
if (distinct != fDistinctMap.end())
{
// This is a duplicate: decrement the count
--(*distinct).second;
if ((*distinct).second > 0) // still more of these
{
bSkipIt = true;
continue;
}
else
{
fDistinctMap.erase(distinct);
}
}
}
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::UTINYINT:
case CalpontSystemCatalog::USMALLINT:
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
case CalpontSystemCatalog::TIME:
case CalpontSystemCatalog::DATE:
case CalpontSystemCatalog::DATETIME:
{
uint64_t valIn;
if (cc)
{
valIn = cc->getUintVal(fRow, isNull);
}
else
{
getValue(colIn, valIn);
}
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if (fDistinct)
{
DistinctMap::iterator distinct;
distinct = fDistinctMap.find(valIn);
if (distinct != fDistinctMap.end())
{
// This is a duplicate: decrement the count
--(*distinct).second;
if ((*distinct).second > 0) // still more of these
{
bSkipIt = true;
continue;
}
else
{
fDistinctMap.erase(distinct);
}
}
}
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::DOUBLE:
case CalpontSystemCatalog::UDOUBLE:
{
double valIn;
if (cc)
{
valIn = cc->getDoubleVal(fRow, isNull);
}
else
{
getValue(colIn, valIn);
}
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if (fDistinct)
{
DistinctMap::iterator distinct;
distinct = fDistinctMap.find(valIn);
if (distinct != fDistinctMap.end())
{
// This is a duplicate: decrement the count
--(*distinct).second;
if ((*distinct).second > 0) // still more of these
{
bSkipIt = true;
continue;
}
else
{
fDistinctMap.erase(distinct);
}
}
}
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::FLOAT:
case CalpontSystemCatalog::UFLOAT:
{
float valIn;
if (cc)
{
valIn = cc->getFloatVal(fRow, isNull);
}
else
{
getValue(colIn, valIn);
}
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if (fDistinct)
{
DistinctMap::iterator distinct;
distinct = fDistinctMap.find(valIn);
if (distinct != fDistinctMap.end())
{
// This is a duplicate: decrement the count
--(*distinct).second;
if ((*distinct).second > 0) // still more of these
{
bSkipIt = true;
continue;
}
else
{
fDistinctMap.erase(distinct);
}
}
}
}
datum.columnData = valIn;
break;
}
case CalpontSystemCatalog::CHAR:
case CalpontSystemCatalog::VARCHAR:
case CalpontSystemCatalog::VARBINARY:
case CalpontSystemCatalog::TEXT:
case CalpontSystemCatalog::BLOB:
{
string valIn;
if (cc)
{
valIn = cc->getStrVal(fRow, isNull);
}
else
{
getValue(colIn, valIn);
}
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
{
if (fDistinct)
{
DistinctMap::iterator distinct;
distinct = fDistinctMap.find(valIn);
if (distinct != fDistinctMap.end())
{
// This is a duplicate: decrement the count
--(*distinct).second;
if ((*distinct).second > 0) // still more of these
{
bSkipIt = true;
continue;
}
else
{
fDistinctMap.erase(distinct);
}
}
}
}
datum.columnData = valIn;
break;
}
default:
{
string errStr = "(" + colType2String[(int)datum.dataType] + ")";
errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
cerr << errStr << endl;
throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
break;
}
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
}
if (bHasNull)
if (bSkipIt)
{
continue;
}
getContext().setDataFlags(flags);
rc = getContext().getFunction()->dropValue(&getContext(), valsIn);
if (rc == mcsv1sdk::mcsv1_UDAF::NOT_IMPLEMENTED)
@@ -257,8 +485,7 @@ bool WF_udaf<T>::dropValues(int64_t b, int64_t e)
}
// Sets the value from valOut into column colOut, performing any conversions.
template<typename T>
void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
void WF_udaf::SetUDAFValue(static_any::any& valOut, int64_t colOut,
int64_t b, int64_t e, int64_t c)
{
static const static_any::any& charTypeId = (char)1;
@@ -279,15 +506,6 @@ void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
CDT colDataType = fRow.getColType(colOut);
if (valOut.empty())
{
// If valOut is empty, we return NULL
T* pv = NULL;
setValue(colDataType, b, e, c, pv);
fPrev = c;
return;
}
// This may seem a bit convoluted. Users shouldn't return a type
// that they didn't set in mcsv1_UDAF::init(), but this
// handles whatever return type is given and casts
@@ -405,7 +623,14 @@ void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
case execplan::CalpontSystemCatalog::BIGINT:
case execplan::CalpontSystemCatalog::DECIMAL:
case execplan::CalpontSystemCatalog::UDECIMAL:
setValue(colDataType, b, e, c, &intOut);
if (valOut.empty())
{
setValue(colDataType, b, e, c, (int64_t*)NULL);
}
else
{
setValue(colDataType, b, e, c, &intOut);
}
break;
case execplan::CalpontSystemCatalog::UTINYINT:
@@ -416,17 +641,38 @@ void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
case execplan::CalpontSystemCatalog::DATE:
case execplan::CalpontSystemCatalog::DATETIME:
case execplan::CalpontSystemCatalog::TIME:
setValue(colDataType, b, e, c, &uintOut);
if (valOut.empty())
{
setValue(colDataType, b, e, c, (uint64_t*)NULL);
}
else
{
setValue(colDataType, b, e, c, &uintOut);
}
break;
case execplan::CalpontSystemCatalog::FLOAT:
case execplan::CalpontSystemCatalog::UFLOAT:
setValue(colDataType, b, e, c, &floatOut);
if (valOut.empty())
{
setValue(colDataType, b, e, c, (float*)NULL);
}
else
{
setValue(colDataType, b, e, c, &floatOut);
}
break;
case execplan::CalpontSystemCatalog::DOUBLE:
case execplan::CalpontSystemCatalog::UDOUBLE:
setValue(colDataType, b, e, c, &doubleOut);
if (valOut.empty())
{
setValue(colDataType, b, e, c, (double*)NULL);
}
else
{
setValue(colDataType, b, e, c, &doubleOut);
}
break;
case execplan::CalpontSystemCatalog::CHAR:
@@ -435,7 +681,14 @@ void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
case execplan::CalpontSystemCatalog::VARBINARY:
case execplan::CalpontSystemCatalog::CLOB:
case execplan::CalpontSystemCatalog::BLOB:
setValue(colDataType, b, e, c, &strOut);
if (valOut.empty())
{
setValue(colDataType, b, e, c, (string*)NULL);
}
else
{
setValue(colDataType, b, e, c, &strOut);
}
break;
default:
@@ -449,8 +702,7 @@ void WF_udaf<T>::SetUDAFValue(static_any::any& valOut, int64_t colOut,
}
}
template<typename T>
void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
void WF_udaf::operator()(int64_t b, int64_t e, int64_t c)
{
mcsv1sdk::mcsv1_UDAF::ReturnCode rc;
uint64_t colOut = fFieldIndex[0];
@@ -499,7 +751,7 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
else
getContext().clearContextFlag(mcsv1sdk::CONTEXT_HAS_CURRENT_ROW);
bool bHasNull = false;
bool bSkipIt = false;
for (int64_t i = b; i <= e; i++)
{
@@ -510,7 +762,7 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// NULL flags
uint32_t flags[getContext().getParameterCount()];
bHasNull = false;
bSkipIt = false;
for (uint32_t k = 0; k < getContext().getParameterCount(); ++k)
{
@@ -526,14 +778,14 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
{
if (!bRespectNulls)
{
bHasNull = true;
bSkipIt = true;
break;
}
flags[k] |= mcsv1sdk::PARAM_IS_NULL;
}
if (!bHasNull && !(flags[k] & mcsv1sdk::PARAM_IS_NULL))
if (!bSkipIt && !(flags[k] & mcsv1sdk::PARAM_IS_NULL))
{
switch (datum.dataType)
{
@@ -556,15 +808,23 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
// MCOL-1698
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
// Unordered_map will not insert a duplicate key (valIn).
// If it doesn't insert, the original pair will be returned
// in distinct.first and distinct.second will be a bool --
// true if newly inserted, false if a duplicate.
std::pair<DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
// This is a duplicate: increment the count
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@@ -587,15 +847,17 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@@ -607,6 +869,9 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
case CalpontSystemCatalog::UMEDINT:
case CalpontSystemCatalog::UINT:
case CalpontSystemCatalog::UBIGINT:
case CalpontSystemCatalog::TIME:
case CalpontSystemCatalog::DATE:
case CalpontSystemCatalog::DATETIME:
{
uint64_t valIn;
@@ -621,15 +886,17 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@@ -652,15 +919,17 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@@ -683,15 +952,17 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@@ -717,15 +988,17 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
// Check for distinct, if turned on.
// Currently, distinct only works on the first parameter.
if (k == 0)
if (k == 0 && fDistinct)
{
if ((fDistinct) || (fDistinctSet.find(valIn) != fDistinctSet.end()))
std::pair<static_any::any, uint64_t> val = make_pair(valIn, 1);
std::pair<DistinctMap::iterator, bool> distinct;
distinct = fDistinctMap.insert(val);
if (distinct.second == false)
{
++(*distinct.first).second;
bSkipIt = true;
continue;
}
if (fDistinct)
fDistinctSet.insert(valIn);
}
datum.columnData = valIn;
@@ -734,7 +1007,7 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
default:
{
string errStr = "(" + colType2String[i] + ")";
string errStr = "(" + colType2String[(int)datum.dataType] + ")";
errStr = IDBErrorInfo::instance()->errorMsg(ERR_WF_INVALID_PARM_TYPE, errStr);
cerr << errStr << endl;
throw IDBExcept(errStr, ERR_WF_INVALID_PARM_TYPE);
@@ -746,7 +1019,7 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
}
// Skip if any value is NULL and respect nulls is off.
if (bHasNull)
if (bSkipIt)
{
continue;
}
@@ -780,8 +1053,7 @@ void WF_udaf<T>::operator()(int64_t b, int64_t e, int64_t c)
fPrev = c;
}
template
boost::shared_ptr<WindowFunctionType> WF_udaf<int64_t>::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context);
boost::shared_ptr<WindowFunctionType> WF_udaf::makeFunction(int id, const string& name, int ct, mcsv1sdk::mcsv1Context& context);
} //namespace
// vim:ts=4 sw=4:

View File

@@ -22,9 +22,9 @@
#define UTILS_WF_UDAF_H
#ifndef _MSC_VER
#include <tr1/unordered_set>
#include <tr1/unordered_map>
#else
#include <unordered_set>
#include <unordered_map>
#endif
#include "windowfunctiontype.h"
#include "mcsv1_udaf.h"
@@ -45,7 +45,7 @@ public:
class DistinctEqual
{
public:
inline bool operator()(const static_any::any& lhs, static_any::any& rhs) const
inline bool operator()(const static_any::any lhs, static_any::any rhs) const
{
return lhs == rhs;
}
@@ -53,7 +53,6 @@ public:
// A class to control the execution of User Define Analytic Functions (UDAnF)
// as defined by a specialization of mcsv1sdk::mcsv1_UDAF
template<typename T>
class WF_udaf : public WindowFunctionType
{
public:
@@ -84,6 +83,11 @@ public:
return fDistinct;
}
void setDistinct(bool d = true)
{
fDistinct = d;
}
protected:
void SetUDAFValue(static_any::any& valOut, int64_t colOut, int64_t b, int64_t e, int64_t c);
@@ -92,8 +96,10 @@ protected:
bool fDistinct;
bool bRespectNulls; // respect null | ignore null
bool bHasDropValue; // Set to false when we discover the UDAnF doesn't implement dropValue.
// To hold distinct values
std::tr1::unordered_set<static_any::any, DistinctHasher, DistinctEqual> fDistinctSet;
// To hold distinct values and their counts
typedef std::tr1::unordered_map<static_any::any, uint64_t, DistinctHasher, DistinctEqual> DistinctMap;
DistinctMap fDistinctMap;
static_any::any fValOut; // The return value
public:

View File

@@ -208,7 +208,7 @@ WindowFunctionType::makeWindowFunction(const string& name, int ct, WindowFunctio
break;
case WF__UDAF:
af = WF_udaf<int64_t>::makeFunction(functionId, name, ct, wc->getUDAFContext());
af = WF_udaf::makeFunction(functionId, name, ct, wc->getUDAFContext());
break;
case WF__REGR_SLOPE: